Updated script that can be controlled by a Node.js web app
This commit is contained in:
383
lib/python3.13/site-packages/pandas/tests/api/test_api.py
Normal file
383
lib/python3.13/site-packages/pandas/tests/api/test_api.py
Normal file
@ -0,0 +1,383 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import api
|
||||
import pandas._testing as tm
|
||||
from pandas.api import (
|
||||
extensions as api_extensions,
|
||||
indexers as api_indexers,
|
||||
interchange as api_interchange,
|
||||
types as api_types,
|
||||
typing as api_typing,
|
||||
)
|
||||
|
||||
|
||||
class Base:
    """Shared helper for tests that pin the set of names a namespace exposes."""

    def check(self, namespace, expected, ignored=None):
        """
        Assert that ``namespace`` exposes exactly the names in ``expected``.

        Parameters
        ----------
        namespace : object
            Module or object whose ``dir()`` listing is inspected.
        expected : iterable of str
            Names that must be present; order does not matter.
        ignored : iterable of str, optional
            Names removed from the listing before comparing.

        Raises
        ------
        AssertionError
            If the sorted listing differs from the sorted expectation.
        """
        # Names starting with a double underscore and the name "annotations"
        # are excluded (the latter presumably comes from
        # ``from __future__ import annotations`` - confirm if it matters).
        result = sorted(
            f for f in dir(namespace) if not f.startswith("__") and f != "annotations"
        )
        if ignored is not None:
            result = sorted(set(result) - set(ignored))

        expected = sorted(expected)
        tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
class TestPDApi(Base):
    """Pin the names exposed by the top-level ``pandas`` namespace."""

    # these are optionally imported based on testing
    # & need to be ignored
    ignored = ["tests", "locale", "conftest", "_version_meson"]

    # top-level sub-packages
    public_lib = [
        "api",
        "arrays",
        "options",
        "test",
        "testing",
        "errors",
        "plotting",
        "io",
        "tseries",
    ]
    private_lib = ["compat", "core", "pandas", "util", "_built_with_meson"]

    # misc
    misc = ["IndexSlice", "NaT", "NA"]

    # top-level classes
    classes = [
        "ArrowDtype",
        "Categorical",
        "CategoricalIndex",
        "DataFrame",
        "DateOffset",
        "DatetimeIndex",
        "ExcelFile",
        "ExcelWriter",
        "Flags",
        "Grouper",
        "HDFStore",
        "Index",
        "MultiIndex",
        "Period",
        "PeriodIndex",
        "RangeIndex",
        "Series",
        "SparseDtype",
        "StringDtype",
        "Timedelta",
        "TimedeltaIndex",
        "Timestamp",
        "Interval",
        "IntervalIndex",
        "CategoricalDtype",
        "PeriodDtype",
        "IntervalDtype",
        "DatetimeTZDtype",
        "BooleanDtype",
        "Int8Dtype",
        "Int16Dtype",
        "Int32Dtype",
        "Int64Dtype",
        "UInt8Dtype",
        "UInt16Dtype",
        "UInt32Dtype",
        "UInt64Dtype",
        "Float32Dtype",
        "Float64Dtype",
        "NamedAgg",
    ]

    # these are already deprecated; awaiting removal
    deprecated_classes: list[str] = []

    # external modules exposed in pandas namespace
    modules: list[str] = []

    # top-level functions
    funcs = [
        "array",
        "bdate_range",
        "concat",
        "crosstab",
        "cut",
        "date_range",
        "interval_range",
        "eval",
        "factorize",
        "get_dummies",
        "from_dummies",
        "infer_freq",
        "isna",
        "isnull",
        "lreshape",
        "melt",
        "notna",
        "notnull",
        "offsets",
        "merge",
        "merge_ordered",
        "merge_asof",
        "period_range",
        "pivot",
        "pivot_table",
        "qcut",
        "show_versions",
        "timedelta_range",
        "unique",
        "value_counts",
        "wide_to_long",
    ]

    # top-level option funcs
    funcs_option = [
        "reset_option",
        "describe_option",
        "get_option",
        "option_context",
        "set_option",
        "set_eng_float_format",
    ]

    # top-level read_* funcs
    funcs_read = [
        "read_clipboard",
        "read_csv",
        "read_excel",
        "read_fwf",
        "read_gbq",
        "read_hdf",
        "read_html",
        "read_xml",
        "read_json",
        "read_pickle",
        "read_sas",
        "read_sql",
        "read_sql_query",
        "read_sql_table",
        "read_stata",
        "read_table",
        "read_feather",
        "read_parquet",
        "read_orc",
        "read_spss",
    ]

    # top-level json funcs
    funcs_json = ["json_normalize"]

    # top-level to_* funcs
    funcs_to = ["to_datetime", "to_numeric", "to_pickle", "to_timedelta"]

    # top-level to deprecate in the future
    deprecated_funcs_in_future: list[str] = []

    # these are already deprecated; awaiting removal
    deprecated_funcs: list[str] = []

    # private modules in pandas namespace
    private_modules = [
        "_config",
        "_libs",
        "_is_numpy_dev",
        "_pandas_datetime_CAPI",
        "_pandas_parser_CAPI",
        "_testing",
        "_typing",
    ]
    # "_version" is only expected when pandas was not built with meson.
    if not pd._built_with_meson:
        private_modules.append("_version")

    def test_api(self):
        """``dir(pd)`` minus the ignored names matches every list above."""
        checkthese = (
            self.public_lib
            + self.private_lib
            + self.misc
            + self.modules
            + self.classes
            + self.funcs
            + self.funcs_option
            + self.funcs_read
            + self.funcs_json
            + self.funcs_to
            + self.private_modules
        )
        self.check(namespace=pd, expected=checkthese, ignored=self.ignored)

    def test_api_all(self):
        """``pd.__all__`` holds exactly the public, non-deprecated names."""
        expected = set(
            self.public_lib
            + self.misc
            + self.modules
            + self.classes
            + self.funcs
            + self.funcs_option
            + self.funcs_read
            + self.funcs_json
            + self.funcs_to
        ) - set(self.deprecated_classes)
        actual = set(pd.__all__)

        extraneous = actual - expected
        assert not extraneous

        missing = expected - actual
        assert not missing

    def test_depr(self):
        """Accessing any name listed as deprecated emits a FutureWarning."""
        deprecated_list = (
            self.deprecated_classes
            + self.deprecated_funcs
            + self.deprecated_funcs_in_future
        )
        for depr in deprecated_list:
            with tm.assert_produces_warning(FutureWarning):
                _ = getattr(pd, depr)
|
||||
|
||||
|
||||
class TestApi(Base):
    """Pin the names exposed by ``pandas.api`` and its sub-namespaces."""

    allowed_api_dirs = [
        "types",
        "extensions",
        "indexers",
        "interchange",
        "typing",
    ]
    allowed_typing = [
        "DataFrameGroupBy",
        "DatetimeIndexResamplerGroupby",
        "Expanding",
        "ExpandingGroupby",
        "ExponentialMovingWindow",
        "ExponentialMovingWindowGroupby",
        "JsonReader",
        "NaTType",
        "NAType",
        "PeriodIndexResamplerGroupby",
        "Resampler",
        "Rolling",
        "RollingGroupby",
        "SeriesGroupBy",
        "StataReader",
        "TimedeltaIndexResamplerGroupby",
        "TimeGrouper",
        "Window",
    ]
    allowed_api_types = [
        "is_any_real_numeric_dtype",
        "is_array_like",
        "is_bool",
        "is_bool_dtype",
        "is_categorical_dtype",
        "is_complex",
        "is_complex_dtype",
        "is_datetime64_any_dtype",
        "is_datetime64_dtype",
        "is_datetime64_ns_dtype",
        "is_datetime64tz_dtype",
        "is_dict_like",
        "is_dtype_equal",
        "is_extension_array_dtype",
        "is_file_like",
        "is_float",
        "is_float_dtype",
        "is_hashable",
        "is_int64_dtype",
        "is_integer",
        "is_integer_dtype",
        "is_interval",
        "is_interval_dtype",
        "is_iterator",
        "is_list_like",
        "is_named_tuple",
        "is_number",
        "is_numeric_dtype",
        "is_object_dtype",
        "is_period_dtype",
        "is_re",
        "is_re_compilable",
        "is_scalar",
        "is_signed_integer_dtype",
        "is_sparse",
        "is_string_dtype",
        "is_timedelta64_dtype",
        "is_timedelta64_ns_dtype",
        "is_unsigned_integer_dtype",
        "pandas_dtype",
        "infer_dtype",
        "union_categoricals",
        "CategoricalDtype",
        "DatetimeTZDtype",
        "IntervalDtype",
        "PeriodDtype",
    ]
    allowed_api_interchange = ["from_dataframe", "DataFrame"]
    allowed_api_indexers = [
        "check_array_indexer",
        "BaseIndexer",
        "FixedForwardWindowIndexer",
        "VariableOffsetWindowIndexer",
    ]
    allowed_api_extensions = [
        "no_default",
        "ExtensionDtype",
        "register_extension_dtype",
        "register_dataframe_accessor",
        "register_index_accessor",
        "register_series_accessor",
        "take",
        "ExtensionArray",
        "ExtensionScalarOpsMixin",
    ]

    def test_api(self):
        """``pandas.api`` exposes only the allowed sub-namespaces."""
        self.check(api, self.allowed_api_dirs)

    def test_api_typing(self):
        """``pandas.api.typing`` exposes only the allowed names."""
        self.check(api_typing, self.allowed_typing)

    def test_api_types(self):
        """``pandas.api.types`` exposes only the allowed names."""
        self.check(api_types, self.allowed_api_types)

    def test_api_interchange(self):
        """``pandas.api.interchange`` exposes only the allowed names."""
        self.check(api_interchange, self.allowed_api_interchange)

    def test_api_indexers(self):
        """``pandas.api.indexers`` exposes only the allowed names."""
        self.check(api_indexers, self.allowed_api_indexers)

    def test_api_extensions(self):
        """``pandas.api.extensions`` exposes only the allowed names."""
        self.check(api_extensions, self.allowed_api_extensions)
|
||||
|
||||
|
||||
class TestTesting(Base):
    """Pin the names exposed by ``pandas.testing``."""

    funcs = [
        "assert_frame_equal",
        "assert_series_equal",
        "assert_index_equal",
        "assert_extension_array_equal",
    ]

    def test_testing(self):
        """``pandas.testing`` exposes exactly the assertion helpers in ``funcs``."""
        from pandas import testing

        self.check(testing, self.funcs)

    def test_util_in_top_level(self):
        """Accessing a missing attribute on ``pd.util`` raises AttributeError."""
        with pytest.raises(AttributeError, match="foo"):
            pd.util.foo
|
||||
|
||||
|
||||
def test_pandas_array_alias():
|
||||
msg = "PandasArray has been renamed NumpyExtensionArray"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
res = pd.arrays.PandasArray
|
||||
|
||||
assert res is pd.arrays.NumpyExtensionArray
|
62
lib/python3.13/site-packages/pandas/tests/api/test_types.py
Normal file
62
lib/python3.13/site-packages/pandas/tests/api/test_types.py
Normal file
@ -0,0 +1,62 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import pandas._testing as tm
|
||||
from pandas.api import types
|
||||
from pandas.tests.api.test_api import Base
|
||||
|
||||
|
||||
class TestTypes(Base):
    """Pin the names exposed by ``pandas.api.types``."""

    allowed = [
        "is_any_real_numeric_dtype",
        "is_bool",
        "is_bool_dtype",
        "is_categorical_dtype",
        "is_complex",
        "is_complex_dtype",
        "is_datetime64_any_dtype",
        "is_datetime64_dtype",
        "is_datetime64_ns_dtype",
        "is_datetime64tz_dtype",
        "is_dtype_equal",
        "is_float",
        "is_float_dtype",
        "is_int64_dtype",
        "is_integer",
        "is_integer_dtype",
        "is_number",
        "is_numeric_dtype",
        "is_object_dtype",
        "is_scalar",
        "is_sparse",
        "is_string_dtype",
        "is_signed_integer_dtype",
        "is_timedelta64_dtype",
        "is_timedelta64_ns_dtype",
        "is_unsigned_integer_dtype",
        "is_period_dtype",
        "is_interval",
        "is_interval_dtype",
        "is_re",
        "is_re_compilable",
        "is_dict_like",
        "is_iterator",
        "is_file_like",
        "is_list_like",
        "is_hashable",
        "is_array_like",
        "is_named_tuple",
        "pandas_dtype",
        "union_categoricals",
        "infer_dtype",
        "is_extension_array_dtype",
    ]
    deprecated: list[str] = []
    dtypes = ["CategoricalDtype", "DatetimeTZDtype", "PeriodDtype", "IntervalDtype"]

    def test_types(self):
        """``pandas.api.types`` exposes exactly allowed + dtypes + deprecated."""
        self.check(types, self.allowed + self.dtypes + self.deprecated)

    def test_deprecated_from_api_types(self):
        """Calling any name listed in ``deprecated`` emits a FutureWarning."""
        for t in self.deprecated:
            with tm.assert_produces_warning(FutureWarning):
                getattr(types, t)(1)
|
@ -0,0 +1,7 @@
|
||||
from pandas.core.groupby.base import transformation_kernels
|
||||
|
||||
# There is no Series.cumcount or DataFrame.cumcount
# Both lists hold the sorted transformation kernels with "cumcount" removed.
series_transform_kernels = [
    x for x in sorted(transformation_kernels) if x != "cumcount"
]
frame_transform_kernels = [x for x in sorted(transformation_kernels) if x != "cumcount"]
|
1733
lib/python3.13/site-packages/pandas/tests/apply/test_frame_apply.py
Normal file
1733
lib/python3.13/site-packages/pandas/tests/apply/test_frame_apply.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,113 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.numpy import np_version_gte1p25
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_agg_relabel():
    """Keyword relabeling in ``DataFrame.agg`` on a single column."""
    # GH 26513
    df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})

    # simplest case with one column, one func
    result = df.agg(foo=("B", "sum"))
    expected = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"]))
    tm.assert_frame_equal(result, expected)

    # test on same column with different methods
    result = df.agg(foo=("B", "sum"), bar=("B", "min"))
    expected = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"]))

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_agg_relabel_multi_columns_multi_methods():
    """Keyword relabeling across several columns yields NaN where unused."""
    # GH 26513, test on multiple columns with multiple methods
    df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
    result = df.agg(
        foo=("A", "sum"),
        bar=("B", "mean"),
        cat=("A", "min"),
        dat=("B", "max"),
        f=("A", "max"),
        g=("C", "min"),
    )
    expected = pd.DataFrame(
        {
            "A": [6.0, np.nan, 1.0, np.nan, 2.0, np.nan],
            "B": [np.nan, 2.5, np.nan, 4.0, np.nan, np.nan],
            "C": [np.nan, np.nan, np.nan, np.nan, np.nan, 3.0],
        },
        index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.xfail(np_version_gte1p25, reason="name of min now equals name of np.min")
|
||||
def test_agg_relabel_partial_functions():
|
||||
# GH 26513, test on partial, functools or more complex cases
|
||||
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
|
||||
msg = "using Series.[mean|min]"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
|
||||
expected = pd.DataFrame(
|
||||
{"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
msg = "using Series.[mean|min|max|sum]"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df.agg(
|
||||
foo=("A", min),
|
||||
bar=("A", np.min),
|
||||
cat=("B", max),
|
||||
dat=("C", "min"),
|
||||
f=("B", np.sum),
|
||||
kk=("B", lambda x: min(x)),
|
||||
)
|
||||
expected = pd.DataFrame(
|
||||
{
|
||||
"A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],
|
||||
"B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0],
|
||||
"C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan],
|
||||
},
|
||||
index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_agg_namedtuple():
    """``pd.NamedAgg`` works as a relabeling spec in ``DataFrame.agg``."""
    # GH 26513
    df = pd.DataFrame({"A": [0, 1], "B": [1, 2]})
    result = df.agg(
        foo=pd.NamedAgg("B", "sum"),
        bar=pd.NamedAgg("B", "min"),
        cat=pd.NamedAgg(column="B", aggfunc="count"),
        fft=pd.NamedAgg("B", aggfunc="max"),
    )

    expected = pd.DataFrame(
        {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"])
    )
    tm.assert_frame_equal(result, expected)

    result = df.agg(
        foo=pd.NamedAgg("A", "min"),
        bar=pd.NamedAgg(column="B", aggfunc="max"),
        cat=pd.NamedAgg(column="A", aggfunc="max"),
    )
    expected = pd.DataFrame(
        {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]},
        index=pd.Index(["foo", "bar", "cat"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_reconstruct_func():
    """``pd.core.apply.reconstruct_func`` stays importable from this path."""
    # GH 28472, test to ensure reconstruct_func isn't moved;
    # This method is used by other libraries (e.g. dask)
    result = pd.core.apply.reconstruct_func("min")
    expected = (False, "min", None, None)
    tm.assert_equal(result, expected)
|
@ -0,0 +1,264 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.apply.common import frame_transform_kernels
|
||||
from pandas.tests.frame.common import zip_frames
|
||||
|
||||
|
||||
def unpack_obj(obj, klass, axis):
    """
    Helper to ensure we have the right type of object for a test parametrized
    over frame_or_series.

    When ``klass`` is not ``DataFrame``, column ``"A"`` of ``obj`` is used and
    the calling test is skipped for any ``axis`` other than 0. Otherwise
    ``obj`` is returned unchanged.
    """
    if klass is not DataFrame:
        obj = obj["A"]
        if axis != 0:
            pytest.skip(f"Test is only for DataFrame with axis={axis}")
    return obj
|
||||
|
||||
|
||||
def test_transform_ufunc(axis, float_frame, frame_or_series):
    """``transform`` with a NumPy ufunc matches calling the ufunc directly."""
    # GH 35964
    obj = unpack_obj(float_frame, frame_or_series, axis)

    with np.errstate(all="ignore"):
        f_sqrt = np.sqrt(obj)

    # ufunc
    result = obj.transform(np.sqrt, axis=axis)
    expected = f_sqrt
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(axis, float_frame, ops, names):
    """``transform`` with a list-like of functions adds a MultiIndex level."""
    # GH 35964
    other_axis = 1 if axis in {0, "index"} else 0
    with np.errstate(all="ignore"):
        expected = zip_frames([op(float_frame) for op in ops], axis=other_axis)
    # The function names become the inner level on the axis opposite ``axis``.
    if axis in {0, "index"}:
        expected.columns = MultiIndex.from_product([float_frame.columns, names])
    else:
        expected.index = MultiIndex.from_product([float_frame.index, names])
    result = float_frame.transform(ops, axis=axis)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ops", [[], np.array([])])
def test_transform_empty_listlike(float_frame, ops, frame_or_series):
    """``transform`` with an empty list-like raises ValueError."""
    obj = unpack_obj(float_frame, frame_or_series, 0)

    with pytest.raises(ValueError, match="No transform functions were provided"):
        obj.transform(ops)
|
||||
|
||||
|
||||
def test_transform_listlike_func_with_args():
    """Positional and keyword arguments are forwarded to every listed function."""
    # GH 50624
    df = DataFrame({"x": [1, 2, 3]})

    def foo1(x, a=1, c=0):
        return x + a + c

    def foo2(x, b=2, c=0):
        return x + b + c

    # ``b`` is not a parameter of foo1, so forwarding it must fail.
    msg = r"foo1\(\) got an unexpected keyword argument 'b'"
    with pytest.raises(TypeError, match=msg):
        df.transform([foo1, foo2], 0, 3, b=3, c=4)

    result = df.transform([foo1, foo2], 0, 3, c=4)
    expected = DataFrame(
        [[8, 8], [9, 9], [10, 10]],
        columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(axis, float_frame, box):
    """``transform`` with a dict-like only transforms the listed label."""
    # GH 35964
    if axis in (0, "index"):
        e = float_frame.columns[0]
        expected = float_frame[[e]].transform(np.abs)
    else:
        e = float_frame.index[0]
        expected = float_frame.iloc[[0]].transform(np.abs)
    result = float_frame.transform(box({e: np.abs}), axis=axis)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_dictlike_mixed():
    """A dict mixing list and scalar values yields MultiIndex columns."""
    # GH 40018 - mix of lists and non-lists in values of a dictionary
    df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]})
    result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"})
    expected = DataFrame(
        [[1.0, 1, 1.0], [2.0, 4, 2.0]],
        columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {},
        {"A": []},
        {"A": [], "B": "cumsum"},
        {"A": "cumsum", "B": []},
        {"A": [], "B": ["cumsum"]},
        {"A": ["cumsum"], "B": []},
    ],
)
def test_transform_empty_dictlike(float_frame, ops, frame_or_series):
    """A dict that is empty or has an empty list value raises ValueError."""
    obj = unpack_obj(float_frame, frame_or_series, 0)

    with pytest.raises(ValueError, match="No transform functions were provided"):
        obj.transform(ops)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_udf(axis, float_frame, use_apply, frame_or_series):
    """``transform`` with a UDF works on both of its code paths."""
    # GH 35964
    obj = unpack_obj(float_frame, frame_or_series, axis)

    # transform uses UDF either via apply or passing the entire DataFrame
    def func(x):
        # transform is using apply iff x is not a DataFrame
        if use_apply == isinstance(x, frame_or_series):
            # Force transform to fallback
            raise ValueError
        return x + 1

    result = obj.transform(func, axis=axis)
    expected = obj + 1
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
# Kernels excluded from the "raise" list below.
wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"]
# Every frame transform kernel that is not listed in ``wont_fail``.
frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])
def test_transform_bad_dtype(op, frame_or_series, request):
    """Transforming a column of ``object`` classes raises TypeError."""
    # GH 35964
    if op == "ngroup":
        request.applymarker(
            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
        )

    obj = DataFrame({"A": 3 * [object]})  # DataFrame that will fail on most transforms
    obj = tm.get_obj(obj, frame_or_series)
    error = TypeError
    msg = "|".join(
        [
            "not supported between instances of 'type' and 'type'",
            "unsupported operand type",
        ]
    )

    # The same failure is expected for every way of spelling the operation.
    with pytest.raises(error, match=msg):
        obj.transform(op)
    with pytest.raises(error, match=msg):
        obj.transform([op])
    with pytest.raises(error, match=msg):
        obj.transform({"A": op})
    with pytest.raises(error, match=msg):
        obj.transform({"A": [op]})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_failure_typeerror(request, op):
    """One failing column makes list/dict transforms raise TypeError."""
    # GH 35964

    if op == "ngroup":
        request.applymarker(
            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
        )

    # Using object makes most transform kernels fail
    df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})
    error = TypeError
    msg = "|".join(
        [
            "not supported between instances of 'type' and 'type'",
            "unsupported operand type",
        ]
    )

    with pytest.raises(error, match=msg):
        df.transform([op])

    with pytest.raises(error, match=msg):
        df.transform({"A": op, "B": op})

    with pytest.raises(error, match=msg):
        df.transform({"A": [op], "B": [op]})

    with pytest.raises(error, match=msg):
        df.transform({"A": [op, "shift"], "B": [op]})
|
||||
|
||||
|
||||
def test_transform_failure_valueerror():
    """A UDF raising ValueError surfaces as "Transform function failed"."""
    # GH 40211
    def op(x):
        # Raises for inputs whose total is below 10 (column "A" sums to 6).
        if np.sum(np.sum(x)) < 10:
            raise ValueError
        return x

    df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})
    msg = "Transform function failed"

    with pytest.raises(ValueError, match=msg):
        df.transform([op])

    with pytest.raises(ValueError, match=msg):
        df.transform({"A": op, "B": op})

    with pytest.raises(ValueError, match=msg):
        df.transform({"A": [op], "B": [op]})

    with pytest.raises(ValueError, match=msg):
        df.transform({"A": [op, "shift"], "B": [op]})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_passes_args(use_apply, frame_or_series):
    """Extra positional and keyword arguments reach the UDF unchanged."""
    # GH 35964
    # transform uses UDF either via apply or passing the entire DataFrame
    expected_args = [1, 2]
    expected_kwargs = {"c": 3}

    def f(x, a, b, c):
        # transform is using apply iff x is not a DataFrame
        if use_apply == isinstance(x, frame_or_series):
            # Force transform to fallback
            raise ValueError
        assert [a, b] == expected_args
        assert c == expected_kwargs["c"]
        return x

    frame_or_series([1]).transform(f, 0, *expected_args, **expected_kwargs)
|
||||
|
||||
|
||||
def test_transform_empty_dataframe():
    """Transforming an empty frame or column returns an equal empty object."""
    # https://github.com/pandas-dev/pandas/issues/39636
    df = DataFrame([], columns=["col1", "col2"])
    result = df.transform(lambda x: x + 10)
    tm.assert_frame_equal(result, df)

    result = df["col1"].transform(lambda x: x + 10)
    tm.assert_series_equal(result, df["col1"])
|
@ -0,0 +1,361 @@
|
||||
# Tests specifically aimed at detecting bad arguments.
|
||||
# This file is organized by reason for exception.
|
||||
# 1. always invalid argument values
|
||||
# 2. missing column(s)
|
||||
# 3. incompatible ops/dtype/args/kwargs
|
||||
# 4. invalid result shape/type
|
||||
# If your test does not fit into one of these categories, add to this list.
|
||||
|
||||
from itertools import chain
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import SpecificationError
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("result_type", ["foo", 1])
def test_result_type_error(result_type):
    """``DataFrame.apply`` rejects an unknown ``result_type`` value."""
    # allowed result_type
    df = DataFrame(
        np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1,
        columns=["A", "B", "C"],
    )

    msg = (
        "invalid value for result_type, must be one of "
        "{None, 'reduce', 'broadcast', 'expand'}"
    )
    with pytest.raises(ValueError, match=msg):
        df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type)
|
||||
|
||||
|
||||
def test_apply_invalid_axis_value():
    """``DataFrame.apply`` rejects an axis number that does not exist."""
    df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"])
    msg = "No axis named 2 for object type DataFrame"
    with pytest.raises(ValueError, match=msg):
        df.apply(lambda x: x, 2)
|
||||
|
||||
|
||||
def test_agg_raises():
    """``DataFrame.agg`` called without arguments raises TypeError."""
    # GH 26513
    df = DataFrame({"A": [0, 1], "B": [1, 2]})
    msg = "Must provide"

    with pytest.raises(TypeError, match=msg):
        df.agg()
|
||||
|
||||
|
||||
def test_map_with_invalid_na_action_raises():
    """``Series.map`` with a callable rejects an invalid ``na_action``."""
    # https://github.com/pandas-dev/pandas/issues/32815
    s = Series([1, 2, 3])
    msg = "na_action must either be 'ignore' or None"
    with pytest.raises(ValueError, match=msg):
        s.map(lambda x: x, na_action="____")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_na_action", ["____", True])
def test_map_arg_is_dict_with_invalid_na_action_raises(input_na_action):
    """``Series.map`` with a dict rejects an invalid ``na_action``."""
    # https://github.com/pandas-dev/pandas/issues/46588
    s = Series([1, 2, 3])
    msg = f"na_action must either be 'ignore' or None, {input_na_action} was passed"
    with pytest.raises(ValueError, match=msg):
        s.map({1: 2}, na_action=input_na_action)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}])
def test_nested_renamer(frame_or_series, method, func):
    """A dict-of-dict function spec raises SpecificationError."""
    # GH 35964
    obj = frame_or_series({"A": [1]})
    match = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=match):
        getattr(obj, method)(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "renamer",
    [{"foo": ["min", "max"]}, {"foo": ["min", "max"], "bar": ["sum", "mean"]}],
)
def test_series_nested_renamer(renamer):
    """``Series.agg`` with a dict of lists raises SpecificationError."""
    s = Series(range(6), dtype="int64", name="series")
    msg = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        s.agg(renamer)
|
||||
|
||||
|
||||
def test_apply_dict_depr():
    """Aggregating a frame column with a dict of lists is a nested renamer."""
    tsdf = DataFrame(
        np.random.default_rng(2).standard_normal((10, 3)),
        columns=["A", "B", "C"],
        index=date_range("1/1/2000", periods=10),
    )
    msg = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        tsdf.A.agg({"foo": ["sum", "mean"]})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["agg", "transform"])
def test_dict_nested_renaming_depr(method):
    """A per-column dict-of-dict spec raises SpecificationError."""
    df = DataFrame({"A": range(5), "B": 5})

    # nested renaming
    msg = r"nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        getattr(df, method)({"A": {"foo": "min"}, "B": {"bar": "max"}})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}])
def test_missing_column(method, func):
    """A dict spec naming an absent column raises KeyError."""
    # GH 40004
    obj = DataFrame({"A": [1]})
    match = re.escape("Column(s) ['B'] do not exist")
    with pytest.raises(KeyError, match=match):
        getattr(obj, method)(func)
|
||||
|
||||
|
||||
def test_transform_mixed_column_name_dtypes():
    """Missing labels of mixed types are all reported in the KeyError."""
    # GH39025
    df = DataFrame({"a": ["1"]})
    msg = r"Column\(s\) \[1, 'b'\] do not exist"
    with pytest.raises(KeyError, match=msg):
        df.transform({"a": int, 1: str, "b": int})
|
||||
|
||||
|
||||
# ``args`` is passed as a tuple in every case (the "tail" case previously
# passed a bare int).
@pytest.mark.parametrize(
    "how, args", [("pct_change", ()), ("nsmallest", (1, ["a", "b"])), ("tail", (1,))]
)
def test_apply_str_axis_1_raises(how, args):
    """String operations without axis support raise for ``axis=1``."""
    # GH 39211 - some ops don't support axis=1
    df = DataFrame({"a": [1, 2], "b": [3, 4]})
    msg = f"Operation {how} does not support axis=1"
    with pytest.raises(ValueError, match=msg):
        df.apply(how, axis=1, args=args)
|
||||
|
||||
|
||||
def test_transform_axis_1_raises():
    """``Series.transform`` rejects ``axis=1``."""
    # GH 35964
    msg = "No axis named 1 for object type Series"
    with pytest.raises(ValueError, match=msg):
        Series([1]).transform("sum", axis=1)
|
||||
|
||||
|
||||
def test_apply_modify_traceback():
    """An AttributeError raised inside the applied function propagates."""
    data = DataFrame(
        {
            "A": [
                "foo",
                "foo",
                "foo",
                "foo",
                "bar",
                "bar",
                "bar",
                "bar",
                "foo",
                "foo",
                "foo",
            ],
            "B": [
                "one",
                "one",
                "one",
                "two",
                "one",
                "one",
                "one",
                "two",
                "two",
                "two",
                "one",
            ],
            "C": [
                "dull",
                "dull",
                "shiny",
                "dull",
                "dull",
                "shiny",
                "shiny",
                "dull",
                "shiny",
                "shiny",
                "shiny",
            ],
            "D": np.random.default_rng(2).standard_normal(11),
            "E": np.random.default_rng(2).standard_normal(11),
            "F": np.random.default_rng(2).standard_normal(11),
        }
    )

    # The NaN in "C" is a float, so ``startswith`` below fails on this row.
    data.loc[4, "C"] = np.nan

    def transform(row):
        if row["C"].startswith("shin") and row["A"] == "foo":
            row["D"] = 7
        return row

    msg = "'float' object has no attribute 'startswith'"
    with pytest.raises(AttributeError, match=msg):
        data.apply(transform, axis=1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    tm.get_cython_table_params(
        DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]]
    ),
)
def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_string):
    """``cumprod`` on a frame of strings raises through ``DataFrame.agg``."""
    # GH 21224
    if using_infer_string:
        import pyarrow as pa

        # With string inference enabled a pyarrow error is also accepted.
        expected = (expected, pa.lib.ArrowNotImplementedError)

    msg = "can't multiply sequence by non-int of type 'str'|has no kernel"
    # A FutureWarning is only expected when ``func`` is not given as a string.
    warn = None if isinstance(func, str) else FutureWarning
    with pytest.raises(expected, match=msg):
        with tm.assert_produces_warning(warn, match="using DataFrame.cumprod"):
            df.agg(func, axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series("a b c".split()),
            [
                ("mean", TypeError),  # mean raises TypeError
                ("prod", TypeError),
                ("std", TypeError),
                ("var", TypeError),
                ("median", TypeError),
                ("cumprod", TypeError),
            ],
        )
    ),
)
def test_agg_cython_table_raises_series(series, func, expected, using_infer_string):
    # GH21224
    # Numeric reductions on a string Series must raise through Series.agg.
    is_median = func == "median" or func is np.nanmedian or func is np.median
    if is_median:
        base_msg = r"Cannot convert \['a' 'b' 'c'\] to numeric"
    else:
        base_msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type"

    if using_infer_string:
        import pyarrow as pa

        # Also accept the pyarrow error alongside the parametrized exception.
        expected = (expected, pa.lib.ArrowNotImplementedError)

    msg = base_msg + "|does not support|has no kernel"
    # A FutureWarning is expected only when ``func`` is not a string.
    expected_warning = FutureWarning if not isinstance(func, str) else None

    with pytest.raises(expected, match=msg):
        # e.g. Series('a b'.split()).cumprod() will raise
        with tm.assert_produces_warning(
            expected_warning, match="is currently using Series.*"
        ):
            series.agg(func)
|
||||
|
||||
|
||||
def test_agg_none_to_type():
    # GH 40543
    # The TypeError raised by int(None) inside the aggregator must propagate.
    frame = DataFrame({"a": [None]})
    expected_msg = re.escape("int() argument must be a string")

    def first_as_int(col):
        return int(col.iloc[0])

    with pytest.raises(TypeError, match=expected_msg):
        frame.agg({"a": first_as_int})
|
||||
|
||||
|
||||
def test_transform_none_to_type():
    # GH#34377
    # The TypeError raised by int(None) inside the transformer must propagate.
    frame = DataFrame({"a": [None]})

    def first_as_int(col):
        return int(col.iloc[0])

    with pytest.raises(TypeError, match="argument must be a"):
        frame.transform({"a": first_as_int})
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        lambda x: np.array([1, 2]).reshape(-1, 2),
        lambda x: [1, 2],
        lambda x: Series([1, 2]),
    ],
)
def test_apply_broadcast_error(func):
    # Each parametrized function returns something that cannot be broadcast
    # back onto a three-column row.
    values = np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1
    df = DataFrame(values, columns=["A", "B", "C"])

    # > 1 ndim
    with pytest.raises(
        ValueError, match="too many dims to broadcast|cannot broadcast result"
    ):
        df.apply(func, axis=1, result_type="broadcast")
|
||||
|
||||
|
||||
def test_transform_and_agg_err_agg(axis, float_frame):
    # cannot both transform and agg
    mixed_funcs = ["max", "sqrt"]
    expected_msg = "cannot combine transform and aggregation operations"
    with pytest.raises(ValueError, match=expected_msg):
        # Silence floating-point warnings from numpy during the call.
        with np.errstate(all="ignore"):
            float_frame.agg(mixed_funcs, axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")  # GH53325
@pytest.mark.parametrize(
    "func, msg",
    [
        (["sqrt", "max"], "cannot combine transform and aggregation"),
        (
            {"foo": np.sqrt, "bar": "sum"},
            "cannot perform both aggregation and transformation",
        ),
    ],
)
def test_transform_and_agg_err_series(string_series, func, msg):
    # we are trying to transform with an aggregator
    # Silence floating-point warnings from numpy while the error is raised.
    with np.errstate(all="ignore"):
        with pytest.raises(ValueError, match=msg):
            string_series.agg(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]])
def test_transform_wont_agg_frame(axis, float_frame, func):
    # GH 35964
    # cannot both transform and agg
    with pytest.raises(ValueError, match="Function did not transform"):
        float_frame.transform(func, axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [["min", "max"], ["sqrt", "max"]])
def test_transform_wont_agg_series(string_series, func):
    # GH 35964
    # we are trying to transform with an aggregator
    with pytest.raises(ValueError, match="Function did not transform"):
        string_series.transform(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}]
)
def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper):
    # GH 35964
    # A reduction passed to transform (bare, in a list, or in a dict) must raise.
    obj = tm.get_obj(DataFrame({"A": [1, 2, 3]}), frame_or_series)
    wrapped_op = op_wrapper(all_reductions)

    with pytest.raises(ValueError, match="Function did not transform"):
        obj.transform(wrapped_op)
|
118
lib/python3.13/site-packages/pandas/tests/apply/test_numba.py
Normal file
118
lib/python3.13/site-packages/pandas/tests/apply/test_numba.py
Normal file
@ -0,0 +1,118 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
pytestmark = [td.skip_if_no("numba"), pytest.mark.single_cpu]
|
||||
|
||||
|
||||
@pytest.fixture(params=[0, 1])
def apply_axis(request):
    # Fixture parametrized over 0 and 1; the tests pass it as ``axis`` to apply.
    axis = request.param
    return axis
|
||||
|
||||
|
||||
def test_numba_vs_python_noop(float_frame, apply_axis):
    # An identity function must give the same frame under both engines.
    func = lambda x: x
    results = {
        engine: float_frame.apply(func, engine=engine, axis=apply_axis)
        for engine in ("numba", "python")
    }
    tm.assert_frame_equal(results["numba"], results["python"])
|
||||
|
||||
|
||||
def test_numba_vs_python_string_index():
    # GH#56189
    # Identity apply on a frame with pyarrow-backed string labels.
    pytest.importorskip("pyarrow")
    string_dtype = "string[pyarrow_numpy]"
    df = DataFrame(
        1,
        index=Index(["a", "b"], dtype=string_dtype),
        columns=Index(["x", "y"], dtype=string_dtype),
    )
    func = lambda x: x
    result = df.apply(func, engine="numba", axis=0)
    expected = df.apply(func, engine="python", axis=0)
    # Index and column dtypes are excluded from the comparison.
    tm.assert_frame_equal(
        result, expected, check_column_type=False, check_index_type=False
    )
|
||||
|
||||
|
||||
def test_numba_vs_python_indexing():
    # Label lookup inside the applied function must agree across engines,
    # first per row (axis=1, column label) and then per column (axis=0, index label).
    frame = DataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7.0, 8.0, 9.0]},
        index=Index(["A", "B", "C"]),
    )
    cases = (
        (1, lambda x: x["c"]),
        (0, lambda x: x["A"]),
    )
    for axis, getter in cases:
        result = frame.apply(getter, engine="numba", axis=axis)
        expected = frame.apply(getter, engine="python", axis=axis)
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "reduction",
    [lambda x: x.mean(), lambda x: x.min(), lambda x: x.max(), lambda x: x.sum()],
)
def test_numba_vs_python_reductions(reduction, apply_axis):
    # Each reduction must give the same Series under both engines.
    ones = np.ones((4, 4), dtype=np.float64)
    df = DataFrame(ones)
    result = df.apply(reduction, engine="numba", axis=apply_axis)
    expected = df.apply(reduction, engine="python", axis=apply_axis)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("colnames", [[1, 2, 3], [1.0, 2.0, 3.0]])
def test_numba_numeric_colnames(colnames):
    # Check that numeric column names lower properly and can be indexed on
    values = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.int64)
    df = DataFrame(values, columns=colnames)
    first_col = colnames[0]
    f = lambda x: x[first_col]  # Get the first column
    result = df.apply(f, engine="numba", axis=1)
    expected = df.apply(f, engine="python", axis=1)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_numba_parallel_unsupported(float_frame):
    # Requesting parallel=True with the numba engine must raise.
    f = lambda x: x
    expected_msg = "Parallel apply is not supported when raw=False and engine='numba'"
    with pytest.raises(NotImplementedError, match=expected_msg):
        float_frame.apply(f, engine="numba", engine_kwargs={"parallel": True})
|
||||
|
||||
|
||||
def test_numba_nonunique_unsupported(apply_axis):
    # A duplicated index label must be rejected by the numba engine.
    f = lambda x: x
    duplicated_index = Index(["a", "a"])
    df = DataFrame({"a": [1, 2]}, index=duplicated_index)
    expected_msg = "The index/columns must be unique when raw=False and engine='numba'"
    with pytest.raises(NotImplementedError, match=expected_msg):
        df.apply(f, engine="numba", axis=apply_axis)
|
||||
|
||||
|
||||
def test_numba_unsupported_dtypes(apply_axis):
    # The numba engine must reject non-numeric columns and extension-array
    # backed columns with a ValueError.
    # "double[pyarrow]" below needs pyarrow; skip instead of erroring without it.
    pytest.importorskip("pyarrow")
    f = lambda x: x
    df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]})
    df["c"] = df["c"].astype("double[pyarrow]")

    # The alternation is grouped so it only covers the dtype name; a bare
    # ``object|string`` would split the whole pattern into two loose halves.
    with pytest.raises(
        ValueError,
        match=r"Column b must have a numeric dtype\. Found '(object|string)' instead",
    ):
        df.apply(f, engine="numba", axis=apply_axis)

    with pytest.raises(
        ValueError,
        match="Column c is backed by an extension array, "
        "which is not supported by the numba engine.",
    ):
        df["c"].to_frame().apply(f, engine="numba", axis=apply_axis)
|
@ -0,0 +1,701 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
date_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.apply.common import series_transform_kernels
|
||||
|
||||
|
||||
@pytest.fixture(params=[False, "compat"])
def by_row(request):
    # Fixture parametrized over False and "compat"; the tests pass it as
    # ``by_row`` to Series.apply.
    mode = request.param
    return mode
|
||||
|
||||
|
||||
def test_series_map_box_timedelta(by_row):
    # GH#11349
    ser = Series(timedelta_range("1 day 1 s", periods=3, freq="h"))

    def f(x):
        # With by_row set, x is a single element; otherwise x is the Series.
        if by_row:
            return x.total_seconds()
        return x.dt.total_seconds()

    result = ser.apply(f, by_row=by_row)

    # Must agree with element-wise map ...
    via_map = ser.map(lambda x: x.total_seconds())
    tm.assert_series_equal(result, via_map)

    # ... and with the literal values.
    literal = Series([86401.0, 90001.0, 93601.0])
    tm.assert_series_equal(result, literal)
|
||||
|
||||
|
||||
def test_apply(datetime_series, by_row):
    # ufunc whose direct evaluation is wrapped in errstate
    applied_sqrt = datetime_series.apply(np.sqrt, by_row=by_row)
    with np.errstate(all="ignore"):
        direct_sqrt = np.sqrt(datetime_series)
    tm.assert_series_equal(applied_sqrt, direct_sqrt)

    # element-wise apply (ufunc)
    applied_exp = datetime_series.apply(np.exp, by_row=by_row)
    direct_exp = np.exp(datetime_series)
    tm.assert_series_equal(applied_exp, direct_exp)

    def identity(x):
        return x

    # empty series
    s = Series(dtype=object, name="foo", index=Index([], name="bar"))
    rs = s.apply(identity, by_row=by_row)
    tm.assert_series_equal(s, rs)

    # check all metadata (GH 9322)
    assert s is not rs
    assert s.index is rs.index
    assert s.dtype == rs.dtype
    assert s.name == rs.name

    # index but no data
    s = Series(index=[1, 2, 3], dtype=np.float64)
    rs = s.apply(identity, by_row=by_row)
    tm.assert_series_equal(s, rs)
|
||||
|
||||
|
||||
def test_apply_map_same_length_inference_bug():
|
||||
s = Series([1, 2])
|
||||
|
||||
def f(x):
|
||||
return (x, x + 1)
|
||||
|
||||
result = s.apply(f, by_row="compat")
|
||||
expected = s.map(f)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("convert_dtype", [True, False])
def test_apply_convert_dtype_deprecated(convert_dtype):
    # Passing ``convert_dtype`` (either value) must emit a FutureWarning.
    ser = Series(np.random.default_rng(2).standard_normal(10))

    def func(x):
        if x > 0:
            return x
        return np.nan

    with tm.assert_produces_warning(FutureWarning):
        ser.apply(func, convert_dtype=convert_dtype, by_row="compat")
|
||||
|
||||
|
||||
def test_apply_args():
    # Positional ``args`` are forwarded to the applied function.
    s = Series(["foo,bar"])

    result = s.apply(str.split, args=(",",))
    first = result[0]
    assert first == ["foo", "bar"]
    assert isinstance(first, list)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "args, kwargs, increment",
    [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
)
def test_agg_args(args, kwargs, increment):
    # GH 43357
    # Extra positional and keyword arguments are forwarded by Series.agg.
    def f(x, a=0, b=0, c=0):
        return x + a + 10 * b + 100 * c

    s = Series([1, 2])
    expected = s + increment
    msg = (
        "in Series.agg cannot aggregate and has been deprecated. "
        "Use Series.transform to keep behavior unchanged."
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s.agg(f, 0, *args, **kwargs)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_agg_mapping_func_deprecated():
|
||||
# GH 53325
|
||||
s = Series([1, 2, 3])
|
||||
|
||||
def foo1(x, a=1, c=0):
|
||||
return x + a + c
|
||||
|
||||
def foo2(x, b=2, c=0):
|
||||
return x + b + c
|
||||
|
||||
msg = "using .+ in Series.agg cannot aggregate and"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s.agg(foo1, 0, 3, c=4)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s.agg([foo1, foo2], 0, 3, c=4)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
s.agg({"a": foo1, "b": foo2}, 0, 3, c=4)
|
||||
|
||||
|
||||
def test_series_apply_map_box_timestamps(by_row):
    # GH#2689, GH#2627
    ser = Series(date_range("1/1/2000", periods=10))

    def func(x):
        return (x.hour, x.day, x.month)

    if by_row:
        result = ser.apply(func, by_row=by_row)
        expected = ser.map(func)
        tm.assert_series_equal(result, expected)
    else:
        # The function receives the whole Series, which has no ``hour``.
        with pytest.raises(
            AttributeError, match="Series' object has no attribute 'hour'"
        ):
            ser.apply(func, by_row=by_row)
|
||||
|
||||
|
||||
def test_apply_box_dt64():
|
||||
# ufunc will not be boxed. Same test cases as the test_map_box
|
||||
vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
|
||||
ser = Series(vals, dtype="M8[ns]")
|
||||
assert ser.dtype == "datetime64[ns]"
|
||||
# boxed value must be Timestamp instance
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat")
|
||||
exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_box_dt64tz():
|
||||
vals = [
|
||||
pd.Timestamp("2011-01-01", tz="US/Eastern"),
|
||||
pd.Timestamp("2011-01-02", tz="US/Eastern"),
|
||||
]
|
||||
ser = Series(vals, dtype="M8[ns, US/Eastern]")
|
||||
assert ser.dtype == "datetime64[ns, US/Eastern]"
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row="compat")
|
||||
exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_box_td64():
|
||||
# timedelta
|
||||
vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
|
||||
ser = Series(vals)
|
||||
assert ser.dtype == "timedelta64[ns]"
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row="compat")
|
||||
exp = Series(["Timedelta_1", "Timedelta_2"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_box_period():
|
||||
# period
|
||||
vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
|
||||
ser = Series(vals)
|
||||
assert ser.dtype == "Period[M]"
|
||||
res = ser.apply(lambda x: f"{type(x).__name__}_{x.freqstr}", by_row="compat")
|
||||
exp = Series(["Period_M", "Period_M"])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_apply_datetimetz(by_row):
    # apply on a tz-aware Series, element-wise or on the whole Series
    # depending on ``by_row``.
    values = date_range("2011-01-01", "2011-01-02", freq="h").tz_localize("Asia/Tokyo")
    s = Series(values, name="XX")

    # Shifting by one day keeps the timezone.
    result = s.apply(lambda x: x + pd.offsets.Day(), by_row=by_row)
    shifted = date_range("2011-01-02", "2011-01-03", freq="h").tz_localize(
        "Asia/Tokyo"
    )
    tm.assert_series_equal(result, Series(shifted, name="XX"))

    # Hour extraction: the expected dtype differs between the two modes.
    def get_hour(x):
        if by_row:
            return x.hour
        return x.dt.hour

    result = s.apply(get_hour, by_row=by_row)
    hour_dtype = "int64" if by_row else "int32"
    exp = Series([*range(24), 0], name="XX", dtype=hour_dtype)
    tm.assert_series_equal(result, exp)

    # not vectorized
    def f(x):
        if by_row:
            return str(x.tz)
        return str(x.dt.tz)

    result = s.apply(f, by_row=by_row)
    if not by_row:
        assert result == "Asia/Tokyo"
    else:
        exp = Series(["Asia/Tokyo"] * 25, name="XX")
        tm.assert_series_equal(result, exp)
|
||||
|
||||
|
||||
def test_apply_categorical(by_row, using_infer_string):
    # apply on an ordered categorical Series, element-wise or on the whole
    # Series depending on ``by_row``.
    values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
    ser = Series(values, name="XX", index=list("abcdefg"))

    if not by_row:
        # The function receives the whole Series, which has no ``lower``.
        msg = "Series' object has no attribute 'lower"
        with pytest.raises(AttributeError, match=msg):
            ser.apply(lambda x: x.lower(), by_row=by_row)
        assert ser.apply(lambda x: "A", by_row=by_row) == "A"
        return

    result = ser.apply(lambda x: x.lower(), by_row=by_row)

    # should be categorical dtype when the number of categories are
    # the same
    values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
    exp = Series(values, name="XX", index=list("abcdefg"))
    tm.assert_series_equal(result, exp)
    tm.assert_categorical_equal(result.values, exp.values)

    result = ser.apply(lambda x: "A")
    exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
    tm.assert_series_equal(result, exp)
    # Pick the expected dtype first, then compare. Writing
    # ``assert a == b if cond else "literal"`` parses as
    # ``(a == b) if cond else "literal"``, which asserts a truthy string and
    # checks nothing when ``using_infer_string`` is set.
    expected_dtype = "string[pyarrow_numpy]" if using_infer_string else object
    assert result.dtype == expected_dtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
def test_apply_categorical_with_nan_values(series, by_row):
    # GH 20714 bug fixed in: GH 24275
    s = Series(series, dtype="category")

    def prefix(x):
        return x.split("-")[0]

    if by_row:
        result = s.apply(prefix, by_row=by_row).astype(object)
        expected = Series(["1", "1", np.nan], dtype="category").astype(object)
        tm.assert_series_equal(result, expected)
    else:
        # The function receives the whole Series, which has no ``split``.
        with pytest.raises(
            AttributeError, match="'Series' object has no attribute 'split'"
        ):
            s.apply(prefix, by_row=by_row)
|
||||
|
||||
|
||||
def test_apply_empty_integer_series_with_datetime_index(by_row):
    # GH 21245
    # Identity apply on an empty int Series with a DatetimeIndex is a no-op.
    empty_index = date_range(start="2018-01-01", periods=0)
    s = Series([], index=empty_index, dtype=int)
    result = s.apply(lambda x: x, by_row=by_row)
    tm.assert_series_equal(result, s)
|
||||
|
||||
|
||||
def test_apply_dataframe_iloc():
    # A DataFrame passed through ``args`` can be indexed positionally inside
    # the applied function; the uint64 dtype is kept in the result.
    uintDF = DataFrame(np.uint64([1, 2, 3, 4, 5]), columns=["Numbers"])
    indexDF = DataFrame([2, 3, 2, 1, 2], columns=["Indices"])

    def retrieve(targetRow, targetDF):
        return targetDF["Numbers"].iloc[targetRow]

    positions = indexDF["Indices"]
    result = positions.apply(retrieve, args=(uintDF,))
    expected = Series([3, 4, 3, 2, 3], name="Indices", dtype="uint64")
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform(string_series, by_row):
    # transforming functions

    # Reference results computed directly with the ufuncs.
    with np.errstate(all="ignore"):
        f_sqrt = np.sqrt(string_series)
        f_abs = np.abs(string_series)

    # ufunc
    result = string_series.apply(np.sqrt, by_row=by_row)
    tm.assert_series_equal(result, f_sqrt.copy())

    # list-like
    sqrt_frame = f_sqrt.to_frame().copy()
    sqrt_frame.columns = ["sqrt"]
    result = string_series.apply([np.sqrt], by_row=by_row)
    tm.assert_frame_equal(result, sqrt_frame)

    result = string_series.apply(["sqrt"], by_row=by_row)
    tm.assert_frame_equal(result, sqrt_frame)

    # multiple items in list
    # these are in the order as if we are applying both functions per
    # series and then concatting
    both = concat([f_sqrt, f_abs], axis=1)
    both.columns = ["sqrt", "absolute"]
    result = string_series.apply([np.sqrt, np.abs], by_row=by_row)
    tm.assert_frame_equal(result, both)

    # dict, provide renaming
    renamed = concat([f_sqrt, f_abs], axis=1)
    renamed.columns = ["foo", "bar"]
    expected = renamed.unstack().rename("series")

    result = string_series.apply({"foo": np.sqrt, "bar": np.abs}, by_row=by_row)
    tm.assert_series_equal(result.reindex_like(expected), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_partial_failure(op, request):
    # GH 35964
    if op in ("ffill", "bfill", "pad", "backfill", "shift"):
        request.applymarker(
            pytest.mark.xfail(reason=f"{op} is successful on any dtype")
        )

    # Using object makes most transform kernels fail
    ser = Series(3 * [object])

    if op in ("fillna", "ngroup"):
        error, msg = ValueError, "Transform function failed"
    else:
        error = TypeError
        msg = (
            "not supported between instances of 'type' and 'type'"
            "|"
            "unsupported operand type"
        )

    # The failing kernel must raise however it is combined with "shift".
    func_specs = (
        [op, "shift"],
        {"A": op, "B": "shift"},
        {"A": [op], "B": ["shift"]},
        {"A": [op, "shift"], "B": [op]},
    )
    for spec in func_specs:
        with pytest.raises(error, match=msg):
            ser.transform(spec)
|
||||
|
||||
|
||||
def test_transform_partial_failure_valueerror():
    # GH 40211
    # A ValueError from one function is reported as a failed transform,
    # whatever container the functions are passed in.
    def noop(x):
        return x

    def raising_op(_):
        raise ValueError

    ser = Series(3 * [object])
    func_specs = (
        [noop, raising_op],
        {"A": raising_op, "B": noop},
        {"A": [raising_op], "B": [noop]},
        {"A": [noop, raising_op], "B": [noop]},
    )
    for spec in func_specs:
        with pytest.raises(ValueError, match="Transform function failed"):
            ser.transform(spec)
|
||||
|
||||
|
||||
def test_demo():
    # demonstration tests
    s = Series(range(6), dtype="int64", name="series")

    # A list of reducers gives a Series indexed by the reducer names.
    by_list = s.agg(["min", "max"])
    tm.assert_series_equal(
        by_list, Series([0, 5], index=["min", "max"], name="series")
    )

    # A dict of reducers gives a Series indexed by the dict keys.
    by_dict = s.agg({"foo": "min"})
    tm.assert_series_equal(by_dict, Series([0], index=["foo"], name="series"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [str, lambda x: str(x)])
def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row):
    # test that we are evaluating row-by-row first if by_row="compat"
    # else vectorized evaluation
    result = string_series.apply(func, by_row=by_row)

    if not by_row:
        assert result == str(string_series)
    else:
        tm.assert_series_equal(result, string_series.map(func))
|
||||
|
||||
|
||||
def test_agg_evaluate_lambdas(string_series):
    # GH53325
    # in the future, the result will be a Series class.
    n_rows = len(string_series)

    with tm.assert_produces_warning(FutureWarning):
        from_lambda = string_series.agg(lambda x: type(x))
    assert isinstance(from_lambda, Series) and len(from_lambda) == n_rows

    with tm.assert_produces_warning(FutureWarning):
        from_builtin = string_series.agg(type)
    assert isinstance(from_builtin, Series) and len(from_builtin) == n_rows
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op_name", ["agg", "apply"])
def test_with_nested_series(datetime_series, op_name):
    # GH 2316
    # .agg with a reducer and a transform, what to do
    msg = "cannot aggregate"
    expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2})

    # Only the "agg" spelling is expected to warn.
    warning = None if op_name != "agg" else FutureWarning
    method = getattr(datetime_series, op_name)
    with tm.assert_produces_warning(warning, match=msg):
        # GH52123
        result = method(lambda x: Series([x, x**2], index=["x", "x^2"]))
    tm.assert_frame_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"]))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_replicate_describe(string_series):
    # this also tests a result set that is all scalars
    describe_spec = {
        "count": "count",
        "mean": "mean",
        "std": "std",
        "min": "min",
        "25%": lambda x: x.quantile(0.25),
        "50%": "median",
        "75%": lambda x: x.quantile(0.75),
        "max": "max",
    }
    expected = string_series.describe()
    result = string_series.apply(describe_spec)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reduce(string_series):
    # reductions with named functions
    reducers = ["sum", "mean"]
    result = string_series.agg(reducers)
    expected = Series(
        [string_series.sum(), string_series.mean()],
        reducers,
        name=string_series.name,
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "how, kwds",
    [("agg", {}), ("apply", {"by_row": "compat"}), ("apply", {"by_row": False})],
)
def test_non_callable_aggregates(how, kwds):
    # test agg using non-callable series attributes
    # GH 39116 - expand to apply
    s = Series([1, 2, None])
    method = getattr(s, how)

    # Calling agg w/ just a string arg same as calling s.arg
    assert method("size", **kwds) == s.size

    # test when mixed w/ callable reducers
    expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
    result = method(["size", "count", "mean"], **kwds)
    tm.assert_series_equal(result, expected)

    result = method({"size": "size", "count": "count", "mean": "mean"}, **kwds)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_apply_no_suffix_index(by_row):
    # GH36189
    # Duplicate lambda labels in the result index get no numeric suffix.
    s = Series([4] * 3)
    funcs = ["sum", lambda x: x.sum(), lambda x: x.sum()]
    result = s.apply(funcs, by_row=by_row)
    expected = Series([12, 12, 12], index=["sum", "<lambda>", "<lambda>"])

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dti,exp",
    [
        (
            Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
            DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
        ),
        (
            Series(
                np.arange(10, dtype=np.float64),
                index=date_range("2020-01-01", periods=10),
                name="ts",
            ),
            DataFrame(np.repeat([[1, 2]], 10, axis=0), dtype="int64"),
        ),
    ],
)
@pytest.mark.parametrize("aware", [True, False])
def test_apply_series_on_date_time_index_aware_series(dti, exp, aware):
    # GH 25959
    # Calling apply on a localized time series should not cause an error
    source = dti.tz_localize("UTC") if aware else dti
    index = source.index
    result = Series(index).apply(lambda x: Series([1, 2]))
    tm.assert_frame_equal(result, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "by_row, expected", [("compat", Series(np.ones(10), dtype="int64")), (False, 1)]
)
def test_apply_scalar_on_date_time_index_aware_series(by_row, expected):
    # GH 25959
    # Calling apply on a localized time series should not cause an error
    utc_index = date_range("2020-01-01", periods=10, tz="UTC")
    series = Series(np.arange(10, dtype=np.float64), index=utc_index)
    result = Series(series.index).apply(lambda x: 1, by_row=by_row)
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_to_timedelta(by_row):
    # Applying pd.to_timedelta through Series.apply must match converting
    # the whole list at once.
    list_of_valid_strings = ["00:00:01", "00:00:02"]
    converted = pd.to_timedelta(list_of_valid_strings)
    applied = Series(list_of_valid_strings).apply(pd.to_timedelta, by_row=by_row)
    tm.assert_series_equal(Series(converted), applied)

    # Same check with missing values mixed in.
    list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT]

    converted = pd.to_timedelta(list_of_strings)
    ser = Series(list_of_strings)
    applied = ser.apply(pd.to_timedelta, by_row=by_row)
    tm.assert_series_equal(Series(converted), applied)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sum], ["sum"]),
        ([np.sum, np.mean], ["sum", "mean"]),
        (np.array([np.sum]), ["sum"]),
        (np.array([np.sum, np.mean]), ["sum", "mean"]),
    ],
)
@pytest.mark.parametrize(
    "how, kwargs",
    [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]],
)
def test_apply_listlike_reducer(string_series, ops, names, how, kwargs):
    # GH 39140
    # A list-like of reducers gives a Series indexed by the reducer names.
    expected = Series({name: op(string_series) for name, op in zip(names, ops)})
    expected.name = "series"
    # Only the "agg" spelling is expected to warn.
    warn = FutureWarning if how == "agg" else None
    # Group the alternatives and escape the dot: ``[sum|mean]`` would be a
    # character class matching a single character, not a method name.
    alternatives = "|".join(names)
    msg = rf"using Series\.({alternatives})"
    with tm.assert_produces_warning(warn, match=msg):
        result = getattr(string_series, how)(ops, **kwargs)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {"A": np.sum},
        {"A": np.sum, "B": np.mean},
        Series({"A": np.sum}),
        Series({"A": np.sum, "B": np.mean}),
    ],
)
@pytest.mark.parametrize(
    "how, kwargs",
    [["agg", {}], ["apply", {"by_row": "compat"}], ["apply", {"by_row": False}]],
)
def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row):
    # GH 39140
    # A dict-like of reducers gives a Series indexed by the dict keys.
    # NOTE: the ``by_row`` fixture is not used in the body; ``kwargs``
    # already carries the by_row value passed to apply.
    expected = Series({name: op(string_series) for name, op in ops.items()})
    expected.name = string_series.name
    # Only the "agg" spelling is expected to warn.
    warn = FutureWarning if how == "agg" else None
    # Group the alternatives and escape the dot: ``[sum|mean]`` would be a
    # character class matching a single character, not a method name.
    msg = r"using Series\.(sum|mean)"
    with tm.assert_produces_warning(warn, match=msg):
        result = getattr(string_series, how)(ops, **kwargs)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_apply_listlike_transformer(string_series, ops, names, by_row):
    # GH 39140
    # A list-like of transformers gives a frame with one column per function.
    with np.errstate(all="ignore"):
        pieces = [op(string_series) for op in ops]
        expected = concat(pieces, axis=1)
        expected.columns = names
        result = string_series.apply(ops, by_row=by_row)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, expected",
    [
        ([lambda x: x], DataFrame({"<lambda>": [1, 2, 3]})),
        ([lambda x: x.sum()], Series([6], index=["<lambda>"])),
    ],
)
def test_apply_listlike_lambda(ops, expected, by_row):
    # GH53400
    # Lambdas in a list are labelled "<lambda>" in the result.
    result = Series([1, 2, 3]).apply(ops, by_row=by_row)
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {"A": np.sqrt},
        {"A": np.sqrt, "B": np.exp},
        Series({"A": np.sqrt}),
        Series({"A": np.sqrt, "B": np.exp}),
    ],
)
def test_apply_dictlike_transformer(string_series, ops, by_row):
    # GH 39140
    # A dict-like of transformers gives the per-key results concatenated
    # under the dict keys.
    with np.errstate(all="ignore"):
        per_key = {name: op(string_series) for name, op in ops.items()}
        expected = concat(per_key)
        expected.name = string_series.name
        result = string_series.apply(ops, by_row=by_row)
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, expected",
    [
        (
            {"a": lambda x: x},
            Series([1, 2, 3], index=MultiIndex.from_arrays([["a"] * 3, range(3)])),
        ),
        ({"a": lambda x: x.sum()}, Series([6], index=["a"])),
    ],
)
def test_apply_dictlike_lambda(ops, by_row, expected):
    """A dict of lambdas labels its output with the dict keys (GH53400)."""
    values = Series([1, 2, 3])
    tm.assert_equal(values.apply(ops, by_row=by_row), expected)
|
||||
|
||||
|
||||
def test_apply_retains_column_name(by_row):
    """Index names survive when the applied function returns a Series (GH 16380)."""
    row_labels = Index(range(3), name="x")
    frame = DataFrame({"x": range(3)}, row_labels)

    def expand(n):
        # Each element n becomes a Series 0..n whose index is named "y".
        return Series(range(n + 1), Index(range(n + 1), name="y"))

    result = frame.x.apply(expand)
    expected = DataFrame(
        [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]],
        columns=Index(range(3), name="y"),
        index=Index(range(3), name="x"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_type():
    """Applying the builtin ``type`` returns the Python type of each element (GH 46719)."""
    labels = ["a", "b", "c"]
    ser = Series([3, "string", float], index=labels)
    expected = Series([int, str, type], index=labels)
    tm.assert_series_equal(ser.apply(type), expected)
|
||||
|
||||
|
||||
def test_series_apply_unpack_nested_data():
    """Ragged nested lists expand into a NaN-padded frame (GH#55189)."""
    nested = Series([[1, 2, 3], [4, 5, 6, 7]])
    expected = DataFrame(
        {0: [1.0, 4.0], 1: [2.0, 5.0], 2: [3.0, 6.0], 3: [np.nan, 7]}
    )
    result = nested.apply(lambda x: Series(x))
    tm.assert_frame_equal(result, expected)
|
@ -0,0 +1,39 @@
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_relabel_no_duplicated_method():
|
||||
# this is to test there is no duplicated method used in agg
|
||||
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})
|
||||
|
||||
result = df["A"].agg(foo="sum")
|
||||
expected = df["A"].agg({"foo": "sum"})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df["B"].agg(foo="min", bar="max")
|
||||
expected = df["B"].agg({"foo": "min", "bar": "max"})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
msg = "using Series.[sum|min|max]"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df["B"].agg(foo=sum, bar=min, cat="max")
|
||||
msg = "using Series.[sum|min|max]"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_relabel_duplicated_method():
|
||||
# this is to test with nested renaming, duplicated method can be used
|
||||
# if they are assigned with different new names
|
||||
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})
|
||||
|
||||
result = df["A"].agg(foo="sum", bar="sum")
|
||||
expected = pd.Series([6, 6], index=["foo", "bar"], name="A")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
msg = "using Series.min"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = df["B"].agg(foo=min, bar="min")
|
||||
expected = pd.Series([1, 1], index=["foo", "bar"], name="B")
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,84 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "args, kwargs, increment",
    [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
)
def test_agg_args(args, kwargs, increment):
    """Extra positional and keyword arguments reach the transform function (GH 43357)."""

    def weighted(x, a=0, b=0, c=0):
        # Distinct weights make it visible which parameter received which value.
        return x + a + 10 * b + 100 * c

    ser = Series([1, 2])
    # The literal 0 is passed positionally ahead of the forwarded arguments.
    result = ser.transform(weighted, 0, *args, **kwargs)
    tm.assert_series_equal(result, ser + increment)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(string_series, ops, names):
    """Series.transform with a list-like of ufuncs gives one column per ufunc (GH 35964)."""
    # Silence NumPy floating-point warnings raised while evaluating the ufuncs.
    with np.errstate(all="ignore"):
        pieces = [func(string_series) for func in ops]
        expected = concat(pieces, axis=1)
        expected.columns = names
        result = string_series.transform(ops)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_listlike_func_with_args():
    """Arguments given to a list-like transform are forwarded to every function (GH 50624)."""
    ser = Series([1, 2, 3])

    # The function names are significant: they become the result's column
    # labels and appear in the expected error message.
    def foo1(x, a=1, c=0):
        return x + a + c

    def foo2(x, b=2, c=0):
        return x + b + c

    funcs = [foo1, foo2]

    # ``b`` is unknown to foo1, so forwarding it to both functions must fail.
    with pytest.raises(
        TypeError, match=r"foo1\(\) got an unexpected keyword argument 'b'"
    ):
        ser.transform(funcs, 0, 3, b=3, c=4)

    expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]})
    tm.assert_frame_equal(ser.transform(funcs, 0, 3, c=4), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(string_series, box):
    """Series.transform accepts a dict or a Series of functions (GH 35964)."""
    # Silence NumPy floating-point warnings raised while evaluating the ufuncs.
    with np.errstate(all="ignore"):
        roots = np.sqrt(string_series)
        magnitudes = np.abs(string_series)
        expected = concat([roots, magnitudes], axis=1)
        expected.columns = ["foo", "bar"]
        funcs = box({"foo": np.sqrt, "bar": np.abs})
        result = string_series.transform(funcs)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_dictlike_mixed():
    """Dict values may mix lists of functions with single functions (GH 40018)."""
    ser = Series([1, 4])
    columns = MultiIndex(
        levels=[("b", "c"), ("sqrt", "abs")],
        codes=[(0, 0, 1), (0, 1, 0)],
    )
    expected = DataFrame([[1.0, 1, 1.0], [2.0, 4, 2.0]], columns=columns)
    result = ser.transform({"b": ["sqrt", "abs"], "c": "sqrt"})
    tm.assert_frame_equal(result, expected)
|
326
lib/python3.13/site-packages/pandas/tests/apply/test_str.py
Normal file
326
lib/python3.13/site-packages/pandas/tests/apply/test_str.py
Normal file
@ -0,0 +1,326 @@
|
||||
from itertools import chain
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_number
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.apply.common import (
|
||||
frame_transform_kernels,
|
||||
series_transform_kernels,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"])
@pytest.mark.parametrize(
    "args,kwds",
    [
        pytest.param([], {}, id="no_args_or_kwds"),
        pytest.param([1], {}, id="axis_from_args"),
        pytest.param([], {"axis": 1}, id="axis_from_kwds"),
        pytest.param([], {"numeric_only": True}, id="optional_kwds"),
        pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"),
    ],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how):
    """A reduction named by string through agg/apply matches the direct method call."""
    if how == "agg" and len(args) > 1:
        # This combination is expected to fail with TypeError; see the reason.
        mark = pytest.mark.xfail(
            raises=TypeError,
            reason="agg/apply signature mismatch - agg passes 2nd "
            "argument to func",
        )
        request.applymarker(mark)

    dispatcher = getattr(float_frame, how)
    reducer = getattr(float_frame, func)
    result = dispatcher(func, *args, **kwds)
    expected = reducer(*args, **kwds)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("arg", ["sum", "mean", "min", "max", "std"])
def test_with_string_args(datetime_series, arg):
    """Series.apply with a reduction name equals calling that method directly."""
    direct = getattr(datetime_series, arg)
    expected = direct()
    assert datetime_series.apply(arg) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["mean", "median", "std", "var"])
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_np_reducer(op, how):
    """String-named reducers agree with the NumPy function of the same name (GH 39116)."""
    float_frame = DataFrame({"a": [1, 2], "b": [3, 4]})
    result = getattr(float_frame, how)(op)

    # pandas ddof defaults to 1, numpy to 0
    if op in ("std", "var"):
        kwargs = {"ddof": 1}
    else:
        kwargs = {}
    np_reducer = getattr(np, op)
    reduced = np_reducer(float_frame, axis=0, **kwargs)
    expected = Series(reduced, index=float_frame.columns)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"]
)
@pytest.mark.parametrize("how", ["transform", "apply"])
def test_apply_np_transformer(float_frame, op, how):
    """String-named transformers agree with the NumPy function of the same name (GH 39116)."""
    # float_frame will _usually_ have negative values, which will
    # trigger the warning here, but let's put one in just to be sure
    float_frame.iloc[0, 0] = -1.0
    warn = RuntimeWarning if op in ["log", "sqrt"] else None

    with tm.assert_produces_warning(warn, check_stacklevel=False):
        # float_frame fixture is defined in conftest.py, so we don't check the
        # stacklevel as otherwise the test would fail.
        result = getattr(float_frame, how)(op)
        np_func = getattr(np, op)
        expected = np_func(float_frame)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("sum", 0),
                ("max", np.nan),
                ("min", np.nan),
                ("all", True),
                ("any", False),
                ("mean", np.nan),
                ("prod", 1),
                ("std", np.nan),
                ("var", np.nan),
                ("median", np.nan),
            ],
        ),
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("sum", 6),
                ("max", 3),
                ("min", 1),
                ("all", True),
                ("any", True),
                ("mean", 2),
                ("prod", 6),
                ("std", 1),
                ("var", 1),
                ("median", 2),
            ],
        ),
        tm.get_cython_table_params(
            Series("a b c".split()),
            [
                ("sum", "abc"),
                ("max", "c"),
                ("min", "a"),
                ("all", True),
                ("any", True),
            ],
        ),
    ),
)
def test_agg_cython_table_series(series, func, expected):
    """
    Reducing functions from pandas.core.base.SelectionMixin._cython_table
    give the expected scalar through Series.agg (GH21224).
    """
    # Only non-string (callable) entries are expected to warn.
    passed_by_name = isinstance(func, str)
    warn = None if passed_by_name else FutureWarning
    with tm.assert_produces_warning(warn, match="is currently using Series.*"):
        result = series.agg(func)

    if not is_number(expected):
        assert result == expected
    else:
        assert np.isclose(result, expected, equal_nan=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("cumprod", Series([], dtype=np.float64)),
                ("cumsum", Series([], dtype=np.float64)),
            ],
        ),
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("cumprod", Series([np.nan, 1, 2, 6])),
                ("cumsum", Series([np.nan, 1, 3, 6])),
            ],
        ),
        tm.get_cython_table_params(
            Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))]
        ),
    ),
)
def test_agg_cython_table_transform_series(series, func, expected):
    """
    Transforming functions from pandas.core.base.SelectionMixin._cython_table
    (cumprod, cumsum) give the expected Series through Series.agg (GH21224).
    """
    # Only non-string (callable) entries are expected to warn.
    passed_by_name = isinstance(func, str)
    warn = None if passed_by_name else FutureWarning
    with tm.assert_produces_warning(warn, match="is currently using Series.*"):
        result = series.agg(func)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    chain(
        tm.get_cython_table_params(
            DataFrame(),
            [
                ("sum", Series(dtype="float64")),
                ("max", Series(dtype="float64")),
                ("min", Series(dtype="float64")),
                ("all", Series(dtype=bool)),
                ("any", Series(dtype=bool)),
                ("mean", Series(dtype="float64")),
                ("prod", Series(dtype="float64")),
                ("std", Series(dtype="float64")),
                ("var", Series(dtype="float64")),
                ("median", Series(dtype="float64")),
            ],
        ),
        tm.get_cython_table_params(
            DataFrame([[np.nan, 1], [1, 2]]),
            [
                ("sum", Series([1.0, 3])),
                ("max", Series([1.0, 2])),
                ("min", Series([1.0, 1])),
                ("all", Series([True, True])),
                ("any", Series([True, True])),
                ("mean", Series([1, 1.5])),
                ("prod", Series([1.0, 2])),
                ("std", Series([np.nan, 0.707107])),
                ("var", Series([np.nan, 0.5])),
                ("median", Series([1, 1.5])),
            ],
        ),
    ),
)
def test_agg_cython_table_frame(df, func, expected, axis):
    """
    Reducing functions from pandas.core.base.SelectionMixin._cython_table
    give the expected Series through DataFrame.agg (GH 21224).
    """
    # Only non-string (callable) entries are expected to warn.
    passed_by_name = isinstance(func, str)
    warn = None if passed_by_name else FutureWarning
    with tm.assert_produces_warning(warn, match="is currently using DataFrame.*"):
        # GH#53425
        result = df.agg(func, axis=axis)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    chain(
        tm.get_cython_table_params(
            DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())]
        ),
        tm.get_cython_table_params(
            DataFrame([[np.nan, 1], [1, 2]]),
            [
                ("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
                ("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
            ],
        ),
    ),
)
def test_agg_cython_table_transform_frame(df, func, expected, axis):
    """
    Transforming functions from pandas.core.base.SelectionMixin._cython_table
    (cumprod, cumsum) give the expected frame through DataFrame.agg (GH 21224).
    """
    column_wise = axis in ("columns", 1)
    if column_wise:
        # operating blockwise doesn't let us preserve dtypes
        expected = expected.astype("float64")

    # Only non-string (callable) entries are expected to warn.
    passed_by_name = isinstance(func, str)
    warn = None if passed_by_name else FutureWarning
    with tm.assert_produces_warning(warn, match="is currently using DataFrame.*"):
        # GH#53425
        result = df.agg(func, axis=axis)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_groupby_kernel_series(request, string_series, op):
    """Series.transform(kernel) matches a groupby transform over one group (GH 35964)."""
    if op == "ngroup":
        mark = pytest.mark.xfail(
            raises=ValueError, reason="ngroup not valid for NDFrame"
        )
        request.applymarker(mark)

    is_fillna = op == "fillna"
    args = [0.0] if is_fillna else []
    # An all-ones key puts every row into the same group.
    ones = np.ones(string_series.shape[0])

    warn = FutureWarning if is_fillna else None
    with tm.assert_produces_warning(warn, match="SeriesGroupBy.fillna is deprecated"):
        expected = string_series.groupby(ones).transform(op, *args)
    result = string_series.transform(op, 0, *args)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", frame_transform_kernels)
def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
    """DataFrame.transform(kernel) matches a groupby transform over one group (GH 35964)."""
    if op == "ngroup":
        mark = pytest.mark.xfail(
            raises=ValueError, reason="ngroup not valid for NDFrame"
        )
        request.applymarker(mark)

    is_fillna = op == "fillna"
    args = [0.0] if is_fillna else []
    along_index = axis in (0, "index")
    if along_index:
        msg = "The 'axis' keyword in DataFrame.groupby is deprecated"
    else:
        msg = "DataFrame.groupby with axis=1 is deprecated"
    warn = FutureWarning if is_fillna else None
    op_msg = "DataFrameGroupBy.fillna is deprecated"

    def check_against_single_group():
        # The key length follows the current frame shape, so it is recomputed
        # on every call (the frame gains a column between the two calls).
        length = float_frame.shape[0] if along_index else float_frame.shape[1]
        ones = np.ones(length)
        with tm.assert_produces_warning(FutureWarning, match=msg):
            grouped = float_frame.groupby(ones, axis=axis)
        with tm.assert_produces_warning(warn, match=op_msg):
            expected = grouped.transform(op, *args)
        result = float_frame.transform(op, axis, *args)
        tm.assert_frame_equal(result, expected)

    check_against_single_group()

    # same thing, but ensuring we have multiple blocks
    assert "E" not in float_frame.columns
    float_frame["E"] = float_frame["A"].copy()
    assert len(float_frame._mgr.arrays) > 1

    check_against_single_group()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"])
def test_transform_method_name(method):
    """DataFrame.transform accepts a method name and matches the direct call (GH 19760)."""
    frame = DataFrame({"A": [-1, 2]})
    call_method = operator.methodcaller(method)
    expected = call_method(frame)
    tm.assert_frame_equal(frame.transform(method), expected)
|
155
lib/python3.13/site-packages/pandas/tests/arithmetic/common.py
Normal file
155
lib/python3.13/site-packages/pandas/tests/arithmetic/common.py
Normal file
@ -0,0 +1,155 @@
|
||||
"""
|
||||
Assertion helpers for arithmetic tests.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import (
|
||||
BooleanArray,
|
||||
NumpyExtensionArray,
|
||||
)
|
||||
|
||||
|
||||
def assert_cannot_add(left, right, msg="cannot add"):
    """
    Assert that adding ``left`` and ``right`` raises TypeError in either order.

    Parameters
    ----------
    left : object
    right : object
    msg : str, default "cannot add"
        Pattern the TypeError message must match.
    """
    # Check both operand orders: left + right, then right + left.
    for first, second in ((left, right), (right, left)):
        with pytest.raises(TypeError, match=msg):
            first + second
|
||||
|
||||
|
||||
def assert_invalid_addsub_type(left, right, msg=None):
    """
    Assert that ``left`` and ``right`` raise TypeError for both ``+`` and ``-``.

    Both operand orders are checked for each operator.

    Parameters
    ----------
    left : object
    right : object
    msg : str or None, default None
        Pattern the TypeError message must match; None accepts any message.
    """
    operand_orders = ((left, right), (right, left))
    for first, second in operand_orders:
        with pytest.raises(TypeError, match=msg):
            first + second
    for first, second in operand_orders:
        with pytest.raises(TypeError, match=msg):
            first - second
|
||||
|
||||
|
||||
def get_upcast_box(left, right, is_cmp: bool = False):
    """
    Get the box to use for 'expected' in an arithmetic or comparison operation.

    Parameters
    ----------
    left : Any
    right : Any
    is_cmp : bool, default False
        Whether the operation is a comparison method.

    Returns
    -------
    callable
        ``DataFrame``, ``Series``, ``Index``, ``np.array`` or ``tm.to_array``,
        chosen from the operand types in that order of precedence.
    """
    operands = (left, right)

    if any(isinstance(obj, DataFrame) for obj in operands):
        return DataFrame

    if any(isinstance(obj, Series) for obj in operands):
        # Index does not defer for comparisons
        left_is_index = isinstance(left, Index)
        return np.array if (is_cmp and left_is_index) else Series

    if any(isinstance(obj, Index) for obj in operands):
        return np.array if is_cmp else Index

    return tm.to_array
|
||||
|
||||
|
||||
def assert_invalid_comparison(left, right, box):
    """
    Assert that comparison operations with mismatched types behave correctly.

    ``==`` must be all-False and ``!=`` all-True in both operand orders, and
    every ordering comparison must raise TypeError.

    Parameters
    ----------
    left : np.ndarray, ExtensionArray, Index, or Series
    right : object
    box : {pd.DataFrame, pd.Series, pd.Index, pd.array, tm.to_array}
    """
    # Not for tznaive-tzaware comparison

    # Note: not quite the same as how we do this for tm.box_expected
    xbox = np.array if box in [Index, array] else box

    def xbox2(x):
        # Eventually we'd like this to be tighter, but for now we'll
        # just exclude NumpyExtensionArray[bool]
        if isinstance(x, NumpyExtensionArray):
            return x._ndarray
        if isinstance(x, BooleanArray):
            # NB: we are assuming no pd.NAs for now
            return x.astype(bool)
        return x

    # rev_box: box to use for reversed comparisons
    if isinstance(right, Index) and isinstance(left, Series):
        rev_box = np.array
    else:
        rev_box = xbox

    result = xbox2(left == right)
    all_false = np.zeros(result.shape, dtype=np.bool_)
    expected = xbox(all_false)
    tm.assert_equal(result, expected)

    result = xbox2(right == left)
    tm.assert_equal(result, rev_box(expected))

    result = xbox2(left != right)
    tm.assert_equal(result, ~expected)

    result = xbox2(right != left)
    tm.assert_equal(result, rev_box(~expected))

    accepted_messages = [
        "Invalid comparison between",
        "Cannot compare type",
        "not supported between",
        "invalid type promotion",
        (
            # GH#36706 npdev 1.20.0 2020-09-28
            r"The DTypes <class 'numpy.dtype\[datetime64\]'> and "
            r"<class 'numpy.dtype\[int64\]'> do not have a common DType. "
            "For example they cannot be stored in a single array unless the "
            "dtype is `object`."
        ),
    ]
    msg = "|".join(accepted_messages)

    # Every ordering comparison must raise, in both operand orders.
    for first, second in ((left, right), (right, left)):
        with pytest.raises(TypeError, match=msg):
            first < second
        with pytest.raises(TypeError, match=msg):
            first <= second
        with pytest.raises(TypeError, match=msg):
            first > second
        with pytest.raises(TypeError, match=msg):
            first >= second
|
139
lib/python3.13/site-packages/pandas/tests/arithmetic/conftest.py
Normal file
139
lib/python3.13/site-packages/pandas/tests/arithmetic/conftest.py
Normal file
@ -0,0 +1,139 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Index
|
||||
|
||||
|
||||
@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
    """
    The integer 1 as a Python int and as a zero-dimensional int64 array.

    The zero-dim integer array behaves like an integer, so this fixture can
    be used to check that datetimelike indexes handle addition and
    subtraction of integers and of zero-dimensional integer arrays.

    Examples
    --------
    dti = pd.date_range('2016-01-01', periods=2, freq='h')
    dti
    DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00'],
    dtype='datetime64[ns]', freq='h')
    dti + one
    DatetimeIndex(['2016-01-01 01:00:00', '2016-01-01 02:00:00'],
    dtype='datetime64[ns]', freq='h')
    """
    variant = request.param
    return variant
|
||||
|
||||
|
||||
# Zero-valued operands used as fixture params: length-5 vectors in several
# containers and dtypes, then zero-dim arrays, then plain Python scalars.
zeros = [
    box_cls([0] * 5, dtype=dtype)
    for box_cls in [Index, np.array, pd.array]
    for dtype in [np.int64, np.uint64, np.float64]
]
# Negative-zero float vectors.
zeros += [box_cls([-0.0] * 5, dtype=np.float64) for box_cls in [Index, np.array]]
# Zero-dimensional arrays, including a negative zero.
zeros += [np.array(0, dtype=dtype) for dtype in [np.int64, np.uint64, np.float64]]
zeros.append(np.array(-0.0, dtype=np.float64))
# Plain Python scalars.
zeros += [0, 0.0, -0.0]
|
||||
|
||||
|
||||
@pytest.fixture(params=zeros)
def zero(request):
    """
    Several types of scalar zeros and length 5 vectors of zeros.

    This fixture can be used to check that numeric-dtype indexes handle
    division by any zero numeric-dtype.

    Uses vector of length 5 for broadcasting with `numeric_idx` fixture,
    which creates numeric-dtype vectors also of length 5.

    Examples
    --------
    arr = RangeIndex(5)
    arr / zero
    Index([nan, inf, inf, inf, inf], dtype='float64')
    """
    variant = request.param
    return variant
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Scalar Fixtures
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        pd.Timedelta("10m7s").to_pytimedelta(),
        pd.Timedelta("10m7s"),
        pd.Timedelta("10m7s").to_timedelta64(),
    ],
    ids=lambda x: type(x).__name__,
)
def scalar_td(request):
    """
    A 10 minute 7 second timedelta scalar in one of three representations:
    the result of ``to_pytimedelta()``, ``pd.Timedelta`` itself, or the
    result of ``to_timedelta64()``.
    """
    variant = request.param
    return variant
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        pd.offsets.Day(3),
        pd.offsets.Hour(72),
        pd.Timedelta(days=3).to_pytimedelta(),
        pd.Timedelta("72:00:00"),
        np.timedelta64(3, "D"),
        np.timedelta64(72, "h"),
    ],
    ids=lambda x: type(x).__name__,
)
def three_days(request):
    """
    One of several timedelta-like and DateOffset objects, each representing
    a 3-day timedelta.
    """
    variant = request.param
    return variant
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        pd.offsets.Hour(2),
        pd.offsets.Minute(120),
        pd.Timedelta(hours=2).to_pytimedelta(),
        pd.Timedelta(seconds=2 * 3600),
        np.timedelta64(2, "h"),
        np.timedelta64(120, "m"),
    ],
    ids=lambda x: type(x).__name__,
)
def two_hours(request):
    """
    One of several timedelta-like and DateOffset objects, each representing
    a 2-hour timedelta.
    """
    variant = request.param
    return variant
|
||||
|
||||
|
||||
# Offsets appended to the params of the ``not_daily`` fixture below.
_common_mismatch = [
    pd.offsets.YearBegin(2),
    pd.offsets.MonthBegin(1),
    pd.offsets.Minute(),
]


@pytest.fixture(
    params=[
        np.timedelta64(4, "h"),
        pd.Timedelta(hours=23).to_pytimedelta(),
        pd.Timedelta("23:00:00"),
    ]
    + _common_mismatch
)
def not_daily(request):
    """
    One of several timedelta-like and DateOffset instances that are _not_
    compatible with Daily frequencies.
    """
    variant = request.param
    return variant
|
@ -0,0 +1,39 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._testing as tm
|
||||
from pandas.core.ops.array_ops import (
|
||||
comparison_op,
|
||||
na_logical_op,
|
||||
)
|
||||
|
||||
|
||||
def test_na_logical_op_2d():
    """``na_logical_op`` handles a 2D object operand that plain ``|`` rejects."""
    left = np.arange(8).reshape(4, 2)
    right = left.astype(object)
    right[0, 0] = np.nan

    # Check that we fall back to the vec_binop branch
    with pytest.raises(TypeError, match="unsupported operand type"):
        left | right

    result = na_logical_op(left, right, operator.or_)
    tm.assert_numpy_array_equal(result, right)
|
||||
|
||||
|
||||
def test_object_comparison_2d():
    """``comparison_op`` compares 2D object arrays elementwise."""
    left = np.arange(9).reshape(3, 3).astype(object)
    right = left.T
    # A matrix equals its transpose exactly on the diagonal.
    on_diagonal = np.eye(3, dtype=bool)

    equal = comparison_op(left, right, operator.eq)
    tm.assert_numpy_array_equal(equal, on_diagonal)

    # Ensure that cython doesn't raise on non-writeable arg, which
    # we can get from np.broadcast_to
    right.flags.writeable = False
    not_equal = comparison_op(left, right, operator.ne)
    tm.assert_numpy_array_equal(not_equal, ~on_diagonal)
|
@ -0,0 +1,25 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalComparisons:
    """Equality comparisons on categorical-backed Series."""

    def test_categorical_nan_equality(self):
        # The missing entry is expected to compare unequal, even to itself.
        ser = Series(Categorical(["a", "b", "c", np.nan]))
        tm.assert_series_equal(ser == ser, Series([True, True, True, False]))

    def test_categorical_tuple_equality(self):
        # GH 18050
        target = (0, 0)
        ser = Series([(0, 0), (0, 1), (0, 0), (1, 0), (1, 1)])
        expected = Series([True, False, True, False, False])

        # Object dtype first, then the categorical version of the same data.
        tm.assert_series_equal(ser == target, expected)
        as_category = ser.astype("category")
        tm.assert_series_equal(as_category == target, expected)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,306 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_list_like
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Period,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import (
|
||||
BooleanArray,
|
||||
IntervalArray,
|
||||
)
|
||||
from pandas.tests.arithmetic.common import get_upcast_box
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        (Index([0, 2, 4, 4]), Index([1, 3, 5, 8])),
        (Index([0.0, 1.0, 2.0, np.nan]), Index([1.0, 2.0, 3.0, np.nan])),
        (
            timedelta_range("0 days", periods=3).insert(3, pd.NaT),
            timedelta_range("1 day", periods=3).insert(3, pd.NaT),
        ),
        (
            date_range("20170101", periods=3).insert(3, pd.NaT),
            date_range("20170102", periods=3).insert(3, pd.NaT),
        ),
        (
            date_range("20170101", periods=3, tz="US/Eastern").insert(3, pd.NaT),
            date_range("20170102", periods=3, tz="US/Eastern").insert(3, pd.NaT),
        ),
    ],
    ids=lambda x: str(x[0].dtype),
)
def left_right_dtypes(request):
    """
    A ``(left, right)`` pair of indexes for building an IntervalArray,
    parametrized over several dtypes; the test id is the left index's dtype.
    """
    endpoints = request.param
    return endpoints
|
||||
|
||||
|
||||
@pytest.fixture
def interval_array(left_right_dtypes):
    """
    IntervalArray built from the ``left_right_dtypes`` endpoints, covering
    various dtypes and containing NA where possible.
    """
    left_endpoints, right_endpoints = left_right_dtypes
    return IntervalArray.from_arrays(left_endpoints, right_endpoints)
|
||||
|
||||
|
||||
def create_categorical_intervals(left, right, closed="right"):
    """Build a Categorical of intervals from ``left``/``right`` endpoints."""
    intervals = IntervalIndex.from_arrays(left, right, closed)
    return Categorical(intervals)
|
||||
|
||||
|
||||
def create_series_intervals(left, right, closed="right"):
    """Build a Series backed by an IntervalArray from ``left``/``right`` endpoints."""
    intervals = IntervalArray.from_arrays(left, right, closed)
    return Series(intervals)
|
||||
|
||||
|
||||
def create_series_categorical_intervals(left, right, closed="right"):
    """Build a Series of categorical intervals from ``left``/``right`` endpoints."""
    intervals = IntervalIndex.from_arrays(left, right, closed)
    categories = Categorical(intervals)
    return Series(categories)
|
||||
|
||||
|
||||
class TestComparison:
|
||||
@pytest.fixture(params=[operator.eq, operator.ne])
def op(self, request):
    """Comparison operator under test: equality or inequality."""
    comparison = request.param
    return comparison
|
||||
|
||||
@pytest.fixture(
    params=[
        IntervalArray.from_arrays,
        IntervalIndex.from_arrays,
        create_categorical_intervals,
        create_series_intervals,
        create_series_categorical_intervals,
    ],
    ids=[
        "IntervalArray",
        "IntervalIndex",
        "Categorical[Interval]",
        "Series[Interval]",
        "Series[Categorical[Interval]]",
    ],
)
def interval_constructor(self, request):
    """
    One of the pandas native interval constructors.

    To be used as the LHS of IntervalArray comparisons.
    """
    constructor = request.param
    return constructor
|
||||
|
||||
def elementwise_comparison(self, op, interval_array, other):
|
||||
"""
|
||||
Helper that performs elementwise comparisons between `array` and `other`
|
||||
"""
|
||||
other = other if is_list_like(other) else [other] * len(interval_array)
|
||||
expected = np.array([op(x, y) for x, y in zip(interval_array, other)])
|
||||
if isinstance(other, Series):
|
||||
return Series(expected, index=other.index)
|
||||
return expected
|
||||
|
||||
def test_compare_scalar_interval(self, op, interval_array):
|
||||
# matches first interval
|
||||
other = interval_array[0]
|
||||
result = op(interval_array, other)
|
||||
expected = self.elementwise_comparison(op, interval_array, other)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# matches on a single endpoint but not both
|
||||
other = Interval(interval_array.left[0], interval_array.right[1])
|
||||
result = op(interval_array, other)
|
||||
expected = self.elementwise_comparison(op, interval_array, other)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed):
|
||||
interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
|
||||
other = Interval(0, 1, closed=other_closed)
|
||||
|
||||
result = op(interval_array, other)
|
||||
expected = self.elementwise_comparison(op, interval_array, other)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_compare_scalar_na(self, op, interval_array, nulls_fixture, box_with_array):
|
||||
box = box_with_array
|
||||
obj = tm.box_expected(interval_array, box)
|
||||
result = op(obj, nulls_fixture)
|
||||
|
||||
if nulls_fixture is pd.NA:
|
||||
# GH#31882
|
||||
exp = np.ones(interval_array.shape, dtype=bool)
|
||||
expected = BooleanArray(exp, exp)
|
||||
else:
|
||||
expected = self.elementwise_comparison(op, interval_array, nulls_fixture)
|
||||
|
||||
if not (box is Index and nulls_fixture is pd.NA):
|
||||
# don't cast expected from BooleanArray to ndarray[object]
|
||||
xbox = get_upcast_box(obj, nulls_fixture, True)
|
||||
expected = tm.box_expected(expected, xbox)
|
||||
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
rev = op(nulls_fixture, obj)
|
||||
tm.assert_equal(rev, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other",
|
||||
[
|
||||
0,
|
||||
1.0,
|
||||
True,
|
||||
"foo",
|
||||
Timestamp("2017-01-01"),
|
||||
Timestamp("2017-01-01", tz="US/Eastern"),
|
||||
Timedelta("0 days"),
|
||||
Period("2017-01-01", "D"),
|
||||
],
|
||||
)
|
||||
def test_compare_scalar_other(self, op, interval_array, other):
|
||||
result = op(interval_array, other)
|
||||
expected = self.elementwise_comparison(op, interval_array, other)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_compare_list_like_interval(self, op, interval_array, interval_constructor):
|
||||
# same endpoints
|
||||
other = interval_constructor(interval_array.left, interval_array.right)
|
||||
result = op(interval_array, other)
|
||||
expected = self.elementwise_comparison(op, interval_array, other)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
# different endpoints
|
||||
other = interval_constructor(
|
||||
interval_array.left[::-1], interval_array.right[::-1]
|
||||
)
|
||||
result = op(interval_array, other)
|
||||
expected = self.elementwise_comparison(op, interval_array, other)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
# all nan endpoints
|
||||
other = interval_constructor([np.nan] * 4, [np.nan] * 4)
|
||||
result = op(interval_array, other)
|
||||
expected = self.elementwise_comparison(op, interval_array, other)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_compare_list_like_interval_mixed_closed(
|
||||
self, op, interval_constructor, closed, other_closed
|
||||
):
|
||||
interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
|
||||
other = interval_constructor(range(2), range(1, 3), closed=other_closed)
|
||||
|
||||
result = op(interval_array, other)
|
||||
expected = self.elementwise_comparison(op, interval_array, other)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other",
|
||||
[
|
||||
(
|
||||
Interval(0, 1),
|
||||
Interval(Timedelta("1 day"), Timedelta("2 days")),
|
||||
Interval(4, 5, "both"),
|
||||
Interval(10, 20, "neither"),
|
||||
),
|
||||
(0, 1.5, Timestamp("20170103"), np.nan),
|
||||
(
|
||||
Timestamp("20170102", tz="US/Eastern"),
|
||||
Timedelta("2 days"),
|
||||
"baz",
|
||||
pd.NaT,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_compare_list_like_object(self, op, interval_array, other):
|
||||
result = op(interval_array, other)
|
||||
expected = self.elementwise_comparison(op, interval_array, other)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_compare_list_like_nan(self, op, interval_array, nulls_fixture):
|
||||
other = [nulls_fixture] * 4
|
||||
result = op(interval_array, other)
|
||||
expected = self.elementwise_comparison(op, interval_array, other)
|
||||
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other",
|
||||
[
|
||||
np.arange(4, dtype="int64"),
|
||||
np.arange(4, dtype="float64"),
|
||||
date_range("2017-01-01", periods=4),
|
||||
date_range("2017-01-01", periods=4, tz="US/Eastern"),
|
||||
timedelta_range("0 days", periods=4),
|
||||
period_range("2017-01-01", periods=4, freq="D"),
|
||||
Categorical(list("abab")),
|
||||
Categorical(date_range("2017-01-01", periods=4)),
|
||||
pd.array(list("abcd")),
|
||||
pd.array(["foo", 3.14, None, object()], dtype=object),
|
||||
],
|
||||
ids=lambda x: str(x.dtype),
|
||||
)
|
||||
def test_compare_list_like_other(self, op, interval_array, other):
|
||||
result = op(interval_array, other)
|
||||
expected = self.elementwise_comparison(op, interval_array, other)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("length", [1, 3, 5])
|
||||
@pytest.mark.parametrize("other_constructor", [IntervalArray, list])
|
||||
def test_compare_length_mismatch_errors(self, op, other_constructor, length):
|
||||
interval_array = IntervalArray.from_arrays(range(4), range(1, 5))
|
||||
other = other_constructor([Interval(0, 1)] * length)
|
||||
with pytest.raises(ValueError, match="Lengths must match to compare"):
|
||||
op(interval_array, other)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"constructor, expected_type, assert_func",
|
||||
[
|
||||
(IntervalIndex, np.array, tm.assert_numpy_array_equal),
|
||||
(Series, Series, tm.assert_series_equal),
|
||||
],
|
||||
)
|
||||
def test_index_series_compat(self, op, constructor, expected_type, assert_func):
|
||||
# IntervalIndex/Series that rely on IntervalArray for comparisons
|
||||
breaks = range(4)
|
||||
index = constructor(IntervalIndex.from_breaks(breaks))
|
||||
|
||||
# scalar comparisons
|
||||
other = index[0]
|
||||
result = op(index, other)
|
||||
expected = expected_type(self.elementwise_comparison(op, index, other))
|
||||
assert_func(result, expected)
|
||||
|
||||
other = breaks[0]
|
||||
result = op(index, other)
|
||||
expected = expected_type(self.elementwise_comparison(op, index, other))
|
||||
assert_func(result, expected)
|
||||
|
||||
# list-like comparisons
|
||||
other = IntervalArray.from_breaks(breaks)
|
||||
result = op(index, other)
|
||||
expected = expected_type(self.elementwise_comparison(op, index, other))
|
||||
assert_func(result, expected)
|
||||
|
||||
other = [index[0], breaks[0], "foo"]
|
||||
result = op(index, other)
|
||||
expected = expected_type(self.elementwise_comparison(op, index, other))
|
||||
assert_func(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("scalars", ["a", False, 1, 1.0, None])
|
||||
def test_comparison_operations(self, scalars):
|
||||
# GH #28981
|
||||
expected = Series([False, False])
|
||||
s = Series([Interval(0, 1), Interval(1, 2)], dtype="interval")
|
||||
result = s == scalars
|
||||
tm.assert_series_equal(result, expected)
|
1567
lib/python3.13/site-packages/pandas/tests/arithmetic/test_numeric.py
Normal file
1567
lib/python3.13/site-packages/pandas/tests/arithmetic/test_numeric.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,420 @@
|
||||
# Arithmetic tests for DataFrame/Series/Index/Array classes that should
|
||||
# behave identically.
|
||||
# Specifically for object dtype
|
||||
import datetime
|
||||
from decimal import Decimal
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core import ops
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Comparisons
|
||||
|
||||
|
||||
class TestObjectComparisons:
    """Comparison operators on Series built from object/string data."""

    def test_comparison_object_numeric_nas(self, comparison_op):
        # Object-dtype numbers (with the missing values introduced by the
        # shift) are expected to compare exactly like their float casts.
        ser = Series(np.random.default_rng(2).standard_normal(10), dtype=object)
        shifted = ser.shift(2)

        result = comparison_op(ser, shifted)
        expected = comparison_op(ser.astype(float), shifted.astype(float))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
    )
    def test_object_comparisons(self, infer_string):
        with option_context("future.infer_string", infer_string):
            ser = Series(["a", "b", np.nan, "c", "a"])

            # equality: only the two "a" entries match
            is_a = ser == "a"
            tm.assert_series_equal(is_a, Series([True, False, False, False, True]))

            # nothing sorts strictly before "a"
            below_a = ser < "a"
            tm.assert_series_equal(
                below_a, Series([False, False, False, False, False])
            )

            # inequality is checked against the negated equality mask
            not_a = ser != "a"
            negated = -(ser == "a")
            tm.assert_series_equal(not_a, negated)

    @pytest.mark.parametrize("dtype", [None, object])
    def test_more_na_comparisons(self, dtype):
        left = Series(["a", np.nan, "c"], dtype=dtype)
        right = Series(["a", np.nan, "d"], dtype=dtype)

        # Series vs Series: the NaN position is unequal on both sides
        tm.assert_series_equal(left == right, Series([True, False, False]))
        tm.assert_series_equal(left != right, Series([False, True, True]))

        # Series vs NaN scalar: never equal, always different
        tm.assert_series_equal(left == np.nan, Series([False, False, False]))
        tm.assert_series_equal(left != np.nan, Series([True, True, True]))
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Arithmetic
|
||||
|
||||
|
||||
class TestArithmetic:
    """Arithmetic operators on object-dtype Series/Index and their boxed forms."""

    def test_add_period_to_array_of_offset(self):
        # GH#50162
        period = pd.Period("2012-1-1", freq="D")
        periods = pd.period_range("2012-1-1", periods=10, freq="D")
        offsets = period - periods

        expected = pd.Index([off + period for off in offsets], dtype=object)

        added = offsets + period
        tm.assert_index_equal(added, expected)

        radded = period + offsets
        tm.assert_index_equal(radded, expected)

    # TODO: parametrize
    def test_pow_ops_object(self):
        # GH#22922
        # pow is weird with masking & 1, so testing here
        a = Series([1, np.nan, 1, np.nan], dtype=object)
        b = Series([1, np.nan, np.nan, 1], dtype=object)

        forward = a**b
        forward_expected = Series(a.values**b.values, dtype=object)
        tm.assert_series_equal(forward, forward_expected)

        backward = b**a
        backward_expected = Series(b.values**a.values, dtype=object)
        tm.assert_series_equal(backward, backward_expected)

    @pytest.mark.parametrize("op", [operator.add, ops.radd])
    @pytest.mark.parametrize("other", ["category", "Int64"])
    def test_add_extension_scalar(self, other, box_with_array, op):
        # GH#22378
        # Check that scalars satisfying is_extension_array_dtype(obj)
        # do not incorrectly try to dispatch to an ExtensionArray operation
        strings = Series(["a", "b", "c"])
        expected = Series([op(item, other) for item in strings])

        boxed = tm.box_expected(strings, box_with_array)
        expected = tm.box_expected(expected, box_with_array)

        result = op(boxed, other)
        tm.assert_equal(result, expected)

    def test_objarr_add_str(self, box_with_array):
        # right-hand string is appended; the NaN entry stays NaN
        obj = tm.box_expected(Series(["x", np.nan, "x"]), box_with_array)
        expected = tm.box_expected(Series(["xa", np.nan, "xa"]), box_with_array)

        result = obj + "a"
        tm.assert_equal(result, expected)

    def test_objarr_radd_str(self, box_with_array):
        # left-hand string is prepended; the NaN entry stays NaN
        obj = tm.box_expected(Series(["x", np.nan, "x"]), box_with_array)
        expected = tm.box_expected(Series(["ax", np.nan, "ax"]), box_with_array)

        result = "a" + obj
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "data",
        [
            [1, 2, 3],
            [1.1, 2.2, 3.3],
            [Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT],
            ["x", "y", 1],
        ],
    )
    @pytest.mark.parametrize("dtype", [None, object])
    def test_objarr_radd_str_invalid(self, dtype, data, box_with_array):
        obj = tm.box_expected(Series(data, dtype=dtype), box_with_array)

        # any one of these messages is accepted
        alternatives = (
            "can only concatenate str",
            "did not contain a loop with signature matching types",
            "unsupported operand type",
            "must be str",
        )
        msg = "|".join(alternatives)
        with pytest.raises(TypeError, match=msg):
            "foo_" + obj

    @pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub])
    def test_objarr_add_invalid(self, op, box_with_array):
        # invalid ops
        strings = Series(list("abc"), dtype=object, name="objects")
        boxed = tm.box_expected(strings, box_with_array)

        alternatives = (
            "can only concatenate str",
            "unsupported operand type",
            "must be str",
            "has no kernel",
        )
        msg = "|".join(alternatives)

        # scalar integer operand
        with pytest.raises(Exception, match=msg):
            op(boxed, 1)
        # zero-dimensional integer array operand
        with pytest.raises(Exception, match=msg):
            op(boxed, np.array(1, dtype=np.int64))

    # TODO: Moved from tests.series.test_operators; needs cleanup
    def test_operators_na_handling(self):
        ser = Series(["foo", "bar", "baz", np.nan])

        prefixed = "prefix_" + ser
        tm.assert_series_equal(
            prefixed, Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan])
        )

        suffixed = ser + "_suffix"
        tm.assert_series_equal(
            suffixed, Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan])
        )

    # TODO: parametrize over box
    @pytest.mark.parametrize("dtype", [None, object])
    def test_series_with_dtype_radd_timedelta(self, dtype):
        # note this test is _not_ aimed at timedelta64-dtyped Series
        # as of 2.0 we retain object dtype when ser.dtype == object
        start = [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")]
        shifted = [
            pd.Timedelta("4 days"),
            pd.Timedelta("5 days"),
            pd.Timedelta("6 days"),
        ]
        ser = Series(start, dtype=dtype)
        expected = Series(shifted, dtype=dtype)

        radd_result = pd.Timedelta("3 days") + ser
        tm.assert_series_equal(radd_result, expected)

        add_result = ser + pd.Timedelta("3 days")
        tm.assert_series_equal(add_result, expected)

    # TODO: cleanup & parametrize over box
    def test_mixed_timezone_series_ops_object(self):
        # GH#13043
        ser = Series(
            [
                Timestamp("2015-01-01", tz="US/Eastern"),
                Timestamp("2015-01-01", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        assert ser.dtype == object

        one_day = Series(
            [
                Timestamp("2015-01-02", tz="US/Eastern"),
                Timestamp("2015-01-02", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        tm.assert_series_equal(ser + pd.Timedelta("1 days"), one_day)
        tm.assert_series_equal(pd.Timedelta("1 days") + ser, one_day)

        # object series & object series
        ser2 = Series(
            [
                Timestamp("2015-01-03", tz="US/Eastern"),
                Timestamp("2015-01-05", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        assert ser2.dtype == object
        diff = Series(
            [pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx", dtype=object
        )
        tm.assert_series_equal(ser2 - ser, diff)
        tm.assert_series_equal(ser - ser2, -diff)

        # object-dtype Series holding Timedelta values
        ser = Series(
            [pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")],
            name="xxx",
            dtype=object,
        )
        assert ser.dtype == object

        half_hour_later = Series(
            [pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")],
            name="xxx",
            dtype=object,
        )
        tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), half_hour_later)
        tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, half_hour_later)

    # TODO: cleanup & parametrize over box
    def test_iadd_preserves_name(self):
        # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name
        ser = Series([1, 2, 3])
        ser.index.name = "foo"

        ser.index += 1
        assert ser.index.name == "foo"

        ser.index -= 1
        assert ser.index.name == "foo"

    def test_add_string(self):
        # from bug report
        base = pd.Index(["a", "b", "c"])
        suffixed = base + "foo"

        assert "a" not in suffixed
        assert "afoo" in suffixed

    def test_iadd_string(self):
        index = pd.Index(["a", "b", "c"])
        # doesn't fail test unless there is a check before `+=`
        assert "a" in index

        index += "_x"
        assert "a_x" in index

    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="add doesn't work")
    def test_add(self):
        index = pd.Index([str(i) for i in range(10)])
        doubled = pd.Index(index.values * 2)
        tm.assert_index_equal(index + index, doubled)
        tm.assert_index_equal(index + index.tolist(), doubled)
        tm.assert_index_equal(index.tolist() + index, doubled)

        # test add and radd
        index = pd.Index(list("abc"))
        appended = pd.Index(["a1", "b1", "c1"])
        tm.assert_index_equal(index + "1", appended)
        prepended = pd.Index(["1a", "1b", "1c"])
        tm.assert_index_equal("1" + index, prepended)

    def test_sub_fail(self, using_infer_string):
        index = pd.Index([str(i) for i in range(10)])

        # the expected exception type and message depend on the string backend
        if using_infer_string:
            import pyarrow as pa

            err = pa.lib.ArrowNotImplementedError
            msg = "has no kernel"
        else:
            err = TypeError
            msg = "unsupported operand type|Cannot broadcast"

        with pytest.raises(err, match=msg):
            index - "a"
        with pytest.raises(err, match=msg):
            index - index
        with pytest.raises(err, match=msg):
            index - index.tolist()
        with pytest.raises(err, match=msg):
            index.tolist() - index

    def test_sub_object(self):
        # GH#19369
        index = pd.Index([Decimal(1), Decimal(2)])
        expected = pd.Index([Decimal(0), Decimal(1)])

        scalar_result = index - Decimal(1)
        tm.assert_index_equal(scalar_result, expected)

        index_result = index - pd.Index([Decimal(1), Decimal(1)])
        tm.assert_index_equal(index_result, expected)

        msg = "unsupported operand type"
        with pytest.raises(TypeError, match=msg):
            index - "foo"

        with pytest.raises(TypeError, match=msg):
            index - np.array([2, "foo"], dtype=object)

    def test_rsub_object(self, fixed_now_ts):
        # GH#19369
        index = pd.Index([Decimal(1), Decimal(2)])
        expected = pd.Index([Decimal(1), Decimal(0)])

        scalar_result = Decimal(2) - index
        tm.assert_index_equal(scalar_result, expected)

        array_result = np.array([Decimal(2), Decimal(2)]) - index
        tm.assert_index_equal(array_result, expected)

        msg = "unsupported operand type"
        with pytest.raises(TypeError, match=msg):
            "foo" - index

        with pytest.raises(TypeError, match=msg):
            np.array([True, fixed_now_ts]) - index
|
||||
|
||||
|
||||
class MyIndex(pd.Index):
    # Minimal Index subclass that counts how often its ``+`` hooks are hit.

    # number of times __add__ has run on this instance (via + or reflected +)
    _calls: int

    @classmethod
    def _simple_new(cls, values, name=None, dtype=None):
        # Allocate the instance directly and fill in its state by hand;
        # ``dtype`` is accepted but not used here.
        obj = object.__new__(cls)
        obj._data = values
        obj._name = name
        obj._calls = 0
        obj._reset_identity()

        return obj

    def __add__(self, other):
        # Record the call, then hand back a fresh instance over the same data
        # (``other`` does not influence the result).
        self._calls += 1
        return self._simple_new(self._data)

    def __radd__(self, other):
        # Reflected addition shares the forward implementation.
        return self.__add__(other)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "other",
    [
        [datetime.timedelta(1), datetime.timedelta(2)],
        [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)],
        [pd.Period("2000"), pd.Period("2001")],
        ["a", "b"],
    ],
    ids=["timedelta", "datetime", "period", "object"],
)
def test_index_ops_defer_to_unknown_subclasses(other):
    # https://github.com/pandas-dev/pandas/issues/31109
    # Adding a regular Index to a MyIndex must end up in MyIndex's own hook.
    dates = [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]
    custom = MyIndex._simple_new(np.array(dates, dtype=object))
    left = pd.Index(other)

    result = left + custom

    assert isinstance(result, MyIndex)
    assert custom._calls == 1
|
1675
lib/python3.13/site-packages/pandas/tests/arithmetic/test_period.py
Normal file
1675
lib/python3.13/site-packages/pandas/tests/arithmetic/test_period.py
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,139 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Boolean-dtype array of alternating True/False values with two missing entries."""
    pair = [True, False]
    values = pair * 4 + [np.nan] + pair * 44 + [np.nan] + pair
    return pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
@pytest.fixture
def left_array():
    """Boolean-dtype array: three True, three False, then three missing values."""
    values = [flag for flag in (True, False, None) for _ in range(3)]
    return pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
@pytest.fixture
def right_array():
    """Boolean-dtype array: the triple True/False/missing repeated three times."""
    values = [flag for _ in range(3) for flag in (True, False, None)]
    return pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
# Basic test for the arithmetic array ops
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "opname, exp",
    [
        ("add", [True, True, None, True, False, None, None, None, None]),
        ("mul", [True, False, None, False, False, None, None, None, None]),
    ],
    ids=["add", "mul"],
)
def test_add_mul(left_array, right_array, opname, exp):
    # Look the operator up by name and compare against the tabulated outcome.
    operation = getattr(operator, opname)
    outcome = operation(left_array, right_array)
    tm.assert_extension_array_equal(outcome, pd.array(exp, dtype="boolean"))
|
||||
|
||||
|
||||
def test_sub(left_array, right_array):
    # Subtracting boolean arrays must raise TypeError with numpy's message.
    pattern = (
        r"numpy boolean subtract, the `-` operator, is (?:deprecated|not supported), "
        r"use the bitwise_xor, the `\^` operator, or the logical_xor function instead\."
    )
    with pytest.raises(TypeError, match=pattern):
        left_array - right_array
|
||||
|
||||
|
||||
def test_div(left_array, right_array):
    pattern = "operator '.*' not implemented for bool dtypes"

    # check that we are matching the non-masked Series behavior
    plain_left = pd.Series(left_array._data)
    plain_right = pd.Series(right_array._data)
    with pytest.raises(NotImplementedError, match=pattern):
        plain_left / plain_right

    # the masked arrays themselves must fail the same way
    with pytest.raises(NotImplementedError, match=pattern):
        left_array / right_array
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "opname",
    [
        "floordiv",
        "mod",
        "pow",
    ],
)
def test_op_int8(left_array, right_array, opname):
    """
    Check floordiv/mod/pow between two boolean arrays.

    Every operator except ``mod`` must raise NotImplementedError; ``mod`` must
    give the same result as applying the operator to the Int8 casts.
    """
    op = getattr(operator, opname)
    if opname != "mod":
        msg = "operator '.*' not implemented for bool dtypes"
        with pytest.raises(NotImplementedError, match=msg):
            # only the raise matters here, so the result is not bound
            op(left_array, right_array)
        return

    result = op(left_array, right_array)
    expected = op(left_array.astype("Int8"), right_array.astype("Int8"))
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
# Test generic characteristics / errors
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
    # invalid ops

    # With inferred (pyarrow-backed) strings more exception types are accepted.
    if not using_infer_string:
        err = TypeError
    else:
        import pyarrow as pa

        err = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)

    op = all_arithmetic_operators
    s = pd.Series(data)
    bound_op = getattr(s, op)

    # invalid scalars
    scalar_str_msg = "|".join(
        [
            "did not contain a loop with signature matching types",
            "BooleanArray cannot perform the operation",
            "not supported for the input types, and the inputs could not be safely "
            "coerced to any supported types according to the casting rule ''safe''",
        ]
    )
    with pytest.raises(TypeError, match=scalar_str_msg):
        bound_op("foo")

    timestamp_msg = "|".join(
        [
            r"unsupported operand type\(s\) for",
            "Concatenation operation is not implemented for NumPy arrays",
            "has no kernel",
        ]
    )
    with pytest.raises(err, match=timestamp_msg):
        bound_op(pd.Timestamp("20180101"))

    # invalid array-likes
    if op in ("__mul__", "__rmul__"):
        # TODO(extension) numpy's mul with object array sees booleans as numbers
        return

    array_msg = "|".join(
        [
            r"unsupported operand type\(s\) for",
            "can only concatenate str",
            "not all arguments converted during string formatting",
            "has no kernel",
            "not implemented",
        ]
    )
    with pytest.raises(err, match=array_msg):
        bound_op(pd.Series("foo", index=s.index))
|
@ -0,0 +1,53 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_astype():
    # with missing values
    with_na = pd.array([True, False, None], dtype="boolean")

    with pytest.raises(ValueError, match="cannot convert NA to integer"):
        with_na.astype("int64")

    with pytest.raises(ValueError, match="cannot convert float NaN to"):
        with_na.astype("bool")

    as_float = with_na.astype("float64")
    tm.assert_numpy_array_equal(
        as_float, np.array([1, 0, np.nan], dtype="float64")
    )

    as_str = with_na.astype("str")
    tm.assert_numpy_array_equal(
        as_str, np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
    )

    # no missing values
    no_na = pd.array([True, False, True], dtype="boolean")

    as_int = no_na.astype("int64")
    tm.assert_numpy_array_equal(as_int, np.array([1, 0, 1], dtype="int64"))

    as_bool = no_na.astype("bool")
    tm.assert_numpy_array_equal(
        as_bool, np.array([True, False, True], dtype="bool")
    )
|
||||
|
||||
|
||||
def test_astype_to_boolean_array():
    # astype to BooleanArray
    source = pd.array([True, False, None], dtype="boolean")

    # by dtype name
    by_name = source.astype("boolean")
    tm.assert_extension_array_equal(by_name, source)

    # by dtype instance
    by_instance = source.astype(pd.BooleanDtype())
    tm.assert_extension_array_equal(by_instance, source)
|
||||
|
||||
|
||||
def test_astype_to_integer_array():
    # astype to IntegerArray
    source = pd.array([True, False, None], dtype="boolean")

    converted = source.astype("Int64")
    tm.assert_extension_array_equal(
        converted, pd.array([1, 0, None], dtype="Int64")
    )
|
@ -0,0 +1,60 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import BooleanArray
|
||||
from pandas.tests.arrays.masked_shared import ComparisonOps
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Boolean-dtype array of alternating True/False values with two missing entries."""
    pair = [True, False]
    values = pair * 4 + [np.nan] + pair * 44 + [np.nan] + pair
    return pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """Provide a fresh ``pd.BooleanDtype`` instance."""
    boolean_dtype = pd.BooleanDtype()
    return boolean_dtype
|
||||
|
||||
|
||||
class TestComparisonOps(ComparisonOps):
    """Comparison-operator tests for boolean arrays on top of the shared base."""

    def test_compare_scalar(self, data, comparison_op):
        self._compare_other(data, comparison_op, True)

    def test_compare_array(self, data, comparison_op):
        # all-True operand as a masked array, a numpy array, then a Series
        size = len(data)

        masked_other = pd.array([True] * size, dtype="boolean")
        self._compare_other(data, comparison_op, masked_other)

        numpy_other = np.array([True] * size)
        self._compare_other(data, comparison_op, numpy_other)

        series_other = pd.Series([True] * size)
        self._compare_other(data, comparison_op, series_other)

    @pytest.mark.parametrize("other", [True, False, pd.NA])
    def test_scalar(self, other, comparison_op, dtype):
        # run the shared implementation with boolean-specific scalars
        ComparisonOps.test_scalar(self, other, comparison_op, dtype)

    def test_array(self, comparison_op):
        left = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        right = pd.array([True, False, None] * 3, dtype="boolean")

        result = comparison_op(left, right)

        # expected: operator on the raw values, masked wherever either side is
        exp_values = comparison_op(left._data, right._data)
        exp_mask = left._mask | right._mask
        tm.assert_extension_array_equal(result, BooleanArray(exp_values, exp_mask))

        # ensure we haven't mutated anything inplace
        result[0] = None
        tm.assert_extension_array_equal(
            left, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            right, pd.array([True, False, None] * 3, dtype="boolean")
        )
|
@ -0,0 +1,325 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import BooleanArray
|
||||
from pandas.core.arrays.boolean import coerce_to_array
|
||||
|
||||
|
||||
def test_boolean_array_constructor():
    values = np.array([True, False, True, False], dtype="bool")
    mask = np.array([False, False, False, True], dtype="bool")

    # valid input: the masked last position becomes a missing value
    built = BooleanArray(values, mask)
    tm.assert_extension_array_equal(
        built, pd.array([True, False, True, None], dtype="boolean")
    )

    # (exception type, message, values argument, mask argument), checked in order
    invalid_cases = [
        (TypeError, "values should be boolean numpy array", values.tolist(), mask),
        (TypeError, "mask should be boolean numpy array", values, mask.tolist()),
        (TypeError, "values should be boolean numpy array", values.astype(int), mask),
        (TypeError, "mask should be boolean numpy array", values, None),
        (
            ValueError,
            "values.shape must match mask.shape",
            values.reshape(1, -1),
            mask,
        ),
        (
            ValueError,
            "values.shape must match mask.shape",
            values,
            mask.reshape(1, -1),
        ),
    ]
    for exc_type, message, bad_values, bad_mask in invalid_cases:
        with pytest.raises(exc_type, match=message):
            BooleanArray(bad_values, bad_mask)
|
||||
|
||||
|
||||
def test_boolean_array_constructor_copy():
    """Check that the constructor shares its inputs unless ``copy=True``."""
    values = np.array([True, False, True, False], dtype="bool")
    mask = np.array([False, False, False, True], dtype="bool")

    # default: the passed arrays are stored as-is
    result = BooleanArray(values, mask)
    assert result._data is values
    assert result._mask is mask

    # copy=True: new objects are stored
    result = BooleanArray(values, mask, copy=True)
    assert result._data is not values
    assert result._mask is not mask
|
||||
|
||||
|
||||
def test_to_boolean_array():
    """Check ``pd.array(..., dtype="boolean")`` for lists and numpy arrays."""
    expected = BooleanArray(
        np.array([True, False, True]), np.array([False, False, False])
    )

    result = pd.array([True, False, True], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
    result = pd.array(np.array([True, False, True]), dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
    result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean")
    tm.assert_extension_array_equal(result, expected)

    # with missing values
    expected = BooleanArray(
        np.array([True, False, True]), np.array([False, False, True])
    )

    result = pd.array([True, False, None], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
    result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_all_none():
    """Check that an all-``None`` input yields a fully masked boolean array."""
    expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True]))

    result = pd.array([None, None, None], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
    result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b",
    [
        ([True, False, None, np.nan, pd.NA], [True, False, None, None, None]),
        ([True, np.nan], [True, None]),
        ([True, pd.NA], [True, None]),
        ([np.nan, np.nan], [None, None]),
        (np.array([np.nan, np.nan], dtype=float), [None, None]),
    ],
)
def test_to_boolean_array_missing_indicators(a, b):
    """Check that ``None``, ``np.nan`` and ``pd.NA`` inputs give equal arrays."""
    result = pd.array(a, dtype="boolean")
    expected = pd.array(b, dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        ["1", "2"],
        # "foo",
        [1, 2],
        [1.0, 2.0],
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        np.array([1, 2]),
        np.array([1.0, 2.0]),
        [np.nan, {"a": 1}],
    ],
)
def test_to_boolean_array_error(values):
    """Check that non bool-like inputs raise ``TypeError`` on conversion."""
    # error in converting existing arrays to BooleanArray
    msg = "Need to pass bool-like value"
    with pytest.raises(TypeError, match=msg):
        pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
def test_to_boolean_array_from_integer_array():
    """Check conversion of numpy arrays holding 0/1 integers to boolean."""
    result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
    expected = pd.array([True, False, True, False], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)

    # with missing values
    result = pd.array(np.array([1, 0, 1, None]), dtype="boolean")
    expected = pd.array([True, False, True, None], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_from_float_array():
    """Check conversion of numpy arrays holding 0.0/1.0 floats to boolean."""
    result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean")
    expected = pd.array([True, False, True, False], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)

    # with missing values
    result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
    expected = pd.array([True, False, True, None], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_integer_like():
    """Check conversion of Python lists of 0/1 integers to boolean."""
    # integers of 0's and 1's
    result = pd.array([1, 0, 1, 0], dtype="boolean")
    expected = pd.array([True, False, True, False], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)

    # with missing values
    result = pd.array([1, 0, 1, None], dtype="boolean")
    expected = pd.array([True, False, True, None], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_coerce_to_array():
    """Check ``coerce_to_array`` with masks, copies and mismatched shapes."""
    # TODO this is currently not public API
    values = np.array([True, False, True, False], dtype="bool")
    mask = np.array([False, False, False, True], dtype="bool")
    result = BooleanArray(*coerce_to_array(values, mask=mask))
    expected = BooleanArray(values, mask)
    tm.assert_extension_array_equal(result, expected)
    assert result._data is values
    assert result._mask is mask
    result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True))
    expected = BooleanArray(values, mask)
    tm.assert_extension_array_equal(result, expected)
    assert result._data is not values
    assert result._mask is not mask

    # mixed missing from values and mask
    values = [True, False, None, False]
    mask = np.array([False, False, False, True], dtype="bool")
    result = BooleanArray(*coerce_to_array(values, mask=mask))
    expected = BooleanArray(
        np.array([True, False, True, True]), np.array([False, False, True, True])
    )
    tm.assert_extension_array_equal(result, expected)
    result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask))
    tm.assert_extension_array_equal(result, expected)
    result = BooleanArray(*coerce_to_array(values, mask=mask.tolist()))
    tm.assert_extension_array_equal(result, expected)

    # raise errors for wrong dimension
    values = np.array([True, False, True, False], dtype="bool")
    mask = np.array([False, False, False, True], dtype="bool")

    # passing 2D values is OK as long as no mask
    coerce_to_array(values.reshape(1, -1))

    with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
        coerce_to_array(values.reshape(1, -1), mask=mask)

    with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
        coerce_to_array(values, mask=mask.reshape(1, -1))
|
||||
|
||||
|
||||
def test_coerce_to_array_from_boolean_array():
    """Check ``coerce_to_array`` when the input is already a BooleanArray."""
    # passing BooleanArray to coerce_to_array
    values = np.array([True, False, True, False], dtype="bool")
    mask = np.array([False, False, False, True], dtype="bool")
    arr = BooleanArray(values, mask)
    result = BooleanArray(*coerce_to_array(arr))
    tm.assert_extension_array_equal(result, arr)
    # no copy
    assert result._data is arr._data
    assert result._mask is arr._mask

    # here copy=True is given to the BooleanArray constructor
    result = BooleanArray(*coerce_to_array(arr), copy=True)
    tm.assert_extension_array_equal(result, arr)
    assert result._data is not arr._data
    assert result._mask is not arr._mask

    with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"):
        coerce_to_array(arr, mask=mask)
|
||||
|
||||
|
||||
def test_coerce_to_numpy_array():
    """Check ``np.array(arr)`` on a boolean array, with and without NA."""
    # with missing values -> object dtype
    arr = pd.array([True, False, None], dtype="boolean")
    result = np.array(arr)
    expected = np.array([True, False, pd.NA], dtype="object")
    tm.assert_numpy_array_equal(result, expected)

    # with no missing values -> bool dtype
    arr = pd.array([True, False, True], dtype="boolean")
    result = np.array(arr)
    expected = np.array([True, False, True], dtype="bool")
    tm.assert_numpy_array_equal(result, expected)

    # force bool dtype
    result = np.array(arr, dtype="bool")
    expected = np.array([True, False, True], dtype="bool")
    tm.assert_numpy_array_equal(result, expected)
    # with missing values will raise error
    arr = pd.array([True, False, None], dtype="boolean")
    msg = (
        "cannot convert to 'bool'-dtype NumPy array with missing values. "
        "Specify an appropriate 'na_value' for this dtype."
    )
    with pytest.raises(ValueError, match=msg):
        np.array(arr, dtype="bool")
|
||||
|
||||
|
||||
def test_to_boolean_array_from_strings():
    """Check parsing of boolean-like strings via ``_from_sequence_of_strings``."""
    result = BooleanArray._from_sequence_of_strings(
        np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object),
        dtype="boolean",
    )
    # the trailing NaN is expected to come back masked
    expected = BooleanArray(
        np.array([True, False, True, True, False, False, False]),
        np.array([False, False, False, False, False, False, True]),
    )

    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_to_boolean_array_from_strings_invalid_string():
    """Check that an unparseable string raises ``ValueError``."""
    with pytest.raises(ValueError, match="cannot be cast"):
        BooleanArray._from_sequence_of_strings(["donkey"], dtype="boolean")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy(box):
    """Check ``to_numpy`` dtype / ``na_value`` handling for Series and arrays."""
    con = pd.Series if box else pd.array
    # default: bool dtype without missing values, object dtype with them
    arr = con([True, False, True], dtype="boolean")
    result = arr.to_numpy()
    expected = np.array([True, False, True], dtype="bool")
    tm.assert_numpy_array_equal(result, expected)

    arr = con([True, False, None], dtype="boolean")
    result = arr.to_numpy()
    expected = np.array([True, False, pd.NA], dtype="object")
    tm.assert_numpy_array_equal(result, expected)

    arr = con([True, False, None], dtype="boolean")
    result = arr.to_numpy(dtype="str")
    expected = np.array([True, False, pd.NA], dtype=f"{tm.ENDIAN}U5")
    tm.assert_numpy_array_equal(result, expected)

    # no missing values -> can convert to bool, otherwise raises
    arr = con([True, False, True], dtype="boolean")
    result = arr.to_numpy(dtype="bool")
    expected = np.array([True, False, True], dtype="bool")
    tm.assert_numpy_array_equal(result, expected)

    arr = con([True, False, None], dtype="boolean")
    with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"):
        arr.to_numpy(dtype="bool")

    # specify dtype and na_value
    arr = con([True, False, None], dtype="boolean")
    result = arr.to_numpy(dtype=object, na_value=None)
    expected = np.array([True, False, None], dtype="object")
    tm.assert_numpy_array_equal(result, expected)

    result = arr.to_numpy(dtype=bool, na_value=False)
    expected = np.array([True, False, False], dtype="bool")
    tm.assert_numpy_array_equal(result, expected)

    result = arr.to_numpy(dtype="int64", na_value=-99)
    expected = np.array([1, 0, -99], dtype="int64")
    tm.assert_numpy_array_equal(result, expected)

    result = arr.to_numpy(dtype="float64", na_value=np.nan)
    expected = np.array([1, 0, np.nan], dtype="float64")
    tm.assert_numpy_array_equal(result, expected)

    # converting to int or float without specifying na_value raises
    with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
        arr.to_numpy(dtype="int64")
|
||||
|
||||
|
||||
def test_to_numpy_copy():
    """Check that ``to_numpy`` shares memory unless ``copy=True`` is passed."""
    # to_numpy can be zero-copy if no missing values
    arr = pd.array([True, False, True], dtype="boolean")
    result = arr.to_numpy(dtype=bool)
    result[0] = False
    # the write through ``result`` is expected to be visible in ``arr``
    tm.assert_extension_array_equal(
        arr, pd.array([False, False, True], dtype="boolean")
    )

    arr = pd.array([True, False, True], dtype="boolean")
    result = arr.to_numpy(dtype=bool, copy=True)
    result[0] = False
    # with copy=True the source array is expected to stay unchanged
    tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean"))
|
@ -0,0 +1,126 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor]
)
def test_ufuncs_binary(ufunc):
    """Check binary ufuncs on boolean arrays against array/Series/scalar operands."""
    # two BooleanArrays
    a = pd.array([True, False, None], dtype="boolean")
    result = ufunc(a, a)
    expected = pd.array(ufunc(a._data, a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    s = pd.Series(a)
    result = ufunc(s, a)
    expected = pd.Series(ufunc(a._data, a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_series_equal(result, expected)

    # Boolean with numpy array
    arr = np.array([True, True, False])
    result = ufunc(a, arr)
    expected = pd.array(ufunc(a._data, arr), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    result = ufunc(arr, a)
    expected = pd.array(ufunc(arr, a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    # BooleanArray with scalar
    result = ufunc(a, True)
    expected = pd.array(ufunc(a._data, True), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    result = ufunc(True, a)
    expected = pd.array(ufunc(True, a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    # not handled types
    msg = r"operand type\(s\) all returned NotImplemented from __array_ufunc__"
    with pytest.raises(TypeError, match=msg):
        ufunc(a, "test")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.logical_not])
def test_ufuncs_unary(ufunc):
    """Check unary ufuncs on a boolean array and on a Series wrapping it."""
    a = pd.array([True, False, None], dtype="boolean")
    result = ufunc(a)
    expected = pd.array(ufunc(a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_extension_array_equal(result, expected)

    ser = pd.Series(a)
    result = ufunc(ser)
    expected = pd.Series(ufunc(a._data), dtype="boolean")
    expected[a._mask] = np.nan
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_ufunc_numeric():
    """Check that ``np.sqrt`` on a boolean array gives a ``Float32`` array."""
    # np.sqrt on np.bool_ returns float16, which we upcast to Float32
    #  bc we do not have Float16
    arr = pd.array([True, False, None], dtype="boolean")

    res = np.sqrt(arr)

    expected = pd.array([1, 0, None], dtype="Float32")
    tm.assert_extension_array_equal(res, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[True, False], [True, None]])
def test_ufunc_reduce_raises(values):
    """Check the result of ``np.add.reduce`` on a boolean array.

    Despite the name, the body asserts a returned value: ``pd.NA`` when the
    last element is missing, otherwise the sum of the underlying data.
    """
    arr = pd.array(values, dtype="boolean")

    res = np.add.reduce(arr)
    if arr[-1] is pd.NA:
        expected = pd.NA
    else:
        expected = arr._data.sum()
    tm.assert_almost_equal(res, expected)
|
||||
|
||||
|
||||
def test_value_counts_na():
    """Check ``value_counts`` on a boolean array with and without ``dropna``."""
    arr = pd.array([True, False, pd.NA], dtype="boolean")
    result = arr.value_counts(dropna=False)
    expected = pd.Series([1, 1, 1], index=arr, dtype="Int64", name="count")
    assert expected.index.dtype == arr.dtype
    tm.assert_series_equal(result, expected)

    result = arr.value_counts(dropna=True)
    expected = pd.Series([1, 1], index=arr[:-1], dtype="Int64", name="count")
    assert expected.index.dtype == arr.dtype
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_value_counts_with_normalize():
    """Check ``Series.value_counts(normalize=True)`` on boolean data."""
    ser = pd.Series([True, False, pd.NA], dtype="boolean")
    result = ser.value_counts(normalize=True)
    expected = pd.Series([1, 1], index=ser[:-1], dtype="Float64", name="proportion") / 2
    assert expected.index.dtype == "boolean"
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_diff():
    """Check first-order ``diff`` on a boolean array and the matching Series."""
    a = pd.array(
        [True, True, False, False, True, None, True, None, False], dtype="boolean"
    )
    result = pd.core.algorithms.diff(a, 1)
    expected = pd.array(
        [None, False, True, False, True, None, None, None, None], dtype="boolean"
    )
    tm.assert_extension_array_equal(result, expected)

    ser = pd.Series(a)
    result = ser.diff()
    expected = pd.Series(expected)
    tm.assert_series_equal(result, expected)
|
@ -0,0 +1,13 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("na", [None, np.nan, pd.NA])
def test_setitem_missing_values(na):
    """Check that assigning any missing-value marker masks the element."""
    arr = pd.array([True, False, None], dtype="boolean")
    expected = pd.array([True, None, None], dtype="boolean")
    arr[1] = na
    tm.assert_extension_array_equal(arr, expected)
|
@ -0,0 +1,254 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import BooleanArray
|
||||
from pandas.core.ops.mask_ops import (
|
||||
kleene_and,
|
||||
kleene_or,
|
||||
kleene_xor,
|
||||
)
|
||||
from pandas.tests.extension.base import BaseOpsUtil
|
||||
|
||||
|
||||
class TestLogicalOps(BaseOpsUtil):
    """Tests for ``|``, ``&`` and ``^`` on boolean arrays, including NA handling."""

    def test_numpy_scalars_ok(self, all_logical_operators):
        """Python bools and ``np.bool_`` scalars must give equal results."""
        a = pd.array([True, False, None], dtype="boolean")
        op = getattr(a, all_logical_operators)

        tm.assert_extension_array_equal(op(True), op(np.bool_(True)))
        tm.assert_extension_array_equal(op(False), op(np.bool_(False)))

    def get_op_from_name(self, op_name):
        """Return a two-argument callable for the dunder name ``op_name``.

        The name is stripped of underscores and looked up on ``operator``
        (with a trailing underscore unless it contains "xor"); when that
        lookup fails it is treated as a reflected operator and the
        arguments are swapped.
        """
        short_opname = op_name.strip("_")
        short_opname = short_opname if "xor" in short_opname else short_opname + "_"
        try:
            op = getattr(operator, short_opname)
        except AttributeError:
            # Assume it is the reverse operator
            rop = getattr(operator, short_opname[1:])
            op = lambda x, y: rop(y, x)

        return op

    def test_empty_ok(self, all_logical_operators):
        """Logical ops on an empty array return an equal empty array."""
        a = pd.array([], dtype="boolean")
        op_name = all_logical_operators
        result = getattr(a, op_name)(True)
        tm.assert_extension_array_equal(a, result)

        result = getattr(a, op_name)(False)
        tm.assert_extension_array_equal(a, result)

        result = getattr(a, op_name)(pd.NA)
        tm.assert_extension_array_equal(a, result)

    @pytest.mark.parametrize(
        "other", ["a", pd.Timestamp(2017, 1, 1, 12), np.timedelta64(4)]
    )
    def test_eq_mismatched_type(self, other):
        """``==`` / ``!=`` against an unrelated scalar is all-False / all-True."""
        # GH-44499
        arr = pd.array([True, False])
        result = arr == other
        expected = pd.array([False, False])
        tm.assert_extension_array_equal(result, expected)

        result = arr != other
        expected = pd.array([True, True])
        tm.assert_extension_array_equal(result, expected)

    def test_logical_length_mismatch_raises(self, all_logical_operators):
        """Operands of a different length raise ``ValueError``."""
        op_name = all_logical_operators
        a = pd.array([True, False, None], dtype="boolean")
        msg = "Lengths must match"

        with pytest.raises(ValueError, match=msg):
            getattr(a, op_name)([True, False])

        with pytest.raises(ValueError, match=msg):
            getattr(a, op_name)(np.array([True, False]))

        with pytest.raises(ValueError, match=msg):
            getattr(a, op_name)(pd.array([True, False], dtype="boolean"))

    def test_logical_nan_raises(self, all_logical_operators):
        """A float NaN operand raises ``TypeError``."""
        op_name = all_logical_operators
        a = pd.array([True, False, None], dtype="boolean")
        msg = "Got float instead"

        with pytest.raises(TypeError, match=msg):
            getattr(a, op_name)(np.nan)

    @pytest.mark.parametrize("other", ["a", 1])
    def test_non_bool_or_na_other_raises(self, other, all_logical_operators):
        """Scalars that are neither bool nor NA raise ``TypeError``."""
        a = pd.array([True, False], dtype="boolean")
        # the error message is expected to mention the offending type's name
        with pytest.raises(TypeError, match=type(other).__name__):
            getattr(a, all_logical_operators)(other)

    def test_kleene_or(self):
        """Full truth table for ``|`` between two boolean arrays."""
        # A clear test of behavior.
        a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        b = pd.array([True, False, None] * 3, dtype="boolean")
        result = a | b
        expected = pd.array(
            [True, True, True, True, False, None, True, None, None], dtype="boolean"
        )
        tm.assert_extension_array_equal(result, expected)

        result = b | a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            b, pd.array([True, False, None] * 3, dtype="boolean")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (pd.NA, [True, None, None]),
            (True, [True, True, True]),
            (np.bool_(True), [True, True, True]),
            (False, [True, False, None]),
            (np.bool_(False), [True, False, None]),
        ],
    )
    def test_kleene_or_scalar(self, other, expected):
        """``|`` between a boolean array and a scalar, in both operand orders."""
        # TODO: test True & False
        a = pd.array([True, False, None], dtype="boolean")
        result = a | other
        expected = pd.array(expected, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        result = other | a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True, False, None], dtype="boolean")
        )

    def test_kleene_and(self):
        """Full truth table for ``&`` between two boolean arrays."""
        # A clear test of behavior.
        a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        b = pd.array([True, False, None] * 3, dtype="boolean")
        result = a & b
        expected = pd.array(
            [True, False, None, False, False, False, None, False, None], dtype="boolean"
        )
        tm.assert_extension_array_equal(result, expected)

        result = b & a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            b, pd.array([True, False, None] * 3, dtype="boolean")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (pd.NA, [None, False, None]),
            (True, [True, False, None]),
            (False, [False, False, False]),
            (np.bool_(True), [True, False, None]),
            (np.bool_(False), [False, False, False]),
        ],
    )
    def test_kleene_and_scalar(self, other, expected):
        """``&`` between a boolean array and a scalar, in both operand orders."""
        a = pd.array([True, False, None], dtype="boolean")
        result = a & other
        expected = pd.array(expected, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        result = other & a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True, False, None], dtype="boolean")
        )

    def test_kleene_xor(self):
        """Full truth table for ``^`` between two boolean arrays."""
        a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        b = pd.array([True, False, None] * 3, dtype="boolean")
        result = a ^ b
        expected = pd.array(
            [False, True, None, True, False, None, None, None, None], dtype="boolean"
        )
        tm.assert_extension_array_equal(result, expected)

        result = b ^ a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            b, pd.array([True, False, None] * 3, dtype="boolean")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (pd.NA, [None, None, None]),
            (True, [False, True, None]),
            (np.bool_(True), [False, True, None]),
            (np.bool_(False), [True, False, None]),
        ],
    )
    def test_kleene_xor_scalar(self, other, expected):
        """``^`` between a boolean array and a scalar, in both operand orders."""
        a = pd.array([True, False, None], dtype="boolean")
        result = a ^ other
        expected = pd.array(expected, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        result = other ^ a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True, False, None], dtype="boolean")
        )

    @pytest.mark.parametrize("other", [True, False, pd.NA, [True, False, None] * 3])
    def test_no_masked_assumptions(self, other, all_logical_operators):
        """Results must not depend on the data stored under masked positions."""
        # The logical operations should not assume that masked values are False!
        a = pd.arrays.BooleanArray(
            np.array([True, True, True, False, False, False, True, False, True]),
            np.array([False] * 6 + [True, True, True]),
        )
        b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        if isinstance(other, list):
            other = pd.array(other, dtype="boolean")

        result = getattr(a, all_logical_operators)(other)
        expected = getattr(b, all_logical_operators)(other)
        tm.assert_extension_array_equal(result, expected)

        if isinstance(other, BooleanArray):
            # overwrite the data hidden behind the masks and compare again
            other._data[other._mask] = True
            a._data[a._mask] = False

            result = getattr(a, all_logical_operators)(other)
            expected = getattr(b, all_logical_operators)(other)
            tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and])
def test_error_both_scalar(operation):
    """Check that the kleene helpers reject two scalar operands."""
    msg = r"Either `left` or `right` need to be a np\.ndarray."
    with pytest.raises(TypeError, match=msg):
        # masks need to be non-None, otherwise it ends up in an infinite recursion
        operation(True, True, np.zeros(1), np.zeros(1))
|
@ -0,0 +1,27 @@
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestUnaryOps:
    """Tests for unary operators on boolean arrays."""

    def test_invert(self):
        """``~`` flips non-missing values for arrays, Series and DataFrames."""
        a = pd.array([True, False, None], dtype="boolean")
        expected = pd.array([False, True, None], dtype="boolean")
        tm.assert_extension_array_equal(~a, expected)

        expected = pd.Series(expected, index=["a", "b", "c"], name="name")
        result = ~pd.Series(a, index=["a", "b", "c"], name="name")
        tm.assert_series_equal(result, expected)

        df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"])
        result = ~df
        expected = pd.DataFrame(
            {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"]
        )
        tm.assert_frame_equal(result, expected)

    def test_abs(self):
        """``abs`` returns an array equal to its input."""
        # matching numpy behavior, abs is the identity function
        arr = pd.array([True, False, None], dtype="boolean")
        result = abs(arr)

        tm.assert_extension_array_equal(result, arr)
|
@ -0,0 +1,62 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Fixture returning boolean array, with valid and missing values."""
    return pd.array(
        [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False],
        dtype="boolean",
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip",
    [
        ([True, pd.NA], True, True, True, pd.NA),
        ([False, pd.NA], False, False, pd.NA, False),
        ([pd.NA], False, True, pd.NA, pd.NA),
        ([], False, True, False, True),
        # GH-33253: all True / all False values buggy with skipna=False
        ([True, True], True, True, True, True),
        ([False, False], False, False, False, False),
    ],
)
def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
    """Check ``any`` / ``all`` with and without ``skipna`` for arrays and Series."""
    # the methods return numpy scalars
    exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any)
    exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all)
    exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
    exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)

    for con in [pd.array, pd.Series]:
        a = con(values, dtype="boolean")
        assert a.any() is exp_any
        assert a.all() is exp_all
        assert a.any(skipna=False) is exp_any_noskip
        assert a.all(skipna=False) is exp_all_noskip

        assert np.any(a.any()) is exp_any
        assert np.all(a.all()) is exp_all
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dropna", [True, False])
def test_reductions_return_types(dropna, data, all_numeric_reductions):
    """Check the scalar type returned by each numeric reduction on a Series."""
    op = all_numeric_reductions
    s = pd.Series(data)
    if dropna:
        s = s.dropna()

    if op in ("sum", "prod"):
        assert isinstance(getattr(s, op)(), np.int_)
    elif op == "count":
        # Oddly on the 32 bit build (but not Windows), this is intc (!= intp)
        assert isinstance(getattr(s, op)(), np.integer)
    elif op in ("min", "max"):
        assert isinstance(getattr(s, op)(), np.bool_)
    else:
        # "mean", "std", "var", "median", "kurt", "skew"
        assert isinstance(getattr(s, op)(), np.float64)
|
@ -0,0 +1,13 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def test_repr():
    """Check the repr of a boolean DataFrame column, Series and array."""
    df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")})
    # the expected strings carry the column-alignment padding pandas emits
    expected = "       A\n0   True\n1  False\n2   <NA>"
    assert repr(df) == expected

    expected = "0     True\n1    False\n2     <NA>\nName: A, dtype: boolean"
    assert repr(df.A) == expected

    expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean"
    assert repr(df.A.array) == expected
|
@ -0,0 +1,89 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
@pytest.mark.parametrize("categories", [["b", "a", "c"], ["a", "b", "c", "d"]])
def test_factorize(categories, ordered):
    """Check ``pd.factorize`` on a Categorical containing a missing value."""
    cat = pd.Categorical(
        ["b", "b", "a", "c", None], categories=categories, ordered=ordered
    )
    codes, uniques = pd.factorize(cat)
    # codes follow order of first appearance; the missing value maps to -1
    expected_codes = np.array([0, 0, 1, 2, -1], dtype=np.intp)
    expected_uniques = pd.Categorical(
        ["b", "a", "c"], categories=categories, ordered=ordered
    )

    tm.assert_numpy_array_equal(codes, expected_codes)
    tm.assert_categorical_equal(uniques, expected_uniques)
|
||||
|
||||
|
||||
def test_factorized_sort():
    """Check ``pd.factorize(..., sort=True)`` on an unordered Categorical."""
    cat = pd.Categorical(["b", "b", None, "a"])
    codes, uniques = pd.factorize(cat, sort=True)
    expected_codes = np.array([1, 1, -1, 0], dtype=np.intp)
    expected_uniques = pd.Categorical(["a", "b"])

    tm.assert_numpy_array_equal(codes, expected_codes)
    tm.assert_categorical_equal(uniques, expected_uniques)
|
||||
|
||||
|
||||
def test_factorized_sort_ordered():
    """Check ``pd.factorize(..., sort=True)`` on an ordered Categorical."""
    cat = pd.Categorical(
        ["b", "b", None, "a"], categories=["c", "b", "a"], ordered=True
    )

    codes, uniques = pd.factorize(cat, sort=True)
    # expected order follows the declared categories ("b" before "a")
    expected_codes = np.array([0, 0, -1, 1], dtype=np.intp)
    expected_uniques = pd.Categorical(
        ["b", "a"], categories=["c", "b", "a"], ordered=True
    )

    tm.assert_numpy_array_equal(codes, expected_codes)
    tm.assert_categorical_equal(uniques, expected_uniques)
|
||||
|
||||
|
||||
def test_isin_cats():
    """``Categorical.isin`` matches NaN and reports False for unknown values."""
    # GH2003
    values = pd.Categorical(["a", "b", np.nan])

    cases = [
        # NaN in the candidates matches the missing entry
        (["a", np.nan], [True, False, True]),
        # "c" is not present at all, so only "a" matches
        (["a", "c"], [True, False, False]),
    ]
    for candidates, mask in cases:
        found = values.isin(candidates)
        want = np.array(mask, dtype=bool)
        tm.assert_numpy_array_equal(want, found)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", [[""], [None, ""], [pd.NaT, ""]])
def test_isin_cats_corner_cases(value):
    """An empty-string category is found even next to missing-value markers."""
    # GH36550
    only_empty_string = pd.Categorical([""])

    found = only_empty_string.isin(value)

    want = np.array([True], dtype=bool)
    tm.assert_numpy_array_equal(want, found)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])])
def test_isin_empty(empty):
    """``isin`` against an empty container is False for every element."""
    values = pd.Categorical(["a", "b"])

    found = values.isin(empty)

    want = np.array([False, False], dtype=bool)
    tm.assert_numpy_array_equal(want, found)
|
||||
|
||||
|
||||
def test_diff():
    """``diff`` on categorical data raises for both Series and DataFrame."""
    ser = pd.Series([1, 2, 3], dtype="category")
    frame = ser.to_frame(name="A")
    msg = "Convert to a suitable dtype"

    for obj in (ser, frame):
        with pytest.raises(TypeError, match=msg):
            obj.diff()
|
@ -0,0 +1,349 @@
|
||||
import re
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PYPY
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
DataFrame,
|
||||
Index,
|
||||
NaT,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_scalar
|
||||
|
||||
|
||||
class TestCategoricalAnalytics:
|
||||
@pytest.mark.parametrize("aggregation", ["min", "max"])
|
||||
def test_min_max_not_ordered_raises(self, aggregation):
|
||||
# unordered cats have no min/max
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=False)
|
||||
msg = f"Categorical is not ordered for operation {aggregation}"
|
||||
agg_func = getattr(cat, aggregation)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
agg_func()
|
||||
|
||||
ufunc = np.minimum if aggregation == "min" else np.maximum
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ufunc.reduce(cat)
|
||||
|
||||
def test_min_max_ordered(self, index_or_series_or_array):
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=True)
|
||||
obj = index_or_series_or_array(cat)
|
||||
_min = obj.min()
|
||||
_max = obj.max()
|
||||
assert _min == "a"
|
||||
assert _max == "d"
|
||||
|
||||
assert np.minimum.reduce(obj) == "a"
|
||||
assert np.maximum.reduce(obj) == "d"
|
||||
# TODO: raises if we pass axis=0 (on Index and Categorical, not Series)
|
||||
|
||||
cat = Categorical(
|
||||
["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True
|
||||
)
|
||||
obj = index_or_series_or_array(cat)
|
||||
_min = obj.min()
|
||||
_max = obj.max()
|
||||
assert _min == "d"
|
||||
assert _max == "a"
|
||||
assert np.minimum.reduce(obj) == "d"
|
||||
assert np.maximum.reduce(obj) == "a"
|
||||
|
||||
def test_min_max_reduce(self):
|
||||
# GH52788
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=True)
|
||||
df = DataFrame(cat)
|
||||
|
||||
result_max = df.agg("max")
|
||||
expected_max = Series(Categorical(["d"], dtype=cat.dtype))
|
||||
tm.assert_series_equal(result_max, expected_max)
|
||||
|
||||
result_min = df.agg("min")
|
||||
expected_min = Series(Categorical(["a"], dtype=cat.dtype))
|
||||
tm.assert_series_equal(result_min, expected_min)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"categories,expected",
|
||||
[
|
||||
(list("ABC"), np.nan),
|
||||
([1, 2, 3], np.nan),
|
||||
pytest.param(
|
||||
Series(date_range("2020-01-01", periods=3), dtype="category"),
|
||||
NaT,
|
||||
marks=pytest.mark.xfail(
|
||||
reason="https://github.com/pandas-dev/pandas/issues/29962"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("aggregation", ["min", "max"])
|
||||
def test_min_max_ordered_empty(self, categories, expected, aggregation):
|
||||
# GH 30227
|
||||
cat = Categorical([], categories=categories, ordered=True)
|
||||
|
||||
agg_func = getattr(cat, aggregation)
|
||||
result = agg_func()
|
||||
assert result is expected
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, categories",
|
||||
[(["a", "b", "c", np.nan], list("cba")), ([1, 2, 3, np.nan], [3, 2, 1])],
|
||||
)
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
@pytest.mark.parametrize("function", ["min", "max"])
|
||||
def test_min_max_with_nan(self, values, categories, function, skipna):
|
||||
# GH 25303
|
||||
cat = Categorical(values, categories=categories, ordered=True)
|
||||
result = getattr(cat, function)(skipna=skipna)
|
||||
|
||||
if skipna is False:
|
||||
assert result is np.nan
|
||||
else:
|
||||
expected = categories[0] if function == "min" else categories[2]
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize("function", ["min", "max"])
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
def test_min_max_only_nan(self, function, skipna):
|
||||
# https://github.com/pandas-dev/pandas/issues/33450
|
||||
cat = Categorical([np.nan], categories=[1, 2], ordered=True)
|
||||
result = getattr(cat, function)(skipna=skipna)
|
||||
assert result is np.nan
|
||||
|
||||
@pytest.mark.parametrize("method", ["min", "max"])
|
||||
def test_numeric_only_min_max_raises(self, method):
|
||||
# GH 25303
|
||||
cat = Categorical(
|
||||
[np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
|
||||
)
|
||||
with pytest.raises(TypeError, match=".* got an unexpected keyword"):
|
||||
getattr(cat, method)(numeric_only=True)
|
||||
|
||||
@pytest.mark.parametrize("method", ["min", "max"])
|
||||
def test_numpy_min_max_raises(self, method):
|
||||
cat = Categorical(["a", "b", "c", "b"], ordered=False)
|
||||
msg = (
|
||||
f"Categorical is not ordered for operation {method}\n"
|
||||
"you can use .as_ordered() to change the Categorical to an ordered one"
|
||||
)
|
||||
method = getattr(np, method)
|
||||
with pytest.raises(TypeError, match=re.escape(msg)):
|
||||
method(cat)
|
||||
|
||||
@pytest.mark.parametrize("kwarg", ["axis", "out", "keepdims"])
|
||||
@pytest.mark.parametrize("method", ["min", "max"])
|
||||
def test_numpy_min_max_unsupported_kwargs_raises(self, method, kwarg):
|
||||
cat = Categorical(["a", "b", "c", "b"], ordered=True)
|
||||
msg = (
|
||||
f"the '{kwarg}' parameter is not supported in the pandas implementation "
|
||||
f"of {method}"
|
||||
)
|
||||
if kwarg == "axis":
|
||||
msg = r"`axis` must be fewer than the number of dimensions \(1\)"
|
||||
kwargs = {kwarg: 42}
|
||||
method = getattr(np, method)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
method(cat, **kwargs)
|
||||
|
||||
@pytest.mark.parametrize("method, expected", [("min", "a"), ("max", "c")])
|
||||
def test_numpy_min_max_axis_equals_none(self, method, expected):
|
||||
cat = Categorical(["a", "b", "c", "b"], ordered=True)
|
||||
method = getattr(np, method)
|
||||
result = method(cat, axis=None)
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values,categories,exp_mode",
|
||||
[
|
||||
([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]),
|
||||
([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]),
|
||||
([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]),
|
||||
([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]),
|
||||
([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
|
||||
([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
|
||||
],
|
||||
)
|
||||
def test_mode(self, values, categories, exp_mode):
|
||||
cat = Categorical(values, categories=categories, ordered=True)
|
||||
res = Series(cat).mode()._values
|
||||
exp = Categorical(exp_mode, categories=categories, ordered=True)
|
||||
tm.assert_categorical_equal(res, exp)
|
||||
|
||||
def test_searchsorted(self, ordered):
|
||||
# https://github.com/pandas-dev/pandas/issues/8420
|
||||
# https://github.com/pandas-dev/pandas/issues/14522
|
||||
|
||||
cat = Categorical(
|
||||
["cheese", "milk", "apple", "bread", "bread"],
|
||||
categories=["cheese", "milk", "apple", "bread"],
|
||||
ordered=ordered,
|
||||
)
|
||||
ser = Series(cat)
|
||||
|
||||
# Searching for single item argument, side='left' (default)
|
||||
res_cat = cat.searchsorted("apple")
|
||||
assert res_cat == 2
|
||||
assert is_scalar(res_cat)
|
||||
|
||||
res_ser = ser.searchsorted("apple")
|
||||
assert res_ser == 2
|
||||
assert is_scalar(res_ser)
|
||||
|
||||
# Searching for single item array, side='left' (default)
|
||||
res_cat = cat.searchsorted(["bread"])
|
||||
res_ser = ser.searchsorted(["bread"])
|
||||
exp = np.array([3], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res_cat, exp)
|
||||
tm.assert_numpy_array_equal(res_ser, exp)
|
||||
|
||||
# Searching for several items array, side='right'
|
||||
res_cat = cat.searchsorted(["apple", "bread"], side="right")
|
||||
res_ser = ser.searchsorted(["apple", "bread"], side="right")
|
||||
exp = np.array([3, 5], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(res_cat, exp)
|
||||
tm.assert_numpy_array_equal(res_ser, exp)
|
||||
|
||||
# Searching for a single value that is not from the Categorical
|
||||
with pytest.raises(TypeError, match="cucumber"):
|
||||
cat.searchsorted("cucumber")
|
||||
with pytest.raises(TypeError, match="cucumber"):
|
||||
ser.searchsorted("cucumber")
|
||||
|
||||
# Searching for multiple values one of each is not from the Categorical
|
||||
msg = (
|
||||
"Cannot setitem on a Categorical with a new category, "
|
||||
"set the categories first"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat.searchsorted(["bread", "cucumber"])
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ser.searchsorted(["bread", "cucumber"])
|
||||
|
||||
def test_unique(self, ordered):
|
||||
# GH38140
|
||||
dtype = CategoricalDtype(["a", "b", "c"], ordered=ordered)
|
||||
|
||||
# categories are reordered based on value when ordered=False
|
||||
cat = Categorical(["a", "b", "c"], dtype=dtype)
|
||||
res = cat.unique()
|
||||
tm.assert_categorical_equal(res, cat)
|
||||
|
||||
cat = Categorical(["a", "b", "a", "a"], dtype=dtype)
|
||||
res = cat.unique()
|
||||
tm.assert_categorical_equal(res, Categorical(["a", "b"], dtype=dtype))
|
||||
|
||||
cat = Categorical(["c", "a", "b", "a", "a"], dtype=dtype)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(["c", "a", "b"], dtype=dtype)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
# nan must be removed
|
||||
cat = Categorical(["b", np.nan, "b", np.nan, "a"], dtype=dtype)
|
||||
res = cat.unique()
|
||||
exp_cat = Categorical(["b", np.nan, "a"], dtype=dtype)
|
||||
tm.assert_categorical_equal(res, exp_cat)
|
||||
|
||||
def test_unique_index_series(self, ordered):
|
||||
# GH38140
|
||||
dtype = CategoricalDtype([3, 2, 1], ordered=ordered)
|
||||
|
||||
c = Categorical([3, 1, 2, 2, 1], dtype=dtype)
|
||||
# Categorical.unique sorts categories by appearance order
|
||||
# if ordered=False
|
||||
exp = Categorical([3, 1, 2], dtype=dtype)
|
||||
tm.assert_categorical_equal(c.unique(), exp)
|
||||
|
||||
tm.assert_index_equal(Index(c).unique(), Index(exp))
|
||||
tm.assert_categorical_equal(Series(c).unique(), exp)
|
||||
|
||||
c = Categorical([1, 1, 2, 2], dtype=dtype)
|
||||
exp = Categorical([1, 2], dtype=dtype)
|
||||
tm.assert_categorical_equal(c.unique(), exp)
|
||||
tm.assert_index_equal(Index(c).unique(), Index(exp))
|
||||
tm.assert_categorical_equal(Series(c).unique(), exp)
|
||||
|
||||
def test_shift(self):
|
||||
# GH 9416
|
||||
cat = Categorical(["a", "b", "c", "d", "a"])
|
||||
|
||||
# shift forward
|
||||
sp1 = cat.shift(1)
|
||||
xp1 = Categorical([np.nan, "a", "b", "c", "d"])
|
||||
tm.assert_categorical_equal(sp1, xp1)
|
||||
tm.assert_categorical_equal(cat[:-1], sp1[1:])
|
||||
|
||||
# shift back
|
||||
sn2 = cat.shift(-2)
|
||||
xp2 = Categorical(
|
||||
["c", "d", "a", np.nan, np.nan], categories=["a", "b", "c", "d"]
|
||||
)
|
||||
tm.assert_categorical_equal(sn2, xp2)
|
||||
tm.assert_categorical_equal(cat[2:], sn2[:-2])
|
||||
|
||||
# shift by zero
|
||||
tm.assert_categorical_equal(cat, cat.shift(0))
|
||||
|
||||
def test_nbytes(self):
|
||||
cat = Categorical([1, 2, 3])
|
||||
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
|
||||
assert cat.nbytes == exp
|
||||
|
||||
def test_memory_usage(self):
|
||||
cat = Categorical([1, 2, 3])
|
||||
|
||||
# .categories is an index, so we include the hashtable
|
||||
assert 0 < cat.nbytes <= cat.memory_usage()
|
||||
assert 0 < cat.nbytes <= cat.memory_usage(deep=True)
|
||||
|
||||
cat = Categorical(["foo", "foo", "bar"])
|
||||
assert cat.memory_usage(deep=True) > cat.nbytes
|
||||
|
||||
if not PYPY:
|
||||
# sys.getsizeof will call the .memory_usage with
|
||||
# deep=True, and add on some GC overhead
|
||||
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
|
||||
assert abs(diff) < 100
|
||||
|
||||
def test_map(self):
|
||||
c = Categorical(list("ABABC"), categories=list("CBA"), ordered=True)
|
||||
result = c.map(lambda x: x.lower(), na_action=None)
|
||||
exp = Categorical(list("ababc"), categories=list("cba"), ordered=True)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
|
||||
c = Categorical(list("ABABC"), categories=list("ABC"), ordered=False)
|
||||
result = c.map(lambda x: x.lower(), na_action=None)
|
||||
exp = Categorical(list("ababc"), categories=list("abc"), ordered=False)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
|
||||
result = c.map(lambda x: 1, na_action=None)
|
||||
# GH 12766: Return an index not an array
|
||||
tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))
|
||||
|
||||
@pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
|
||||
def test_validate_inplace_raises(self, value):
|
||||
cat = Categorical(["A", "B", "B", "C", "A"])
|
||||
msg = (
|
||||
'For argument "inplace" expected type bool, '
|
||||
f"received type {type(value).__name__}"
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
cat.sort_values(inplace=value)
|
||||
|
||||
def test_quantile_empty(self):
|
||||
# make sure we have correct itemsize on resulting codes
|
||||
cat = Categorical(["A", "B"])
|
||||
idx = Index([0.0, 0.5])
|
||||
result = cat[:0]._quantile(idx, interpolation="linear")
|
||||
assert result._codes.dtype == np.int8
|
||||
|
||||
expected = cat.take([-1, -1], allow_fill=True)
|
||||
tm.assert_extension_array_equal(result, expected)
|
@ -0,0 +1,501 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY311
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
StringDtype,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.categorical import recode_for_categories
|
||||
|
||||
|
||||
class TestCategoricalAPI:
|
||||
def test_to_list_deprecated(self):
|
||||
# GH#51254
|
||||
cat1 = Categorical(list("acb"), ordered=False)
|
||||
msg = "Categorical.to_list is deprecated and will be removed"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
cat1.to_list()
|
||||
|
||||
def test_ordered_api(self):
|
||||
# GH 9347
|
||||
cat1 = Categorical(list("acb"), ordered=False)
|
||||
tm.assert_index_equal(cat1.categories, Index(["a", "b", "c"]))
|
||||
assert not cat1.ordered
|
||||
|
||||
cat2 = Categorical(list("acb"), categories=list("bca"), ordered=False)
|
||||
tm.assert_index_equal(cat2.categories, Index(["b", "c", "a"]))
|
||||
assert not cat2.ordered
|
||||
|
||||
cat3 = Categorical(list("acb"), ordered=True)
|
||||
tm.assert_index_equal(cat3.categories, Index(["a", "b", "c"]))
|
||||
assert cat3.ordered
|
||||
|
||||
cat4 = Categorical(list("acb"), categories=list("bca"), ordered=True)
|
||||
tm.assert_index_equal(cat4.categories, Index(["b", "c", "a"]))
|
||||
assert cat4.ordered
|
||||
|
||||
def test_set_ordered(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
cat2 = cat.as_unordered()
|
||||
assert not cat2.ordered
|
||||
cat2 = cat.as_ordered()
|
||||
assert cat2.ordered
|
||||
|
||||
assert cat2.set_ordered(True).ordered
|
||||
assert not cat2.set_ordered(False).ordered
|
||||
|
||||
# removed in 0.19.0
|
||||
msg = (
|
||||
"property 'ordered' of 'Categorical' object has no setter"
|
||||
if PY311
|
||||
else "can't set attribute"
|
||||
)
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
cat.ordered = True
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
cat.ordered = False
|
||||
|
||||
def test_rename_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"])
|
||||
|
||||
# inplace=False: the old one must not be changed
|
||||
res = cat.rename_categories([1, 2, 3])
|
||||
tm.assert_numpy_array_equal(
|
||||
res.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)
|
||||
)
|
||||
tm.assert_index_equal(res.categories, Index([1, 2, 3]))
|
||||
|
||||
exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_cat)
|
||||
|
||||
exp_cat = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(cat.categories, exp_cat)
|
||||
|
||||
# GH18862 (let rename_categories take callables)
|
||||
result = cat.rename_categories(lambda x: x.upper())
|
||||
expected = Categorical(["A", "B", "C", "A"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]])
|
||||
def test_rename_categories_wrong_length_raises(self, new_categories):
|
||||
cat = Categorical(["a", "b", "c", "a"])
|
||||
msg = (
|
||||
"new categories need to have the same number of items as the "
|
||||
"old categories!"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
cat.rename_categories(new_categories)
|
||||
|
||||
def test_rename_categories_series(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/17981
|
||||
c = Categorical(["a", "b"])
|
||||
result = c.rename_categories(Series([0, 1], index=["a", "b"]))
|
||||
expected = Categorical([0, 1])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_rename_categories_dict(self):
|
||||
# GH 17336
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1})
|
||||
expected = Index([4, 3, 2, 1])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts of smaller length
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"a": 1, "c": 3})
|
||||
|
||||
expected = Index([1, "b", 3, "d"])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts with bigger length
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6})
|
||||
expected = Index([1, 2, 3, 4])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
# Test for dicts with no items from old categories
|
||||
cat = Categorical(["a", "b", "c", "d"])
|
||||
res = cat.rename_categories({"f": 1, "g": 3})
|
||||
|
||||
expected = Index(["a", "b", "c", "d"])
|
||||
tm.assert_index_equal(res.categories, expected)
|
||||
|
||||
def test_reorder_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(
|
||||
["a", "b", "c", "a"], categories=["c", "b", "a"], ordered=True
|
||||
)
|
||||
|
||||
res = cat.reorder_categories(["c", "b", "a"])
|
||||
# cat must be the same as before
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
# only res is changed
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"new_categories",
|
||||
[
|
||||
["a"], # not all "old" included in "new"
|
||||
["a", "b", "d"], # still not all "old" in "new"
|
||||
["a", "b", "c", "d"], # all "old" included in "new", but too long
|
||||
],
|
||||
)
|
||||
def test_reorder_categories_raises(self, new_categories):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
msg = "items in new_categories are not the same as in old categories"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
cat.reorder_categories(new_categories)
|
||||
|
||||
def test_add_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(
|
||||
["a", "b", "c", "a"], categories=["a", "b", "c", "d"], ordered=True
|
||||
)
|
||||
|
||||
res = cat.add_categories("d")
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
res = cat.add_categories(["d"])
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
# GH 9927
|
||||
cat = Categorical(list("abc"), ordered=True)
|
||||
expected = Categorical(list("abc"), categories=list("abcde"), ordered=True)
|
||||
# test with Series, np.array, index, list
|
||||
res = cat.add_categories(Series(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(np.array(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(Index(["d", "e"]))
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
res = cat.add_categories(["d", "e"])
|
||||
tm.assert_categorical_equal(res, expected)
|
||||
|
||||
def test_add_categories_existing_raises(self):
|
||||
# new is in old categories
|
||||
cat = Categorical(["a", "b", "c", "d"], ordered=True)
|
||||
msg = re.escape("new categories must not include old categories: {'d'}")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
cat.add_categories(["d"])
|
||||
|
||||
def test_add_categories_losing_dtype_information(self):
|
||||
# GH#48812
|
||||
cat = Categorical(Series([1, 2], dtype="Int64"))
|
||||
ser = Series([4], dtype="Int64")
|
||||
result = cat.add_categories(ser)
|
||||
expected = Categorical(
|
||||
Series([1, 2], dtype="Int64"), categories=Series([1, 2, 4], dtype="Int64")
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
cat = Categorical(Series(["a", "b", "a"], dtype=StringDtype()))
|
||||
ser = Series(["d"], dtype=StringDtype())
|
||||
result = cat.add_categories(ser)
|
||||
expected = Categorical(
|
||||
Series(["a", "b", "a"], dtype=StringDtype()),
|
||||
categories=Series(["a", "b", "d"], dtype=StringDtype()),
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
exp_categories = Index(["c", "b", "a"])
|
||||
exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
|
||||
cat = cat.set_categories(["c", "b", "a"])
|
||||
res = cat.set_categories(["a", "b", "c"])
|
||||
# cat must be the same as before
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(cat.__array__(), exp_values)
|
||||
# only res is changed
|
||||
exp_categories_back = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(res.categories, exp_categories_back)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_values)
|
||||
|
||||
# not all "old" included in "new" -> all not included ones are now
|
||||
# np.nan
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
res = cat.set_categories(["a"])
|
||||
tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0], dtype=np.int8))
|
||||
|
||||
# still not all "old" in "new"
|
||||
res = cat.set_categories(["a", "b", "d"])
|
||||
tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0], dtype=np.int8))
|
||||
tm.assert_index_equal(res.categories, Index(["a", "b", "d"]))
|
||||
|
||||
# all "old" included in "new"
|
||||
cat = cat.set_categories(["a", "b", "c", "d"])
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_index_equal(cat.categories, exp_categories)
|
||||
|
||||
# internals...
|
||||
c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True)
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0], dtype=np.int8))
|
||||
tm.assert_index_equal(c.categories, Index([1, 2, 3, 4]))
|
||||
|
||||
exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(np.asarray(c), exp)
|
||||
|
||||
# all "pointers" to '4' must be changed from 3 to 0,...
|
||||
c = c.set_categories([4, 3, 2, 1])
|
||||
|
||||
# positions are changed
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3], dtype=np.int8))
|
||||
|
||||
# categories are now in new order
|
||||
tm.assert_index_equal(c.categories, Index([4, 3, 2, 1]))
|
||||
|
||||
# output is the same
|
||||
exp = np.array([1, 2, 3, 4, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(np.asarray(c), exp)
|
||||
assert c.min() == 4
|
||||
assert c.max() == 1
|
||||
|
||||
# set_categories should set the ordering if specified
|
||||
c2 = c.set_categories([4, 3, 2, 1], ordered=False)
|
||||
assert not c2.ordered
|
||||
|
||||
tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2))
|
||||
|
||||
# set_categories should pass thru the ordering
|
||||
c2 = c.set_ordered(False).set_categories([4, 3, 2, 1])
|
||||
assert not c2.ordered
|
||||
|
||||
tm.assert_numpy_array_equal(np.asarray(c), np.asarray(c2))
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, categories, new_categories",
|
||||
[
|
||||
# No NaNs, same cats, same order
|
||||
(["a", "b", "a"], ["a", "b"], ["a", "b"]),
|
||||
# No NaNs, same cats, different order
|
||||
(["a", "b", "a"], ["a", "b"], ["b", "a"]),
|
||||
# Same, unsorted
|
||||
(["b", "a", "a"], ["a", "b"], ["a", "b"]),
|
||||
# No NaNs, same cats, different order
|
||||
(["b", "a", "a"], ["a", "b"], ["b", "a"]),
|
||||
# NaNs
|
||||
(["a", "b", "c"], ["a", "b"], ["a", "b"]),
|
||||
(["a", "b", "c"], ["a", "b"], ["b", "a"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a", "b"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a", "b"]),
|
||||
# Introduce NaNs
|
||||
(["a", "b", "c"], ["a", "b"], ["a"]),
|
||||
(["a", "b", "c"], ["a", "b"], ["b"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a"]),
|
||||
(["b", "a", "c"], ["a", "b"], ["a"]),
|
||||
# No overlap
|
||||
(["a", "b", "c"], ["a", "b"], ["d", "e"]),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("ordered", [True, False])
|
||||
def test_set_categories_many(self, values, categories, new_categories, ordered):
|
||||
c = Categorical(values, categories)
|
||||
expected = Categorical(values, new_categories, ordered)
|
||||
result = c.set_categories(new_categories, ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_categories_rename_less(self):
|
||||
# GH 24675
|
||||
cat = Categorical(["A", "B"])
|
||||
result = cat.set_categories(["A"], rename=True)
|
||||
expected = Categorical(["A", np.nan])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_set_categories_private(self):
|
||||
cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"])
|
||||
cat._set_categories(["a", "c", "d", "e"])
|
||||
expected = Categorical(["a", "c", "d"], categories=list("acde"))
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
# fastpath
|
||||
cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"])
|
||||
cat._set_categories(["a", "c", "d", "e"], fastpath=True)
|
||||
expected = Categorical(["a", "c", "d"], categories=list("acde"))
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
def test_remove_categories(self):
|
||||
cat = Categorical(["a", "b", "c", "a"], ordered=True)
|
||||
old = cat.copy()
|
||||
new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], ordered=True)
|
||||
|
||||
res = cat.remove_categories("c")
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
res = cat.remove_categories(["c"])
|
||||
tm.assert_categorical_equal(cat, old)
|
||||
tm.assert_categorical_equal(res, new)
|
||||
|
||||
@pytest.mark.parametrize("removals", [["c"], ["c", np.nan], "c", ["c", "c"]])
|
||||
def test_remove_categories_raises(self, removals):
|
||||
cat = Categorical(["a", "b", "a"])
|
||||
message = re.escape("removals must all be in old categories: {'c'}")
|
||||
|
||||
with pytest.raises(ValueError, match=message):
|
||||
cat.remove_categories(removals)
|
||||
|
||||
def test_remove_unused_categories(self):
|
||||
c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"])
|
||||
exp_categories_all = Index(["a", "b", "c", "d", "e"])
|
||||
exp_categories_dropped = Index(["a", "b", "c", "d"])
|
||||
|
||||
tm.assert_index_equal(c.categories, exp_categories_all)
|
||||
|
||||
res = c.remove_unused_categories()
|
||||
tm.assert_index_equal(res.categories, exp_categories_dropped)
|
||||
tm.assert_index_equal(c.categories, exp_categories_all)
|
||||
|
||||
# with NaN values (GH11599)
|
||||
c = Categorical(["a", "b", "c", np.nan], categories=["a", "b", "c", "d", "e"])
|
||||
res = c.remove_unused_categories()
|
||||
tm.assert_index_equal(res.categories, Index(np.array(["a", "b", "c"])))
|
||||
exp_codes = np.array([0, 1, 2, -1], dtype=np.int8)
|
||||
tm.assert_numpy_array_equal(res.codes, exp_codes)
|
||||
tm.assert_index_equal(c.categories, exp_categories_all)
|
||||
|
||||
val = ["F", np.nan, "D", "B", "D", "F", np.nan]
|
||||
cat = Categorical(values=val, categories=list("ABCDEFG"))
|
||||
out = cat.remove_unused_categories()
|
||||
tm.assert_index_equal(out.categories, Index(["B", "D", "F"]))
|
||||
exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8)
|
||||
tm.assert_numpy_array_equal(out.codes, exp_codes)
|
||||
assert out.tolist() == val
|
||||
|
||||
alpha = list("abcdefghijklmnopqrstuvwxyz")
|
||||
val = np.random.default_rng(2).choice(alpha[::2], 10000).astype("object")
|
||||
val[np.random.default_rng(2).choice(len(val), 100)] = np.nan
|
||||
|
||||
cat = Categorical(values=val, categories=alpha)
|
||||
out = cat.remove_unused_categories()
|
||||
assert out.tolist() == val.tolist()
|
||||
|
||||
|
||||
class TestCategoricalAPIWithFactor:
    """``Categorical.describe`` output for several kinds of input."""

    def test_describe(self):
        """``describe`` reports per-category counts and frequencies."""

        def expected_frame(counts, total, index):
            # frequencies are the counts divided by the number of values
            freqs = [count / total for count in counts]
            return DataFrame({"counts": counts, "freqs": freqs}, index=index)

        factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)

        # string type
        desc = factor.describe()
        assert factor.ordered
        index = CategoricalIndex(
            ["a", "b", "c"], name="categories", ordered=factor.ordered
        )
        tm.assert_frame_equal(desc, expected_frame([3, 2, 3], 8.0, index))

        # check unused categories
        widened = factor.copy().set_categories(["a", "b", "c", "d"])
        desc = widened.describe()
        index = CategoricalIndex(
            list("abcd"), ordered=factor.ordered, name="categories"
        )
        tm.assert_frame_equal(desc, expected_frame([3, 2, 3, 0], 8.0, index))

        # check an integer one
        numbers = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1])
        desc = numbers.describe()
        index = CategoricalIndex(
            [1, 2, 3], ordered=numbers.ordered, name="categories"
        )
        tm.assert_frame_equal(desc, expected_frame([5, 3, 3], 11.0, index))

        # https://github.com/pandas-dev/pandas/issues/3678
        # describe should work with NaN
        with_missing = Categorical([np.nan, 1, 2, 2])
        desc = with_missing.describe()
        index = CategoricalIndex(
            [1, 2, np.nan], categories=[1, 2], name="categories"
        )
        tm.assert_frame_equal(desc, expected_frame([1, 2, 1], 4.0, index))
|
||||
|
||||
|
||||
class TestPrivateCategoricalAPI:
    """Tests touching the private side of Categorical: codes and recoding."""

    def test_codes_immutable(self):
        """``codes`` cannot be assigned or mutated, but the Categorical can."""
        # Codes should be read only
        cat = Categorical(["a", "b", "c", "a", np.nan])
        tm.assert_numpy_array_equal(
            cat.codes, np.array([0, 1, 2, 0, -1], dtype="int8")
        )

        # Assignments to codes should raise; the expected wording is picked
        # by the PY311 flag.
        if PY311:
            setter_msg = "property 'codes' of 'Categorical' object has no setter"
        else:
            setter_msg = "can't set attribute"
        with pytest.raises(AttributeError, match=setter_msg):
            cat.codes = np.array([0, 1, 2, 0, 1], dtype="int8")

        # changes in the codes array should raise
        codes_view = cat.codes
        with pytest.raises(ValueError, match="assignment destination is read-only"):
            codes_view[4] = 1

        # But even after getting the codes, the original array should still be
        # writeable!
        cat[4] = "a"
        tm.assert_numpy_array_equal(
            cat.codes, np.array([0, 1, 2, 0, 0], dtype="int8")
        )
        cat._codes[4] = 2
        tm.assert_numpy_array_equal(
            cat.codes, np.array([0, 1, 2, 0, 2], dtype="int8")
        )

    @pytest.mark.parametrize(
        "codes, old, new, expected",
        [
            ([0, 1], ["a", "b"], ["a", "b"], [0, 1]),
            ([0, 1], ["b", "a"], ["b", "a"], [0, 1]),
            ([0, 1], ["a", "b"], ["b", "a"], [1, 0]),
            ([0, 1], ["b", "a"], ["a", "b"], [1, 0]),
            ([0, 1, 0, 1], ["a", "b"], ["a", "b", "c"], [0, 1, 0, 1]),
            ([0, 1, 2, 2], ["a", "b", "c"], ["a", "b"], [0, 1, -1, -1]),
            ([0, 1, -1], ["a", "b", "c"], ["a", "b", "c"], [0, 1, -1]),
            ([0, 1, -1], ["a", "b", "c"], ["b"], [-1, 0, -1]),
            ([0, 1, -1], ["a", "b", "c"], ["d"], [-1, -1, -1]),
            ([0, 1, -1], ["a", "b", "c"], [], [-1, -1, -1]),
            ([-1, -1], [], ["a", "b"], [-1, -1]),
            ([1, 0], ["b", "a"], ["a", "b"], [0, 1]),
        ],
    )
    def test_recode_to_categories(self, codes, old, new, expected):
        """``recode_for_categories`` maps int8 codes from ``old`` onto ``new``."""
        old_index, new_index = Index(old), Index(new)
        input_codes = np.asanyarray(codes, dtype=np.int8)
        wanted = np.asanyarray(expected, dtype=np.int8)
        recoded = recode_for_categories(input_codes, old_index, new_index)
        tm.assert_numpy_array_equal(recoded, wanted)

    def test_recode_to_categories_large(self):
        """Recoding 1000 categories onto their reversed order gives int16 codes."""
        size = 1000
        input_codes = np.arange(size)
        old_index = Index(input_codes)
        wanted = np.arange(size - 1, -1, -1, dtype=np.int16)
        new_index = Index(wanted)
        recoded = recode_for_categories(input_codes, old_index, new_index)
        tm.assert_numpy_array_equal(recoded, wanted)
|
@ -0,0 +1,155 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
DatetimeIndex,
|
||||
Interval,
|
||||
NaT,
|
||||
Period,
|
||||
Timestamp,
|
||||
array,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestAstype:
    """Tests for ``astype`` on Categorical and related containers."""

    @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex])
    @pytest.mark.parametrize("values", [[1, np.nan], [Timestamp("2000"), NaT]])
    def test_astype_nan_to_int(self, cls, values):
        """Casting categorical data holding a missing value to int raises."""
        # GH#28406
        container = cls(values)

        with pytest.raises((ValueError, TypeError), match="Cannot (cast|convert)"):
            container.astype(int)

    @pytest.mark.parametrize(
        "expected",
        [
            array(["2019", "2020"], dtype="datetime64[ns, UTC]"),
            array([0, 0], dtype="timedelta64[ns]"),
            array([Period("2019"), Period("2020")], dtype="period[Y-DEC]"),
            array([Interval(0, 1), Interval(1, 2)], dtype="interval"),
            array([1, np.nan], dtype="Int64"),
        ],
    )
    def test_astype_category_to_extension_dtype(self, expected):
        """Extension array -> category -> original dtype round-trips."""
        # GH#28668
        as_category = expected.astype("category")
        round_tripped = as_category.astype(expected.dtype)

        tm.assert_extension_array_equal(round_tripped, expected)

    @pytest.mark.parametrize(
        "dtype, expected",
        [
            (
                "datetime64[ns]",
                np.array(["2015-01-01T00:00:00.000000000"], dtype="datetime64[ns]"),
            ),
            (
                "datetime64[ns, MET]",
                DatetimeIndex([Timestamp("2015-01-01 00:00:00+0100", tz="MET")]).array,
            ),
        ],
    )
    def test_astype_to_datetime64(self, dtype, expected):
        """A single date string category casts to the requested datetime dtype."""
        # GH#28448
        converted = Categorical(["2015-01-01"]).astype(dtype)
        assert converted == expected

    def test_astype_str_int_categories_to_nullable_int(self):
        """Categories "0".."4" cast to Int64 give back the integer codes."""
        # GH#39616
        codes = np.random.default_rng(2).integers(5, size=20)
        str_dtype = CategoricalDtype([str(i) for i in range(5)])
        cat = Categorical.from_codes(codes, dtype=str_dtype)

        converted = cat.astype("Int64")
        tm.assert_extension_array_equal(converted, array(codes, dtype="Int64"))

    def test_astype_str_int_categories_to_nullable_float(self):
        """Categories "0.0".."2.0" cast to Float64 give the codes halved."""
        # GH#39616
        codes = np.random.default_rng(2).integers(5, size=20)
        str_dtype = CategoricalDtype([str(i / 2) for i in range(5)])
        cat = Categorical.from_codes(codes, dtype=str_dtype)

        converted = cat.astype("Float64")
        tm.assert_extension_array_equal(converted, array(codes, dtype="Float64") / 2)

    @pytest.mark.parametrize("ordered", [True, False])
    def test_astype(self, ordered):
        """``astype`` to object/int/float matches ``np.array`` of the data."""
        # string
        str_cat = Categorical(list("abbaaccc"), ordered=ordered)
        tm.assert_numpy_array_equal(str_cat.astype(object), np.array(str_cat))

        msg = r"Cannot cast object|string dtype to float64"
        with pytest.raises(ValueError, match=msg):
            str_cat.astype(float)

        # numeric
        num_cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
        tm.assert_numpy_array_equal(
            num_cat.astype(object), np.array(num_cat, dtype=object)
        )
        tm.assert_numpy_array_equal(
            num_cat.astype(int), np.array(num_cat, dtype="int")
        )
        tm.assert_numpy_array_equal(
            num_cat.astype(float), np.array(num_cat, dtype=float)
        )

    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("cat_ordered", [True, False])
    def test_astype_category(self, dtype_ordered, cat_ordered):
        """Casting to a CategoricalDtype applies its categories and orderedness."""
        # GH#10696/GH#18593
        data = list("abcaacbab")
        cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)

        # standard categories
        target = CategoricalDtype(ordered=dtype_ordered)
        tm.assert_categorical_equal(
            cat.astype(target),
            Categorical(data, categories=cat.categories, ordered=dtype_ordered),
        )

        # non-standard categories
        target = CategoricalDtype(list("adc"), dtype_ordered)
        tm.assert_categorical_equal(
            cat.astype(target), Categorical(data, dtype=target)
        )

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            tm.assert_categorical_equal(cat.astype("category"), cat)

    def test_astype_object_datetime_categories(self):
        """Datetime categories with NaT cast to an object array of Timestamp/NaT."""
        # GH#40754
        cat = Categorical(to_datetime(["2021-03-27", NaT]))
        want = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object")
        tm.assert_numpy_array_equal(cat.astype(object), want)

    def test_astype_object_timestamp_categories(self):
        """A Timestamp category casts to an object array holding that Timestamp."""
        # GH#18024
        cat = Categorical([Timestamp("2014-01-01")])
        want = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object")
        tm.assert_numpy_array_equal(cat.astype(object), want)

    def test_astype_category_readonly_mask_values(self):
        """Casting Int64 to category works when the array's mask is read-only."""
        # GH#53658
        masked = array([0, 1, 2], dtype="Int64")
        masked._mask.flags["WRITEABLE"] = False
        converted = masked.astype("category")
        want = array([0, 1, 2], dtype="Int64").astype("category")
        tm.assert_extension_array_equal(converted, want)
|
@ -0,0 +1,783 @@
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_float_dtype,
|
||||
is_integer_dtype,
|
||||
)
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalConstructors:
|
||||
def test_fastpath_deprecated(self):
|
||||
codes = np.array([1, 2, 3])
|
||||
dtype = CategoricalDtype(categories=["a", "b", "c", "d"], ordered=False)
|
||||
msg = "The 'fastpath' keyword in Categorical is deprecated"
|
||||
with tm.assert_produces_warning(DeprecationWarning, match=msg):
|
||||
Categorical(codes, dtype=dtype, fastpath=True)
|
||||
|
||||
def test_categorical_from_cat_and_dtype_str_preserve_ordered(self):
|
||||
# GH#49309 we should preserve orderedness in `res`
|
||||
cat = Categorical([3, 1], categories=[3, 2, 1], ordered=True)
|
||||
|
||||
res = Categorical(cat, dtype="category")
|
||||
assert res.dtype.ordered
|
||||
|
||||
def test_categorical_disallows_scalar(self):
    """A scalar string as values raises ``TypeError``."""
    # GH#38433
    expected_message = "Categorical input must be list-like"
    with pytest.raises(TypeError, match=expected_message):
        Categorical("A", categories=["A", "B"])
|
||||
|
||||
def test_categorical_1d_only(self):
    """A 2-D array as values raises ``NotImplementedError``."""
    # ndim > 1
    two_dim = np.array([list("abcd")])
    expected_message = "> 1 ndim Categorical are not supported at this time"
    with pytest.raises(NotImplementedError, match=expected_message):
        Categorical(two_dim)
|
||||
|
||||
def test_validate_ordered(self):
    """A non-boolean ``ordered`` is rejected by the constructor and ``from_codes``."""
    # see gh-14058
    expected_error = TypeError
    expected_message = "'ordered' must either be 'True' or 'False'"

    # This should be a boolean.
    bad_ordered = np.array([0, 1, 2])

    with pytest.raises(expected_error, match=expected_message):
        Categorical([1, 2, 3], ordered=bad_ordered)

    with pytest.raises(expected_error, match=expected_message):
        Categorical.from_codes(
            [0, 0, 1], categories=["a", "b", "c"], ordered=bad_ordered
        )
|
||||
|
||||
def test_constructor_empty(self):
|
||||
# GH 17248
|
||||
c = Categorical([])
|
||||
expected = Index([])
|
||||
tm.assert_index_equal(c.categories, expected)
|
||||
|
||||
c = Categorical([], categories=[1, 2, 3])
|
||||
expected = Index([1, 2, 3], dtype=np.int64)
|
||||
tm.assert_index_equal(c.categories, expected)
|
||||
|
||||
def test_constructor_empty_boolean(self):
|
||||
# see gh-22702
|
||||
cat = Categorical([], categories=[True, False])
|
||||
categories = sorted(cat.categories.tolist())
|
||||
assert categories == [False, True]
|
||||
|
||||
def test_constructor_tuples(self):
|
||||
values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object)
|
||||
result = Categorical(values)
|
||||
expected = Index([(1,), (1, 2)], tupleize_cols=False)
|
||||
tm.assert_index_equal(result.categories, expected)
|
||||
assert result.ordered is False
|
||||
|
||||
def test_constructor_tuples_datetimes(self):
|
||||
# numpy will auto reshape when all of the tuples are the
|
||||
# same len, so add an extra one with 2 items and slice it off
|
||||
values = np.array(
|
||||
[
|
||||
(Timestamp("2010-01-01"),),
|
||||
(Timestamp("2010-01-02"),),
|
||||
(Timestamp("2010-01-01"),),
|
||||
(Timestamp("2010-01-02"),),
|
||||
("a", "b"),
|
||||
],
|
||||
dtype=object,
|
||||
)[:-1]
|
||||
result = Categorical(values)
|
||||
expected = Index(
|
||||
[(Timestamp("2010-01-01"),), (Timestamp("2010-01-02"),)],
|
||||
tupleize_cols=False,
|
||||
)
|
||||
tm.assert_index_equal(result.categories, expected)
|
||||
|
||||
def test_constructor_unsortable(self):
    """Unsortable values work unordered but raise with ``ordered=True``."""
    mixed = np.array([1, 2, 3, datetime.now()], dtype="O")

    # it works!
    unordered = Categorical(mixed, ordered=False)
    assert not unordered.ordered

    # this however will raise as cannot be sorted
    expected_message = (
        "'values' is not ordered, please explicitly specify the "
        "categories order by passing in a categories argument."
    )
    with pytest.raises(TypeError, match=expected_message):
        Categorical(mixed, ordered=True)
|
||||
|
||||
def test_constructor_interval(self):
|
||||
result = Categorical(
|
||||
[Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True
|
||||
)
|
||||
ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)])
|
||||
exp = Categorical(ii, ordered=True)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
tm.assert_index_equal(result.categories, ii)
|
||||
|
||||
def test_constructor(self):
    """Walk through the main ``Categorical`` constructor input combinations."""
    letters = ["a", "b", "c", "a"]
    exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_)

    # The values come back unchanged from __array__ for each categories choice.
    for requested in (None, ["a", "b", "c"], ["c", "b", "a"]):
        built = Categorical(exp_arr, categories=requested)
        tm.assert_numpy_array_equal(built.__array__(), exp_arr)

    # categories must be unique
    msg = "Categorical categories must be unique"
    with pytest.raises(ValueError, match=msg):
        Categorical([1, 2], [1, 2, 2])

    with pytest.raises(ValueError, match=msg):
        Categorical(["a", "b"], ["a", "b", "b"])

    # The default should be unordered
    assert not Categorical(letters).ordered

    # Categorical as input
    for requested in (None, ["a", "b", "c", "d"], ["a", "c", "b"]):
        source = Categorical(letters, categories=requested)
        tm.assert_categorical_equal(source, Categorical(source))

    source = Categorical(letters, categories=["a", "c", "b"])
    recoded = Categorical(source, categories=["a", "b", "c"])
    tm.assert_numpy_array_equal(source.__array__(), recoded.__array__())
    tm.assert_index_equal(recoded.categories, Index(["a", "b", "c"]))

    # Series of dtype category
    for requested in (["a", "b", "c", "d"], ["a", "c", "b"]):
        source = Categorical(letters, categories=requested)
        tm.assert_categorical_equal(source, Categorical(Series(source)))

    # Series
    tm.assert_categorical_equal(Categorical(letters), Categorical(Series(letters)))

    tm.assert_categorical_equal(
        Categorical(letters, categories=["a", "b", "c", "d"]),
        Categorical(Series(letters), categories=["a", "b", "c", "d"]),
    )

    # This should result in integer categories, not float!
    cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
    assert is_integer_dtype(cat.categories)

    # https://github.com/pandas-dev/pandas/issues/3678
    cat = Categorical([np.nan, 1, 2, 3])
    assert is_integer_dtype(cat.categories)

    # this should result in floats
    for float_values in ([np.nan, 1, 2.0, 3], [np.nan, 1.0, 2.0, 3.0]):
        assert is_float_dtype(Categorical(float_values).categories)

    # This doesn't work -> this would probably need some kind of "remember
    # the original type" feature to try to cast the array interface result
    # to...

    # vals = np.asarray(cat[cat.notna()])
    # assert is_integer_dtype(vals)

    # corner cases
    for lone_value in (1, "a"):
        single = Categorical([lone_value])
        assert len(single.categories) == 1
        assert single.categories[0] == lone_value
        assert len(single.codes) == 1
        assert single.codes[0] == 0

    # two arrays
    #  - when the first is an integer dtype and the second is not
    #  - when the resulting codes are all -1/NaN
    with tm.assert_produces_warning(None):
        Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"])

    with tm.assert_produces_warning(None):
        Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5])

    # the next one are from the old docs
    with tm.assert_produces_warning(None):
        Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])
        cat = Categorical([1, 2], categories=[1, 2, 3])

    # this is a legitimate constructor
    with tm.assert_produces_warning(None):
        Categorical(np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True)
|
||||
|
||||
def test_constructor_with_existing_categories(self):
|
||||
# GH25318: constructing with pd.Series used to bogusly skip recoding
|
||||
# categories
|
||||
c0 = Categorical(["a", "b", "c", "a"])
|
||||
c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"])
|
||||
|
||||
c2 = Categorical(c0, categories=c1.categories)
|
||||
tm.assert_categorical_equal(c1, c2)
|
||||
|
||||
c3 = Categorical(Series(c0), categories=c1.categories)
|
||||
tm.assert_categorical_equal(c1, c3)
|
||||
|
||||
def test_constructor_not_sequence(self):
    """A plain string passed as ``categories`` raises ``TypeError``."""
    # https://github.com/pandas-dev/pandas/issues/16022
    expected_message = r"^Parameter 'categories' must be list-like, was"
    with pytest.raises(TypeError, match=expected_message):
        Categorical(["a", "b"], categories="a")
|
||||
|
||||
def test_constructor_with_null(self):
    """NaN, None or NaT inside ``categories`` raises ``ValueError``."""
    # Cannot have NaN in categories
    expected_message = "Categorical categories cannot be null"

    with pytest.raises(ValueError, match=expected_message):
        Categorical([np.nan, "a", "b", "c"], categories=[np.nan, "a", "b", "c"])

    with pytest.raises(ValueError, match=expected_message):
        Categorical([None, "a", "b", "c"], categories=[None, "a", "b", "c"])

    with pytest.raises(ValueError, match=expected_message):
        Categorical(
            DatetimeIndex(["nat", "20160101"]),
            categories=[NaT, Timestamp("20160101")],
        )
|
||||
|
||||
def test_constructor_with_index(self):
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
|
||||
tm.assert_categorical_equal(ci.values, Categorical(ci))
|
||||
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
|
||||
tm.assert_categorical_equal(
|
||||
ci.values, Categorical(ci.astype(object), categories=ci.categories)
|
||||
)
|
||||
|
||||
def test_constructor_with_generator(self):
|
||||
# This was raising an Error in isna(single_val).any() because isna
|
||||
# returned a scalar for a generator
|
||||
|
||||
exp = Categorical([0, 1, 2])
|
||||
cat = Categorical(x for x in [0, 1, 2])
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
cat = Categorical(range(3))
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
|
||||
MultiIndex.from_product([range(5), ["a", "b", "c"]])
|
||||
|
||||
# check that categories accept generators and sequences
|
||||
cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2]))
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
cat = Categorical([0, 1, 2], categories=range(3))
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
|
||||
def test_constructor_with_rangeindex(self):
|
||||
# RangeIndex is preserved in Categories
|
||||
rng = Index(range(3))
|
||||
|
||||
cat = Categorical(rng)
|
||||
tm.assert_index_equal(cat.categories, rng, exact=True)
|
||||
|
||||
cat = Categorical([1, 2, 0], categories=rng)
|
||||
tm.assert_index_equal(cat.categories, rng, exact=True)
|
||||
|
||||
@pytest.mark.parametrize(
    "dtl",
    [
        date_range("1995-01-01 00:00:00", periods=5, freq="s"),
        date_range("1995-01-01 00:00:00", periods=5, freq="s", tz="US/Eastern"),
        timedelta_range("1 day", periods=5, freq="s"),
    ],
)
def test_constructor_with_datetimelike(self, dtl):
    """Datetime-like Series build datetime-like categories; NaT gets code -1."""
    # see gh-12077
    # constructor with a datetimelike and NaT
    index_cls = type(dtl)
    ser = Series(dtl)
    cat = Categorical(ser)

    want_categories = index_cls(ser)
    want_categories._data.freq = None

    tm.assert_index_equal(cat.categories, want_categories)
    tm.assert_numpy_array_equal(cat.codes, np.arange(5, dtype="int8"))

    # with NaT
    ser_with_nat = ser.copy()
    ser_with_nat.iloc[-1] = NaT
    cat = Categorical(ser_with_nat)

    want_categories = index_cls(ser_with_nat.dropna())
    want_categories._data.freq = None

    tm.assert_index_equal(cat.categories, want_categories)

    want_codes = np.array([0, 1, 2, 3, -1], dtype=np.int8)
    tm.assert_numpy_array_equal(cat.codes, want_codes)

    assert "NaT" in repr(cat)
|
||||
|
||||
def test_constructor_from_index_series_datetimetz(self):
|
||||
idx = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
|
||||
idx = idx._with_freq(None) # freq not preserved in result.categories
|
||||
result = Categorical(idx)
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
result = Categorical(Series(idx))
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
def test_constructor_date_objects(self):
|
||||
# we dont cast date objects to timestamps, matching Index constructor
|
||||
v = date.today()
|
||||
|
||||
cat = Categorical([v, v])
|
||||
assert cat.categories.dtype == object
|
||||
assert type(cat.categories[0]) is date
|
||||
|
||||
def test_constructor_from_index_series_timedelta(self):
|
||||
idx = timedelta_range("1 days", freq="D", periods=3)
|
||||
idx = idx._with_freq(None) # freq not preserved in result.categories
|
||||
result = Categorical(idx)
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
result = Categorical(Series(idx))
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
def test_constructor_from_index_series_period(self):
|
||||
idx = period_range("2015-01-01", freq="D", periods=3)
|
||||
result = Categorical(idx)
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
result = Categorical(Series(idx))
|
||||
tm.assert_index_equal(result.categories, idx)
|
||||
|
||||
@pytest.mark.parametrize(
    "values",
    [
        np.array([1.0, 1.2, 1.8, np.nan]),
        np.array([1, 2, 3], dtype="int64"),
        ["a", "b", "c", np.nan],
        [pd.Period("2014-01"), pd.Period("2014-02"), NaT],
        [Timestamp("2014-01-01"), Timestamp("2014-01-02"), NaT],
        [
            Timestamp("2014-01-01", tz="US/Eastern"),
            Timestamp("2014-01-02", tz="US/Eastern"),
            NaT,
        ],
    ],
)
def test_constructor_invariant(self, values):
    """Wrapping a Categorical in ``Categorical`` again yields an equal object."""
    # GH 14190
    original = Categorical(values)
    rewrapped = Categorical(original)
    tm.assert_categorical_equal(original, rewrapped)
|
||||
|
||||
@pytest.mark.parametrize("ordered", [True, False])
def test_constructor_with_dtype(self, ordered):
    """A CategoricalDtype argument equals passing categories/ordered directly."""
    values = ["a", "b", "a", "c"]
    categories = ["b", "a", "c"]
    via_dtype = Categorical(values, dtype=CategoricalDtype(categories, ordered=ordered))
    via_kwargs = Categorical(values, categories=categories, ordered=ordered)
    tm.assert_categorical_equal(via_dtype, via_kwargs)
    assert via_dtype.ordered is ordered
|
||||
|
||||
def test_constructor_dtype_and_others_raises(self):
    """Combining ``dtype`` with ``categories`` or ``ordered`` raises."""
    dtype = CategoricalDtype(["a", "b"], ordered=True)
    expected_message = "Cannot specify `categories` or `ordered` together with `dtype`."
    conflicting_kwargs = (
        {"categories": ["a", "b"]},
        {"ordered": True},
        {"ordered": False},
    )
    for extra in conflicting_kwargs:
        with pytest.raises(ValueError, match=expected_message):
            Categorical(["a", "b"], dtype=dtype, **extra)
|
||||
|
||||
@pytest.mark.parametrize("categories", [None, ["a", "b"], ["a", "c"]])
@pytest.mark.parametrize("ordered", [True, False])
def test_constructor_str_category(self, categories, ordered):
    """``dtype="category"`` alongside categories/ordered changes nothing."""
    with_str_dtype = Categorical(
        ["a", "b"], categories=categories, ordered=ordered, dtype="category"
    )
    without_dtype = Categorical(["a", "b"], categories=categories, ordered=ordered)
    tm.assert_categorical_equal(with_str_dtype, without_dtype)
|
||||
|
||||
def test_constructor_str_unknown(self):
    """An unrecognised dtype string raises ``ValueError``."""
    expected_message = "Unknown dtype"
    with pytest.raises(ValueError, match=expected_message):
        Categorical([1, 2], dtype="foo")
|
||||
|
||||
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="Can't be NumPy strings")
def test_constructor_np_strs(self):
    """``np.str_`` categories keep their ``np.str_`` type."""
    # GH#31499 Hashtable.map_locations needs to work on np.str_ objects
    np_categories = [np.str_("0"), np.str_("1")]
    built = Categorical(["1", "0", "1"], np_categories)
    assert all(isinstance(item, np.str_) for item in built.categories)
|
||||
|
||||
def test_constructor_from_categorical_with_dtype(self):
|
||||
dtype = CategoricalDtype(["a", "b", "c"], ordered=True)
|
||||
values = Categorical(["a", "b", "d"])
|
||||
result = Categorical(values, dtype=dtype)
|
||||
# We use dtype.categories, not values.categories
|
||||
expected = Categorical(
|
||||
["a", "b", "d"], categories=["a", "b", "c"], ordered=True
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_constructor_from_categorical_with_unknown_dtype(self):
|
||||
dtype = CategoricalDtype(None, ordered=True)
|
||||
values = Categorical(["a", "b", "d"])
|
||||
result = Categorical(values, dtype=dtype)
|
||||
# We use values.categories, not dtype.categories
|
||||
expected = Categorical(
|
||||
["a", "b", "d"], categories=["a", "b", "d"], ordered=True
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_constructor_from_categorical_string(self):
|
||||
values = Categorical(["a", "b", "d"])
|
||||
# use categories, ordered
|
||||
result = Categorical(
|
||||
values, categories=["a", "b", "c"], ordered=True, dtype="category"
|
||||
)
|
||||
expected = Categorical(
|
||||
["a", "b", "d"], categories=["a", "b", "c"], ordered=True
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
# No string
|
||||
result = Categorical(values, categories=["a", "b", "c"], ordered=True)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_constructor_with_categorical_categories(self):
|
||||
# GH17884
|
||||
expected = Categorical(["a", "b"], categories=["a", "b", "c"])
|
||||
|
||||
result = Categorical(["a", "b"], categories=Categorical(["a", "b", "c"]))
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
result = Categorical(["a", "b"], categories=CategoricalIndex(["a", "b", "c"]))
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("klass", [lambda x: np.array(x, dtype=object), list])
def test_construction_with_null(self, klass, nulls_fixture):
    """A null value among the inputs gets code -1 and is not a category."""
    # https://github.com/pandas-dev/pandas/issues/31927
    built = Categorical(klass(["a", nulls_fixture, "b"]))

    want = Categorical.from_codes(
        codes=[0, -1, 1], dtype=CategoricalDtype(["a", "b"])
    )

    tm.assert_categorical_equal(built, want)
|
||||
|
||||
@pytest.mark.parametrize("validate", [True, False])
def test_from_codes_nullable_int_categories(self, any_numeric_ea_dtype, validate):
    """``from_codes`` keeps the dtype of extension-array categories."""
    # GH#39649
    ea_categories = pd.array(range(5), dtype=any_numeric_ea_dtype)
    random_codes = np.random.default_rng(2).integers(5, size=3)
    cat_dtype = CategoricalDtype(ea_categories)
    built = Categorical.from_codes(random_codes, dtype=cat_dtype, validate=validate)
    assert built.categories.dtype == ea_categories.dtype
    tm.assert_index_equal(built.categories, Index(ea_categories))
|
||||
|
||||
def test_from_codes_empty(self):
|
||||
cat = ["a", "b", "c"]
|
||||
result = Categorical.from_codes([], categories=cat)
|
||||
expected = Categorical([], categories=cat)
|
||||
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("validate", [True, False])
def test_from_codes_validate(self, validate):
    """Out-of-range codes raise only when ``validate`` is true."""
    # GH53122
    dtype = CategoricalDtype(["a", "b"])
    if not validate:
        # passes, though has incorrect codes, but that's the user responsibility
        Categorical.from_codes([4, 5], dtype=dtype, validate=validate)
        return

    with pytest.raises(ValueError, match="codes need to be between "):
        Categorical.from_codes([4, 5], dtype=dtype, validate=validate)
|
||||
|
||||
def test_from_codes_too_few_categories(self):
    """A code equal to ``len(categories)`` raises via ``categories`` or ``dtype``."""
    dtype = CategoricalDtype(categories=[1, 2])
    expected_message = "codes need to be between "
    for target in ({"categories": dtype.categories}, {"dtype": dtype}):
        with pytest.raises(ValueError, match=expected_message):
            Categorical.from_codes([1, 2], **target)
|
||||
|
||||
def test_from_codes_non_int_codes(self):
    """String codes raise via ``categories`` or ``dtype``."""
    dtype = CategoricalDtype(categories=[1, 2])
    expected_message = "codes need to be array-like integers"
    for target in ({"categories": dtype.categories}, {"dtype": dtype}):
        with pytest.raises(ValueError, match=expected_message):
            Categorical.from_codes(["a"], **target)
|
||||
|
||||
def test_from_codes_non_unique_categories(self):
    """Duplicate categories raise in ``from_codes``."""
    expected_message = "Categorical categories must be unique"
    with pytest.raises(ValueError, match=expected_message):
        Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])
|
||||
|
||||
def test_from_codes_nan_cat_included(self):
    """A NaN among the categories raises in ``from_codes``."""
    expected_message = "Categorical categories cannot be null"
    with pytest.raises(ValueError, match=expected_message):
        Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])
|
||||
|
||||
def test_from_codes_too_negative(self):
    """A code of -2 raises via ``categories`` or ``dtype``."""
    dtype = CategoricalDtype(categories=["a", "b", "c"])
    expected_message = r"codes need to be between -1 and len\(categories\)-1"
    for target in ({"categories": dtype.categories}, {"dtype": dtype}):
        with pytest.raises(ValueError, match=expected_message):
            Categorical.from_codes([-2, 1, 2], **target)
|
||||
|
||||
def test_from_codes(self):
|
||||
dtype = CategoricalDtype(categories=["a", "b", "c"])
|
||||
exp = Categorical(["a", "b", "c"], ordered=False)
|
||||
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
|
||||
tm.assert_categorical_equal(exp, res)
|
||||
|
||||
res = Categorical.from_codes([0, 1, 2], dtype=dtype)
|
||||
tm.assert_categorical_equal(exp, res)
|
||||
|
||||
@pytest.mark.parametrize("klass", [Categorical, CategoricalIndex])
def test_from_codes_with_categorical_categories(self, klass):
    """``from_codes`` accepts categorical containers as ``categories``."""
    # GH17884
    want = Categorical(["a", "b"], categories=["a", "b", "c"])

    categorical_cats = klass(["a", "b", "c"])
    built = Categorical.from_codes([0, 1], categories=categorical_cats)
    tm.assert_categorical_equal(built, want)
|
||||
|
||||
@pytest.mark.parametrize("klass", [Categorical, CategoricalIndex])
def test_from_codes_with_non_unique_categorical_categories(self, klass):
    """Duplicate values in categorical ``categories`` raise in ``from_codes``."""
    expected_message = "Categorical categories must be unique"
    with pytest.raises(ValueError, match=expected_message):
        Categorical.from_codes([0, 1], klass(["a", "b", "a"]))
|
||||
|
||||
def test_from_codes_with_nan_code(self):
    """A NaN inside ``codes`` raises via ``categories`` or ``dtype``."""
    # GH21767
    codes = [1, 2, np.nan]
    dtype = CategoricalDtype(categories=["a", "b", "c"])
    for target in ({"categories": dtype.categories}, {"dtype": dtype}):
        with pytest.raises(ValueError, match="codes need to be array-like integers"):
            Categorical.from_codes(codes, **target)
|
||||
|
||||
@pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]])
|
||||
def test_from_codes_with_float(self, codes):
|
||||
# GH21767
|
||||
# float codes should raise even if values are equal to integers
|
||||
dtype = CategoricalDtype(categories=["a", "b", "c"])
|
||||
|
||||
msg = "codes need to be array-like integers"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(codes, dtype.categories)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(codes, dtype=dtype)
|
||||
|
||||
def test_from_codes_with_dtype_raises(self):
|
||||
msg = "Cannot specify"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(
|
||||
[0, 1], categories=["a", "b"], dtype=CategoricalDtype(["a", "b"])
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(
|
||||
[0, 1], ordered=True, dtype=CategoricalDtype(["a", "b"])
|
||||
)
|
||||
|
||||
def test_from_codes_neither(self):
|
||||
msg = "Both were None"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes([0, 1])
|
||||
|
||||
def test_from_codes_with_nullable_int(self):
|
||||
codes = pd.array([0, 1], dtype="Int64")
|
||||
categories = ["a", "b"]
|
||||
|
||||
result = Categorical.from_codes(codes, categories=categories)
|
||||
expected = Categorical.from_codes(codes.to_numpy(int), categories=categories)
|
||||
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_from_codes_with_nullable_int_na_raises(self):
|
||||
codes = pd.array([0, None], dtype="Int64")
|
||||
categories = ["a", "b"]
|
||||
|
||||
msg = "codes cannot contain NA values"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
Categorical.from_codes(codes, categories=categories)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "category"])
|
||||
def test_from_inferred_categories(self, dtype):
|
||||
cats = ["a", "b"]
|
||||
codes = np.array([0, 0, 1, 1], dtype="i8")
|
||||
result = Categorical._from_inferred_categories(cats, codes, dtype)
|
||||
expected = Categorical.from_codes(codes, cats)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "category"])
|
||||
def test_from_inferred_categories_sorts(self, dtype):
|
||||
cats = ["b", "a"]
|
||||
codes = np.array([0, 1, 1, 1], dtype="i8")
|
||||
result = Categorical._from_inferred_categories(cats, codes, dtype)
|
||||
expected = Categorical.from_codes([1, 0, 0, 0], ["a", "b"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_from_inferred_categories_dtype(self):
|
||||
cats = ["a", "b", "d"]
|
||||
codes = np.array([0, 1, 0, 2], dtype="i8")
|
||||
dtype = CategoricalDtype(["c", "b", "a"], ordered=True)
|
||||
result = Categorical._from_inferred_categories(cats, codes, dtype)
|
||||
expected = Categorical(
|
||||
["a", "b", "a", "d"], categories=["c", "b", "a"], ordered=True
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_from_inferred_categories_coerces(self):
|
||||
cats = ["1", "2", "bad"]
|
||||
codes = np.array([0, 0, 1, 2], dtype="i8")
|
||||
dtype = CategoricalDtype([1, 2])
|
||||
result = Categorical._from_inferred_categories(cats, codes, dtype)
|
||||
expected = Categorical([1, 1, 2, np.nan])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("ordered", [None, True, False])
|
||||
def test_construction_with_ordered(self, ordered):
|
||||
# GH 9347, 9190
|
||||
cat = Categorical([0, 1, 2], ordered=ordered)
|
||||
assert cat.ordered == bool(ordered)
|
||||
|
||||
def test_constructor_imaginary(self):
|
||||
values = [1, 2, 3 + 1j]
|
||||
c1 = Categorical(values)
|
||||
tm.assert_index_equal(c1.categories, Index(values))
|
||||
tm.assert_numpy_array_equal(np.array(c1), np.array(values))
|
||||
|
||||
def test_constructor_string_and_tuples(self):
|
||||
# GH 21416
|
||||
c = Categorical(np.array(["c", ("a", "b"), ("b", "a"), "c"], dtype=object))
|
||||
expected_index = Index([("a", "b"), ("b", "a"), "c"])
|
||||
assert c.categories.equals(expected_index)
|
||||
|
||||
def test_interval(self):
|
||||
idx = pd.interval_range(0, 10, periods=10)
|
||||
cat = Categorical(idx, categories=idx)
|
||||
expected_codes = np.arange(10, dtype="int8")
|
||||
tm.assert_numpy_array_equal(cat.codes, expected_codes)
|
||||
tm.assert_index_equal(cat.categories, idx)
|
||||
|
||||
# infer categories
|
||||
cat = Categorical(idx)
|
||||
tm.assert_numpy_array_equal(cat.codes, expected_codes)
|
||||
tm.assert_index_equal(cat.categories, idx)
|
||||
|
||||
# list values
|
||||
cat = Categorical(list(idx))
|
||||
tm.assert_numpy_array_equal(cat.codes, expected_codes)
|
||||
tm.assert_index_equal(cat.categories, idx)
|
||||
|
||||
# list values, categories
|
||||
cat = Categorical(list(idx), categories=list(idx))
|
||||
tm.assert_numpy_array_equal(cat.codes, expected_codes)
|
||||
tm.assert_index_equal(cat.categories, idx)
|
||||
|
||||
# shuffled
|
||||
values = idx.take([1, 2, 0])
|
||||
cat = Categorical(values, categories=idx)
|
||||
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="int8"))
|
||||
tm.assert_index_equal(cat.categories, idx)
|
||||
|
||||
# extra
|
||||
values = pd.interval_range(8, 11, periods=3)
|
||||
cat = Categorical(values, categories=idx)
|
||||
expected_codes = np.array([8, 9, -1], dtype="int8")
|
||||
tm.assert_numpy_array_equal(cat.codes, expected_codes)
|
||||
tm.assert_index_equal(cat.categories, idx)
|
||||
|
||||
# overlapping
|
||||
idx = IntervalIndex([Interval(0, 2), Interval(0, 1)])
|
||||
cat = Categorical(idx, categories=idx)
|
||||
expected_codes = np.array([0, 1], dtype="int8")
|
||||
tm.assert_numpy_array_equal(cat.codes, expected_codes)
|
||||
tm.assert_index_equal(cat.categories, idx)
|
||||
|
||||
def test_categorical_extension_array_nullable(self, nulls_fixture):
|
||||
# GH:
|
||||
arr = pd.arrays.StringArray._from_sequence(
|
||||
[nulls_fixture] * 2, dtype=pd.StringDtype()
|
||||
)
|
||||
result = Categorical(arr)
|
||||
assert arr.dtype == result.categories.dtype
|
||||
expected = Categorical(Series([pd.NA, pd.NA], dtype=arr.dtype))
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_from_sequence_copy(self):
|
||||
cat = Categorical(np.arange(5).repeat(2))
|
||||
result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=False)
|
||||
|
||||
# more generally, we'd be OK with a view
|
||||
assert result._codes is cat._codes
|
||||
|
||||
result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=True)
|
||||
|
||||
assert not tm.shares_memory(result, cat)
|
||||
|
||||
def test_constructor_datetime64_non_nano(self):
|
||||
categories = np.arange(10).view("M8[D]")
|
||||
values = categories[::2].copy()
|
||||
|
||||
cat = Categorical(values, categories=categories)
|
||||
assert (cat == values).all()
|
||||
|
||||
def test_constructor_preserves_freq(self):
|
||||
# GH33830 freq retention in categorical
|
||||
dti = date_range("2016-01-01", periods=5)
|
||||
|
||||
expected = dti.freq
|
||||
|
||||
cat = Categorical(dti)
|
||||
result = cat.categories.freq
|
||||
|
||||
assert expected == result
|
@ -0,0 +1,139 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalDtypes:
    """Tests for Categorical dtype comparison, ``_set_dtype`` and codes dtypes."""

    def test_categories_match_up_to_permutation(self):
        # test dtype comparisons between cats

        c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
        c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False)
        c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True)

        # every categorical matches itself
        assert c1._categories_match_up_to_permutation(c1)
        assert c2._categories_match_up_to_permutation(c2)
        assert c3._categories_match_up_to_permutation(c3)

        # same unordered categories in a different order match;
        # an ordered counterpart does not
        assert c1._categories_match_up_to_permutation(c2)
        assert not c1._categories_match_up_to_permutation(c3)

        # non-categorical arguments never match
        assert not c1._categories_match_up_to_permutation(Index(list("aabca")))
        assert not c1._categories_match_up_to_permutation(c1.astype(object))

        # CategoricalIndex arguments
        assert c1._categories_match_up_to_permutation(CategoricalIndex(c1))
        assert c1._categories_match_up_to_permutation(
            CategoricalIndex(c1, categories=list("cab"))
        )
        assert not c1._categories_match_up_to_permutation(
            CategoricalIndex(c1, ordered=True)
        )

        # GH 16659
        s1 = Series(c1)
        s2 = Series(c2)
        s3 = Series(c3)
        assert c1._categories_match_up_to_permutation(s1)
        assert c2._categories_match_up_to_permutation(s2)
        assert c3._categories_match_up_to_permutation(s3)
        assert c1._categories_match_up_to_permutation(s2)
        assert not c1._categories_match_up_to_permutation(s3)
        assert not c1._categories_match_up_to_permutation(s1.astype(object))

    def test_set_dtype_same(self):
        c = Categorical(["a", "b", "c"])
        result = c._set_dtype(CategoricalDtype(["a", "b", "c"]))
        tm.assert_categorical_equal(result, c)

    def test_set_dtype_new_categories(self):
        c = Categorical(["a", "b", "c"])
        result = c._set_dtype(CategoricalDtype(list("abcd")))
        # appending a category leaves the existing codes untouched
        tm.assert_numpy_array_equal(result.codes, c.codes)
        tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))

    @pytest.mark.parametrize(
        "values, categories, new_categories",
        [
            # No NaNs, same cats, same order
            (["a", "b", "a"], ["a", "b"], ["a", "b"]),
            # No NaNs, same cats, different order
            (["a", "b", "a"], ["a", "b"], ["b", "a"]),
            # Same, unsorted
            (["b", "a", "a"], ["a", "b"], ["a", "b"]),
            # Same, unsorted, different order
            (["b", "a", "a"], ["a", "b"], ["b", "a"]),
            # NaNs
            (["a", "b", "c"], ["a", "b"], ["a", "b"]),
            (["a", "b", "c"], ["a", "b"], ["b", "a"]),
            (["b", "a", "c"], ["a", "b"], ["a", "b"]),
            # previously an exact duplicate of the row above
            (["b", "a", "c"], ["a", "b"], ["b", "a"]),
            # Introduce NaNs
            (["a", "b", "c"], ["a", "b"], ["a"]),
            (["a", "b", "c"], ["a", "b"], ["b"]),
            (["b", "a", "c"], ["a", "b"], ["a"]),
            # previously an exact duplicate of the row above
            (["b", "a", "c"], ["a", "b"], ["b"]),
            # No overlap
            (["a", "b", "c"], ["a", "b"], ["d", "e"]),
        ],
    )
    @pytest.mark.parametrize("ordered", [True, False])
    def test_set_dtype_many(self, values, categories, new_categories, ordered):
        # _set_dtype must give the same result as constructing the
        # Categorical directly with the new categories/ordered flag.
        c = Categorical(values, categories)
        expected = Categorical(values, new_categories, ordered)
        result = c._set_dtype(expected.dtype)
        tm.assert_categorical_equal(result, expected)

    def test_set_dtype_no_overlap(self):
        c = Categorical(["a", "b", "c"], ["d", "e"])
        result = c._set_dtype(CategoricalDtype(["a", "b"]))
        expected = Categorical([None, None, None], categories=["a", "b"])
        tm.assert_categorical_equal(result, expected)

    def test_codes_dtypes(self):
        # GH 8453
        result = Categorical(["foo", "bar", "baz"])
        assert result.codes.dtype == "int8"

        result = Categorical([f"foo{i:05d}" for i in range(400)])
        assert result.codes.dtype == "int16"

        result = Categorical([f"foo{i:05d}" for i in range(40000)])
        assert result.codes.dtype == "int32"

        # adding cats
        result = Categorical(["foo", "bar", "baz"])
        assert result.codes.dtype == "int8"
        result = result.add_categories([f"foo{i:05d}" for i in range(400)])
        assert result.codes.dtype == "int16"

        # removing cats
        result = result.remove_categories([f"foo{i:05d}" for i in range(300)])
        assert result.codes.dtype == "int8"

    def test_iter_python_types(self):
        # GH-19909
        cat = Categorical([1, 2])
        assert isinstance(next(iter(cat)), int)
        assert isinstance(cat.tolist()[0], int)

    def test_iter_python_types_datetime(self):
        cat = Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")])
        assert isinstance(next(iter(cat)), Timestamp)
        assert isinstance(cat.tolist()[0], Timestamp)

    def test_interval_index_category(self):
        # GH 38316
        index = IntervalIndex.from_breaks(np.arange(3, dtype="uint64"))

        result = CategoricalIndex(index).dtype.categories
        expected = IntervalIndex.from_arrays(
            [0, 1], [1, 2], dtype="interval[uint64, right]"
        )
        tm.assert_index_equal(result, expected)
|
@ -0,0 +1,388 @@
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
NaT,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
import pandas.core.common as com
|
||||
|
||||
|
||||
class TestCategoricalIndexingWithFactor:
    """Getting and setting items on a Categorical by position, list and mask."""

    def test_getitem(self):
        cat = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
        assert cat[0] == "a"
        assert cat[-1] == "c"

        # list of positions
        head = cat[[0, 1, 2]]
        tm.assert_numpy_array_equal(head._codes, np.array([0, 1, 1], dtype=np.int8))

        # boolean mask
        is_c = np.asarray(cat) == "c"
        only_c = cat[is_c]
        tm.assert_numpy_array_equal(
            only_c._codes, np.array([2, 2, 2], dtype=np.int8)
        )

    def test_setitem(self):
        original = Categorical(
            ["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True
        )

        # int/positional
        by_position = original.copy()
        by_position[0] = "b"
        assert by_position[0] == "b"
        by_position[-1] = "a"
        assert by_position[-1] == "a"

        # boolean: overwrite the first and last element
        by_mask = original.copy()
        mask = np.zeros(len(by_mask), dtype="bool")
        mask[[0, -1]] = True
        by_mask[mask] = "c"

        expected = Categorical(
            ["c", "b", "b", "a", "a", "c", "c", "c"], ordered=True
        )
        tm.assert_categorical_equal(by_mask, expected)

    @pytest.mark.parametrize(
        "other",
        [Categorical(["b", "a"]), Categorical(["b", "a"], categories=["b", "a"])],
    )
    def test_setitem_same_but_unordered(self, other):
        # GH-24142
        mask = np.array([True, False])
        target = Categorical(["a", "b"], categories=["a", "b"])

        target[mask] = other[mask]

        expected = Categorical(["b", "b"], categories=["a", "b"])
        tm.assert_categorical_equal(target, expected)

    @pytest.mark.parametrize(
        "other",
        [
            Categorical(["b", "a"], categories=["b", "a", "c"]),
            Categorical(["b", "a"], categories=["a", "b", "c"]),
            Categorical(["a", "a"], categories=["a"]),
            Categorical(["b", "b"], categories=["b"]),
        ],
    )
    def test_setitem_different_unordered_raises(self, other):
        # GH-24142
        mask = np.array([True, False])
        target = Categorical(["a", "b"], categories=["a", "b"])

        expected_error = (
            "Cannot set a Categorical with another, without identical categories"
        )
        with pytest.raises(TypeError, match=expected_error):
            target[mask] = other[mask]

    @pytest.mark.parametrize(
        "other",
        [
            Categorical(["b", "a"]),
            Categorical(["b", "a"], categories=["b", "a"], ordered=True),
            Categorical(["b", "a"], categories=["a", "b", "c"], ordered=True),
        ],
    )
    def test_setitem_same_ordered_raises(self, other):
        # GH-24142
        mask = np.array([True, False])
        target = Categorical(["a", "b"], categories=["a", "b"], ordered=True)

        expected_error = (
            "Cannot set a Categorical with another, without identical categories"
        )
        with pytest.raises(TypeError, match=expected_error):
            target[mask] = other[mask]

    def test_setitem_tuple(self):
        # GH#20439
        cat = Categorical([(0, 1), (0, 2), (0, 1)])

        # Assigning an existing tuple category must not raise.
        cat[1] = cat[0]
        assert cat[1] == (0, 1)

    def test_setitem_listlike(self):
        # GH#9469
        # properly coerce the input indexers
        raw = np.random.default_rng(2).integers(0, 5, size=150000).astype(np.int8)
        cat = Categorical(raw).add_categories([-1000])

        position = np.array([100000]).astype(np.int64)
        cat[position] = -1000

        # Check the stored code: the test expects -1000 to have code 5.
        stored = cat.codes[np.array([100000]).astype(np.int64)]
        tm.assert_numpy_array_equal(stored, np.array([5], dtype="int8"))
|
||||
|
||||
|
||||
class TestCategoricalIndexing:
    """Slicing, PeriodIndex-backed categoricals, indexers and Series.where."""

    def test_getitem_slice(self):
        cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])

        # a scalar position returns the element itself
        assert cat[3] == "d"

        # a slice keeps the full set of categories
        window = cat[3:5]
        expected = Categorical(["d", "a"], categories=["a", "b", "c", "d"])
        tm.assert_categorical_equal(window, expected)

    def test_getitem_listlike(self):
        # GH 9469
        # properly coerce the input indexers
        raw = np.random.default_rng(2).integers(0, 5, size=150000).astype(np.int8)
        cat = Categorical(raw)

        via_codes = cat.codes[np.array([100000]).astype(np.int64)]
        via_getitem = cat[np.array([100000]).astype(np.int64)].codes
        tm.assert_numpy_array_equal(via_codes, via_getitem)

    def test_periodindex(self):
        sorted_months = PeriodIndex(
            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"],
            freq="M",
        )
        cat1 = Categorical(sorted_months)
        str(cat1)
        codes1 = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8)
        categories1 = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
        tm.assert_numpy_array_equal(cat1._codes, codes1)
        tm.assert_index_equal(cat1.categories, categories1)

        unsorted_months = PeriodIndex(
            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"],
            freq="M",
        )
        cat2 = Categorical(unsorted_months, ordered=True)
        str(cat2)
        codes2 = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8)
        categories2 = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")
        tm.assert_numpy_array_equal(cat2._codes, codes2)
        tm.assert_index_equal(cat2.categories, categories2)

        descending_months = PeriodIndex(
            [
                "2013-12",
                "2013-11",
                "2013-10",
                "2013-09",
                "2013-08",
                "2013-07",
                "2013-05",
            ],
            freq="M",
        )
        cat3 = Categorical(descending_months, ordered=True)
        codes3 = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8)
        categories3 = PeriodIndex(
            [
                "2013-05",
                "2013-07",
                "2013-08",
                "2013-09",
                "2013-10",
                "2013-11",
                "2013-12",
            ],
            freq="M",
        )
        tm.assert_numpy_array_equal(cat3._codes, codes3)
        tm.assert_index_equal(cat3.categories, categories3)

    @pytest.mark.parametrize(
        "null_val",
        [None, np.nan, NaT, NA, math.nan, "NaT", "nat", "NAT", "nan", "NaN", "NAN"],
    )
    def test_periodindex_on_null_types(self, null_val):
        # GH 46673
        result = PeriodIndex(["2022-04-06", "2022-04-07", null_val], freq="D")

        assert result[2] is NaT
        expected = PeriodIndex(
            ["2022-04-06", "2022-04-07", "NaT"], dtype="period[D]"
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]])
    def test_categories_assignments_wrong_length_raises(self, new_categories):
        cat = Categorical(["a", "b", "c", "a"])
        expected_error = (
            "new categories need to have the same number of items "
            "as the old categories!"
        )
        with pytest.raises(ValueError, match=expected_error):
            cat.rename_categories(new_categories)

    # Combinations of sorted/unique:
    @pytest.mark.parametrize(
        "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]]
    )
    # Combinations of missing/unique
    @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
    @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
    @pytest.mark.parametrize("dtype", [None, "category", "key"])
    def test_get_indexer_non_unique(self, idx_values, key_values, key_class, dtype):
        # GH 21448
        key = key_class(key_values, categories=range(1, 5))

        # "key" is a placeholder meaning "use the key's own categorical dtype"
        index_dtype = key.dtype if dtype == "key" else dtype

        # Test for flat index and CategoricalIndex with same/different cats:
        idx = Index(idx_values, dtype=index_dtype)
        plain_indexer, plain_missing = idx.get_indexer_non_unique(key_values)
        cat_indexer, cat_missing = idx.get_indexer_non_unique(key)

        tm.assert_numpy_array_equal(plain_indexer, cat_indexer)
        tm.assert_numpy_array_equal(plain_missing, cat_missing)

        plain_unique = idx.unique().get_indexer(key_values)
        cat_unique = idx.unique().get_indexer(key)
        tm.assert_numpy_array_equal(cat_unique, plain_unique)

    def test_where_unobserved_nan(self):
        # one masked position
        ser = Series(Categorical(["a", "b"]))
        result = ser.where([True, False])
        expected = Series(Categorical(["a", None], categories=["a", "b"]))
        tm.assert_series_equal(result, expected)

        # all NA
        ser = Series(Categorical(["a", "b"]))
        result = ser.where([False, False])
        expected = Series(Categorical([None, None], categories=["a", "b"]))
        tm.assert_series_equal(result, expected)

    def test_where_unobserved_categories(self):
        ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))

        result = ser.where([True, True, False], other="b")

        expected = Series(
            Categorical(["a", "b", "b"], categories=ser.cat.categories)
        )
        tm.assert_series_equal(result, expected)

    def test_where_other_categorical(self):
        ser = Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"]))
        replacement = Categorical(
            ["b", "c", "a"], categories=["a", "c", "b", "d"]
        )

        result = ser.where([True, False, True], replacement)

        expected = Series(Categorical(["a", "c", "c"], dtype=ser.dtype))
        tm.assert_series_equal(result, expected)

    def test_where_new_category_raises(self):
        ser = Series(Categorical(["a", "b", "c"]))
        expected_error = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=expected_error):
            ser.where([True, False, True], "d")

    def test_where_ordered_differs_rasies(self):
        ser = Series(
            Categorical(
                ["a", "b", "c"], categories=["d", "c", "b", "a"], ordered=True
            )
        )
        # same categories as ``ser`` but listed in a different order
        replacement = Categorical(
            ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True
        )
        with pytest.raises(TypeError, match="without identical categories"):
            ser.where([True, False, True], replacement)
|
||||
|
||||
|
||||
class TestContains:
    """Membership (``in``) checks against a Categorical."""

    def test_contains(self):
        # GH#21508
        cat = Categorical(list("aabbca"), categories=list("cab"))

        assert "b" in cat
        assert "z" not in cat
        assert np.nan not in cat
        with pytest.raises(TypeError, match="unhashable type: 'list'"):
            assert [1] in cat

        # integer codes are not themselves members
        for code in (0, 1):
            assert code not in cat

        # NaN is a member once a missing value is present
        with_missing = Categorical(
            list("aabbca") + [np.nan], categories=list("cab")
        )
        assert np.nan in with_missing

    @pytest.mark.parametrize(
        "item, expected",
        [
            (Interval(0, 1), True),
            (1.5, True),
            (Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        # GH#23705
        cat = Categorical(IntervalIndex.from_breaks(range(3)))
        is_member = item in cat
        # identity check: the result must be an actual bool
        assert is_member is expected

    def test_contains_list(self):
        # GH#21729
        cat = Categorical([1, 2, 3])

        assert "a" not in cat

        # list operands raise instead of returning False
        with pytest.raises(TypeError, match="unhashable type"):
            ["a"] in cat

        with pytest.raises(TypeError, match="unhashable type"):
            ["a", "b"] in cat
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean(index):
    """A boolean Categorical or CategoricalIndex works as a boolean mask."""
    ser = Series(range(3))
    mask = Categorical([True, False, True])
    if index:
        mask = CategoricalIndex(mask)

    assert com.is_bool_indexer(mask)

    # The reference result uses the same mask cast to object.
    expected = ser[mask.astype("object")]
    result = ser[mask]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean_na_treated_as_false(index):
    """Missing entries in a boolean categorical mask select nothing."""
    # https://github.com/pandas-dev/pandas/issues/31503
    ser = Series(range(3))
    mask = Categorical([True, False, None])
    if index:
        mask = CategoricalIndex(mask)

    # The reference result fills the missing entry with False first.
    expected = ser[mask.fillna(False)]
    result = ser[mask]

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.fixture
def non_coercible_categorical(monkeypatch):
    """
    Replace Categorical.__array__ with a function that always raises.

    While the fixture is active, any implicit conversion of a Categorical
    through ``__array__`` fails. The patch lives inside a monkeypatch
    context that is left after the ``yield``.

    Raises
    ------
    ValueError
        When Categorical.__array__ is called.
    """

    # TODO(Categorical): identify other places where this may be
    # useful and move to a conftest.py
    def _refuse_conversion(self, dtype=None):
        raise ValueError("I cannot be converted.")

    with monkeypatch.context() as patcher:
        patcher.setattr(Categorical, "__array__", _refuse_conversion)
        yield
|
||||
|
||||
|
||||
def test_series_at():
    """Series.at on categorical data returns the underlying scalar."""
    ser = Series(Categorical(["a", "b", "c"]))
    first = ser.at[0]
    assert first == "a"
|
@ -0,0 +1,154 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, "ignore"])
def na_action(request):
    """Yield each ``na_action`` value the map tests run with: None and "ignore"."""
    current = request.param
    return current
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, categories",
|
||||
[
|
||||
(list("abcbca"), list("cab")),
|
||||
(pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
|
||||
],
|
||||
ids=["string", "interval"],
|
||||
)
|
||||
def test_map_str(data, categories, ordered, na_action):
|
||||
# GH 31202 - override base class since we want to maintain categorical/ordered
|
||||
cat = Categorical(data, categories=categories, ordered=ordered)
|
||||
result = cat.map(str, na_action=na_action)
|
||||
expected = Categorical(
|
||||
map(str, data), categories=map(str, categories), ordered=ordered
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
|
||||
def test_map(na_action):
|
||||
cat = Categorical(list("ABABC"), categories=list("CBA"), ordered=True)
|
||||
result = cat.map(lambda x: x.lower(), na_action=na_action)
|
||||
exp = Categorical(list("ababc"), categories=list("cba"), ordered=True)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
|
||||
cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False)
|
||||
result = cat.map(lambda x: x.lower(), na_action=na_action)
|
||||
exp = Categorical(list("ababc"), categories=list("bac"), ordered=False)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
|
||||
# GH 12766: Return an index not an array
|
||||
result = cat.map(lambda x: 1, na_action=na_action)
|
||||
exp = Index(np.array([1] * 5, dtype=np.int64))
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
# change categories dtype
|
||||
cat = Categorical(list("ABABC"), categories=list("BAC"), ordered=False)
|
||||
|
||||
def f(x):
|
||||
return {"A": 10, "B": 20, "C": 30}.get(x)
|
||||
|
||||
result = cat.map(f, na_action=na_action)
|
||||
exp = Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
|
||||
mapper = Series([10, 20, 30], index=["A", "B", "C"])
|
||||
result = cat.map(mapper, na_action=na_action)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
|
||||
result = cat.map({"A": 10, "B": 20, "C": 30}, na_action=na_action)
|
||||
tm.assert_categorical_equal(result, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("data", "f", "expected"),
|
||||
(
|
||||
([1, 1, np.nan], pd.isna, Index([False, False, True])),
|
||||
([1, 2, np.nan], pd.isna, Index([False, False, True])),
|
||||
([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])),
|
||||
([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
|
||||
(
|
||||
[1, 1, np.nan],
|
||||
Series([False, False]),
|
||||
Categorical([False, False, np.nan]),
|
||||
),
|
||||
(
|
||||
[1, 2, np.nan],
|
||||
Series([False] * 3),
|
||||
Index([False, False, np.nan]),
|
||||
),
|
||||
),
|
||||
)
|
||||
def test_map_with_nan_none(data, f, expected): # GH 24241
|
||||
values = Categorical(data)
|
||||
result = values.map(f, na_action=None)
|
||||
if isinstance(expected, Categorical):
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
else:
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("data", "f", "expected"),
|
||||
(
|
||||
([1, 1, np.nan], pd.isna, Categorical([False, False, np.nan])),
|
||||
([1, 2, np.nan], pd.isna, Index([False, False, np.nan])),
|
||||
([1, 1, np.nan], {1: False}, Categorical([False, False, np.nan])),
|
||||
([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
|
||||
(
|
||||
[1, 1, np.nan],
|
||||
Series([False, False]),
|
||||
Categorical([False, False, np.nan]),
|
||||
),
|
||||
(
|
||||
[1, 2, np.nan],
|
||||
Series([False, False, False]),
|
||||
Index([False, False, np.nan]),
|
||||
),
|
||||
),
|
||||
)
|
||||
def test_map_with_nan_ignore(data, f, expected): # GH 24241
|
||||
values = Categorical(data)
|
||||
result = values.map(f, na_action="ignore")
|
||||
if data[1] == 1:
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
else:
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_map_with_dict_or_series(na_action):
|
||||
orig_values = ["a", "B", 1, "a"]
|
||||
new_values = ["one", 2, 3.0, "one"]
|
||||
cat = Categorical(orig_values)
|
||||
|
||||
mapper = Series(new_values[:-1], index=orig_values[:-1])
|
||||
result = cat.map(mapper, na_action=na_action)
|
||||
|
||||
# Order of categories in result can be different
|
||||
expected = Categorical(new_values, categories=[3.0, 2, "one"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
mapper = dict(zip(orig_values[:-1], new_values[:-1]))
|
||||
result = cat.map(mapper, na_action=na_action)
|
||||
# Order of categories in result can be different
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
|
||||
def test_map_na_action_no_default_deprecated():
|
||||
# GH51645
|
||||
cat = Categorical(["a", "b", "c"])
|
||||
msg = (
|
||||
"The default value of 'ignore' for the `na_action` parameter in "
|
||||
"pandas.Categorical.map is deprecated and will be "
|
||||
"changed to 'None' in a future version. Please set na_action to the "
|
||||
"desired value to avoid seeing this warning"
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
cat.map(lambda x: x)
|
@ -0,0 +1,216 @@
|
||||
import collections
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalMissing:
|
||||
def test_isna(self):
|
||||
exp = np.array([False, False, True])
|
||||
cat = Categorical(["a", "b", np.nan])
|
||||
res = cat.isna()
|
||||
|
||||
tm.assert_numpy_array_equal(res, exp)
|
||||
|
||||
def test_na_flags_int_categories(self):
|
||||
# #1457
|
||||
|
||||
categories = list(range(10))
|
||||
labels = np.random.default_rng(2).integers(0, 10, 20)
|
||||
labels[::5] = -1
|
||||
|
||||
cat = Categorical(labels, categories)
|
||||
repr(cat)
|
||||
|
||||
tm.assert_numpy_array_equal(isna(cat), labels == -1)
|
||||
|
||||
def test_nan_handling(self):
|
||||
# Nans are represented as -1 in codes
|
||||
c = Categorical(["a", "b", np.nan, "a"])
|
||||
tm.assert_index_equal(c.categories, Index(["a", "b"]))
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))
|
||||
c[1] = np.nan
|
||||
tm.assert_index_equal(c.categories, Index(["a", "b"]))
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8))
|
||||
|
||||
# Adding nan to categories should make assigned nan point to the
|
||||
# category!
|
||||
c = Categorical(["a", "b", np.nan, "a"])
|
||||
tm.assert_index_equal(c.categories, Index(["a", "b"]))
|
||||
tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))
|
||||
|
||||
def test_set_dtype_nans(self):
|
||||
c = Categorical(["a", "b", np.nan])
|
||||
result = c._set_dtype(CategoricalDtype(["a", "c"]))
|
||||
tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))
|
||||
|
||||
def test_set_item_nan(self):
|
||||
cat = Categorical([1, 2, 3])
|
||||
cat[1] = np.nan
|
||||
|
||||
exp = Categorical([1, np.nan, 3], categories=[1, 2, 3])
|
||||
tm.assert_categorical_equal(cat, exp)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"fillna_kwargs, msg",
|
||||
[
|
||||
(
|
||||
{"value": 1, "method": "ffill"},
|
||||
"Cannot specify both 'value' and 'method'.",
|
||||
),
|
||||
({}, "Must specify a fill 'value' or 'method'."),
|
||||
({"method": "bad"}, "Invalid fill method. Expecting .* bad"),
|
||||
(
|
||||
{"value": Series([1, 2, 3, 4, "a"])},
|
||||
"Cannot setitem on a Categorical with a new category",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_fillna_raises(self, fillna_kwargs, msg):
|
||||
# https://github.com/pandas-dev/pandas/issues/19682
|
||||
# https://github.com/pandas-dev/pandas/issues/13628
|
||||
cat = Categorical([1, 2, 3, None, None])
|
||||
|
||||
if len(fillna_kwargs) == 1 and "value" in fillna_kwargs:
|
||||
err = TypeError
|
||||
else:
|
||||
err = ValueError
|
||||
|
||||
with pytest.raises(err, match=msg):
|
||||
cat.fillna(**fillna_kwargs)
|
||||
|
||||
@pytest.mark.parametrize("named", [True, False])
|
||||
def test_fillna_iterable_category(self, named):
|
||||
# https://github.com/pandas-dev/pandas/issues/21097
|
||||
if named:
|
||||
Point = collections.namedtuple("Point", "x y")
|
||||
else:
|
||||
Point = lambda *args: args # tuple
|
||||
cat = Categorical(np.array([Point(0, 0), Point(0, 1), None], dtype=object))
|
||||
result = cat.fillna(Point(0, 0))
|
||||
expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])
|
||||
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
# Case where the Point is not among our categories; we want ValueError,
|
||||
# not NotImplementedError GH#41914
|
||||
cat = Categorical(np.array([Point(1, 0), Point(0, 1), None], dtype=object))
|
||||
msg = "Cannot setitem on a Categorical with a new category"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat.fillna(Point(0, 0))
|
||||
|
||||
def test_fillna_array(self):
|
||||
# accept Categorical or ndarray value if it holds appropriate values
|
||||
cat = Categorical(["A", "B", "C", None, None])
|
||||
|
||||
other = cat.fillna("C")
|
||||
result = cat.fillna(other)
|
||||
tm.assert_categorical_equal(result, other)
|
||||
assert isna(cat[-1]) # didn't modify original inplace
|
||||
|
||||
other = np.array(["A", "B", "C", "B", "A"])
|
||||
result = cat.fillna(other)
|
||||
expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
assert isna(cat[-1]) # didn't modify original inplace
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, expected",
|
||||
[
|
||||
([1, 2, 3], np.array([False, False, False])),
|
||||
([1, 2, np.nan], np.array([False, False, True])),
|
||||
([1, 2, np.inf], np.array([False, False, True])),
|
||||
([1, 2, pd.NA], np.array([False, False, True])),
|
||||
],
|
||||
)
|
||||
def test_use_inf_as_na(self, values, expected):
|
||||
# https://github.com/pandas-dev/pandas/issues/33594
|
||||
msg = "use_inf_as_na option is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
with pd.option_context("mode.use_inf_as_na", True):
|
||||
cat = Categorical(values)
|
||||
result = cat.isna()
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = Series(cat).isna()
|
||||
expected = Series(expected)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = DataFrame(cat).isna()
|
||||
expected = DataFrame(expected)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, expected",
|
||||
[
|
||||
([1, 2, 3], np.array([False, False, False])),
|
||||
([1, 2, np.nan], np.array([False, False, True])),
|
||||
([1, 2, np.inf], np.array([False, False, True])),
|
||||
([1, 2, pd.NA], np.array([False, False, True])),
|
||||
],
|
||||
)
|
||||
def test_use_inf_as_na_outside_context(self, values, expected):
|
||||
# https://github.com/pandas-dev/pandas/issues/33594
|
||||
# Using isna directly for Categorical will fail in general here
|
||||
cat = Categorical(values)
|
||||
|
||||
msg = "use_inf_as_na option is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
with pd.option_context("mode.use_inf_as_na", True):
|
||||
result = isna(cat)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = isna(Series(cat))
|
||||
expected = Series(expected)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = isna(DataFrame(cat))
|
||||
expected = DataFrame(expected)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"a1, a2, categories",
|
||||
[
|
||||
(["a", "b", "c"], [np.nan, "a", "b"], ["a", "b", "c"]),
|
||||
([1, 2, 3], [np.nan, 1, 2], [1, 2, 3]),
|
||||
],
|
||||
)
|
||||
def test_compare_categorical_with_missing(self, a1, a2, categories):
|
||||
# GH 28384
|
||||
cat_type = CategoricalDtype(categories)
|
||||
|
||||
# !=
|
||||
result = Series(a1, dtype=cat_type) != Series(a2, dtype=cat_type)
|
||||
expected = Series(a1) != Series(a2)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# ==
|
||||
result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type)
|
||||
expected = Series(a1) == Series(a2)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"na_value, dtype",
|
||||
[
|
||||
(pd.NaT, "datetime64[ns]"),
|
||||
(None, "float64"),
|
||||
(np.nan, "float64"),
|
||||
(pd.NA, "float64"),
|
||||
],
|
||||
)
|
||||
def test_categorical_only_missing_values_no_cast(self, na_value, dtype):
|
||||
# GH#44900
|
||||
result = Categorical([na_value, na_value])
|
||||
tm.assert_index_equal(result.categories, Index([], dtype=dtype))
|
@ -0,0 +1,414 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalOpsWithFactor:
    """Comparison tests on an ordered ``Categorical`` built from repeated labels."""

    def test_categories_none_comparisons(self):
        factor = Categorical(list("abbaaccc"), ordered=True)
        tm.assert_categorical_equal(factor, factor)

    def test_comparisons(self):
        factor = Categorical(list("abbaaccc"), ordered=True)

        # a mask computed on the Categorical must select the same elements
        # as the mask computed on its plain ndarray form
        scalar_checks = (
            lambda x: x == "a",
            lambda x: x != "a",
            lambda x: x < "c",
            lambda x: x > "a",
            lambda x: x >= "b",
            lambda x: x <= "b",
        )
        for check in scalar_checks:
            result = factor[check(factor)]
            expected = factor[check(np.asarray(factor))]
            tm.assert_categorical_equal(result, expected)

        shuffled = factor[np.random.default_rng(2).permutation(len(factor))]
        tm.assert_numpy_array_equal(
            factor == shuffled, np.asarray(factor) == np.asarray(shuffled)
        )

        # "d" is not a category: equality is False everywhere
        tm.assert_numpy_array_equal(factor == "d", np.zeros(len(factor), dtype=bool))

        # comparisons with categoricals
        reversed_cats = ["c", "b", "a"]
        cat_rev = Categorical(["a", "b", "c"], categories=reversed_cats, ordered=True)
        cat_rev_base = Categorical(
            ["b", "b", "b"], categories=reversed_cats, ordered=True
        )
        cat = Categorical(["a", "b", "c"], ordered=True)
        cat_base = Categorical(["b", "b", "b"], categories=cat.categories, ordered=True)

        # comparisons need to take categories ordering into account
        tm.assert_numpy_array_equal(
            cat_rev > cat_rev_base, np.array([True, False, False])
        )
        tm.assert_numpy_array_equal(
            cat_rev < cat_rev_base, np.array([False, False, True])
        )
        tm.assert_numpy_array_equal(cat > cat_base, np.array([False, False, True]))

        # Only categories with same categories can be compared
        msg = "Categoricals can only be compared if 'categories' are the same"
        with pytest.raises(TypeError, match=msg):
            cat > cat_rev

        cat_rev_base2 = Categorical(["b", "b", "b"], categories=["c", "b", "a", "d"])
        with pytest.raises(TypeError, match=msg):
            cat_rev > cat_rev_base2

        # Only categories with same ordering information can be compared
        cat_unordered = cat.set_ordered(False)
        assert not (cat > cat).any()
        with pytest.raises(TypeError, match=msg):
            cat > cat_unordered

        # comparison (in both directions) with Series will raise
        s = Series(["b", "b", "b"], dtype=object)
        msg = (
            "Cannot compare a Categorical for op __gt__ with type "
            r"<class 'numpy\.ndarray'>"
        )
        for ordered_cat in (cat, cat_rev):
            with pytest.raises(TypeError, match=msg):
                ordered_cat > s
            with pytest.raises(TypeError, match=msg):
                s < ordered_cat

        # comparison with numpy.array will raise in both direction, but only on
        # newer numpy versions
        a = np.array(["b", "b", "b"], dtype=object)
        for ordered_cat in (cat, cat_rev):
            with pytest.raises(TypeError, match=msg):
                ordered_cat > a

        # Make sure that unequal comparison take the categories order in
        # account
        cat_rev = Categorical(list("abc"), categories=list("cba"), ordered=True)
        exp = np.array([True, False, False])
        tm.assert_numpy_array_equal(cat_rev > "b", exp)

        # check that zero-dim array gets unboxed
        tm.assert_numpy_array_equal(cat_rev > np.array("b"), exp)
|
||||
|
||||
|
||||
class TestCategoricalOps:
|
||||
@pytest.mark.parametrize(
|
||||
"categories",
|
||||
[["a", "b"], [0, 1], [Timestamp("2019"), Timestamp("2020")]],
|
||||
)
|
||||
def test_not_equal_with_na(self, categories):
|
||||
# https://github.com/pandas-dev/pandas/issues/32276
|
||||
c1 = Categorical.from_codes([-1, 0], categories=categories)
|
||||
c2 = Categorical.from_codes([0, 1], categories=categories)
|
||||
|
||||
result = c1 != c2
|
||||
|
||||
assert result.all()
|
||||
|
||||
def test_compare_frame(self):
|
||||
# GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame
|
||||
data = ["a", "b", 2, "a"]
|
||||
cat = Categorical(data)
|
||||
|
||||
df = DataFrame(cat)
|
||||
|
||||
result = cat == df.T
|
||||
expected = DataFrame([[True, True, True, True]])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = cat[::-1] != df.T
|
||||
expected = DataFrame([[False, True, True, False]])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_compare_frame_raises(self, comparison_op):
|
||||
# alignment raises unless we transpose
|
||||
op = comparison_op
|
||||
cat = Categorical(["a", "b", 2, "a"])
|
||||
df = DataFrame(cat)
|
||||
msg = "Unable to coerce to Series, length must be 1: given 4"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
op(cat, df)
|
||||
|
||||
def test_datetime_categorical_comparison(self):
|
||||
dt_cat = Categorical(date_range("2014-01-01", periods=3), ordered=True)
|
||||
tm.assert_numpy_array_equal(dt_cat > dt_cat[0], np.array([False, True, True]))
|
||||
tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, np.array([False, True, True]))
|
||||
|
||||
def test_reflected_comparison_with_scalars(self):
|
||||
# GH8658
|
||||
cat = Categorical([1, 2, 3], ordered=True)
|
||||
tm.assert_numpy_array_equal(cat > cat[0], np.array([False, True, True]))
|
||||
tm.assert_numpy_array_equal(cat[0] < cat, np.array([False, True, True]))
|
||||
|
||||
def test_comparison_with_unknown_scalars(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
|
||||
# and following comparisons with scalars not in categories should raise
|
||||
# for unequal comps, but not for equal/not equal
|
||||
cat = Categorical([1, 2, 3], ordered=True)
|
||||
|
||||
msg = "Invalid comparison between dtype=category and int"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat < 4
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > 4
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
4 < cat
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
4 > cat
|
||||
|
||||
tm.assert_numpy_array_equal(cat == 4, np.array([False, False, False]))
|
||||
tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True]))
|
||||
|
||||
def test_comparison_with_tuple(self):
|
||||
cat = Categorical(np.array(["foo", (0, 1), 3, (0, 1)], dtype=object))
|
||||
|
||||
result = cat == "foo"
|
||||
expected = np.array([True, False, False, False], dtype=bool)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = cat == (0, 1)
|
||||
expected = np.array([False, True, False, True], dtype=bool)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = cat != (0, 1)
|
||||
tm.assert_numpy_array_equal(result, ~expected)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
|
||||
def test_comparison_of_ordered_categorical_with_nan_to_scalar(
|
||||
self, compare_operators_no_eq_ne
|
||||
):
|
||||
# https://github.com/pandas-dev/pandas/issues/26504
|
||||
# BUG: fix ordered categorical comparison with missing values (#26504 )
|
||||
# and following comparisons with scalars in categories with missing
|
||||
# values should be evaluated as False
|
||||
|
||||
cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
|
||||
scalar = 2
|
||||
expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar)
|
||||
actual = getattr(cat, compare_operators_no_eq_ne)(scalar)
|
||||
tm.assert_numpy_array_equal(actual, expected)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
|
||||
def test_comparison_of_ordered_categorical_with_nan_to_listlike(
|
||||
self, compare_operators_no_eq_ne
|
||||
):
|
||||
# https://github.com/pandas-dev/pandas/issues/26504
|
||||
# and following comparisons of missing values in ordered Categorical
|
||||
# with listlike should be evaluated as False
|
||||
|
||||
cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
|
||||
other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True)
|
||||
expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2)
|
||||
actual = getattr(cat, compare_operators_no_eq_ne)(other)
|
||||
tm.assert_numpy_array_equal(actual, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data,reverse,base",
|
||||
[(list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])],
|
||||
)
|
||||
def test_comparisons(self, data, reverse, base):
|
||||
cat_rev = Series(Categorical(data, categories=reverse, ordered=True))
|
||||
cat_rev_base = Series(Categorical(base, categories=reverse, ordered=True))
|
||||
cat = Series(Categorical(data, ordered=True))
|
||||
cat_base = Series(
|
||||
Categorical(base, categories=cat.cat.categories, ordered=True)
|
||||
)
|
||||
s = Series(base, dtype=object if base == list("bbb") else None)
|
||||
a = np.array(base)
|
||||
|
||||
# comparisons need to take categories ordering into account
|
||||
res_rev = cat_rev > cat_rev_base
|
||||
exp_rev = Series([True, False, False])
|
||||
tm.assert_series_equal(res_rev, exp_rev)
|
||||
|
||||
res_rev = cat_rev < cat_rev_base
|
||||
exp_rev = Series([False, False, True])
|
||||
tm.assert_series_equal(res_rev, exp_rev)
|
||||
|
||||
res = cat > cat_base
|
||||
exp = Series([False, False, True])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
scalar = base[1]
|
||||
res = cat > scalar
|
||||
exp = Series([False, False, True])
|
||||
exp2 = cat.values > scalar
|
||||
tm.assert_series_equal(res, exp)
|
||||
tm.assert_numpy_array_equal(res.values, exp2)
|
||||
res_rev = cat_rev > scalar
|
||||
exp_rev = Series([True, False, False])
|
||||
exp_rev2 = cat_rev.values > scalar
|
||||
tm.assert_series_equal(res_rev, exp_rev)
|
||||
tm.assert_numpy_array_equal(res_rev.values, exp_rev2)
|
||||
|
||||
# Only categories with same categories can be compared
|
||||
msg = "Categoricals can only be compared if 'categories' are the same"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > cat_rev
|
||||
|
||||
# categorical cannot be compared to Series or numpy array, and also
|
||||
# not the other way around
|
||||
msg = (
|
||||
"Cannot compare a Categorical for op __gt__ with type "
|
||||
r"<class 'numpy\.ndarray'>"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > s
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat_rev > s
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat > a
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
cat_rev > a
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s < cat
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
s < cat_rev
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
a < cat
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
a < cat_rev
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ctor",
|
||||
[
|
||||
lambda *args, **kwargs: Categorical(*args, **kwargs),
|
||||
lambda *args, **kwargs: Series(Categorical(*args, **kwargs)),
|
||||
],
|
||||
)
|
||||
def test_unordered_different_order_equal(self, ctor):
|
||||
# https://github.com/pandas-dev/pandas/issues/16014
|
||||
c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False)
|
||||
assert (c1 == c2).all()
|
||||
|
||||
c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["b", "a"], categories=["b", "a"], ordered=False)
|
||||
assert (c1 != c2).all()
|
||||
|
||||
c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["b", "b"], categories=["b", "a"], ordered=False)
|
||||
assert (c1 != c2).all()
|
||||
|
||||
c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False)
|
||||
c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False)
|
||||
result = c1 == c2
|
||||
tm.assert_numpy_array_equal(np.array(result), np.array([True, False]))
|
||||
|
||||
def test_unordered_different_categories_raises(self):
|
||||
c1 = Categorical(["a", "b"], categories=["a", "b"], ordered=False)
|
||||
c2 = Categorical(["a", "c"], categories=["c", "a"], ordered=False)
|
||||
|
||||
with pytest.raises(TypeError, match=("Categoricals can only be compared")):
|
||||
c1 == c2
|
||||
|
||||
def test_compare_different_lengths(self):
|
||||
c1 = Categorical([], categories=["a", "b"])
|
||||
c2 = Categorical([], categories=["a"])
|
||||
|
||||
msg = "Categoricals can only be compared if 'categories' are the same."
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
c1 == c2
|
||||
|
||||
def test_compare_unordered_different_order(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
|
||||
# 349290078
|
||||
a = Categorical(["a"], categories=["a", "b"])
|
||||
b = Categorical(["b"], categories=["b", "a"])
|
||||
assert not a.equals(b)
|
||||
|
||||
def test_numeric_like_ops(self):
|
||||
df = DataFrame({"value": np.random.default_rng(2).integers(0, 10000, 100)})
|
||||
labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)]
|
||||
cat_labels = Categorical(labels, labels)
|
||||
|
||||
df = df.sort_values(by=["value"], ascending=True)
|
||||
df["value_group"] = pd.cut(
|
||||
df.value, range(0, 10500, 500), right=False, labels=cat_labels
|
||||
)
|
||||
|
||||
# numeric ops should not succeed
|
||||
for op, str_rep in [
|
||||
("__add__", r"\+"),
|
||||
("__sub__", "-"),
|
||||
("__mul__", r"\*"),
|
||||
("__truediv__", "/"),
|
||||
]:
|
||||
msg = f"Series cannot perform the operation {str_rep}|unsupported operand"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(df, op)(df)
|
||||
|
||||
# reduction ops should not succeed (unless specifically defined, e.g.
|
||||
# min/max)
|
||||
s = df["value_group"]
|
||||
for op in ["kurt", "skew", "var", "std", "mean", "sum", "median"]:
|
||||
msg = f"does not support reduction '{op}'"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(s, op)(numeric_only=False)
|
||||
|
||||
def test_numeric_like_ops_series(self):
|
||||
# numpy ops
|
||||
s = Series(Categorical([1, 2, 3, 4]))
|
||||
with pytest.raises(TypeError, match="does not support reduction 'sum'"):
|
||||
np.sum(s)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"op, str_rep",
|
||||
[
|
||||
("__add__", r"\+"),
|
||||
("__sub__", "-"),
|
||||
("__mul__", r"\*"),
|
||||
("__truediv__", "/"),
|
||||
],
|
||||
)
|
||||
def test_numeric_like_ops_series_arith(self, op, str_rep):
|
||||
# numeric ops on a Series
|
||||
s = Series(Categorical([1, 2, 3, 4]))
|
||||
msg = f"Series cannot perform the operation {str_rep}|unsupported operand"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
getattr(s, op)(2)
|
||||
|
||||
def test_numeric_like_ops_series_invalid(self):
|
||||
# invalid ufunc
|
||||
s = Series(Categorical([1, 2, 3, 4]))
|
||||
msg = "Object with dtype category cannot perform the numpy op log"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
np.log(s)
|
@ -0,0 +1,111 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Categorical
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "to_replace,value,expected,flip_categories",
    [
        # one-to-one
        (1, 2, [2, 2, 3], False),
        (1, 4, [4, 2, 3], False),
        (4, 1, [1, 2, 3], False),
        (5, 6, [1, 2, 3], False),
        # many-to-one
        ([1], 2, [2, 2, 3], False),
        ([1, 2], 3, [3, 3, 3], False),
        ([1, 2], 4, [4, 4, 3], False),
        ((1, 2, 4), 5, [5, 5, 3], False),
        ((5, 6), 2, [1, 2, 3], False),
        ([1], [2], [2, 2, 3], False),
        ([1, 4], [5, 2], [5, 2, 3], False),
        # GH49404: overlap between to_replace and value
        ([1, 2, 3], [2, 3, 4], [2, 3, 4], False),
        # GH50872, GH46884: replace with null
        (1, None, [None, 2, 3], False),
        (1, pd.NA, [None, 2, 3], False),
        # check_categorical sorts categories, which crashes on mixed dtypes
        (3, "4", [1, 2, "4"], False),
        ([1, 2, "3"], "5", ["5", "5", 3], True),
    ],
)
@pytest.mark.filterwarnings(
    "ignore:.*with CategoricalDtype is deprecated:FutureWarning"
)
def test_replace_categorical_series(to_replace, value, expected, flip_categories):
    """Check ``Series.replace`` on category data, both copying and in place (GH 31720)."""
    original = pd.Series([1, 2, 3], dtype="category")
    replaced_copy = original.replace(to_replace, value)
    original.replace(to_replace, value, inplace=True)  # same replacement, in place

    expected = pd.Series(expected, dtype="category")
    if flip_categories:
        reversed_cats = expected.cat.categories[::-1]
        expected = expected.cat.set_categories(reversed_cats)

    for actual in (replaced_copy, original):
        tm.assert_series_equal(expected, actual, check_category_order=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"to_replace, value, result, expected_error_msg",
|
||||
[
|
||||
("b", "c", ["a", "c"], "Categorical.categories are different"),
|
||||
("c", "d", ["a", "b"], None),
|
||||
# https://github.com/pandas-dev/pandas/issues/33288
|
||||
("a", "a", ["a", "b"], None),
|
||||
("b", None, ["a", None], "Categorical.categories length are different"),
|
||||
],
|
||||
)
|
||||
def test_replace_categorical(to_replace, value, result, expected_error_msg):
|
||||
# GH#26988
|
||||
cat = Categorical(["a", "b"])
|
||||
expected = Categorical(result)
|
||||
msg = (
|
||||
r"The behavior of Series\.replace \(and DataFrame.replace\) "
|
||||
"with CategoricalDtype"
|
||||
)
|
||||
warn = FutureWarning if expected_error_msg is not None else None
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
result = pd.Series(cat, copy=False).replace(to_replace, value)._values
|
||||
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
if to_replace == "b": # the "c" test is supposed to be unchanged
|
||||
with pytest.raises(AssertionError, match=expected_error_msg):
|
||||
# ensure non-inplace call does not affect original
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
ser = pd.Series(cat, copy=False)
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
ser.replace(to_replace, value, inplace=True)
|
||||
tm.assert_categorical_equal(cat, expected)
|
||||
|
||||
|
||||
def test_replace_categorical_ea_dtype():
|
||||
# GH49404
|
||||
cat = Categorical(pd.array(["a", "b"], dtype="string"))
|
||||
msg = (
|
||||
r"The behavior of Series\.replace \(and DataFrame.replace\) "
|
||||
"with CategoricalDtype"
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
|
||||
expected = Categorical(pd.array(["c", pd.NA], dtype="string"))
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
|
||||
def test_replace_maintain_ordering():
|
||||
# GH51016
|
||||
dtype = pd.CategoricalDtype([0, 1, 2], ordered=True)
|
||||
ser = pd.Series([0, 1, 2], dtype=dtype)
|
||||
msg = (
|
||||
r"The behavior of Series\.replace \(and DataFrame.replace\) "
|
||||
"with CategoricalDtype"
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = ser.replace(0, 2)
|
||||
expected_dtype = pd.CategoricalDtype([1, 2], ordered=True)
|
||||
expected = pd.Series([2, 1, 2], dtype=expected_dtype)
|
||||
tm.assert_series_equal(expected, result, check_category_order=True)
|
@ -0,0 +1,550 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Series,
|
||||
date_range,
|
||||
option_context,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
|
||||
|
||||
class TestCategoricalReprWithFactor:
|
||||
def test_print(self, using_infer_string):
|
||||
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
|
||||
if using_infer_string:
|
||||
expected = [
|
||||
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
|
||||
"Categories (3, string): [a < b < c]",
|
||||
]
|
||||
else:
|
||||
expected = [
|
||||
"['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']",
|
||||
"Categories (3, object): ['a' < 'b' < 'c']",
|
||||
]
|
||||
expected = "\n".join(expected)
|
||||
actual = repr(factor)
|
||||
assert actual == expected
|
||||
|
||||
|
||||
class TestCategoricalRepr:
|
||||
def test_big_print(self):
|
||||
codes = np.array([0, 1, 2, 0, 1, 2] * 100)
|
||||
dtype = CategoricalDtype(categories=Index(["a", "b", "c"], dtype=object))
|
||||
factor = Categorical.from_codes(codes, dtype=dtype)
|
||||
expected = [
|
||||
"['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']",
|
||||
"Length: 600",
|
||||
"Categories (3, object): ['a', 'b', 'c']",
|
||||
]
|
||||
expected = "\n".join(expected)
|
||||
|
||||
actual = repr(factor)
|
||||
|
||||
assert actual == expected
|
||||
|
||||
def test_empty_print(self):
|
||||
factor = Categorical([], Index(["a", "b", "c"], dtype=object))
|
||||
expected = "[], Categories (3, object): ['a', 'b', 'c']"
|
||||
actual = repr(factor)
|
||||
assert actual == expected
|
||||
|
||||
assert expected == actual
|
||||
factor = Categorical([], Index(["a", "b", "c"], dtype=object), ordered=True)
|
||||
expected = "[], Categories (3, object): ['a' < 'b' < 'c']"
|
||||
actual = repr(factor)
|
||||
assert expected == actual
|
||||
|
||||
factor = Categorical([], [])
|
||||
expected = "[], Categories (0, object): []"
|
||||
assert expected == repr(factor)
|
||||
|
||||
def test_print_none_width(self):
|
||||
# GH10087
|
||||
a = Series(Categorical([1, 2, 3, 4]))
|
||||
exp = (
|
||||
"0 1\n1 2\n2 3\n3 4\n"
|
||||
"dtype: category\nCategories (4, int64): [1, 2, 3, 4]"
|
||||
)
|
||||
|
||||
with option_context("display.width", None):
|
||||
assert exp == repr(a)
|
||||
|
||||
@pytest.mark.skipif(
|
||||
using_pyarrow_string_dtype(),
|
||||
reason="Change once infer_string is set to True by default",
|
||||
)
|
||||
def test_unicode_print(self):
|
||||
c = Categorical(["aaaaa", "bb", "cccc"] * 20)
|
||||
expected = """\
|
||||
['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc']
|
||||
Length: 60
|
||||
Categories (3, object): ['aaaaa', 'bb', 'cccc']"""
|
||||
|
||||
assert repr(c) == expected
|
||||
|
||||
c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
|
||||
expected = """\
|
||||
['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう']
|
||||
Length: 60
|
||||
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501
|
||||
|
||||
assert repr(c) == expected
|
||||
|
||||
# unicode option should not affect to Categorical, as it doesn't care
|
||||
# the repr width
|
||||
with option_context("display.unicode.east_asian_width", True):
|
||||
c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20)
|
||||
expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう']
|
||||
Length: 60
|
||||
Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa: E501
|
||||
|
||||
assert repr(c) == expected
|
||||
|
||||
def test_categorical_repr(self):
|
||||
c = Categorical([1, 2, 3])
|
||||
exp = """[1, 2, 3]
|
||||
Categories (3, int64): [1, 2, 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3])
|
||||
exp = """[1, 2, 3, 1, 2, 3]
|
||||
Categories (3, int64): [1, 2, 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 4, 5] * 10)
|
||||
exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
|
||||
Length: 50
|
||||
Categories (5, int64): [1, 2, 3, 4, 5]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(np.arange(20, dtype=np.int64))
|
||||
exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
|
||||
Length: 20
|
||||
Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_ordered(self):
|
||||
c = Categorical([1, 2, 3], ordered=True)
|
||||
exp = """[1, 2, 3]
|
||||
Categories (3, int64): [1 < 2 < 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True)
|
||||
exp = """[1, 2, 3, 1, 2, 3]
|
||||
Categories (3, int64): [1 < 2 < 3]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True)
|
||||
exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5]
|
||||
Length: 50
|
||||
Categories (5, int64): [1 < 2 < 3 < 4 < 5]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(np.arange(20, dtype=np.int64), ordered=True)
|
||||
exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19]
|
||||
Length: 20
|
||||
Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_datetime(self):
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
c = Categorical(idx)
|
||||
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
|
||||
"2011-01-01 12:00:00, 2011-01-01 13:00:00]\n"
|
||||
"Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
|
||||
" 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]"
|
||||
""
|
||||
)
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, "
|
||||
"2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]\n"
|
||||
"Categories (5, datetime64[ns]): [2011-01-01 09:00:00, "
|
||||
"2011-01-01 10:00:00, 2011-01-01 11:00:00,\n"
|
||||
" 2011-01-01 12:00:00, "
|
||||
"2011-01-01 13:00:00]"
|
||||
)
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
|
||||
c = Categorical(idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
|
||||
"2011-01-01 13:00:00-05:00]\n"
|
||||
"Categories (5, datetime64[ns, US/Eastern]): "
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 13:00:00-05:00]"
|
||||
)
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = (
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, "
|
||||
"2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, "
|
||||
"2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, "
|
||||
"2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n"
|
||||
"Categories (5, datetime64[ns, US/Eastern]): "
|
||||
"[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n"
|
||||
" "
|
||||
"2011-01-01 13:00:00-05:00]"
|
||||
)
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_datetime_ordered(self):
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
|
||||
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
|
||||
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]
|
||||
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
|
||||
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
|
||||
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
|
||||
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
|
||||
2011-01-01 13:00:00-05:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]
|
||||
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
|
||||
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
|
||||
2011-01-01 13:00:00-05:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_int_with_nan(self):
|
||||
c = Categorical([1, 2, np.nan])
|
||||
c_exp = """[1, 2, NaN]\nCategories (2, int64): [1, 2]"""
|
||||
assert repr(c) == c_exp
|
||||
|
||||
s = Series([1, 2, np.nan], dtype="object").astype("category")
|
||||
s_exp = """0 1\n1 2\n2 NaN
|
||||
dtype: category
|
||||
Categories (2, int64): [1, 2]"""
|
||||
assert repr(s) == s_exp
|
||||
|
||||
def test_categorical_repr_period(self):
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[h]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
|
||||
2011-01-01 13:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[h]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
|
||||
2011-01-01 13:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = period_range("2011-01", freq="M", periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_period_ordered(self):
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[h]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
|
||||
2011-01-01 13:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
|
||||
Categories (5, period[h]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
|
||||
2011-01-01 13:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = period_range("2011-01", freq="M", periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
|
||||
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_timedelta(self):
|
||||
idx = timedelta_range("1 days", periods=5)
|
||||
c = Categorical(idx)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = timedelta_range("1 hours", periods=20)
|
||||
c = Categorical(idx)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 20
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
|
||||
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
|
||||
18 days 01:00:00, 19 days 01:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 40
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
|
||||
3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00,
|
||||
18 days 01:00:00, 19 days 01:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_repr_timedelta_ordered(self):
|
||||
idx = timedelta_range("1 days", periods=5)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days]
|
||||
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
idx = timedelta_range("1 hours", periods=20)
|
||||
c = Categorical(idx, ordered=True)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 20
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
|
||||
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
|
||||
18 days 01:00:00 < 19 days 01:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
c = Categorical(idx.append(idx), categories=idx, ordered=True)
|
||||
exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]
|
||||
Length: 40
|
||||
Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
|
||||
3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 <
|
||||
18 days 01:00:00 < 19 days 01:00:00]""" # noqa: E501
|
||||
|
||||
assert repr(c) == exp
|
||||
|
||||
def test_categorical_index_repr(self):
|
||||
idx = CategoricalIndex(Categorical([1, 2, 3]))
|
||||
exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(idx) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64)))
|
||||
exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_ordered(self):
|
||||
i = CategoricalIndex(Categorical([1, 2, 3], ordered=True))
|
||||
exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64), ordered=True))
|
||||
exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=True, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_datetime(self):
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
|
||||
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
|
||||
'2011-01-01 13:00:00'],
|
||||
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_datetime_ordered(self):
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00',
|
||||
'2011-01-01 11:00:00', '2011-01-01 12:00:00',
|
||||
'2011-01-01 13:00:00'],
|
||||
categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(idx.append(idx), ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00',
|
||||
'2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00',
|
||||
'2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00',
|
||||
'2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00',
|
||||
'2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'],
|
||||
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_period(self):
|
||||
# test all length
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=1)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=2)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=3)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
i = CategoricalIndex(Categorical(idx.append(idx)))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00',
|
||||
'2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00',
|
||||
'2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range("2011-01", freq="M", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_period_ordered(self):
|
||||
idx = period_range("2011-01-01 09:00", freq="h", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00',
|
||||
'2011-01-01 12:00', '2011-01-01 13:00'],
|
||||
categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = period_range("2011-01", freq="M", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_timedelta(self):
|
||||
idx = timedelta_range("1 days", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days, 2 days, 3 days, 4 days, 5 days], ordered=False, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = timedelta_range("1 hours", periods=10)
|
||||
i = CategoricalIndex(Categorical(idx))
|
||||
exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
|
||||
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
|
||||
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
|
||||
'9 days 01:00:00'],
|
||||
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00], ordered=False, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_index_repr_timedelta_ordered(self):
|
||||
idx = timedelta_range("1 days", periods=5)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days, 2 days, 3 days, 4 days, 5 days], ordered=True, dtype='category')""" # noqa: E501
|
||||
assert repr(i) == exp
|
||||
|
||||
idx = timedelta_range("1 hours", periods=10)
|
||||
i = CategoricalIndex(Categorical(idx, ordered=True))
|
||||
exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00',
|
||||
'3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00',
|
||||
'6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00',
|
||||
'9 days 01:00:00'],
|
||||
categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00], ordered=True, dtype='category')""" # noqa: E501
|
||||
|
||||
assert repr(i) == exp
|
||||
|
||||
def test_categorical_str_repr(self):
|
||||
# GH 33676
|
||||
result = repr(Categorical([1, "2", 3, 4]))
|
||||
expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']"
|
||||
assert result == expected
|
@ -0,0 +1,128 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalSort:
|
||||
def test_argsort(self):
|
||||
c = Categorical([5, 3, 1, 4, 2], ordered=True)
|
||||
|
||||
expected = np.array([2, 4, 1, 3, 0])
|
||||
tm.assert_numpy_array_equal(
|
||||
c.argsort(ascending=True), expected, check_dtype=False
|
||||
)
|
||||
|
||||
expected = expected[::-1]
|
||||
tm.assert_numpy_array_equal(
|
||||
c.argsort(ascending=False), expected, check_dtype=False
|
||||
)
|
||||
|
||||
def test_numpy_argsort(self):
|
||||
c = Categorical([5, 3, 1, 4, 2], ordered=True)
|
||||
|
||||
expected = np.array([2, 4, 1, 3, 0])
|
||||
tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False)
|
||||
|
||||
tm.assert_numpy_array_equal(
|
||||
np.argsort(c, kind="mergesort"), expected, check_dtype=False
|
||||
)
|
||||
|
||||
msg = "the 'axis' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(c, axis=0)
|
||||
|
||||
msg = "the 'order' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(c, order="C")
|
||||
|
||||
def test_sort_values(self):
|
||||
# unordered cats are sortable
|
||||
cat = Categorical(["a", "b", "b", "a"], ordered=False)
|
||||
cat.sort_values()
|
||||
|
||||
cat = Categorical(["a", "c", "b", "d"], ordered=True)
|
||||
|
||||
# sort_values
|
||||
res = cat.sort_values()
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=object)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
cat = Categorical(
|
||||
["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True
|
||||
)
|
||||
res = cat.sort_values()
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=object)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
res = cat.sort_values(ascending=False)
|
||||
exp = np.array(["d", "c", "b", "a"], dtype=object)
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
# sort (inplace order)
|
||||
cat1 = cat.copy()
|
||||
orig_codes = cat1._codes
|
||||
cat1.sort_values(inplace=True)
|
||||
assert cat1._codes is orig_codes
|
||||
exp = np.array(["a", "b", "c", "d"], dtype=object)
|
||||
tm.assert_numpy_array_equal(cat1.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, cat.categories)
|
||||
|
||||
# reverse
|
||||
cat = Categorical(["a", "c", "c", "b", "d"], ordered=True)
|
||||
res = cat.sort_values(ascending=False)
|
||||
exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object)
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_val)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
def test_sort_values_na_position(self):
|
||||
# see gh-12882
|
||||
cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True)
|
||||
exp_categories = Index([2, 5])
|
||||
|
||||
exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
|
||||
res = cat.sort_values() # default arguments
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0])
|
||||
res = cat.sort_values(ascending=True, na_position="first")
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0])
|
||||
res = cat.sort_values(ascending=False, na_position="first")
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan])
|
||||
res = cat.sort_values(ascending=True, na_position="last")
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan])
|
||||
res = cat.sort_values(ascending=False, na_position="last")
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
|
||||
res = cat.sort_values(ascending=False, na_position="last")
|
||||
exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object)
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_val)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
||||
|
||||
cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True)
|
||||
res = cat.sort_values(ascending=False, na_position="first")
|
||||
exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object)
|
||||
exp_categories = Index(["a", "b", "c", "d"])
|
||||
tm.assert_numpy_array_equal(res.__array__(), exp_val)
|
||||
tm.assert_index_equal(res.categories, exp_categories)
|
@ -0,0 +1,26 @@
|
||||
from pandas import Categorical
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class SubclassedCategorical(Categorical):
    """Trivial Categorical subclass that adds and overrides nothing."""
|
||||
|
||||
|
||||
class TestCategoricalSubclassing:
    """Constructing or deriving from a Categorical subclass keeps the subclass type."""

    def test_constructor(self):
        """The plain constructor returns the subclass with the same contents."""
        values = ["a", "b", "c"]
        sub = SubclassedCategorical(values)
        assert isinstance(sub, SubclassedCategorical)
        tm.assert_categorical_equal(sub, Categorical(values))

    def test_from_codes(self):
        """``from_codes`` called on the subclass returns the subclass."""
        codes, categories = [1, 0, 2], ["a", "b", "c"]
        sub = SubclassedCategorical.from_codes(codes, categories)
        assert isinstance(sub, SubclassedCategorical)
        tm.assert_categorical_equal(sub, Categorical.from_codes(codes, categories))

    def test_map(self):
        """``map`` on a subclass instance returns the subclass."""
        sub = SubclassedCategorical(["a", "b", "c"])
        mapped = sub.map(lambda value: value.upper(), na_action=None)
        assert isinstance(mapped, SubclassedCategorical)
        tm.assert_categorical_equal(mapped, Categorical(["A", "B", "C"]))
|
@ -0,0 +1,89 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Categorical
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(params=[True, False])
def allow_fill(request):
    """Parametrized fixture yielding each ``allow_fill`` value (True, then False)."""
    flag = request.param
    return flag
|
||||
|
||||
|
||||
class TestTake:
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
|
||||
def test_take_default_allow_fill(self):
|
||||
cat = Categorical(["a", "b"])
|
||||
with tm.assert_produces_warning(None):
|
||||
result = cat.take([0, -1])
|
||||
|
||||
assert result.equals(cat)
|
||||
|
||||
def test_take_positive_no_warning(self):
|
||||
cat = Categorical(["a", "b"])
|
||||
with tm.assert_produces_warning(None):
|
||||
cat.take([0, 0])
|
||||
|
||||
def test_take_bounds(self, allow_fill):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
cat = Categorical(["a", "b", "a"])
|
||||
if allow_fill:
|
||||
msg = "indices are out-of-bounds"
|
||||
else:
|
||||
msg = "index 4 is out of bounds for( axis 0 with)? size 3"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
cat.take([4, 5], allow_fill=allow_fill)
|
||||
|
||||
def test_take_empty(self, allow_fill):
|
||||
# https://github.com/pandas-dev/pandas/issues/20664
|
||||
cat = Categorical([], categories=["a", "b"])
|
||||
if allow_fill:
|
||||
msg = "indices are out-of-bounds"
|
||||
else:
|
||||
msg = "cannot do a non-empty take from an empty axes"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
cat.take([0], allow_fill=allow_fill)
|
||||
|
||||
def test_positional_take(self, ordered):
|
||||
cat = Categorical(["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered)
|
||||
result = cat.take([0, 1, 2], allow_fill=False)
|
||||
expected = Categorical(
|
||||
["a", "a", "b"], categories=cat.categories, ordered=ordered
|
||||
)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_positional_take_unobserved(self, ordered):
|
||||
cat = Categorical(["a", "b"], categories=["a", "b", "c"], ordered=ordered)
|
||||
result = cat.take([1, 0], allow_fill=False)
|
||||
expected = Categorical(["b", "a"], categories=cat.categories, ordered=ordered)
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_allow_fill(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = Categorical(["a", "a", "b"])
|
||||
result = cat.take([0, -1, -1], allow_fill=True)
|
||||
expected = Categorical(["a", np.nan, np.nan], categories=["a", "b"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_with_negative_one(self):
|
||||
# -1 was a category
|
||||
cat = Categorical([-1, 0, 1])
|
||||
result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1)
|
||||
expected = Categorical([-1, -1, 0], categories=[-1, 0, 1])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_value(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = Categorical(["a", "b", "c"])
|
||||
result = cat.take([0, 1, -1], fill_value="a", allow_fill=True)
|
||||
expected = Categorical(["a", "b", "a"], categories=["a", "b", "c"])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
|
||||
def test_take_fill_value_new_raises(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23296
|
||||
cat = Categorical(["a", "b", "c"])
|
||||
xpr = r"Cannot setitem on a Categorical with a new category \(d\)"
|
||||
with pytest.raises(TypeError, match=xpr):
|
||||
cat.take([0, 1, -1], fill_value="d", allow_fill=True)
|
@ -0,0 +1,19 @@
|
||||
import pytest
|
||||
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalWarnings:
    """Warning behaviour when a Categorical is inspected interactively."""

    def test_tab_complete_warning(self, ip):
        """Run IPython tab completion on a Categorical under a warning check."""
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        ip.run_cell("import pandas as pd; c = pd.Categorical([])")

        # GH 31324 newer jedi version raises Deprecation warning;
        # appears resolved 2021-02-02
        no_warning = tm.assert_produces_warning(None, raise_on_extra_warnings=False)
        with no_warning, provisionalcompleter("ignore"):
            list(ip.Completer.completions("c.", 1))
|
@ -0,0 +1,284 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import iNaT
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
|
||||
|
||||
class TestDatetimeArrayConstructor:
|
||||
def test_from_sequence_invalid_type(self):
|
||||
mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)])
|
||||
with pytest.raises(TypeError, match="Cannot create a DatetimeArray"):
|
||||
DatetimeArray._from_sequence(mi, dtype="M8[ns]")
|
||||
|
||||
def test_only_1dim_accepted(self):
|
||||
arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]")
|
||||
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Only 1-dimensional"):
|
||||
# 3-dim, we allow 2D to sneak in for ops purposes GH#29853
|
||||
DatetimeArray(arr.reshape(2, 2, 1))
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Only 1-dimensional"):
|
||||
# 0-dim
|
||||
DatetimeArray(arr[[0]].squeeze())
|
||||
|
||||
def test_freq_validation(self):
|
||||
# GH#24623 check that invalid instances cannot be created with the
|
||||
# public constructor
|
||||
arr = np.arange(5, dtype=np.int64) * 3600 * 10**9
|
||||
|
||||
msg = (
|
||||
"Inferred frequency h from passed values does not "
|
||||
"conform to passed frequency W-SUN"
|
||||
)
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
DatetimeArray(arr, freq="W")
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"meth",
|
||||
[
|
||||
DatetimeArray._from_sequence,
|
||||
pd.to_datetime,
|
||||
pd.DatetimeIndex,
|
||||
],
|
||||
)
|
||||
def test_mixing_naive_tzaware_raises(self, meth):
|
||||
# GH#24569
|
||||
arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")])
|
||||
|
||||
msg = (
|
||||
"Cannot mix tz-aware with tz-naive values|"
|
||||
"Tz-aware datetime.datetime cannot be converted "
|
||||
"to datetime64 unless utc=True"
|
||||
)
|
||||
|
||||
for obj in [arr, arr[::-1]]:
|
||||
# check that we raise regardless of whether naive is found
|
||||
# before aware or vice-versa
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
meth(obj)
|
||||
|
||||
def test_from_pandas_array(self):
|
||||
arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9
|
||||
|
||||
result = DatetimeArray._from_sequence(arr, dtype="M8[ns]")._with_freq("infer")
|
||||
|
||||
expected = pd.date_range("1970-01-01", periods=5, freq="h")._data
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
def test_mismatched_timezone_raises(self):
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
arr = DatetimeArray(
|
||||
np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"),
|
||||
dtype=DatetimeTZDtype(tz="US/Central"),
|
||||
)
|
||||
dtype = DatetimeTZDtype(tz="US/Eastern")
|
||||
msg = r"dtype=datetime64\[ns.*\] does not match data dtype datetime64\[ns.*\]"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
DatetimeArray(arr, dtype=dtype)
|
||||
|
||||
# also with mismatched tzawareness
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
DatetimeArray(arr, dtype=np.dtype("M8[ns]"))
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
DatetimeArray(arr.tz_localize(None), dtype=arr.dtype)
|
||||
|
||||
def test_non_array_raises(self):
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="list"):
|
||||
DatetimeArray([1, 2, 3])
|
||||
|
||||
def test_bool_dtype_raises(self):
|
||||
arr = np.array([1, 2, 3], dtype="bool")
|
||||
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
msg = "Unexpected value for 'dtype': 'bool'. Must be"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
DatetimeArray(arr)
|
||||
|
||||
msg = r"dtype bool cannot be converted to datetime64\[ns\]"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
DatetimeArray._from_sequence(arr, dtype="M8[ns]")
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.DatetimeIndex(arr)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.to_datetime(arr)
|
||||
|
||||
def test_incorrect_dtype_raises(self):
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
|
||||
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category")
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
|
||||
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="m8[s]")
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
|
||||
DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="M8[D]")
|
||||
|
||||
def test_mismatched_values_dtype_units(self):
|
||||
arr = np.array([1, 2, 3], dtype="M8[s]")
|
||||
dtype = np.dtype("M8[ns]")
|
||||
msg = "Values resolution does not match dtype."
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
DatetimeArray(arr, dtype=dtype)
|
||||
|
||||
dtype2 = DatetimeTZDtype(tz="UTC", unit="ns")
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
DatetimeArray(arr, dtype=dtype2)
|
||||
|
||||
def test_freq_infer_raises(self):
|
||||
depr_msg = "DatetimeArray.__init__ is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
|
||||
with pytest.raises(ValueError, match="Frequency inference"):
|
||||
DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer")
|
||||
|
||||
def test_copy(self):
|
||||
data = np.array([1, 2, 3], dtype="M8[ns]")
|
||||
arr = DatetimeArray._from_sequence(data, copy=False)
|
||||
assert arr._ndarray is data
|
||||
|
||||
arr = DatetimeArray._from_sequence(data, copy=True)
|
||||
assert arr._ndarray is not data
|
||||
|
||||
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
|
||||
def test_numpy_datetime_unit(self, unit):
|
||||
data = np.array([1, 2, 3], dtype=f"M8[{unit}]")
|
||||
arr = DatetimeArray._from_sequence(data)
|
||||
assert arr.unit == unit
|
||||
assert arr[0].unit == unit
|
||||
|
||||
|
||||
class TestSequenceToDT64NS:
    """Checks for ``DatetimeArray._from_sequence`` with tz-aware dtypes and 2D input."""

    def test_tz_dtype_mismatch_raises(self):
        """Re-wrapping tz-aware data with a different tz dtype raises TypeError."""
        central = DatetimeArray._from_sequence(
            ["2000"], dtype=DatetimeTZDtype(tz="US/Central")
        )
        with pytest.raises(TypeError, match="data is already tz-aware"):
            DatetimeArray._from_sequence(central, dtype=DatetimeTZDtype(tz="UTC"))

    def test_tz_dtype_matches(self):
        """Re-wrapping tz-aware data with the same dtype gives an equal array."""
        central_dtype = DatetimeTZDtype(tz="US/Central")
        original = DatetimeArray._from_sequence(["2000"], dtype=central_dtype)
        rewrapped = DatetimeArray._from_sequence(original, dtype=central_dtype)
        tm.assert_equal(original, rewrapped)

    @pytest.mark.parametrize("order", ["F", "C"])
    def test_2d(self, order):
        """2D object input matches the raveled input reshaped back to 2D."""
        dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
        grid = np.array(dti, dtype=object).reshape(3, 2)
        # For the "F" case the test works on the transposed grid.
        grid = grid.T if order == "F" else grid

        result = DatetimeArray._from_sequence(grid, dtype=dti.dtype)
        flat = DatetimeArray._from_sequence(grid.ravel(), dtype=dti.dtype)
        tm.assert_datetime_array_equal(result, flat.reshape(grid.shape))
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Arrow interaction
|
||||
|
||||
|
||||
# Inputs for the same-unit Arrow cases: zero, an ordinary value, a null,
# iNaT (imported from pandas._libs), and the largest int64 with its negation.
EXTREME_VALUES = [0, 123_456_789, None, iNaT, 2**63 - 1, -(2**63 - 1)]
# Inputs for the cases that convert from a finer unit to a coarser one.
FINE_TO_COARSE_SAFE = [123 * 10**9, None, -123 * 10**9]
# Inputs for the cases that convert from a coarser unit to a finer one.
COARSE_TO_FINE_SAFE = [123, None, -123]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("pa_unit", "pd_unit", "pa_tz", "pd_tz", "data"),
    [
        ("s", "s", "UTC", "UTC", EXTREME_VALUES),
        ("ms", "ms", "UTC", "Europe/Berlin", EXTREME_VALUES),
        ("us", "us", "US/Eastern", "UTC", EXTREME_VALUES),
        ("ns", "ns", "US/Central", "Asia/Kolkata", EXTREME_VALUES),
        ("ns", "s", "UTC", "UTC", FINE_TO_COARSE_SAFE),
        ("us", "ms", "UTC", "Europe/Berlin", FINE_TO_COARSE_SAFE),
        ("ms", "us", "US/Eastern", "UTC", COARSE_TO_FINE_SAFE),
        ("s", "ns", "US/Central", "Asia/Kolkata", COARSE_TO_FINE_SAFE),
    ],
)
def test_from_arrow_with_different_units_and_timezones_with(
    pa_unit, pd_unit, pa_tz, pd_tz, data
):
    """Convert pyarrow timestamps to a DatetimeTZDtype of another unit/tz.

    The conversion is checked for both a plain pyarrow array and a
    single-chunk chunked array. Skipped when pyarrow is not installed.
    """
    pa = pytest.importorskip("pyarrow")

    arrow_values = pa.array(data, type=pa.timestamp(pa_unit, tz=pa_tz))
    target = DatetimeTZDtype(unit=pd_unit, tz=pd_tz)

    # Reference result: build in the Arrow unit as UTC, then cast to the target.
    utc_array = DatetimeArray._from_sequence(data, dtype=f"M8[{pa_unit}, UTC]")
    expected = utc_array.astype(target, copy=False)

    for source in (arrow_values, pa.chunked_array([arrow_values])):
        tm.assert_extension_array_equal(target.__from_arrow__(source), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("unit", "tz"),
    [
        ("s", "UTC"),
        ("ms", "Europe/Berlin"),
        ("us", "US/Eastern"),
        ("ns", "Asia/Kolkata"),
        ("ns", "UTC"),
    ],
)
def test_from_arrow_from_empty(unit, tz):
    """Convert an empty pyarrow array to a tz-aware DatetimeArray.

    Checked for both a plain array and a single-chunk chunked array.
    Skipped when pyarrow is not installed.
    """
    pa = pytest.importorskip("pyarrow")

    data = []
    arrow_values = pa.array(data)
    target = DatetimeTZDtype(unit=unit, tz=tz)

    naive = DatetimeArray._from_sequence(np.array(data, dtype=f"datetime64[{unit}]"))
    expected = naive.tz_localize(tz=tz)

    for source in (arrow_values, pa.chunked_array([arrow_values])):
        tm.assert_extension_array_equal(target.__from_arrow__(source), expected)
|
||||
|
||||
|
||||
def test_from_arrow_from_integers():
    """Convert a pyarrow integer array (with a null and iNaT) to ns/UTC datetimes.

    Checked for both a plain array and a single-chunk chunked array.
    Skipped when pyarrow is not installed.
    """
    pa = pytest.importorskip("pyarrow")

    data = [0, 123456789, None, 2**63 - 1, iNaT, -123456789]
    arrow_values = pa.array(data)
    target = DatetimeTZDtype(unit="ns", tz="UTC")

    naive = DatetimeArray._from_sequence(np.array(data, dtype="datetime64[ns]"))
    expected = naive.tz_localize("UTC")

    for source in (arrow_values, pa.chunked_array([arrow_values])):
        tm.assert_extension_array_equal(target.__from_arrow__(source), expected)
|
@ -0,0 +1,44 @@
|
||||
import pytest
|
||||
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
|
||||
|
||||
class TestAccumulator:
    """Checks for ``DatetimeArray._accumulate``."""

    def test_accumulators_freq(self):
        """cummin / cummax on an array that carries an inferred freq."""
        # GH#50297
        days = ["2000-01-01", "2000-01-02", "2000-01-03"]
        arr = DatetimeArray._from_sequence(days, dtype="M8[ns]")._with_freq("infer")

        running_min = arr._accumulate("cummin")
        expected_min = DatetimeArray._from_sequence(["2000-01-01"] * 3, dtype="M8[ns]")
        tm.assert_datetime_array_equal(running_min, expected_min)

        # The input is increasing, so the running max equals the input values.
        running_max = arr._accumulate("cummax")
        expected_max = DatetimeArray._from_sequence(days, dtype="M8[ns]")
        tm.assert_datetime_array_equal(running_max, expected_max)

    @pytest.mark.parametrize("func", ["cumsum", "cumprod"])
    def test_accumulators_disallowed(self, func):
        """cumsum / cumprod raise TypeError for datetime data."""
        # GH#50297
        days = ["2000-01-01", "2000-01-02"]
        arr = DatetimeArray._from_sequence(days, dtype="M8[ns]")._with_freq("infer")
        with pytest.raises(TypeError, match=f"Accumulation {func}"):
            arr._accumulate(func)
|
@ -0,0 +1,183 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import NaT
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import DatetimeArray
|
||||
|
||||
|
||||
class TestReductions:
|
||||
@pytest.fixture(params=["s", "ms", "us", "ns"])
|
||||
def unit(self, request):
|
||||
return request.param
|
||||
|
||||
@pytest.fixture
|
||||
def arr1d(self, tz_naive_fixture):
|
||||
"""Fixture returning DatetimeArray with parametrized timezones"""
|
||||
tz = tz_naive_fixture
|
||||
dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]")
|
||||
arr = DatetimeArray._from_sequence(
|
||||
[
|
||||
"2000-01-03",
|
||||
"2000-01-03",
|
||||
"NaT",
|
||||
"2000-01-02",
|
||||
"2000-01-05",
|
||||
"2000-01-04",
|
||||
],
|
||||
dtype=dtype,
|
||||
)
|
||||
return arr
|
||||
|
||||
def test_min_max(self, arr1d, unit):
|
||||
arr = arr1d
|
||||
arr = arr.as_unit(unit)
|
||||
tz = arr.tz
|
||||
|
||||
result = arr.min()
|
||||
expected = pd.Timestamp("2000-01-02", tz=tz).as_unit(unit)
|
||||
assert result == expected
|
||||
assert result.unit == expected.unit
|
||||
|
||||
result = arr.max()
|
||||
expected = pd.Timestamp("2000-01-05", tz=tz).as_unit(unit)
|
||||
assert result == expected
|
||||
assert result.unit == expected.unit
|
||||
|
||||
result = arr.min(skipna=False)
|
||||
assert result is NaT
|
||||
|
||||
result = arr.max(skipna=False)
|
||||
assert result is NaT
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "US/Central"])
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
def test_min_max_empty(self, skipna, tz):
|
||||
dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]")
|
||||
arr = DatetimeArray._from_sequence([], dtype=dtype)
|
||||
result = arr.min(skipna=skipna)
|
||||
assert result is NaT
|
||||
|
||||
result = arr.max(skipna=skipna)
|
||||
assert result is NaT
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "US/Central"])
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
def test_median_empty(self, skipna, tz):
|
||||
dtype = DatetimeTZDtype(tz=tz) if tz is not None else np.dtype("M8[ns]")
|
||||
arr = DatetimeArray._from_sequence([], dtype=dtype)
|
||||
result = arr.median(skipna=skipna)
|
||||
assert result is NaT
|
||||
|
||||
arr = arr.reshape(0, 3)
|
||||
result = arr.median(axis=0, skipna=skipna)
|
||||
expected = type(arr)._from_sequence([NaT, NaT, NaT], dtype=arr.dtype)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
result = arr.median(axis=1, skipna=skipna)
|
||||
expected = type(arr)._from_sequence([], dtype=arr.dtype)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_median(self, arr1d):
|
||||
arr = arr1d
|
||||
|
||||
result = arr.median()
|
||||
assert result == arr[0]
|
||||
result = arr.median(skipna=False)
|
||||
assert result is NaT
|
||||
|
||||
result = arr.dropna().median(skipna=False)
|
||||
assert result == arr[0]
|
||||
|
||||
result = arr.median(axis=0)
|
||||
assert result == arr[0]
|
||||
|
||||
def test_median_axis(self, arr1d):
|
||||
arr = arr1d
|
||||
assert arr.median(axis=0) == arr.median()
|
||||
assert arr.median(axis=0, skipna=False) is NaT
|
||||
|
||||
msg = r"abs\(axis\) must be less than ndim"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr.median(axis=1)
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning")
|
||||
def test_median_2d(self, arr1d):
|
||||
arr = arr1d.reshape(1, -1)
|
||||
|
||||
# axis = None
|
||||
assert arr.median() == arr1d.median()
|
||||
assert arr.median(skipna=False) is NaT
|
||||
|
||||
# axis = 0
|
||||
result = arr.median(axis=0)
|
||||
expected = arr1d
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
# Since column 3 is all-NaT, we get NaT there with or without skipna
|
||||
result = arr.median(axis=0, skipna=False)
|
||||
expected = arr1d
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = arr.median(axis=1)
|
||||
expected = type(arr)._from_sequence([arr1d.median()], dtype=arr.dtype)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
result = arr.median(axis=1, skipna=False)
|
||||
expected = type(arr)._from_sequence([NaT], dtype=arr.dtype)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_mean(self, arr1d):
|
||||
arr = arr1d
|
||||
|
||||
# manually verified result
|
||||
expected = arr[0] + 0.4 * pd.Timedelta(days=1)
|
||||
|
||||
result = arr.mean()
|
||||
assert result == expected
|
||||
result = arr.mean(skipna=False)
|
||||
assert result is NaT
|
||||
|
||||
result = arr.dropna().mean(skipna=False)
|
||||
assert result == expected
|
||||
|
||||
result = arr.mean(axis=0)
|
||||
assert result == expected
|
||||
|
||||
def test_mean_2d(self):
|
||||
dti = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
|
||||
dta = dti._data.reshape(3, 2)
|
||||
|
||||
result = dta.mean(axis=0)
|
||||
expected = dta[1]
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
result = dta.mean(axis=1)
|
||||
expected = dta[:, 0] + pd.Timedelta(hours=12)
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
result = dta.mean(axis=None)
|
||||
expected = dti.mean()
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
def test_mean_empty(self, arr1d, skipna):
|
||||
arr = arr1d[:0]
|
||||
|
||||
assert arr.mean(skipna=skipna) is NaT
|
||||
|
||||
arr2d = arr.reshape(0, 3)
|
||||
result = arr2d.mean(axis=0, skipna=skipna)
|
||||
expected = DatetimeArray._from_sequence([NaT, NaT, NaT], dtype=arr.dtype)
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
result = arr2d.mean(axis=1, skipna=skipna)
|
||||
expected = arr # i.e. 1D, empty
|
||||
tm.assert_datetime_array_equal(result, expected)
|
||||
|
||||
result = arr2d.mean(axis=None, skipna=skipna)
|
||||
assert result is NaT
|
@ -0,0 +1,48 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays.floating import (
|
||||
Float32Dtype,
|
||||
Float64Dtype,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(params=[Float32Dtype, Float64Dtype])
def dtype(request):
    """Parametrized fixture returning a float 'dtype'"""
    # Each param is a dtype class; hand back a fresh instance of it.
    dtype_cls = request.param
    return dtype_cls()
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """Fixture returning 'data' array according to parametrized float 'dtype'"""
    # Two float ranges, each followed by a missing value, then two trailing
    # values.
    values = [
        *np.arange(0.1, 0.9, 0.1),
        pd.NA,
        *np.arange(1, 9.8, 0.1),
        pd.NA,
        9.9,
        10.0,
    ]
    return pd.array(values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """
    Fixture returning array with missing data according to parametrized float
    'dtype'.
    """
    values = [np.nan, 0.1]
    return pd.array(values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture returning 'data' or 'data_missing' float arrays.

    Used to test dtype conversion with and without missing values.
    """
    # Map each param name to the fixture value it selects; any other name
    # yields None.
    by_name = {"data": data, "data_missing": data_missing}
    return by_name.get(request.param)
|
@ -0,0 +1,244 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
|
||||
# Basic test for the arithmetic array ops
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"opname, exp",
|
||||
[
|
||||
("add", [1.1, 2.2, None, None, 5.5]),
|
||||
("mul", [0.1, 0.4, None, None, 2.5]),
|
||||
("sub", [0.9, 1.8, None, None, 4.5]),
|
||||
("truediv", [10.0, 10.0, None, None, 10.0]),
|
||||
("floordiv", [9.0, 9.0, None, None, 10.0]),
|
||||
("mod", [0.1, 0.2, None, None, 0.0]),
|
||||
],
|
||||
ids=["add", "mul", "sub", "div", "floordiv", "mod"],
|
||||
)
|
||||
def test_array_op(dtype, opname, exp):
|
||||
a = pd.array([1.0, 2.0, None, 4.0, 5.0], dtype=dtype)
|
||||
b = pd.array([0.1, 0.2, 0.3, None, 0.5], dtype=dtype)
|
||||
|
||||
op = getattr(operator, opname)
|
||||
|
||||
result = op(a, b)
|
||||
expected = pd.array(exp, dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
|
||||
def test_divide_by_zero(dtype, zero, negative):
|
||||
# TODO pending NA/NaN discussion
|
||||
# https://github.com/pandas-dev/pandas/issues/32265/
|
||||
a = pd.array([0, 1, -1, None], dtype=dtype)
|
||||
result = a / zero
|
||||
expected = FloatingArray(
|
||||
np.array([np.nan, np.inf, -np.inf, np.nan], dtype=dtype.numpy_dtype),
|
||||
np.array([False, False, False, True]),
|
||||
)
|
||||
if negative:
|
||||
expected *= -1
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_pow_scalar(dtype):
|
||||
a = pd.array([-1, 0, 1, None, 2], dtype=dtype)
|
||||
result = a**0
|
||||
expected = pd.array([1, 1, 1, 1, 1], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = a**1
|
||||
expected = pd.array([-1, 0, 1, None, 2], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = a**pd.NA
|
||||
expected = pd.array([None, None, 1, None, None], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = a**np.nan
|
||||
# TODO np.nan should be converted to pd.NA / missing before operation?
|
||||
expected = FloatingArray(
|
||||
np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype),
|
||||
mask=a._mask,
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# reversed
|
||||
a = a[1:] # Can't raise integers to negative powers.
|
||||
|
||||
result = 0**a
|
||||
expected = pd.array([1, 0, None, 0], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = 1**a
|
||||
expected = pd.array([1, 1, 1, 1], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = pd.NA**a
|
||||
expected = pd.array([1, None, None, None], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = np.nan**a
|
||||
expected = FloatingArray(
|
||||
np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype), mask=a._mask
|
||||
)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_pow_array(dtype):
|
||||
a = pd.array([0, 0, 0, 1, 1, 1, None, None, None], dtype=dtype)
|
||||
b = pd.array([0, 1, None, 0, 1, None, 0, 1, None], dtype=dtype)
|
||||
result = a**b
|
||||
expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_rpow_one_to_na():
    """ndarray ** all-missing Float64 array: 1.0 gives 1.0, 2.0 stays missing."""
    # https://github.com/pandas-dev/pandas/issues/22022
    # https://github.com/pandas-dev/pandas/issues/29997
    exponents = pd.array([np.nan, np.nan], dtype="Float64")
    powered = np.array([1.0, 2.0]) ** exponents
    tm.assert_extension_array_equal(
        powered, pd.array([1.0, np.nan], dtype="Float64")
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("other", [0, 0.5])
def test_arith_zero_dim_ndarray(other):
    """Adding a zero-dim ndarray matches adding the plain scalar."""
    floats = pd.array([1, None, 2], dtype="Float64")
    with_ndarray = floats + np.array(other)
    with_scalar = floats + other
    tm.assert_equal(with_ndarray, with_scalar)
|
||||
|
||||
|
||||
# Test generic characteristics / errors
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
|
||||
op = all_arithmetic_operators
|
||||
s = pd.Series(data)
|
||||
ops = getattr(s, op)
|
||||
|
||||
if using_infer_string:
|
||||
import pyarrow as pa
|
||||
|
||||
errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
|
||||
else:
|
||||
errs = TypeError
|
||||
|
||||
# invalid scalars
|
||||
msg = "|".join(
|
||||
[
|
||||
r"can only perform ops with numeric values",
|
||||
r"FloatingArray cannot perform the operation mod",
|
||||
"unsupported operand type",
|
||||
"not all arguments converted during string formatting",
|
||||
"can't multiply sequence by non-int of type 'float'",
|
||||
"ufunc 'subtract' cannot use operands with types dtype",
|
||||
r"can only concatenate str \(not \"float\"\) to str",
|
||||
"ufunc '.*' not supported for the input types, and the inputs could not",
|
||||
"ufunc '.*' did not contain a loop with signature matching types",
|
||||
"Concatenation operation is not implemented for NumPy arrays",
|
||||
"has no kernel",
|
||||
"not implemented",
|
||||
]
|
||||
)
|
||||
with pytest.raises(errs, match=msg):
|
||||
ops("foo")
|
||||
with pytest.raises(errs, match=msg):
|
||||
ops(pd.Timestamp("20180101"))
|
||||
|
||||
# invalid array-likes
|
||||
with pytest.raises(errs, match=msg):
|
||||
ops(pd.Series("foo", index=s.index))
|
||||
|
||||
msg = "|".join(
|
||||
[
|
||||
"can only perform ops with numeric values",
|
||||
"cannot perform .* with this index type: DatetimeArray",
|
||||
"Addition/subtraction of integers and integer-arrays "
|
||||
"with DatetimeArray is no longer supported. *",
|
||||
"unsupported operand type",
|
||||
"not all arguments converted during string formatting",
|
||||
"can't multiply sequence by non-int of type 'float'",
|
||||
"ufunc 'subtract' cannot use operands with types dtype",
|
||||
(
|
||||
"ufunc 'add' cannot use operands with types "
|
||||
rf"dtype\('{tm.ENDIAN}M8\[ns\]'\)"
|
||||
),
|
||||
r"ufunc 'add' cannot use operands with types dtype\('float\d{2}'\)",
|
||||
"cannot subtract DatetimeArray from ndarray",
|
||||
"has no kernel",
|
||||
"not implemented",
|
||||
]
|
||||
)
|
||||
with pytest.raises(errs, match=msg):
|
||||
ops(pd.Series(pd.date_range("20180101", periods=len(s))))
|
||||
|
||||
|
||||
# Various
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_cross_type_arithmetic():
    """Arithmetic across Float64, Float32 and numpy float64 columns."""
    columns = {
        "A": pd.array([1, 2, np.nan], dtype="Float64"),
        "B": pd.array([1, np.nan, 3], dtype="Float32"),
        "C": np.array([1, 2, 3], dtype="float64"),
    }
    df = pd.DataFrame(columns)

    # masked Float64 + numpy float64
    summed = df.A + df.C
    tm.assert_series_equal(summed, pd.Series([2, 4, np.nan], dtype="Float64"))

    # a comparison on the arithmetic result yields a masked boolean Series
    compared = (df.A + df.C) * 3 == 12
    tm.assert_series_equal(
        compared, pd.Series([False, True, None], dtype="boolean")
    )

    # masked Float64 + masked Float32
    mixed = df.A + df.B
    tm.assert_series_equal(mixed, pd.Series([2, np.nan, np.nan], dtype="Float64"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"source, neg_target, abs_target",
|
||||
[
|
||||
([1.1, 2.2, 3.3], [-1.1, -2.2, -3.3], [1.1, 2.2, 3.3]),
|
||||
([1.1, 2.2, None], [-1.1, -2.2, None], [1.1, 2.2, None]),
|
||||
([-1.1, 0.0, 1.1], [1.1, 0.0, -1.1], [1.1, 0.0, 1.1]),
|
||||
],
|
||||
)
|
||||
def test_unary_float_operators(float_ea_dtype, source, neg_target, abs_target):
|
||||
# GH38794
|
||||
dtype = float_ea_dtype
|
||||
arr = pd.array(source, dtype=dtype)
|
||||
neg_result, pos_result, abs_result = -arr, +arr, abs(arr)
|
||||
neg_target = pd.array(neg_target, dtype=dtype)
|
||||
abs_target = pd.array(abs_target, dtype=dtype)
|
||||
|
||||
tm.assert_extension_array_equal(neg_result, neg_target)
|
||||
tm.assert_extension_array_equal(pos_result, arr)
|
||||
assert not tm.shares_memory(pos_result, arr)
|
||||
tm.assert_extension_array_equal(abs_result, abs_target)
|
||||
|
||||
|
||||
def test_bitwise(dtype):
|
||||
left = pd.array([1, None, 3, 4], dtype=dtype)
|
||||
right = pd.array([None, 3, 5, 4], dtype=dtype)
|
||||
|
||||
with pytest.raises(TypeError, match="unsupported operand type"):
|
||||
left | right
|
||||
with pytest.raises(TypeError, match="unsupported operand type"):
|
||||
left & right
|
||||
with pytest.raises(TypeError, match="unsupported operand type"):
|
||||
left ^ right
|
@ -0,0 +1,128 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_astype():
    """``astype`` to numpy dtypes, with and without missing values."""
    # with missing values
    with_na = pd.array([0.1, 0.2, None], dtype="Float64")

    failing_casts = (
        ("int64", "cannot convert NA to integer"),
        ("bool", "cannot convert float NaN to bool"),
    )
    for target, msg in failing_casts:
        with pytest.raises(ValueError, match=msg):
            with_na.astype(target)

    as_float = with_na.astype("float64")
    tm.assert_numpy_array_equal(
        as_float, np.array([0.1, 0.2, np.nan], dtype="float64")
    )

    # no missing values
    complete = pd.array([0.0, 1.0, 0.5], dtype="Float64")
    passing_casts = (
        ("int64", [0, 1, 0]),
        ("bool", [False, True, True]),
    )
    for target, values in passing_casts:
        converted = complete.astype(target)
        tm.assert_numpy_array_equal(converted, np.array(values, dtype=target))
|
||||
|
||||
|
||||
def test_astype_to_floating_array():
    """``astype`` to masked float dtypes, given by name and by dtype instance."""
    # astype to FloatingArray
    source = pd.array([0.0, 1.0, None], dtype="Float64")

    # Same dtype, spelled as a string and as a dtype object.
    for same_dtype in ("Float64", pd.Float64Dtype()):
        tm.assert_extension_array_equal(source.astype(same_dtype), source)

    narrowed = source.astype("Float32")
    tm.assert_extension_array_equal(
        narrowed, pd.array([0.0, 1.0, None], dtype="Float32")
    )
|
||||
|
||||
|
||||
def test_astype_to_boolean_array():
    """``astype`` to the masked boolean dtype, by name and by dtype instance."""
    # astype to BooleanArray
    source = pd.array([0.0, 1.0, None], dtype="Float64")
    expected = pd.array([False, True, None], dtype="boolean")

    for bool_dtype in ("boolean", pd.BooleanDtype()):
        tm.assert_extension_array_equal(source.astype(bool_dtype), expected)
|
||||
|
||||
|
||||
def test_astype_to_integer_array():
    """``astype("Int64")`` turns 1.5 into 1 and keeps the missing value."""
    # astype to IntegerArray
    source = pd.array([0.0, 1.5, None], dtype="Float64")

    as_int = source.astype("Int64")
    tm.assert_extension_array_equal(as_int, pd.array([0, 1, None], dtype="Int64"))
|
||||
|
||||
|
||||
def test_astype_str():
    """``astype`` to str gives a "U32" ndarray with "<NA>" for missing values."""
    floats = pd.array([0.1, 0.2, None], dtype="Float64")
    expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")

    # The builtin type and its string alias are both checked.
    for str_dtype in (str, "str"):
        tm.assert_numpy_array_equal(floats.astype(str_dtype), expected)
|
||||
|
||||
|
||||
def test_astype_copy():
    """Check how ``astype`` treats ``copy`` for same-dtype and cross-dtype casts."""
    raw = [0.1, 0.2, None]
    arr = pd.array(raw, dtype="Float64")
    orig = pd.array(raw, dtype="Float64")

    # copy=True -> both the data and the mask must be real copies.
    copied = arr.astype("Float64", copy=True)
    assert copied is not arr
    assert not tm.shares_memory(copied, arr)
    # Writing a value touches the data buffer, writing NA touches the mask.
    for new_value in (10, pd.NA):
        copied[0] = new_value
        tm.assert_extension_array_equal(arr, orig)

    # copy=False with an unchanged dtype -> the very same object comes back.
    alias = arr.astype("Float64", copy=False)
    assert alias is arr
    assert np.shares_memory(alias._data, arr._data)
    assert np.shares_memory(alias._mask, arr._mask)
    alias[0] = 10
    assert arr[0] == 10
    alias[0] = pd.NA
    assert arr[0] is pd.NA

    # A different target dtype always needs a copy, even with copy=False;
    # the mask has to be copied too, not only the data.
    arr = pd.array(raw, dtype="Float64")
    orig = pd.array(raw, dtype="Float64")

    converted = arr.astype("Float32", copy=False)
    assert not tm.shares_memory(converted, arr)
    for new_value in (10, pd.NA):
        converted[0] = new_value
        tm.assert_extension_array_equal(arr, orig)
|
||||
|
||||
|
||||
def test_astype_object(dtype):
    """Casting to ``object`` yields Python floats and the ``pd.NA`` singleton."""
    source = pd.array([1.0, pd.NA], dtype=dtype)

    as_object = source.astype(object)
    tm.assert_numpy_array_equal(as_object, np.array([1.0, pd.NA], dtype=object))

    # Array equality alone does not pin the exact element types.
    valid, missing = as_object
    assert isinstance(valid, float)
    assert missing is pd.NA
|
||||
|
||||
|
||||
def test_Float64_conversion():
    """Object Series of numeric strings can be cast to Float64 (GH#40729)."""
    target = pd.Float64Dtype()
    strings = pd.Series(["1", "2", "3", "4"], dtype="object")

    converted = strings.astype(target)

    wanted = pd.Series([1.0, 2.0, 3.0, 4.0], dtype=target)
    tm.assert_series_equal(converted, wanted)
|
@ -0,0 +1,65 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
from pandas.tests.arrays.masked_shared import (
|
||||
ComparisonOps,
|
||||
NumericOps,
|
||||
)
|
||||
|
||||
|
||||
class TestComparisonOps(NumericOps, ComparisonOps):
    """Comparison tests for nullable float arrays, built on the shared masked-array suites."""

    @pytest.mark.parametrize("other", [True, False, pd.NA, -1.0, 0.0, 1])
    def test_scalar(self, other, comparison_op, dtype):
        # Only the scalar list is specific here; the check itself is the shared one.
        ComparisonOps.test_scalar(self, other, comparison_op, dtype)

    def test_compare_with_integerarray(self, comparison_op):
        """Comparing Int64 with Float64 must match comparing Int64 with Int64."""
        ints = pd.array([0, 1, None] * 3, dtype="Int64")
        floats = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype="Float64")
        # Reference operand: the same values held in an integer array.
        floats_as_ints = floats.astype("Int64")

        # Integer array on the left-hand side.
        reference = comparison_op(ints, floats_as_ints)
        tm.assert_extension_array_equal(comparison_op(ints, floats), reference)

        # Floating array on the left-hand side.
        reference = comparison_op(floats_as_ints, ints)
        tm.assert_extension_array_equal(comparison_op(floats, ints), reference)
|
||||
|
||||
|
||||
def test_equals():
    """Arrays of the same class but different dtype are not equal (GH-30652).

    ``equals`` is tested in general in /tests/extension/base/methods; this
    only covers the same-class, different-dtype case.
    """
    wide = pd.array([1, 2, None], dtype="Float64")
    narrow = pd.array([1, 2, None], dtype="Float32")

    assert wide.equals(narrow) is False
|
||||
|
||||
|
||||
def test_equals_nan_vs_na():
    """``equals`` tells a stored NaN apart from a masked NA (GH#44382)."""
    # Left operand: NaN stored in the data, nothing masked.
    mask = np.zeros(3, dtype=bool)
    data = np.array([1.0, np.nan, 3.0], dtype=np.float64)
    left = FloatingArray(data, mask)

    assert left.equals(left)
    tm.assert_extension_array_equal(left, left)
    for duplicate in (left.copy(), FloatingArray(data.copy(), mask.copy())):
        assert left.equals(duplicate)

    # Right operand: position 1 is masked (NA) instead of holding NaN.
    right = FloatingArray(
        np.array([1.0, 2.0, 3.0], dtype=np.float64),
        np.array([False, True, False], dtype=bool),
    )
    assert right.equals(right)
    tm.assert_extension_array_equal(right, right)

    assert not left.equals(right)

    # ``left`` was built on ``mask`` itself, so this masks its position 1 too.
    # After that only data[1] differs, which should not matter for equals.
    mask[1] = True
    assert left.equals(right)
|
@ -0,0 +1,20 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Float64", "Float64"], "Float64"),
        (["Float32", "Float64"], "Float64"),
        (["Float32", "Float32"], "Float32"),
    ],
)
def test_concat_series(to_concat_dtypes, result_dtype):
    """Concatenating nullable float Series gives the expected common dtype."""
    pieces = [pd.Series([1, 2, pd.NA], dtype=each) for each in to_concat_dtypes]
    combined = pd.concat(pieces)

    # Reference: concatenate as object first, then cast to the expected dtype.
    object_piece = pd.Series([1, 2, pd.NA], dtype=object)
    wanted = pd.concat([object_piece] * 2).astype(result_dtype)

    tm.assert_series_equal(combined, wanted)
|
@ -0,0 +1,204 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
from pandas.core.arrays.floating import (
|
||||
Float32Dtype,
|
||||
Float64Dtype,
|
||||
)
|
||||
|
||||
|
||||
def test_uses_pandas_na():
    """A missing entry in a Float64 array is returned as the ``pd.NA`` singleton."""
    with_missing = pd.array([1, None], dtype=Float64Dtype())
    missing = with_missing[1]
    assert missing is pd.NA
|
||||
|
||||
|
||||
def test_floating_array_constructor():
    """``FloatingArray(values, mask)`` stores both arrays and validates their types."""
    values = np.array([1, 2, 3, 4], dtype="float64")
    mask = np.array([False, False, False, True], dtype="bool")

    built = FloatingArray(values, mask)
    tm.assert_extension_array_equal(
        built, pd.array([1, 2, 3, np.nan], dtype="Float64")
    )
    tm.assert_numpy_array_equal(built._data, values)
    tm.assert_numpy_array_equal(built._mask, mask)

    # Lists, or values with a non-float dtype, are rejected.
    msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
    invalid_arguments = [
        (values.tolist(), mask),
        (values, mask.tolist()),
        (values.astype(int), mask),
    ]
    for bad_values, bad_mask in invalid_arguments:
        with pytest.raises(TypeError, match=msg):
            FloatingArray(bad_values, bad_mask)

    # The mask argument is mandatory.
    msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
    with pytest.raises(TypeError, match=msg):
        FloatingArray(values)
|
||||
|
||||
|
||||
def test_floating_array_disallows_float16():
    """Constructing a FloatingArray from float16 data raises TypeError (GH#44715)."""
    half_precision = np.array([1, 2], dtype=np.float16)
    nothing_masked = np.array([False, False])

    with pytest.raises(
        TypeError, match="FloatingArray does not support np.float16 dtype"
    ):
        FloatingArray(half_precision, nothing_masked)
|
||||
|
||||
|
||||
def test_floating_array_disallows_Float16_dtype(request):
    """``"Float16"`` is not a recognised dtype string for ``pd.array`` (GH#44715)."""
    # ``request`` is accepted but not used by this test.
    expected_message = "data type 'Float16' not understood"
    with pytest.raises(TypeError, match=expected_message):
        pd.array([1.0, 2.0], dtype="Float16")
|
||||
|
||||
|
||||
def test_floating_array_constructor_copy():
    """The constructor reuses its inputs by default and copies them with ``copy=True``."""
    values = np.array([1, 2, 3, 4], dtype="float64")
    mask = np.array([False, False, False, True], dtype="bool")

    # Default: the passed arrays themselves are stored.
    shared = FloatingArray(values, mask)
    assert shared._data is values
    assert shared._mask is mask

    # copy=True: neither stored array is the passed object.
    independent = FloatingArray(values, mask, copy=True)
    assert independent._data is not values
    assert independent._mask is not mask
|
||||
|
||||
|
||||
def test_to_array():
    """``pd.array`` on a list of floats without a dtype is expected to give Float64."""
    numbers = [0.1, 0.2, 0.3, 0.4]
    inferred = pd.array(numbers)
    explicit = pd.array(numbers, dtype="Float64")
    tm.assert_extension_array_equal(inferred, explicit)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b",
    [
        ([1, None], [1, pd.NA]),
        ([None], [pd.NA]),
        ([None, np.nan], [pd.NA, pd.NA]),
        ([1, np.nan], [1, pd.NA]),
        ([np.nan], [pd.NA]),
    ],
)
def test_to_array_none_is_nan(a, b):
    """``None`` and ``np.nan`` inputs both end up as ``pd.NA`` in a Float64 array."""
    from_raw = pd.array(a, dtype="Float64")
    from_na = pd.array(b, dtype="Float64")
    tm.assert_extension_array_equal(from_raw, from_na)
|
||||
|
||||
|
||||
def test_to_array_mixed_integer_float():
    """Mixed int/float input, with or without ``None``, is expected to infer Float64."""
    cases = [
        ([1, 2.0], [1.0, 2.0]),
        ([1, None, 2.0], [1.0, None, 2.0]),
    ]
    for mixed, all_float in cases:
        inferred = pd.array(mixed)
        explicit = pd.array(all_float, dtype="Float64")
        tm.assert_extension_array_equal(inferred, explicit)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        "foo",
        1,
        1.0,
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [[1, 2], [3, 4]],
        [np.nan, {"a": 1}],
        # GH#44514 all-NA case used to get quietly swapped out before checking ndim
        np.array([pd.NA] * 6, dtype=object).reshape(3, 2),
    ],
)
def test_to_array_error(values):
    """Inputs that cannot become a FloatingArray raise TypeError or ValueError."""
    # Any one of these messages is accepted; they are joined into one regex.
    accepted_messages = (
        "cannot be converted to FloatingDtype",
        "values must be a 1D list-like",
        "Cannot pass scalar",
        r"float\(\) argument must be a string or a (real )?number, not 'dict'",
        "could not convert string to float: 'foo'",
        r"could not convert string to float: np\.str_\('foo'\)",
    )
    pattern = "|".join(accepted_messages)

    with pytest.raises((TypeError, ValueError), match=pattern):
        pd.array(values, dtype="Float64")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [["1", "2", None], ["1.5", "2", None]])
def test_construct_from_float_strings(values):
    """Numeric strings are parsed when building a Float64 array."""
    # see also test_to_integer_array_str
    wanted = pd.array([float(values[0]), 2, None], dtype="Float64")

    via_pd_array = pd.array(values, dtype="Float64")
    tm.assert_extension_array_equal(via_pd_array, wanted)

    via_from_sequence = FloatingArray._from_sequence(values)
    tm.assert_extension_array_equal(via_from_sequence, wanted)
|
||||
|
||||
|
||||
def test_to_array_inferred_dtype():
    """Without a dtype argument, ``pd.array`` follows the input's own dtype."""
    # Input that already has a dtype: the width is kept.
    from_float32 = pd.array(np.array([1, 2], dtype="float32"))
    assert from_float32.dtype == Float32Dtype()

    # Plain Python floats carry no dtype: the result is always 64-bit.
    from_list = pd.array([1.0, 2.0])
    assert from_list.dtype == Float64Dtype()
|
||||
|
||||
|
||||
def test_to_array_dtype_keyword():
    """An explicit ``dtype`` argument decides the result dtype."""
    from_list = pd.array([1, 2], dtype="Float32")
    assert from_list.dtype == Float32Dtype()

    # The keyword overrides a dtype the input already has.
    float32_input = np.array([1, 2], dtype="float32")
    overridden = pd.array(float32_input, dtype="Float64")
    assert overridden.dtype == Float64Dtype()
|
||||
|
||||
|
||||
def test_to_array_integer():
    """Integer input converts cleanly when Float64 is requested."""
    from_ints = pd.array([1, 2], dtype="Float64")
    from_floats = pd.array([1.0, 2.0], dtype="Float64")
    tm.assert_extension_array_equal(from_ints, from_floats)

    # for integer dtypes, the itemsize is not preserved
    # TODO can we specify "floating" in general?
    int32_input = np.array([1, 2], dtype="int32")
    assert pd.array(int32_input, dtype="Float64").dtype == Float64Dtype()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "bool_values, values, target_dtype, expected_dtype",
    [
        ([False, True], [0, 1], Float64Dtype(), Float64Dtype()),
        ([False, True], [0, 1], "Float64", Float64Dtype()),
        ([False, True, np.nan], [0, 1, np.nan], Float64Dtype(), Float64Dtype()),
    ],
)
def test_to_array_bool(bool_values, values, target_dtype, expected_dtype):
    """Boolean input converts to a floating array the same way as 0/1 input."""
    from_bools = pd.array(bool_values, dtype=target_dtype)
    assert from_bools.dtype == expected_dtype

    from_numbers = pd.array(values, dtype=target_dtype)
    tm.assert_extension_array_equal(from_bools, from_numbers)
|
||||
|
||||
|
||||
def test_series_from_float(data):
    """A Series built from raw values plus the dtype's string name equals ``Series(data)``."""
    # construct from our dtype & string dtype
    dtype_name = str(data.dtype)

    # from float
    reference = pd.Series(data)
    as_float_ndarray = data.to_numpy(na_value=np.nan, dtype="float")
    tm.assert_series_equal(pd.Series(as_float_ndarray, dtype=dtype_name), reference)

    # from list
    reference = pd.Series(data)
    as_list = np.array(data).tolist()
    tm.assert_series_equal(pd.Series(as_list, dtype=dtype_name), reference)
|
@ -0,0 +1,12 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def test_contains_nan():
|
||||
# GH#52840
|
||||
arr = pd.array(range(5)) / 0
|
||||
|
||||
assert np.isnan(arr._data[0])
|
||||
assert not arr.isna()[0]
|
||||
assert np.nan in arr
|
@ -0,0 +1,194 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import IS64
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
# np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
@pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning")
def test_ufuncs_single(ufunc):
    """A unary ufunc on a Float64 array or Series matches the plain-float result."""
    masked = pd.array([1, 2, -3, np.nan], dtype="Float64")

    # Array input: the reference is the ufunc applied to the plain float view.
    array_reference = pd.array(ufunc(masked.astype(float)), dtype="Float64")
    tm.assert_extension_array_equal(ufunc(masked), array_reference)

    # Series input: same values, wrapped in a Series.
    boxed = pd.Series(masked)
    tm.assert_series_equal(ufunc(boxed), pd.Series(array_reference))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
def test_ufuncs_single_float(ufunc):
    """A float-valued unary ufunc on Float64 data matches the plain-float result."""
    masked = pd.array([1.0, 0.2, 3.0, np.nan], dtype="Float64")

    # Array input; invalid-value floating point errors are ignored.
    with np.errstate(invalid="ignore"):
        array_result = ufunc(masked)
        array_reference = pd.array(ufunc(masked.astype(float)), dtype="Float64")
    tm.assert_extension_array_equal(array_result, array_reference)

    # Series input.
    boxed = pd.Series(masked)
    with np.errstate(invalid="ignore"):
        series_result = ufunc(boxed)
        series_reference = pd.Series(ufunc(boxed.astype(float)), dtype="Float64")
    tm.assert_series_equal(series_result, series_reference)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.add, np.subtract])
def test_ufuncs_binary_float(ufunc):
    """A binary ufunc with a FloatingArray operand matches the plain-float result."""
    a = pd.array([1, 0.2, -3, np.nan], dtype="Float64")
    plain = a.astype(float)
    arr = np.array([1, 2, 3, 4])

    # Each entry pairs the masked operands with their plain-float counterparts.
    operand_cases = [
        ((a, a), (plain, plain)),  # two FloatingArrays
        ((a, arr), (plain, arr)),  # FloatingArray with numpy array
        ((arr, a), (arr, plain)),
        ((a, 1), (plain, 1)),  # FloatingArray with scalar
        ((1, a), (1, plain)),
    ]
    for masked_operands, plain_operands in operand_cases:
        result = ufunc(*masked_operands)
        expected = pd.array(ufunc(*plain_operands), dtype="Float64")
        tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
    """``np.add.reduce`` on a Float64 array agrees with ``sum(skipna=False)``.

    Despite the name, nothing is expected to raise here; the test only
    compares the two reductions.
    """
    masked = pd.array(values, dtype="Float64")

    reduced = np.add.reduce(masked)
    summed = masked.sum(skipna=False)

    tm.assert_almost_equal(reduced, summed)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system")
@pytest.mark.parametrize(
    "pandasmethname, kwargs",
    [
        ("var", {"ddof": 0}),
        ("var", {"ddof": 1}),
        ("std", {"ddof": 0}),
        ("std", {"ddof": 1}),
        ("kurtosis", {}),
        ("skew", {}),
        ("sem", {}),
    ],
)
def test_stat_method(pandasmethname, kwargs):
    """A statistic on Float64 data with NA equals the same statistic on the valid float64 values."""
    valid_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]

    nullable = pd.Series(data=valid_values + [np.nan, np.nan], dtype="Float64")
    result = getattr(nullable, pandasmethname)(**kwargs)

    plain = pd.Series(data=valid_values, dtype="float64")
    expected = getattr(plain, pandasmethname)(**kwargs)

    assert expected == result
|
||||
|
||||
|
||||
def test_value_counts_na():
    """``value_counts`` counts NA only with ``dropna=False`` and keeps a Float64 index."""
    arr = pd.array([0.1, 0.2, 0.1, pd.NA], dtype="Float64")

    full_index = pd.Index([0.1, 0.2, pd.NA], dtype=arr.dtype)
    assert full_index.dtype == arr.dtype

    # NA kept: it gets its own entry.
    including_na = pd.Series([2, 1, 1], index=full_index, dtype="Int64", name="count")
    tm.assert_series_equal(arr.value_counts(dropna=False), including_na)

    # NA dropped: same index without its last (NA) label.
    excluding_na = pd.Series(
        [2, 1], index=full_index[:-1], dtype="Int64", name="count"
    )
    tm.assert_series_equal(arr.value_counts(dropna=True), excluding_na)
|
||||
|
||||
|
||||
def test_value_counts_empty():
    """``value_counts`` on an empty Float64 Series gives an empty Int64 Series with a Float64 index."""
    empty = pd.Series([], dtype="Float64")
    counted = empty.value_counts()

    empty_index = pd.Index([], dtype="Float64")
    assert empty_index.dtype == "Float64"

    wanted = pd.Series([], index=empty_index, dtype="Int64", name="count")
    tm.assert_series_equal(counted, wanted)
|
||||
|
||||
|
||||
def test_value_counts_with_normalize():
    """``value_counts(normalize=True)`` returns Float64 proportions of the non-NA values."""
    ser = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
    proportions = ser.value_counts(normalize=True)

    # Three valid observations, so the counts are divided by 3.
    raw_counts = pd.Series([2, 1], index=ser[:2], dtype="Float64", name="proportion")
    wanted = raw_counts / 3
    assert wanted.index.dtype == ser.dtype

    tm.assert_series_equal(proportions, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 4])
def test_floating_array_sum(skipna, min_count, dtype):
    """``sum`` gives 6.0 only when NA is skipped and ``min_count`` is met, else NA."""
    arr = pd.array([1, 2, 3, None], dtype=dtype)
    total = arr.sum(skipna=skipna, min_count=min_count)

    # Only three values are valid, so min_count=4 can never be satisfied.
    has_numeric_answer = skipna and min_count == 0
    if not has_numeric_answer:
        assert total is pd.NA
    else:
        assert total == 6.0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, expected", [([1, 2, 3], 6.0), ([1, 2, 3, None], 6.0), ([None], 0.0)]
)
def test_floating_array_numpy_sum(values, expected):
    """``np.sum`` on a Float64 array is expected to ignore missing values."""
    masked = pd.array(values, dtype="Float64")
    total = np.sum(masked)
    assert total == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
def test_preserve_dtypes(op):
    """Reductions on a Float64 column give np.float64; groupby keeps the Float64 dtype."""
    frame = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([0.1, None, 3.0], dtype="Float64"),
        }
    )

    # Reduction on the column itself.
    scalar = getattr(frame.C, op)()
    assert isinstance(scalar, np.float64)

    # The same reduction per group.
    grouped = getattr(frame.groupby("A"), op)()

    wanted = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([0.1, 3], dtype="Float64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(grouped, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("method", ["min", "max"])
def test_floating_array_min_max(skipna, method, dtype):
    """``min``/``max`` skip NA when asked to and return NA otherwise."""
    arr = pd.array([0.0, 1.0, None], dtype=dtype)
    outcome = getattr(arr, method)(skipna=skipna)

    if not skipna:
        assert outcome is pd.NA
    else:
        assert outcome == (0 if method == "min" else 1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 9])
def test_floating_array_prod(skipna, min_count, dtype):
    """``prod`` gives 2 only when NA is skipped and ``min_count`` is met, else NA."""
    arr = pd.array([1.0, 2.0, None], dtype=dtype)
    product = arr.prod(skipna=skipna, min_count=min_count)

    # Only two values are valid, so min_count=9 can never be satisfied.
    has_numeric_answer = skipna and min_count == 0
    if not has_numeric_answer:
        assert product is pd.NA
    else:
        assert product == 2
|
@ -0,0 +1,47 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays.floating import (
|
||||
Float32Dtype,
|
||||
Float64Dtype,
|
||||
)
|
||||
|
||||
|
||||
def test_dtypes(dtype):
    """Smoke-test a dtype: its scalar type is a NumPy float kind and it has a name."""
    # smoke tests on auto dtype construction

    # This comparison used to be a bare expression, so it was never checked.
    assert np.dtype(dtype.type).kind == "f"
    assert dtype.name is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [(Float32Dtype(), "Float32Dtype()"), (Float64Dtype(), "Float64Dtype()")],
)
def test_repr_dtype(dtype, expected):
    """The repr of a floating dtype is its class name followed by ``()``."""
    shown = repr(dtype)
    assert shown == expected
|
||||
|
||||
|
||||
def test_repr_array():
    """A short FloatingArray repr shows class, values, length and dtype."""
    shown = repr(pd.array([1.0, None, 3.0]))
    wanted = "<FloatingArray>\n[1.0, <NA>, 3.0]\nLength: 3, dtype: Float64"
    assert shown == wanted
|
||||
|
||||
|
||||
def test_repr_array_long():
    """A long FloatingArray repr is truncated to ten leading and ten trailing values."""
    data = pd.array([1.0, 2.0, None] * 1000)

    # Elements are right-justified to the width of "<NA>" and continuation
    # lines start with a space, so the exact whitespace matters here. The
    # literal is written line by line to keep that spacing visible.
    expected = (
        "<FloatingArray>\n"
        "[ 1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,\n"
        " ...\n"
        " <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>]\n"
        "Length: 3000, dtype: Float64"
    )

    result = repr(data)
    assert result == expected
|
||||
|
||||
|
||||
def test_frame_repr(data_missing):
    """A DataFrame holding a nullable float column shows NA as ``<NA>``."""
    df = pd.DataFrame({"A": data_missing})
    result = repr(df)

    # Column values are right-aligned under the header, so the padding is part
    # of the expected text. NOTE(review): assumes ``data_missing`` is the
    # two-element [NA, 0.1] array; confirm against the conftest fixture.
    expected = "      A\n0  <NA>\n1   0.1"
    assert result == expected
|
@ -0,0 +1,132 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
|
||||
def test_to_numpy(box):
|
||||
con = pd.Series if box else pd.array
|
||||
|
||||
# default (with or without missing values) -> object dtype
|
||||
arr = con([0.1, 0.2, 0.3], dtype="Float64")
|
||||
result = arr.to_numpy()
|
||||
expected = np.array([0.1, 0.2, 0.3], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
arr = con([0.1, 0.2, None], dtype="Float64")
|
||||
result = arr.to_numpy()
|
||||
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
|
||||
def test_to_numpy_float(box):
|
||||
con = pd.Series if box else pd.array
|
||||
|
||||
# no missing values -> can convert to float, otherwise raises
|
||||
arr = con([0.1, 0.2, 0.3], dtype="Float64")
|
||||
result = arr.to_numpy(dtype="float64")
|
||||
expected = np.array([0.1, 0.2, 0.3], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
arr = con([0.1, 0.2, None], dtype="Float64")
|
||||
result = arr.to_numpy(dtype="float64")
|
||||
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = arr.to_numpy(dtype="float64", na_value=np.nan)
|
||||
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_int(box):
    """``to_numpy(dtype="int64")`` works without NA and raises when NA is present."""
    if box:
        con = pd.Series
    else:
        con = pd.array

    # no missing values -> can convert to int, otherwise raises
    whole_numbers = con([1.0, 2.0, 3.0], dtype="Float64")
    tm.assert_numpy_array_equal(
        whole_numbers.to_numpy(dtype="int64"),
        np.array([1, 2, 3], dtype="int64"),
    )

    with_missing = con([1.0, 2.0, None], dtype="Float64")
    with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
        with_missing.to_numpy(dtype="int64")

    # automatic casting (floors the values)
    fractions = con([0.1, 0.9, 1.1], dtype="Float64")
    tm.assert_numpy_array_equal(
        fractions.to_numpy(dtype="int64"),
        np.array([0, 0, 1], dtype="int64"),
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_na_value(box):
    """``na_value`` fills the missing entry for object, bool and int64 targets."""
    if box:
        con = pd.Series
    else:
        con = pd.array

    arr = con([0.0, 1.0, None], dtype="Float64")

    # (to_numpy keyword arguments, expected array)
    cases = [
        (
            {"dtype": object, "na_value": None},
            np.array([0.0, 1.0, None], dtype="object"),
        ),
        (
            {"dtype": bool, "na_value": False},
            np.array([False, True, False], dtype="bool"),
        ),
        (
            {"dtype": "int64", "na_value": -99},
            np.array([0, 1, -99], dtype="int64"),
        ),
    ]
    for to_numpy_kwargs, wanted in cases:
        tm.assert_numpy_array_equal(arr.to_numpy(**to_numpy_kwargs), wanted)
|
||||
|
||||
|
||||
def test_to_numpy_na_value_with_nan():
    """``na_value`` replaces masked entries only; a NaN stored in the data is kept."""
    # array with both NaN and NA -> only fill NA with `na_value`
    stored = np.array([0.0, np.nan, 0.0])
    masked_positions = np.array([False, False, True])
    arr = FloatingArray(stored, masked_positions)

    filled = arr.to_numpy(dtype="float64", na_value=-1)

    tm.assert_numpy_array_equal(
        filled, np.array([0.0, np.nan, -1.0], dtype="float64")
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"])
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_dtype(box, dtype):
    """Without missing values, ``to_numpy`` converts to each requested NumPy dtype."""
    if box:
        con = pd.Series
    else:
        con = pd.array
    complete = con([0.0, 1.0], dtype="Float64")

    converted = complete.to_numpy(dtype=dtype)

    tm.assert_numpy_array_equal(converted, np.array([0, 1], dtype=dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_na_raises(box, dtype):
    """With NA present, converting to an int or bool dtype raises ValueError."""
    if box:
        con = pd.Series
    else:
        con = pd.array
    with_missing = con([0.0, 1.0, None], dtype="Float64")

    # The error message is expected to mention the requested dtype.
    with pytest.raises(ValueError, match=dtype):
        with_missing.to_numpy(dtype=dtype)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_string(box, dtype):
    """``to_numpy(dtype="str")`` is expected to give a 32-character unicode array."""
    # ``dtype`` is accepted but not used in the body.
    if box:
        con = pd.Series
    else:
        con = pd.array
    with_missing = con([0.0, 1.0, None], dtype="Float64")

    as_text = with_missing.to_numpy(dtype="str")

    wanted = np.array([0.0, 1.0, pd.NA], dtype=f"{tm.ENDIAN}U32")
    tm.assert_numpy_array_equal(as_text, wanted)
|
||||
|
||||
|
||||
def test_to_numpy_copy():
    """``to_numpy`` may share memory without NA; ``copy=True`` must not."""
    # to_numpy can be zero-copy if no missing values
    arr = pd.array([0.1, 0.2, 0.3], dtype="Float64")
    view = arr.to_numpy(dtype="float64")
    view[0] = 10
    # The write is expected to show up in the source array.
    tm.assert_extension_array_equal(arr, pd.array([10, 0.2, 0.3], dtype="Float64"))

    arr = pd.array([0.1, 0.2, 0.3], dtype="Float64")
    independent = arr.to_numpy(dtype="float64", copy=True)
    independent[0] = 10
    # With an explicit copy the source array must stay unchanged.
    tm.assert_extension_array_equal(arr, pd.array([0.1, 0.2, 0.3], dtype="Float64"))
|
@ -0,0 +1,68 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int16Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
UInt8Dtype,
|
||||
UInt16Dtype,
|
||||
UInt32Dtype,
|
||||
UInt64Dtype,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        Int8Dtype,
        Int16Dtype,
        Int32Dtype,
        Int64Dtype,
        UInt8Dtype,
        UInt16Dtype,
        UInt32Dtype,
        UInt64Dtype,
    ]
)
def dtype(request):
    """Parametrized fixture returning integer 'dtype'"""
    # The params are dtype classes; hand back an instance of the selected one.
    dtype_class = request.param
    return dtype_class()
|
||||
|
||||
|
||||
@pytest.fixture
def data(dtype):
    """
    Fixture returning 'data' array with valid and missing values according to
    parametrized integer 'dtype'.

    Used to test dtype conversion with and without missing values.
    """
    # 0..7, a gap, 10..97, a gap, then 99 and 100.
    values = [*range(8), np.nan, *range(10, 98), np.nan, 99, 100]
    return pd.array(values, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture
def data_missing(dtype):
    """
    Fixture returning array with exactly one NaN and one valid integer,
    according to parametrized integer 'dtype'.

    Used to test dtype conversion with and without missing values.
    """
    # Missing entry first, valid entry second.
    missing_then_valid = [np.nan, 1]
    return pd.array(missing_then_valid, dtype=dtype)
|
||||
|
||||
|
||||
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Parametrized fixture returning 'data' or 'data_missing' integer arrays.

    Used to test dtype conversion with and without missing values.
    """
    # Look the array up by the parameter's name; an unknown name gives None.
    by_name = {"data": data, "data_missing": data_missing}
    return by_name.get(request.param)
|
@ -0,0 +1,385 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core import ops
|
||||
from pandas.core.arrays import FloatingArray
|
||||
|
||||
# Basic test for the arithmetic array ops
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "opname, exp",
    [("add", [1, 3, None, None, 9]), ("mul", [0, 2, None, None, 20])],
    ids=["add", "mul"],
)
def test_add_mul(dtype, opname, exp):
    """Addition and multiplication of two masked arrays, forward and reflected."""
    a = pd.array([0, 1, None, 3, 4], dtype=dtype)
    b = pd.array([1, 2, 3, None, 5], dtype=dtype)

    # array / array
    expected = pd.array(exp, dtype=dtype)

    # Forward operator from ``operator``, then the reflected one from pandas' ops.
    forward = getattr(operator, opname)
    reflected = getattr(ops, "r" + opname)
    for op in (forward, reflected):
        tm.assert_extension_array_equal(op(a, b), expected)
|
||||
|
||||
|
||||
def test_sub(dtype):
    """Subtraction gives NA wherever either operand is NA."""
    minuend = pd.array([1, 2, 3, None, 5], dtype=dtype)
    subtrahend = pd.array([0, 1, None, 3, 4], dtype=dtype)

    difference = minuend - subtrahend

    wanted = pd.array([1, 1, None, None, 1], dtype=dtype)
    tm.assert_extension_array_equal(difference, wanted)
|
||||
|
||||
|
||||
def test_div(dtype):
    """True division is expected to give Float64, with inf for division by zero."""
    numerator = pd.array([1, 2, 3, None, 5], dtype=dtype)
    denominator = pd.array([0, 1, None, 3, 4], dtype=dtype)

    quotient = numerator / denominator

    wanted = pd.array([np.inf, 2, None, None, 1.25], dtype="Float64")
    tm.assert_extension_array_equal(quotient, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
def test_divide_by_zero(zero, negative):
    """Dividing an Int64 array by a zero scalar gives NaN/inf, never NA."""
    # https://github.com/pandas-dev/pandas/issues/27398, GH#22793
    ints = pd.array([0, 1, -1, None], dtype="Int64")
    quotient = ints / zero

    # Only the entry that was NA in the input stays masked.
    expected = FloatingArray(
        np.array([np.nan, np.inf, -np.inf, 1], dtype="float64"),
        np.array([False, False, False, True]),
    )
    if negative:
        # Dividing by negative zero flips the sign of the infinities.
        expected *= -1

    tm.assert_extension_array_equal(quotient, expected)
|
||||
|
||||
|
||||
def test_floordiv(dtype):
    """Floor division keeps the integer dtype and gives NA where an operand is NA."""
    numerator = pd.array([1, 2, 3, None, 5], dtype=dtype)
    denominator = pd.array([0, 1, None, 3, 4], dtype=dtype)

    quotient = numerator // denominator

    # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
    wanted = pd.array([0, 2, None, None, 1], dtype=dtype)
    tm.assert_extension_array_equal(quotient, wanted)
|
||||
|
||||
|
||||
def test_floordiv_by_int_zero_no_mask(any_int_ea_dtype):
    """``1 // ser`` gives inf for a zero divisor, for masked and plain dtypes alike."""
    # GH 48223: Aligns with non-masked floordiv
    # but differs from numpy
    # https://github.com/pandas-dev/pandas/issues/30188#issuecomment-564452740
    ser = pd.Series([0, 1], dtype=any_int_ea_dtype)

    # Masked integer Series -> Float64 result.
    masked_expected = pd.Series([np.inf, 1.0], dtype="Float64")
    tm.assert_series_equal(1 // ser, masked_expected)

    # The same values in the matching NumPy dtype -> float64 result.
    ser_non_nullable = ser.astype(ser.dtype.numpy_dtype)
    plain_expected = masked_expected.astype(np.float64)
    tm.assert_series_equal(1 // ser_non_nullable, plain_expected)
|
||||
|
||||
|
||||
def test_mod(dtype):
    """Modulo keeps the integer dtype and gives NA where an operand is NA."""
    dividend = pd.array([1, 2, 3, None, 5], dtype=dtype)
    divisor = pd.array([0, 1, None, 3, 4], dtype=dtype)

    remainder = dividend % divisor

    wanted = pd.array([0, 0, None, None, 1], dtype=dtype)
    tm.assert_extension_array_equal(remainder, wanted)
|
||||
|
||||
|
||||
def test_pow_scalar():
    """Check ``array ** scalar`` and ``scalar ** array`` for an Int64 array.

    Integer, ``pd.NA`` and ``np.nan`` scalars are exercised on both sides
    of the operator.
    """
    base = pd.array([-1, 0, 1, None, 2], dtype="Int64")

    # Exponent 0: every position, including the missing one, becomes 1.
    tm.assert_extension_array_equal(
        base**0, pd.array([1, 1, 1, 1, 1], dtype="Int64")
    )

    # Exponent 1: values and missing positions are unchanged.
    tm.assert_extension_array_equal(
        base**1, pd.array([-1, 0, 1, None, 2], dtype="Int64")
    )

    # Exponent NA: only base 1 keeps a known value.
    tm.assert_extension_array_equal(
        base**pd.NA, pd.array([None, None, 1, None, None], dtype="Int64")
    )

    # Exponent NaN: float result where only the original NA is masked.
    nan_values = np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64")
    nan_mask = np.array([False, False, False, True, False])
    tm.assert_extension_array_equal(
        base**np.nan, FloatingArray(nan_values, nan_mask)
    )

    # reversed
    exponent = base[1:]  # Can't raise integers to negative powers.

    tm.assert_extension_array_equal(
        0**exponent, pd.array([1, 0, None, 0], dtype="Int64")
    )
    tm.assert_extension_array_equal(
        1**exponent, pd.array([1, 1, 1, 1], dtype="Int64")
    )
    tm.assert_extension_array_equal(
        pd.NA**exponent, pd.array([1, None, None, None], dtype="Int64")
    )

    rnan_values = np.array([1, np.nan, np.nan, np.nan], dtype="float64")
    rnan_mask = np.array([False, False, True, False])
    tm.assert_extension_array_equal(
        np.nan**exponent, FloatingArray(rnan_values, rnan_mask)
    )
|
||||
|
||||
|
||||
def test_pow_array():
    """Element-wise power over every combination of 0, 1 and NA."""
    bases = pd.array([0, 0, 0, 1, 1, 1, None, None, None])
    exponents = pd.array([0, 1, None, 0, 1, None, 0, 1, None])

    expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None])
    tm.assert_extension_array_equal(bases**exponents, expected)
|
||||
|
||||
|
||||
def test_rpow_one_to_na():
    """Raise a float ndarray to an all-NA Int64 array.

    https://github.com/pandas-dev/pandas/issues/22022
    https://github.com/pandas-dev/pandas/issues/29997
    """
    all_na = pd.array([np.nan, np.nan], dtype="Int64")
    bases = np.array([1.0, 2.0])

    # Base 1.0 yields 1.0; base 2.0 stays missing.
    expected = pd.array([1.0, np.nan], dtype="Float64")
    tm.assert_extension_array_equal(bases**all_na, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("other", [0, 0.5])
def test_numpy_zero_dim_ndarray(other):
    """Adding a zero-dimensional ndarray matches adding the bare scalar."""
    masked = pd.array([1, None, 2])
    via_ndarray = masked + np.array(other)
    via_scalar = masked + other
    tm.assert_equal(via_ndarray, via_scalar)
|
||||
|
||||
|
||||
# Test generic characteristics / errors
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
    """Check that arithmetic with non-numeric operands raises a known error.

    The operator named by ``all_arithmetic_operators`` is looked up on a
    Series built from ``data`` and applied to a string scalar, a Timestamp,
    a Series of strings and a Series of datetimes. Each call must raise one
    of ``errs`` with a message matching one of the listed alternatives,
    except multiplication by a string Series when ``using_infer_string`` is
    false, which is expected to succeed.
    """
    op = all_arithmetic_operators
    s = pd.Series(data)
    ops = getattr(s, op)

    # With string inference enabled, pyarrow-originated errors are accepted too.
    if using_infer_string:
        import pyarrow as pa

        errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)
    else:
        errs = TypeError

    # invalid scalars
    # Regex alternation of every message accepted for the scalar cases.
    msg = "|".join(
        [
            r"can only perform ops with numeric values",
            r"IntegerArray cannot perform the operation mod",
            r"unsupported operand type",
            r"can only concatenate str \(not \"int\"\) to str",
            "not all arguments converted during string",
            "ufunc '.*' not supported for the input types, and the inputs could not",
            "ufunc '.*' did not contain a loop with signature matching types",
            "Addition/subtraction of integers and integer-arrays with Timestamp",
            "has no kernel",
            "not implemented",
            "The 'out' kwarg is necessary. Use numpy.strings.multiply without it.",
        ]
    )
    with pytest.raises(errs, match=msg):
        ops("foo")
    with pytest.raises(errs, match=msg):
        ops(pd.Timestamp("20180101"))

    # invalid array-likes
    str_ser = pd.Series("foo", index=s.index)
    if (
        all_arithmetic_operators
        in [
            "__mul__",
            "__rmul__",
        ]
        and not using_infer_string
    ):
        # Multiplying by a string Series is valid here: each string is
        # repeated by the corresponding integer.
        res = ops(str_ser)
        expected = pd.Series(["foo" * x for x in data], index=s.index)
        expected = expected.fillna(np.nan)
        # TODO: doing this fillna to keep tests passing as we make
        # assert_almost_equal stricter, but the expected with pd.NA seems
        # more-correct than np.nan here.
        tm.assert_series_equal(res, expected)
    else:
        with pytest.raises(errs, match=msg):
            ops(str_ser)

    # Messages accepted when the other operand is a datetime Series.
    msg = "|".join(
        [
            "can only perform ops with numeric values",
            "cannot perform .* with this index type: DatetimeArray",
            "Addition/subtraction of integers and integer-arrays "
            "with DatetimeArray is no longer supported. *",
            "unsupported operand type",
            r"can only concatenate str \(not \"int\"\) to str",
            "not all arguments converted during string",
            "cannot subtract DatetimeArray from ndarray",
            "has no kernel",
            "not implemented",
        ]
    )
    with pytest.raises(errs, match=msg):
        ops(pd.Series(pd.date_range("20180101", periods=len(s))))
|
||||
|
||||
|
||||
# Various
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
# TODO test unsigned overflow
|
||||
|
||||
|
||||
def test_arith_coerce_scalar(data, all_arithmetic_operators):
|
||||
op = tm.get_op_from_name(all_arithmetic_operators)
|
||||
s = pd.Series(data)
|
||||
other = 0.01
|
||||
|
||||
result = op(s, other)
|
||||
expected = op(s.astype(float), other)
|
||||
expected = expected.astype("Float64")
|
||||
|
||||
# rmod results in NaN that wasn't NA in original nullable Series -> unmask it
|
||||
if all_arithmetic_operators == "__rmod__":
|
||||
mask = (s == 0).fillna(False).to_numpy(bool)
|
||||
expected.array._mask[mask] = False
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("other", [1.0, np.array(1.0)])
|
||||
def test_arithmetic_conversion(all_arithmetic_operators, other):
|
||||
# if we have a float operand we should have a float result
|
||||
# if that is equal to an integer
|
||||
op = tm.get_op_from_name(all_arithmetic_operators)
|
||||
|
||||
s = pd.Series([1, 2, 3], dtype="Int64")
|
||||
result = op(s, other)
|
||||
assert result.dtype == "Float64"
|
||||
|
||||
|
||||
def test_cross_type_arithmetic():
    """Add columns of different integer dtypes taken from one DataFrame."""
    frame = pd.DataFrame(
        {
            "A": pd.Series([1, 2, np.nan], dtype="Int64"),
            "B": pd.Series([1, np.nan, 3], dtype="UInt8"),
            "C": [1, 2, 3],
        }
    )

    # Int64 + plain integer column
    total = frame.A + frame.C
    tm.assert_series_equal(total, pd.Series([2, 4, np.nan], dtype="Int64"))

    # Comparing the scaled sum yields a nullable boolean Series.
    flags = (frame.A + frame.C) * 3 == 12
    tm.assert_series_equal(flags, pd.Series([False, True, None], dtype="boolean"))

    # Int64 + UInt8
    mixed = frame.A + frame.B
    tm.assert_series_equal(mixed, pd.Series([2, np.nan, np.nan], dtype="Int64"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["mean"])
def test_reduce_to_float(op):
    """Some reductions always return float, even for a whole-number result."""
    frame = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([1, None, 3], dtype="Int64"),
        }
    )

    # Reduction on the Int64 column itself.
    scalar = getattr(frame.C, op)()
    assert isinstance(scalar, float)

    # Same reduction through groupby.
    grouped = getattr(frame.groupby("A"), op)()
    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Float64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(grouped, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"source, neg_target, abs_target",
|
||||
[
|
||||
([1, 2, 3], [-1, -2, -3], [1, 2, 3]),
|
||||
([1, 2, None], [-1, -2, None], [1, 2, None]),
|
||||
([-1, 0, 1], [1, 0, -1], [1, 0, 1]),
|
||||
],
|
||||
)
|
||||
def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_target):
|
||||
dtype = any_signed_int_ea_dtype
|
||||
arr = pd.array(source, dtype=dtype)
|
||||
neg_result, pos_result, abs_result = -arr, +arr, abs(arr)
|
||||
neg_target = pd.array(neg_target, dtype=dtype)
|
||||
abs_target = pd.array(abs_target, dtype=dtype)
|
||||
|
||||
tm.assert_extension_array_equal(neg_result, neg_target)
|
||||
tm.assert_extension_array_equal(pos_result, arr)
|
||||
assert not tm.shares_memory(pos_result, arr)
|
||||
tm.assert_extension_array_equal(abs_result, abs_target)
|
||||
|
||||
|
||||
def test_values_multiplying_large_series_by_NA():
    """Multiplying a 10001-element Series by ``pd.NA`` gives all NA (GH#33701)."""
    length = 10001
    product = pd.NA * pd.Series(np.zeros(length))
    all_na = pd.Series([pd.NA] * length)

    tm.assert_series_equal(product, all_na)
|
||||
|
||||
|
||||
def test_bitwise(dtype):
|
||||
left = pd.array([1, None, 3, 4], dtype=dtype)
|
||||
right = pd.array([None, 3, 5, 4], dtype=dtype)
|
||||
|
||||
result = left | right
|
||||
expected = pd.array([None, None, 3 | 5, 4 | 4], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = left & right
|
||||
expected = pd.array([None, None, 3 & 5, 4 & 4], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
result = left ^ right
|
||||
expected = pd.array([None, None, 3 ^ 5, 4 ^ 4], dtype=dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
# TODO: desired behavior when operating with boolean? defer?
|
||||
|
||||
floats = right.astype("Float64")
|
||||
with pytest.raises(TypeError, match="unsupported operand type"):
|
||||
left | floats
|
||||
with pytest.raises(TypeError, match="unsupported operand type"):
|
||||
left & floats
|
||||
with pytest.raises(TypeError, match="unsupported operand type"):
|
||||
left ^ floats
|
@ -0,0 +1,39 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.arrays.masked_shared import (
|
||||
ComparisonOps,
|
||||
NumericOps,
|
||||
)
|
||||
|
||||
|
||||
class TestComparisonOps(NumericOps, ComparisonOps):
|
||||
@pytest.mark.parametrize("other", [True, False, pd.NA, -1, 0, 1])
|
||||
def test_scalar(self, other, comparison_op, dtype):
|
||||
ComparisonOps.test_scalar(self, other, comparison_op, dtype)
|
||||
|
||||
def test_compare_to_int(self, dtype, comparison_op):
|
||||
# GH 28930
|
||||
op_name = f"__{comparison_op.__name__}__"
|
||||
s1 = pd.Series([1, None, 3], dtype=dtype)
|
||||
s2 = pd.Series([1, None, 3], dtype="float")
|
||||
|
||||
method = getattr(s1, op_name)
|
||||
result = method(2)
|
||||
|
||||
method = getattr(s2, op_name)
|
||||
expected = method(2).astype("boolean")
|
||||
expected[s2.isna()] = pd.NA
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_equals():
    """Arrays of the same class but different dtype do not compare equal.

    GH-30652. ``equals`` is generally tested in /tests/extension/base/methods;
    this case specifically covers the dtype mismatch.
    """
    as_int64 = pd.array([1, 2, None], dtype="Int64")
    as_int32 = pd.array([1, 2, None], dtype="Int32")
    assert as_int64.equals(as_int32) is False
|
@ -0,0 +1,69 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Int64", "Int64"], "Int64"),
        (["UInt64", "UInt64"], "UInt64"),
        (["Int8", "Int8"], "Int8"),
        (["Int8", "Int16"], "Int16"),
        (["UInt8", "Int8"], "Int16"),
        (["Int32", "UInt32"], "Int64"),
        (["Int64", "UInt64"], "Float64"),
        (["Int64", "boolean"], "object"),
        (["UInt8", "boolean"], "object"),
    ],
)
def test_concat_series(to_concat_dtypes, result_dtype):
    """Concatenating masked Series resolves to the expected common dtype.

    We expect the same dtypes as we would get with non-masked inputs,
    just masked where available.
    """

    def concat_with(dtypes):
        # One three-element Series per requested dtype.
        return pd.concat([pd.Series([0, 1, pd.NA], dtype=t) for t in dtypes])

    def build_expected():
        base = pd.Series([0, 1, pd.NA], dtype=object)
        return pd.concat([base] * 2).astype(result_dtype)

    tm.assert_series_equal(concat_with(to_concat_dtypes), build_expected())

    # order doesn't matter for result
    tm.assert_series_equal(concat_with(to_concat_dtypes[::-1]), build_expected())
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Int64", "int64"], "Int64"),
        (["UInt64", "uint64"], "UInt64"),
        (["Int8", "int8"], "Int8"),
        (["Int8", "int16"], "Int16"),
        (["UInt8", "int8"], "Int16"),
        (["Int32", "uint32"], "Int64"),
        (["Int64", "uint64"], "Float64"),
        (["Int64", "bool"], "object"),
        (["UInt8", "bool"], "object"),
    ],
)
def test_concat_series_with_numpy(to_concat_dtypes, result_dtype):
    """Concatenating a masked Series with a NumPy-backed Series.

    We expect the same dtypes as we would get with non-masked inputs,
    just masked where available.
    """
    masked_dtype, numpy_dtype = to_concat_dtypes[0], to_concat_dtypes[1]
    masked = pd.Series([0, 1, pd.NA], dtype=masked_dtype)
    plain = pd.Series(np.array([0, 1], dtype=numpy_dtype))

    forward = pd.concat([masked, plain], ignore_index=True)
    forward_expected = pd.Series([0, 1, pd.NA, 0, 1], dtype=object).astype(
        result_dtype
    )
    tm.assert_series_equal(forward, forward_expected)

    # order doesn't matter for result
    backward = pd.concat([plain, masked], ignore_index=True)
    backward_expected = pd.Series([0, 1, 0, 1, pd.NA], dtype=object).astype(
        result_dtype
    )
    tm.assert_series_equal(backward, backward_expected)
|
@ -0,0 +1,245 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_integer
|
||||
from pandas.core.arrays import IntegerArray
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(params=[pd.array, IntegerArray._from_sequence])
def constructor(request):
    """Yield each array-building callable in turn.

    The parameters are ``pd.array`` and ``IntegerArray._from_sequence``, so
    tests using this fixture run once per construction path.
    """
    builder = request.param
    return builder
|
||||
|
||||
|
||||
def test_uses_pandas_na():
    """A missing element of an Int64 array is the ``pd.NA`` singleton."""
    arr = pd.array([1, None], dtype=Int64Dtype())
    missing = arr[1]
    assert missing is pd.NA
|
||||
|
||||
|
||||
def test_from_dtype_from_float(data):
|
||||
# construct from our dtype & string dtype
|
||||
dtype = data.dtype
|
||||
|
||||
# from float
|
||||
expected = pd.Series(data)
|
||||
result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# from int / list
|
||||
expected = pd.Series(data)
|
||||
result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# from int / array
|
||||
expected = pd.Series(data).dropna().reset_index(drop=True)
|
||||
dropped = np.array(data.dropna()).astype(np.dtype(dtype.type))
|
||||
result = pd.Series(dropped, dtype=str(dtype))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_conversions(data_missing):
|
||||
# astype to object series
|
||||
df = pd.DataFrame({"A": data_missing})
|
||||
result = df["A"].astype("object")
|
||||
expected = pd.Series(np.array([pd.NA, 1], dtype=object), name="A")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# convert to object ndarray
|
||||
# we assert that we are exactly equal
|
||||
# including type conversions of scalars
|
||||
result = df["A"].astype("object").values
|
||||
expected = np.array([pd.NA, 1], dtype=object)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
for r, e in zip(result, expected):
|
||||
if pd.isnull(r):
|
||||
assert pd.isnull(e)
|
||||
elif is_integer(r):
|
||||
assert r == e
|
||||
assert is_integer(e)
|
||||
else:
|
||||
assert r == e
|
||||
assert type(r) == type(e)
|
||||
|
||||
|
||||
def test_integer_array_constructor():
    """``IntegerArray(values, mask)`` accepts only NumPy int data and bool mask."""
    data = np.array([1, 2, 3, 4], dtype="int64")
    na_mask = np.array([False, False, False, True], dtype="bool")

    built = IntegerArray(data, na_mask)
    tm.assert_extension_array_equal(
        built, pd.array([1, 2, 3, np.nan], dtype="Int64")
    )

    # Lists and float data are rejected with a pointer to pd.array.
    wrong_type = r".* should be .* numpy array. Use the 'pd.array' function instead"
    with pytest.raises(TypeError, match=wrong_type):
        IntegerArray(data.tolist(), na_mask)

    with pytest.raises(TypeError, match=wrong_type):
        IntegerArray(data, na_mask.tolist())

    with pytest.raises(TypeError, match=wrong_type):
        IntegerArray(data.astype(float), na_mask)

    # The mask argument is mandatory.
    missing_mask = r"__init__\(\) missing 1 required positional argument: 'mask'"
    with pytest.raises(TypeError, match=missing_mask):
        IntegerArray(data)
|
||||
|
||||
|
||||
def test_integer_array_constructor_copy():
    """The constructor reuses its inputs unless ``copy=True`` is passed."""
    data = np.array([1, 2, 3, 4], dtype="int64")
    na_mask = np.array([False, False, False, True], dtype="bool")

    # Default: the very same ndarray objects are stored.
    shared = IntegerArray(data, na_mask)
    assert shared._data is data
    assert shared._mask is na_mask

    # copy=True: new objects for both data and mask.
    copied = IntegerArray(data, na_mask, copy=True)
    assert copied._data is not data
    assert copied._mask is not na_mask
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b",
    [
        ([1, None], [1, np.nan]),
        ([None], [np.nan]),
        ([None, np.nan], [np.nan, np.nan]),
        ([np.nan, np.nan], [np.nan, np.nan]),
    ],
)
def test_to_integer_array_none_is_nan(a, b):
    """``None`` and ``np.nan`` inputs produce the same Int64 array."""
    from_a = pd.array(a, dtype="Int64")
    from_b = pd.array(b, dtype="Int64")
    tm.assert_extension_array_equal(from_a, from_b)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        "foo",
        1,
        1.0,
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [[1, 2], [3, 4]],
        [np.nan, {"a": 1}],
    ],
)
def test_to_integer_array_error(values):
    """Inputs that cannot become an IntegerArray raise ValueError or TypeError."""
    accepted_messages = [
        r"cannot be converted to IntegerDtype",
        r"invalid literal for int\(\) with base 10:",
        r"values must be a 1D list-like",
        r"Cannot pass scalar",
        r"int\(\) argument must be a string",
    ]
    msg = "|".join(accepted_messages)
    errors = (ValueError, TypeError)

    with pytest.raises(errors, match=msg):
        pd.array(values, dtype="Int64")

    with pytest.raises(errors, match=msg):
        IntegerArray._from_sequence(values)
|
||||
|
||||
|
||||
def test_to_integer_array_inferred_dtype(constructor):
|
||||
# if values has dtype -> respect it
|
||||
result = constructor(np.array([1, 2], dtype="int8"))
|
||||
assert result.dtype == Int8Dtype()
|
||||
result = constructor(np.array([1, 2], dtype="int32"))
|
||||
assert result.dtype == Int32Dtype()
|
||||
|
||||
# if values have no dtype -> always int64
|
||||
result = constructor([1, 2])
|
||||
assert result.dtype == Int64Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_dtype_keyword(constructor):
|
||||
result = constructor([1, 2], dtype="Int8")
|
||||
assert result.dtype == Int8Dtype()
|
||||
|
||||
# if values has dtype -> override it
|
||||
result = constructor(np.array([1, 2], dtype="int8"), dtype="Int32")
|
||||
assert result.dtype == Int32Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_float():
    """Whole-number floats convert to Int64; fractional floats raise."""
    converted = IntegerArray._from_sequence([1.0, 2.0], dtype="Int64")
    tm.assert_extension_array_equal(converted, pd.array([1, 2], dtype="Int64"))

    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        IntegerArray._from_sequence([1.5, 2.0], dtype="Int64")

    # for float dtypes, the itemsize is not preserved
    float32_input = np.array([1.0, 2.0], dtype="float32")
    from_float32 = IntegerArray._from_sequence(float32_input, dtype="Int64")
    assert from_float32.dtype == Int64Dtype()
|
||||
|
||||
|
||||
def test_to_integer_array_str():
    """Integer-like strings parse to Int64; other strings raise ValueError."""
    parsed = IntegerArray._from_sequence(["1", "2", None], dtype="Int64")
    tm.assert_extension_array_equal(
        parsed, pd.array([1, 2, np.nan], dtype="Int64")
    )

    bad_literal = r"invalid literal for int\(\) with base 10: .*"

    # Empty string is not a valid integer literal.
    with pytest.raises(ValueError, match=bad_literal):
        IntegerArray._from_sequence(["1", "2", ""], dtype="Int64")

    # Neither are decimal strings.
    with pytest.raises(ValueError, match=bad_literal):
        IntegerArray._from_sequence(["1.5", "2.0"], dtype="Int64")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"bool_values, int_values, target_dtype, expected_dtype",
|
||||
[
|
||||
([False, True], [0, 1], Int64Dtype(), Int64Dtype()),
|
||||
([False, True], [0, 1], "Int64", Int64Dtype()),
|
||||
([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()),
|
||||
],
|
||||
)
|
||||
def test_to_integer_array_bool(
|
||||
constructor, bool_values, int_values, target_dtype, expected_dtype
|
||||
):
|
||||
result = constructor(bool_values, dtype=target_dtype)
|
||||
assert result.dtype == expected_dtype
|
||||
expected = pd.array(int_values, dtype=target_dtype)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, to_dtype, result_dtype",
    [
        (np.array([1], dtype="int64"), None, Int64Dtype),
        (np.array([1, np.nan]), None, Int64Dtype),
        (np.array([1, np.nan]), "int8", Int8Dtype),
    ],
)
def test_to_integer_array(values, to_dtype, result_dtype):
    """Convert existing ndarrays to IntegerArrays with the expected dtype."""
    target = result_dtype()
    converted = IntegerArray._from_sequence(values, dtype=to_dtype)
    assert converted.dtype == target

    tm.assert_extension_array_equal(converted, pd.array(values, dtype=target))
|
||||
|
||||
|
||||
def test_integer_array_from_boolean():
    """Object-dtype booleans convert like bool-dtype booleans (GH31104)."""
    flags = [True, False]
    from_bool = pd.array(np.array(flags), dtype="Int64")
    from_object = pd.array(np.array(flags, dtype=object), dtype="Int64")
    tm.assert_extension_array_equal(from_object, from_bool)
|
@ -0,0 +1,294 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.generic import ABCIndex
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
UInt32Dtype,
|
||||
)
|
||||
|
||||
|
||||
def test_dtypes(dtype):
|
||||
# smoke tests on auto dtype construction
|
||||
|
||||
if dtype.is_signed_integer:
|
||||
assert np.dtype(dtype.type).kind == "i"
|
||||
else:
|
||||
assert np.dtype(dtype.type).kind == "u"
|
||||
assert dtype.name is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
def test_preserve_dtypes(op):
    """Reductions that can keep the integer dtype do so.

    ``mean`` would actually work here as well, but it generally returns a
    float, so it is not part of the parametrization.
    """
    df = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([1, None, 3], dtype="Int64"),
        }
    )

    # op
    # Every parametrized reduction is expected to return a NumPy int64
    # scalar, so no per-op branching is needed.
    result = getattr(df.C, op)()
    assert isinstance(result, np.int64)

    # groupby
    result = getattr(df.groupby("A"), op)()

    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Int64")},
        index=pd.Index(["a", "b"], name="A"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_nansafe():
    """Casting an array that holds NA to a NumPy integer dtype raises (gh-22343)."""
    with_na = pd.array([np.nan, 1, 2], dtype="Int8")

    with pytest.raises(ValueError, match="cannot convert NA to integer"):
        with_na.astype("uint32")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dropna", [True, False])
|
||||
def test_construct_index(all_data, dropna):
|
||||
# ensure that we do not coerce to different Index dtype or non-index
|
||||
|
||||
all_data = all_data[:10]
|
||||
if dropna:
|
||||
other = np.array(all_data[~all_data.isna()])
|
||||
else:
|
||||
other = all_data
|
||||
|
||||
result = pd.Index(pd.array(other, dtype=all_data.dtype))
|
||||
expected = pd.Index(other, dtype=all_data.dtype)
|
||||
assert all_data.dtype == expected.dtype # dont coerce to object
|
||||
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dropna", [True, False])
|
||||
def test_astype_index(all_data, dropna):
|
||||
# as an int/uint index to Index
|
||||
|
||||
all_data = all_data[:10]
|
||||
if dropna:
|
||||
other = all_data[~all_data.isna()]
|
||||
else:
|
||||
other = all_data
|
||||
|
||||
dtype = all_data.dtype
|
||||
idx = pd.Index(np.array(other))
|
||||
assert isinstance(idx, ABCIndex)
|
||||
|
||||
result = idx.astype(dtype)
|
||||
expected = idx.astype(object).astype(dtype)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype(all_data):
|
||||
all_data = all_data[:10]
|
||||
|
||||
ints = all_data[~all_data.isna()]
|
||||
mixed = all_data
|
||||
dtype = Int8Dtype()
|
||||
|
||||
# coerce to same type - ints
|
||||
s = pd.Series(ints)
|
||||
result = s.astype(all_data.dtype)
|
||||
expected = pd.Series(ints)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# coerce to same other - ints
|
||||
s = pd.Series(ints)
|
||||
result = s.astype(dtype)
|
||||
expected = pd.Series(ints, dtype=dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# coerce to same numpy_dtype - ints
|
||||
s = pd.Series(ints)
|
||||
result = s.astype(all_data.dtype.numpy_dtype)
|
||||
expected = pd.Series(ints._data.astype(all_data.dtype.numpy_dtype))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# coerce to same type - mixed
|
||||
s = pd.Series(mixed)
|
||||
result = s.astype(all_data.dtype)
|
||||
expected = pd.Series(mixed)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# coerce to same other - mixed
|
||||
s = pd.Series(mixed)
|
||||
result = s.astype(dtype)
|
||||
expected = pd.Series(mixed, dtype=dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# coerce to same numpy_dtype - mixed
|
||||
s = pd.Series(mixed)
|
||||
msg = "cannot convert NA to integer"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.astype(all_data.dtype.numpy_dtype)
|
||||
|
||||
# coerce to object
|
||||
s = pd.Series(mixed)
|
||||
result = s.astype("object")
|
||||
expected = pd.Series(np.asarray(mixed, dtype=object))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_astype_copy():
    """Check when ``astype`` shares memory with the source array."""
    source = pd.array([1, 2, 3, None], dtype="Int64")
    pristine = pd.array([1, 2, 3, None], dtype="Int64")

    # copy=True -> ensure both data and mask are actual copies
    copied = source.astype("Int64", copy=True)
    assert copied is not source
    assert not tm.shares_memory(copied, source)
    copied[0] = 10
    tm.assert_extension_array_equal(source, pristine)
    copied[0] = pd.NA
    tm.assert_extension_array_equal(source, pristine)

    # copy=False
    same = source.astype("Int64", copy=False)
    assert same is source
    assert np.shares_memory(same._data, source._data)
    assert np.shares_memory(same._mask, source._mask)
    # Writes through the result are visible on the source.
    same[0] = 10
    assert source[0] == 10
    same[0] = pd.NA
    assert source[0] is pd.NA

    # astype to different dtype -> always needs a copy -> even with copy=False
    # we need to ensure that also the mask is actually copied
    source = pd.array([1, 2, 3, None], dtype="Int64")
    pristine = pd.array([1, 2, 3, None], dtype="Int64")

    narrowed = source.astype("Int32", copy=False)
    assert not tm.shares_memory(narrowed, source)
    narrowed[0] = 10
    tm.assert_extension_array_equal(source, pristine)
    narrowed[0] = pd.NA
    tm.assert_extension_array_equal(source, pristine)
|
||||
|
||||
|
||||
def test_astype_to_larger_numpy():
    """Casting a 32-bit masked array to the 64-bit NumPy dtype gives an ndarray."""
    for masked_dtype, numpy_dtype in (("Int32", "int64"), ("UInt32", "uint64")):
        source = pd.array([1, 2], dtype=masked_dtype)
        widened = source.astype(numpy_dtype)
        tm.assert_numpy_array_equal(widened, np.array([1, 2], dtype=numpy_dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"])
def test_astype_specific_casting(dtype):
    """Casting Int64 to another masked dtype, with and without a missing value."""
    for values in ([1, 2, 3], [1, 2, 3, None]):
        source = pd.Series(values, dtype="Int64")
        expected = pd.Series(values, dtype=dtype)
        tm.assert_series_equal(source.astype(dtype), expected)
|
||||
|
||||
|
||||
def test_astype_floating():
    """Int64 to Float64 keeps the values and the missing position."""
    ints = pd.array([1, 2, None], dtype="Int64")
    floats = pd.array([1.0, 2.0, None], dtype="Float64")
    tm.assert_extension_array_equal(ints.astype("Float64"), floats)
|
||||
|
||||
|
||||
def test_astype_dt64():
    """Casting integers to ``datetime64[ns]`` turns NA into NaT (GH#32435)."""
    # Values are multiples of 10**9, compared below against second-resolution stamps.
    nanos = pd.array([1, 2, 3, pd.NA]) * 10**9

    converted = nanos.astype("datetime64[ns]")

    seconds = np.array([1, 2, 3, "NaT"], dtype="M8[s]")
    tm.assert_numpy_array_equal(converted, seconds.astype("M8[ns]"))
|
||||
|
||||
|
||||
def test_construct_cast_invalid(dtype):
|
||||
msg = "cannot safely"
|
||||
arr = [1.2, 2.3, 3.7]
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.array(arr, dtype=dtype)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.Series(arr).astype(dtype)
|
||||
|
||||
arr = [1.2, 2.3, 3.7, np.nan]
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.array(arr, dtype=dtype)
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
pd.Series(arr).astype(dtype)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("in_series", [True, False])
def test_to_numpy_na_nan(in_series):
    """``to_numpy`` substitutes ``na_value`` for missing entries."""
    source = pd.array([0, 1, None], dtype="Int64")
    if in_series:
        source = pd.Series(source)

    # (target dtype, na_value, expected contents)
    cases = [
        ("float64", np.nan, [0.0, 1.0, np.nan]),
        ("int64", -1, [0, 1, -1]),
        ("bool", False, [False, True, False]),
    ]
    for target, fill, contents in cases:
        converted = source.to_numpy(dtype=target, na_value=fill)
        tm.assert_numpy_array_equal(converted, np.array(contents, dtype=target))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("in_series", [True, False])
@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
def test_to_numpy_dtype(dtype, in_series):
    """``to_numpy`` honours the requested dtype when nothing is missing."""
    source = pd.array([0, 1], dtype="Int64")
    if in_series:
        source = pd.Series(source)

    converted = source.to_numpy(dtype=dtype)
    tm.assert_numpy_array_equal(converted, np.array([0, 1], dtype=dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["int64", "bool"])
def test_to_numpy_na_raises(dtype):
    """``to_numpy`` without ``na_value`` raises when NA cannot be represented.

    The error message is expected to mention the requested dtype.
    """
    with_na = pd.array([0, 1, None], dtype="Int64")
    with pytest.raises(ValueError, match=dtype):
        with_na.to_numpy(dtype=dtype)
|
||||
|
||||
|
||||
def test_astype_str():
    """Casting to ``str`` gives a unicode ndarray with ``"<NA>"`` for missing."""
    ints = pd.array([1, 2, None], dtype="Int64")
    expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")

    # Both the builtin type and its string alias are accepted.
    for target in (str, "str"):
        tm.assert_numpy_array_equal(ints.astype(target), expected)
|
||||
|
||||
|
||||
def test_astype_boolean():
    """Cast Int64 to the nullable ``boolean`` dtype and compare element-wise."""
    # https://github.com/pandas-dev/pandas/issues/31102
    ints = pd.array([1, 0, -1, 2, None], dtype="Int64")
    wanted = pd.array([True, False, True, True, None], dtype="boolean")
    tm.assert_extension_array_equal(ints.astype("boolean"), wanted)
|
@ -0,0 +1,203 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import FloatingArray
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
# np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
@pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning")
def test_ufuncs_single_int(ufunc):
    """Unary ufuncs on an integer masked array are compared against Int64 results."""
    masked = pd.array([1, 2, -3, np.nan])

    def reference():
        # Expectation: apply the ufunc to the float view, then cast back to Int64.
        return pd.array(ufunc(masked.astype(float)), dtype="Int64")

    tm.assert_extension_array_equal(ufunc(masked), reference())

    as_series = pd.Series(masked)
    tm.assert_series_equal(ufunc(as_series), pd.Series(reference()))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
def test_ufuncs_single_float(ufunc):
    """Float-valued unary ufuncs are compared against a FloatingArray result."""
    masked = pd.array([1, 2, -3, np.nan])

    # Invalid-value warnings (e.g. log/sqrt of a negative) are silenced here.
    with np.errstate(invalid="ignore"):
        from_array = ufunc(masked)
        wanted = FloatingArray(ufunc(masked.astype(float)), mask=masked._mask)
    tm.assert_extension_array_equal(from_array, wanted)

    wrapped = pd.Series(masked)
    with np.errstate(invalid="ignore"):
        from_series = ufunc(wrapped)
    tm.assert_series_equal(from_series, pd.Series(wanted))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.add, np.subtract])
def test_ufuncs_binary_int(ufunc):
    """Binary ufuncs with a masked array on either side are compared to Int64."""
    masked = pd.array([1, 2, -3, np.nan])
    as_float = masked.astype(float)
    ndarr = np.array([1, 2, 3, 4])

    # (operands handed to the ufunc, float stand-ins used to build the expectation)
    scenarios = [
        # two IntegerArrays
        ((masked, masked), (as_float, as_float)),
        # IntegerArray with numpy array
        ((masked, ndarr), (as_float, ndarr)),
        ((ndarr, masked), (ndarr, as_float)),
        # IntegerArray with scalar
        ((masked, 1), (as_float, 1)),
        ((1, masked), (1, as_float)),
    ]
    for operands, stand_ins in scenarios:
        outcome = ufunc(*operands)
        wanted = pd.array(ufunc(*stand_ins), dtype="Int64")
        tm.assert_extension_array_equal(outcome, wanted)
|
||||
|
||||
|
||||
def test_ufunc_binary_output():
    """``np.modf`` on a masked array returns a 2-tuple of extension arrays."""
    masked = pd.array([1, 2, np.nan])
    outputs = np.modf(masked)

    fractional, integral = np.modf(masked.to_numpy(na_value=np.nan, dtype="float"))
    wanted = (pd.array(fractional), pd.array(integral))

    assert isinstance(outputs, tuple)
    assert len(outputs) == 2

    for got, want in zip(outputs, wanted):
        tm.assert_extension_array_equal(got, want)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
    """``np.add.reduce`` is compared with ``sum(skipna=False)`` on the same array.

    Despite the name, nothing in this test expects an exception.
    """
    masked = pd.array(values)

    reduced = np.add.reduce(masked)
    tm.assert_almost_equal(reduced, masked.sum(skipna=False))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "pandasmethname, kwargs",
    [
        ("var", {"ddof": 0}),
        ("var", {"ddof": 1}),
        ("std", {"ddof": 0}),
        ("std", {"ddof": 1}),
        ("kurtosis", {}),
        ("skew", {}),
        ("sem", {}),
    ],
)
def test_stat_method(pandasmethname, kwargs):
    """A statistic over data with trailing NAs equals the one over the NA-free data."""
    padded = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64")
    clean = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64")

    result = getattr(padded, pandasmethname)(**kwargs)
    expected = getattr(clean, pandasmethname)(**kwargs)
    assert expected == result
|
||||
|
||||
|
||||
def test_value_counts_na():
    """``value_counts`` on an Int64 array, with and without the NA bucket."""
    data = pd.array([1, 2, 1, pd.NA], dtype="Int64")

    # dropna=False: NA is counted and shows up in an Int64 index.
    counted = data.value_counts(dropna=False)
    index_with_na = pd.Index([1, 2, pd.NA], dtype="Int64")
    assert index_with_na.dtype == "Int64"
    wanted = pd.Series([2, 1, 1], index=index_with_na, dtype="Int64", name="count")
    tm.assert_series_equal(counted, wanted)

    # dropna=True: only the observed values remain.
    counted = data.value_counts(dropna=True)
    wanted = pd.Series([2, 1], index=data[:2], dtype="Int64", name="count")
    assert wanted.index.dtype == data.dtype
    tm.assert_series_equal(counted, wanted)
|
||||
|
||||
|
||||
def test_value_counts_empty():
    """``value_counts`` on an empty Int64 Series keeps the Int64 dtypes."""
    # https://github.com/pandas-dev/pandas/issues/33317
    empty = pd.Series([], dtype="Int64")
    counted = empty.value_counts()

    empty_index = pd.Index([], dtype=empty.dtype)
    assert empty_index.dtype == empty.dtype
    wanted = pd.Series([], index=empty_index, dtype="Int64", name="count")
    tm.assert_series_equal(counted, wanted)
|
||||
|
||||
|
||||
def test_value_counts_with_normalize():
    """Normalized counts are compared against a Float64 ``proportion`` Series."""
    # GH 33172
    data = pd.Series([1, 2, 1, pd.NA], dtype="Int64")
    proportions = data.value_counts(normalize=True)

    raw_counts = pd.Series(
        [2, 1], index=data[:2], dtype="Float64", name="proportion"
    )
    wanted = raw_counts / 3
    assert wanted.index.dtype == data.dtype
    tm.assert_series_equal(proportions, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
@pytest.mark.parametrize("min_count", [0, 4])
|
||||
def test_integer_array_sum(skipna, min_count, any_int_ea_dtype):
|
||||
dtype = any_int_ea_dtype
|
||||
arr = pd.array([1, 2, 3, None], dtype=dtype)
|
||||
result = arr.sum(skipna=skipna, min_count=min_count)
|
||||
if skipna and min_count == 0:
|
||||
assert result == 6
|
||||
else:
|
||||
assert result is pd.NA
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
@pytest.mark.parametrize("method", ["min", "max"])
|
||||
def test_integer_array_min_max(skipna, method, any_int_ea_dtype):
|
||||
dtype = any_int_ea_dtype
|
||||
arr = pd.array([0, 1, None], dtype=dtype)
|
||||
func = getattr(arr, method)
|
||||
result = func(skipna=skipna)
|
||||
if skipna:
|
||||
assert result == (0 if method == "min" else 1)
|
||||
else:
|
||||
assert result is pd.NA
|
||||
|
||||
|
||||
@pytest.mark.parametrize("skipna", [True, False])
|
||||
@pytest.mark.parametrize("min_count", [0, 9])
|
||||
def test_integer_array_prod(skipna, min_count, any_int_ea_dtype):
|
||||
dtype = any_int_ea_dtype
|
||||
arr = pd.array([1, 2, None], dtype=dtype)
|
||||
result = arr.prod(skipna=skipna, min_count=min_count)
|
||||
if skipna and min_count == 0:
|
||||
assert result == 2
|
||||
else:
|
||||
assert result is pd.NA
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, expected", [([1, 2, 3], 6), ([1, 2, 3, None], 6), ([None], 0)]
)
def test_integer_array_numpy_sum(values, expected):
    """``np.sum`` over an Int64 array is checked against the parametrized total."""
    total = np.sum(pd.array(values, dtype="Int64"))
    assert total == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["sum", "prod", "min", "max"])
def test_dataframe_reductions(op):
    """Check that a DataFrame reduction over an Int64 column yields ``np.int64``.

    Parameters
    ----------
    op : str
        Name of the DataFrame reduction method to call.
    """
    # https://github.com/pandas-dev/pandas/pull/32867
    # ensure the integers are not cast to float during reductions
    df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")})
    # Dispatch on the parametrized name. The body previously hard-coded
    # ``df.max()``, so "sum", "prod" and "min" were never actually exercised.
    result = getattr(df, op)()
    assert isinstance(result["a"], np.int64)
|
||||
|
||||
|
||||
# TODO(jreback) - these need testing / are broken
|
||||
|
||||
# shift
|
||||
|
||||
# set_index (destroys type)
|
@ -0,0 +1,19 @@
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_array_setitem_nullable_boolean_mask():
    """``Series.where`` with a nullable boolean condition fills with ``pd.NA``."""
    # GH 31446
    data = pd.Series([1, 2], dtype="Int64")
    kept = data.where(data > 1)
    tm.assert_series_equal(kept, pd.Series([pd.NA, 2], dtype="Int64"))
|
||||
|
||||
|
||||
def test_array_setitem():
    """Assigning through a nullable boolean mask updates the selected entries."""
    # GH 31446
    values = pd.Series([1, 2], dtype="Int64").array
    selector = values > 1
    values[selector] = 1

    tm.assert_extension_array_equal(values, pd.array([1, 1], dtype="Int64"))
|
@ -0,0 +1,125 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "op, expected",
    [
        ("sum", np.int64(3)),
        ("prod", np.int64(2)),
        ("min", np.int64(1)),
        ("max", np.int64(2)),
        ("mean", np.float64(1.5)),
        ("median", np.float64(1.5)),
        ("var", np.float64(0.5)),
        ("std", np.float64(0.5**0.5)),
        ("skew", pd.NA),
        ("kurt", pd.NA),
        ("any", True),
        ("all", True),
    ],
)
def test_series_reductions(op, expected):
    """Each Series reduction over ``[1, 2]`` (Int64) matches the expected scalar."""
    data = Series([1, 2], dtype="Int64")
    reduce = getattr(data, op)
    tm.assert_equal(reduce(), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"op, expected",
|
||||
[
|
||||
["sum", Series([3], index=["a"], dtype="Int64")],
|
||||
["prod", Series([2], index=["a"], dtype="Int64")],
|
||||
["min", Series([1], index=["a"], dtype="Int64")],
|
||||
["max", Series([2], index=["a"], dtype="Int64")],
|
||||
["mean", Series([1.5], index=["a"], dtype="Float64")],
|
||||
["median", Series([1.5], index=["a"], dtype="Float64")],
|
||||
["var", Series([0.5], index=["a"], dtype="Float64")],
|
||||
["std", Series([0.5**0.5], index=["a"], dtype="Float64")],
|
||||
["skew", Series([pd.NA], index=["a"], dtype="Float64")],
|
||||
["kurt", Series([pd.NA], index=["a"], dtype="Float64")],
|
||||
["any", Series([True], index=["a"], dtype="boolean")],
|
||||
["all", Series([True], index=["a"], dtype="boolean")],
|
||||
],
|
||||
)
|
||||
def test_dataframe_reductions(op, expected):
|
||||
df = DataFrame({"a": array([1, 2], dtype="Int64")})
|
||||
result = getattr(df, op)()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"op, expected",
|
||||
[
|
||||
["sum", array([1, 3], dtype="Int64")],
|
||||
["prod", array([1, 3], dtype="Int64")],
|
||||
["min", array([1, 3], dtype="Int64")],
|
||||
["max", array([1, 3], dtype="Int64")],
|
||||
["mean", array([1, 3], dtype="Float64")],
|
||||
["median", array([1, 3], dtype="Float64")],
|
||||
["var", array([pd.NA], dtype="Float64")],
|
||||
["std", array([pd.NA], dtype="Float64")],
|
||||
["skew", array([pd.NA], dtype="Float64")],
|
||||
["any", array([True, True], dtype="boolean")],
|
||||
["all", array([True, True], dtype="boolean")],
|
||||
],
|
||||
)
|
||||
def test_groupby_reductions(op, expected):
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": ["a", "b", "b"],
|
||||
"B": array([1, None, 3], dtype="Int64"),
|
||||
}
|
||||
)
|
||||
result = getattr(df.groupby("A"), op)()
|
||||
expected = DataFrame(expected, index=pd.Index(["a", "b"], name="A"), columns=["B"])
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"op, expected",
|
||||
[
|
||||
["sum", Series([4, 4], index=["B", "C"], dtype="Float64")],
|
||||
["prod", Series([3, 3], index=["B", "C"], dtype="Float64")],
|
||||
["min", Series([1, 1], index=["B", "C"], dtype="Float64")],
|
||||
["max", Series([3, 3], index=["B", "C"], dtype="Float64")],
|
||||
["mean", Series([2, 2], index=["B", "C"], dtype="Float64")],
|
||||
["median", Series([2, 2], index=["B", "C"], dtype="Float64")],
|
||||
["var", Series([2, 2], index=["B", "C"], dtype="Float64")],
|
||||
["std", Series([2**0.5, 2**0.5], index=["B", "C"], dtype="Float64")],
|
||||
["skew", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")],
|
||||
["kurt", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")],
|
||||
["any", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
|
||||
["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
|
||||
],
|
||||
)
|
||||
def test_mixed_reductions(op, expected, using_infer_string):
|
||||
if op in ["any", "all"] and using_infer_string:
|
||||
expected = expected.astype("bool")
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": ["a", "b", "b"],
|
||||
"B": [1, None, 3],
|
||||
"C": array([1, None, 3], dtype="Int64"),
|
||||
}
|
||||
)
|
||||
|
||||
# series
|
||||
result = getattr(df.C, op)()
|
||||
tm.assert_equal(result, expected["C"])
|
||||
|
||||
# frame
|
||||
if op in ["any", "all"]:
|
||||
result = getattr(df, op)()
|
||||
else:
|
||||
result = getattr(df, op)(numeric_only=True)
|
||||
tm.assert_series_equal(result, expected)
|
@ -0,0 +1,67 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.arrays.integer import (
|
||||
Int8Dtype,
|
||||
Int16Dtype,
|
||||
Int32Dtype,
|
||||
Int64Dtype,
|
||||
UInt8Dtype,
|
||||
UInt16Dtype,
|
||||
UInt32Dtype,
|
||||
UInt64Dtype,
|
||||
)
|
||||
|
||||
|
||||
def test_dtypes(dtype):
|
||||
# smoke tests on auto dtype construction
|
||||
|
||||
if dtype.is_signed_integer:
|
||||
assert np.dtype(dtype.type).kind == "i"
|
||||
else:
|
||||
assert np.dtype(dtype.type).kind == "u"
|
||||
assert dtype.name is not None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        (Int8Dtype(), "Int8Dtype()"),
        (Int16Dtype(), "Int16Dtype()"),
        (Int32Dtype(), "Int32Dtype()"),
        (Int64Dtype(), "Int64Dtype()"),
        (UInt8Dtype(), "UInt8Dtype()"),
        (UInt16Dtype(), "UInt16Dtype()"),
        (UInt32Dtype(), "UInt32Dtype()"),
        (UInt64Dtype(), "UInt64Dtype()"),
    ],
)
def test_repr_dtype(dtype, expected):
    """The repr of each integer dtype instance is ``<ClassName>()``."""
    rendered = repr(dtype)
    assert rendered == expected
|
||||
|
||||
|
||||
def test_repr_array():
    """The repr of a short IntegerArray lists every element, NA shown as ``<NA>``."""
    rendered = repr(pd.array([1, None, 3]))
    assert rendered == "<IntegerArray>\n[1, <NA>, 3]\nLength: 3, dtype: Int64"
|
||||
|
||||
|
||||
def test_repr_array_long():
    """Check the truncated repr of a 3000-element IntegerArray.

    The expected text pads every element to the width of ``<NA>``; the runs of
    spaces are therefore significant and must be kept exactly as written.
    """
    data = pd.array([1, 2, None] * 1000)
    expected = (
        "<IntegerArray>\n"
        "[   1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,\n"
        " ...\n"
        " <NA>,    1,    2, <NA>,    1,    2, <NA>,    1,    2, <NA>]\n"
        "Length: 3000, dtype: Int64"
    )
    result = repr(data)
    assert result == expected
|
||||
|
||||
|
||||
def test_frame_repr(data_missing):
|
||||
df = pd.DataFrame({"A": data_missing})
|
||||
result = repr(df)
|
||||
expected = " A\n0 <NA>\n1 1"
|
||||
assert result == expected
|
@ -0,0 +1,28 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestAstype:
    """``astype`` checks for IntervalArray and IntervalIndex."""

    @pytest.mark.parametrize("ordered", [True, False])
    def test_astype_categorical_retains_ordered(self, ordered):
        """Casting to a categorical dtype keeps the requested ``ordered`` flag."""
        interval_index = IntervalIndex.from_breaks(range(5))
        interval_array = interval_index._data

        target = CategoricalDtype(None, ordered=ordered)

        wanted = Categorical(list(interval_array), ordered=ordered)
        converted = interval_array.astype(target)
        assert converted.ordered is ordered
        tm.assert_categorical_equal(converted, wanted)

        # test IntervalIndex.astype while we're at it.
        tm.assert_index_equal(interval_index.astype(target), Index(wanted))
|
@ -0,0 +1,13 @@
|
||||
from pandas.core.arrays import IntervalArray
|
||||
|
||||
|
||||
def test_repr():
    """The repr of a two-element IntervalArray shows intervals, length and dtype."""
    # GH#25022
    intervals = IntervalArray.from_tuples([(0, 1), (1, 2)])
    wanted_lines = [
        "<IntervalArray>",
        "[(0, 1], (1, 2]]",
        "Length: 2, dtype: interval[int64, right]",
    ]
    assert repr(intervals) == "\n".join(wanted_lines)
|
@ -0,0 +1,231 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import IntervalArray
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        (Index([0, 2, 4]), Index([1, 3, 5])),
        (Index([0.0, 1.0, 2.0]), Index([1.0, 2.0, 3.0])),
        (timedelta_range("0 days", periods=3), timedelta_range("1 day", periods=3)),
        (date_range("20170101", periods=3), date_range("20170102", periods=3)),
        (
            date_range("20170101", periods=3, tz="US/Eastern"),
            date_range("20170102", periods=3, tz="US/Eastern"),
        ),
    ],
    ids=lambda x: str(x[0].dtype),
)
def left_right_dtypes(request):
    """
    Provide a ``(left, right)`` pair of indexes used to build an IntervalArray.
    """
    pair = request.param
    return pair
|
||||
|
||||
|
||||
class TestAttributes:
|
||||
@pytest.mark.parametrize(
|
||||
"left, right",
|
||||
[
|
||||
(0, 1),
|
||||
(Timedelta("0 days"), Timedelta("1 day")),
|
||||
(Timestamp("2018-01-01"), Timestamp("2018-01-02")),
|
||||
(
|
||||
Timestamp("2018-01-01", tz="US/Eastern"),
|
||||
Timestamp("2018-01-02", tz="US/Eastern"),
|
||||
),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("constructor", [IntervalArray, IntervalIndex])
|
||||
def test_is_empty(self, constructor, left, right, closed):
|
||||
# GH27219
|
||||
tuples = [(left, left), (left, right), np.nan]
|
||||
expected = np.array([closed != "both", False, False])
|
||||
result = constructor.from_tuples(tuples, closed=closed).is_empty
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestMethods:
|
||||
@pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
|
||||
def test_set_closed(self, closed, new_closed):
|
||||
# GH 21670
|
||||
array = IntervalArray.from_breaks(range(10), closed=closed)
|
||||
result = array.set_closed(new_closed)
|
||||
expected = IntervalArray.from_breaks(range(10), closed=new_closed)
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other",
|
||||
[
|
||||
Interval(0, 1, closed="right"),
|
||||
IntervalArray.from_breaks([1, 2, 3, 4], closed="right"),
|
||||
],
|
||||
)
|
||||
def test_where_raises(self, other):
|
||||
# GH#45768 The IntervalArray methods raises; the Series method coerces
|
||||
ser = pd.Series(IntervalArray.from_breaks([1, 2, 3, 4], closed="left"))
|
||||
mask = np.array([True, False, True])
|
||||
match = "'value.closed' is 'right', expected 'left'."
|
||||
with pytest.raises(ValueError, match=match):
|
||||
ser.array._where(mask, other)
|
||||
|
||||
res = ser.where(mask, other=other)
|
||||
expected = ser.astype(object).where(mask, other)
|
||||
tm.assert_series_equal(res, expected)
|
||||
|
||||
def test_shift(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502
|
||||
a = IntervalArray.from_breaks([1, 2, 3])
|
||||
result = a.shift()
|
||||
# int -> float
|
||||
expected = IntervalArray.from_tuples([(np.nan, np.nan), (1.0, 2.0)])
|
||||
tm.assert_interval_array_equal(result, expected)
|
||||
|
||||
msg = "can only insert Interval objects and NA into an IntervalArray"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
a.shift(1, fill_value=pd.NaT)
|
||||
|
||||
def test_shift_datetime(self):
|
||||
# GH#31502, GH#31504
|
||||
a = IntervalArray.from_breaks(date_range("2000", periods=4))
|
||||
result = a.shift(2)
|
||||
expected = a.take([-1, -1, 0], allow_fill=True)
|
||||
tm.assert_interval_array_equal(result, expected)
|
||||
|
||||
result = a.shift(-1)
|
||||
expected = a.take([1, 2, -1], allow_fill=True)
|
||||
tm.assert_interval_array_equal(result, expected)
|
||||
|
||||
msg = "can only insert Interval objects and NA into an IntervalArray"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
a.shift(1, fill_value=np.timedelta64("NaT", "ns"))
|
||||
|
||||
|
||||
class TestSetitem:
|
||||
def test_set_na(self, left_right_dtypes):
|
||||
left, right = left_right_dtypes
|
||||
left = left.copy(deep=True)
|
||||
right = right.copy(deep=True)
|
||||
result = IntervalArray.from_arrays(left, right)
|
||||
|
||||
if result.dtype.subtype.kind not in ["m", "M"]:
|
||||
msg = "'value' should be an interval type, got <.*NaTType'> instead."
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
result[0] = pd.NaT
|
||||
if result.dtype.subtype.kind in ["i", "u"]:
|
||||
msg = "Cannot set float NaN to integer-backed IntervalArray"
|
||||
# GH#45484 TypeError, not ValueError, matches what we get with
|
||||
# non-NA un-holdable value.
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
result[0] = np.nan
|
||||
return
|
||||
|
||||
result[0] = np.nan
|
||||
|
||||
expected_left = Index([left._na_value] + list(left[1:]))
|
||||
expected_right = Index([right._na_value] + list(right[1:]))
|
||||
expected = IntervalArray.from_arrays(expected_left, expected_right)
|
||||
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
def test_setitem_mismatched_closed(self):
|
||||
arr = IntervalArray.from_breaks(range(4))
|
||||
orig = arr.copy()
|
||||
other = arr.set_closed("both")
|
||||
|
||||
msg = "'value.closed' is 'both', expected 'right'"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr[0] = other[0]
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr[:1] = other[:1]
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr[:0] = other[:0]
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr[:] = other[::-1]
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr[:] = list(other[::-1])
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr[:] = other[::-1].astype(object)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr[:] = other[::-1].astype("category")
|
||||
|
||||
# empty list should be no-op
|
||||
arr[:0] = []
|
||||
tm.assert_interval_array_equal(arr, orig)
|
||||
|
||||
|
||||
class TestReductions:
|
||||
def test_min_max_invalid_axis(self, left_right_dtypes):
|
||||
left, right = left_right_dtypes
|
||||
left = left.copy(deep=True)
|
||||
right = right.copy(deep=True)
|
||||
arr = IntervalArray.from_arrays(left, right)
|
||||
|
||||
msg = "`axis` must be fewer than the number of dimensions"
|
||||
for axis in [-2, 1]:
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr.min(axis=axis)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr.max(axis=axis)
|
||||
|
||||
msg = "'>=' not supported between"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
arr.min(axis="foo")
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
arr.max(axis="foo")
|
||||
|
||||
def test_min_max(self, left_right_dtypes, index_or_series_or_array):
|
||||
# GH#44746
|
||||
left, right = left_right_dtypes
|
||||
left = left.copy(deep=True)
|
||||
right = right.copy(deep=True)
|
||||
arr = IntervalArray.from_arrays(left, right)
|
||||
|
||||
# The expected results below are only valid if monotonic
|
||||
assert left.is_monotonic_increasing
|
||||
assert Index(arr).is_monotonic_increasing
|
||||
|
||||
MIN = arr[0]
|
||||
MAX = arr[-1]
|
||||
|
||||
indexer = np.arange(len(arr))
|
||||
np.random.default_rng(2).shuffle(indexer)
|
||||
arr = arr.take(indexer)
|
||||
|
||||
arr_na = arr.insert(2, np.nan)
|
||||
|
||||
arr = index_or_series_or_array(arr)
|
||||
arr_na = index_or_series_or_array(arr_na)
|
||||
|
||||
for skipna in [True, False]:
|
||||
res = arr.min(skipna=skipna)
|
||||
assert res == MIN
|
||||
assert type(res) == type(MIN)
|
||||
|
||||
res = arr.max(skipna=skipna)
|
||||
assert res == MAX
|
||||
assert type(res) == type(MAX)
|
||||
|
||||
res = arr_na.min(skipna=False)
|
||||
assert np.isnan(res)
|
||||
res = arr_na.max(skipna=False)
|
||||
assert np.isnan(res)
|
||||
|
||||
res = arr_na.min(skipna=True)
|
||||
assert res == MIN
|
||||
assert type(res) == type(MIN)
|
||||
res = arr_na.max(skipna=True)
|
||||
assert res == MAX
|
||||
assert type(res) == type(MAX)
|
@ -0,0 +1,160 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import IntervalArray
|
||||
|
||||
|
||||
def test_arrow_extension_type():
    """Equality and hashing of ``ArrowIntervalType`` depend on ``closed``."""
    pa = pytest.importorskip("pyarrow")

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    left_a = ArrowIntervalType(pa.int64(), "left")
    left_b = ArrowIntervalType(pa.int64(), "left")
    right = ArrowIntervalType(pa.int64(), "right")

    assert left_a.closed == "left"
    # Same subtype and closed side: equal and hash-equal.
    assert left_a == left_b
    assert left_a != right
    assert hash(left_a) == hash(left_b)
    assert hash(left_a) != hash(right)
|
||||
|
||||
|
||||
def test_arrow_array():
    """Convert an IntervalArray to a pyarrow array and inspect type and storage."""
    pa = pytest.importorskip("pyarrow")

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    intervals = pd.interval_range(1, 5, freq=1).array

    converted = pa.array(intervals)
    arrow_type = converted.type
    assert isinstance(arrow_type, ArrowIntervalType)
    assert arrow_type.closed == intervals.closed
    assert arrow_type.subtype == pa.int64()
    for side, bounds in (("left", [1, 2, 3, 4]), ("right", [2, 3, 4, 5])):
        assert converted.storage.field(side).equals(pa.array(bounds, type="int64"))

    as_structs = pa.array([{"left": i, "right": i + 1} for i in range(1, 5)])
    assert converted.storage.equals(as_structs)

    # convert to its storage type
    converted = pa.array(intervals, type=as_structs.type)
    assert converted.equals(as_structs)

    # unsupported conversions
    with pytest.raises(TypeError, match="Not supported to convert IntervalArray"):
        pa.array(intervals, type="float64")

    with pytest.raises(TypeError, match="Not supported to convert IntervalArray"):
        pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left"))
|
||||
|
||||
|
||||
def test_arrow_array_missing():
    """A missing interval becomes null in both fields and at the struct level."""
    pa = pytest.importorskip("pyarrow")

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    intervals = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0])
    intervals[1] = None

    converted = pa.array(intervals)
    assert isinstance(converted.type, ArrowIntervalType)
    assert converted.type.closed == intervals.closed
    assert converted.type.subtype == pa.float64()

    # fields have missing values (not NaN)
    wanted_fields = {
        "left": pa.array([0.0, None, 2.0], type="float64"),
        "right": pa.array([1.0, None, 3.0], type="float64"),
    }
    for side, wanted in wanted_fields.items():
        assert converted.storage.field(side).equals(wanted)

    # structarray itself also has missing values on the array level
    records = [
        {"left": 0.0, "right": 1.0},
        {"left": None, "right": None},
        {"left": 2.0, "right": 3.0},
    ]
    null_mask = np.array([False, True, False])
    assert converted.storage.equals(pa.StructArray.from_pandas(records, mask=null_mask))
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings(
    "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@pytest.mark.parametrize(
    "breaks",
    [[0.0, 1.0, 2.0, 3.0], pd.date_range("2017", periods=4, freq="D")],
    ids=["float", "datetime64[ns]"],
)
def test_arrow_table_roundtrip(breaks):
    """Round-trip an interval column through a pyarrow table and back."""
    pa = pytest.importorskip("pyarrow")

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    intervals = IntervalArray.from_breaks(breaks)
    intervals[1] = None
    frame = pd.DataFrame({"a": intervals})

    table = pa.table(frame)
    assert isinstance(table.field("a").type, ArrowIntervalType)
    restored = table.to_pandas()
    assert isinstance(restored["a"].dtype, pd.IntervalDtype)
    tm.assert_frame_equal(restored, frame)

    # Concatenated tables round-trip to the concatenated frame.
    doubled = pa.concat_tables([table, table]).to_pandas()
    doubled_frame = pd.concat([frame, frame], ignore_index=True)
    tm.assert_frame_equal(doubled, doubled_frame)

    # GH#41040
    empty_column = pa.chunked_array([], type=table.column(0).type)
    table = pa.table([empty_column], schema=table.schema)
    tm.assert_frame_equal(table.to_pandas(), doubled_frame[0:0])
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings(
    "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
@pytest.mark.parametrize(
    "breaks",
    [[0.0, 1.0, 2.0, 3.0], pd.date_range("2017", periods=4, freq="D")],
    ids=["float", "datetime64[ns]"],
)
def test_arrow_table_roundtrip_without_metadata(breaks):
    """The interval dtype survives a round trip after schema metadata is dropped."""
    pa = pytest.importorskip("pyarrow")

    intervals = IntervalArray.from_breaks(breaks)
    intervals[1] = None
    frame = pd.DataFrame({"a": intervals})

    # remove the metadata
    stripped = pa.table(frame).replace_schema_metadata()
    assert stripped.schema.metadata is None

    restored = stripped.to_pandas()
    assert isinstance(restored["a"].dtype, pd.IntervalDtype)
    tm.assert_frame_equal(restored, frame)
|
||||
|
||||
|
||||
def test_from_arrow_from_raw_struct_array():
    """``IntervalDtype.__from_arrow__`` accepts a plain struct array."""
    # in case pyarrow lost the Interval extension type (eg on parquet roundtrip
    # with datetime64[ns] subtype, see GH-45881), still allow conversion
    # from arrow to IntervalArray
    pa = pytest.importorskip("pyarrow")

    structs = pa.array([{"left": 0, "right": 1}, {"left": 1, "right": 2}])
    interval_dtype = pd.IntervalDtype(np.dtype("int64"), closed="neither")

    wanted = IntervalArray.from_breaks(
        np.array([0, 1, 2], dtype="int64"), closed="neither"
    )

    # Both a plain array and a chunked array are converted.
    tm.assert_extension_array_equal(interval_dtype.__from_arrow__(structs), wanted)
    tm.assert_extension_array_equal(
        interval_dtype.__from_arrow__(pa.chunked_array([structs])), wanted
    )
|
@ -0,0 +1,93 @@
|
||||
"""Tests for Interval-Interval operations, such as overlaps, contains, etc."""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import IntervalArray
|
||||
|
||||
|
||||
@pytest.fixture(params=[IntervalArray, IntervalIndex])
def constructor(request):
    """
    Provide each interval container class (IntervalArray, IntervalIndex) in turn.
    """
    container_cls = request.param
    return container_cls
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        (Timedelta("0 days"), Timedelta("1 day")),
        (Timestamp("2018-01-01"), Timedelta("1 day")),
        (0, 1),
    ],
    ids=lambda x: type(x[0]).__name__,
)
def start_shift(request):
    """
    Provide a ``(start, shift)`` pair for building intervals of different types;
    adding multiples of ``shift`` to ``start`` yields the endpoints.
    """
    pair = request.param
    return pair
|
||||
|
||||
|
||||
class TestOverlaps:
    """Checks for ``overlaps`` on the interval container classes."""

    def test_overlaps_interval(self, constructor, start_shift, closed, other_closed):
        """Compare a scalar Interval against every element of a container."""
        start, shift = start_shift
        interval = Interval(start, start + 3 * shift, other_closed)

        # One candidate per relationship to ``interval``.
        identical = (start, start + 3 * shift)
        nested = (start + shift, start + 2 * shift)
        spanning = (start - shift, start + 4 * shift)
        partial = (start + 2 * shift, start + 4 * shift)
        adjacent = (start + 3 * shift, start + 4 * shift)
        disjoint = (start + 4 * shift, start + 5 * shift)
        interval_container = constructor.from_tuples(
            [identical, nested, spanning, partial, adjacent, disjoint], closed
        )

        # The adjacent candidate only counts as overlapping when the shared
        # endpoint is closed on both sides.
        touching = interval.closed_right and interval_container.closed_left
        expected = np.array([True, True, True, True, touching, False])
        result = interval_container.overlaps(interval)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("other_constructor", [IntervalArray, IntervalIndex])
    def test_overlaps_interval_container(self, constructor, other_constructor):
        """Container-vs-container overlaps currently raises."""
        # TODO: modify this test when implemented
        breaks = range(5)
        interval_container = constructor.from_breaks(breaks)
        other_container = other_constructor.from_breaks(breaks)
        with pytest.raises(NotImplementedError, match="^$"):
            interval_container.overlaps(other_container)

    def test_overlaps_na(self, constructor, start_shift):
        """NA values are marked as False"""
        start, shift = start_shift
        interval = Interval(start, start + shift)

        first = (start, start + shift)
        last = (start + 2 * shift, start + 3 * shift)
        interval_container = constructor.from_tuples([first, np.nan, last])

        expected = np.array([True, False, False])
        result = interval_container.overlaps(interval)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [10, True, "foo", Timedelta("1 day"), Timestamp("2018-01-01")],
        ids=lambda x: type(x).__name__,
    )
    def test_overlaps_invalid_type(self, constructor, other):
        """Non-interval arguments are rejected with a TypeError."""
        interval_container = constructor.from_breaks(range(5))
        msg = f"`other` must be Interval-like, got {type(other).__name__}"
        with pytest.raises(TypeError, match=msg):
            interval_container.overlaps(other)
|
@ -0,0 +1,248 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
# ``arrays`` and ``scalars`` are kept index-aligned: scalars[i] is the scalar
# operand used together with arrays[i].

# integer dtypes
arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES]
scalars: list[Any] = [2 for _ in arrays]
# floating dtypes
arrays.extend(
    pd.array([0.1, 0.2, 0.3, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES
)
scalars.extend([0.2, 0.2])
# boolean
arrays.append(pd.array([True, False, True, None], dtype="boolean"))
scalars.append(False)
|
||||
|
||||
|
||||
@pytest.fixture(params=zip(arrays, scalars), ids=[a.dtype.name for a in arrays])
def data(request):
    """Fixture returning parametrized (array, scalar) tuple.

    Each parameter pairs one entry of the module-level ``arrays`` with the
    entry of ``scalars`` at the same position; the test id is the array's
    dtype name.  Used to test equivalence of scalars, numpy arrays with
    array ops, and the equivalence of DataFrame and Series ops.
    """
    array_and_scalar = request.param
    return array_and_scalar
|
||||
|
||||
|
||||
def check_skip(data, op_name):
    """
    Skip the current test when ``data`` has boolean dtype and ``op_name``
    contains ``"sub"``.
    """
    if not isinstance(data.dtype, pd.BooleanDtype):
        return
    if "sub" in op_name:
        pytest.skip("subtract not implemented for boolean")
|
||||
|
||||
|
||||
def is_bool_not_implemented(data, op_name):
    """
    Return True when ``data`` has a boolean-kind dtype and ``op_name`` is a
    (possibly reflected) pow, truediv or floordiv operator name.
    """
    # match non-masked behavior
    if data.dtype.kind != "b":
        return False
    # Drop the surrounding underscores and the leading "r" of reflected ops.
    base_name = op_name.strip("_").lstrip("r")
    return base_name in ["pow", "truediv", "floordiv"]
|
||||
|
||||
|
||||
# Test equivalence of scalars, numpy arrays with array ops
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_array_scalar_like_equivalence(data, all_arithmetic_operators):
    """
    An op with a scalar must match the same op with an array of that scalar.

    The scalar is tried both as given and converted through
    ``data.dtype.type``.
    """
    data, scalar = data
    op = tm.get_op_from_name(all_arithmetic_operators)
    check_skip(data, all_arithmetic_operators)

    scalar_array = pd.array([scalar] * len(data), dtype=data.dtype)
    expect_error = is_bool_not_implemented(data, all_arithmetic_operators)

    # TODO also add len-1 array (np.array([scalar], dtype=data.dtype.numpy_dtype))
    for candidate in (scalar, data.dtype.type(scalar)):
        if not expect_error:
            result = op(data, candidate)
            expected = op(data, scalar_array)
            tm.assert_extension_array_equal(result, expected)
            continue

        msg = "operator '.*' not implemented for bool dtypes"
        with pytest.raises(NotImplementedError, match=msg):
            op(data, candidate)
        with pytest.raises(NotImplementedError, match=msg):
            op(data, scalar_array)
|
||||
|
||||
|
||||
def test_array_NA(data, all_arithmetic_operators):
    """
    An op with ``pd.NA`` must match the op with an all-NA array, and neither
    may modify ``data._mask`` in place.
    """
    data, _ = data
    op = tm.get_op_from_name(all_arithmetic_operators)
    check_skip(data, all_arithmetic_operators)

    na_scalar = pd.NA
    na_array = pd.array([pd.NA] * len(data), dtype=data.dtype)

    # Snapshot of the mask to compare against after each operation.
    mask_before = data._mask.copy()

    if is_bool_not_implemented(data, all_arithmetic_operators):
        msg = "operator '.*' not implemented for bool dtypes"
        with pytest.raises(NotImplementedError, match=msg):
            op(data, na_scalar)
        # GH#45421 check op doesn't alter data._mask inplace
        tm.assert_numpy_array_equal(mask_before, data._mask)
    else:
        result = op(data, na_scalar)
        # GH#45421 check op doesn't alter data._mask inplace
        tm.assert_numpy_array_equal(mask_before, data._mask)

        expected = op(data, na_array)
        tm.assert_numpy_array_equal(mask_before, data._mask)

        tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_numpy_array_equivalence(data, all_arithmetic_operators):
    """
    An op with a numpy array must match the op with the equivalent masked
    array built from it.
    """
    data, scalar = data
    op = tm.get_op_from_name(all_arithmetic_operators)
    check_skip(data, all_arithmetic_operators)

    numpy_array = np.array([scalar] * len(data), dtype=data.dtype.numpy_dtype)
    pd_array = pd.array(numpy_array, dtype=data.dtype)

    if not is_bool_not_implemented(data, all_arithmetic_operators):
        result = op(data, numpy_array)
        expected = op(data, pd_array)
        tm.assert_extension_array_equal(result, expected)
        return

    msg = "operator '.*' not implemented for bool dtypes"
    for operand in (numpy_array, pd_array):
        with pytest.raises(NotImplementedError, match=msg):
            op(data, operand)
|
||||
|
||||
|
||||
# Test equivalence with Series and DataFrame ops
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_frame(data, all_arithmetic_operators):
    """
    An op between a one-column DataFrame and a scalar must match wrapping the
    array-level result in a DataFrame.
    """
    data, scalar = data
    op = tm.get_op_from_name(all_arithmetic_operators)
    check_skip(data, all_arithmetic_operators)

    # DataFrame with scalar
    frame = pd.DataFrame({"A": data})

    if not is_bool_not_implemented(data, all_arithmetic_operators):
        result = op(frame, scalar)
        expected = pd.DataFrame({"A": op(data, scalar)})
        tm.assert_frame_equal(result, expected)
        return

    msg = "operator '.*' not implemented for bool dtypes"
    for left in (frame, data):
        with pytest.raises(NotImplementedError, match=msg):
            op(left, scalar)
|
||||
|
||||
|
||||
def test_series(data, all_arithmetic_operators):
    """
    A Series op must match the array-level op wrapped in a Series, for a
    scalar, numpy array, masked array and Series right-hand operand.
    """
    data, scalar = data
    op = tm.get_op_from_name(all_arithmetic_operators)
    check_skip(data, all_arithmetic_operators)

    ser = pd.Series(data)

    repeated = [scalar] * len(data)
    others = [
        scalar,
        np.array(repeated, dtype=data.dtype.numpy_dtype),
        pd.array(repeated, dtype=data.dtype),
        pd.Series(repeated, dtype=data.dtype),
    ]

    expect_error = is_bool_not_implemented(data, all_arithmetic_operators)
    for other in others:
        if expect_error:
            msg = "operator '.*' not implemented for bool dtypes"
            with pytest.raises(NotImplementedError, match=msg):
                op(ser, other)
            continue

        result = op(ser, other)
        expected = pd.Series(op(data, other))
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
# Test generic characteristics / errors
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_error_invalid_object(data, all_arithmetic_operators):
    """
    Calling the operator method directly returns NotImplemented for a
    DataFrame operand and raises for a 2-d ndarray operand.
    """
    data, _ = data

    bound_op = getattr(data, all_arithmetic_operators)

    # 2d -> return NotImplemented
    frame_operand = pd.DataFrame({"A": data})
    assert bound_op(frame_operand) is NotImplemented

    two_dim = np.arange(len(data)).reshape(-1, len(data))
    msg = r"can only perform ops with 1-d structures"
    with pytest.raises(NotImplementedError, match=msg):
        bound_op(two_dim)
|
||||
|
||||
|
||||
def test_error_len_mismatch(data, all_arithmetic_operators):
    """
    Operating with a list-like whose length does not match raises, for both
    the array and a Series wrapping it.
    """
    data, scalar = data
    op = tm.get_op_from_name(all_arithmetic_operators)

    # One element shorter than ``data``.
    short_list = [scalar] * (len(data) - 1)

    if data.dtype.kind == "b" and all_arithmetic_operators.strip("_") in [
        "sub",
        "rsub",
    ]:
        err = TypeError
        msg = (
            r"numpy boolean subtract, the `\-` operator, is not supported, use "
            r"the bitwise_xor, the `\^` operator, or the logical_xor function instead"
        )
    elif is_bool_not_implemented(data, all_arithmetic_operators):
        err = NotImplementedError
        msg = "operator '.*' not implemented for bool dtypes"
    else:
        err = ValueError
        msg = "|".join(
            [
                r"operands could not be broadcast together with shapes \(3,\) \(4,\)",
                r"operands could not be broadcast together with shapes \(4,\) \(3,\)",
            ]
        )

    for other in (short_list, np.array(short_list)):
        with pytest.raises(err, match=msg):
            op(data, other)

        s = pd.Series(data)
        with pytest.raises(err, match=msg):
            op(s, other)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["__neg__", "__abs__", "__invert__"])
def test_unary_op_does_not_propagate_mask(data, op):
    """
    Setting a value on the source Series after a unary op must not change
    the previously computed result.
    """
    # https://github.com/pandas-dev/pandas/issues/39943
    data, _ = data
    ser = pd.Series(data)

    if op == "__invert__" and data.dtype.kind == "f":
        # we follow numpy in raising
        msg = "ufunc 'invert' not supported for the input types"
        # The last target is the underlying ``data._data``: check that this
        # is still the numpy behavior.
        for target in (ser, data, data._data):
            with pytest.raises(TypeError, match=msg):
                getattr(target, op)()
        return

    result = getattr(ser, op)()
    snapshot = result.copy(deep=True)
    ser[0] = None
    tm.assert_series_equal(result, snapshot)
|
@ -0,0 +1,209 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
pytestmark = pytest.mark.filterwarnings(
|
||||
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
|
||||
)
|
||||
|
||||
pa = pytest.importorskip("pyarrow")
|
||||
|
||||
from pandas.core.arrays.arrow._arrow_utils import pyarrow_array_to_numpy_and_mask
|
||||
|
||||
# Sample masked arrays, one per nullable integer dtype and float dtype plus a
# boolean one; each ends with a missing value.
arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES]
arrays.extend(
    pd.array([0.1, 0.2, 0.3, None], dtype=dtype) for dtype in tm.FLOAT_EA_DTYPES
)
arrays.append(pd.array([True, False, True, None], dtype="boolean"))
|
||||
|
||||
|
||||
@pytest.fixture(params=arrays, ids=[a.dtype.name for a in arrays])
def data(request):
    """
    Parametrized fixture supplying each entry of the module-level ``arrays``
    (integer, float and boolean masked arrays), identified by dtype name.
    """
    masked_array = request.param
    return masked_array
|
||||
|
||||
|
||||
def test_arrow_array(data):
    """``pa.array`` on a masked array equals building it from its objects."""
    arrow_type = pa.from_numpy_dtype(data.dtype.numpy_dtype)
    as_objects = data.to_numpy(object, na_value=None)
    expected = pa.array(as_objects, type=arrow_type)

    arr = pa.array(data)
    assert arr.equals(expected)
|
||||
|
||||
|
||||
def test_arrow_roundtrip(data):
    """A DataFrame survives conversion to a pyarrow table and back."""
    df = pd.DataFrame({"a": data})

    table = pa.table(df)
    arrow_field_type = table.field("a").type
    assert arrow_field_type == str(data.dtype.numpy_dtype)

    roundtripped = table.to_pandas()
    assert roundtripped["a"].dtype == data.dtype
    tm.assert_frame_equal(roundtripped, df)
|
||||
|
||||
|
||||
def test_dataframe_from_arrow_types_mapper():
    """``to_pandas(types_mapper=...)`` applies the dtypes the mapper returns."""

    def types_mapper(arrow_type):
        # Booleans map to BooleanDtype, every integer width to Int64Dtype;
        # anything else falls through and yields None.
        if pa.types.is_boolean(arrow_type):
            return pd.BooleanDtype()
        if pa.types.is_integer(arrow_type):
            return pd.Int64Dtype()
        return None

    columns = {
        "bools": pa.array([True, None, False], type=pa.bool_()),
        "ints": pa.array([1, None, 2], type=pa.int64()),
        "small_ints": pa.array([-1, 0, 7], type=pa.int8()),
    }
    record_batch = pa.RecordBatch.from_arrays(
        list(columns.values()), ["bools", "ints", "small_ints"]
    )
    result = record_batch.to_pandas(types_mapper=types_mapper)

    expected = pd.DataFrame(
        {
            "bools": pd.Series([True, None, False], dtype="boolean"),
            "ints": pd.Series([1, None, 2], dtype="Int64"),
            "small_ints": pd.Series([-1, 0, 7], dtype="Int64"),
        }
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_arrow_load_from_zero_chunks(data):
    """A table whose column is a chunked array with no chunks converts back."""
    # GH-41040

    df = pd.DataFrame({"a": data[0:0]})
    source_table = pa.table(df)
    column_type = source_table.field("a").type
    assert column_type == str(data.dtype.numpy_dtype)

    # Rebuild the table with the same schema but zero chunks in the column.
    no_chunks = pa.chunked_array([], type=column_type)
    table = pa.table([no_chunks], schema=source_table.schema)

    result = table.to_pandas()
    assert result["a"].dtype == data.dtype
    tm.assert_frame_equal(result, df)
|
||||
|
||||
|
||||
def test_arrow_from_arrow_uint():
    """``UInt32Dtype.__from_arrow__`` accepts an int64 pyarrow array."""
    # https://github.com/pandas-dev/pandas/issues/31896
    # possible mismatch in types

    values = [1, 2, 3, 4, None]
    int64_arrow = pa.array(values, type="int64")

    result = pd.UInt32Dtype().__from_arrow__(int64_arrow)
    expected = pd.array(values, dtype="UInt32")

    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_arrow_sliced(data):
    """A sliced pyarrow table converts to the matching slice of the frame."""
    # https://github.com/pandas-dev/pandas/issues/38525

    def check_slice(frame):
        # Slice the arrow table from row 2 onward and compare with the
        # positionally sliced frame.
        table = pa.table(frame)
        result = table.slice(2, None).to_pandas()
        expected = frame.iloc[2:].reset_index(drop=True)
        tm.assert_frame_equal(result, expected)

    df = pd.DataFrame({"a": data})
    check_slice(df)

    # no missing values
    df2 = df.fillna(data[0])
    check_slice(df2)
|
||||
|
||||
|
||||
@pytest.fixture
def np_dtype_to_arrays(any_real_numpy_dtype):
    """
    Fixture building, from a numpy dtype, the tuple
    ``(np_dtype, pa_array, np_expected, mask_expected)``.
    """
    np_dtype = np.dtype(any_real_numpy_dtype)

    # None ensures the creation of a bitmask buffer.
    pa_array = pa.array([0, 1, 2, None], type=pa.from_numpy_dtype(np_dtype))

    # Since masked Arrow buffer slots are not required to contain a specific
    # value, assert only the first three values of the created np.array
    np_expected = np.array([0, 1, 2], dtype=np_dtype)
    mask_expected = np.array([True, True, True, False])

    return np_dtype, pa_array, np_expected, mask_expected
|
||||
|
||||
|
||||
def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays):
    """
    Test conversion from pyarrow array to numpy array.

    Modifies the pyarrow buffer to contain padding and offset, which are
    considered valid buffers by pyarrow.

    Also tests empty pyarrow arrays with non empty buffers.
    See https://github.com/pandas-dev/pandas/issues/40896
    """
    np_dtype, pa_array, np_expected, mask_expected = np_dtype_to_arrays

    def rebuild(buffers, length, offset):
        # Assemble a new array of the same type from raw buffers and make
        # sure pyarrow itself accepts it.
        rebuilt = pa.Array.from_buffers(
            type=pa_array.type,
            length=length,
            buffers=buffers,
            offset=offset,
        )
        rebuilt.validate()
        return rebuilt

    def assert_converts(arrow_array, values_expected, validity_expected):
        # Only the first three values are compared (masked slots may hold
        # anything); the mask is compared in full.
        values, validity = pyarrow_array_to_numpy_and_mask(arrow_array, np_dtype)
        tm.assert_numpy_array_equal(values[:3], values_expected)
        tm.assert_numpy_array_equal(validity, validity_expected)

    assert_converts(pa_array, np_expected, mask_expected)

    arrow_buffers = pa_array.buffers()
    mask_buffer = arrow_buffers[0]
    data_buffer = arrow_buffers[1]
    data_buffer_bytes = data_buffer.to_pybytes()

    # Add trailing padding to the buffer.
    data_buffer_trail = pa.py_buffer(data_buffer_bytes + b"\x00")
    pa_array_trail = rebuild(
        [mask_buffer, data_buffer_trail], len(pa_array), pa_array.offset
    )
    assert_converts(pa_array_trail, np_expected, mask_expected)

    # Add offset to the buffer.
    leading = b"\x00" * (pa_array.type.bit_width // 8)
    data_buffer_offset = pa.py_buffer(leading + data_buffer_bytes)
    mask_buffer_offset = pa.py_buffer(b"\x0E")
    pa_array_offset = rebuild(
        [mask_buffer_offset, data_buffer_offset],
        len(pa_array),
        pa_array.offset + 1,
    )
    assert_converts(pa_array_offset, np_expected, mask_expected)

    # Empty array
    np_expected_empty = np.array([], dtype=np_dtype)
    mask_expected_empty = np.array([], dtype=np.bool_)

    pa_array_empty = rebuild([mask_buffer, data_buffer], 0, pa_array.offset)
    assert_converts(pa_array_empty, np_expected_empty, mask_expected_empty)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arr", [pa.nulls(10), pa.chunked_array([pa.nulls(4), pa.nulls(6)])]
)
def test_from_arrow_null(data, arr):
    """An all-null arrow input of length 10 becomes 10 missing values."""
    converted = data.dtype.__from_arrow__(arr)
    assert converted.isna().all()
    assert len(converted) == 10
|
||||
|
||||
|
||||
def test_from_arrow_type_error(data):
    """``__from_arrow__`` raises TypeError when handed a string-typed array."""
    # ensure that __from_arrow__ returns a TypeError when getting a wrong
    # array type

    string_arrow = pa.array(data).cast("string")
    # we don't test the exact error message, only the fact that it raises
    # a TypeError is relevant
    with pytest.raises(TypeError, match=None):
        data.dtype.__from_arrow__(string_arrow)
|
@ -0,0 +1,74 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_integer_dtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import BaseMaskedArray
|
||||
|
||||
# Sample masked arrays for every nullable integer and float dtype; each ends
# with a missing value.
arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES]
arrays.extend(
    pd.array([0.141, -0.268, 5.895, None], dtype=dtype)
    for dtype in tm.FLOAT_EA_DTYPES
)
|
||||
|
||||
|
||||
@pytest.fixture(params=arrays, ids=[a.dtype.name for a in arrays])
def data(request):
    """
    Parametrized fixture supplying each entry of the module-level ``arrays``
    (integer and floating point masked arrays), identified by dtype name.
    """
    masked_array = request.param
    return masked_array
|
||||
|
||||
|
||||
@pytest.fixture()
def numpy_dtype(data):
    """
    Fixture returning the numpy dtype to convert the 'data' array to:
    ``float`` for integer data, otherwise ``data.dtype.type``.
    """
    # For integer dtype, the numpy conversion must be done to float
    return float if is_integer_dtype(data) else data.dtype.type
|
||||
|
||||
|
||||
def test_round(data, numpy_dtype):
    """``round`` on a masked array matches ``np.round`` on its numpy form."""

    def as_numpy():
        # Fresh numpy conversion for each comparison.
        return data.to_numpy(dtype=numpy_dtype, na_value=None)

    # No arguments
    result = data.round()
    expected = pd.array(np.round(as_numpy()), dtype=data.dtype)
    tm.assert_extension_array_equal(result, expected)

    # Decimals argument
    result = data.round(decimals=2)
    expected = pd.array(np.round(as_numpy(), decimals=2), dtype=data.dtype)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_tolist(data):
    """``tolist`` gives the same elements as iterating the array."""
    from_iteration = list(data)
    tm.assert_equal(data.tolist(), from_iteration)
|
||||
|
||||
|
||||
def test_to_numpy():
    """``to_numpy`` on a string-dtyped BaseMaskedArray subclass fills in NA."""
    # GH#56991

    class MyStringArray(BaseMaskedArray):
        dtype = pd.StringDtype()
        _dtype_cls = pd.StringDtype
        _internal_fill_value = pd.NA

    values = np.array(["a", "b", "c"])
    mask = np.array([False, True, False])
    arr = MyStringArray(values=values, mask=mask)

    expected = np.array(["a", pd.NA, "c"])
    tm.assert_numpy_array_equal(arr.to_numpy(), expected)
|
@ -0,0 +1,60 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class TestSetitemValidation:
    """Setting a scalar of an incompatible type into a masked array raises."""

    def _check_setitem_invalid(self, arr, invalid):
        """Assert that every supported way of assigning ``invalid`` raises."""
        pattern = re.escape(f"Invalid value '{str(invalid)}' for dtype {arr.dtype}")

        # Integer position, full slice, then list indexer.
        for key in (0, slice(None), [0]):
            with pytest.raises(TypeError, match=pattern):
                arr[key] = invalid

        # FIXME: don't leave commented-out
        # with pytest.raises(TypeError):
        #    arr[[0]] = [invalid]

        # with pytest.raises(TypeError):
        #    arr[[0]] = np.array([invalid], dtype=object)

        # Series non-coercion, behavior subject to change
        ser = pd.Series(arr)
        with pytest.raises(TypeError, match=pattern):
            ser[0] = invalid
            # TODO: so, so many other variants of this...

    # Scalars expected to be rejected by the boolean, integer and float tests
    # below; each test appends its own dtype-specific extras.
    _invalid_scalars = [
        1 + 2j,
        "True",
        "1",
        "1.0",
        pd.NaT,
        np.datetime64("NaT"),
        np.timedelta64("NaT"),
    ]

    @pytest.mark.parametrize(
        "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
    )
    def test_setitem_validation_scalar_bool(self, invalid):
        bool_arr = pd.array([True, False, None], dtype="boolean")
        self._check_setitem_invalid(bool_arr, invalid)

    @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)])
    def test_setitem_validation_scalar_int(self, invalid, any_int_ea_dtype):
        int_arr = pd.array([1, 2, None], dtype=any_int_ea_dtype)
        self._check_setitem_invalid(int_arr, invalid)

    @pytest.mark.parametrize("invalid", _invalid_scalars + [True])
    def test_setitem_validation_scalar_float(self, invalid, float_ea_dtype):
        float_arr = pd.array([1, 2, None], dtype=float_ea_dtype)
        self._check_setitem_invalid(float_arr, invalid)
|
@ -0,0 +1,154 @@
|
||||
"""
|
||||
Tests shared by MaskedArray subclasses.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.extension.base import BaseOpsUtil
|
||||
|
||||
|
||||
class ComparisonOps(BaseOpsUtil):
    """Comparison-operator checks shared by masked array test classes."""

    def _compare_other(self, data, op, other):
        """
        Check ``op(data, other)`` at array and Series level against the op
        applied to ``data._data`` with ``pd.NA`` at ``data._mask``.
        """
        raw_values = data._data
        missing = data._mask

        # array
        result = pd.Series(op(data, other))
        expected = pd.Series(op(raw_values, other), dtype="boolean")
        # fill the nan locations
        expected[missing] = pd.NA
        tm.assert_series_equal(result, expected)

        # series
        result = op(pd.Series(data), other)
        # Set nullable dtype here to avoid upcasting when setting to pd.NA below
        expected = op(pd.Series(raw_values), other).astype("boolean")
        # fill the nan locations
        expected[missing] = pd.NA
        tm.assert_series_equal(result, expected)

    # subclass will override to parametrize 'other'
    def test_scalar(self, other, comparison_op, dtype):
        """Compare a masked array with a scalar, including ``pd.NA``."""
        left = pd.array([1, 0, None], dtype=dtype)

        result = comparison_op(left, other)

        if other is not pd.NA:
            values = comparison_op(left._data, other)
            expected = pd.arrays.BooleanArray(values, left._mask, copy=True)
        else:
            expected = pd.array([None, None, None], dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        result[0] = pd.NA
        tm.assert_extension_array_equal(left, pd.array([1, 0, None], dtype=dtype))
|
||||
|
||||
|
||||
class NumericOps:
    """Checks shared by the numeric masked array test classes."""

    # Shared by IntegerArray and FloatingArray, not BooleanArray

    def test_searchsorted_nan(self, dtype):
        # The base class casts to object dtype, for which searchsorted returns
        # 0 from the left and 10 from the right.
        arr = pd.array(range(10), dtype=dtype)

        for side in ("left", "right"):
            assert arr.searchsorted(np.nan, side=side) == 10

    def test_no_shared_mask(self, data):
        incremented = data + 1
        assert not tm.shares_memory(incremented, data)

    def test_array(self, comparison_op, dtype):
        """Array-vs-array comparison propagates the union of both masks."""
        left_values = [0, 1, 2, None, None, None]
        right_values = [0, 1, None, 0, 1, None]

        left = pd.array(left_values, dtype=dtype)
        right = pd.array(right_values, dtype=dtype)

        result = comparison_op(left, right)

        values = comparison_op(left._data, right._data)
        mask = left._mask | right._mask
        expected = pd.arrays.BooleanArray(values, mask)
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        result[0] = pd.NA
        tm.assert_extension_array_equal(left, pd.array(left_values, dtype=dtype))
        tm.assert_extension_array_equal(right, pd.array(right_values, dtype=dtype))

    def test_compare_with_booleanarray(self, comparison_op, dtype):
        """Comparing a BooleanArray with 0/1 numeric data matches bool data."""
        left = pd.array([True, False, None] * 3, dtype="boolean")
        right = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype=dtype)
        other = pd.array([False] * 3 + [True] * 3 + [None] * 3, dtype="boolean")

        expected = comparison_op(left, other)
        result = comparison_op(left, right)
        tm.assert_extension_array_equal(result, expected)

        # reversed op
        expected = comparison_op(other, left)
        result = comparison_op(right, left)
        tm.assert_extension_array_equal(result, expected)

    def test_compare_to_string(self, dtype):
        # GH#28930
        ser = pd.Series([1, None], dtype=dtype)
        expected = pd.Series([False, pd.NA], dtype="boolean")

        tm.assert_series_equal(ser == "a", expected)

    def test_ufunc_with_out(self, dtype):
        """In-place ufuncs and ``out=`` interact correctly with masked arrays."""
        arr = pd.array([1, 2, 3], dtype=dtype)
        arr2 = pd.array([1, 2, pd.NA], dtype=dtype)

        mask = arr == arr
        mask2 = arr2 == arr2

        result = np.zeros(3, dtype=bool)
        result |= mask
        # If MaskedArray.__array_ufunc__ handled "out" appropriately,
        # `result` should still be an ndarray.
        assert isinstance(result, np.ndarray)
        assert result.all()

        # result |= mask worked because mask could be cast losslessly to
        # boolean ndarray. mask2 can't, so this raises
        result = np.zeros(3, dtype=bool)
        msg = "Specify an appropriate 'na_value' for this dtype"
        with pytest.raises(ValueError, match=msg):
            result |= mask2

        # addition
        expected = pd.array([2, 4, pd.NA], dtype=dtype)
        res = np.add(arr, arr2)
        tm.assert_extension_array_equal(res, expected)

        # when passing out=arr, we will modify 'arr' inplace.
        res = np.add(arr, arr2, out=arr)
        assert res is arr
        tm.assert_extension_array_equal(res, expected)
        tm.assert_extension_array_equal(arr, expected)

    def test_mul_td64_array(self, dtype):
        # GH#45622
        arr = pd.array([1, 2, pd.NA], dtype=dtype)
        td64_values = np.arange(3, dtype=np.int64).view("m8[ns]")

        result = arr * td64_values
        expected = pd.array([pd.Timedelta(0), pd.Timedelta(2), pd.NaT])
        tm.assert_extension_array_equal(result, expected)
|
@ -0,0 +1,41 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSearchsorted:
    """``searchsorted`` checks on arrays built with ``pd.array``."""

    def test_searchsorted_string(self, string_dtype):
        arr = pd.array(["a", "b", "c"], dtype=string_dtype)

        # A scalar needle yields a scalar position on either side.
        for side, position in (("left", 0), ("right", 1)):
            result = arr.searchsorted("a", side=side)
            assert is_scalar(result)
            assert result == position

    def test_searchsorted_numeric_dtypes_scalar(self, any_real_numpy_dtype):
        arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype)

        scalar_result = arr.searchsorted(30)
        assert is_scalar(scalar_result)
        assert scalar_result == 2

        # A length-1 list yields an intp ndarray rather than a scalar.
        list_result = arr.searchsorted([30])
        tm.assert_numpy_array_equal(list_result, np.array([2], dtype=np.intp))

    def test_searchsorted_numeric_dtypes_vector(self, any_real_numpy_dtype):
        arr = pd.array([1, 3, 90], dtype=any_real_numpy_dtype)
        positions = arr.searchsorted([2, 30])
        tm.assert_numpy_array_equal(positions, np.array([1, 2], dtype=np.intp))

    def test_searchsorted_sorter(self, any_real_numpy_dtype):
        arr = pd.array([3, 1, 2], dtype=any_real_numpy_dtype)
        order = np.argsort(arr)
        positions = arr.searchsorted([0, 3], sorter=order)
        tm.assert_numpy_array_equal(positions, np.array([0, 2], dtype=np.intp))
|
@ -0,0 +1,324 @@
|
||||
"""
|
||||
Additional tests for NumpyExtensionArray that aren't covered by
|
||||
the interface tests.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import NumpyEADtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import NumpyExtensionArray
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        np.array(["a", "b"], dtype=object),
        np.array([0, 1], dtype=float),
        np.array([0, 1], dtype=int),
        np.array([0, 1 + 2j], dtype=complex),
        np.array([True, False], dtype=bool),
        np.array([0, 1], dtype="datetime64[ns]"),
        np.array([0, 1], dtype="timedelta64[ns]"),
    ]
)
def any_numpy_array(request):
    """
    Parametrized fixture for NumPy arrays with different dtypes
    (object, float, int, complex, bool, datetime64[ns], timedelta64[ns]).

    This excludes string and bytes.
    """
    numpy_array = request.param
    return numpy_array
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# NumpyEADtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        ("bool", True),
        ("int", True),
        ("uint", True),
        ("float", True),
        ("complex", True),
        ("str", False),
        ("bytes", False),
        ("datetime64[ns]", False),
        ("object", False),
        ("void", False),
    ],
)
def test_is_numeric(dtype, expected):
    """``NumpyEADtype._is_numeric`` is exactly the expected bool per dtype."""
    wrapped = NumpyEADtype(dtype)
    assert wrapped._is_numeric is expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype, expected",
    [
        ("bool", True),
        ("int", False),
        ("uint", False),
        ("float", False),
        ("complex", False),
        ("str", False),
        ("bytes", False),
        ("datetime64[ns]", False),
        ("object", False),
        ("void", False),
    ],
)
def test_is_boolean(dtype, expected):
    """``NumpyEADtype._is_boolean`` is exactly the expected bool per dtype."""
    wrapped = NumpyEADtype(dtype)
    assert wrapped._is_boolean is expected
|
||||
|
||||
|
||||
def test_repr():
|
||||
dtype = NumpyEADtype(np.dtype("int64"))
|
||||
assert repr(dtype) == "NumpyEADtype('int64')"
|
||||
|
||||
|
||||
def test_constructor_from_string():
|
||||
result = NumpyEADtype.construct_from_string("int64")
|
||||
expected = NumpyEADtype(np.dtype("int64"))
|
||||
assert result == expected
|
||||
|
||||
|
||||
def test_dtype_idempotent(any_numpy_dtype):
    """Wrapping an existing ``NumpyEADtype`` in ``NumpyEADtype`` compares equal."""
    wrapped_once = NumpyEADtype(any_numpy_dtype)
    wrapped_twice = NumpyEADtype(wrapped_once)

    assert wrapped_twice == wrapped_once
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Construction
|
||||
|
||||
|
||||
def test_constructor_no_coercion():
    """Constructing from a plain list raises ``ValueError`` mentioning 'NumPy array'."""
    not_an_ndarray = [1, 2, 3]
    with pytest.raises(ValueError, match="NumPy array"):
        NumpyExtensionArray(not_an_ndarray)
|
||||
|
||||
|
||||
def test_series_constructor_with_copy():
|
||||
ndarray = np.array([1, 2, 3])
|
||||
ser = pd.Series(NumpyExtensionArray(ndarray), copy=True)
|
||||
|
||||
assert ser.values is not ndarray
|
||||
|
||||
|
||||
def test_series_constructor_with_astype():
|
||||
ndarray = np.array([1, 2, 3])
|
||||
result = pd.Series(NumpyExtensionArray(ndarray), dtype="float64")
|
||||
expected = pd.Series([1.0, 2.0, 3.0], dtype="float64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_from_sequence_dtype():
|
||||
arr = np.array([1, 2, 3], dtype="int64")
|
||||
result = NumpyExtensionArray._from_sequence(arr, dtype="uint64")
|
||||
expected = NumpyExtensionArray(np.array([1, 2, 3], dtype="uint64"))
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_constructor_copy():
|
||||
arr = np.array([0, 1])
|
||||
result = NumpyExtensionArray(arr, copy=True)
|
||||
|
||||
assert not tm.shares_memory(result, arr)
|
||||
|
||||
|
||||
def test_constructor_with_data(any_numpy_array):
    """The wrapped array's ``dtype.numpy_dtype`` equals the input ndarray's dtype."""
    wrapped = NumpyExtensionArray(any_numpy_array)
    assert wrapped.dtype.numpy_dtype == any_numpy_array.dtype
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Conversion
|
||||
|
||||
|
||||
def test_to_numpy():
|
||||
arr = NumpyExtensionArray(np.array([1, 2, 3]))
|
||||
result = arr.to_numpy()
|
||||
assert result is arr._ndarray
|
||||
|
||||
result = arr.to_numpy(copy=True)
|
||||
assert result is not arr._ndarray
|
||||
|
||||
result = arr.to_numpy(dtype="f8")
|
||||
expected = np.array([1, 2, 3], dtype="f8")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Setitem
|
||||
|
||||
|
||||
def test_setitem_series():
    """Assigning through ``ser.array`` is expected to be visible in the Series."""
    ser = pd.Series([1, 2, 3])
    backing = ser.array
    backing[0] = 10

    tm.assert_series_equal(ser, pd.Series([10, 2, 3]))
|
||||
|
||||
|
||||
def test_setitem(any_numpy_array):
    """Setting element 0 to element 1 behaves the same as on the raw ndarray."""
    raw = any_numpy_array
    wrapped = NumpyExtensionArray(raw, copy=True)

    # Apply the same assignment to the extension array and to the ndarray.
    wrapped[0] = wrapped[1]
    raw[0] = raw[1]

    tm.assert_numpy_array_equal(wrapped.to_numpy(), raw)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Reductions
|
||||
|
||||
|
||||
def test_bad_reduce_raises():
    """Calling ``_reduce`` with an unknown reduction name raises ``TypeError``."""
    wrapped = NumpyExtensionArray(np.array([1, 2, 3], dtype="int64"))
    msg = "cannot perform not_a_method with type int"

    # NOTE(review): the expected-message string is itself passed as the
    # reduction name, exactly as in the original test -- confirm whether
    # "not_a_method" was intended instead.
    with pytest.raises(TypeError, match=msg):
        wrapped._reduce(msg)
|
||||
|
||||
|
||||
def test_validate_reduction_keyword_args():
    """``all(keepdims=True)`` raises ``ValueError`` about the unsupported keyword."""
    wrapped = NumpyExtensionArray(np.array([1, 2, 3]))
    expected_message = "the 'keepdims' parameter is not supported .*all"

    with pytest.raises(ValueError, match=expected_message):
        wrapped.all(keepdims=True)
|
||||
|
||||
|
||||
def test_np_max_nested_tuples():
    """``max`` and ``np.maximum.reduce`` over nested tuples give the third element."""
    # case where checking in ufunc.nout works while checking for tuples
    # does not
    nested = [
        (("j", "k"), ("l", "m")),
        (("l", "m"), ("o", "p")),
        (("o", "p"), ("j", "k")),
    ]
    ser = pd.Series(nested)
    arr = ser.array

    # The method-based reductions must return the very same object.
    assert arr.max() is arr[2]
    assert ser.max() is arr[2]

    # The ufunc-based reductions only need to compare equal; the array is
    # checked first, then the Series.
    for container in (arr, ser):
        reduced = np.maximum.reduce(container)
        assert reduced == arr[2]
|
||||
|
||||
|
||||
def test_np_reduce_2d():
|
||||
raw = np.arange(12).reshape(4, 3)
|
||||
arr = NumpyExtensionArray(raw)
|
||||
|
||||
res = np.maximum.reduce(arr, axis=0)
|
||||
tm.assert_extension_array_equal(res, arr[-1])
|
||||
|
||||
alt = arr.max(axis=0)
|
||||
tm.assert_extension_array_equal(alt, arr[-1])
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Ops
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.abs, np.negative, np.positive])
def test_ufunc_unary(ufunc):
    """A unary ufunc on the extension array matches the ufunc on its ndarray."""
    arr = NumpyExtensionArray(np.array([-1.0, 0.0, 1.0]))

    direct = ufunc(arr)
    expected = NumpyExtensionArray(ufunc(arr._ndarray))
    tm.assert_extension_array_equal(direct, expected)

    # same thing but with the 'out' keyword
    target = NumpyExtensionArray(np.array([-9.0, -9.0, -9.0]))
    ufunc(arr, out=target)
    tm.assert_extension_array_equal(target, expected)
|
||||
|
||||
|
||||
def test_ufunc():
|
||||
arr = NumpyExtensionArray(np.array([-1.0, 0.0, 1.0]))
|
||||
|
||||
r1, r2 = np.divmod(arr, np.add(arr, 2))
|
||||
e1, e2 = np.divmod(arr._ndarray, np.add(arr._ndarray, 2))
|
||||
e1 = NumpyExtensionArray(e1)
|
||||
e2 = NumpyExtensionArray(e2)
|
||||
tm.assert_extension_array_equal(r1, e1)
|
||||
tm.assert_extension_array_equal(r2, e2)
|
||||
|
||||
|
||||
def test_basic_binop():
|
||||
# Just a basic smoke test. The EA interface tests exercise this
|
||||
# more thoroughly.
|
||||
x = NumpyExtensionArray(np.array([1, 2, 3]))
|
||||
result = x + x
|
||||
expected = NumpyExtensionArray(np.array([2, 4, 6]))
|
||||
tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, object])
def test_setitem_object_typecode(dtype):
    """Setting a string element works for both parametrized string-array dtypes."""
    target = NumpyExtensionArray(np.array(["a", "b", "c"], dtype=dtype))
    target[0] = "t"

    expected = NumpyExtensionArray(np.array(["t", "b", "c"], dtype=dtype))
    tm.assert_extension_array_equal(target, expected)
|
||||
|
||||
|
||||
def test_setitem_no_coercion():
    """Setting a string into an int array raises; setting a float keeps an int."""
    # https://github.com/pandas-dev/pandas/issues/28150
    int_arr = NumpyExtensionArray(np.array([1, 2, 3]))
    with pytest.raises(ValueError, match="int"):
        int_arr[0] = "a"

    # With a value that we do coerce, check that we coerce the value
    # and not the underlying array.
    int_arr[0] = 2.5
    first = int_arr[0]
    assert isinstance(first, (int, np.integer)), type(int_arr[0])
|
||||
|
||||
|
||||
def test_setitem_preserves_views():
|
||||
# GH#28150, see also extension test of the same name
|
||||
arr = NumpyExtensionArray(np.array([1, 2, 3]))
|
||||
view1 = arr.view()
|
||||
view2 = arr[:]
|
||||
view3 = np.asarray(arr)
|
||||
|
||||
arr[0] = 9
|
||||
assert view1[0] == 9
|
||||
assert view2[0] == 9
|
||||
assert view3[0] == 9
|
||||
|
||||
arr[-1] = 2.5
|
||||
view1[-1] = 5
|
||||
assert arr[-1] == 5
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.int64, np.uint64])
def test_quantile_empty(dtype):
    """``_quantile`` on an empty integer array returns NaN for each quantile."""
    # we should get back np.nans, not -1s
    empty = NumpyExtensionArray(np.array([], dtype=dtype))
    quantiles = pd.Index([0.0, 0.5])

    computed = empty._quantile(quantiles, interpolation="linear")

    all_nan = NumpyExtensionArray(np.array([np.nan, np.nan]))
    tm.assert_extension_array_equal(computed, all_nan)
|
||||
|
||||
|
||||
def test_factorize_unsigned():
|
||||
# don't raise when calling factorize on unsigned int NumpyExtensionArray
|
||||
arr = np.array([1, 2, 3], dtype=np.uint64)
|
||||
obj = NumpyExtensionArray(arr)
|
||||
|
||||
res_codes, res_unique = obj.factorize()
|
||||
exp_codes, exp_unique = pd.factorize(arr)
|
||||
|
||||
tm.assert_numpy_array_equal(res_codes, exp_codes)
|
||||
|
||||
tm.assert_extension_array_equal(res_unique, NumpyExtensionArray(exp_unique))
|
@ -0,0 +1,130 @@
|
||||
import pytest
|
||||
|
||||
from pandas.compat.pyarrow import pa_version_under10p1
|
||||
|
||||
from pandas.core.dtypes.dtypes import PeriodDtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import (
|
||||
PeriodArray,
|
||||
period_array,
|
||||
)
|
||||
|
||||
# Module-wide marker: ignore the "Passing a BlockManager to DataFrame"
# DeprecationWarning in every test of this module.
pytestmark = pytest.mark.filterwarnings(
    "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)

# Import pyarrow, or skip this module when it cannot be imported.
pa = pytest.importorskip("pyarrow")
|
||||
|
||||
|
||||
def test_arrow_extension_type():
    """``ArrowPeriodType`` equality and hashing follow the ``freq`` argument."""
    from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

    daily = ArrowPeriodType("D")
    daily_again = ArrowPeriodType("D")
    monthly = ArrowPeriodType("M")

    assert daily.freq == "D"

    # Same freq: equal and same hash.
    assert daily == daily_again
    # Different freq: unequal.
    assert daily != monthly
    assert hash(daily) == hash(daily_again)
    assert hash(daily) != hash(monthly)
|
||||
|
||||
|
||||
@pytest.mark.xfail(not pa_version_under10p1, reason="Wrong behavior with pyarrow 10")
@pytest.mark.parametrize(
    "data, freq",
    [
        (pd.date_range("2017", periods=3), "D"),
        (pd.date_range("2017", periods=3, freq="YE"), "Y-DEC"),
    ],
)
def test_arrow_array(data, freq):
    """Convert a ``PeriodArray`` with ``pa.array`` and check type, storage and errors."""
    from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

    periods = period_array(data, freq=freq)

    converted = pa.array(periods)
    assert isinstance(converted.type, ArrowPeriodType)
    assert converted.type.freq == freq
    int_storage = pa.array(periods.asi8, type="int64")
    assert converted.storage.equals(int_storage)

    # convert to its storage type
    as_int64 = pa.array(periods, type=pa.int64())
    assert as_int64.equals(int_storage)

    # unsupported conversions
    float_msg = "Not supported to convert PeriodArray to 'double' type"
    with pytest.raises(TypeError, match=float_msg):
        pa.array(periods, type="float64")

    with pytest.raises(TypeError, match="different 'freq'"):
        pa.array(periods, type=ArrowPeriodType("T"))
|
||||
|
||||
|
||||
def test_arrow_array_missing():
    """A ``NaT`` entry becomes a null in the int64 storage of the arrow array."""
    from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

    periods = PeriodArray([1, 2, 3], dtype="period[D]")
    periods[1] = pd.NaT

    converted = pa.array(periods)
    assert isinstance(converted.type, ArrowPeriodType)
    assert converted.type.freq == "D"

    storage_expected = pa.array([1, None, 3], type="int64")
    assert converted.storage.equals(storage_expected)
|
||||
|
||||
|
||||
def test_arrow_table_roundtrip():
    """A period column survives DataFrame -> arrow table -> DataFrame, also when concatenated."""
    from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

    periods = PeriodArray([1, 2, 3], dtype="period[D]")
    periods[1] = pd.NaT
    df = pd.DataFrame({"a": periods})

    # Single table round trip.
    table = pa.table(df)
    assert isinstance(table.field("a").type, ArrowPeriodType)
    roundtripped = table.to_pandas()
    assert isinstance(roundtripped["a"].dtype, PeriodDtype)
    tm.assert_frame_equal(roundtripped, df)

    # Round trip of the table concatenated with itself.
    doubled_table = pa.concat_tables([table, table])
    doubled = doubled_table.to_pandas()
    tm.assert_frame_equal(doubled, pd.concat([df, df], ignore_index=True))
|
||||
|
||||
|
||||
def test_arrow_load_from_zero_chunks():
    """A table whose period column has zero chunks converts back to an empty frame."""
    # GH-41040

    from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

    df = pd.DataFrame({"a": PeriodArray([], dtype="period[D]")})

    table = pa.table(df)
    assert isinstance(table.field("a").type, ArrowPeriodType)

    # Rebuild the table from a chunked array with no chunks, reusing the
    # column type and schema of the original table.
    zero_chunks = pa.chunked_array([], type=table.column(0).type)
    table = pa.table([zero_chunks], schema=table.schema)

    restored = table.to_pandas()
    assert isinstance(restored["a"].dtype, PeriodDtype)
    tm.assert_frame_equal(restored, df)
|
||||
|
||||
|
||||
def test_arrow_table_roundtrip_without_metadata():
    """The period column round-trips even after the schema metadata is removed."""
    periods = PeriodArray([1, 2, 3], dtype="period[h]")
    periods[1] = pd.NaT
    df = pd.DataFrame({"a": periods})

    # remove the metadata
    stripped = pa.table(df).replace_schema_metadata()
    assert stripped.schema.metadata is None

    restored = stripped.to_pandas()
    assert isinstance(restored["a"].dtype, PeriodDtype)
    tm.assert_frame_equal(restored, df)
|
@ -0,0 +1,67 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.dtypes import PeriodDtype
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import period_array
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype_int(dtype):
    """``astype`` to int64 returns the i8 view; other integer dtypes raise ``TypeError``."""
    # We choose to ignore the sign and size of integers for
    # Period/Datetime/Timedelta astype
    periods = period_array(["2000", "2001", None], freq="D")

    if np.dtype(dtype) == np.int64:
        converted = periods.astype(dtype)
        tm.assert_numpy_array_equal(converted, periods._ndarray.view("i8"))
    else:
        with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
            periods.astype(dtype)
|
||||
|
||||
|
||||
def test_astype_copies():
    """``astype(int64)`` shares the backing array when ``copy=False`` and not when ``copy=True``."""
    periods = period_array(["2000", "2001", None], freq="D")

    # Add the `.base`, since we now use `.asi8` which returns a view.
    # We could maybe override it in PeriodArray to return ._ndarray directly.
    no_copy = periods.astype(np.int64, copy=False)
    assert no_copy.base is periods._ndarray

    copied = periods.astype(np.int64, copy=True)
    assert copied is not periods._ndarray
    tm.assert_numpy_array_equal(copied, periods._ndarray.view("i8"))
|
||||
|
||||
|
||||
def test_astype_categorical():
    """``astype("category")`` yields codes over the unique periods, with -1 for missing."""
    periods = period_array(["2000", "2001", "2001", None], freq="D")

    categories = pd.PeriodIndex(["2000", "2001"], freq="D")
    expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories)

    tm.assert_categorical_equal(periods.astype("category"), expected)
|
||||
|
||||
|
||||
def test_astype_period():
    """Casting a daily period array to ``PeriodDtype("M")`` gives the monthly array."""
    values = ["2000", "2001", None]
    daily = period_array(values, freq="D")
    monthly = period_array(values, freq="M")

    tm.assert_period_array_equal(daily.astype(PeriodDtype("M")), monthly)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
def test_astype_datetime(dtype):
    """Period -> datetime64[ns] converts; period -> timedelta64[ns] raises ``TypeError``."""
    periods = period_array(["2000", "2001", None], freq="D")

    if dtype != "timedelta64[ns]":
        # GH#45038 allow period->dt64 because we allow dt64->period
        converted = periods.astype(dtype)
        expected = pd.DatetimeIndex(["2000", "2001", pd.NaT], dtype=dtype)._data
        tm.assert_datetime_array_equal(converted, expected)
    else:
        # slice off the [ns] so that the regex matches.
        pattern = dtype[:-4]
        with pytest.raises(TypeError, match=pattern):
            periods.astype(dtype)
|
@ -0,0 +1,156 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import iNaT
|
||||
from pandas._libs.tslibs.offsets import MonthEnd
|
||||
from pandas._libs.tslibs.period import IncompatibleFrequency
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import (
|
||||
PeriodArray,
|
||||
period_array,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, freq, expected",
    [
        ([pd.Period("2017", "D")], None, [17167]),
        ([pd.Period("2017", "D")], "D", [17167]),
        ([2017], "D", [17167]),
        (["2017"], "D", [17167]),
        ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]),
        ([pd.Period("2017", "D"), None], None, [17167, iNaT]),
        (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]),
        (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]),
        (pd.period_range("2017", periods=4, freq="Q"), None, [188, 189, 190, 191]),
    ],
)
def test_period_array_ok(data, freq, expected):
    """``period_array`` produces the expected int64 ordinals for each input."""
    ordinals = period_array(data, freq=freq).asi8
    expected_ordinals = np.asarray(expected, dtype=np.int64)
    tm.assert_numpy_array_equal(ordinals, expected_ordinals)
|
||||
|
||||
|
||||
def test_period_array_readonly_object():
    """A read-only object ndarray of Periods is accepted by the constructors."""
    # https://github.com/pandas-dev/pandas/issues/25403
    periods = period_array([pd.Period("2019-01-01")])
    readonly = np.asarray(periods, dtype="object")
    readonly.setflags(write=False)

    tm.assert_period_array_equal(period_array(readonly), periods)
    tm.assert_series_equal(pd.Series(readonly), pd.Series(periods))
    tm.assert_frame_equal(
        pd.DataFrame({"A": readonly}), pd.DataFrame({"A": periods})
    )
|
||||
|
||||
|
||||
def test_from_datetime64_freq_changes():
    """Daily datetimes converted with ``freq="M"`` all land in the same month."""
    # https://github.com/pandas-dev/pandas/issues/23438
    daily = pd.date_range("2017", periods=3, freq="D")
    expected = period_array(["2017-01-01", "2017-01-01", "2017-01-01"], freq="M")

    converted = PeriodArray._from_datetime64(daily, freq="M")
    tm.assert_period_array_equal(converted, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("freq", ["2M", MonthEnd(2)])
def test_from_datetime64_freq_2M(freq):
    """``_from_datetime64`` with a two-month freq matches ``period_array`` with that freq."""
    stamps = np.array(
        ["2020-01-01T00:00:00", "2020-01-02T00:00:00"], dtype="datetime64[ns]"
    )
    converted = PeriodArray._from_datetime64(stamps, freq)

    expected = period_array(["2020-01", "2020-01"], freq=freq)
    tm.assert_period_array_equal(converted, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, freq, msg",
    [
        (
            [pd.Period("2017", "D"), pd.Period("2017", "Y")],
            None,
            "Input has different freq",
        ),
        ([pd.Period("2017", "D")], "Y", "Input has different freq"),
    ],
)
def test_period_array_raises(data, freq, msg):
    """Mismatched frequencies make ``period_array`` raise ``IncompatibleFrequency``."""
    expectation = pytest.raises(IncompatibleFrequency, match=msg)
    with expectation:
        period_array(data, freq)
|
||||
|
||||
|
||||
def test_period_array_non_period_series_raies():
    """Passing an integer Series to ``PeriodArray`` raises ``TypeError`` about dtype."""
    int_series = pd.Series([1, 2, 3])
    with pytest.raises(TypeError, match="dtype"):
        PeriodArray(int_series, dtype="period[D]")
|
||||
|
||||
|
||||
def test_period_array_freq_mismatch():
    """Re-wrapping a daily array with a monthly dtype raises ``IncompatibleFrequency``."""
    daily = period_array(["2000", "2001"], freq="D")

    # The monthly dtype is given once as a string and once as a PeriodDtype
    # built from a MonthEnd offset.
    monthly_dtypes = ("period[M]", pd.PeriodDtype(pd.tseries.offsets.MonthEnd()))
    for monthly in monthly_dtypes:
        with pytest.raises(IncompatibleFrequency, match="freq"):
            PeriodArray(daily, dtype=monthly)
|
||||
|
||||
|
||||
def test_from_sequence_disallows_i8():
    """``_from_sequence`` rejects raw i8 ordinals, as an ndarray or as a list."""
    periods = period_array(["2000", "2001"], freq="D")

    # The error is matched against the first period's ordinal.
    msg = str(periods[0].ordinal)

    for ordinals in (periods.asi8, list(periods.asi8)):
        with pytest.raises(TypeError, match=msg):
            PeriodArray._from_sequence(ordinals, dtype=periods.dtype)
|
||||
|
||||
|
||||
def test_from_td64nat_sequence_raises():
    """A timedelta64 NaT in an object array is rejected by every period constructor."""
    # GH#44507
    td_nat = pd.NaT.to_numpy("m8[ns]")

    dtype = pd.period_range("2005-01-01", periods=3, freq="D").dtype

    # Build a one-element object array holding the td64 NaT.
    arr = np.array([None], dtype=object)
    arr[0] = td_nat

    msg = "Value must be Period, string, integer, or datetime"
    constructors = (
        PeriodArray._from_sequence,
        pd.PeriodIndex,
        pd.Index,
        pd.array,
        pd.Series,
        pd.DataFrame,
    )
    for construct in constructors:
        with pytest.raises(ValueError, match=msg):
            construct(arr, dtype=dtype)
|
||||
|
||||
|
||||
def test_freq_deprecated():
|
||||
# GH#52462
|
||||
data = np.arange(5).astype(np.int64)
|
||||
msg = "The 'freq' keyword in the PeriodArray constructor is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
res = PeriodArray(data, freq="M")
|
||||
|
||||
expected = PeriodArray(data, dtype="period[M]")
|
||||
tm.assert_equal(res, expected)
|
||||
|
||||
|
||||
def test_period_array_from_datetime64():
    """``_from_datetime64`` with ``MonthEnd(2)`` matches ``period_array`` with that freq."""
    two_months = MonthEnd(2)
    stamps = np.array(
        ["2020-01-01T00:00:00", "2020-02-02T00:00:00"], dtype="datetime64[ns]"
    )
    converted = PeriodArray._from_datetime64(stamps, freq=two_months)

    expected = period_array(["2020-01-01", "2020-02-01"], freq=MonthEnd(2))
    tm.assert_period_array_equal(converted, expected)
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user