Updated the script so that it can be controlled by a Node.js web app
This commit is contained in:
@ -0,0 +1,62 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestAppend:
    """Exercise ``append`` between ``CategoricalIndex`` and plain ``Index`` objects."""

    @pytest.fixture
    def ci(self):
        """Return an unordered index of "aabbca" whose categories are c, a, b."""
        return CategoricalIndex(
            list("aabbca"),
            categories=list("cab"),
            ordered=False,
        )

    def test_append(self, ci):
        """Slices of one index, appended back together, reproduce that index."""
        # Two slices that share the same categories.
        head, tail = ci[:3], ci[3:]
        tm.assert_index_equal(head.append(tail), ci, exact=True)

        # Three slices; the trailing ones are handed over as a list.
        leading, *remaining = [ci[:1], ci[1:3], ci[3:]]
        tm.assert_index_equal(leading.append(remaining), ci, exact=True)

    def test_append_empty(self, ci):
        """Appending an empty list leaves the index unchanged."""
        unchanged = ci.append([])
        tm.assert_index_equal(unchanged, ci, exact=True)

    def test_append_mismatched_categories(self, ci):
        """Appending ``ci.values`` with altered categories raises ``TypeError``."""
        expected_msg = "all inputs must be Index"

        # categories extended with an extra entry
        with pytest.raises(TypeError, match=expected_msg):
            ci.append(ci.values.set_categories(list("abcd")))

        # same categories, different order
        with pytest.raises(TypeError, match=expected_msg):
            ci.append(ci.values.reorder_categories(list("abc")))

    def test_append_category_objects(self, ci):
        """A plain ``Index`` holding only known categories keeps the result categorical."""
        combined = ci.append(Index(list("ca")))
        wanted = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
        tm.assert_index_equal(combined, wanted, exact=True)

    def test_append_non_categories(self, ci):
        """A plain ``Index`` with a value outside the categories gives a plain ``Index``."""
        # "d" is not a category -> cast to object via concat_compat
        combined = ci.append(Index(list("ad")))
        wanted = Index(list("aabbcaad"))
        tm.assert_index_equal(combined, wanted, exact=True)

    def test_append_object(self, ci):
        """GH#14298: a non-categorical base object yields a plain ``Index``."""
        combined = Index(list("ca")).append(ci)
        wanted = Index(["c", "a", "a", "a", "b", "b", "c", "a"])
        tm.assert_index_equal(combined, wanted, exact=True)

    def test_append_to_another(self):
        """Appending a ``CategoricalIndex`` to a plain ``Index`` (hits Index._concat)."""
        plain = Index(list("ab"))
        categorical = CategoricalIndex(list("de"))
        wanted = Index(list("abde"))
        tm.assert_index_equal(plain.append(categorical), wanted)
|
@ -0,0 +1,90 @@
|
||||
from datetime import date
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestAstype:
    """Checks for ``astype`` conversions involving ``CategoricalIndex``."""

    def test_astype(self):
        """Cast to object and to interval; compare with directly built indexes."""
        cat_index = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=False
        )

        as_object = cat_index.astype(object)
        tm.assert_index_equal(as_object, Index(np.array(cat_index), dtype=object))

        # Equal in content, but a plain Index rather than a CategoricalIndex.
        assert as_object.equals(cat_index)
        assert isinstance(as_object, Index)
        assert not isinstance(as_object, CategoricalIndex)

        # Interval-valued categories, built from codes 0, 1, -1.
        intervals = IntervalIndex.from_arrays(
            left=[-0.001, 2.0], right=[2, 4], closed="right"
        )
        cat_index = CategoricalIndex(
            Categorical.from_codes([0, 1, -1], categories=intervals, ordered=True)
        )

        as_interval = cat_index.astype("interval")
        wanted = intervals.take([0, 1, -1], allow_fill=True, fill_value=np.nan)
        tm.assert_index_equal(as_interval, wanted)

        # Rebuilding an IntervalIndex from the values gives the same thing.
        rebuilt = IntervalIndex(as_interval.values)
        tm.assert_index_equal(rebuilt, wanted)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("index_ordered", [True, False])
    def test_astype_category(self, name, dtype_ordered, index_ordered):
        """GH#18630: cast to a ``CategoricalDtype`` with and without categories."""
        base = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=index_ordered
        )
        index = base.rename(name) if name else base

        # dtype without explicit categories: the index's own categories are kept
        target = CategoricalDtype(ordered=dtype_ordered)
        wanted = CategoricalIndex(
            index.tolist(),
            name=name,
            categories=index.categories,
            ordered=dtype_ordered,
        )
        tm.assert_index_equal(index.astype(target), wanted)

        # dtype with explicit categories: the unique values minus the last one
        target = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered)
        wanted = CategoricalIndex(index.tolist(), name=name, dtype=target)
        tm.assert_index_equal(index.astype(target), wanted)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            tm.assert_index_equal(index.astype("category"), index)

    @pytest.mark.parametrize("box", [True, False])
    def test_categorical_date_roundtrip(self, box):
        """Going to category and back to object keeps ``date`` elements as ``date``."""
        today = date.today()

        container = Index([today, today])
        assert container.dtype == object
        if box:
            # run the same round trip on the backing array instead of the Index
            container = container.array

        back_again = container.astype("category").astype(object)
        assert back_again.dtype == object
        assert type(back_again[0]) is date
|
@ -0,0 +1,394 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
from pandas._libs.arrays import NDArrayBacked
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.api import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
)
|
||||
|
||||
|
||||
class TestCategoricalIndex:
|
||||
@pytest.fixture
|
||||
def simple_index(self) -> CategoricalIndex:
|
||||
return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
|
||||
|
||||
def test_can_hold_identifiers(self):
|
||||
idx = CategoricalIndex(list("aabbca"), categories=None, ordered=False)
|
||||
key = idx[0]
|
||||
assert idx._can_hold_identifiers_and_holds_name(key) is True
|
||||
|
||||
def test_insert(self, simple_index):
|
||||
ci = simple_index
|
||||
categories = ci.categories
|
||||
|
||||
# test 0th element
|
||||
result = ci.insert(0, "a")
|
||||
expected = CategoricalIndex(list("aaabbca"), categories=categories)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
# test Nth element that follows Python list behavior
|
||||
result = ci.insert(-1, "a")
|
||||
expected = CategoricalIndex(list("aabbcaa"), categories=categories)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
# test empty
|
||||
result = CategoricalIndex([], categories=categories).insert(0, "a")
|
||||
expected = CategoricalIndex(["a"], categories=categories)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
# invalid -> cast to object
|
||||
expected = ci.astype(object).insert(0, "d")
|
||||
result = ci.insert(0, "d").astype(object)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
# GH 18295 (test missing)
|
||||
expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"])
|
||||
for na in (np.nan, pd.NaT, None):
|
||||
result = CategoricalIndex(list("aabcb")).insert(1, na)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_insert_na_mismatched_dtype(self):
|
||||
ci = CategoricalIndex([0, 1, 1])
|
||||
result = ci.insert(0, pd.NaT)
|
||||
expected = Index([pd.NaT, 0, 1, 1], dtype=object)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_delete(self, simple_index):
|
||||
ci = simple_index
|
||||
categories = ci.categories
|
||||
|
||||
result = ci.delete(0)
|
||||
expected = CategoricalIndex(list("abbca"), categories=categories)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
result = ci.delete(-1)
|
||||
expected = CategoricalIndex(list("aabbc"), categories=categories)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
with tm.external_error_raised((IndexError, ValueError)):
|
||||
# Either depending on NumPy version
|
||||
ci.delete(10)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, non_lexsorted_data",
|
||||
[[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
|
||||
)
|
||||
def test_is_monotonic(self, data, non_lexsorted_data):
|
||||
c = CategoricalIndex(data)
|
||||
assert c.is_monotonic_increasing is True
|
||||
assert c.is_monotonic_decreasing is False
|
||||
|
||||
c = CategoricalIndex(data, ordered=True)
|
||||
assert c.is_monotonic_increasing is True
|
||||
assert c.is_monotonic_decreasing is False
|
||||
|
||||
c = CategoricalIndex(data, categories=reversed(data))
|
||||
assert c.is_monotonic_increasing is False
|
||||
assert c.is_monotonic_decreasing is True
|
||||
|
||||
c = CategoricalIndex(data, categories=reversed(data), ordered=True)
|
||||
assert c.is_monotonic_increasing is False
|
||||
assert c.is_monotonic_decreasing is True
|
||||
|
||||
# test when data is neither monotonic increasing nor decreasing
|
||||
reordered_data = [data[0], data[2], data[1]]
|
||||
c = CategoricalIndex(reordered_data, categories=reversed(data))
|
||||
assert c.is_monotonic_increasing is False
|
||||
assert c.is_monotonic_decreasing is False
|
||||
|
||||
# non lexsorted categories
|
||||
categories = non_lexsorted_data
|
||||
|
||||
c = CategoricalIndex(categories[:2], categories=categories)
|
||||
assert c.is_monotonic_increasing is True
|
||||
assert c.is_monotonic_decreasing is False
|
||||
|
||||
c = CategoricalIndex(categories[1:3], categories=categories)
|
||||
assert c.is_monotonic_increasing is True
|
||||
assert c.is_monotonic_decreasing is False
|
||||
|
||||
def test_has_duplicates(self):
|
||||
idx = CategoricalIndex([0, 0, 0], name="foo")
|
||||
assert idx.is_unique is False
|
||||
assert idx.has_duplicates is True
|
||||
|
||||
idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
|
||||
assert idx.is_unique is False
|
||||
assert idx.has_duplicates is True
|
||||
|
||||
idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
|
||||
assert idx.is_unique is True
|
||||
assert idx.has_duplicates is False
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, categories, expected",
|
||||
[
|
||||
(
|
||||
[1, 1, 1],
|
||||
[1, 2, 3],
|
||||
{
|
||||
"first": np.array([False, True, True]),
|
||||
"last": np.array([True, True, False]),
|
||||
False: np.array([True, True, True]),
|
||||
},
|
||||
),
|
||||
(
|
||||
[1, 1, 1],
|
||||
list("abc"),
|
||||
{
|
||||
"first": np.array([False, True, True]),
|
||||
"last": np.array([True, True, False]),
|
||||
False: np.array([True, True, True]),
|
||||
},
|
||||
),
|
||||
(
|
||||
[2, "a", "b"],
|
||||
list("abc"),
|
||||
{
|
||||
"first": np.zeros(shape=(3), dtype=np.bool_),
|
||||
"last": np.zeros(shape=(3), dtype=np.bool_),
|
||||
False: np.zeros(shape=(3), dtype=np.bool_),
|
||||
},
|
||||
),
|
||||
(
|
||||
list("abb"),
|
||||
list("abc"),
|
||||
{
|
||||
"first": np.array([False, False, True]),
|
||||
"last": np.array([False, True, False]),
|
||||
False: np.array([False, True, True]),
|
||||
},
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_drop_duplicates(self, data, categories, expected):
|
||||
idx = CategoricalIndex(data, categories=categories, name="foo")
|
||||
for keep, e in expected.items():
|
||||
tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e)
|
||||
e = idx[~e]
|
||||
result = idx.drop_duplicates(keep=keep)
|
||||
tm.assert_index_equal(result, e)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, categories, expected_data",
|
||||
[
|
||||
([1, 1, 1], [1, 2, 3], [1]),
|
||||
([1, 1, 1], list("abc"), [np.nan]),
|
||||
([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
|
||||
([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
|
||||
],
|
||||
)
|
||||
def test_unique(self, data, categories, expected_data, ordered):
|
||||
dtype = CategoricalDtype(categories, ordered=ordered)
|
||||
|
||||
idx = CategoricalIndex(data, dtype=dtype)
|
||||
expected = CategoricalIndex(expected_data, dtype=dtype)
|
||||
tm.assert_index_equal(idx.unique(), expected)
|
||||
|
||||
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr doesn't roundtrip")
|
||||
def test_repr_roundtrip(self):
|
||||
ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
|
||||
str(ci)
|
||||
tm.assert_index_equal(eval(repr(ci)), ci, exact=True)
|
||||
|
||||
# formatting
|
||||
str(ci)
|
||||
|
||||
# long format
|
||||
# this is not reprable
|
||||
ci = CategoricalIndex(np.random.default_rng(2).integers(0, 5, size=100))
|
||||
str(ci)
|
||||
|
||||
def test_isin(self):
|
||||
ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
|
||||
tm.assert_numpy_array_equal(
|
||||
ci.isin(["c"]), np.array([False, False, False, True, False, False])
|
||||
)
|
||||
tm.assert_numpy_array_equal(
|
||||
ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False])
|
||||
)
|
||||
tm.assert_numpy_array_equal(
|
||||
ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6)
|
||||
)
|
||||
|
||||
# mismatched categorical -> coerced to ndarray so doesn't matter
|
||||
result = ci.isin(ci.set_categories(list("abcdefghi")))
|
||||
expected = np.array([True] * 6)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = ci.isin(ci.set_categories(list("defghi")))
|
||||
expected = np.array([False] * 5 + [True])
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_isin_overlapping_intervals(self):
|
||||
# GH 34974
|
||||
idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
|
||||
result = CategoricalIndex(idx).isin(idx)
|
||||
expected = np.array([True, True])
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_identical(self):
|
||||
ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
|
||||
ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
|
||||
assert ci1.identical(ci1)
|
||||
assert ci1.identical(ci1.copy())
|
||||
assert not ci1.identical(ci2)
|
||||
|
||||
def test_ensure_copied_data(self):
|
||||
# gh-12309: Check the "copy" argument of each
|
||||
# Index.__new__ is honored.
|
||||
#
|
||||
# Must be tested separately from other indexes because
|
||||
# self.values is not an ndarray.
|
||||
index = CategoricalIndex(list("ab") * 5)
|
||||
|
||||
result = CategoricalIndex(index.values, copy=True)
|
||||
tm.assert_index_equal(index, result)
|
||||
assert not np.shares_memory(result._data._codes, index._data._codes)
|
||||
|
||||
result = CategoricalIndex(index.values, copy=False)
|
||||
assert result._data._codes is index._data._codes
|
||||
|
||||
|
||||
class TestCategoricalIndex2:
|
||||
def test_view_i8(self):
|
||||
# GH#25464
|
||||
ci = CategoricalIndex(list("ab") * 50)
|
||||
msg = "When changing to a larger dtype, its size must be a divisor"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ci.view("i8")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ci._data.view("i8")
|
||||
|
||||
ci = ci[:-4] # length divisible by 8
|
||||
|
||||
res = ci.view("i8")
|
||||
expected = ci._data.codes.view("i8")
|
||||
tm.assert_numpy_array_equal(res, expected)
|
||||
|
||||
cat = ci._data
|
||||
tm.assert_numpy_array_equal(cat.view("i8"), expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype, engine_type",
|
||||
[
|
||||
(np.int8, libindex.Int8Engine),
|
||||
(np.int16, libindex.Int16Engine),
|
||||
(np.int32, libindex.Int32Engine),
|
||||
(np.int64, libindex.Int64Engine),
|
||||
],
|
||||
)
|
||||
def test_engine_type(self, dtype, engine_type):
|
||||
if dtype != np.int64:
|
||||
# num. of uniques required to push CategoricalIndex.codes to a
|
||||
# dtype (128 categories required for .codes dtype to be int16 etc.)
|
||||
num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype]
|
||||
ci = CategoricalIndex(range(num_uniques))
|
||||
else:
|
||||
# having 2**32 - 2**31 categories would be very memory-intensive,
|
||||
# so we cheat a bit with the dtype
|
||||
ci = CategoricalIndex(range(32768)) # == 2**16 - 2**(16 - 1)
|
||||
arr = ci.values._ndarray.astype("int64")
|
||||
NDArrayBacked.__init__(ci._data, arr, ci.dtype)
|
||||
assert np.issubdtype(ci.codes.dtype, dtype)
|
||||
assert isinstance(ci._engine, engine_type)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func,op_name",
|
||||
[
|
||||
(lambda idx: idx - idx, "__sub__"),
|
||||
(lambda idx: idx + idx, "__add__"),
|
||||
(lambda idx: idx - ["a", "b"], "__sub__"),
|
||||
(lambda idx: idx + ["a", "b"], "__add__"),
|
||||
(lambda idx: ["a", "b"] - idx, "__rsub__"),
|
||||
(lambda idx: ["a", "b"] + idx, "__radd__"),
|
||||
],
|
||||
)
|
||||
def test_disallow_addsub_ops(self, func, op_name):
|
||||
# GH 10039
|
||||
# set ops (+/-) raise TypeError
|
||||
idx = Index(Categorical(["a", "b"]))
|
||||
cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
|
||||
msg = "|".join(
|
||||
[
|
||||
f"cannot perform {op_name} with this index type: CategoricalIndex",
|
||||
"can only concatenate list",
|
||||
rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
|
||||
]
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
func(idx)
|
||||
|
||||
def test_method_delegation(self):
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
|
||||
result = ci.set_categories(list("cab"))
|
||||
tm.assert_index_equal(
|
||||
result, CategoricalIndex(list("aabbca"), categories=list("cab"))
|
||||
)
|
||||
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
|
||||
result = ci.rename_categories(list("efg"))
|
||||
tm.assert_index_equal(
|
||||
result, CategoricalIndex(list("ffggef"), categories=list("efg"))
|
||||
)
|
||||
|
||||
# GH18862 (let rename_categories take callables)
|
||||
result = ci.rename_categories(lambda x: x.upper())
|
||||
tm.assert_index_equal(
|
||||
result, CategoricalIndex(list("AABBCA"), categories=list("CAB"))
|
||||
)
|
||||
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
|
||||
result = ci.add_categories(["d"])
|
||||
tm.assert_index_equal(
|
||||
result, CategoricalIndex(list("aabbca"), categories=list("cabd"))
|
||||
)
|
||||
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
|
||||
result = ci.remove_categories(["c"])
|
||||
tm.assert_index_equal(
|
||||
result,
|
||||
CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")),
|
||||
)
|
||||
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
|
||||
result = ci.as_unordered()
|
||||
tm.assert_index_equal(result, ci)
|
||||
|
||||
ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
|
||||
result = ci.as_ordered()
|
||||
tm.assert_index_equal(
|
||||
result,
|
||||
CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True),
|
||||
)
|
||||
|
||||
# invalid
|
||||
msg = "cannot use inplace with CategoricalIndex"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ci.set_categories(list("cab"), inplace=True)
|
||||
|
||||
def test_remove_maintains_order(self):
|
||||
ci = CategoricalIndex(list("abcdda"), categories=list("abcd"))
|
||||
result = ci.reorder_categories(["d", "c", "b", "a"], ordered=True)
|
||||
tm.assert_index_equal(
|
||||
result,
|
||||
CategoricalIndex(list("abcdda"), categories=list("dcba"), ordered=True),
|
||||
)
|
||||
result = result.remove_categories(["c"])
|
||||
tm.assert_index_equal(
|
||||
result,
|
||||
CategoricalIndex(
|
||||
["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True
|
||||
),
|
||||
)
|
@ -0,0 +1,142 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalIndexConstructors:
    """Checks for the different ways of constructing a ``CategoricalIndex``."""

    def test_construction_disallows_scalar(self):
        """A scalar ``data`` argument, or none at all, raises ``TypeError``."""
        msg = "must be called with a collection of some kind"
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(data=1, categories=list("abcd"), ordered=False)
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(categories=list("abcd"), ordered=False)

    def test_construction(self):
        """Construct from lists, Categoricals and other indexes; check cats and codes."""
        base = CategoricalIndex(
            list("aabbca"), categories=list("abcd"), ordered=False
        )
        cats = base.categories
        full_codes = np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        # code -1 where "c" is no longer a category
        clipped_codes = np.array([0, 0, 1, 1, -1, 0], dtype="int8")

        # Index(...) on the index itself and on its values both round-trip.
        for source in (base, base.values):
            built = Index(source)
            tm.assert_index_equal(built, base, exact=True)
            assert not built.ordered

        # empty
        built = CategoricalIndex([], categories=cats)
        tm.assert_index_equal(built.categories, Index(cats))
        tm.assert_numpy_array_equal(built.codes, np.array([], dtype="int8"))
        assert not built.ordered

        # passing categories
        built = CategoricalIndex(list("aabbca"), categories=cats)
        tm.assert_index_equal(built.categories, Index(cats))
        tm.assert_numpy_array_equal(built.codes, full_codes)

        # from a Categorical, without and with explicit categories
        raw = Categorical(list("aabbca"))
        built = CategoricalIndex(raw)
        tm.assert_index_equal(built.categories, Index(list("abc")))
        tm.assert_numpy_array_equal(built.codes, full_codes)
        assert not built.ordered

        built = CategoricalIndex(raw, categories=cats)
        tm.assert_index_equal(built.categories, Index(cats))
        tm.assert_numpy_array_equal(built.codes, full_codes)
        assert not built.ordered

        # from another CategoricalIndex
        base = CategoricalIndex(raw, categories=list("abcd"))
        built = CategoricalIndex(base)
        tm.assert_index_equal(built.categories, Index(cats))
        tm.assert_numpy_array_equal(built.codes, full_codes)
        assert not built.ordered

        # ... narrowing the categories
        built = CategoricalIndex(base, categories=list("ab"))
        tm.assert_index_equal(built.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(built.codes, clipped_codes)
        assert not built.ordered

        # ... narrowing and marking as ordered
        built = CategoricalIndex(base, categories=list("ab"), ordered=True)
        tm.assert_index_equal(built.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(built.codes, clipped_codes)
        assert built.ordered

        # an explicit dtype="category" on top makes no difference
        built = CategoricalIndex(base, categories=list("ab"), ordered=True)
        wanted = CategoricalIndex(
            base, categories=list("ab"), ordered=True, dtype="category"
        )
        tm.assert_index_equal(built, wanted, exact=True)

        # turn me to an Index
        plain = Index(np.array(base))
        assert isinstance(plain, Index)
        assert not isinstance(plain, CategoricalIndex)

    def test_construction_with_dtype(self):
        """``Index(..., dtype="category")`` matches the equivalent ``CategoricalIndex``."""
        # specify dtype
        target = CategoricalIndex(
            list("aabbca"), categories=list("abc"), ordered=False
        )
        for raw in (np.array(target), np.array(target).tolist()):
            tm.assert_index_equal(Index(raw, dtype="category"), target, exact=True)

        # these are generally only equal when the categories are reordered
        target = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=False
        )
        built = Index(np.array(target), dtype="category")
        built = built.reorder_categories(target.categories)
        tm.assert_index_equal(built, target, exact=True)

        # make sure indexes are handled
        numbers = Index(range(3))
        wanted = CategoricalIndex([0, 1, 2], categories=numbers, ordered=True)
        built = CategoricalIndex(numbers, categories=numbers, ordered=True)
        tm.assert_index_equal(built, wanted, exact=True)

    def test_construction_empty_with_bool_categories(self):
        """See GH#22702: an empty index keeps its boolean categories."""
        empty = CategoricalIndex([], categories=[True, False])
        assert sorted(empty.categories.tolist()) == [False, True]

    def test_construction_with_categorical_dtype(self):
        """GH#18109: build from a ``CategoricalDtype`` instead of separate kwargs."""
        data = "a a b b".split()
        cats = "c b a".split()
        ordered = True
        dtype = CategoricalDtype(categories=cats, ordered=ordered)

        wanted = CategoricalIndex(data, categories=cats, ordered=ordered)
        tm.assert_index_equal(CategoricalIndex(data, dtype=dtype), wanted, exact=True)

        # GH#19032
        tm.assert_index_equal(Index(data, dtype=dtype), wanted, exact=True)

        # error when combining categories/ordered and dtype kwargs
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, categories=cats, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, ordered=ordered, dtype=dtype)
|
@ -0,0 +1,96 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
class TestEquals:
|
||||
def test_equals_categorical(self):
|
||||
ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
|
||||
ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
|
||||
|
||||
assert ci1.equals(ci1)
|
||||
assert not ci1.equals(ci2)
|
||||
assert ci1.equals(ci1.astype(object))
|
||||
assert ci1.astype(object).equals(ci1)
|
||||
|
||||
assert (ci1 == ci1).all()
|
||||
assert not (ci1 != ci1).all()
|
||||
assert not (ci1 > ci1).all()
|
||||
assert not (ci1 < ci1).all()
|
||||
assert (ci1 <= ci1).all()
|
||||
assert (ci1 >= ci1).all()
|
||||
|
||||
assert not (ci1 == 1).all()
|
||||
assert (ci1 == Index(["a", "b"])).all()
|
||||
assert (ci1 == ci1.values).all()
|
||||
|
||||
# invalid comparisons
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
ci1 == Index(["a", "b", "c"])
|
||||
|
||||
msg = "Categoricals can only be compared if 'categories' are the same"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ci1 == ci2
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ci1 == Categorical(ci1.values, ordered=False)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
ci1 == Categorical(ci1.values, categories=list("abc"))
|
||||
|
||||
# tests
|
||||
# make sure that we are testing for category inclusion properly
|
||||
ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"])
|
||||
assert not ci.equals(list("aabca"))
|
||||
# Same categories, but different order
|
||||
# Unordered
|
||||
assert ci.equals(CategoricalIndex(list("aabca")))
|
||||
# Ordered
|
||||
assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True))
|
||||
assert ci.equals(ci.copy())
|
||||
|
||||
ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
|
||||
assert not ci.equals(list("aabca"))
|
||||
assert not ci.equals(CategoricalIndex(list("aabca")))
|
||||
assert ci.equals(ci.copy())
|
||||
|
||||
ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
|
||||
assert not ci.equals(list("aabca") + [np.nan])
|
||||
assert ci.equals(CategoricalIndex(list("aabca") + [np.nan]))
|
||||
assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True))
|
||||
assert ci.equals(ci.copy())
|
||||
|
||||
def test_equals_categorical_unordered(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/16603
|
||||
a = CategoricalIndex(["A"], categories=["A", "B"])
|
||||
b = CategoricalIndex(["A"], categories=["B", "A"])
|
||||
c = CategoricalIndex(["C"], categories=["B", "A"])
|
||||
assert a.equals(b)
|
||||
assert not a.equals(c)
|
||||
assert not b.equals(c)
|
||||
|
||||
def test_equals_non_category(self):
|
||||
# GH#37667 Case where other contains a value not among ci's
|
||||
# categories ("D") and also contains np.nan
|
||||
ci = CategoricalIndex(["A", "B", np.nan, np.nan])
|
||||
other = Index(["A", "B", "D", np.nan])
|
||||
|
||||
assert not ci.equals(other)
|
||||
|
||||
def test_equals_multiindex(self):
|
||||
# dont raise NotImplementedError when calling is_dtype_compat
|
||||
|
||||
mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)])
|
||||
ci = mi.to_flat_index().astype("category")
|
||||
|
||||
assert not ci.equals(mi)
|
||||
|
||||
def test_equals_string_dtype(self, any_string_dtype):
|
||||
# GH#55364
|
||||
idx = CategoricalIndex(list("abc"), name="B")
|
||||
other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype)
|
||||
assert idx.equals(other)
|
@ -0,0 +1,54 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import CategoricalIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestFillNA:
    """Checks for ``fillna`` on ``CategoricalIndex`` and its backing array."""

    def test_fillna_categorical(self):
        """GH#11343: fill with a value inside and outside the categories."""
        index = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")

        # fill by value in categories
        filled = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x")
        tm.assert_index_equal(index.fillna(1.0), filled)

        # fill by value not in categories raises TypeError on EA, casts on CI
        backing = index._data
        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            backing.fillna(2.0)

        on_index = index.fillna(2.0)
        via_object = index.astype(object).fillna(2.0)
        tm.assert_index_equal(on_index, via_object)

    def test_fillna_copies_with_no_nas(self):
        """With nothing to fill, the index result shares memory; the array is copied."""
        # Nothing to fill, should still get a copy for the Categorical method,
        # but OK to get a view on CategoricalIndex method
        index = CategoricalIndex([0, 1, 1])
        on_index = index.fillna(0)
        assert on_index is not index
        assert tm.shares_memory(on_index, index)

        # But at the EA level we always get a copy.
        backing = index._data
        on_array = backing.fillna(0)
        assert on_array._ndarray is not backing._ndarray
        assert on_array._ndarray.base is None
        assert not tm.shares_memory(on_array, backing)

    def test_fillna_validates_with_no_nas(self):
        """The array validates the fill value even when fillna is a no-op."""
        index = CategoricalIndex([2, 3, 3])
        backing = index._data

        # nothing to fill, so we dont cast
        tm.assert_index_equal(index.fillna(False), index)

        # Same check directly on the Categorical
        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            backing.fillna(False)
|
@ -0,0 +1,120 @@
|
||||
"""
|
||||
Tests for CategoricalIndex.__repr__ and related methods.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_pyarrow_string_dtype
|
||||
import pandas._config.config as cf
|
||||
|
||||
from pandas import CategoricalIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalIndexRepr:
    """Tests for ``CategoricalIndex.format`` and ``repr(CategoricalIndex)``."""

    def test_format_different_scalar_lengths(self):
        """``format`` returns the raw labels and emits a deprecation warning."""
        # GH#35439
        idx = CategoricalIndex(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        msg = r"CategoricalIndex\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected

    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
    def test_string_categorical_index_repr(self):
        """Compare ``repr`` output for short, wrapped and truncated indexes.

        Covers ASCII and Japanese labels, the latter both with and without
        the ``display.unicode.east_asian_width`` option enabled.
        """
        # NOTE(review): the leading whitespace on the continuation lines of
        # the expected strings below (18 spaces for data rows, 17 for the
        # ``categories=`` row) was reconstructed from the repr alignment
        # under ``CategoricalIndex([`` / ``CategoricalIndex(`` - confirm
        # against the upstream file.

        # short
        idx = CategoricalIndex(["a", "bb", "ccc"])
        expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # multiple lines
        idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
                  'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # truncated
        idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
                  ...
                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)"""  # noqa: E501

        assert repr(idx) == expected

        # larger categories
        idx = CategoricalIndex(list("abcdefghijklmmo"))
        expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
                  'm', 'm', 'o'],
                 categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # short
        idx = CategoricalIndex(["あ", "いい", "ううう"])
        expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # multiple lines
        idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
        expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
                  'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # truncated
        idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
        expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
                  ...
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501

        assert repr(idx) == expected

        # larger categories
        idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
        expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
                  'す', 'せ', 'そ'],
                 categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # Enable Unicode option -----------------------------------------
        with cf.option_context("display.unicode.east_asian_width", True):
            # short
            idx = CategoricalIndex(["あ", "いい", "ううう"])
            expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
            assert repr(idx) == expected

            # multiple lines
            idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
            expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
                  'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501

            assert repr(idx) == expected

            # truncated
            idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
            expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ',
                  ...
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
                  'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501

            assert repr(idx) == expected

            # larger categories
            idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
            expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
                  'さ', 'し', 'す', 'せ', 'そ'],
                 categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')"""  # noqa: E501

            assert repr(idx) == expected
|
@ -0,0 +1,420 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import InvalidIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestTake:
    """Tests for ``CategoricalIndex.take``."""

    def test_take_fill_value(self):
        """``take`` with and without ``fill_value`` on numeric/object categories.

        With ``fill_value`` set, ``-1`` produces a missing entry; with
        ``allow_fill=False`` it is an ordinary "last element" position.
        Indices below ``-1`` with a fill value raise ``ValueError`` and
        out-of-bounds positions raise ``IndexError``.
        """
        # GH 12631

        # numeric category
        idx = CategoricalIndex([1, 2, 3], name="xxx")
        result = idx.take(np.array([1, 0, -1]))
        expected = CategoricalIndex([2, 1, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = CategoricalIndex([2, 1, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # object category
        idx = CategoricalIndex(
            list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        result = idx.take(np.array([1, 0, -1]))
        expected = CategoricalIndex(
            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = CategoricalIndex(
            ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = CategoricalIndex(
            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_fill_value_datetime(self):
        """Same ``take``/``fill_value`` checks for datetime categories."""
        # datetime category
        idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
        idx = CategoricalIndex(idx)
        result = idx.take(np.array([1, 0, -1]))
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        expected = CategoricalIndex(expected)
        tm.assert_index_equal(result, expected)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
        exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
        expected = CategoricalIndex(expected, categories=exp_cats)
        tm.assert_index_equal(result, expected)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        expected = CategoricalIndex(expected)
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_invalid_kwargs(self):
        """Unknown or unsupported keyword arguments to ``take`` raise."""
        idx = CategoricalIndex([1, 2, 3], name="foo")
        indices = [1, 0, -1]

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            idx.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, mode="clip")
|
||||
|
||||
|
||||
class TestGetLoc:
    """Tests for ``CategoricalIndex.get_loc``."""

    def test_get_loc(self):
        """``get_loc`` on a CategoricalIndex agrees with a plain ``Index``.

        Covers unique labels (scalar result), non-unique non-contiguous
        labels (boolean mask), non-unique contiguous labels (slice), and
        missing labels (``KeyError``).
        """
        # GH 12531
        cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc"))
        idx1 = Index(list("abcde"))
        assert cidx1.get_loc("a") == idx1.get_loc("a")
        assert cidx1.get_loc("e") == idx1.get_loc("e")

        for i in [cidx1, idx1]:
            with pytest.raises(KeyError, match="'NOT-EXIST'"):
                i.get_loc("NOT-EXIST")

        # non-unique
        cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc"))
        idx2 = Index(list("aacded"))

        # results in bool array
        res = cidx2.get_loc("d")
        tm.assert_numpy_array_equal(res, idx2.get_loc("d"))
        tm.assert_numpy_array_equal(
            res, np.array([False, False, False, True, False, True])
        )
        # unique element results in scalar
        res = cidx2.get_loc("e")
        assert res == idx2.get_loc("e")
        assert res == 4

        for i in [cidx2, idx2]:
            with pytest.raises(KeyError, match="'NOT-EXIST'"):
                i.get_loc("NOT-EXIST")

        # non-unique, sliceable
        cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc"))
        idx3 = Index(list("aabbb"))

        # results in slice
        res = cidx3.get_loc("a")
        assert res == idx3.get_loc("a")
        assert res == slice(0, 2, None)

        res = cidx3.get_loc("b")
        assert res == idx3.get_loc("b")
        assert res == slice(2, 5, None)

        for i in [cidx3, idx3]:
            with pytest.raises(KeyError, match="'c'"):
                i.get_loc("c")

    def test_get_loc_unique(self):
        """A unique label resolves to its integer position."""
        cidx = CategoricalIndex(list("abc"))
        result = cidx.get_loc("b")
        assert result == 1

    def test_get_loc_monotonic_nonunique(self):
        """A repeated label in a monotonic index resolves to a slice."""
        cidx = CategoricalIndex(list("abbc"))
        result = cidx.get_loc("b")
        expected = slice(1, 3, None)
        assert result == expected

    def test_get_loc_nonmonotonic_nonunique(self):
        """A repeated label in a non-monotonic index resolves to a bool mask."""
        cidx = CategoricalIndex(list("abcb"))
        result = cidx.get_loc("b")
        expected = np.array([False, True, False, True], dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_loc_nan(self):
        """``NaN`` can be located by position."""
        # GH#41933
        ci = CategoricalIndex(["A", "B", np.nan])
        res = ci.get_loc(np.nan)

        assert res == 2
|
||||
|
||||
|
||||
class TestGetIndexer:
    """Tests for ``CategoricalIndex.get_indexer`` and ``get_indexer_non_unique``."""

    def test_get_indexer_base(self):
        """Self-lookup yields ``arange``; an unknown ``method`` raises."""
        # Determined by cat ordering.
        idx = CategoricalIndex(list("cab"), categories=list("cab"))
        expected = np.arange(len(idx), dtype=np.intp)

        actual = idx.get_indexer(idx)
        tm.assert_numpy_array_equal(expected, actual)

        with pytest.raises(ValueError, match="Invalid fill method"):
            idx.get_indexer(idx, method="invalid")

    def test_get_indexer_requires_unique(self):
        """``get_indexer`` on an index with duplicates raises ``InvalidIndexError``."""
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        oidx = Index(np.array(ci))

        msg = "Reindexing only valid with uniquely valued Index objects"

        for n in [1, 2, 5, len(ci)]:
            finder = oidx[np.random.default_rng(2).integers(0, len(ci), size=n)]

            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(finder)

        # see gh-17323
        #
        # Even when indexer is equal to the
        # members in the index, we should
        # respect duplicates instead of taking
        # the fast-track path.
        for finder in [list("aabbca"), list("aababca")]:
            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(finder)

    def test_get_indexer_non_unique(self):
        """``get_indexer_non_unique`` works where ``get_indexer`` raises."""
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        for indexer in [idx2, list("abf"), Index(list("abf"))]:
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                idx1.get_indexer(indexer)

            r1, _ = idx1.get_indexer_non_unique(indexer)
            expected = np.array([0, 1, 2, -1], dtype=np.intp)
            tm.assert_almost_equal(r1, expected)

    def test_get_indexer_method(self):
        """Fill methods ``pad``/``backfill``/``nearest`` are not implemented."""
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        msg = "method pad not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="pad")
        msg = "method backfill not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="backfill")

        msg = "method nearest not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="nearest")

    def test_get_indexer_array(self):
        """An object ndarray of Timestamps is matched against the categories."""
        arr = np.array(
            [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
            dtype=object,
        )
        cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
        ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
        result = ci.get_indexer(arr)
        expected = np.array([0, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_same_categories_same_order(self):
        """Target with identical categories in the same order."""
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_same_categories_different_order(self):
        """Target with the same categories in a different order."""
        # https://github.com/pandas-dev/pandas/issues/19551
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_nans_in_index_and_target(self):
        """NaN in the index is matched by NaN in the target; unknowns give -1."""
        # GH 45361
        ci = CategoricalIndex([1, 2, np.nan, 3])
        other1 = [2, 3, 4, np.nan]
        res1 = ci.get_indexer(other1)
        expected1 = np.array([1, 3, -1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(res1, expected1)
        other2 = [1, 4, 2, 3]
        res2 = ci.get_indexer(other2)
        expected2 = np.array([0, -1, 1, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(res2, expected2)
|
||||
|
||||
|
||||
class TestWhere:
    """Tests for ``CategoricalIndex.where``."""

    def test_where(self, listlike_box):
        """``where`` keeps values where the condition holds, NaN elsewhere.

        ``listlike_box`` is supplied from outside this block (presumably a
        pytest fixture yielding list-like constructors - confirm in conftest).
        """
        klass = listlike_box

        i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        cond = [True] * len(i)
        expected = i
        result = i.where(klass(cond))
        tm.assert_index_equal(result, expected)

        cond = [False] + [True] * (len(i) - 1)
        expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories)
        result = i.where(klass(cond))
        tm.assert_index_equal(result, expected)

    def test_where_non_categories(self):
        """Filling with a non-category casts the index to object dtype.

        The same operation directly on the Categorical raises ``TypeError``.
        """
        ci = CategoricalIndex(["a", "b", "c", "d"])
        mask = np.array([True, False, True, False])

        result = ci.where(mask, 2)
        expected = Index(["a", 2, "c", 2], dtype=object)
        tm.assert_index_equal(result, expected)

        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            # Test the Categorical method directly
            ci._data._where(mask, 2)
|
||||
|
||||
|
||||
class TestContains:
    """Tests for ``in`` (``__contains__``) on CategoricalIndex / Categorical."""

    def test_contains(self):
        """Membership is by value: unused categories and codes are not members."""
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)

        assert "a" in ci
        assert "z" not in ci
        assert "e" not in ci
        assert np.nan not in ci

        # assert codes NOT in index
        assert 0 not in ci
        assert 1 not in ci

    def test_contains_nan(self):
        """NaN is a member when the index holds a missing value."""
        ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
        assert np.nan in ci

    @pytest.mark.parametrize("unwrap", [True, False])
    def test_contains_na_dtype(self, unwrap):
        """Which NA sentinels count as members depends on the categories' dtype.

        When ``unwrap`` is true the checks run on the underlying Categorical
        (``._data``) instead of the index.
        """
        dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
        pi = dti.to_period("D")
        tdi = dti - dti[-1]
        ci = CategoricalIndex(dti)

        obj = ci
        if unwrap:
            obj = ci._data

        assert np.nan in obj
        assert None in obj
        assert pd.NaT in obj
        assert np.datetime64("NaT") in obj
        assert np.timedelta64("NaT") not in obj

        obj2 = CategoricalIndex(tdi)
        if unwrap:
            obj2 = obj2._data

        assert np.nan in obj2
        assert None in obj2
        assert pd.NaT in obj2
        assert np.datetime64("NaT") not in obj2
        assert np.timedelta64("NaT") in obj2

        obj3 = CategoricalIndex(pi)
        if unwrap:
            obj3 = obj3._data

        assert np.nan in obj3
        assert None in obj3
        assert pd.NaT in obj3
        assert np.datetime64("NaT") not in obj3
        assert np.timedelta64("NaT") not in obj3

    @pytest.mark.parametrize(
        "item, expected",
        [
            (pd.Interval(0, 1), True),
            (1.5, True),
            (pd.Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (pd.Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        """Membership checks against interval categories return a real bool."""
        # GH 23705
        ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
        result = item in ci
        assert result is expected

    def test_contains_list(self):
        """Unhashable keys raise ``TypeError`` instead of returning False."""
        # GH#21729
        idx = CategoricalIndex([1, 2, 3])

        assert "a" not in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a"] in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a", "b"] in idx
|
@ -0,0 +1,144 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, categories",
    [
        (list("abcbca"), list("cab")),
        (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
    ],
    ids=["string", "interval"],
)
def test_map_str(data, categories, ordered):
    """Mapping ``str`` keeps the result categorical with the same ``ordered``.

    ``ordered`` is supplied from outside this block (presumably a pytest
    fixture - confirm in conftest).
    """
    # GH 31202 - override base class since we want to maintain categorical/ordered
    index = CategoricalIndex(data, categories=categories, ordered=ordered)
    result = index.map(str)
    expected = CategoricalIndex(
        map(str, data), categories=map(str, categories), ordered=ordered
    )
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_map():
    """``CategoricalIndex.map`` with callables, a Series and a dict.

    A one-to-one mapping keeps the result categorical (preserving order of
    categories, ``ordered`` and ``name``); a many-to-one mapping returns a
    plain ``Index``.
    """
    ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
    result = ci.map(lambda x: x.lower())
    exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True)
    tm.assert_index_equal(result, exp)

    ci = CategoricalIndex(
        list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
    )
    result = ci.map(lambda x: x.lower())
    exp = CategoricalIndex(
        list("ababc"), categories=list("bac"), ordered=False, name="XXX"
    )
    tm.assert_index_equal(result, exp)

    # GH 12766: Return an index not an array
    tm.assert_index_equal(
        ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX")
    )

    # change categories dtype
    ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)

    def f(x):
        return {"A": 10, "B": 20, "C": 30}.get(x)

    result = ci.map(f)
    exp = CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False)
    tm.assert_index_equal(result, exp)

    result = ci.map(Series([10, 20, 30], index=["A", "B", "C"]))
    tm.assert_index_equal(result, exp)

    result = ci.map({"A": 10, "B": 20, "C": 30})
    tm.assert_index_equal(result, exp)
|
||||
|
||||
|
||||
def test_map_with_categorical_series():
    """Mapping an ``Index`` through a Series follows the mapper's dtype.

    A categorical mapper gives a ``CategoricalIndex``; a non-categorical
    mapper gives a plain ``Index``. Unmatched labels become NaN.
    """
    # GH 12756
    a = Index([1, 2, 3, 4])
    b = Series(["even", "odd", "even", "odd"], dtype="category")
    c = Series(["even", "odd", "even", "odd"])

    exp = CategoricalIndex(["odd", "even", "odd", np.nan])
    tm.assert_index_equal(a.map(b), exp)
    exp = Index(["odd", "even", "odd", np.nan])
    tm.assert_index_equal(a.map(c), exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("data", "f", "expected"),
    (
        ([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])),
        ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])),
        ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
        ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
        (
            [1, 1, np.nan],
            Series([False, False]),
            CategoricalIndex([False, False, np.nan]),
        ),
        (
            [1, 2, np.nan],
            Series([False, False, False]),
            Index([False, False, np.nan]),
        ),
    ),
)
def test_map_with_nan_ignore(data, f, expected):  # GH 24241
    """With ``na_action="ignore"`` NaN entries pass through ``map`` unmapped."""
    values = CategoricalIndex(data)
    result = values.map(f, na_action="ignore")
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("data", "f", "expected"),
    (
        ([1, 1, np.nan], pd.isna, Index([False, False, True])),
        ([1, 2, np.nan], pd.isna, Index([False, False, True])),
        ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
        ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
        (
            [1, 1, np.nan],
            Series([False, False]),
            CategoricalIndex([False, False, np.nan]),
        ),
        (
            [1, 2, np.nan],
            Series([False, False, False]),
            Index([False, False, np.nan]),
        ),
    ),
)
def test_map_with_nan_none(data, f, expected):  # GH 24241
    """With ``na_action=None`` a callable mapper is also applied to NaN."""
    values = CategoricalIndex(data)
    result = values.map(f, na_action=None)
    tm.assert_index_equal(result, expected)
|
||||
|
||||
|
||||
def test_map_with_dict_or_series():
    """A Series mapper and the equivalent dict mapper give the same result."""
    orig_values = ["a", "B", 1, "a"]
    new_values = ["one", 2, 3.0, "one"]
    cur_index = CategoricalIndex(orig_values, name="XXX")
    expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"])

    # The last entry duplicates the first, so it is dropped from the mapper.
    mapper = Series(new_values[:-1], index=orig_values[:-1])
    result = cur_index.map(mapper)
    # Order of categories in result can be different
    tm.assert_index_equal(result, expected)

    mapper = dict(zip(orig_values[:-1], new_values[:-1]))
    result = cur_index.map(mapper)
    # Order of categories in result can be different
    tm.assert_index_equal(result, expected)
|
@ -0,0 +1,78 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Interval,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestReindex:
    """Tests for ``CategoricalIndex.reindex``."""

    def test_reindex_list_non_unique(self):
        """Reindexing a non-unique index with a list raises ``ValueError``."""
        # GH#11586
        msg = "cannot reindex on an axis with duplicate labels"
        ci = CategoricalIndex(["a", "b", "c", "a"])
        with pytest.raises(ValueError, match=msg):
            ci.reindex(["a", "c"])

    def test_reindex_categorical_non_unique(self):
        """Reindexing a non-unique index with a Categorical raises."""
        msg = "cannot reindex on an axis with duplicate labels"
        ci = CategoricalIndex(["a", "b", "c", "a"])
        with pytest.raises(ValueError, match=msg):
            ci.reindex(Categorical(["a", "c"]))

    def test_reindex_list_non_unique_unused_category(self):
        """Same as the list case, with an unused category on the index."""
        msg = "cannot reindex on an axis with duplicate labels"
        ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        with pytest.raises(ValueError, match=msg):
            ci.reindex(["a", "c"])

    def test_reindex_categorical_non_unique_unused_category(self):
        """Same as the Categorical case, with an unused category on the index."""
        msg = "cannot reindex on an axis with duplicate labels"
        ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        with pytest.raises(ValueError, match=msg):
            ci.reindex(Categorical(["a", "c"]))

    def test_reindex_duplicate_target(self):
        """Duplicates in the *target* are allowed; result type follows the target."""
        # See GH25459
        cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
        res, indexer = cat.reindex(["a", "c", "c"])
        exp = Index(["a", "c", "c"])
        tm.assert_index_equal(res, exp, exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))

        res, indexer = cat.reindex(
            CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
        )
        exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
        tm.assert_index_equal(res, exp, exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))

    def test_reindex_empty_index(self):
        """Reindexing an empty index gives the target and an all ``-1`` indexer."""
        # See GH16770
        c = CategoricalIndex([])
        res, indexer = c.reindex(["a", "b"])
        tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))

    def test_reindex_categorical_added_category(self):
        """Reindexing onto a target with extra categories returns the target."""
        # GH 42424
        ci = CategoricalIndex(
            [Interval(0, 1, closed="right"), Interval(1, 2, closed="right")],
            ordered=True,
        )
        ci_add = CategoricalIndex(
            [
                Interval(0, 1, closed="right"),
                Interval(1, 2, closed="right"),
                Interval(2, 3, closed="right"),
                Interval(3, 4, closed="right"),
            ],
            ordered=True,
        )
        result, _ = ci.reindex(ci_add)
        expected = ci_add
        tm.assert_index_equal(expected, result)
|
@ -0,0 +1,18 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("na_value", [None, np.nan])
def test_difference_with_na(na_value):
    """``difference`` removes the missing value whichever NA sentinel is used.

    The result keeps all of the original categories, including the removed
    label ``"c"``.
    """
    # GH 57318
    ci = CategoricalIndex(["a", "b", "c", None])
    other = Index(["c", na_value])
    result = ci.difference(other)
    expected = CategoricalIndex(["a", "b"], categories=["a", "b", "c"])
    tm.assert_index_equal(result, expected)
|
Reference in New Issue
Block a user