Updated script that can be controlled by a Node.js web app

This commit is contained in:
mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

View File

@@ -0,0 +1,31 @@
from pandas.core.indexers.utils import (
check_array_indexer,
check_key_length,
check_setitem_lengths,
disallow_ndim_indexing,
is_empty_indexer,
is_list_like_indexer,
is_scalar_indexer,
is_valid_positional_slice,
length_of_indexer,
maybe_convert_indices,
unpack_1tuple,
unpack_tuple_and_ellipses,
validate_indices,
)
__all__ = [
"is_valid_positional_slice",
"is_list_like_indexer",
"is_scalar_indexer",
"is_empty_indexer",
"check_setitem_lengths",
"validate_indices",
"maybe_convert_indices",
"length_of_indexer",
"disallow_ndim_indexing",
"unpack_1tuple",
"check_key_length",
"check_array_indexer",
"unpack_tuple_and_ellipses",
]
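The re-exports above make up the package's `__init__`. As a quick sanity check, here is a minimal sketch (assuming the vendored copy behaves like upstream pandas) of two of these helpers in use; the input values are illustrative only:

import numpy as np
from pandas.core.indexers import is_scalar_indexer, length_of_indexer

# A single integer indexing a 1-D object counts as a scalar indexer.
print(is_scalar_indexer(3, ndim=1))                       # True

# Expected length of target[2:8:2] for a 10-element target: positions 2, 4, 6.
print(length_of_indexer(slice(2, 8, 2), np.arange(10)))   # 3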

View File

@@ -0,0 +1,453 @@
"""Indexer objects for computing start/end window bounds for rolling operations"""
from __future__ import annotations
from datetime import timedelta
import numpy as np
from pandas._libs.tslibs import BaseOffset
from pandas._libs.window.indexers import calculate_variable_window_bounds
from pandas.util._decorators import Appender
from pandas.core.dtypes.common import ensure_platform_int
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.tseries.offsets import Nano
get_window_bounds_doc = """
Computes the bounds of a window.
Parameters
----------
num_values : int, default 0
number of values that will be aggregated over
window_size : int, default 0
the number of rows in a window
min_periods : int, default None
min_periods passed from the top level rolling API
center : bool, default None
center passed from the top level rolling API
closed : str, default None
closed passed from the top level rolling API
step : int, default None
step passed from the top level rolling API
.. versionadded:: 1.5
win_type : str, default None
win_type passed from the top level rolling API
Returns
-------
A tuple of ndarray[int64]s, indicating the boundaries of each
window
"""
class BaseIndexer:
"""
Base class for window bounds calculations.
Examples
--------
>>> from pandas.api.indexers import BaseIndexer
>>> class CustomIndexer(BaseIndexer):
... def get_window_bounds(self, num_values, min_periods, center, closed, step):
... start = np.empty(num_values, dtype=np.int64)
... end = np.empty(num_values, dtype=np.int64)
... for i in range(num_values):
... start[i] = i
... end[i] = i + self.window_size
... return start, end
>>> df = pd.DataFrame({"values": range(5)})
>>> indexer = CustomIndexer(window_size=2)
>>> df.rolling(indexer).sum()
values
0 1.0
1 3.0
2 5.0
3 7.0
4 4.0
"""
def __init__(
self, index_array: np.ndarray | None = None, window_size: int = 0, **kwargs
) -> None:
self.index_array = index_array
self.window_size = window_size
# Set user defined kwargs as attributes that can be used in get_window_bounds
for key, value in kwargs.items():
setattr(self, key, value)
@Appender(get_window_bounds_doc)
def get_window_bounds(
self,
num_values: int = 0,
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
raise NotImplementedError
class FixedWindowIndexer(BaseIndexer):
"""Creates window boundaries that are of fixed length."""
@Appender(get_window_bounds_doc)
def get_window_bounds(
self,
num_values: int = 0,
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
if center or self.window_size == 0:
offset = (self.window_size - 1) // 2
else:
offset = 0
end = np.arange(1 + offset, num_values + 1 + offset, step, dtype="int64")
start = end - self.window_size
if closed in ["left", "both"]:
start -= 1
if closed in ["left", "neither"]:
end -= 1
end = np.clip(end, 0, num_values)
start = np.clip(start, 0, num_values)
return start, end
class VariableWindowIndexer(BaseIndexer):
"""Creates window boundaries that are of variable length, namely for time series."""
@Appender(get_window_bounds_doc)
def get_window_bounds(
self,
num_values: int = 0,
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
# error: Argument 4 to "calculate_variable_window_bounds" has incompatible
# type "Optional[bool]"; expected "bool"
# error: Argument 6 to "calculate_variable_window_bounds" has incompatible
# type "Optional[ndarray]"; expected "ndarray"
return calculate_variable_window_bounds(
num_values,
self.window_size,
min_periods,
center, # type: ignore[arg-type]
closed,
self.index_array, # type: ignore[arg-type]
)
class VariableOffsetWindowIndexer(BaseIndexer):
"""
Calculate window boundaries based on a non-fixed offset such as a BusinessDay.
Examples
--------
>>> from pandas.api.indexers import VariableOffsetWindowIndexer
>>> df = pd.DataFrame(range(10), index=pd.date_range("2020", periods=10))
>>> offset = pd.offsets.BDay(1)
>>> indexer = VariableOffsetWindowIndexer(index=df.index, offset=offset)
>>> df
0
2020-01-01 0
2020-01-02 1
2020-01-03 2
2020-01-04 3
2020-01-05 4
2020-01-06 5
2020-01-07 6
2020-01-08 7
2020-01-09 8
2020-01-10 9
>>> df.rolling(indexer).sum()
0
2020-01-01 0.0
2020-01-02 1.0
2020-01-03 2.0
2020-01-04 3.0
2020-01-05 7.0
2020-01-06 12.0
2020-01-07 6.0
2020-01-08 7.0
2020-01-09 8.0
2020-01-10 9.0
"""
def __init__(
self,
index_array: np.ndarray | None = None,
window_size: int = 0,
index: DatetimeIndex | None = None,
offset: BaseOffset | None = None,
**kwargs,
) -> None:
super().__init__(index_array, window_size, **kwargs)
if not isinstance(index, DatetimeIndex):
raise ValueError("index must be a DatetimeIndex.")
self.index = index
if not isinstance(offset, BaseOffset):
raise ValueError("offset must be a DateOffset-like object.")
self.offset = offset
@Appender(get_window_bounds_doc)
def get_window_bounds(
self,
num_values: int = 0,
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
if step is not None:
raise NotImplementedError("step not implemented for variable offset window")
if num_values <= 0:
return np.empty(0, dtype="int64"), np.empty(0, dtype="int64")
# if the window is variable, the default is 'right'; otherwise the default is 'both'
if closed is None:
closed = "right" if self.index is not None else "both"
right_closed = closed in ["right", "both"]
left_closed = closed in ["left", "both"]
if self.index[num_values - 1] < self.index[0]:
index_growth_sign = -1
else:
index_growth_sign = 1
offset_diff = index_growth_sign * self.offset
start = np.empty(num_values, dtype="int64")
start.fill(-1)
end = np.empty(num_values, dtype="int64")
end.fill(-1)
start[0] = 0
# right endpoint is closed
if right_closed:
end[0] = 1
# right endpoint is open
else:
end[0] = 0
zero = timedelta(0)
# start is start of slice interval (including)
# end is end of slice interval (not including)
for i in range(1, num_values):
end_bound = self.index[i]
start_bound = end_bound - offset_diff
# left endpoint is closed
if left_closed:
start_bound -= Nano(1)
# advance the start bound until we are
# within the constraint
start[i] = i
for j in range(start[i - 1], i):
start_diff = (self.index[j] - start_bound) * index_growth_sign
if start_diff > zero:
start[i] = j
break
# end bound is previous end
# or current index
end_diff = (self.index[end[i - 1]] - end_bound) * index_growth_sign
if end_diff == zero and not right_closed:
end[i] = end[i - 1] + 1
elif end_diff <= zero:
end[i] = i + 1
else:
end[i] = end[i - 1]
# right endpoint is open
if not right_closed:
end[i] -= 1
return start, end
class ExpandingIndexer(BaseIndexer):
"""Calculate expanding window bounds, mimicking df.expanding()"""
@Appender(get_window_bounds_doc)
def get_window_bounds(
self,
num_values: int = 0,
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
return (
np.zeros(num_values, dtype=np.int64),
np.arange(1, num_values + 1, dtype=np.int64),
)
class FixedForwardWindowIndexer(BaseIndexer):
"""
Creates window boundaries for fixed-length windows that include the current row.
Examples
--------
>>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
>>> df
B
0 0.0
1 1.0
2 2.0
3 NaN
4 4.0
>>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2)
>>> df.rolling(window=indexer, min_periods=1).sum()
B
0 1.0
1 3.0
2 2.0
3 4.0
4 4.0
"""
@Appender(get_window_bounds_doc)
def get_window_bounds(
self,
num_values: int = 0,
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
if center:
raise ValueError("Forward-looking windows can't have center=True")
if closed is not None:
raise ValueError(
"Forward-looking windows don't support setting the closed argument"
)
if step is None:
step = 1
start = np.arange(0, num_values, step, dtype="int64")
end = start + self.window_size
if self.window_size:
end = np.clip(end, 0, num_values)
return start, end
class GroupbyIndexer(BaseIndexer):
"""Calculate bounds to compute groupby rolling, mimicking df.groupby().rolling()"""
def __init__(
self,
index_array: np.ndarray | None = None,
window_size: int | BaseIndexer = 0,
groupby_indices: dict | None = None,
window_indexer: type[BaseIndexer] = BaseIndexer,
indexer_kwargs: dict | None = None,
**kwargs,
) -> None:
"""
Parameters
----------
index_array : np.ndarray or None
np.ndarray of the index of the original object that we are performing
a chained groupby operation over. This index has been pre-sorted relative to
the groups
window_size : int or BaseIndexer
window size during the windowing operation
groupby_indices : dict or None
dict of {group label: [positional index of rows belonging to the group]}
window_indexer : BaseIndexer
BaseIndexer class determining the start and end bounds of each group
indexer_kwargs : dict or None
Custom kwargs to be passed to window_indexer
**kwargs :
keyword arguments that will be available when get_window_bounds is called
"""
self.groupby_indices = groupby_indices or {}
self.window_indexer = window_indexer
self.indexer_kwargs = indexer_kwargs.copy() if indexer_kwargs else {}
super().__init__(
index_array=index_array,
window_size=self.indexer_kwargs.pop("window_size", window_size),
**kwargs,
)
@Appender(get_window_bounds_doc)
def get_window_bounds(
self,
num_values: int = 0,
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
# 1) For each group, get the indices that belong to the group
# 2) Use the indices to calculate the start & end bounds of the window
# 3) Append the window bounds in group order
start_arrays = []
end_arrays = []
window_indices_start = 0
for key, indices in self.groupby_indices.items():
index_array: np.ndarray | None
if self.index_array is not None:
index_array = self.index_array.take(ensure_platform_int(indices))
else:
index_array = self.index_array
indexer = self.window_indexer(
index_array=index_array,
window_size=self.window_size,
**self.indexer_kwargs,
)
start, end = indexer.get_window_bounds(
len(indices), min_periods, center, closed, step
)
start = start.astype(np.int64)
end = end.astype(np.int64)
assert len(start) == len(
end
), "these should be equal in length from get_window_bounds"
# Cannot use groupby_indices as they might not be monotonic with the object
# we're rolling over
window_indices = np.arange(
window_indices_start, window_indices_start + len(indices)
)
window_indices_start += len(indices)
# Extend as we'll be slicing window like [start, end)
window_indices = np.append(window_indices, [window_indices[-1] + 1]).astype(
np.int64, copy=False
)
start_arrays.append(window_indices.take(ensure_platform_int(start)))
end_arrays.append(window_indices.take(ensure_platform_int(end)))
if len(start_arrays) == 0:
return np.array([], dtype=np.int64), np.array([], dtype=np.int64)
start = np.concatenate(start_arrays)
end = np.concatenate(end_arrays)
return start, end
class ExponentialMovingWindowIndexer(BaseIndexer):
"""Calculate ewm window bounds (the entire window)"""
@Appender(get_window_bounds_doc)
def get_window_bounds(
self,
num_values: int = 0,
min_periods: int | None = None,
center: bool | None = None,
closed: str | None = None,
step: int | None = None,
) -> tuple[np.ndarray, np.ndarray]:
return np.array([0], dtype=np.int64), np.array([num_values], dtype=np.int64)
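The indexers above are normally consumed by the rolling machinery, but their `get_window_bounds` methods can also be called directly. A minimal sketch (assuming the vendored copy matches upstream pandas) of `FixedForwardWindowIndexer` producing its bounds and driving a forward-looking rolling sum, echoing the class docstring's example:

import numpy as np
import pandas as pd
from pandas.api.indexers import FixedForwardWindowIndexer

indexer = FixedForwardWindowIndexer(window_size=2)
start, end = indexer.get_window_bounds(num_values=5)
print(start)  # [0 1 2 3 4]
print(end)    # [2 3 4 5 5]  -- end is clipped so windows never run past the data

# The same indexer drives a forward-looking rolling sum.
df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
print(df.rolling(window=indexer, min_periods=1).sum())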

View File

@@ -0,0 +1,553 @@
"""
Low-dependency indexing utilities.
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Any,
)
import numpy as np
from pandas._libs import lib
from pandas.core.dtypes.common import (
is_array_like,
is_bool_dtype,
is_integer,
is_integer_dtype,
is_list_like,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import (
ABCIndex,
ABCSeries,
)
if TYPE_CHECKING:
from pandas._typing import AnyArrayLike
from pandas.core.frame import DataFrame
from pandas.core.indexes.base import Index
# -----------------------------------------------------------
# Indexer Identification
def is_valid_positional_slice(slc: slice) -> bool:
"""
Check if a slice object can be interpreted as a positional indexer.
Parameters
----------
slc : slice
Returns
-------
bool
Notes
-----
A valid positional slice may also be interpreted as a label-based slice
depending on the index being sliced.
"""
return (
lib.is_int_or_none(slc.start)
and lib.is_int_or_none(slc.stop)
and lib.is_int_or_none(slc.step)
)
def is_list_like_indexer(key) -> bool:
"""
Check if we have a list-like indexer that is *not* a NamedTuple.
Parameters
----------
key : object
Returns
-------
bool
"""
# allow a list_like, but exclude NamedTuples which can be indexers
return is_list_like(key) and not (isinstance(key, tuple) and type(key) is not tuple)
def is_scalar_indexer(indexer, ndim: int) -> bool:
"""
Return True if we are all scalar indexers.
Parameters
----------
indexer : object
ndim : int
Number of dimensions in the object being indexed.
Returns
-------
bool
"""
if ndim == 1 and is_integer(indexer):
# GH37748: allow indexer to be an integer for Series
return True
if isinstance(indexer, tuple) and len(indexer) == ndim:
return all(is_integer(x) for x in indexer)
return False
def is_empty_indexer(indexer) -> bool:
"""
Check if we have an empty indexer.
Parameters
----------
indexer : object
Returns
-------
bool
"""
if is_list_like(indexer) and not len(indexer):
return True
if not isinstance(indexer, tuple):
indexer = (indexer,)
return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer)
# -----------------------------------------------------------
# Indexer Validation
def check_setitem_lengths(indexer, value, values) -> bool:
"""
Validate that value and indexer are the same length.
A special case is allowed when the indexer is a boolean array
and the number of true values equals the length of ``value``. In
this case, no exception is raised.
Parameters
----------
indexer : sequence
Key for the setitem.
value : array-like
Value for the setitem.
values : array-like
Values being set into.
Returns
-------
bool
Whether this is an empty listlike setting which is a no-op.
Raises
------
ValueError
When the indexer is an ndarray or list and the lengths don't match.
"""
no_op = False
if isinstance(indexer, (np.ndarray, list)):
# We can ignore other listlikes because they are either
# a) not necessarily 1-D indexers, e.g. tuple
# b) boolean indexers e.g. BoolArray
if is_list_like(value):
if len(indexer) != len(value) and values.ndim == 1:
# boolean with truth values == len of the value is ok too
if isinstance(indexer, list):
indexer = np.array(indexer)
if not (
isinstance(indexer, np.ndarray)
and indexer.dtype == np.bool_
and indexer.sum() == len(value)
):
raise ValueError(
"cannot set using a list-like indexer "
"with a different length than the value"
)
if not len(indexer):
no_op = True
elif isinstance(indexer, slice):
if is_list_like(value):
if len(value) != length_of_indexer(indexer, values) and values.ndim == 1:
# A two-dimensional value is applied row-wise and broadcast
raise ValueError(
"cannot set using a slice indexer with a "
"different length than the value"
)
if not len(value):
no_op = True
return no_op
def validate_indices(indices: np.ndarray, n: int) -> None:
"""
Perform bounds-checking for an indexer.
-1 is allowed for indicating missing values.
Parameters
----------
indices : ndarray
n : int
Length of the array being indexed.
Raises
------
ValueError
Examples
--------
>>> validate_indices(np.array([1, 2]), 3) # OK
>>> validate_indices(np.array([1, -2]), 3)
Traceback (most recent call last):
...
ValueError: negative dimensions are not allowed
>>> validate_indices(np.array([1, 2, 3]), 3)
Traceback (most recent call last):
...
IndexError: indices are out-of-bounds
>>> validate_indices(np.array([-1, -1]), 0) # OK
>>> validate_indices(np.array([0, 1]), 0)
Traceback (most recent call last):
...
IndexError: indices are out-of-bounds
"""
if len(indices):
min_idx = indices.min()
if min_idx < -1:
msg = f"'indices' contains values less than allowed ({min_idx} < -1)"
raise ValueError(msg)
max_idx = indices.max()
if max_idx >= n:
raise IndexError("indices are out-of-bounds")
# -----------------------------------------------------------
# Indexer Conversion
def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray:
"""
Attempt to convert indices into valid, positive indices.
If we have negative indices, translate to positive here.
If we have indices that are out-of-bounds, raise an IndexError.
Parameters
----------
indices : array-like
Array of indices that we are to convert.
n : int
Number of elements in the array that we are indexing.
verify : bool, default True
Check that all entries are between 0 and n - 1, inclusive.
Returns
-------
array-like
An array-like of positive indices that correspond to the ones
that were passed in initially to this function.
Raises
------
IndexError
One of the converted indices either exceeded the number of
elements (specified by `n`), or was still negative.
"""
if isinstance(indices, list):
indices = np.array(indices)
if len(indices) == 0:
# If `indices` is empty, np.array will return a float,
# and will cause indexing errors.
return np.empty(0, dtype=np.intp)
mask = indices < 0
if mask.any():
indices = indices.copy()
indices[mask] += n
if verify:
mask = (indices >= n) | (indices < 0)
if mask.any():
raise IndexError("indices are out-of-bounds")
return indices
# -----------------------------------------------------------
# Unsorted
def length_of_indexer(indexer, target=None) -> int:
"""
Return the expected length of target[indexer]
Returns
-------
int
"""
if target is not None and isinstance(indexer, slice):
target_len = len(target)
start = indexer.start
stop = indexer.stop
step = indexer.step
if start is None:
start = 0
elif start < 0:
start += target_len
if stop is None or stop > target_len:
stop = target_len
elif stop < 0:
stop += target_len
if step is None:
step = 1
elif step < 0:
start, stop = stop + 1, start + 1
step = -step
return (stop - start + step - 1) // step
elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
if isinstance(indexer, list):
indexer = np.array(indexer)
if indexer.dtype == bool:
# GH#25774
return indexer.sum()
return len(indexer)
elif isinstance(indexer, range):
return (indexer.stop - indexer.start) // indexer.step
elif not is_list_like_indexer(indexer):
return 1
raise AssertionError("cannot find the length of the indexer")
def disallow_ndim_indexing(result) -> None:
"""
Helper function to disallow multi-dimensional indexing on 1D Series/Index.
GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
and keep an index, so we used to return ndarray, which was deprecated
in GH#30588.
"""
if np.ndim(result) > 1:
raise ValueError(
"Multi-dimensional indexing (e.g. `obj[:, None]`) is no longer "
"supported. Convert to a numpy array before indexing instead."
)
def unpack_1tuple(tup):
"""
If we have a length-1 tuple/list that contains a slice, unpack to just
the slice.
Notes
-----
The list case is deprecated.
"""
if len(tup) == 1 and isinstance(tup[0], slice):
# if we don't have a MultiIndex, we may still be able to handle
# a 1-tuple. see test_1tuple_without_multiindex
if isinstance(tup, list):
# GH#31299
raise ValueError(
"Indexing with a single-item list containing a "
"slice is not allowed. Pass a tuple instead.",
)
return tup[0]
return tup
def check_key_length(columns: Index, key, value: DataFrame) -> None:
"""
Checks if a key used as indexer has the same length as the columns it is
associated with.
Parameters
----------
columns : Index
    The columns of the DataFrame to index.
key : A list-like of keys to index with.
value : DataFrame
    The value to set for the keys.
Raises
------
ValueError: If the length of key is not equal to the number of columns in value,
or if the number of columns referenced by key is not equal to the number
of columns.
"""
if columns.is_unique:
if len(value.columns) != len(key):
raise ValueError("Columns must be same length as key")
else:
# Missing keys in columns are represented as -1
if len(columns.get_indexer_non_unique(key)[0]) != len(value.columns):
raise ValueError("Columns must be same length as key")
def unpack_tuple_and_ellipses(item: tuple):
"""
Possibly unpack arr[..., n] to arr[n]
"""
if len(item) > 1:
# Note: we are assuming this indexing is being done on a 1D arraylike
if item[0] is Ellipsis:
item = item[1:]
elif item[-1] is Ellipsis:
item = item[:-1]
if len(item) > 1:
raise IndexError("too many indices for array.")
item = item[0]
return item
# -----------------------------------------------------------
# Public indexer validation
def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
"""
Check if `indexer` is a valid array indexer for `array`.
For a boolean mask, `array` and `indexer` are checked to have the same
length. The dtype is validated, and if it is an integer or boolean
ExtensionArray, it is checked for missing values and converted to the
appropriate numpy array. Other dtypes will raise an error.
Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
through as is.
Parameters
----------
array : array-like
The array that is being indexed (only used for the length).
indexer : array-like or list-like
The array-like that's used to index. List-like input that is not yet
a numpy array or an ExtensionArray is converted to one. Other input
types are passed through as is.
Returns
-------
numpy.ndarray
The validated indexer as a numpy array that can be used to index.
Raises
------
IndexError
When the lengths don't match.
ValueError
When `indexer` cannot be converted to a numpy ndarray to index
(e.g. presence of missing values).
See Also
--------
api.types.is_bool_dtype : Check if `key` is of boolean dtype.
Examples
--------
When checking a boolean mask, a boolean ndarray is returned when the
arguments are all valid.
>>> mask = pd.array([True, False])
>>> arr = pd.array([1, 2])
>>> pd.api.indexers.check_array_indexer(arr, mask)
array([ True, False])
An IndexError is raised when the lengths don't match.
>>> mask = pd.array([True, False, True])
>>> pd.api.indexers.check_array_indexer(arr, mask)
Traceback (most recent call last):
...
IndexError: Boolean index has wrong length: 3 instead of 2.
NA values in a boolean array are treated as False.
>>> mask = pd.array([True, pd.NA])
>>> pd.api.indexers.check_array_indexer(arr, mask)
array([ True, False])
A numpy boolean mask will get passed through (if the length is correct):
>>> mask = np.array([True, False])
>>> pd.api.indexers.check_array_indexer(arr, mask)
array([ True, False])
Similarly for integer indexers, an integer ndarray is returned when it is
a valid indexer, otherwise an error is (for integer indexers, a matching
length is not required):
>>> indexer = pd.array([0, 2], dtype="Int64")
>>> arr = pd.array([1, 2, 3])
>>> pd.api.indexers.check_array_indexer(arr, indexer)
array([0, 2])
>>> indexer = pd.array([0, pd.NA], dtype="Int64")
>>> pd.api.indexers.check_array_indexer(arr, indexer)
Traceback (most recent call last):
...
ValueError: Cannot index with an integer indexer containing NA values
For non-integer/boolean dtypes, an appropriate error is raised:
>>> indexer = np.array([0., 2.], dtype="float64")
>>> pd.api.indexers.check_array_indexer(arr, indexer)
Traceback (most recent call last):
...
IndexError: arrays used as indices must be of integer or boolean type
"""
from pandas.core.construction import array as pd_array
# whatever is not an array-like is returned as-is (possible valid array
# indexers that are not array-like: integer, slice, Ellipsis, None)
# In this context, tuples are not considered as array-like, as they have
# a specific meaning in indexing (multi-dimensional indexing)
if is_list_like(indexer):
if isinstance(indexer, tuple):
return indexer
else:
return indexer
# convert list-likes to array
if not is_array_like(indexer):
indexer = pd_array(indexer)
if len(indexer) == 0:
# empty list is converted to float array by pd.array
indexer = np.array([], dtype=np.intp)
dtype = indexer.dtype
if is_bool_dtype(dtype):
if isinstance(dtype, ExtensionDtype):
indexer = indexer.to_numpy(dtype=bool, na_value=False)
else:
indexer = np.asarray(indexer, dtype=bool)
# GH26658
if len(indexer) != len(array):
raise IndexError(
f"Boolean index has wrong length: "
f"{len(indexer)} instead of {len(array)}"
)
elif is_integer_dtype(dtype):
try:
indexer = np.asarray(indexer, dtype=np.intp)
except ValueError as err:
raise ValueError(
"Cannot index with an integer indexer containing NA values"
) from err
else:
raise IndexError("arrays used as indices must be of integer or boolean type")
return indexer
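A minimal sketch (again assuming upstream pandas behaviour) of the conversion and validation helpers defined above; the arrays used are illustrative only:

import numpy as np
import pandas as pd
from pandas.core.indexers import maybe_convert_indices
from pandas.api.indexers import check_array_indexer

# Negative positional indices are translated against n == 5.
print(maybe_convert_indices(np.array([-1, 0, 2]), n=5))   # [4 0 2]

# A nullable boolean mask becomes a plain bool ndarray (NA is treated as False).
arr = pd.array([10, 20, 30])
mask = pd.array([True, pd.NA, False])
print(check_array_indexer(arr, mask))                     # [ True False False]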