Updated script that can be controlled by a Node.js web app

This commit is contained in:
mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions


@@ -0,0 +1,643 @@
"""
datetimelike delegation
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
cast,
)
import warnings
import numpy as np
from pandas._libs import lib
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import (
is_integer_dtype,
is_list_like,
)
from pandas.core.dtypes.dtypes import (
ArrowDtype,
CategoricalDtype,
DatetimeTZDtype,
PeriodDtype,
)
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.accessor import (
PandasDelegate,
delegate_names,
)
from pandas.core.arrays import (
DatetimeArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.arrays.arrow.array import ArrowExtensionArray
from pandas.core.base import (
NoNewAttributesMixin,
PandasObject,
)
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
if TYPE_CHECKING:
from pandas import (
DataFrame,
Series,
)
class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin):
_hidden_attrs = PandasObject._hidden_attrs | {
"orig",
"name",
}
def __init__(self, data: Series, orig) -> None:
if not isinstance(data, ABCSeries):
raise TypeError(
f"cannot convert an object of type {type(data)} to a datetimelike index"
)
self._parent = data
self.orig = orig
self.name = getattr(data, "name", None)
self._freeze()
def _get_values(self):
data = self._parent
if lib.is_np_dtype(data.dtype, "M"):
return DatetimeIndex(data, copy=False, name=self.name)
elif isinstance(data.dtype, DatetimeTZDtype):
return DatetimeIndex(data, copy=False, name=self.name)
elif lib.is_np_dtype(data.dtype, "m"):
return TimedeltaIndex(data, copy=False, name=self.name)
elif isinstance(data.dtype, PeriodDtype):
return PeriodArray(data, copy=False)
raise TypeError(
f"cannot convert an object of type {type(data)} to a datetimelike index"
)
def _delegate_property_get(self, name: str):
from pandas import Series
values = self._get_values()
result = getattr(values, name)
# maybe need to upcast (ints)
if isinstance(result, np.ndarray):
if is_integer_dtype(result):
result = result.astype("int64")
elif not is_list_like(result):
return result
result = np.asarray(result)
if self.orig is not None:
index = self.orig.index
else:
index = self._parent.index
# return the result as a Series
result = Series(result, index=index, name=self.name).__finalize__(self._parent)
# setting this object will show a SettingWithCopyWarning/Error
result._is_copy = (
"modifications to a property of a datetimelike "
"object are not supported and are discarded. "
"Change values on the original."
)
return result
def _delegate_property_set(self, name: str, value, *args, **kwargs):
raise ValueError(
"modifications to a property of a datetimelike object are not supported. "
"Change values on the original."
)
def _delegate_method(self, name: str, *args, **kwargs):
from pandas import Series
values = self._get_values()
method = getattr(values, name)
result = method(*args, **kwargs)
if not is_list_like(result):
return result
result = Series(result, index=self._parent.index, name=self.name).__finalize__(
self._parent
)
# setting this object will show a SettingWithCopyWarning/Error
result._is_copy = (
"modifications to a method of a datetimelike "
"object are not supported and are discarded. "
"Change values on the original."
)
return result
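
# Illustrative sketch (not part of this diff): the delegation above is what
# backs `Series.dt` for numpy-backed datetimelike data -- attribute reads are
# forwarded to an index built from the Series, and writes are rejected. The
# data below is made up for demonstration.
#
# import pandas as pd
#
# s = pd.Series(pd.date_range("2024-01-01", periods=3, freq="D"))
# print(s.dt.year)      # delegated property get -> int Series aligned to s
# try:
#     s.dt.year = 1999  # delegated property set -> ValueError
# except ValueError as err:
#     print(err)
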
@delegate_names(
delegate=ArrowExtensionArray,
accessors=TimedeltaArray._datetimelike_ops,
typ="property",
accessor_mapping=lambda x: f"_dt_{x}",
raise_on_missing=False,
)
@delegate_names(
delegate=ArrowExtensionArray,
accessors=TimedeltaArray._datetimelike_methods,
typ="method",
accessor_mapping=lambda x: f"_dt_{x}",
raise_on_missing=False,
)
@delegate_names(
delegate=ArrowExtensionArray,
accessors=DatetimeArray._datetimelike_ops,
typ="property",
accessor_mapping=lambda x: f"_dt_{x}",
raise_on_missing=False,
)
@delegate_names(
delegate=ArrowExtensionArray,
accessors=DatetimeArray._datetimelike_methods,
typ="method",
accessor_mapping=lambda x: f"_dt_{x}",
raise_on_missing=False,
)
class ArrowTemporalProperties(PandasDelegate, PandasObject, NoNewAttributesMixin):
def __init__(self, data: Series, orig) -> None:
if not isinstance(data, ABCSeries):
raise TypeError(
f"cannot convert an object of type {type(data)} to a datetimelike index"
)
self._parent = data
self._orig = orig
self._freeze()
def _delegate_property_get(self, name: str):
if not hasattr(self._parent.array, f"_dt_{name}"):
raise NotImplementedError(
f"dt.{name} is not supported for {self._parent.dtype}"
)
result = getattr(self._parent.array, f"_dt_{name}")
if not is_list_like(result):
return result
if self._orig is not None:
index = self._orig.index
else:
index = self._parent.index
# return the result as a Series, which is by definition a copy
result = type(self._parent)(
result, index=index, name=self._parent.name
).__finalize__(self._parent)
return result
def _delegate_method(self, name: str, *args, **kwargs):
if not hasattr(self._parent.array, f"_dt_{name}"):
raise NotImplementedError(
f"dt.{name} is not supported for {self._parent.dtype}"
)
result = getattr(self._parent.array, f"_dt_{name}")(*args, **kwargs)
if self._orig is not None:
index = self._orig.index
else:
index = self._parent.index
# return the result as a Series, which is by definition a copy
result = type(self._parent)(
result, index=index, name=self._parent.name
).__finalize__(self._parent)
return result
def to_pytimedelta(self):
return cast(ArrowExtensionArray, self._parent.array)._dt_to_pytimedelta()
def to_pydatetime(self):
# GH#20306
warnings.warn(
f"The behavior of {type(self).__name__}.to_pydatetime is deprecated, "
"in a future version this will return a Series containing python "
"datetime objects instead of an ndarray. To retain the old behavior, "
"call `np.array` on the result",
FutureWarning,
stacklevel=find_stack_level(),
)
return cast(ArrowExtensionArray, self._parent.array)._dt_to_pydatetime()
def isocalendar(self) -> DataFrame:
from pandas import DataFrame
result = (
cast(ArrowExtensionArray, self._parent.array)
._dt_isocalendar()
._pa_array.combine_chunks()
)
iso_calendar_df = DataFrame(
{
col: type(self._parent.array)(result.field(i)) # type: ignore[call-arg]
for i, col in enumerate(["year", "week", "day"])
}
)
return iso_calendar_df
@property
def components(self) -> DataFrame:
from pandas import DataFrame
components_df = DataFrame(
{
col: getattr(self._parent.array, f"_dt_{col}")
for col in [
"days",
"hours",
"minutes",
"seconds",
"milliseconds",
"microseconds",
"nanoseconds",
]
}
)
return components_df
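
# Illustrative sketch (not part of this diff): with a pyarrow-backed temporal
# dtype, `Series.dt` routes through ArrowTemporalProperties and the `_dt_*`
# delegates registered above. Assumes pyarrow is installed; data is made up.
#
# import pandas as pd
#
# s = pd.Series(pd.to_datetime(["2024-01-01", "2024-06-15"])).astype(
#     "timestamp[us][pyarrow]"
# )
# print(s.dt.year)           # resolved via ArrowExtensionArray._dt_year
# print(s.dt.isocalendar())  # year/week/day DataFrame built from the pa table
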
@delegate_names(
delegate=DatetimeArray,
accessors=DatetimeArray._datetimelike_ops + ["unit"],
typ="property",
)
@delegate_names(
delegate=DatetimeArray,
accessors=DatetimeArray._datetimelike_methods + ["as_unit"],
typ="method",
)
class DatetimeProperties(Properties):
"""
Accessor object for datetimelike properties of the Series values.
Examples
--------
>>> seconds_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="s"))
>>> seconds_series
0 2000-01-01 00:00:00
1 2000-01-01 00:00:01
2 2000-01-01 00:00:02
dtype: datetime64[ns]
>>> seconds_series.dt.second
0 0
1 1
2 2
dtype: int32
>>> hours_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h"))
>>> hours_series
0 2000-01-01 00:00:00
1 2000-01-01 01:00:00
2 2000-01-01 02:00:00
dtype: datetime64[ns]
>>> hours_series.dt.hour
0 0
1 1
2 2
dtype: int32
>>> quarters_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="QE"))
>>> quarters_series
0 2000-03-31
1 2000-06-30
2 2000-09-30
dtype: datetime64[ns]
>>> quarters_series.dt.quarter
0 1
1 2
2 3
dtype: int32
Returns a Series indexed like the original Series.
Raises TypeError if the Series does not contain datetimelike values.
"""
def to_pydatetime(self) -> np.ndarray:
"""
Return the data as an array of :class:`datetime.datetime` objects.
.. deprecated:: 2.1.0
The current behavior of dt.to_pydatetime is deprecated.
In a future version this will return a Series containing python
datetime objects instead of a ndarray.
Timezone information is retained if present.
.. warning::
Python's datetime uses microsecond resolution, which is lower than
pandas (nanosecond). The values are truncated.
Returns
-------
numpy.ndarray
Object dtype array containing native Python datetime objects.
See Also
--------
datetime.datetime : Standard library value for a datetime.
Examples
--------
>>> s = pd.Series(pd.date_range('20180310', periods=2))
>>> s
0 2018-03-10
1 2018-03-11
dtype: datetime64[ns]
>>> s.dt.to_pydatetime()
array([datetime.datetime(2018, 3, 10, 0, 0),
datetime.datetime(2018, 3, 11, 0, 0)], dtype=object)
pandas' nanosecond precision is truncated to microseconds.
>>> s = pd.Series(pd.date_range('20180310', periods=2, freq='ns'))
>>> s
0 2018-03-10 00:00:00.000000000
1 2018-03-10 00:00:00.000000001
dtype: datetime64[ns]
>>> s.dt.to_pydatetime()
array([datetime.datetime(2018, 3, 10, 0, 0),
datetime.datetime(2018, 3, 10, 0, 0)], dtype=object)
"""
# GH#20306
warnings.warn(
f"The behavior of {type(self).__name__}.to_pydatetime is deprecated, "
"in a future version this will return a Series containing python "
"datetime objects instead of an ndarray. To retain the old behavior, "
"call `np.array` on the result",
FutureWarning,
stacklevel=find_stack_level(),
)
return self._get_values().to_pydatetime()
@property
def freq(self):
return self._get_values().inferred_freq
def isocalendar(self) -> DataFrame:
"""
Calculate year, week, and day according to the ISO 8601 standard.
Returns
-------
DataFrame
With columns year, week and day.
See Also
--------
Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
week number, and weekday for the given Timestamp object.
datetime.date.isocalendar : Return a named tuple object with
three components: year, week and weekday.
Examples
--------
>>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT]))
>>> ser.dt.isocalendar()
year week day
0 2009 53 5
1 <NA> <NA> <NA>
>>> ser.dt.isocalendar().week
0 53
1 <NA>
Name: week, dtype: UInt32
"""
return self._get_values().isocalendar().set_index(self._parent.index)
@delegate_names(
delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property"
)
@delegate_names(
delegate=TimedeltaArray,
accessors=TimedeltaArray._datetimelike_methods,
typ="method",
)
class TimedeltaProperties(Properties):
"""
Accessor object for datetimelike properties of the Series values.
Returns a Series indexed like the original Series.
Raises TypeError if the Series does not contain datetimelike values.
Examples
--------
>>> seconds_series = pd.Series(
... pd.timedelta_range(start="1 second", periods=3, freq="s")
... )
>>> seconds_series
0 0 days 00:00:01
1 0 days 00:00:02
2 0 days 00:00:03
dtype: timedelta64[ns]
>>> seconds_series.dt.seconds
0 1
1 2
2 3
dtype: int32
"""
def to_pytimedelta(self) -> np.ndarray:
"""
Return an array of native :class:`datetime.timedelta` objects.
Python's standard `datetime` library uses a different representation
for timedeltas. This method converts a Series of pandas Timedeltas
to `datetime.timedelta` objects with the same length as the original
Series.
Returns
-------
numpy.ndarray
1D array containing data of `datetime.timedelta` type.
See Also
--------
datetime.timedelta : A duration expressing the difference
between two dates, times, or datetime instances.
Examples
--------
>>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="d"))
>>> s
0 0 days
1 1 days
2 2 days
3 3 days
4 4 days
dtype: timedelta64[ns]
>>> s.dt.to_pytimedelta()
array([datetime.timedelta(0), datetime.timedelta(days=1),
datetime.timedelta(days=2), datetime.timedelta(days=3),
datetime.timedelta(days=4)], dtype=object)
"""
return self._get_values().to_pytimedelta()
@property
def components(self):
"""
Return a DataFrame of the components of the Timedeltas.
Returns
-------
DataFrame
Examples
--------
>>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s'))
>>> s
0 0 days 00:00:00
1 0 days 00:00:01
2 0 days 00:00:02
3 0 days 00:00:03
4 0 days 00:00:04
dtype: timedelta64[ns]
>>> s.dt.components
days hours minutes seconds milliseconds microseconds nanoseconds
0 0 0 0 0 0 0 0
1 0 0 0 1 0 0 0
2 0 0 0 2 0 0 0
3 0 0 0 3 0 0 0
4 0 0 0 4 0 0 0
"""
return (
self._get_values()
.components.set_index(self._parent.index)
.__finalize__(self._parent)
)
@property
def freq(self):
return self._get_values().inferred_freq
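
# Illustrative sketch (not part of this diff): `.dt.freq` above is derived,
# not stored -- it reports the frequency inferred from the values. Made-up data.
#
# import pandas as pd
#
# td = pd.Series(pd.timedelta_range("1h", periods=4, freq="h"))
# print(td.dt.freq)        # inferred from the evenly spaced values
# print(td.dt.components)  # one column per unit, indexed like the Series
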
@delegate_names(
delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property"
)
@delegate_names(
delegate=PeriodArray, accessors=PeriodArray._datetimelike_methods, typ="method"
)
class PeriodProperties(Properties):
"""
Accessor object for datetimelike properties of the Series values.
Returns a Series indexed like the original Series.
Raises TypeError if the Series does not contain datetimelike values.
Examples
--------
>>> seconds_series = pd.Series(
... pd.period_range(
... start="2000-01-01 00:00:00", end="2000-01-01 00:00:03", freq="s"
... )
... )
>>> seconds_series
0 2000-01-01 00:00:00
1 2000-01-01 00:00:01
2 2000-01-01 00:00:02
3 2000-01-01 00:00:03
dtype: period[s]
>>> seconds_series.dt.second
0 0
1 1
2 2
3 3
dtype: int64
>>> hours_series = pd.Series(
... pd.period_range(start="2000-01-01 00:00", end="2000-01-01 03:00", freq="h")
... )
>>> hours_series
0 2000-01-01 00:00
1 2000-01-01 01:00
2 2000-01-01 02:00
3 2000-01-01 03:00
dtype: period[h]
>>> hours_series.dt.hour
0 0
1 1
2 2
3 3
dtype: int64
>>> quarters_series = pd.Series(
... pd.period_range(start="2000-01-01", end="2000-12-31", freq="Q-DEC")
... )
>>> quarters_series
0 2000Q1
1 2000Q2
2 2000Q3
3 2000Q4
dtype: period[Q-DEC]
>>> quarters_series.dt.quarter
0 1
1 2
2 3
3 4
dtype: int64
"""
class CombinedDatetimelikeProperties(
DatetimeProperties, TimedeltaProperties, PeriodProperties
):
def __new__(cls, data: Series): # pyright: ignore[reportInconsistentConstructor]
# CombinedDatetimelikeProperties isn't really instantiated. Instead
# we need to choose which parent (datetime or timedelta) is
# appropriate. Since we're checking the dtypes anyway, we'll just
# do all the validation here.
if not isinstance(data, ABCSeries):
raise TypeError(
f"cannot convert an object of type {type(data)} to a datetimelike index"
)
orig = data if isinstance(data.dtype, CategoricalDtype) else None
if orig is not None:
data = data._constructor(
orig.array,
name=orig.name,
copy=False,
dtype=orig._values.categories.dtype,
index=orig.index,
)
if isinstance(data.dtype, ArrowDtype) and data.dtype.kind in "Mm":
return ArrowTemporalProperties(data, orig)
if lib.is_np_dtype(data.dtype, "M"):
return DatetimeProperties(data, orig)
elif isinstance(data.dtype, DatetimeTZDtype):
return DatetimeProperties(data, orig)
elif lib.is_np_dtype(data.dtype, "m"):
return TimedeltaProperties(data, orig)
elif isinstance(data.dtype, PeriodDtype):
return PeriodProperties(data, orig)
raise AttributeError("Can only use .dt accessor with datetimelike values")
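
# Illustrative sketch (not part of this diff): CombinedDatetimelikeProperties
# is never truly instantiated -- __new__ inspects the Series dtype and returns
# the matching concrete accessor. Made-up data.
#
# import pandas as pd
#
# print(type(pd.Series(pd.date_range("2000", periods=2)).dt).__name__)
# # -> DatetimeProperties
# print(type(pd.Series(pd.timedelta_range("1D", periods=2)).dt).__name__)
# # -> TimedeltaProperties
# try:
#     pd.Series([1, 2, 3]).dt
# except AttributeError as err:
#     print(err)  # Can only use .dt accessor with datetimelike values
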


@@ -0,0 +1,388 @@
from __future__ import annotations
import textwrap
from typing import (
TYPE_CHECKING,
cast,
)
import numpy as np
from pandas._libs import (
NaT,
lib,
)
from pandas.errors import InvalidIndexError
from pandas.core.dtypes.cast import find_common_type
from pandas.core.algorithms import safe_sort
from pandas.core.indexes.base import (
Index,
_new_Index,
ensure_index,
ensure_index_from_sequences,
get_unanimous_names,
)
from pandas.core.indexes.category import CategoricalIndex
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.interval import IntervalIndex
from pandas.core.indexes.multi import MultiIndex
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexes.range import RangeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
if TYPE_CHECKING:
from pandas._typing import Axis
_sort_msg = textwrap.dedent(
"""\
Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.
To accept the future behavior, pass 'sort=False'.
To retain the current behavior and silence the warning, pass 'sort=True'.
"""
)
__all__ = [
"Index",
"MultiIndex",
"CategoricalIndex",
"IntervalIndex",
"RangeIndex",
"InvalidIndexError",
"TimedeltaIndex",
"PeriodIndex",
"DatetimeIndex",
"_new_Index",
"NaT",
"ensure_index",
"ensure_index_from_sequences",
"get_objs_combined_axis",
"union_indexes",
"get_unanimous_names",
"all_indexes_same",
"default_index",
"safe_sort_index",
]
def get_objs_combined_axis(
objs,
intersect: bool = False,
axis: Axis = 0,
sort: bool = True,
copy: bool = False,
) -> Index:
"""
Extract combined index: return intersection or union (depending on the
value of "intersect") of indexes on given axis, or None if all objects
lack indexes (e.g. they are numpy arrays).
Parameters
----------
objs : list
Series or DataFrame objects, may be mix of the two.
intersect : bool, default False
If True, calculate the intersection between indexes. Otherwise,
calculate the union.
axis : {0 or 'index', 1 or 'columns'}, default 0
The axis to extract indexes from.
sort : bool, default True
Whether the result index should come out sorted or not.
copy : bool, default False
If True, return a copy of the combined index.
Returns
-------
Index
"""
obs_idxes = [obj._get_axis(axis) for obj in objs]
return _get_combined_index(obs_idxes, intersect=intersect, sort=sort, copy=copy)
def _get_distinct_objs(objs: list[Index]) -> list[Index]:
"""
Return a list with distinct elements of "objs" (different ids).
Preserves order.
"""
ids: set[int] = set()
res = []
for obj in objs:
if id(obj) not in ids:
ids.add(id(obj))
res.append(obj)
return res
def _get_combined_index(
indexes: list[Index],
intersect: bool = False,
sort: bool = False,
copy: bool = False,
) -> Index:
"""
Return the union or intersection of indexes.
Parameters
----------
indexes : list of Index or list objects
When intersect=True, do not accept list of lists.
intersect : bool, default False
If True, calculate the intersection between indexes. Otherwise,
calculate the union.
sort : bool, default False
Whether the result index should come out sorted or not.
copy : bool, default False
If True, return a copy of the combined index.
Returns
-------
Index
"""
# TODO: handle index names!
indexes = _get_distinct_objs(indexes)
if len(indexes) == 0:
index = Index([])
elif len(indexes) == 1:
index = indexes[0]
elif intersect:
index = indexes[0]
for other in indexes[1:]:
index = index.intersection(other)
else:
index = union_indexes(indexes, sort=False)
index = ensure_index(index)
if sort:
index = safe_sort_index(index)
# GH 29879
if copy:
index = index.copy()
return index
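
# Illustrative sketch (not part of this diff): the fold above mirrors the
# public Index set ops -- pairwise intersection, or one union pass over all
# indexes. Made-up indexes.
#
# import pandas as pd
#
# a = pd.Index([3, 1, 2])
# b = pd.Index([2, 3, 4])
# print(a.intersection(b))       # keeps the caller's order: Index([3, 2])
# print(a.union(b, sort=False))  # Index([3, 1, 2, 4])
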
def safe_sort_index(index: Index) -> Index:
"""
Returns the sorted index
We keep the dtypes and the name attributes.
Parameters
----------
index : an Index
Returns
-------
Index
"""
if index.is_monotonic_increasing:
return index
try:
array_sorted = safe_sort(index)
except TypeError:
pass
else:
if isinstance(array_sorted, Index):
return array_sorted
array_sorted = cast(np.ndarray, array_sorted)
if isinstance(index, MultiIndex):
index = MultiIndex.from_tuples(array_sorted, names=index.names)
else:
index = Index(array_sorted, name=index.name, dtype=index.dtype)
return index
def union_indexes(indexes, sort: bool | None = True) -> Index:
"""
Return the union of indexes.
The behavior of sort and names is not consistent.
Parameters
----------
indexes : list of Index or list objects
sort : bool, default True
Whether the result index should come out sorted or not.
Returns
-------
Index
"""
if len(indexes) == 0:
raise AssertionError("Must have at least 1 Index to union")
if len(indexes) == 1:
result = indexes[0]
if isinstance(result, list):
if not sort:
result = Index(result)
else:
result = Index(sorted(result))
return result
indexes, kind = _sanitize_and_check(indexes)
def _unique_indices(inds, dtype) -> Index:
"""
Concatenate indices and remove duplicates.
Parameters
----------
inds : list of Index or list objects
dtype : dtype to set for the resulting Index
Returns
-------
Index
"""
if all(isinstance(ind, Index) for ind in inds):
inds = [ind.astype(dtype, copy=False) for ind in inds]
result = inds[0].unique()
other = inds[1].append(inds[2:])
diff = other[result.get_indexer_for(other) == -1]
if len(diff):
result = result.append(diff.unique())
if sort:
result = result.sort_values()
return result
def conv(i):
if isinstance(i, Index):
i = i.tolist()
return i
return Index(
lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort),
dtype=dtype,
)
def _find_common_index_dtype(inds):
"""
Finds a common type for the indexes to pass through to resulting index.
Parameters
----------
inds: list of Index or list objects
Returns
-------
The common type or None if no indexes were given
"""
dtypes = [idx.dtype for idx in indexes if isinstance(idx, Index)]
if dtypes:
dtype = find_common_type(dtypes)
else:
dtype = None
return dtype
if kind == "special":
result = indexes[0]
dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
dti_tzs = [x for x in dtis if x.tz is not None]
if len(dti_tzs) not in [0, len(dtis)]:
# TODO: this behavior is not tested (so may not be desired),
# but is kept in order to keep behavior the same when
# deprecating union_many
# test_frame_from_dict_with_mixed_indexes
raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
if len(dtis) == len(indexes):
sort = True
result = indexes[0]
elif len(dtis) > 1:
# If we have mixed timezones, our casting behavior may depend on
# the order of indexes, which we don't want.
sort = False
# TODO: what about Categorical[dt64]?
# test_frame_from_dict_with_mixed_indexes
indexes = [x.astype(object, copy=False) for x in indexes]
result = indexes[0]
for other in indexes[1:]:
result = result.union(other, sort=None if sort else False)
return result
elif kind == "array":
dtype = _find_common_index_dtype(indexes)
index = indexes[0]
if not all(index.equals(other) for other in indexes[1:]):
index = _unique_indices(indexes, dtype)
name = get_unanimous_names(*indexes)[0]
if name != index.name:
index = index.rename(name)
return index
else: # kind='list'
dtype = _find_common_index_dtype(indexes)
return _unique_indices(indexes, dtype)
def _sanitize_and_check(indexes):
"""
Verify the type of indexes and convert lists to Index.
Cases:
- [list, list, ...]: Return ([list, list, ...], 'list')
- [list, Index, ...]: Return _sanitize_and_check([Index, Index, ...])
Lists are sorted and converted to Index.
- [Index, Index, ...]: Return ([Index, Index, ...], TYPE)
TYPE = 'special' if at least one special type, 'array' otherwise.
Parameters
----------
indexes : list of Index or list objects
Returns
-------
sanitized_indexes : list of Index or list objects
type : {'list', 'array', 'special'}
"""
kinds = list({type(index) for index in indexes})
if list in kinds:
if len(kinds) > 1:
indexes = [
Index(list(x)) if not isinstance(x, Index) else x for x in indexes
]
kinds.remove(list)
else:
return indexes, "list"
if len(kinds) > 1 or Index not in kinds:
return indexes, "special"
else:
return indexes, "array"
def all_indexes_same(indexes) -> bool:
"""
Determine if all indexes contain the same elements.
Parameters
----------
indexes : iterable of Index objects
Returns
-------
bool
True if all indexes contain the same elements, False otherwise.
"""
itr = iter(indexes)
first = next(itr)
return all(first.equals(index) for index in itr)
def default_index(n: int) -> RangeIndex:
rng = range(n)
return RangeIndex._simple_new(rng, name=None)
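
# Illustrative sketch (not part of this diff): default_index is the RangeIndex
# pandas attaches when a constructor gets no explicit index.
#
# import pandas as pd
#
# s = pd.Series([10, 20, 30])
# print(s.index)                           # RangeIndex(start=0, stop=3, step=1)
# print(s.index.equals(pd.RangeIndex(3)))  # True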

File diff suppressed because it is too large.


@@ -0,0 +1,513 @@
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Any,
Literal,
cast,
)
import numpy as np
from pandas._libs import index as libindex
from pandas.util._decorators import (
cache_readonly,
doc,
)
from pandas.core.dtypes.common import is_scalar
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
isna,
)
from pandas.core.arrays.categorical import (
Categorical,
contains,
)
from pandas.core.construction import extract_array
from pandas.core.indexes.base import (
Index,
maybe_extract_name,
)
from pandas.core.indexes.extension import (
NDArrayBackedExtensionIndex,
inherit_names,
)
if TYPE_CHECKING:
from collections.abc import Hashable
from pandas._typing import (
Dtype,
DtypeObj,
Self,
npt,
)
@inherit_names(
[
"argsort",
"tolist",
"codes",
"categories",
"ordered",
"_reverse_indexer",
"searchsorted",
"min",
"max",
],
Categorical,
)
@inherit_names(
[
"rename_categories",
"reorder_categories",
"add_categories",
"remove_categories",
"remove_unused_categories",
"set_categories",
"as_ordered",
"as_unordered",
],
Categorical,
wrap=True,
)
class CategoricalIndex(NDArrayBackedExtensionIndex):
"""
Index based on an underlying :class:`Categorical`.
CategoricalIndex, like Categorical, can only take on a limited,
and usually fixed, number of possible values (`categories`). Also,
like Categorical, it might have an order, but numerical operations
(additions, divisions, ...) are not possible.
Parameters
----------
data : array-like (1-dimensional)
The values of the categorical. If `categories` are given, values not in
`categories` will be replaced with NaN.
categories : index-like, optional
The categories for the categorical. Items need to be unique.
If the categories are not given here (and also not in `dtype`), they
will be inferred from the `data`.
ordered : bool, optional
Whether or not this categorical is treated as an ordered
categorical. If not given here or in `dtype`, the resulting
categorical will be unordered.
dtype : CategoricalDtype or "category", optional
If :class:`CategoricalDtype`, cannot be used together with
`categories` or `ordered`.
copy : bool, default False
Make a copy of input ndarray.
name : object, optional
Name to be stored in the index.
Attributes
----------
codes
categories
ordered
Methods
-------
rename_categories
reorder_categories
add_categories
remove_categories
remove_unused_categories
set_categories
as_ordered
as_unordered
map
Raises
------
ValueError
If the categories do not validate.
TypeError
If an explicit ``ordered=True`` is given but no `categories` and the
`values` are not sortable.
See Also
--------
Index : The base pandas Index type.
Categorical : A categorical array.
CategoricalDtype : Type for categorical data.
Notes
-----
See the `user guide
<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#categoricalindex>`__
for more.
Examples
--------
>>> pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"])
CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],
categories=['a', 'b', 'c'], ordered=False, dtype='category')
``CategoricalIndex`` can also be instantiated from a ``Categorical``:
>>> c = pd.Categorical(["a", "b", "c", "a", "b", "c"])
>>> pd.CategoricalIndex(c)
CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],
categories=['a', 'b', 'c'], ordered=False, dtype='category')
Ordered ``CategoricalIndex`` can have a min and max value.
>>> ci = pd.CategoricalIndex(
... ["a", "b", "c", "a", "b", "c"], ordered=True, categories=["c", "b", "a"]
... )
>>> ci
CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],
categories=['c', 'b', 'a'], ordered=True, dtype='category')
>>> ci.min()
'c'
"""
_typ = "categoricalindex"
_data_cls = Categorical
@property
def _can_hold_strings(self):
return self.categories._can_hold_strings
@cache_readonly
def _should_fallback_to_positional(self) -> bool:
return self.categories._should_fallback_to_positional
codes: np.ndarray
categories: Index
ordered: bool | None
_data: Categorical
_values: Categorical
@property
def _engine_type(self) -> type[libindex.IndexEngine]:
# self.codes can have dtype int8, int16, int32 or int64, so we need
# to return the corresponding engine type (libindex.Int8Engine, etc.).
return {
np.int8: libindex.Int8Engine,
np.int16: libindex.Int16Engine,
np.int32: libindex.Int32Engine,
np.int64: libindex.Int64Engine,
}[self.codes.dtype.type]
# --------------------------------------------------------------------
# Constructors
def __new__(
cls,
data=None,
categories=None,
ordered=None,
dtype: Dtype | None = None,
copy: bool = False,
name: Hashable | None = None,
) -> Self:
name = maybe_extract_name(name, data, cls)
if is_scalar(data):
# GH#38944 include None here, which pre-2.0 subbed in []
cls._raise_scalar_data_error(data)
data = Categorical(
data, categories=categories, ordered=ordered, dtype=dtype, copy=copy
)
return cls._simple_new(data, name=name)
# --------------------------------------------------------------------
def _is_dtype_compat(self, other: Index) -> Categorical:
"""
*this is an internal non-public method*
provide a comparison between the dtype of self and other (coercing if
needed)
Parameters
----------
other : Index
Returns
-------
Categorical
Raises
------
TypeError if the dtypes are not compatible
"""
if isinstance(other.dtype, CategoricalDtype):
cat = extract_array(other)
cat = cast(Categorical, cat)
if not cat._categories_match_up_to_permutation(self._values):
raise TypeError(
"categories must match existing categories when appending"
)
elif other._is_multi:
# preempt raising NotImplementedError in isna call
raise TypeError("MultiIndex is not dtype-compatible with CategoricalIndex")
else:
values = other
cat = Categorical(other, dtype=self.dtype)
other = CategoricalIndex(cat)
if not other.isin(values).all():
raise TypeError(
"cannot append a non-category item to a CategoricalIndex"
)
cat = other._values
if not ((cat == values) | (isna(cat) & isna(values))).all():
# GH#37667 see test_equals_non_category
raise TypeError(
"categories must match existing categories when appending"
)
return cat
def equals(self, other: object) -> bool:
"""
Determine if two CategoricalIndex objects contain the same elements.
Returns
-------
bool
``True`` if two :class:`pandas.CategoricalIndex` objects have equal
elements, ``False`` otherwise.
Examples
--------
>>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'])
>>> ci2 = pd.CategoricalIndex(pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']))
>>> ci.equals(ci2)
True
The order of elements matters.
>>> ci3 = pd.CategoricalIndex(['c', 'b', 'a', 'a', 'b', 'c'])
>>> ci.equals(ci3)
False
The orderedness also matters.
>>> ci4 = ci.as_ordered()
>>> ci.equals(ci4)
False
The categories matter, but the order of the categories matters only when
``ordered=True``.
>>> ci5 = ci.set_categories(['a', 'b', 'c', 'd'])
>>> ci.equals(ci5)
False
>>> ci6 = ci.set_categories(['b', 'c', 'a'])
>>> ci.equals(ci6)
True
>>> ci_ordered = pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'],
... ordered=True)
>>> ci2_ordered = ci_ordered.set_categories(['b', 'c', 'a'])
>>> ci_ordered.equals(ci2_ordered)
False
"""
if self.is_(other):
return True
if not isinstance(other, Index):
return False
try:
other = self._is_dtype_compat(other)
except (TypeError, ValueError):
return False
return self._data.equals(other)
# --------------------------------------------------------------------
# Rendering Methods
@property
def _formatter_func(self):
return self.categories._formatter_func
def _format_attrs(self):
"""
Return a list of tuples of the (attr,formatted_value)
"""
attrs: list[tuple[str, str | int | bool | None]]
attrs = [
(
"categories",
f"[{', '.join(self._data._repr_categories())}]",
),
("ordered", self.ordered),
]
extra = super()._format_attrs()
return attrs + extra
# --------------------------------------------------------------------
@property
def inferred_type(self) -> str:
return "categorical"
@doc(Index.__contains__)
def __contains__(self, key: Any) -> bool:
# if key is a NaN, check if any NaN is in self.
if is_valid_na_for_dtype(key, self.categories.dtype):
return self.hasnans
return contains(self, key, container=self._engine)
def reindex(
self, target, method=None, level=None, limit: int | None = None, tolerance=None
) -> tuple[Index, npt.NDArray[np.intp] | None]:
"""
Create index with target's values (move/add/delete values as necessary)
Returns
-------
new_index : pd.Index
Resulting index
indexer : np.ndarray[np.intp] or None
Indices of output values in original index
"""
if method is not None:
raise NotImplementedError(
"argument method is not implemented for CategoricalIndex.reindex"
)
if level is not None:
raise NotImplementedError(
"argument level is not implemented for CategoricalIndex.reindex"
)
if limit is not None:
raise NotImplementedError(
"argument limit is not implemented for CategoricalIndex.reindex"
)
return super().reindex(target)
# --------------------------------------------------------------------
# Indexing Methods
def _maybe_cast_indexer(self, key) -> int:
# GH#41933: we have to do this instead of self._data._validate_scalar
# because this will correctly get partial-indexing on Interval categories
try:
return self._data._unbox_scalar(key)
except KeyError:
if is_valid_na_for_dtype(key, self.categories.dtype):
return -1
raise
def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex:
if isinstance(values, CategoricalIndex):
values = values._data
if isinstance(values, Categorical):
# Indexing on codes is more efficient if categories are the same,
# so we can apply some optimizations based on the degree of
# dtype-matching.
cat = self._data._encode_with_my_categories(values)
codes = cat._codes
else:
codes = self.categories.get_indexer(values)
codes = codes.astype(self.codes.dtype, copy=False)
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat)
# --------------------------------------------------------------------
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
return self.categories._is_comparable_dtype(dtype)
def map(self, mapper, na_action: Literal["ignore"] | None = None):
"""
Map values using an input mapping or function.
Maps the values (their categories, not the codes) of the index to new
categories. If the mapping correspondence is one-to-one the result is a
:class:`~pandas.CategoricalIndex` which has the same order property as
the original, otherwise an :class:`~pandas.Index` is returned.
If a `dict` or :class:`~pandas.Series` is used any unmapped category is
mapped to `NaN`. Note that if this happens an :class:`~pandas.Index`
will be returned.
Parameters
----------
mapper : function, dict, or Series
Mapping correspondence.
Returns
-------
pandas.CategoricalIndex or pandas.Index
Mapped index.
See Also
--------
Index.map : Apply a mapping correspondence on an
:class:`~pandas.Index`.
Series.map : Apply a mapping correspondence on a
:class:`~pandas.Series`.
Series.apply : Apply more complex functions on a
:class:`~pandas.Series`.
Examples
--------
>>> idx = pd.CategoricalIndex(['a', 'b', 'c'])
>>> idx
CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
ordered=False, dtype='category')
>>> idx.map(lambda x: x.upper())
CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'],
ordered=False, dtype='category')
>>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'})
CategoricalIndex(['first', 'second', 'third'], categories=['first',
'second', 'third'], ordered=False, dtype='category')
If the mapping is one-to-one the ordering of the categories is
preserved:
>>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True)
>>> idx
CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
ordered=True, dtype='category')
>>> idx.map({'a': 3, 'b': 2, 'c': 1})
CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True,
dtype='category')
If the mapping is not one-to-one an :class:`~pandas.Index` is returned:
>>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'})
Index(['first', 'second', 'first'], dtype='object')
If a `dict` is used, all unmapped categories are mapped to `NaN` and
the result is an :class:`~pandas.Index`:
>>> idx.map({'a': 'first', 'b': 'second'})
Index(['first', 'second', nan], dtype='object')
"""
mapped = self._values.map(mapper, na_action=na_action)
return Index(mapped, name=self.name)
def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
# if calling index is category, don't check dtype of others
try:
cat = Categorical._concat_same_type(
[self._is_dtype_compat(c) for c in to_concat]
)
except TypeError:
# not all to_concat elements are among our categories (or NA)
res = concat_compat([x._values for x in to_concat])
return Index(res, name=name)
else:
return type(self)._simple_new(cat, name=name)
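
# Illustrative sketch (not part of this diff): appending values outside the
# categories trips the TypeError branch in _concat above and falls back to a
# plain object-dtype Index. Made-up data.
#
# import pandas as pd
#
# ci = pd.CategoricalIndex(["a", "b", "a"])
# print(ci.append(pd.Index(["a"])).dtype)  # category -- 'a' is a known category
# print(ci.append(pd.Index(["z"])).dtype)  # object -- 'z' is not a category
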


@@ -0,0 +1,843 @@
"""
Base and utility classes for tseries type pandas objects.
"""
from __future__ import annotations
from abc import (
ABC,
abstractmethod,
)
from typing import (
TYPE_CHECKING,
Any,
Callable,
cast,
final,
)
import warnings
import numpy as np
from pandas._config import using_copy_on_write
from pandas._libs import (
NaT,
Timedelta,
lib,
)
from pandas._libs.tslibs import (
BaseOffset,
Resolution,
Tick,
parsing,
to_offset,
)
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
from pandas.compat.numpy import function as nv
from pandas.errors import (
InvalidIndexError,
NullFrequencyError,
)
from pandas.util._decorators import (
Appender,
cache_readonly,
doc,
)
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import (
is_integer,
is_list_like,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.arrays import (
DatetimeArray,
ExtensionArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
import pandas.core.common as com
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import (
Index,
_index_shared_docs,
)
from pandas.core.indexes.extension import NDArrayBackedExtensionIndex
from pandas.core.indexes.range import RangeIndex
from pandas.core.tools.timedeltas import to_timedelta
if TYPE_CHECKING:
from collections.abc import Sequence
from datetime import datetime
from pandas._typing import (
Axis,
Self,
npt,
)
from pandas import CategoricalIndex
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex, ABC):
"""
Common ops mixin to support a unified interface for a datetimelike Index.
"""
_can_hold_strings = False
_data: DatetimeArray | TimedeltaArray | PeriodArray
@doc(DatetimeLikeArrayMixin.mean)
def mean(self, *, skipna: bool = True, axis: int | None = 0):
return self._data.mean(skipna=skipna, axis=axis)
@property
def freq(self) -> BaseOffset | None:
return self._data.freq
@freq.setter
def freq(self, value) -> None:
# error: Property "freq" defined in "PeriodArray" is read-only [misc]
self._data.freq = value # type: ignore[misc]
@property
def asi8(self) -> npt.NDArray[np.int64]:
return self._data.asi8
@property
@doc(DatetimeLikeArrayMixin.freqstr)
def freqstr(self) -> str:
from pandas import PeriodIndex
if self._data.freqstr is not None and isinstance(
self._data, (PeriodArray, PeriodIndex)
):
freq = freq_to_period_freqstr(self._data.freq.n, self._data.freq.name)
return freq
else:
return self._data.freqstr # type: ignore[return-value]
@cache_readonly
@abstractmethod
def _resolution_obj(self) -> Resolution:
...
@cache_readonly
@doc(DatetimeLikeArrayMixin.resolution)
def resolution(self) -> str:
return self._data.resolution
# ------------------------------------------------------------------------
@cache_readonly
def hasnans(self) -> bool:
return self._data._hasna
def equals(self, other: Any) -> bool:
"""
Determines if two Index objects contain the same elements.
"""
if self.is_(other):
return True
if not isinstance(other, Index):
return False
elif other.dtype.kind in "iufc":
return False
elif not isinstance(other, type(self)):
should_try = False
inferable = self._data._infer_matches
if other.dtype == object:
should_try = other.inferred_type in inferable
elif isinstance(other.dtype, CategoricalDtype):
other = cast("CategoricalIndex", other)
should_try = other.categories.inferred_type in inferable
if should_try:
try:
other = type(self)(other)
except (ValueError, TypeError, OverflowError):
# e.g.
# ValueError -> cannot parse str entry, or OutOfBoundsDatetime
# TypeError -> trying to convert IntervalIndex to DatetimeIndex
# OverflowError -> Index([very_large_timedeltas])
return False
if self.dtype != other.dtype:
# have different timezone
return False
return np.array_equal(self.asi8, other.asi8)
@Appender(Index.__contains__.__doc__)
def __contains__(self, key: Any) -> bool:
hash(key)
try:
self.get_loc(key)
except (KeyError, TypeError, ValueError, InvalidIndexError):
return False
return True
def _convert_tolerance(self, tolerance, target):
tolerance = np.asarray(to_timedelta(tolerance).to_numpy())
return super()._convert_tolerance(tolerance, target)
# --------------------------------------------------------------------
# Rendering Methods
_default_na_rep = "NaT"
def format(
self,
name: bool = False,
formatter: Callable | None = None,
na_rep: str = "NaT",
date_format: str | None = None,
) -> list[str]:
"""
Render a string representation of the Index.
"""
warnings.warn(
# GH#55413
f"{type(self).__name__}.format is deprecated and will be removed "
"in a future version. Convert using index.astype(str) or "
"index.map(formatter) instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
header = []
if name:
header.append(
ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
if self.name is not None
else ""
)
if formatter is not None:
return header + list(self.map(formatter))
return self._format_with_header(
header=header, na_rep=na_rep, date_format=date_format
)
def _format_with_header(
self, *, header: list[str], na_rep: str, date_format: str | None = None
) -> list[str]:
# TODO: not reached in tests 2023-10-11
# matches base class except for whitespace padding and date_format
return header + list(
self._get_values_for_csv(na_rep=na_rep, date_format=date_format)
)
@property
def _formatter_func(self):
return self._data._formatter()
def _format_attrs(self):
"""
Return a list of tuples of the (attr,formatted_value).
"""
attrs = super()._format_attrs()
for attrib in self._attributes:
# iterating over _attributes prevents us from doing this for PeriodIndex
if attrib == "freq":
freq = self.freqstr
if freq is not None:
freq = repr(freq) # e.g. D -> 'D'
attrs.append(("freq", freq))
return attrs
@Appender(Index._summary.__doc__)
def _summary(self, name=None) -> str:
result = super()._summary(name=name)
if self.freq:
result += f"\nFreq: {self.freqstr}"
return result
# --------------------------------------------------------------------
# Indexing Methods
@final
def _can_partial_date_slice(self, reso: Resolution) -> bool:
# e.g. test_getitem_setitem_periodindex
# History of conversation GH#3452, GH#3931, GH#2369, GH#14826
return reso > self._resolution_obj
# NB: for DTI/PI, not TDI
def _parsed_string_to_bounds(self, reso: Resolution, parsed):
raise NotImplementedError
def _parse_with_reso(self, label: str):
# overridden by TimedeltaIndex
try:
if self.freq is None or hasattr(self.freq, "rule_code"):
freq = self.freq
except NotImplementedError:
freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None))
freqstr: str | None
if freq is not None and not isinstance(freq, str):
freqstr = freq.rule_code
else:
freqstr = freq
if isinstance(label, np.str_):
# GH#45580
label = str(label)
parsed, reso_str = parsing.parse_datetime_string_with_reso(label, freqstr)
reso = Resolution.from_attrname(reso_str)
return parsed, reso
def _get_string_slice(self, key: str):
# overridden by TimedeltaIndex
parsed, reso = self._parse_with_reso(key)
try:
return self._partial_date_slice(reso, parsed)
except KeyError as err:
raise KeyError(key) from err
@final
def _partial_date_slice(
self,
reso: Resolution,
parsed: datetime,
) -> slice | npt.NDArray[np.intp]:
"""
Parameters
----------
reso : Resolution
parsed : datetime
Returns
-------
slice or ndarray[intp]
"""
if not self._can_partial_date_slice(reso):
raise ValueError
t1, t2 = self._parsed_string_to_bounds(reso, parsed)
vals = self._data._ndarray
unbox = self._data._unbox
if self.is_monotonic_increasing:
if len(self) and (
(t1 < self[0] and t2 < self[0]) or (t1 > self[-1] and t2 > self[-1])
):
# we are out of range
raise KeyError
# TODO: does this depend on being monotonic _increasing_?
# a monotonic (sorted) series can be sliced
left = vals.searchsorted(unbox(t1), side="left")
right = vals.searchsorted(unbox(t2), side="right")
return slice(left, right)
else:
lhs_mask = vals >= unbox(t1)
rhs_mask = vals <= unbox(t2)
# try to find the dates
return (lhs_mask & rhs_mask).nonzero()[0]
def _maybe_cast_slice_bound(self, label, side: str):
"""
If label is a string, cast it to scalar type according to resolution.
Parameters
----------
label : object
side : {'left', 'right'}
Returns
-------
label : object
Notes
-----
Value of `side` parameter should be validated in caller.
"""
if isinstance(label, str):
try:
parsed, reso = self._parse_with_reso(label)
except ValueError as err:
# DTI -> parsing.DateParseError
# TDI -> 'unit abbreviation w/o a number'
# PI -> string cannot be parsed as datetime-like
self._raise_invalid_indexer("slice", label, err)
lower, upper = self._parsed_string_to_bounds(reso, parsed)
return lower if side == "left" else upper
elif not isinstance(label, self._data._recognized_scalars):
self._raise_invalid_indexer("slice", label)
return label
# --------------------------------------------------------------------
# Arithmetic Methods
def shift(self, periods: int = 1, freq=None) -> Self:
"""
Shift index by desired number of time frequency increments.
This method is for shifting the values of datetime-like indexes
by a specified time increment a given number of times.
Parameters
----------
periods : int, default 1
Number of periods (or increments) to shift by,
can be positive or negative.
freq : pandas.DateOffset, pandas.Timedelta or string, optional
Frequency increment to shift by.
If None, the index is shifted by its own `freq` attribute.
Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
Returns
-------
pandas.DatetimeIndex
Shifted index.
See Also
--------
Index.shift : Shift values of Index.
PeriodIndex.shift : Shift values of PeriodIndex.
"""
raise NotImplementedError
# --------------------------------------------------------------------
@doc(Index._maybe_cast_listlike_indexer)
def _maybe_cast_listlike_indexer(self, keyarr):
try:
res = self._data._validate_listlike(keyarr, allow_object=True)
except (ValueError, TypeError):
if not isinstance(keyarr, ExtensionArray):
# e.g. we don't want to cast DTA to ndarray[object]
res = com.asarray_tuplesafe(keyarr)
# TODO: com.asarray_tuplesafe shouldn't cast e.g. DatetimeArray
else:
res = keyarr
return Index(res, dtype=res.dtype)
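
# Illustrative sketch (not part of this diff): the partial-date machinery in
# the mixin above is what lets a string coarser than the index resolution
# select a whole range. Made-up data.
#
# import pandas as pd
#
# idx = pd.date_range("2021-01-01", periods=60, freq="D")
# s = pd.Series(range(60), index=idx)
# print(s["2021-01"].shape)            # (31,) -- month string selects January
# print(s["2021-01-05":"2021-01-07"])  # string slice bounds cast by resolution
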
class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, ABC):
"""
Mixin class for methods shared by DatetimeIndex and TimedeltaIndex,
but not PeriodIndex
"""
_data: DatetimeArray | TimedeltaArray
_comparables = ["name", "freq"]
_attributes = ["name", "freq"]
# Compat for frequency inference, see GH#23789
_is_monotonic_increasing = Index.is_monotonic_increasing
_is_monotonic_decreasing = Index.is_monotonic_decreasing
_is_unique = Index.is_unique
@property
def unit(self) -> str:
return self._data.unit
def as_unit(self, unit: str) -> Self:
"""
Convert to a dtype with the given unit resolution.
Parameters
----------
unit : {'s', 'ms', 'us', 'ns'}
Returns
-------
same type as self
Examples
--------
For :class:`pandas.DatetimeIndex`:
>>> idx = pd.DatetimeIndex(['2020-01-02 01:02:03.004005006'])
>>> idx
DatetimeIndex(['2020-01-02 01:02:03.004005006'],
dtype='datetime64[ns]', freq=None)
>>> idx.as_unit('s')
DatetimeIndex(['2020-01-02 01:02:03'], dtype='datetime64[s]', freq=None)
For :class:`pandas.TimedeltaIndex`:
>>> tdelta_idx = pd.to_timedelta(['1 day 3 min 2 us 42 ns'])
>>> tdelta_idx
TimedeltaIndex(['1 days 00:03:00.000002042'],
dtype='timedelta64[ns]', freq=None)
>>> tdelta_idx.as_unit('s')
TimedeltaIndex(['1 days 00:03:00'], dtype='timedelta64[s]', freq=None)
"""
arr = self._data.as_unit(unit)
return type(self)._simple_new(arr, name=self.name)
def _with_freq(self, freq):
arr = self._data._with_freq(freq)
return type(self)._simple_new(arr, name=self._name)
@property
def values(self) -> np.ndarray:
# NB: For Datetime64TZ this is lossy
data = self._data._ndarray
if using_copy_on_write():
data = data.view()
data.flags.writeable = False
return data
@doc(DatetimeIndexOpsMixin.shift)
def shift(self, periods: int = 1, freq=None) -> Self:
if freq is not None and freq != self.freq:
if isinstance(freq, str):
freq = to_offset(freq)
offset = periods * freq
return self + offset
if periods == 0 or len(self) == 0:
# GH#14811 empty case
return self.copy()
if self.freq is None:
raise NullFrequencyError("Cannot shift with no freq")
start = self[0] + periods * self.freq
end = self[-1] + periods * self.freq
# Note: in the DatetimeTZ case, _generate_range will infer the
# appropriate timezone from `start` and `end`, so tz does not need
# to be passed explicitly.
result = self._data._generate_range(
start=start, end=end, periods=None, freq=self.freq, unit=self.unit
)
return type(self)._simple_new(result, name=self.name)
@cache_readonly
@doc(DatetimeLikeArrayMixin.inferred_freq)
def inferred_freq(self) -> str | None:
return self._data.inferred_freq
# --------------------------------------------------------------------
# Set Operation Methods
@cache_readonly
def _as_range_index(self) -> RangeIndex:
# Convert our i8 representations to RangeIndex
# Caller is responsible for checking isinstance(self.freq, Tick)
freq = cast(Tick, self.freq)
tick = Timedelta(freq).as_unit("ns")._value
rng = range(self[0]._value, self[-1]._value + tick, tick)
return RangeIndex(rng)
def _can_range_setop(self, other) -> bool:
return isinstance(self.freq, Tick) and isinstance(other.freq, Tick)
def _wrap_range_setop(self, other, res_i8) -> Self:
new_freq = None
if not len(res_i8):
# RangeIndex defaults to step=1, which we don't want.
new_freq = self.freq
elif isinstance(res_i8, RangeIndex):
new_freq = to_offset(Timedelta(res_i8.step))
# TODO(GH#41493): we cannot just do
# type(self._data)(res_i8.values, dtype=self.dtype, freq=new_freq)
# because test_setops_preserve_freq fails with _validate_frequency raising.
# This raising is incorrect, as 'on_freq' is incorrect. This will
# be fixed by GH#41493
res_values = res_i8.values.view(self._data._ndarray.dtype)
result = type(self._data)._simple_new(
# error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
# incompatible type "Union[dtype[Any], ExtensionDtype]"; expected
# "Union[dtype[datetime64], DatetimeTZDtype]"
res_values,
dtype=self.dtype, # type: ignore[arg-type]
freq=new_freq, # type: ignore[arg-type]
)
return cast("Self", self._wrap_setop_result(other, result))
def _range_intersect(self, other, sort) -> Self:
# Dispatch to RangeIndex intersection logic.
left = self._as_range_index
right = other._as_range_index
res_i8 = left.intersection(right, sort=sort)
return self._wrap_range_setop(other, res_i8)
def _range_union(self, other, sort) -> Self:
# Dispatch to RangeIndex union logic.
left = self._as_range_index
right = other._as_range_index
res_i8 = left.union(right, sort=sort)
return self._wrap_range_setop(other, res_i8)
def _intersection(self, other: Index, sort: bool = False) -> Index:
"""
intersection specialized to the case with matching dtypes and both non-empty.
"""
other = cast("DatetimeTimedeltaMixin", other)
if self._can_range_setop(other):
return self._range_intersect(other, sort=sort)
if not self._can_fast_intersect(other):
result = Index._intersection(self, other, sort=sort)
# We need to invalidate the freq because Index._intersection
# uses _shallow_copy on a view of self._data, which will preserve
# self.freq if we're not careful.
# At this point we should have result.dtype == self.dtype
# and type(result) is type(self._data)
result = self._wrap_setop_result(other, result)
return result._with_freq(None)._with_freq("infer")
else:
return self._fast_intersect(other, sort)
def _fast_intersect(self, other, sort):
# to make our life easier, "sort" the two ranges
if self[0] <= other[0]:
left, right = self, other
else:
left, right = other, self
# after sorting, the intersection always starts with the right index
# and ends with the index whose last element is smallest
end = min(left[-1], right[-1])
start = right[0]
if end < start:
result = self[:0]
else:
lslice = slice(*left.slice_locs(start, end))
result = left._values[lslice]
return result
def _can_fast_intersect(self, other: Self) -> bool:
# Note: we only get here with len(self) > 0 and len(other) > 0
if self.freq is None:
return False
elif other.freq != self.freq:
return False
elif not self.is_monotonic_increasing:
# Because freq is not None, we must then be monotonic decreasing
return False
# this along with matching freqs ensure that we "line up",
# so intersection will preserve freq
# Note we are assuming away Ticks, as those go through _range_intersect
# GH#42104
return self.freq.n == 1
def _can_fast_union(self, other: Self) -> bool:
# Assumes that type(self) == type(other), as per the annotation
# The ability to fast_union also implies that `freq` should be
# retained on union.
freq = self.freq
if freq is None or freq != other.freq:
return False
if not self.is_monotonic_increasing:
# Because freq is not None, we must then be monotonic decreasing
# TODO: do union on the reversed indexes?
return False
if len(self) == 0 or len(other) == 0:
# only reached via union_many
return True
# to make our life easier, "sort" the two ranges
if self[0] <= other[0]:
left, right = self, other
else:
left, right = other, self
right_start = right[0]
left_end = left[-1]
# Only need to "adjoin", not overlap
return (right_start == left_end + freq) or right_start in left
def _fast_union(self, other: Self, sort=None) -> Self:
# Caller is responsible for ensuring self and other are non-empty
# to make our life easier, "sort" the two ranges
if self[0] <= other[0]:
left, right = self, other
elif sort is False:
# TDIs are not in the "correct" order and we don't want
# to sort but want to remove overlaps
left, right = self, other
left_start = left[0]
loc = right.searchsorted(left_start, side="left")
right_chunk = right._values[:loc]
dates = concat_compat((left._values, right_chunk))
result = type(self)._simple_new(dates, name=self.name)
return result
else:
left, right = other, self
left_end = left[-1]
right_end = right[-1]
# concatenate
if left_end < right_end:
loc = right.searchsorted(left_end, side="right")
right_chunk = right._values[loc:]
dates = concat_compat([left._values, right_chunk])
# The can_fast_union check ensures that the result.freq
# should match self.freq
assert isinstance(dates, type(self._data))
# error: Item "ExtensionArray" of "ExtensionArray |
# ndarray[Any, Any]" has no attribute "_freq"
assert dates._freq == self.freq # type: ignore[union-attr]
result = type(self)._simple_new(dates)
return result
else:
return left
def _union(self, other, sort):
# We are called by `union`, which is responsible for this validation
assert isinstance(other, type(self))
assert self.dtype == other.dtype
if self._can_range_setop(other):
return self._range_union(other, sort=sort)
if self._can_fast_union(other):
result = self._fast_union(other, sort=sort)
# in the case with sort=None, the _can_fast_union check ensures
# that result.freq == self.freq
return result
else:
return super()._union(other, sort)._with_freq("infer")
# --------------------------------------------------------------------
# Join Methods
def _get_join_freq(self, other):
"""
Get the freq to attach to the result of a join operation.
"""
freq = None
if self._can_fast_union(other):
freq = self.freq
return freq
def _wrap_joined_index(
self, joined, other, lidx: npt.NDArray[np.intp], ridx: npt.NDArray[np.intp]
):
assert other.dtype == self.dtype, (other.dtype, self.dtype)
result = super()._wrap_joined_index(joined, other, lidx, ridx)
result._data._freq = self._get_join_freq(other)
return result
def _get_engine_target(self) -> np.ndarray:
# engine methods and libjoin methods need dt64/td64 values cast to i8
return self._data._ndarray.view("i8")
def _from_join_target(self, result: np.ndarray):
# view e.g. i8 back to M8[ns]
result = result.view(self._data._ndarray.dtype)
return self._data._from_backing_data(result)
# --------------------------------------------------------------------
# List-like Methods
def _get_delete_freq(self, loc: int | slice | Sequence[int]):
"""
Find the `freq` for self.delete(loc).
"""
freq = None
if self.freq is not None:
if is_integer(loc):
if loc in (0, -len(self), -1, len(self) - 1):
freq = self.freq
else:
if is_list_like(loc):
# error: Incompatible types in assignment (expression has
# type "Union[slice, ndarray]", variable has type
# "Union[int, slice, Sequence[int]]")
loc = lib.maybe_indices_to_slice( # type: ignore[assignment]
np.asarray(loc, dtype=np.intp), len(self)
)
if isinstance(loc, slice) and loc.step in (1, None):
if loc.start in (0, None) or loc.stop in (len(self), None):
freq = self.freq
return freq
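    # Illustrative sketch (not part of the original source): per the checks
    # above, freq survives a delete only at the edges.
    #
    #   idx = pd.date_range("2024-01-01", periods=5, freq="D")
    #   idx.delete(0).freq   # <Day>  -- endpoint removed, spacing intact
    #   idx.delete(2).freq   # None   -- interior removed, spacing broken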
def _get_insert_freq(self, loc: int, item):
"""
Find the `freq` for self.insert(loc, item).
"""
value = self._data._validate_scalar(item)
item = self._data._box_func(value)
freq = None
if self.freq is not None:
# freq can be preserved on edge cases
if self.size:
if item is NaT:
pass
elif loc in (0, -len(self)) and item + self.freq == self[0]:
freq = self.freq
elif (loc == len(self)) and item - self.freq == self[-1]:
freq = self.freq
else:
# Adding a single item to an empty index may preserve freq
if isinstance(self.freq, Tick):
# all TimedeltaIndex cases go through here; is_on_offset
# would raise TypeError
freq = self.freq
elif self.freq.is_on_offset(item):
freq = self.freq
return freq
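    # Illustrative sketch (not part of the original source): freq survives an
    # insert only when the new item extends an edge by exactly one freq step.
    #
    #   idx = pd.date_range("2024-01-02", periods=3, freq="D")
    #   idx.insert(0, pd.Timestamp("2024-01-01")).freq   # <Day>: item + freq == idx[0]
    #   idx.insert(1, pd.Timestamp("2024-06-01")).freq   # None: breaks regularity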
@doc(NDArrayBackedExtensionIndex.delete)
def delete(self, loc) -> Self:
result = super().delete(loc)
result._data._freq = self._get_delete_freq(loc)
return result
@doc(NDArrayBackedExtensionIndex.insert)
def insert(self, loc: int, item):
result = super().insert(loc, item)
if isinstance(result, type(self)):
# i.e. parent class method did not cast
result._data._freq = self._get_insert_freq(loc, item)
return result
# --------------------------------------------------------------------
# NDArray-Like Methods
@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(
self,
indices,
axis: Axis = 0,
allow_fill: bool = True,
fill_value=None,
**kwargs,
) -> Self:
nv.validate_take((), kwargs)
indices = np.asarray(indices, dtype=np.intp)
result = NDArrayBackedExtensionIndex.take(
self, indices, axis, allow_fill, fill_value, **kwargs
)
maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
if isinstance(maybe_slice, slice):
freq = self._data._get_getitem_freq(maybe_slice)
result._data._freq = freq
return result
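    # Illustrative sketch (not part of the original source): when the take
    # indices collapse to a slice, the slice freq logic applies; otherwise the
    # result comes back with freq=None.
    #
    #   idx = pd.date_range("2024-01-01", periods=5, freq="D")
    #   idx.take([1, 2, 3]).freq   # <Day>: contiguous, equivalent to idx[1:4]
    #   idx.take([0, 3, 1]).freq   # None: no slice equivalent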

File diff suppressed because it is too large

View File

@ -0,0 +1,172 @@
"""
Shared methods for Index subclasses backed by ExtensionArray.
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Callable,
TypeVar,
)
from pandas.util._decorators import cache_readonly
from pandas.core.dtypes.generic import ABCDataFrame
from pandas.core.indexes.base import Index
if TYPE_CHECKING:
import numpy as np
from pandas._typing import (
ArrayLike,
npt,
)
from pandas.core.arrays import IntervalArray
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
_ExtensionIndexT = TypeVar("_ExtensionIndexT", bound="ExtensionIndex")
def _inherit_from_data(
name: str, delegate: type, cache: bool = False, wrap: bool = False
):
"""
Make an alias for a method of the underlying ExtensionArray.
Parameters
----------
name : str
Name of an attribute the class should inherit from its EA parent.
delegate : class
cache : bool, default False
Whether to convert wrapped properties into cache_readonly
wrap : bool, default False
Whether to wrap the inherited result in an Index.
Returns
-------
attribute, method, property, or cache_readonly
"""
attr = getattr(delegate, name)
if isinstance(attr, property) or type(attr).__name__ == "getset_descriptor":
# getset_descriptor i.e. property defined in cython class
if cache:
def cached(self):
return getattr(self._data, name)
cached.__name__ = name
cached.__doc__ = attr.__doc__
method = cache_readonly(cached)
else:
def fget(self):
result = getattr(self._data, name)
if wrap:
if isinstance(result, type(self._data)):
return type(self)._simple_new(result, name=self.name)
elif isinstance(result, ABCDataFrame):
return result.set_index(self)
return Index(result, name=self.name)
return result
def fset(self, value) -> None:
setattr(self._data, name, value)
fget.__name__ = name
fget.__doc__ = attr.__doc__
method = property(fget, fset)
elif not callable(attr):
# just a normal attribute, no wrapping
method = attr
else:
# error: Incompatible redefinition (redefinition with type "Callable[[Any,
# VarArg(Any), KwArg(Any)], Any]", original type "property")
def method(self, *args, **kwargs): # type: ignore[misc]
if "inplace" in kwargs:
raise ValueError(f"cannot use inplace with {type(self).__name__}")
result = attr(self._data, *args, **kwargs)
if wrap:
if isinstance(result, type(self._data)):
return type(self)._simple_new(result, name=self.name)
elif isinstance(result, ABCDataFrame):
return result.set_index(self)
return Index(result, name=self.name)
return result
# error: "property" has no attribute "__name__"
method.__name__ = name # type: ignore[attr-defined]
method.__doc__ = attr.__doc__
return method
def inherit_names(
names: list[str], delegate: type, cache: bool = False, wrap: bool = False
) -> Callable[[type[_ExtensionIndexT]], type[_ExtensionIndexT]]:
"""
Class decorator to pin attributes from an ExtensionArray to a Index subclass.
Parameters
----------
names : List[str]
delegate : class
cache : bool, default False
wrap : bool, default False
Whether to wrap the inherited result in an Index.
"""
def wrapper(cls: type[_ExtensionIndexT]) -> type[_ExtensionIndexT]:
for name in names:
meth = _inherit_from_data(name, delegate, cache=cache, wrap=wrap)
setattr(cls, name, meth)
return cls
return wrapper
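# Illustrative sketch (not part of the original source): this decorator is how,
# for example, TimedeltaIndex picks up TimedeltaArray.total_seconds (see the
# @inherit_names usage on TimedeltaIndex later in this commit). With wrap=True
# the array result comes back wrapped in an Index:
#
#   tdi = pd.to_timedelta(["1s", "2s"])
#   tdi.total_seconds()   # Index([1.0, 2.0], dtype='float64')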
class ExtensionIndex(Index):
"""
Index subclass for indexes backed by ExtensionArray.
"""
# The base class already passes through to _data:
# size, __len__, dtype
_data: IntervalArray | NDArrayBackedExtensionArray
# ---------------------------------------------------------------------
def _validate_fill_value(self, value):
"""
Convert value to be insertable to underlying array.
"""
return self._data._validate_setitem_value(value)
@cache_readonly
def _isnan(self) -> npt.NDArray[np.bool_]:
# error: Incompatible return value type (got "ExtensionArray", expected
# "ndarray")
return self._data.isna() # type: ignore[return-value]
class NDArrayBackedExtensionIndex(ExtensionIndex):
"""
Index subclass for indexes backed by NDArrayBackedExtensionArray.
"""
_data: NDArrayBackedExtensionArray
def _get_engine_target(self) -> np.ndarray:
return self._data._ndarray
def _from_join_target(self, result: np.ndarray) -> ArrayLike:
assert result.dtype == self._data._ndarray.dtype
return self._data._from_backing_data(result)

View File

@ -0,0 +1,120 @@
"""
frozen (immutable) data structures to support MultiIndexing
These are used for:
- .names (FrozenList)
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
NoReturn,
)
from pandas.core.base import PandasObject
from pandas.io.formats.printing import pprint_thing
if TYPE_CHECKING:
from pandas._typing import Self
class FrozenList(PandasObject, list):
"""
    Container that doesn't allow setting items, *but* because it's
    technically hashable, can be used for lookups and as a dict key.
"""
# Side note: This has to be of type list. Otherwise,
# it messes up PyTables type checks.
def union(self, other) -> FrozenList:
"""
Returns a FrozenList with other concatenated to the end of self.
Parameters
----------
other : array-like
The array-like whose elements we are concatenating.
Returns
-------
FrozenList
            The collection union of self and other.
"""
if isinstance(other, tuple):
other = list(other)
return type(self)(super().__add__(other))
def difference(self, other) -> FrozenList:
"""
Returns a FrozenList with elements from other removed from self.
Parameters
----------
other : array-like
            The array-like whose elements we are removing from self.
Returns
-------
FrozenList
The collection difference between self and other.
"""
other = set(other)
temp = [x for x in self if x not in other]
return type(self)(temp)
# TODO: Consider deprecating these in favor of `union` (xref gh-15506)
# error: Incompatible types in assignment (expression has type
# "Callable[[FrozenList, Any], FrozenList]", base class "list" defined the
# type as overloaded function)
__add__ = __iadd__ = union # type: ignore[assignment]
def __getitem__(self, n):
if isinstance(n, slice):
return type(self)(super().__getitem__(n))
return super().__getitem__(n)
def __radd__(self, other) -> Self:
if isinstance(other, tuple):
other = list(other)
return type(self)(other + list(self))
def __eq__(self, other: object) -> bool:
if isinstance(other, (tuple, FrozenList)):
other = list(other)
return super().__eq__(other)
__req__ = __eq__
def __mul__(self, other) -> Self:
return type(self)(super().__mul__(other))
__imul__ = __mul__
def __reduce__(self):
return type(self), (list(self),)
# error: Signature of "__hash__" incompatible with supertype "list"
def __hash__(self) -> int: # type: ignore[override]
return hash(tuple(self))
def _disabled(self, *args, **kwargs) -> NoReturn:
"""
This method will not function because object is immutable.
"""
raise TypeError(f"'{type(self).__name__}' does not support mutable operations.")
def __str__(self) -> str:
return pprint_thing(self, quote_strings=True, escape_chars=("\t", "\r", "\n"))
def __repr__(self) -> str:
return f"{type(self).__name__}({str(self)})"
__setitem__ = __setslice__ = _disabled # type: ignore[assignment]
__delitem__ = __delslice__ = _disabled
pop = append = extend = _disabled
remove = sort = insert = _disabled # type: ignore[assignment]
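# Illustrative sketch (not part of the original source): set-like helpers
# return new FrozenLists, while mutation raises.
#
#   fl = FrozenList(["a", "b"])
#   fl.union(["c"])        # FrozenList(['a', 'b', 'c'])
#   fl.difference(["b"])   # FrozenList(['a'])
#   fl[0] = "x"            # TypeError: 'FrozenList' does not support mutable operations.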

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,614 @@
from __future__ import annotations
from datetime import (
datetime,
timedelta,
)
from typing import TYPE_CHECKING
import warnings
import numpy as np
from pandas._libs import index as libindex
from pandas._libs.tslibs import (
BaseOffset,
NaT,
Period,
Resolution,
Tick,
)
from pandas._libs.tslibs.dtypes import OFFSET_TO_PERIOD_FREQSTR
from pandas.util._decorators import (
cache_readonly,
doc,
)
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import is_integer
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import is_valid_na_for_dtype
from pandas.core.arrays.period import (
PeriodArray,
period_array,
raise_on_incompatible,
validate_dtype_freq,
)
import pandas.core.common as com
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import maybe_extract_name
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
from pandas.core.indexes.datetimes import (
DatetimeIndex,
Index,
)
from pandas.core.indexes.extension import inherit_names
if TYPE_CHECKING:
from collections.abc import Hashable
from pandas._typing import (
Dtype,
DtypeObj,
Self,
npt,
)
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})
_shared_doc_kwargs = {
"klass": "PeriodArray",
}
# --- Period index sketch
def _new_PeriodIndex(cls, **d):
# GH13277 for unpickling
values = d.pop("data")
if values.dtype == "int64":
freq = d.pop("freq", None)
dtype = PeriodDtype(freq)
values = PeriodArray(values, dtype=dtype)
return cls._simple_new(values, **d)
else:
return cls(values, **d)
@inherit_names(
["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
PeriodArray,
wrap=True,
)
@inherit_names(["is_leap_year"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
"""
Immutable ndarray holding ordinal values indicating regular periods in time.
    Index keys are boxed to Period objects, which carry the metadata (e.g.,
    frequency information).
Parameters
----------
data : array-like (1d int np.ndarray or PeriodArray), optional
Optional period-like data to construct index with.
copy : bool
Make a copy of input ndarray.
freq : str or period object, optional
One of pandas period strings or corresponding objects.
year : int, array, or Series, default None
.. deprecated:: 2.2.0
Use PeriodIndex.from_fields instead.
month : int, array, or Series, default None
.. deprecated:: 2.2.0
Use PeriodIndex.from_fields instead.
quarter : int, array, or Series, default None
.. deprecated:: 2.2.0
Use PeriodIndex.from_fields instead.
day : int, array, or Series, default None
.. deprecated:: 2.2.0
Use PeriodIndex.from_fields instead.
hour : int, array, or Series, default None
.. deprecated:: 2.2.0
Use PeriodIndex.from_fields instead.
minute : int, array, or Series, default None
.. deprecated:: 2.2.0
Use PeriodIndex.from_fields instead.
second : int, array, or Series, default None
.. deprecated:: 2.2.0
Use PeriodIndex.from_fields instead.
dtype : str or PeriodDtype, default None
Attributes
----------
day
dayofweek
day_of_week
dayofyear
day_of_year
days_in_month
daysinmonth
end_time
freq
freqstr
hour
is_leap_year
minute
month
quarter
qyear
second
start_time
week
weekday
weekofyear
year
Methods
-------
asfreq
strftime
to_timestamp
from_fields
from_ordinals
See Also
--------
Index : The base pandas Index type.
Period : Represents a period of time.
DatetimeIndex : Index with datetime64 data.
TimedeltaIndex : Index of timedelta64 data.
period_range : Create a fixed-frequency PeriodIndex.
Examples
--------
>>> idx = pd.PeriodIndex.from_fields(year=[2000, 2002], quarter=[1, 3])
>>> idx
PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
"""
_typ = "periodindex"
_data: PeriodArray
freq: BaseOffset
dtype: PeriodDtype
_data_cls = PeriodArray
_supports_partial_string_indexing = True
@property
def _engine_type(self) -> type[libindex.PeriodEngine]:
return libindex.PeriodEngine
@cache_readonly
def _resolution_obj(self) -> Resolution:
# for compat with DatetimeIndex
return self.dtype._resolution_obj
# --------------------------------------------------------------------
# methods that dispatch to array and wrap result in Index
# These are defined here instead of via inherit_names for mypy
@doc(
PeriodArray.asfreq,
other="pandas.arrays.PeriodArray",
other_name="PeriodArray",
**_shared_doc_kwargs,
)
def asfreq(self, freq=None, how: str = "E") -> Self:
arr = self._data.asfreq(freq, how)
return type(self)._simple_new(arr, name=self.name)
@doc(PeriodArray.to_timestamp)
def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
arr = self._data.to_timestamp(freq, how)
return DatetimeIndex._simple_new(arr, name=self.name)
@property
@doc(PeriodArray.hour.fget)
def hour(self) -> Index:
return Index(self._data.hour, name=self.name)
@property
@doc(PeriodArray.minute.fget)
def minute(self) -> Index:
return Index(self._data.minute, name=self.name)
@property
@doc(PeriodArray.second.fget)
def second(self) -> Index:
return Index(self._data.second, name=self.name)
# ------------------------------------------------------------------------
# Index Constructors
def __new__(
cls,
data=None,
ordinal=None,
freq=None,
dtype: Dtype | None = None,
copy: bool = False,
name: Hashable | None = None,
**fields,
) -> Self:
valid_field_set = {
"year",
"month",
"day",
"quarter",
"hour",
"minute",
"second",
}
refs = None
if not copy and isinstance(data, (Index, ABCSeries)):
refs = data._references
if not set(fields).issubset(valid_field_set):
argument = next(iter(set(fields) - valid_field_set))
raise TypeError(f"__new__() got an unexpected keyword argument {argument}")
elif len(fields):
# GH#55960
warnings.warn(
"Constructing PeriodIndex from fields is deprecated. Use "
"PeriodIndex.from_fields instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
if ordinal is not None:
# GH#55960
warnings.warn(
"The 'ordinal' keyword in PeriodIndex is deprecated and will "
"be removed in a future version. Use PeriodIndex.from_ordinals "
"instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
name = maybe_extract_name(name, data, cls)
if data is None and ordinal is None:
# range-based.
if not fields:
# test_pickle_compat_construction
cls._raise_scalar_data_error(None)
data = cls.from_fields(**fields, freq=freq)._data
copy = False
elif fields:
if data is not None:
raise ValueError("Cannot pass both data and fields")
raise ValueError("Cannot pass both ordinal and fields")
else:
freq = validate_dtype_freq(dtype, freq)
# PeriodIndex allow PeriodIndex(period_index, freq=different)
# Let's not encourage that kind of behavior in PeriodArray.
if freq and isinstance(data, cls) and data.freq != freq:
# TODO: We can do some of these with no-copy / coercion?
# e.g. D -> 2D seems to be OK
data = data.asfreq(freq)
if data is None and ordinal is not None:
ordinal = np.asarray(ordinal, dtype=np.int64)
dtype = PeriodDtype(freq)
data = PeriodArray(ordinal, dtype=dtype)
elif data is not None and ordinal is not None:
raise ValueError("Cannot pass both data and ordinal")
else:
# don't pass copy here, since we copy later.
data = period_array(data=data, freq=freq)
if copy:
data = data.copy()
return cls._simple_new(data, name=name, refs=refs)
@classmethod
def from_fields(
cls,
*,
year=None,
quarter=None,
month=None,
day=None,
hour=None,
minute=None,
second=None,
freq=None,
) -> Self:
fields = {
"year": year,
"quarter": quarter,
"month": month,
"day": day,
"hour": hour,
"minute": minute,
"second": second,
}
fields = {key: value for key, value in fields.items() if value is not None}
arr = PeriodArray._from_fields(fields=fields, freq=freq)
return cls._simple_new(arr)
@classmethod
def from_ordinals(cls, ordinals, *, freq, name=None) -> Self:
ordinals = np.asarray(ordinals, dtype=np.int64)
dtype = PeriodDtype(freq)
data = PeriodArray._simple_new(ordinals, dtype=dtype)
return cls._simple_new(data, name=name)
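    # Illustrative sketch (not part of the original source): the two
    # non-deprecated construction paths. from_fields infers the freq from the
    # fields supplied; from_ordinals interprets int64 ordinals against a freq.
    #
    #   pd.PeriodIndex.from_fields(year=[2000, 2001], month=[1, 2])
    #   # PeriodIndex(['2000-01', '2001-02'], dtype='period[M]')
    #   pd.PeriodIndex.from_ordinals([0, 1, 2], freq="D")
    #   # PeriodIndex(['1970-01-01', '1970-01-02', '1970-01-03'], dtype='period[D]')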
# ------------------------------------------------------------------------
# Data
@property
def values(self) -> npt.NDArray[np.object_]:
return np.asarray(self, dtype=object)
def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
"""
Convert timedelta-like input to an integer multiple of self.freq
Parameters
----------
other : timedelta, np.timedelta64, DateOffset, int, np.ndarray
Returns
-------
converted : int, np.ndarray[int64]
Raises
------
IncompatibleFrequency : if the input cannot be written as a multiple
of self.freq. Note IncompatibleFrequency subclasses ValueError.
"""
if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
if isinstance(self.freq, Tick):
# _check_timedeltalike_freq_compat will raise if incompatible
delta = self._data._check_timedeltalike_freq_compat(other)
return delta
elif isinstance(other, BaseOffset):
if other.base == self.freq.base:
return other.n
raise raise_on_incompatible(self, other)
elif is_integer(other):
assert isinstance(other, int)
return other
# raise when input doesn't have freq
raise raise_on_incompatible(self, None)
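    # Illustrative sketch (not part of the original source): this conversion is
    # what makes Period arithmetic strict about frequency.
    #
    #   pi = pd.period_range("2024-01-01", periods=3, freq="D")
    #   pi + pd.Timedelta(days=2)    # ok: an exact multiple of the "D" freq
    #   pi + pd.Timedelta(hours=1)   # raises IncompatibleFrequency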
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
"""
return self.dtype == dtype
# ------------------------------------------------------------------------
# Index Methods
def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
"""
where : array of timestamps
mask : np.ndarray[bool]
Array of booleans where data is not NA.
"""
if isinstance(where, DatetimeIndex):
where = PeriodIndex(where._values, freq=self.freq)
elif not isinstance(where, PeriodIndex):
raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")
return super().asof_locs(where, mask)
@property
def is_full(self) -> bool:
"""
Returns True if this PeriodIndex is range-like in that all Periods
between start and end are present, in order.
"""
if len(self) == 0:
return True
if not self.is_monotonic_increasing:
raise ValueError("Index is not monotonic")
values = self.asi8
return bool(((values[1:] - values[:-1]) < 2).all())
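    # Illustrative sketch (not part of the original source):
    #
    #   pd.PeriodIndex(["2024-01", "2024-02", "2024-03"], freq="M").is_full  # True
    #   pd.PeriodIndex(["2024-01", "2024-03"], freq="M").is_full   # False: 2024-02 missing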
@property
def inferred_type(self) -> str:
# b/c data is represented as ints make sure we can't have ambiguous
# indexing
return "period"
# ------------------------------------------------------------------------
# Indexing Methods
def _convert_tolerance(self, tolerance, target):
# Returned tolerance must be in dtype/units so that
# `|self._get_engine_target() - target._engine_target()| <= tolerance`
# is meaningful. Since PeriodIndex returns int64 for engine_target,
# we may need to convert timedelta64 tolerance to int64.
tolerance = super()._convert_tolerance(tolerance, target)
if self.dtype == target.dtype:
# convert tolerance to i8
tolerance = self._maybe_convert_timedelta(tolerance)
return tolerance
def get_loc(self, key):
"""
Get integer location for requested label.
Parameters
----------
key : Period, NaT, str, or datetime
String or datetime key must be parsable as Period.
Returns
-------
loc : int or ndarray[int64]
Raises
------
KeyError
Key is not present in the index.
TypeError
If key is listlike or otherwise not hashable.
"""
orig_key = key
self._check_indexing_error(key)
if is_valid_na_for_dtype(key, self.dtype):
key = NaT
elif isinstance(key, str):
try:
parsed, reso = self._parse_with_reso(key)
except ValueError as err:
# A string with invalid format
raise KeyError(f"Cannot interpret '{key}' as period") from err
if self._can_partial_date_slice(reso):
try:
return self._partial_date_slice(reso, parsed)
except KeyError as err:
raise KeyError(key) from err
if reso == self._resolution_obj:
# the reso < self._resolution_obj case goes
# through _get_string_slice
key = self._cast_partial_indexing_scalar(parsed)
else:
raise KeyError(key)
elif isinstance(key, Period):
self._disallow_mismatched_indexing(key)
elif isinstance(key, datetime):
key = self._cast_partial_indexing_scalar(key)
else:
# in particular integer, which Period constructor would cast to string
raise KeyError(key)
try:
return Index.get_loc(self, key)
except KeyError as err:
raise KeyError(orig_key) from err
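    # Illustrative sketch (not part of the original source): strings are parsed
    # and, at matching resolution, cast to a Period before lookup; integers are
    # rejected outright.
    #
    #   pi = pd.period_range("2023Q1", periods=4, freq="Q")
    #   pi.get_loc(pd.Period("2023Q2", freq="Q"))   # 1
    #   pi.get_loc("2023Q2")                        # 1, via _parse_with_reso
    #   pi.get_loc(1)                               # raises KeyError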
def _disallow_mismatched_indexing(self, key: Period) -> None:
if key._dtype != self.dtype:
raise KeyError(key)
def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
try:
period = Period(label, freq=self.freq)
except ValueError as err:
# we cannot construct the Period
raise KeyError(label) from err
return period
@doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
def _maybe_cast_slice_bound(self, label, side: str):
if isinstance(label, datetime):
label = self._cast_partial_indexing_scalar(label)
return super()._maybe_cast_slice_bound(label, side)
def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev)
iv = Period(parsed, freq=freq)
return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))
@doc(DatetimeIndexOpsMixin.shift)
def shift(self, periods: int = 1, freq=None) -> Self:
if freq is not None:
raise TypeError(
f"`freq` argument is not supported for {type(self).__name__}.shift"
)
return self + periods
def period_range(
start=None,
end=None,
periods: int | None = None,
freq=None,
name: Hashable | None = None,
) -> PeriodIndex:
"""
Return a fixed frequency PeriodIndex.
The day (calendar) is the default frequency.
Parameters
----------
start : str, datetime, date, pandas.Timestamp, or period-like, default None
Left bound for generating periods.
end : str, datetime, date, pandas.Timestamp, or period-like, default None
Right bound for generating periods.
periods : int, default None
Number of periods to generate.
freq : str or DateOffset, optional
Frequency alias. By default the freq is taken from `start` or `end`
if those are Period objects. Otherwise, the default is ``"D"`` for
daily frequency.
name : str, default None
Name of the resulting PeriodIndex.
Returns
-------
PeriodIndex
Notes
-----
Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
must be specified.
To learn more about the frequency strings, please see `this link
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
Examples
--------
>>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
'2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
'2018-01'],
dtype='period[M]')
If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
endpoints for a ``PeriodIndex`` with frequency matching that of the
``period_range`` constructor.
>>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
... end=pd.Period('2017Q2', freq='Q'), freq='M')
PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
dtype='period[M]')
"""
if com.count_not_none(start, end, periods) != 2:
raise ValueError(
"Of the three parameters: start, end, and periods, "
"exactly two must be specified"
)
if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):
freq = "D"
data, freq = PeriodArray._generate_range(start, end, periods, freq)
dtype = PeriodDtype(freq)
data = PeriodArray(data, dtype=dtype)
return PeriodIndex(data, name=name)

File diff suppressed because it is too large

View File

@ -0,0 +1,356 @@
""" implement the TimedeltaIndex """
from __future__ import annotations
from typing import TYPE_CHECKING
import warnings
from pandas._libs import (
index as libindex,
lib,
)
from pandas._libs.tslibs import (
Resolution,
Timedelta,
to_offset,
)
from pandas._libs.tslibs.timedeltas import disallow_ambiguous_unit
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import (
is_scalar,
pandas_dtype,
)
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.arrays.timedeltas import TimedeltaArray
import pandas.core.common as com
from pandas.core.indexes.base import (
Index,
maybe_extract_name,
)
from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin
from pandas.core.indexes.extension import inherit_names
if TYPE_CHECKING:
from pandas._typing import DtypeObj
@inherit_names(
["__neg__", "__pos__", "__abs__", "total_seconds", "round", "floor", "ceil"]
+ TimedeltaArray._field_ops,
TimedeltaArray,
wrap=True,
)
@inherit_names(
[
"components",
"to_pytimedelta",
"sum",
"std",
"median",
],
TimedeltaArray,
)
class TimedeltaIndex(DatetimeTimedeltaMixin):
"""
Immutable Index of timedelta64 data.
    Represented internally as int64; scalar access returns Timedelta objects.
Parameters
----------
data : array-like (1-dimensional), optional
Optional timedelta-like data to construct index with.
unit : {'D', 'h', 'm', 's', 'ms', 'us', 'ns'}, optional
The unit of ``data``.
.. deprecated:: 2.2.0
Use ``pd.to_timedelta`` instead.
freq : str or pandas offset object, optional
One of pandas date offset strings or corresponding objects. The string
``'infer'`` can be passed in order to set the frequency of the index as
the inferred frequency upon creation.
dtype : numpy.dtype or str, default None
Valid ``numpy`` dtypes are ``timedelta64[ns]``, ``timedelta64[us]``,
``timedelta64[ms]``, and ``timedelta64[s]``.
copy : bool
Make a copy of input array.
name : object
Name to be stored in the index.
Attributes
----------
days
seconds
microseconds
nanoseconds
components
inferred_freq
Methods
-------
to_pytimedelta
to_series
round
floor
ceil
to_frame
mean
See Also
--------
Index : The base pandas Index type.
Timedelta : Represents a duration between two dates or times.
DatetimeIndex : Index of datetime64 data.
PeriodIndex : Index of Period data.
timedelta_range : Create a fixed-frequency TimedeltaIndex.
Notes
-----
To learn more about the frequency strings, please see `this link
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
Examples
--------
>>> pd.TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'])
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq=None)
We can also let pandas infer the frequency when possible.
>>> pd.TimedeltaIndex(np.arange(5) * 24 * 3600 * 1e9, freq='infer')
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq='D')
"""
_typ = "timedeltaindex"
_data_cls = TimedeltaArray
@property
def _engine_type(self) -> type[libindex.TimedeltaEngine]:
return libindex.TimedeltaEngine
_data: TimedeltaArray
# Use base class method instead of DatetimeTimedeltaMixin._get_string_slice
_get_string_slice = Index._get_string_slice
# error: Signature of "_resolution_obj" incompatible with supertype
# "DatetimeIndexOpsMixin"
@property
def _resolution_obj(self) -> Resolution | None: # type: ignore[override]
return self._data._resolution_obj
# -------------------------------------------------------------------
# Constructors
def __new__(
cls,
data=None,
unit=lib.no_default,
freq=lib.no_default,
closed=lib.no_default,
dtype=None,
copy: bool = False,
name=None,
):
if closed is not lib.no_default:
# GH#52628
warnings.warn(
f"The 'closed' keyword in {cls.__name__} construction is "
"deprecated and will be removed in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
if unit is not lib.no_default:
# GH#55499
warnings.warn(
f"The 'unit' keyword in {cls.__name__} construction is "
"deprecated and will be removed in a future version. "
"Use pd.to_timedelta instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
else:
unit = None
name = maybe_extract_name(name, data, cls)
if is_scalar(data):
cls._raise_scalar_data_error(data)
disallow_ambiguous_unit(unit)
if dtype is not None:
dtype = pandas_dtype(dtype)
if (
isinstance(data, TimedeltaArray)
and freq is lib.no_default
and (dtype is None or dtype == data.dtype)
):
if copy:
data = data.copy()
return cls._simple_new(data, name=name)
if (
isinstance(data, TimedeltaIndex)
and freq is lib.no_default
and name is None
and (dtype is None or dtype == data.dtype)
):
if copy:
return data.copy()
else:
return data._view()
# - Cases checked above all return/raise before reaching here - #
tdarr = TimedeltaArray._from_sequence_not_strict(
data, freq=freq, unit=unit, dtype=dtype, copy=copy
)
refs = None
if not copy and isinstance(data, (ABCSeries, Index)):
refs = data._references
return cls._simple_new(tdarr, name=name, refs=refs)
# -------------------------------------------------------------------
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
"""
return lib.is_np_dtype(dtype, "m") # aka self._data._is_recognized_dtype
# -------------------------------------------------------------------
# Indexing Methods
def get_loc(self, key):
"""
        Get integer location for requested label.
Returns
-------
loc : int, slice, or ndarray[int]
"""
self._check_indexing_error(key)
try:
key = self._data._validate_scalar(key, unbox=False)
except TypeError as err:
raise KeyError(key) from err
return Index.get_loc(self, key)
def _parse_with_reso(self, label: str):
# the "with_reso" is a no-op for TimedeltaIndex
parsed = Timedelta(label)
return parsed, None
def _parsed_string_to_bounds(self, reso, parsed: Timedelta):
# reso is unused, included to match signature of DTI/PI
lbound = parsed.round(parsed.resolution_string)
rbound = lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns")
return lbound, rbound
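    # Illustrative sketch (not part of the original source): these bounds widen
    # a string slice label to its whole resolution bucket, so "1 day" spans
    # [1 days 00:00:00, 2 days) when used as a slice bound.
    #
    #   ser = pd.Series(range(5), index=pd.timedelta_range("0h", periods=5, freq="12h"))
    #   ser.loc[:"1 day"]   # includes "1 days 12:00:00": the upper bound is
    #                       # 1 day + 1D - 1ns, i.e. the end of the day bucket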
# -------------------------------------------------------------------
@property
def inferred_type(self) -> str:
return "timedelta64"
def timedelta_range(
start=None,
end=None,
periods: int | None = None,
freq=None,
name=None,
closed=None,
*,
unit: str | None = None,
) -> TimedeltaIndex:
"""
Return a fixed frequency TimedeltaIndex with day as the default.
Parameters
----------
start : str or timedelta-like, default None
Left bound for generating timedeltas.
end : str or timedelta-like, default None
Right bound for generating timedeltas.
periods : int, default None
Number of periods to generate.
freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'D'
Frequency strings can have multiples, e.g. '5h'.
name : str, default None
Name of the resulting TimedeltaIndex.
closed : str, default None
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None).
unit : str, default None
Specify the desired resolution of the result.
.. versionadded:: 2.0.0
Returns
-------
TimedeltaIndex
Notes
-----
Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
exactly three must be specified. If ``freq`` is omitted, the resulting
``TimedeltaIndex`` will have ``periods`` linearly spaced elements between
``start`` and ``end`` (closed on both sides).
To learn more about the frequency strings, please see `this link
<https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
Examples
--------
>>> pd.timedelta_range(start='1 day', periods=4)
TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq='D')
The ``closed`` parameter specifies which endpoint is included. The default
behavior is to include both endpoints.
>>> pd.timedelta_range(start='1 day', periods=4, closed='right')
TimedeltaIndex(['2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq='D')
The ``freq`` parameter specifies the frequency of the TimedeltaIndex.
Only fixed frequencies can be passed, non-fixed frequencies such as
'M' (month end) will raise.
>>> pd.timedelta_range(start='1 day', end='2 days', freq='6h')
TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
'1 days 18:00:00', '2 days 00:00:00'],
dtype='timedelta64[ns]', freq='6h')
Specify ``start``, ``end``, and ``periods``; the frequency is generated
automatically (linearly spaced).
>>> pd.timedelta_range(start='1 day', end='5 days', periods=4)
TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00',
'5 days 00:00:00'],
dtype='timedelta64[ns]', freq=None)
**Specify a unit**
>>> pd.timedelta_range("1 Day", periods=3, freq="100000D", unit="s")
TimedeltaIndex(['1 days', '100001 days', '200001 days'],
dtype='timedelta64[s]', freq='100000D')
"""
if freq is None and com.any_none(periods, start, end):
freq = "D"
freq = to_offset(freq)
tdarr = TimedeltaArray._generate_range(
start, end, periods, freq, closed=closed, unit=unit
)
return TimedeltaIndex._simple_new(tdarr, name=name)