Updated script so it can be controlled by the Node.js web app

mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8,662 changed files with 1,803,808 additions and 34 deletions


@@ -0,0 +1,416 @@
"""
Module responsible for execution of NDFrame.describe() method.
Method NDFrame.describe() delegates actual execution to function describe_ndframe().
"""
from __future__ import annotations
from abc import (
ABC,
abstractmethod,
)
from typing import (
TYPE_CHECKING,
Callable,
cast,
)
import numpy as np
from pandas._libs.tslibs import Timestamp
from pandas._typing import (
DtypeObj,
NDFrameT,
npt,
)
from pandas.util._validators import validate_percentile
from pandas.core.dtypes.common import (
is_bool_dtype,
is_numeric_dtype,
)
from pandas.core.dtypes.dtypes import (
ArrowDtype,
DatetimeTZDtype,
ExtensionDtype,
)
from pandas.core.arrays.floating import Float64Dtype
from pandas.core.reshape.concat import concat
from pandas.io.formats.format import format_percentiles
if TYPE_CHECKING:
from collections.abc import (
Hashable,
Sequence,
)
from pandas import (
DataFrame,
Series,
)
def describe_ndframe(
*,
obj: NDFrameT,
include: str | Sequence[str] | None,
exclude: str | Sequence[str] | None,
percentiles: Sequence[float] | np.ndarray | None,
) -> NDFrameT:
"""Describe series or dataframe.
Called from pandas.core.generic.NDFrame.describe()
Parameters
----------
obj : DataFrame or Series
Either dataframe or series to be described.
include : 'all', list-like of dtypes or None (default), optional
A white list of data types to include in the result. Ignored for ``Series``.
exclude : list-like of dtypes or None (default), optional
A black list of data types to omit from the result. Ignored for ``Series``.
percentiles : list-like of numbers, optional
The percentiles to include in the output. All should fall between 0 and 1.
The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and
75th percentiles.
Returns
-------
Dataframe or series description.
"""
percentiles = _refine_percentiles(percentiles)
describer: NDFrameDescriberAbstract
if obj.ndim == 1:
describer = SeriesDescriber(
obj=cast("Series", obj),
)
else:
describer = DataFrameDescriber(
obj=cast("DataFrame", obj),
include=include,
exclude=exclude,
)
result = describer.describe(percentiles=percentiles)
return cast(NDFrameT, result)
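# Hedged usage sketch (illustrative only; the helper name below is
# hypothetical, not part of pandas). The public entry point is
# NDFrame.describe(), which forwards here. Custom percentiles pass through
# _refine_percentiles() below: the median is appended if missing, then the
# values are validated, sorted, and checked for duplicates.
def _describe_ndframe_sketch() -> None:
    import pandas as pd

    s = pd.Series([1, 2, 3, 4, 5])
    # Index rows: count, mean, std, min, 10%, 50%, 90%, max.
    print(s.describe(percentiles=[0.1, 0.9]))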
class NDFrameDescriberAbstract(ABC):
"""Abstract class for describing dataframe or series.
Parameters
----------
obj : Series or DataFrame
Object to be described.
"""
def __init__(self, obj: DataFrame | Series) -> None:
self.obj = obj
@abstractmethod
def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame | Series:
"""Do describe either series or dataframe.
Parameters
----------
percentiles : list-like of numbers
The percentiles to include in the output.
"""
class SeriesDescriber(NDFrameDescriberAbstract):
"""Class responsible for creating series description."""
obj: Series
def describe(self, percentiles: Sequence[float] | np.ndarray) -> Series:
describe_func = select_describe_func(
self.obj,
)
return describe_func(self.obj, percentiles)
class DataFrameDescriber(NDFrameDescriberAbstract):
"""Class responsible for creating dataobj description.
Parameters
----------
obj : DataFrame
DataFrame to be described.
include : 'all', list-like of dtypes or None
A white list of data types to include in the result.
exclude : list-like of dtypes or None
A black list of data types to omit from the result.
"""
obj: DataFrame
def __init__(
self,
obj: DataFrame,
*,
include: str | Sequence[str] | None,
exclude: str | Sequence[str] | None,
) -> None:
self.include = include
self.exclude = exclude
if obj.ndim == 2 and obj.columns.size == 0:
raise ValueError("Cannot describe a DataFrame without columns")
super().__init__(obj)
def describe(self, percentiles: Sequence[float] | np.ndarray) -> DataFrame:
data = self._select_data()
ldesc: list[Series] = []
for _, series in data.items():
describe_func = select_describe_func(series)
ldesc.append(describe_func(series, percentiles))
col_names = reorder_columns(ldesc)
d = concat(
[x.reindex(col_names, copy=False) for x in ldesc],
axis=1,
sort=False,
)
d.columns = data.columns.copy()
return d
def _select_data(self) -> DataFrame:
"""Select columns to be described."""
if (self.include is None) and (self.exclude is None):
# when some numerics are found, keep only numerics
default_include: list[npt.DTypeLike] = [np.number, "datetime"]
data = self.obj.select_dtypes(include=default_include)
if len(data.columns) == 0:
data = self.obj
elif self.include == "all":
if self.exclude is not None:
msg = "exclude must be None when include is 'all'"
raise ValueError(msg)
data = self.obj
else:
data = self.obj.select_dtypes(
include=self.include,
exclude=self.exclude,
)
return data
def reorder_columns(ldesc: Sequence[Series]) -> list[Hashable]:
"""Set a convenient order for rows for display."""
names: list[Hashable] = []
seen_names: set[Hashable] = set()
ldesc_indexes = sorted((x.index for x in ldesc), key=len)
for idxnames in ldesc_indexes:
for name in idxnames:
if name not in seen_names:
seen_names.add(name)
names.append(name)
return names
def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
"""Describe series containing numerical data.
Parameters
----------
series : Series
Series to be described.
percentiles : list-like of numbers
The percentiles to include in the output.
"""
from pandas import Series
formatted_percentiles = format_percentiles(percentiles)
stat_index = ["count", "mean", "std", "min"] + formatted_percentiles + ["max"]
d = (
[series.count(), series.mean(), series.std(), series.min()]
+ series.quantile(percentiles).tolist()
+ [series.max()]
)
# GH#48340 - always return float on non-complex numeric data
dtype: DtypeObj | None
if isinstance(series.dtype, ExtensionDtype):
if isinstance(series.dtype, ArrowDtype):
if series.dtype.kind == "m":
# GH53001: describe timedeltas with object dtype
dtype = None
else:
import pyarrow as pa
dtype = ArrowDtype(pa.float64())
else:
dtype = Float64Dtype()
elif series.dtype.kind in "iufb":
# i.e. numeric but exclude complex dtype
dtype = np.dtype("float")
else:
dtype = None
return Series(d, index=stat_index, name=series.name, dtype=dtype)
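# Hedged sketch (illustrative only; the function name is hypothetical).
# Per GH#48340 above, describing non-complex numeric data always yields
# float statistics, even for integer input.
def _describe_numeric_dtype_sketch() -> None:
    import pandas as pd

    out = pd.Series([1, 2, 3], dtype="int64").describe()
    print(out.dtype)  # float64, although the input was int64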
def describe_categorical_1d(
data: Series,
percentiles_ignored: Sequence[float],
) -> Series:
"""Describe series containing categorical data.
Parameters
----------
data : Series
Series to be described.
percentiles_ignored : list-like of numbers
Ignored, but in place to unify interface.
"""
names = ["count", "unique", "top", "freq"]
objcounts = data.value_counts()
count_unique = len(objcounts[objcounts != 0])
if count_unique > 0:
top, freq = objcounts.index[0], objcounts.iloc[0]
dtype = None
else:
# If the Series is empty, set 'top' and 'freq' to None
# to maintain output shape consistency
top, freq = np.nan, np.nan
dtype = "object"
result = [data.count(), count_unique, top, freq]
from pandas import Series
return Series(result, index=names, name=data.name, dtype=dtype)
def describe_timestamp_as_categorical_1d(
data: Series,
percentiles_ignored: Sequence[float],
) -> Series:
"""Describe series containing timestamp data treated as categorical.
Parameters
----------
data : Series
Series to be described.
percentiles_ignored : list-like of numbers
Ignored, but in place to unify interface.
"""
names = ["count", "unique"]
objcounts = data.value_counts()
count_unique = len(objcounts[objcounts != 0])
result: list[float | Timestamp] = [data.count(), count_unique]
dtype = None
if count_unique > 0:
top, freq = objcounts.index[0], objcounts.iloc[0]
tz = data.dt.tz
asint = data.dropna().values.view("i8")
top = Timestamp(top)
if top.tzinfo is not None and tz is not None:
# Don't tz_localize(None) if key is already tz-aware
top = top.tz_convert(tz)
else:
top = top.tz_localize(tz)
names += ["top", "freq", "first", "last"]
result += [
top,
freq,
Timestamp(asint.min(), tz=tz),
Timestamp(asint.max(), tz=tz),
]
# If the Series is empty, set 'top' and 'freq' to None
# to maintain output shape consistency
else:
names += ["top", "freq"]
result += [np.nan, np.nan]
dtype = "object"
from pandas import Series
return Series(result, index=names, name=data.name, dtype=dtype)
def describe_timestamp_1d(data: Series, percentiles: Sequence[float]) -> Series:
"""Describe series containing datetime64 dtype.
Parameters
----------
data : Series
Series to be described.
percentiles : list-like of numbers
The percentiles to include in the output.
"""
# GH-30164
from pandas import Series
formatted_percentiles = format_percentiles(percentiles)
stat_index = ["count", "mean", "min"] + formatted_percentiles + ["max"]
d = (
[data.count(), data.mean(), data.min()]
+ data.quantile(percentiles).tolist()
+ [data.max()]
)
return Series(d, index=stat_index, name=data.name)
def select_describe_func(
data: Series,
) -> Callable:
"""Select proper function for describing series based on data type.
Parameters
----------
data : Series
Series to be described.
"""
if is_bool_dtype(data.dtype):
return describe_categorical_1d
elif is_numeric_dtype(data):
return describe_numeric_1d
elif data.dtype.kind == "M" or isinstance(data.dtype, DatetimeTZDtype):
return describe_timestamp_1d
elif data.dtype.kind == "m":
return describe_numeric_1d
else:
return describe_categorical_1d
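# Hedged sketch (illustrative only; the function name is hypothetical).
# select_describe_func() dispatches purely on dtype: bool and object take
# the categorical path, while timedelta ("m") takes the numeric path.
def _select_describe_func_sketch() -> None:
    import pandas as pd

    assert select_describe_func(pd.Series([True, False])) is describe_categorical_1d
    assert select_describe_func(pd.Series([1.0, 2.0])) is describe_numeric_1d
    dt = pd.Series(pd.to_datetime(["2024-01-01", "2024-01-02"]))
    assert select_describe_func(dt) is describe_timestamp_1d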
def _refine_percentiles(
percentiles: Sequence[float] | np.ndarray | None,
) -> npt.NDArray[np.float64]:
"""
Ensure that percentiles are unique and sorted.
Parameters
----------
percentiles : list-like of numbers, optional
The percentiles to include in the output.
"""
if percentiles is None:
return np.array([0.25, 0.5, 0.75])
# explicit conversion of `percentiles` to list
percentiles = list(percentiles)
# get them all to be in [0, 1]
validate_percentile(percentiles)
# median should always be included
if 0.5 not in percentiles:
percentiles.append(0.5)
percentiles = np.asarray(percentiles)
# sort and check for duplicates
unique_pcts = np.unique(percentiles)
assert percentiles is not None
if len(unique_pcts) < len(percentiles):
raise ValueError("percentiles cannot contain duplicates")
return unique_pcts
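# Hedged end-to-end sketch (illustrative only; the function name is
# hypothetical). DataFrame.describe() builds one Series description per
# column, reorders the stat rows via reorder_columns(), and concatenates.
def _describe_dataframe_sketch() -> None:
    import pandas as pd

    df = pd.DataFrame({"num": [1, 2, 3], "cat": ["a", "a", "b"]})
    # Default: numeric columns only (see DataFrameDescriber._select_data).
    print(df.describe())
    # include="all" keeps every column; stats that do not apply become NaN.
    print(df.describe(include="all"))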


@@ -0,0 +1,269 @@
"""
Implementation of nlargest and nsmallest.
"""
from __future__ import annotations
from collections.abc import (
Hashable,
Sequence,
)
from typing import (
TYPE_CHECKING,
cast,
final,
)
import numpy as np
from pandas._libs import algos as libalgos
from pandas.core.dtypes.common import (
is_bool_dtype,
is_complex_dtype,
is_integer_dtype,
is_list_like,
is_numeric_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.dtypes import BaseMaskedDtype
if TYPE_CHECKING:
from pandas._typing import (
DtypeObj,
IndexLabel,
)
from pandas import (
DataFrame,
Series,
)
class SelectN:
def __init__(self, obj, n: int, keep: str) -> None:
self.obj = obj
self.n = n
self.keep = keep
if self.keep not in ("first", "last", "all"):
raise ValueError('keep must be either "first", "last" or "all"')
def compute(self, method: str) -> DataFrame | Series:
raise NotImplementedError
@final
def nlargest(self):
return self.compute("nlargest")
@final
def nsmallest(self):
return self.compute("nsmallest")
@final
@staticmethod
def is_valid_dtype_n_method(dtype: DtypeObj) -> bool:
"""
Helper function to determine if dtype is valid for
nsmallest/nlargest methods
"""
if is_numeric_dtype(dtype):
return not is_complex_dtype(dtype)
return needs_i8_conversion(dtype)
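# Hedged sketch (illustrative only; the function name is hypothetical).
# nlargest/nsmallest accept numeric (non-complex) and datetime-like dtypes.
def _valid_dtype_sketch() -> None:
    import numpy as np

    assert SelectN.is_valid_dtype_n_method(np.dtype("int64"))
    assert not SelectN.is_valid_dtype_n_method(np.dtype("complex128"))
    assert SelectN.is_valid_dtype_n_method(np.dtype("datetime64[ns]"))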
class SelectNSeries(SelectN):
"""
Implement n largest/smallest for Series
Parameters
----------
obj : Series
n : int
keep : {'first', 'last', 'all'}, default 'first'
Returns
-------
nordered : Series
"""
def compute(self, method: str) -> Series:
from pandas.core.reshape.concat import concat
n = self.n
dtype = self.obj.dtype
if not self.is_valid_dtype_n_method(dtype):
raise TypeError(f"Cannot use method '{method}' with dtype {dtype}")
if n <= 0:
return self.obj[[]]
dropped = self.obj.dropna()
nan_index = self.obj.drop(dropped.index)
# slow method
if n >= len(self.obj):
ascending = method == "nsmallest"
return self.obj.sort_values(ascending=ascending).head(n)
# fast method
new_dtype = dropped.dtype
# Similar to algorithms._ensure_data
arr = dropped._values
if needs_i8_conversion(arr.dtype):
arr = arr.view("i8")
elif isinstance(arr.dtype, BaseMaskedDtype):
arr = arr._data
else:
arr = np.asarray(arr)
if arr.dtype.kind == "b":
arr = arr.view(np.uint8)
if method == "nlargest":
arr = -arr
if is_integer_dtype(new_dtype):
# GH 21426: ensure reverse ordering at boundaries
arr -= 1
elif is_bool_dtype(new_dtype):
# GH 26154: ensure False is smaller than True
arr = 1 - (-arr)
if self.keep == "last":
arr = arr[::-1]
nbase = n
narr = len(arr)
n = min(n, narr)
# arr passed into kth_smallest must be contiguous. We copy
# here because kth_smallest will modify its input
# avoid OOB access with kth_smallest_c when n <= 0
if len(arr) > 0:
kth_val = libalgos.kth_smallest(arr.copy(order="C"), n - 1)
else:
kth_val = np.nan
(ns,) = np.nonzero(arr <= kth_val)
inds = ns[arr[ns].argsort(kind="mergesort")]
if self.keep != "all":
inds = inds[:n]
findex = nbase
else:
if len(inds) < nbase <= len(nan_index) + len(inds):
findex = len(nan_index) + len(inds)
else:
findex = len(inds)
if self.keep == "last":
# reverse indices
inds = narr - 1 - inds
return concat([dropped.iloc[inds], nan_index]).iloc[:findex]
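# Hedged sketch (illustrative only; the function name is hypothetical).
# keep="all" retains every row tied with the boundary (kth) value, which is
# why compute() above tracks `findex` separately from `n`.
def _selectn_series_sketch() -> None:
    import pandas as pd

    s = pd.Series([1, 1, 2, 3])
    print(s.nsmallest(1))              # one row: the first 1
    print(s.nsmallest(1, keep="all"))  # both 1s; they tie at the boundary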
class SelectNFrame(SelectN):
"""
Implement n largest/smallest for DataFrame
Parameters
----------
obj : DataFrame
n : int
keep : {'first', 'last', 'all'}, default 'first'
columns : list or str
Returns
-------
nordered : DataFrame
"""
def __init__(self, obj: DataFrame, n: int, keep: str, columns: IndexLabel) -> None:
super().__init__(obj, n, keep)
if not is_list_like(columns) or isinstance(columns, tuple):
columns = [columns]
columns = cast(Sequence[Hashable], columns)
columns = list(columns)
self.columns = columns
def compute(self, method: str) -> DataFrame:
from pandas.core.api import Index
n = self.n
frame = self.obj
columns = self.columns
for column in columns:
dtype = frame[column].dtype
if not self.is_valid_dtype_n_method(dtype):
raise TypeError(
f"Column {repr(column)} has dtype {dtype}, "
f"cannot use method {repr(method)} with this dtype"
)
def get_indexer(current_indexer, other_indexer):
"""
Helper function to concat `current_indexer` and `other_indexer`
depending on `method`
"""
if method == "nsmallest":
return current_indexer.append(other_indexer)
else:
return other_indexer.append(current_indexer)
# Below we save and reset the index in case index contains duplicates
original_index = frame.index
cur_frame = frame = frame.reset_index(drop=True)
cur_n = n
indexer = Index([], dtype=np.int64)
for i, column in enumerate(columns):
# For each column we apply method to cur_frame[column].
# If it's the last column or if we have the number of
# results desired we are done.
# Otherwise there are duplicates of the largest/smallest
# value and we need to look at the rest of the columns
# to determine which of the rows with the largest/smallest
# value in the column to keep.
series = cur_frame[column]
is_last_column = len(columns) - 1 == i
values = getattr(series, method)(
cur_n, keep=self.keep if is_last_column else "all"
)
if is_last_column or len(values) <= cur_n:
indexer = get_indexer(indexer, values.index)
break
# Now find all values which are equal to
# the (nsmallest: largest)/(nlargest: smallest)
# from our series.
border_value = values == values[values.index[-1]]
# Some of these values are among the top-n
# some aren't.
unsafe_values = values[border_value]
# These values are definitely among the top-n
safe_values = values[~border_value]
indexer = get_indexer(indexer, safe_values.index)
# Go on and separate the unsafe_values on the remaining
# columns.
cur_frame = cur_frame.loc[unsafe_values.index]
cur_n = n - len(indexer)
frame = frame.take(indexer)
# Restore the index on frame
frame.index = original_index.take(indexer)
# If there is only one column, the frame is already sorted.
if len(columns) == 1:
return frame
ascending = method == "nsmallest"
return frame.sort_values(columns, ascending=ascending, kind="mergesort")
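# Hedged end-to-end sketch (illustrative only; the function name is
# hypothetical). SelectNFrame walks the requested columns left to right,
# consulting later columns only to break ties in earlier ones.
def _selectn_frame_sketch() -> None:
    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": [9, 8, 7]})
    # The two a == 1 rows tie; column "b" breaks the tie (8 before 9).
    print(df.nsmallest(2, columns=["a", "b"]))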


@@ -0,0 +1,272 @@
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Literal,
overload,
)
import warnings
import numpy as np
from pandas._libs import (
lib,
missing as libmissing,
)
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.cast import maybe_box_native
from pandas.core.dtypes.dtypes import (
BaseMaskedDtype,
ExtensionDtype,
)
from pandas.core import common as com
if TYPE_CHECKING:
from pandas._typing import MutableMappingT
from pandas import DataFrame
@overload
def to_dict(
df: DataFrame,
orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
*,
into: type[MutableMappingT] | MutableMappingT,
index: bool = ...,
) -> MutableMappingT:
...
@overload
def to_dict(
df: DataFrame,
orient: Literal["records"],
*,
into: type[MutableMappingT] | MutableMappingT,
index: bool = ...,
) -> list[MutableMappingT]:
...
@overload
def to_dict(
df: DataFrame,
orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
*,
into: type[dict] = ...,
index: bool = ...,
) -> dict:
...
@overload
def to_dict(
df: DataFrame,
orient: Literal["records"],
*,
into: type[dict] = ...,
index: bool = ...,
) -> list[dict]:
...
# error: Incompatible default for argument "into" (default has type "type[dict
# [Any, Any]]", argument has type "type[MutableMappingT] | MutableMappingT")
def to_dict(
df: DataFrame,
orient: Literal[
"dict", "list", "series", "split", "tight", "records", "index"
] = "dict",
*,
into: type[MutableMappingT] | MutableMappingT = dict, # type: ignore[assignment]
index: bool = True,
) -> MutableMappingT | list[MutableMappingT]:
"""
Convert the DataFrame to a dictionary.
The type of the key-value pairs can be customized with the parameters
(see below).
Parameters
----------
orient : str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}
Determines the type of the values of the dictionary.
- 'dict' (default) : dict like {column -> {index -> value}}
- 'list' : dict like {column -> [values]}
- 'series' : dict like {column -> Series(values)}
- 'split' : dict like
{'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
- 'tight' : dict like
{'index' -> [index], 'columns' -> [columns], 'data' -> [values],
'index_names' -> [index.names], 'column_names' -> [column.names]}
- 'records' : list like
[{column -> value}, ... , {column -> value}]
- 'index' : dict like {index -> {column -> value}}
.. versionadded:: 1.4.0
'tight' as an allowed value for the ``orient`` argument
into : class, default dict
The collections.abc.MutableMapping subclass used for all Mappings
in the return value. Can be the actual class or an empty
instance of the mapping type you want. If you want a
collections.defaultdict, you must pass it initialized.
index : bool, default True
Whether to include the index item (and index_names item if `orient`
is 'tight') in the returned dictionary. Can only be ``False``
when `orient` is 'split' or 'tight'.
.. versionadded:: 2.0.0
Returns
-------
dict, list or collections.abc.Mapping
Return a collections.abc.MutableMapping object representing the
DataFrame. The resulting transformation depends on the `orient` parameter.
"""
if not df.columns.is_unique:
warnings.warn(
"DataFrame columns are not unique, some columns will be omitted.",
UserWarning,
stacklevel=find_stack_level(),
)
# GH16122
into_c = com.standardize_mapping(into)
# error: Incompatible types in assignment (expression has type "str",
# variable has type "Literal['dict', 'list', 'series', 'split', 'tight',
# 'records', 'index']")
orient = orient.lower() # type: ignore[assignment]
if not index and orient not in ["split", "tight"]:
raise ValueError(
"'index=False' is only valid when 'orient' is 'split' or 'tight'"
)
if orient == "series":
# GH46470 Return quickly if orient series to avoid creating dtype objects
return into_c((k, v) for k, v in df.items())
box_native_indices = [
i
for i, col_dtype in enumerate(df.dtypes.values)
if col_dtype == np.dtype(object) or isinstance(col_dtype, ExtensionDtype)
]
box_na_values = [
lib.no_default if not isinstance(col_dtype, BaseMaskedDtype) else libmissing.NA
for i, col_dtype in enumerate(df.dtypes.values)
]
are_all_object_dtype_cols = len(box_native_indices) == len(df.dtypes)
if orient == "dict":
return into_c((k, v.to_dict(into=into)) for k, v in df.items())
elif orient == "list":
object_dtype_indices_as_set: set[int] = set(box_native_indices)
return into_c(
(
k,
list(map(maybe_box_native, v.to_numpy(na_value=box_na_values[i])))
if i in object_dtype_indices_as_set
else list(map(maybe_box_native, v.to_numpy())),
)
for i, (k, v) in enumerate(df.items())
)
elif orient == "split":
data = df._create_data_for_split_and_tight_to_dict(
are_all_object_dtype_cols, box_native_indices
)
return into_c(
((("index", df.index.tolist()),) if index else ())
+ (
("columns", df.columns.tolist()),
("data", data),
)
)
elif orient == "tight":
data = df._create_data_for_split_and_tight_to_dict(
are_all_object_dtype_cols, box_native_indices
)
return into_c(
((("index", df.index.tolist()),) if index else ())
+ (
("columns", df.columns.tolist()),
(
"data",
[
list(map(maybe_box_native, t))
for t in df.itertuples(index=False, name=None)
],
),
)
+ ((("index_names", list(df.index.names)),) if index else ())
+ (("column_names", list(df.columns.names)),)
)
elif orient == "records":
columns = df.columns.tolist()
if are_all_object_dtype_cols:
rows = (
dict(zip(columns, row)) for row in df.itertuples(index=False, name=None)
)
return [
into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows
]
else:
data = [
into_c(zip(columns, t)) for t in df.itertuples(index=False, name=None)
]
if box_native_indices:
object_dtype_indices_as_set = set(box_native_indices)
object_dtype_cols = {
col
for i, col in enumerate(df.columns)
if i in object_dtype_indices_as_set
}
for row in data:
for col in object_dtype_cols:
row[col] = maybe_box_native(row[col])
return data
elif orient == "index":
if not df.index.is_unique:
raise ValueError("DataFrame index must be unique for orient='index'.")
columns = df.columns.tolist()
if are_all_object_dtype_cols:
return into_c(
(t[0], dict(zip(df.columns, map(maybe_box_native, t[1:]))))
for t in df.itertuples(name=None)
)
elif box_native_indices:
object_dtype_indices_as_set = set(box_native_indices)
is_object_dtype_by_index = [
i in object_dtype_indices_as_set for i in range(len(df.columns))
]
return into_c(
(
t[0],
{
columns[i]: maybe_box_native(v)
if is_object_dtype_by_index[i]
else v
for i, v in enumerate(t[1:])
},
)
for t in df.itertuples(name=None)
)
else:
return into_c(
(t[0], dict(zip(df.columns, t[1:]))) for t in df.itertuples(name=None)
)
else:
raise ValueError(f"orient '{orient}' not understood")