Updated script that can be controlled by the Node.js web app

mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

View File

@@ -0,0 +1,199 @@
"""
compat
======
Cross-compatible functions for different versions of Python.
Other items:
* platform checker
"""
from __future__ import annotations
import os
import platform
import sys
from typing import TYPE_CHECKING
from pandas.compat._constants import (
IS64,
ISMUSL,
PY310,
PY311,
PY312,
PYPY,
)
import pandas.compat.compressors
from pandas.compat.numpy import is_numpy_dev
from pandas.compat.pyarrow import (
pa_version_under10p1,
pa_version_under11p0,
pa_version_under13p0,
pa_version_under14p0,
pa_version_under14p1,
pa_version_under16p0,
pa_version_under17p0,
)
if TYPE_CHECKING:
from pandas._typing import F
def set_function_name(f: F, name: str, cls: type) -> F:
"""
Bind the name/qualname attributes of the function.
"""
f.__name__ = name
f.__qualname__ = f"{cls.__name__}.{name}"
f.__module__ = cls.__module__
return f
def is_platform_little_endian() -> bool:
"""
Checking if the running platform is little endian.
Returns
-------
bool
True if the running platform is little endian.
"""
return sys.byteorder == "little"
def is_platform_windows() -> bool:
"""
Checking if the running platform is windows.
Returns
-------
bool
True if the running platform is windows.
"""
return sys.platform in ["win32", "cygwin"]
def is_platform_linux() -> bool:
"""
Checking if the running platform is linux.
Returns
-------
bool
True if the running platform is linux.
"""
return sys.platform == "linux"
def is_platform_mac() -> bool:
"""
Checking if the running platform is mac.
Returns
-------
bool
True if the running platform is mac.
"""
return sys.platform == "darwin"
def is_platform_arm() -> bool:
"""
    Checking if the running platform uses ARM architecture.
Returns
-------
bool
True if the running platform uses ARM architecture.
"""
return platform.machine() in ("arm64", "aarch64") or platform.machine().startswith(
"armv"
)
def is_platform_power() -> bool:
"""
    Checking if the running platform uses Power architecture.
Returns
-------
bool
        True if the running platform uses Power architecture.
"""
return platform.machine() in ("ppc64", "ppc64le")
def is_ci_environment() -> bool:
"""
    Check whether we are running in a continuous integration environment by
    inspecting the PANDAS_CI environment variable.
Returns
-------
bool
        True if running in a continuous integration environment.
"""
return os.environ.get("PANDAS_CI", "0") == "1"
def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]:
"""
Importing the `LZMAFile` class from the `lzma` module.
Returns
-------
class
The `LZMAFile` class from the `lzma` module.
Raises
------
RuntimeError
        If the `lzma` module could not be imported or does not exist.
"""
if not pandas.compat.compressors.has_lzma:
        raise RuntimeError(
            "lzma module not available. "
            "A Python re-install with the proper dependencies "
            "might be required to solve this issue."
        )
return pandas.compat.compressors.LZMAFile
def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
"""
Importing the `BZ2File` class from the `bz2` module.
Returns
-------
class
The `BZ2File` class from the `bz2` module.
Raises
------
RuntimeError
        If the `bz2` module could not be imported or does not exist.
"""
if not pandas.compat.compressors.has_bz2:
        raise RuntimeError(
            "bz2 module not available. "
            "A Python re-install with the proper dependencies "
            "might be required to solve this issue."
        )
return pandas.compat.compressors.BZ2File
__all__ = [
"is_numpy_dev",
"pa_version_under10p1",
"pa_version_under11p0",
"pa_version_under13p0",
"pa_version_under14p0",
"pa_version_under14p1",
"pa_version_under16p0",
"pa_version_under17p0",
"IS64",
"ISMUSL",
"PY310",
"PY311",
"PY312",
"PYPY",
]
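
Below is a minimal usage sketch of the helpers defined in the file above. It assumes the file is pandas/compat/__init__.py (as its imports and __all__ suggest); the branch bodies are placeholders.

# Illustrative sketch only, not part of the diff above.
from pandas.compat import get_lzma_file, is_platform_arm, is_platform_windows

# The platform checks are plain booleans, handy for feature gating or test skips.
line_sep = "\r\n" if is_platform_windows() else "\n"

if is_platform_arm():
    pass  # e.g. skip a code path known to be flaky on ARM (placeholder)

# get_lzma_file() returns the patched LZMAFile class, or raises RuntimeError
# if the interpreter was built without the lzma module.
LZMAFile = get_lzma_file()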

View File

@@ -0,0 +1,30 @@
"""
_constants
======
Constants relevant for the Python implementation.
"""
from __future__ import annotations
import platform
import sys
import sysconfig
IS64 = sys.maxsize > 2**32
PY310 = sys.version_info >= (3, 10)
PY311 = sys.version_info >= (3, 11)
PY312 = sys.version_info >= (3, 12)
PYPY = platform.python_implementation() == "PyPy"
ISMUSL = "musl" in (sysconfig.get_config_var("HOST_GNU_TYPE") or "")
REF_COUNT = 2 if PY311 else 3
__all__ = [
"IS64",
"ISMUSL",
"PY310",
"PY311",
"PY312",
"PYPY",
]
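
A short sketch of how these interpreter flags are typically consulted (illustrative only; it assumes the file is importable as pandas.compat._constants, matching the import at the top of the previous file).

from pandas.compat._constants import PY312, PYPY

if PY312 and not PYPY:
    pass  # rely on CPython 3.12+ behaviour here (placeholder branch)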

View File

@@ -0,0 +1,168 @@
from __future__ import annotations
import importlib
import sys
from typing import TYPE_CHECKING
import warnings
from pandas.util._exceptions import find_stack_level
from pandas.util.version import Version
if TYPE_CHECKING:
import types
# Update install.rst & setup.cfg when updating versions!
VERSIONS = {
"adbc-driver-postgresql": "0.8.0",
"adbc-driver-sqlite": "0.8.0",
"bs4": "4.11.2",
"blosc": "1.21.3",
"bottleneck": "1.3.6",
"dataframe-api-compat": "0.1.7",
"fastparquet": "2022.12.0",
"fsspec": "2022.11.0",
"html5lib": "1.1",
"hypothesis": "6.46.1",
"gcsfs": "2022.11.0",
"jinja2": "3.1.2",
"lxml.etree": "4.9.2",
"matplotlib": "3.6.3",
"numba": "0.56.4",
"numexpr": "2.8.4",
"odfpy": "1.4.1",
"openpyxl": "3.1.0",
"pandas_gbq": "0.19.0",
"psycopg2": "2.9.6", # (dt dec pq3 ext lo64)
"pymysql": "1.0.2",
"pyarrow": "10.0.1",
"pyreadstat": "1.2.0",
"pytest": "7.3.2",
"python-calamine": "0.1.7",
"pyxlsb": "1.0.10",
"s3fs": "2022.11.0",
"scipy": "1.10.0",
"sqlalchemy": "2.0.0",
"tables": "3.8.0",
"tabulate": "0.9.0",
"xarray": "2022.12.0",
"xlrd": "2.0.1",
"xlsxwriter": "3.0.5",
"zstandard": "0.19.0",
"tzdata": "2022.7",
"qtpy": "2.3.0",
"pyqt5": "5.15.9",
}
# A mapping from import name to package name (on PyPI) for packages where
# these two names are different.
INSTALL_MAPPING = {
"bs4": "beautifulsoup4",
"bottleneck": "Bottleneck",
"jinja2": "Jinja2",
"lxml.etree": "lxml",
"odf": "odfpy",
"pandas_gbq": "pandas-gbq",
"python_calamine": "python-calamine",
"sqlalchemy": "SQLAlchemy",
"tables": "pytables",
}
def get_version(module: types.ModuleType) -> str:
version = getattr(module, "__version__", None)
if version is None:
raise ImportError(f"Can't determine version for {module.__name__}")
if module.__name__ == "psycopg2":
        # psycopg2 appends " (dt dec pq3 ext lo64)" to its version
version = version.split()[0]
return version
def import_optional_dependency(
name: str,
extra: str = "",
errors: str = "raise",
min_version: str | None = None,
):
"""
Import an optional dependency.
    By default, if a dependency is missing, an ImportError with a nice
message will be raised. If a dependency is present, but too old,
we raise.
Parameters
----------
name : str
The module name.
extra : str
Additional text to include in the ImportError message.
errors : str {'raise', 'warn', 'ignore'}
What to do when a dependency is not found or its version is too old.
* raise : Raise an ImportError
        * warn : Only applicable when a module's version is too old.
Warns that the version is too old and returns None
* ignore: If the module is not installed, return None, otherwise,
return the module, even if the version is too old.
It's expected that users validate the version locally when
          using ``errors="ignore"`` (see ``io/html.py``)
min_version : str, default None
Specify a minimum version that is different from the global pandas
minimum version required.
Returns
-------
maybe_module : Optional[ModuleType]
The imported module, when found and the version is correct.
None is returned when the package is not found and `errors`
        is not ``'raise'``, or when the package's version is too old and `errors`
is ``'warn'`` or ``'ignore'``.
"""
assert errors in {"warn", "raise", "ignore"}
package_name = INSTALL_MAPPING.get(name)
install_name = package_name if package_name is not None else name
msg = (
f"Missing optional dependency '{install_name}'. {extra} "
f"Use pip or conda to install {install_name}."
)
try:
module = importlib.import_module(name)
except ImportError:
if errors == "raise":
raise ImportError(msg)
return None
    # Handle submodules: if we have a submodule, grab the parent module from sys.modules
parent = name.split(".")[0]
if parent != name:
install_name = parent
module_to_get = sys.modules[install_name]
else:
module_to_get = module
minimum_version = min_version if min_version is not None else VERSIONS.get(parent)
if minimum_version:
version = get_version(module_to_get)
if version and Version(version) < Version(minimum_version):
msg = (
f"Pandas requires version '{minimum_version}' or newer of '{parent}' "
f"(version '{version}' currently installed)."
)
if errors == "warn":
warnings.warn(
msg,
UserWarning,
stacklevel=find_stack_level(),
)
return None
elif errors == "raise":
raise ImportError(msg)
else:
return None
return module
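
A hedged sketch of the three errors modes of import_optional_dependency (the module path pandas.compat._optional is assumed from the surrounding imports; the package names are just examples drawn from VERSIONS).

from pandas.compat._optional import import_optional_dependency

# Default: raise ImportError with an install hint if missing or too old.
xlsxwriter = import_optional_dependency("xlsxwriter")

# errors="warn": emit a UserWarning and return None if the installed version
# is older than VERSIONS["pyarrow"]; return the module otherwise.
pa = import_optional_dependency("pyarrow", errors="warn")

# errors="ignore": return None if absent, or the module even when too old;
# the caller is expected to check the version itself.
bs4 = import_optional_dependency("bs4", errors="ignore")
if bs4 is not None:
    print(bs4.__version__)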

View File

@@ -0,0 +1,77 @@
"""
Patched ``BZ2File`` and ``LZMAFile`` to handle pickle protocol 5.
"""
from __future__ import annotations
from pickle import PickleBuffer
from pandas.compat._constants import PY310
try:
import bz2
has_bz2 = True
except ImportError:
has_bz2 = False
try:
import lzma
has_lzma = True
except ImportError:
has_lzma = False
def flatten_buffer(
b: bytes | bytearray | memoryview | PickleBuffer,
) -> bytes | bytearray | memoryview:
"""
Return some 1-D `uint8` typed buffer.
Coerces anything that does not match that description to one that does
without copying if possible (otherwise will copy).
"""
if isinstance(b, (bytes, bytearray)):
return b
if not isinstance(b, PickleBuffer):
b = PickleBuffer(b)
try:
# coerce to 1-D `uint8` C-contiguous `memoryview` zero-copy
return b.raw()
except BufferError:
# perform in-memory copy if buffer is not contiguous
return memoryview(b).tobytes("A")
if has_bz2:
class BZ2File(bz2.BZ2File):
if not PY310:
def write(self, b) -> int:
# Workaround issue where `bz2.BZ2File` expects `len`
# to return the number of bytes in `b` by converting
# `b` into something that meets that constraint with
# minimal copying.
#
# Note: This is fixed in Python 3.10.
return super().write(flatten_buffer(b))
if has_lzma:
class LZMAFile(lzma.LZMAFile):
if not PY310:
def write(self, b) -> int:
# Workaround issue where `lzma.LZMAFile` expects `len`
# to return the number of bytes in `b` by converting
# `b` into something that meets that constraint with
# minimal copying.
#
# Note: This is fixed in Python 3.10.
return super().write(flatten_buffer(b))
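
A small sketch of what flatten_buffer does with contiguous and non-contiguous input (illustrative only; the module path pandas.compat.compressors is assumed from this diff).

import numpy as np

from pandas.compat.compressors import flatten_buffer

arr = np.arange(12, dtype=np.uint8).reshape(3, 4)

# C-contiguous input: returned as a zero-copy 1-D uint8 view via PickleBuffer.raw().
flat_view = flatten_buffer(arr)

# Non-contiguous input (every other column): raw() raises BufferError,
# so a contiguous bytes copy is returned instead.
flat_copy = flatten_buffer(arr[:, ::2])
assert bytes(flat_copy) == arr[:, ::2].tobytes()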

View File

@@ -0,0 +1,53 @@
""" support numpy compatibility across versions """
import warnings
import numpy as np
from pandas.util.version import Version
# numpy versioning
_np_version = np.__version__
_nlv = Version(_np_version)
np_version_lt1p23 = _nlv < Version("1.23")
np_version_gte1p24 = _nlv >= Version("1.24")
np_version_gte1p24p3 = _nlv >= Version("1.24.3")
np_version_gte1p25 = _nlv >= Version("1.25")
np_version_gt2 = _nlv >= Version("2.0.0")
is_numpy_dev = _nlv.dev is not None
_min_numpy_ver = "1.22.4"
if _nlv < Version(_min_numpy_ver):
raise ImportError(
f"this version of pandas is incompatible with numpy < {_min_numpy_ver}\n"
f"your numpy version is {_np_version}.\n"
f"Please upgrade numpy to >= {_min_numpy_ver} to use this pandas version"
)
np_long: type
np_ulong: type
if np_version_gt2:
try:
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
r".*In the future `np\.long` will be defined as.*",
FutureWarning,
)
np_long = np.long # type: ignore[attr-defined]
np_ulong = np.ulong # type: ignore[attr-defined]
except AttributeError:
np_long = np.int_
np_ulong = np.uint
else:
np_long = np.int_
np_ulong = np.uint
__all__ = [
"np",
"_np_version",
"is_numpy_dev",
]
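
A small sketch of branching on the NumPy version flags above (illustrative; the import path pandas.compat.numpy is assumed from this diff).

from pandas.compat.numpy import np_long, np_version_gte1p24

# np_long resolves to np.long on NumPy >= 2.0 and to np.int_ on older releases.
x = np_long(42)

if np_version_gte1p24:
    pass  # use behaviour introduced in NumPy 1.24 here (placeholder)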

View File

@@ -0,0 +1,418 @@
"""
For compatibility with numpy libraries, pandas functions or methods have to
accept '*args' and '**kwargs' parameters to accommodate numpy arguments that
are not actually used or respected in the pandas implementation.
To ensure that users do not abuse these parameters, validation is performed in
'validators.py' to make sure that any extra parameters passed correspond ONLY
to those in the numpy signature. Part of that validation includes whether or
not the user attempted to pass in non-default values for these extraneous
parameters. As we want to discourage users from relying on these parameters
when calling the pandas implementation, we want them only to pass in the
default values for these parameters.
This module provides a set of commonly used default arguments for functions and
methods that are spread throughout the codebase. This module will make it
easier to adjust to future upstream changes in the analogous numpy signatures.
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Any,
TypeVar,
cast,
overload,
)
import numpy as np
from numpy import ndarray
from pandas._libs.lib import (
is_bool,
is_integer,
)
from pandas.errors import UnsupportedFunctionCall
from pandas.util._validators import (
validate_args,
validate_args_and_kwargs,
validate_kwargs,
)
if TYPE_CHECKING:
from pandas._typing import (
Axis,
AxisInt,
)
AxisNoneT = TypeVar("AxisNoneT", Axis, None)
class CompatValidator:
def __init__(
self,
defaults,
fname=None,
method: str | None = None,
max_fname_arg_count=None,
) -> None:
self.fname = fname
self.method = method
self.defaults = defaults
self.max_fname_arg_count = max_fname_arg_count
def __call__(
self,
args,
kwargs,
fname=None,
max_fname_arg_count=None,
method: str | None = None,
) -> None:
if not args and not kwargs:
return None
fname = self.fname if fname is None else fname
max_fname_arg_count = (
self.max_fname_arg_count
if max_fname_arg_count is None
else max_fname_arg_count
)
method = self.method if method is None else method
if method == "args":
validate_args(fname, args, max_fname_arg_count, self.defaults)
elif method == "kwargs":
validate_kwargs(fname, kwargs, self.defaults)
elif method == "both":
validate_args_and_kwargs(
fname, args, kwargs, max_fname_arg_count, self.defaults
)
else:
raise ValueError(f"invalid validation method '{method}'")
ARGMINMAX_DEFAULTS = {"out": None}
validate_argmin = CompatValidator(
ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1
)
validate_argmax = CompatValidator(
ARGMINMAX_DEFAULTS, fname="argmax", method="both", max_fname_arg_count=1
)
def process_skipna(skipna: bool | ndarray | None, args) -> tuple[bool, Any]:
if isinstance(skipna, ndarray) or skipna is None:
args = (skipna,) + args
skipna = True
return skipna, args
def validate_argmin_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
"""
If 'Series.argmin' is called via the 'numpy' library, the third parameter
in its signature is 'out', which takes either an ndarray or 'None', so
check if the 'skipna' parameter is either an instance of ndarray or is
None, since 'skipna' itself should be a boolean
"""
skipna, args = process_skipna(skipna, args)
validate_argmin(args, kwargs)
return skipna
def validate_argmax_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
"""
If 'Series.argmax' is called via the 'numpy' library, the third parameter
in its signature is 'out', which takes either an ndarray or 'None', so
check if the 'skipna' parameter is either an instance of ndarray or is
None, since 'skipna' itself should be a boolean
"""
skipna, args = process_skipna(skipna, args)
validate_argmax(args, kwargs)
return skipna
ARGSORT_DEFAULTS: dict[str, int | str | None] = {}
ARGSORT_DEFAULTS["axis"] = -1
ARGSORT_DEFAULTS["kind"] = "quicksort"
ARGSORT_DEFAULTS["order"] = None
ARGSORT_DEFAULTS["kind"] = None
ARGSORT_DEFAULTS["stable"] = None
validate_argsort = CompatValidator(
ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both"
)
# two different signatures of argsort, this second validation for when the
# `kind` param is supported
ARGSORT_DEFAULTS_KIND: dict[str, int | None] = {}
ARGSORT_DEFAULTS_KIND["axis"] = -1
ARGSORT_DEFAULTS_KIND["order"] = None
ARGSORT_DEFAULTS_KIND["stable"] = None
validate_argsort_kind = CompatValidator(
ARGSORT_DEFAULTS_KIND, fname="argsort", max_fname_arg_count=0, method="both"
)
def validate_argsort_with_ascending(ascending: bool | int | None, args, kwargs) -> bool:
"""
If 'Categorical.argsort' is called via the 'numpy' library, the first
parameter in its signature is 'axis', which takes either an integer or
'None', so check if the 'ascending' parameter has either integer type or is
None, since 'ascending' itself should be a boolean
"""
if is_integer(ascending) or ascending is None:
args = (ascending,) + args
ascending = True
validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
ascending = cast(bool, ascending)
return ascending
CLIP_DEFAULTS: dict[str, Any] = {"out": None}
validate_clip = CompatValidator(
CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3
)
@overload
def validate_clip_with_axis(axis: ndarray, args, kwargs) -> None:
...
@overload
def validate_clip_with_axis(axis: AxisNoneT, args, kwargs) -> AxisNoneT:
...
def validate_clip_with_axis(
axis: ndarray | AxisNoneT, args, kwargs
) -> AxisNoneT | None:
"""
If 'NDFrame.clip' is called via the numpy library, the third parameter in
    its signature is 'out', which can take an ndarray, so check if the 'axis'
parameter is an instance of ndarray, since 'axis' itself should either be
an integer or None
"""
if isinstance(axis, ndarray):
args = (axis,) + args
# error: Incompatible types in assignment (expression has type "None",
# variable has type "Union[ndarray[Any, Any], str, int]")
axis = None # type: ignore[assignment]
validate_clip(args, kwargs)
# error: Incompatible return value type (got "Union[ndarray[Any, Any],
# str, int]", expected "Union[str, int, None]")
return axis # type: ignore[return-value]
CUM_FUNC_DEFAULTS: dict[str, Any] = {}
CUM_FUNC_DEFAULTS["dtype"] = None
CUM_FUNC_DEFAULTS["out"] = None
validate_cum_func = CompatValidator(
CUM_FUNC_DEFAULTS, method="both", max_fname_arg_count=1
)
validate_cumsum = CompatValidator(
CUM_FUNC_DEFAULTS, fname="cumsum", method="both", max_fname_arg_count=1
)
def validate_cum_func_with_skipna(skipna: bool, args, kwargs, name) -> bool:
"""
If this function is called via the 'numpy' library, the third parameter in
its signature is 'dtype', which takes either a 'numpy' dtype or 'None', so
check if the 'skipna' parameter is a boolean or not
"""
if not is_bool(skipna):
args = (skipna,) + args
skipna = True
elif isinstance(skipna, np.bool_):
skipna = bool(skipna)
validate_cum_func(args, kwargs, fname=name)
return skipna
ALLANY_DEFAULTS: dict[str, bool | None] = {}
ALLANY_DEFAULTS["dtype"] = None
ALLANY_DEFAULTS["out"] = None
ALLANY_DEFAULTS["keepdims"] = False
ALLANY_DEFAULTS["axis"] = None
validate_all = CompatValidator(
ALLANY_DEFAULTS, fname="all", method="both", max_fname_arg_count=1
)
validate_any = CompatValidator(
ALLANY_DEFAULTS, fname="any", method="both", max_fname_arg_count=1
)
LOGICAL_FUNC_DEFAULTS = {"out": None, "keepdims": False}
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs")
MINMAX_DEFAULTS = {"axis": None, "dtype": None, "out": None, "keepdims": False}
validate_min = CompatValidator(
MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1
)
validate_max = CompatValidator(
MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1
)
RESHAPE_DEFAULTS: dict[str, str] = {"order": "C"}
validate_reshape = CompatValidator(
RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1
)
REPEAT_DEFAULTS: dict[str, Any] = {"axis": None}
validate_repeat = CompatValidator(
REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1
)
ROUND_DEFAULTS: dict[str, Any] = {"out": None}
validate_round = CompatValidator(
ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1
)
SORT_DEFAULTS: dict[str, int | str | None] = {}
SORT_DEFAULTS["axis"] = -1
SORT_DEFAULTS["kind"] = "quicksort"
SORT_DEFAULTS["order"] = None
validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs")
STAT_FUNC_DEFAULTS: dict[str, Any | None] = {}
STAT_FUNC_DEFAULTS["dtype"] = None
STAT_FUNC_DEFAULTS["out"] = None
SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
SUM_DEFAULTS["axis"] = None
SUM_DEFAULTS["keepdims"] = False
SUM_DEFAULTS["initial"] = None
PROD_DEFAULTS = SUM_DEFAULTS.copy()
MEAN_DEFAULTS = SUM_DEFAULTS.copy()
MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy()
MEDIAN_DEFAULTS["overwrite_input"] = False
MEDIAN_DEFAULTS["keepdims"] = False
STAT_FUNC_DEFAULTS["keepdims"] = False
validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method="kwargs")
validate_sum = CompatValidator(
SUM_DEFAULTS, fname="sum", method="both", max_fname_arg_count=1
)
validate_prod = CompatValidator(
PROD_DEFAULTS, fname="prod", method="both", max_fname_arg_count=1
)
validate_mean = CompatValidator(
MEAN_DEFAULTS, fname="mean", method="both", max_fname_arg_count=1
)
validate_median = CompatValidator(
MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1
)
STAT_DDOF_FUNC_DEFAULTS: dict[str, bool | None] = {}
STAT_DDOF_FUNC_DEFAULTS["dtype"] = None
STAT_DDOF_FUNC_DEFAULTS["out"] = None
STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False
validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs")
TAKE_DEFAULTS: dict[str, str | None] = {}
TAKE_DEFAULTS["out"] = None
TAKE_DEFAULTS["mode"] = "raise"
validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")
def validate_take_with_convert(convert: ndarray | bool | None, args, kwargs) -> bool:
"""
If this function is called via the 'numpy' library, the third parameter in
its signature is 'axis', which takes either an ndarray or 'None', so check
if the 'convert' parameter is either an instance of ndarray or is None
"""
if isinstance(convert, ndarray) or convert is None:
args = (convert,) + args
convert = True
validate_take(args, kwargs, max_fname_arg_count=3, method="both")
return convert
TRANSPOSE_DEFAULTS = {"axes": None}
validate_transpose = CompatValidator(
TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0
)
def validate_groupby_func(name: str, args, kwargs, allowed=None) -> None:
"""
'args' and 'kwargs' should be empty, except for allowed kwargs because all
of their necessary parameters are explicitly listed in the function
signature
"""
if allowed is None:
allowed = []
kwargs = set(kwargs) - set(allowed)
if len(args) + len(kwargs) > 0:
raise UnsupportedFunctionCall(
"numpy operations are not valid with groupby. "
f"Use .groupby(...).{name}() instead"
)
RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var")
def validate_resampler_func(method: str, args, kwargs) -> None:
"""
'args' and 'kwargs' should be empty because all of their necessary
parameters are explicitly listed in the function signature
"""
if len(args) + len(kwargs) > 0:
if method in RESAMPLER_NUMPY_OPS:
raise UnsupportedFunctionCall(
"numpy operations are not valid with resample. "
f"Use .resample(...).{method}() instead"
)
raise TypeError("too many arguments passed in")
def validate_minmax_axis(axis: AxisInt | None, ndim: int = 1) -> None:
"""
Ensure that the axis argument passed to min, max, argmin, or argmax is zero
or None, as otherwise it will be incorrectly ignored.
Parameters
----------
axis : int or None
ndim : int, default 1
Raises
------
ValueError
"""
if axis is None:
return
if axis >= ndim or (axis < 0 and ndim + axis < 0):
raise ValueError(f"`axis` must be fewer than the number of dimensions ({ndim})")
_validation_funcs = {
"median": validate_median,
"mean": validate_mean,
"min": validate_min,
"max": validate_max,
"sum": validate_sum,
"prod": validate_prod,
}
def validate_func(fname, args, kwargs) -> None:
if fname not in _validation_funcs:
return validate_stat_func(args, kwargs, fname=fname)
validation_func = _validation_funcs[fname]
return validation_func(args, kwargs)
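
A brief sketch of how these validators behave: arguments forwarded from a numpy-style call must all carry their default values, otherwise validation fails. The import path pandas.compat.numpy.function is assumed; the values are illustrative.

from pandas.compat.numpy.function import validate_argmax_with_skipna, validate_sum

# Only default values for the numpy-only arguments are accepted.
validate_sum((), {"dtype": None, "out": None})  # passes, returns None

# A non-default value is rejected by pandas.util._validators.
try:
    validate_sum((), {"out": object()})
except ValueError as err:
    print(err)

# When numpy calls Series.argmax, an ndarray/None can land in the 'skipna'
# slot; it is shifted into *args and skipna reset to True before validating.
assert validate_argmax_with_skipna(None, (), {}) is True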

View File

@@ -0,0 +1,262 @@
"""
Support pre-0.12 series pickle compatibility.
"""
from __future__ import annotations
import contextlib
import copy
import io
import pickle as pkl
from typing import TYPE_CHECKING
import numpy as np
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.tslibs import BaseOffset
from pandas import Index
from pandas.core.arrays import (
DatetimeArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.internals import BlockManager
if TYPE_CHECKING:
from collections.abc import Generator
def load_reduce(self) -> None:
stack = self.stack
args = stack.pop()
func = stack[-1]
try:
stack[-1] = func(*args)
return
except TypeError as err:
# If we have a deprecated function,
# try to replace and try again.
msg = "_reconstruct: First argument must be a sub-type of ndarray"
if msg in str(err):
try:
cls = args[0]
stack[-1] = object.__new__(cls)
return
except TypeError:
pass
elif args and isinstance(args[0], type) and issubclass(args[0], BaseOffset):
# TypeError: object.__new__(Day) is not safe, use Day.__new__()
cls = args[0]
stack[-1] = cls.__new__(*args)
return
elif args and issubclass(args[0], PeriodArray):
cls = args[0]
stack[-1] = NDArrayBacked.__new__(*args)
return
raise
# If classes are moved, provide compat here.
_class_locations_map = {
("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"),
# 15477
("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"),
# Re-routing unpickle block logic to go through _unpickle_block instead
# for pandas <= 1.3.5
("pandas.core.internals.blocks", "new_block"): (
"pandas._libs.internals",
"_unpickle_block",
),
("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"),
("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"),
# 10890
("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"),
("pandas.sparse.series", "SparseTimeSeries"): (
"pandas.core.sparse.series",
"SparseSeries",
),
# 12588, extensions moving
("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"),
("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"),
# 18543 moving period
("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"),
("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"),
# 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype
("pandas.tslib", "__nat_unpickle"): (
"pandas._libs.tslibs.nattype",
"__nat_unpickle",
),
("pandas._libs.tslib", "__nat_unpickle"): (
"pandas._libs.tslibs.nattype",
"__nat_unpickle",
),
# 15998 top-level dirs moving
("pandas.sparse.array", "SparseArray"): (
"pandas.core.arrays.sparse",
"SparseArray",
),
("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"),
("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
("pandas.indexes.numeric", "Int64Index"): (
"pandas.core.indexes.base",
"Index", # updated in 50775
),
("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"),
("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"),
("pandas.tseries.index", "_new_DatetimeIndex"): (
"pandas.core.indexes.datetimes",
"_new_DatetimeIndex",
),
("pandas.tseries.index", "DatetimeIndex"): (
"pandas.core.indexes.datetimes",
"DatetimeIndex",
),
("pandas.tseries.period", "PeriodIndex"): (
"pandas.core.indexes.period",
"PeriodIndex",
),
# 19269, arrays moving
("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"),
# 19939, add timedeltaindex, float64index compat from 15998 move
("pandas.tseries.tdi", "TimedeltaIndex"): (
"pandas.core.indexes.timedeltas",
"TimedeltaIndex",
),
("pandas.indexes.numeric", "Float64Index"): (
"pandas.core.indexes.base",
"Index", # updated in 50775
),
    # 50775, remove Int64Index, UInt64Index & Float64Index from codebase
("pandas.core.indexes.numeric", "Int64Index"): (
"pandas.core.indexes.base",
"Index",
),
("pandas.core.indexes.numeric", "UInt64Index"): (
"pandas.core.indexes.base",
"Index",
),
("pandas.core.indexes.numeric", "Float64Index"): (
"pandas.core.indexes.base",
"Index",
),
("pandas.core.arrays.sparse.dtype", "SparseDtype"): (
"pandas.core.dtypes.dtypes",
"SparseDtype",
),
}
# Our Unpickler subclass overrides methods and some dispatcher functions
# for compat; it relies on a non-public class of the pickle module.
class Unpickler(pkl._Unpickler):
def find_class(self, module, name):
# override superclass
key = (module, name)
module, name = _class_locations_map.get(key, key)
return super().find_class(module, name)
Unpickler.dispatch = copy.copy(Unpickler.dispatch)
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
def load_newobj(self) -> None:
args = self.stack.pop()
cls = self.stack[-1]
# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
elif issubclass(cls, DatetimeArray) and not args:
arr = np.array([], dtype="M8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif issubclass(cls, TimedeltaArray) and not args:
arr = np.array([], dtype="m8[ns]")
obj = cls.__new__(cls, arr, arr.dtype)
elif cls is BlockManager and not args:
obj = cls.__new__(cls, (), [], False)
else:
obj = cls.__new__(cls, *args)
self.stack[-1] = obj
Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj
def load_newobj_ex(self) -> None:
kwargs = self.stack.pop()
args = self.stack.pop()
cls = self.stack.pop()
# compat
if issubclass(cls, Index):
obj = object.__new__(cls)
else:
obj = cls.__new__(cls, *args, **kwargs)
self.append(obj)
try:
Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex
except (AttributeError, KeyError):
pass
def load(fh, encoding: str | None = None, is_verbose: bool = False):
"""
    Load a pickle, with a provided encoding.
Parameters
----------
fh : a filelike object
encoding : an optional encoding
is_verbose : show exception output
"""
try:
fh.seek(0)
if encoding is not None:
up = Unpickler(fh, encoding=encoding)
else:
up = Unpickler(fh)
# "Unpickler" has no attribute "is_verbose" [attr-defined]
up.is_verbose = is_verbose # type: ignore[attr-defined]
return up.load()
except (ValueError, TypeError):
raise
def loads(
bytes_object: bytes,
*,
fix_imports: bool = True,
encoding: str = "ASCII",
errors: str = "strict",
):
"""
Analogous to pickle._loads.
"""
fd = io.BytesIO(bytes_object)
return Unpickler(
fd, fix_imports=fix_imports, encoding=encoding, errors=errors
).load()
@contextlib.contextmanager
def patch_pickle() -> Generator[None, None, None]:
"""
Temporarily patch pickle to use our unpickler.
"""
orig_loads = pkl.loads
try:
setattr(pkl, "loads", loads)
yield
finally:
setattr(pkl, "loads", orig_loads)
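
A short sketch of using this compat layer to unpickle pandas objects, including legacy pickles whose class paths have moved (pandas itself imports it as pandas.compat.pickle_compat; that path is assumed here).

import io
import pickle

import pandas as pd
from pandas.compat import pickle_compat as pc

raw = pickle.dumps(pd.Series([1, 2, 3]))

# patch_pickle() temporarily swaps pickle.loads for the compat loads, so old
# class locations in _class_locations_map are re-routed while unpickling.
with pc.patch_pickle():
    s = pickle.loads(raw)

# load() applies the same Unpickler to a file-like handle.
s2 = pc.load(io.BytesIO(raw))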

View File

@@ -0,0 +1,29 @@
""" support pyarrow compatibility across versions """
from __future__ import annotations
from pandas.util.version import Version
try:
import pyarrow as pa
_palv = Version(Version(pa.__version__).base_version)
pa_version_under10p1 = _palv < Version("10.0.1")
pa_version_under11p0 = _palv < Version("11.0.0")
pa_version_under12p0 = _palv < Version("12.0.0")
pa_version_under13p0 = _palv < Version("13.0.0")
pa_version_under14p0 = _palv < Version("14.0.0")
pa_version_under14p1 = _palv < Version("14.0.1")
pa_version_under15p0 = _palv < Version("15.0.0")
pa_version_under16p0 = _palv < Version("16.0.0")
pa_version_under17p0 = _palv < Version("17.0.0")
except ImportError:
pa_version_under10p1 = True
pa_version_under11p0 = True
pa_version_under12p0 = True
pa_version_under13p0 = True
pa_version_under14p0 = True
pa_version_under14p1 = True
pa_version_under15p0 = True
pa_version_under16p0 = True
pa_version_under17p0 = True
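
A small sketch of gating a feature on these flags (illustrative only; the import path pandas.compat.pyarrow matches the import in the first file of this diff).

from pandas.compat.pyarrow import pa_version_under11p0

if pa_version_under11p0:
    pass  # pyarrow missing or older than 11.0.0: fall back (placeholder)
else:
    import pyarrow as pa

    table = pa.table({"a": [1, 2, 3]})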