Updated script that can be controlled by a Node.js web app

This commit is contained in:
mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

View File

@ -0,0 +1,29 @@
def __getattr__(key: str):
    """
    Resolve a handful of public ``pandas.util`` names on first access.

    Parameters
    ----------
    key : str
        Name of the attribute being looked up on the module.

    Returns
    -------
    object
        The object of the same name from ``pandas.core.util.hashing`` or
        ``pandas.util._decorators``.

    Raises
    ------
    AttributeError
        If ``key`` is not one of the lazily exported names.
    """
    # The imports stay inside the function: importing these modules eagerly
    # leads to circular import errors.
    if key in ("hash_array", "hash_pandas_object"):
        from pandas.core.util import hashing

        return getattr(hashing, key)

    if key in ("Appender", "Substitution", "cache_readonly"):
        from pandas.util import _decorators

        return getattr(_decorators, key)

    raise AttributeError(f"module 'pandas.util' has no attribute '{key}'")
def capitalize_first_letter(s):
    """
    Return ``s`` with its first character upper-cased.

    Only the first character is touched; the remainder is kept verbatim.
    Slicing (rather than indexing) is used, so an empty input is returned
    unchanged instead of raising.
    """
    head, tail = s[:1], s[1:]
    return head.upper() + tail

View File

@ -0,0 +1,508 @@
from __future__ import annotations
from functools import wraps
import inspect
from textwrap import dedent
from typing import (
TYPE_CHECKING,
Any,
Callable,
cast,
)
import warnings
from pandas._libs.properties import cache_readonly
from pandas._typing import (
F,
T,
)
from pandas.util._exceptions import find_stack_level
if TYPE_CHECKING:
from collections.abc import Mapping
def deprecate(
    name: str,
    alternative: Callable[..., Any],
    version: str,
    alt_name: str | None = None,
    klass: type[Warning] | None = None,
    stacklevel: int = 2,
    msg: str | None = None,
) -> Callable[[F], F]:
    """
    Return a new function that emits a deprecation warning on use.

    To use this method for a deprecated function, another function
    `alternative` with the same signature must exist. The deprecated
    function will emit a deprecation warning, and in the docstring
    it will contain the deprecation directive with the provided version
    so it can be detected for future removal.

    Parameters
    ----------
    name : str
        Name of function to deprecate.
    alternative : func
        Function to use instead.
    version : str
        Version of pandas in which the method has been deprecated.
    alt_name : str, optional
        Name to use in preference of alternative.__name__.
    klass : Warning, default FutureWarning
        Category of the emitted warning.
    stacklevel : int, default 2
        ``stacklevel`` forwarded to ``warnings.warn``.
    msg : str, optional
        The message to display in the warning.
        Default is '{name} is deprecated, use {alt_name} instead.'

    Returns
    -------
    callable
        Wrapper around ``alternative`` that warns and then delegates to it.

    Raises
    ------
    AssertionError
        If ``alternative`` has a non-empty docstring that does not consist of
        an empty first line, a one-line summary, a blank line and a body.
    """
    alt_name = alt_name or alternative.__name__
    klass = klass or FutureWarning
    warning_msg = msg or f"{name} is deprecated, use {alt_name} instead."

    @wraps(alternative)
    def wrapper(*args, **kwargs) -> Any:
        warnings.warn(warning_msg, klass, stacklevel=stacklevel)
        return alternative(*args, **kwargs)

    # adding deprecated directive to the docstring
    msg = msg or f"Use `{alt_name}` instead."
    doc_error_msg = (
        "deprecate needs a correctly formatted docstring in "
        "the target function (should have a one liner short "
        "summary, and opening quotes should be in their own "
        f"line). Found:\n{alternative.__doc__}"
    )

    # when python is running in optimized mode (i.e. `-OO`), docstrings are
    # removed, so we check that a docstring with correct formatting is used
    # but we allow empty docstrings
    if alternative.__doc__:
        if alternative.__doc__.count("\n") < 3:
            raise AssertionError(doc_error_msg)
        empty1, summary, empty2, doc_string = alternative.__doc__.split("\n", 3)
        # Each part is checked on its own. The previous
        # ``empty1 or empty2 and not summary`` bound as
        # ``empty1 or (empty2 and not summary)`` and therefore accepted a
        # missing summary as well as a second summary line, whose text
        # (``empty2``) was then silently dropped from the new docstring.
        if empty1 or empty2.strip() or not summary.strip():
            raise AssertionError(doc_error_msg)
        # Blank lines around the directive and an indented message are what
        # reStructuredText needs to attach ``msg`` to ``.. deprecated::``.
        wrapper.__doc__ = (
            f"\n{summary.strip()}\n\n"
            f".. deprecated:: {version}\n"
            f"    {msg}\n\n"
            f"{dedent(doc_string)}"
        )
    # error: Incompatible return value type (got "Callable[[VarArg(Any), KwArg(Any)],
    # Callable[...,Any]]", expected "Callable[[F], F]")
    return wrapper  # type: ignore[return-value]
def deprecate_kwarg(
    old_arg_name: str,
    new_arg_name: str | None,
    mapping: Mapping[Any, Any] | Callable[[Any], Any] | None = None,
    stacklevel: int = 2,
) -> Callable[[F], F]:
    """
    Decorator to deprecate a keyword argument of a function.

    Parameters
    ----------
    old_arg_name : str
        Name of argument in function to deprecate
    new_arg_name : str or None
        Name of preferred argument in function. Use None to raise warning that
        ``old_arg_name`` keyword is deprecated.
    mapping : dict or callable
        If mapping is present, use it to translate old arguments to
        new arguments. A callable must do its own value checking;
        values not found in a dict will be forwarded unchanged.
    stacklevel : int, default 2
        ``stacklevel`` forwarded to ``warnings.warn``.

    Raises
    ------
    TypeError
        At decoration time if ``mapping`` is neither None, callable, nor an
        object with a ``get`` attribute; at call time if both the old and the
        new keyword are supplied.

    Examples
    --------
    The following deprecates 'cols', using 'columns' instead

    >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns')
    ... def f(columns=''):
    ...     print(columns)
    ...
    >>> f(columns='should work ok')
    should work ok

    >>> f(cols='should raise warning')  # doctest: +SKIP
    FutureWarning: the 'cols' keyword is deprecated, use 'columns' instead.
    should raise warning

    >>> f(cols='should error', columns="can't pass both")  # doctest: +SKIP
    TypeError: Can only specify 'cols' or 'columns', not both.

    >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False})
    ... def f(new=False):
    ...     print('yes!' if new else 'no!')
    ...
    >>> f(old='yes')  # doctest: +SKIP
    FutureWarning: the old='yes' keyword is deprecated, use new=True instead.
    yes!

    To raise a warning that a keyword will be removed entirely in the future

    >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name=None)
    ... def f(cols='', another_param=''):
    ...     print(cols)
    ...
    >>> f(cols='should raise warning')  # doctest: +SKIP
    FutureWarning: the 'cols' keyword is deprecated and will be removed in a
    future version. Please take steps to stop the use of 'cols'
    should raise warning
    >>> f(another_param='should not raise warning')  # doctest: +SKIP
    should not raise warning
    """
    mapping_is_usable = (
        mapping is None or hasattr(mapping, "get") or callable(mapping)
    )
    if not mapping_is_usable:
        raise TypeError(
            "mapping from old to new argument values must be dict or callable!"
        )

    def _deprecate_kwarg(func: F) -> F:
        @wraps(func)
        def wrapper(*args, **kwargs) -> Callable[..., Any]:
            old_arg_value = kwargs.pop(old_arg_name, None)

            # Old keyword absent (or explicitly None): nothing to translate.
            if old_arg_value is None:
                return func(*args, **kwargs)

            # No replacement exists: warn and hand the value back untouched.
            if new_arg_name is None:
                removal_msg = (
                    f"the {old_arg_name!r} keyword is deprecated and "
                    "will be removed in a future version. Please take "
                    f"steps to stop the use of {old_arg_name!r}"
                )
                warnings.warn(removal_msg, FutureWarning, stacklevel=stacklevel)
                kwargs[old_arg_name] = old_arg_value
                return func(*args, **kwargs)

            if mapping is None:
                new_arg_value = old_arg_value
                rename_msg = (
                    f"the {old_arg_name!r} keyword is deprecated, "
                    f"use {new_arg_name!r} instead."
                )
            else:
                if callable(mapping):
                    new_arg_value = mapping(old_arg_value)
                else:
                    new_arg_value = mapping.get(old_arg_value, old_arg_value)
                rename_msg = (
                    f"the {old_arg_name}={old_arg_value!r} keyword is "
                    "deprecated, use "
                    f"{new_arg_name}={new_arg_value!r} instead."
                )

            warnings.warn(rename_msg, FutureWarning, stacklevel=stacklevel)
            if kwargs.get(new_arg_name) is not None:
                conflict_msg = (
                    f"Can only specify {old_arg_name!r} "
                    f"or {new_arg_name!r}, not both."
                )
                raise TypeError(conflict_msg)
            kwargs[new_arg_name] = new_arg_value
            return func(*args, **kwargs)

        return cast(F, wrapper)

    return _deprecate_kwarg
def _format_argument_list(allow_args: list[str]) -> str:
    """
    Describe the still-positional arguments for a deprecation warning.

    Converts the ``allow_args`` of `deprecate_nonkeyword_arguments` into the
    phrase that is inserted into the warning message. A ``"self"`` entry is
    left out of the description.

    Parameters
    ----------
    allow_args : list or tuple of str
        Names of the arguments that may still be passed positionally.
        The sequence is not modified.

    Returns
    -------
    str
        The substring describing the argument list, with a leading space,
        or the empty string when nothing is left to describe.

    Examples
    --------
    `_format_argument_list([])` -> ''
    `_format_argument_list(['a'])` -> " except for the argument 'a'"
    `_format_argument_list(['a', 'b'])` -> " except for the arguments 'a' and 'b'"
    `_format_argument_list(['a', 'b', 'c'])` ->
        " except for the arguments 'a', 'b' and 'c'"
    """
    # Work on a copy: removing "self" from the list that was passed in would
    # permanently alter the list owned by the decorator (and by its caller).
    names = list(allow_args)
    if "self" in names:
        names.remove("self")

    if not names:
        return ""
    if len(names) == 1:
        return f" except for the argument '{names[0]}'"

    *leading, last = names
    args = ", ".join(f"'{x}'" for x in leading)
    return f" except for the arguments {args} and '{last}'"
def future_version_msg(version: str | None) -> str:
    """
    Build the opening words of a deprecation message.

    Parameters
    ----------
    version : str or None
        Version of pandas in which the deprecation takes effect, or None
        when no particular version should be mentioned.

    Returns
    -------
    str
        The phrase naming ``version``, or a generic "future version" phrase.
    """
    if version is not None:
        return f"Starting with pandas version {version}"
    return "In a future version of pandas"
def deprecate_nonkeyword_arguments(
    version: str | None,
    allowed_args: list[str] | None = None,
    name: str | None = None,
) -> Callable[[F], F]:
    """
    Decorator to deprecate a use of non-keyword arguments of a function.

    Parameters
    ----------
    version : str, optional
        The version in which positional arguments will become
        keyword-only. If None, then the warning message won't
        specify any particular version.
    allowed_args : list, optional
        Names of the leading arguments of the decorated function that may
        still be given positionally. When None, every positional argument
        without a default value is allowed.
    name : str, optional
        The specific name of the function to show in the warning
        message. If None, then the qualified name of the function
        is used.

    Returns
    -------
    callable
        Decorator producing a wrapper that emits a ``FutureWarning`` whenever
        more positional arguments are passed than are allowed, and whose
        ``__signature__`` shows the remaining arguments as keyword-only.
    """

    def decorate(func):
        signature = inspect.signature(func)
        positional_kinds = (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )

        if allowed_args is None:
            allow_args = [
                param.name
                for param in signature.parameters.values()
                if param.kind in positional_kinds
                and param.default is inspect.Parameter.empty
            ]
        else:
            allow_args = allowed_args

        # Advertise every non-allowed positional parameter as keyword-only.
        advertised = []
        for param in signature.parameters.values():
            if param.kind in positional_kinds and param.name not in allow_args:
                param = param.replace(kind=inspect.Parameter.KEYWORD_ONLY)
            advertised.append(param)
        # A signature requires its parameters to be ordered by kind.
        advertised.sort(key=lambda param: param.kind)
        new_signature = signature.replace(parameters=advertised)

        max_positional = len(allow_args)
        # ``{arguments}`` is filled in lazily, only when a warning is emitted.
        msg = (
            f"{future_version_msg(version)} all arguments of "
            f"{name or func.__qualname__}{{arguments}} will be keyword-only."
        )

        @wraps(func)
        def wrapper(*args, **kwargs):
            if len(args) > max_positional:
                warnings.warn(
                    msg.format(arguments=_format_argument_list(allow_args)),
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            return func(*args, **kwargs)

        # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no
        # attribute "__signature__"
        wrapper.__signature__ = new_signature  # type: ignore[attr-defined]
        return wrapper

    return decorate
def doc(*docstrings: None | str | Callable, **params) -> Callable[[F], F]:
    """
    Decorator that assembles a docstring from templates and fills it in.

    The decorated callable's own docstring comes first, followed by every
    entry of ``docstrings`` in order. String components are formatted with
    ``params`` (when any are given); callable components contribute their
    dedented ``__doc__`` as is.

    The unformatted components are stored on the decorated callable as
    ``_docstring_components`` so that it can itself be passed to ``doc`` and
    have its templates re-used with other parameters.

    Parameters
    ----------
    *docstrings : None, str, or callable
        The string / docstring / docstring template to be appended in order
        after default docstring under callable. None entries are ignored.
    **params
        The string which would be used to format docstring template.
    """

    def decorator(decorated: F) -> F:
        # Gather the raw (unformatted) pieces first.
        components: list[str | Callable] = []
        own_doc = decorated.__doc__
        if own_doc:
            components.append(dedent(own_doc))

        for docstring in docstrings:
            if docstring is None:
                continue
            if hasattr(docstring, "_docstring_components"):
                # Already processed by ``doc``: re-use its templates.
                components.extend(
                    docstring._docstring_components  # pyright: ignore[reportGeneralTypeIssues]
                )
            elif isinstance(docstring, str) or docstring.__doc__:
                components.append(docstring)

        # Render each piece: templates are formatted, callables are read.
        rendered = []
        for component in components:
            if not isinstance(component, str):
                rendered.append(dedent(component.__doc__ or ""))
            elif len(params) > 0:
                rendered.append(component.format(**params))
            else:
                rendered.append(component)
        decorated.__doc__ = "".join(rendered)

        # error: "F" has no attribute "_docstring_components"
        decorated._docstring_components = (  # type: ignore[attr-defined]
            components
        )
        return decorated

    return decorator
# Substitution and Appender are derived from matplotlib.docstring (1.1.0)
# module https://matplotlib.org/users/license.html
class Substitution:
    """
    Decorator performing ``%``-substitution on a function's docstring.

    The decorator leaves a missing docstring alone, so it is safe to use
    when ``func.__doc__`` is None (for example, if -OO was passed to the
    interpreter).

    Usage: construct a docstring.Substitution with a sequence or
    dictionary suitable for performing substitution; then
    decorate a suitable function with the constructed object. e.g.

    sub_author_name = Substitution(author='Jason')

    @sub_author_name
    def some_function(x):
        "%(author)s wrote this function"

    # note that some_function.__doc__ is now "Jason wrote this function"

    One can also use positional arguments.

    sub_first_last_names = Substitution('Edgar Allen', 'Poe')

    @sub_first_last_names
    def some_function(x):
        "%s %s wrote the Raven"
    """

    def __init__(self, *args, **kwargs) -> None:
        both_given = bool(args) and bool(kwargs)
        if both_given:
            raise AssertionError("Only positional or keyword args are allowed")

        # A tuple for positional use, a dict for keyword use.
        self.params = args if args else kwargs

    def __call__(self, func: F) -> F:
        template = func.__doc__
        # A falsy docstring (None or "") is written back unchanged.
        func.__doc__ = template % self.params if template else template
        return func

    def update(self, *args, **kwargs) -> None:
        """
        Update self.params with supplied args.

        Has no effect when the parameters were given positionally.
        """
        if not isinstance(self.params, dict):
            return
        self.params.update(*args, **kwargs)
class Appender:
    """
    Function decorator appending an addendum to the target's docstring.

    A missing docstring is treated as the empty string, so the decorator
    works even if func.__doc__ is None (for example, if -OO was passed to
    the interpreter).

    Usage: construct a docstring.Appender with a string to be joined to
    the original docstring. An optional 'join' parameter may be supplied
    which will be used to join the docstring and addendum. e.g.

    add_copyright = Appender("Copyright (c) 2009", join='\\n')

    @add_copyright
    def my_dog(has='fleas'):
        "This docstring will have a copyright below"
        pass
    """

    addendum: str | None

    def __init__(self, addendum: str | None, join: str = "", indents: int = 0) -> None:
        # Only a positive ``indents`` routes the addendum through ``indent``.
        self.addendum = indent(addendum, indents=indents) if indents > 0 else addendum
        self.join = join

    def __call__(self, func: T) -> T:
        # Normalise both parts to strings before joining them.
        func.__doc__ = func.__doc__ or ""
        self.addendum = self.addendum or ""
        combined = self.join.join([func.__doc__, self.addendum])
        func.__doc__ = dedent(combined)
        return func
def indent(text: str | None, indents: int = 1) -> str:
    """
    Indent every line of ``text`` after the first.

    Parameters
    ----------
    text : str or None
        Text to indent.
    indents : int, default 1
        Number of indentation units inserted after each newline.

    Returns
    -------
    str
        The indented text, or the empty string when ``text`` is empty or
        not a string.
    """
    if not (text and isinstance(text, str)):
        return ""
    line_break = "\n" + " " * indents
    return text.replace("\n", line_break)
__all__ = [
"Appender",
"cache_readonly",
"deprecate",
"deprecate_kwarg",
"deprecate_nonkeyword_arguments",
"doc",
"future_version_msg",
"Substitution",
]

View File

@ -0,0 +1,202 @@
from __future__ import annotations
from typing import TYPE_CHECKING
import numpy as np
import pandas as pd
if TYPE_CHECKING:
from collections.abc import Iterable
class TablePlotter:
    """
    Layout some DataFrames in vertical/horizontal layout for explanation.

    Used in merging.rst

    Parameters
    ----------
    cell_width : float, default 0.37
        Figure width allotted to each table column.
    cell_height : float, default 0.25
        Figure height allotted to each table row.
    font_size : float, default 7.5
        Font size used for table text and titles.
    """

    def __init__(
        self,
        cell_width: float = 0.37,
        cell_height: float = 0.25,
        font_size: float = 7.5,
    ) -> None:
        self.cell_width = cell_width
        self.cell_height = cell_height
        self.font_size = font_size

    def _shape(self, df: pd.DataFrame) -> tuple[int, int]:
        """
        Calculate table shape considering index levels.

        Column levels are counted as extra rows and index levels as extra
        columns, since both are drawn as cells of the table.
        """
        row, col = df.shape
        return row + df.columns.nlevels, col + df.index.nlevels

    def _get_cells(self, left, right, vertical) -> tuple[int, int]:
        """
        Calculate appropriate figure size based on left and right data.

        Returns
        -------
        tuple of int
            ``(hcells, vcells)``: the number of horizontal and vertical cells.
        """
        if vertical:
            # left tables are stacked, the result sits to their right
            vcells = max(sum(self._shape(df)[0] for df in left), self._shape(right)[0])
            hcells = max(self._shape(df)[1] for df in left) + self._shape(right)[1]
        else:
            # all tables sit side by side
            vcells = max([self._shape(df)[0] for df in left] + [self._shape(right)[0]])
            hcells = sum([self._shape(df)[1] for df in left] + [self._shape(right)[1]])
        return hcells, vcells

    @staticmethod
    def _pad_labels(labels, count: int) -> list:
        """
        Return ``labels`` as a list padded with empty titles up to ``count``.

        Labels beyond ``count`` are kept as they are.
        """
        padded = list(labels)
        padded.extend([""] * (count - len(padded)))
        return padded

    def plot(self, left, right, labels: Iterable[str] = (), vertical: bool = True):
        """
        Plot left / right DataFrames in specified layout.

        Parameters
        ----------
        left : list of DataFrames before operation is applied
        right : DataFrame of operation result
        labels : list of str to be drawn as titles of left DataFrames
            Left DataFrames without a matching label get an empty title.
        vertical : bool, default True
            If True, use vertical layout. If False, use horizontal layout.

        Returns
        -------
        matplotlib.figure.Figure
            The figure the tables were drawn on.
        """
        from matplotlib import gridspec
        import matplotlib.pyplot as plt

        if not isinstance(left, list):
            left = [left]
        left = [self._conv(df) for df in left]
        right = self._conv(right)
        # ``zip`` stops at the shorter input, so missing labels (including the
        # default empty tuple) used to suppress the left tables altogether.
        labels = self._pad_labels(labels, len(left))

        hcells, vcells = self._get_cells(left, right, vertical)

        # Both layouts use the same size; no extra margin is added for titles.
        figsize = self.cell_width * hcells, self.cell_height * vcells
        fig = plt.figure(figsize=figsize)

        if vertical:
            gs = gridspec.GridSpec(len(left), hcells)
            # left
            max_left_cols = max(self._shape(df)[1] for df in left)
            max_left_rows = max(self._shape(df)[0] for df in left)
            for i, (_left, _label) in enumerate(zip(left, labels)):
                ax = fig.add_subplot(gs[i, 0:max_left_cols])
                self._make_table(ax, _left, title=_label, height=1.0 / max_left_rows)
            # right
            ax = plt.subplot(gs[:, max_left_cols:])
            self._make_table(ax, right, title="Result", height=1.05 / vcells)
            fig.subplots_adjust(top=0.9, bottom=0.05, left=0.05, right=0.95)
        else:
            max_rows = max(self._shape(df)[0] for df in left + [right])
            height = 1.0 / max_rows
            gs = gridspec.GridSpec(1, hcells)
            # left
            i = 0
            for df, _label in zip(left, labels):
                sp = self._shape(df)
                ax = fig.add_subplot(gs[0, i : i + sp[1]])
                self._make_table(ax, df, title=_label, height=height)
                i += sp[1]
            # right
            ax = plt.subplot(gs[0, i:])
            self._make_table(ax, right, title="Result", height=height)
            fig.subplots_adjust(top=0.85, bottom=0.05, left=0.05, right=0.95)

        return fig

    def _conv(self, data):
        """
        Convert each input into a form suitable for table output.

        A Series becomes a one-column DataFrame (with an empty column name
        when the Series is unnamed) and missing values are replaced by the
        string "NaN".
        """
        if isinstance(data, pd.Series):
            if data.name is None:
                data = data.to_frame(name="")
            else:
                data = data.to_frame()
        data = data.fillna("NaN")
        return data

    def _insert_index(self, data):
        """
        Return a copy of ``data`` with index and column levels as cells.

        Every index level is inserted as a leading column, and for
        multi-level columns the levels below the first are prepended as rows.
        """
        # insert is destructive
        data = data.copy()
        idx_nlevels = data.index.nlevels
        if idx_nlevels == 1:
            data.insert(0, "Index", data.index)
        else:
            for i in range(idx_nlevels):
                data.insert(i, f"Index{i}", data.index._get_level_values(i))

        col_nlevels = data.columns.nlevels
        if col_nlevels > 1:
            col = data.columns._get_level_values(0)
            values = [
                data.columns._get_level_values(i)._values for i in range(1, col_nlevels)
            ]
            col_df = pd.DataFrame(values)
            # align the labels positionally before stacking the rows
            data.columns = col_df.columns
            data = pd.concat([col_df, data])
            data.columns = col
        return data

    def _make_table(self, ax, df, title: str, height: float | None = None) -> None:
        """
        Draw ``df`` as a table on ``ax``; hide the axes when ``df`` is None.

        Header cells (index / column levels) are shaded grey, and the corner
        cells where both meet are hidden.
        """
        if df is None:
            ax.set_visible(False)
            return

        from pandas import plotting

        idx_nlevels = df.index.nlevels
        col_nlevels = df.columns.nlevels
        # must be convert here to get index levels for colorization
        df = self._insert_index(df)
        tb = plotting.table(ax, df, loc=9)
        tb.set_fontsize(self.font_size)

        if height is None:
            height = 1.0 / (len(df) + 1)

        props = tb.properties()
        for (r, c), cell in props["celld"].items():
            if c == -1:
                cell.set_visible(False)
            elif r < col_nlevels and c < idx_nlevels:
                cell.set_visible(False)
            elif r < col_nlevels or c < idx_nlevels:
                cell.set_facecolor("#AAAAAA")
            cell.set_height(height)

        ax.set_title(title, size=self.font_size)
        ax.axis("off")
def main() -> None:
    """
    Show three example layouts produced by ``TablePlotter``.

    Each figure is displayed with ``plt.show()``: a vertical layout, a
    horizontal layout, and a frame with a MultiIndex on both axes.
    """
    import matplotlib.pyplot as plt

    p = TablePlotter()
    df1 = pd.DataFrame({"A": [10, 11, 12], "B": [20, 21, 22], "C": [30, 31, 32]})
    df2 = pd.DataFrame({"A": [10, 12], "C": [30, 32]})

    p.plot([df1, df2], pd.concat([df1, df2]), labels=["df1", "df2"], vertical=True)
    plt.show()

    df3 = pd.DataFrame({"X": [10, 12], "Z": [30, 32]})

    # The second table shown here is df3, so it is titled accordingly
    # (it was previously titled "df2").
    p.plot(
        [df1, df3], pd.concat([df1, df3], axis=1), labels=["df1", "df3"], vertical=False
    )
    plt.show()

    idx = pd.MultiIndex.from_tuples(
        [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")]
    )
    column = pd.MultiIndex.from_tuples([(1, "A"), (1, "B")])
    df3 = pd.DataFrame({"v1": [1, 2, 3, 4, 5, 6], "v2": [5, 6, 7, 8, 9, 10]}, index=idx)
    df3.columns = column
    p.plot(df3, df3, labels=["df3"])
    plt.show()
if __name__ == "__main__":
main()

View File

@ -0,0 +1,103 @@
from __future__ import annotations
import contextlib
import inspect
import os
import re
from typing import TYPE_CHECKING
import warnings
if TYPE_CHECKING:
from collections.abc import Generator
from types import FrameType
@contextlib.contextmanager
def rewrite_exception(old_name: str, new_name: str) -> Generator[None, None, None]:
    """
    Rewrite the message of an exception.

    Any ``Exception`` raised inside the ``with`` block is re-raised after
    every occurrence of ``old_name`` in its first argument (converted to
    ``str``) has been replaced by ``new_name``. Further arguments are kept,
    and exceptions without arguments propagate untouched.
    """
    try:
        yield
    except Exception as err:
        if err.args:
            first, *remaining = err.args
            rewritten = str(first).replace(old_name, new_name)
            err.args = (rewritten, *remaining)
        raise
def find_stack_level() -> int:
    """
    Find the first place in the stack that is not inside pandas
    (tests notwithstanding).

    Returns
    -------
    int
        Number of consecutive frames, starting with this function's own
        frame, whose file lies inside the pandas package directory but
        outside its ``tests`` directory.
    """
    import pandas as pd

    pandas_root = os.path.dirname(pd.__file__)
    tests_root = os.path.join(pandas_root, "tests")

    depth = 0
    # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow
    frame: FrameType | None = inspect.currentframe()
    try:
        while frame:
            path = inspect.getfile(frame)
            inside_pandas = path.startswith(pandas_root) and not path.startswith(
                tests_root
            )
            if not inside_pandas:
                break
            frame = frame.f_back
            depth += 1
    finally:
        # See note in
        # https://docs.python.org/3/library/inspect.html#inspect.Traceback
        del frame
    return depth
@contextlib.contextmanager
def rewrite_warning(
    target_message: str,
    target_category: type[Warning],
    new_message: str,
    new_category: type[Warning] | None = None,
) -> Generator[None, None, None]:
    """
    Rewrite the message of a warning.

    Warnings raised inside the ``with`` block are recorded and re-emitted
    once the block has finished. Those matching the target are re-emitted
    with the new message and category; all others are re-emitted unchanged.

    Parameters
    ----------
    target_message : str
        Warning message to match. Used as a regular expression searched for
        in the warning text.
    target_category : Warning
        Warning type to match. The category must be this exact class.
    new_message : str
        New warning message to emit.
    new_category : Warning or None, default None
        New warning type to emit. When None, will be the same as target_category.
    """
    replacement_category = target_category if new_category is None else new_category

    with warnings.catch_warnings(record=True) as record:
        yield

    if not record:
        return

    pattern = re.compile(target_message)
    for caught in record:
        is_target = caught.category is target_category and pattern.search(
            str(caught.message)
        )
        message: Warning | str
        if is_target:
            category, message = replacement_category, new_message
        else:
            category, message = caught.category, caught.message
        # Re-emit with the original location so the report still points at
        # the code that triggered the warning.
        warnings.warn_explicit(
            message=message,
            category=category,
            filename=caught.filename,
            lineno=caught.lineno,
        )

View File

@ -0,0 +1,158 @@
from __future__ import annotations
import codecs
import json
import locale
import os
import platform
import struct
import sys
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from pandas._typing import JSONSerializable
from pandas.compat._optional import (
VERSIONS,
get_version,
import_optional_dependency,
)
def _get_commit_hash() -> str | None:
    """
    Return the git revision pandas was built from.

    The value generated by the meson build (``pandas._version_meson``) is
    preferred. When that module cannot be imported, the vendored versioneer
    code, which handles git worktree correctly, is used instead.
    """
    try:
        from pandas._version_meson import (  # pyright: ignore [reportMissingImports]
            __git_version__,
        )
    except ImportError:
        from pandas._version import get_versions

        return get_versions()["full-revisionid"]
    return __git_version__
def _get_sys_info() -> dict[str, JSONSerializable]:
    """
    Returns system information as a JSON serializable dictionary.

    The entries cover the pandas commit, the Python version and pointer
    width, the fields reported by ``platform.uname()``, the byte order and
    the locale settings read from the environment and ``locale.getlocale()``.
    """
    uname_result = platform.uname()
    language_code, encoding = locale.getlocale()

    info: dict[str, JSONSerializable] = {
        "commit": _get_commit_hash(),
        "python": platform.python_version(),
        "python-bits": struct.calcsize("P") * 8,
    }
    # (report key, ``uname_result`` attribute) pairs, in display order
    uname_fields = (
        ("OS", "system"),
        ("OS-release", "release"),
        ("Version", "version"),
        ("machine", "machine"),
        ("processor", "processor"),
    )
    for label, field in uname_fields:
        info[label] = getattr(uname_result, field)
    info["byteorder"] = sys.byteorder
    for variable in ("LC_ALL", "LANG"):
        info[variable] = os.environ.get(variable)
    info["LOCALE"] = {"language-code": language_code, "encoding": encoding}
    return info
def _get_dependency_info() -> dict[str, JSONSerializable]:
    """
    Returns dependency information as a JSON serializable dictionary.

    Each dependency name maps to its version, to None when
    ``import_optional_dependency`` returned nothing for it, or to "N/A"
    when importing it raised an exception.
    """
    always_reported = [
        "pandas",
        # required
        "numpy",
        "pytz",
        "dateutil",
        # install / build,
        "pip",
        "Cython",
        # docs
        "sphinx",
        # Other, not imported.
        "IPython",
    ]

    versions: dict[str, JSONSerializable] = {}
    # Optional dependencies follow the fixed entries
    for modname in [*always_reported, *VERSIONS]:
        try:
            mod = import_optional_dependency(modname, errors="ignore")
        except Exception:
            # Dependency conflicts may cause a non ImportError
            versions[modname] = "N/A"
            continue
        versions[modname] = get_version(mod) if mod else None
    return versions
def show_versions(as_json: str | bool = False) -> None:
    """
    Provide useful information, important for bug reports.

    It comprises info about hosting operation system, pandas version,
    and versions of other installed relative packages.

    Parameters
    ----------
    as_json : str or bool, default False
        * If False, outputs info in a human readable form to the console.
        * If str, it will be considered as a path to a file.
          Info will be written to that file in JSON format.
        * If True, outputs info in JSON format to the console.

    Examples
    --------
    >>> pd.show_versions()  # doctest: +SKIP
    Your output may look something like this:
    INSTALLED VERSIONS
    ------------------
    commit           : 37ea63d540fd27274cad6585082c91b1283f963d
    python           : 3.10.6.final.0
    python-bits      : 64
    OS               : Linux
    OS-release       : 5.10.102.1-microsoft-standard-WSL2
    Version          : #1 SMP Wed Mar 2 00:30:59 UTC 2022
    machine          : x86_64
    processor        : x86_64
    byteorder        : little
    LC_ALL           : None
    LANG             : en_GB.UTF-8
    LOCALE           : en_GB.UTF-8
    pandas           : 2.0.1
    numpy            : 1.24.3
    ...
    """
    sys_info = _get_sys_info()
    deps = _get_dependency_info()

    if as_json:
        j = {"system": sys_info, "dependencies": deps}

        if as_json is True:
            # ``writelines`` would iterate the string and emit it one
            # character at a time; a single ``write`` produces the same text.
            sys.stdout.write(json.dumps(j, indent=2))
        else:
            assert isinstance(as_json, str)  # needed for mypy
            # Built-in ``open`` replaces the deprecated ``codecs.open``;
            # ``newline=""`` keeps "\n" untranslated, as the binary-mode
            # codecs stream did.
            with open(as_json, "w", encoding="utf8", newline="") as f:
                json.dump(j, f, indent=2)

    else:
        assert isinstance(sys_info["LOCALE"], dict)  # needed for mypy
        language_code = sys_info["LOCALE"]["language-code"]
        encoding = sys_info["LOCALE"]["encoding"]
        sys_info["LOCALE"] = f"{language_code}.{encoding}"

        # align the values of both sections on the longest dependency name
        maxlen = max(len(x) for x in deps)
        print("\nINSTALLED VERSIONS")
        print("------------------")
        for k, v in sys_info.items():
            print(f"{k:<{maxlen}}: {v}")
        print("")
        for k, v in deps.items():
            print(f"{k:<{maxlen}}: {v}")

View File

@ -0,0 +1,173 @@
"""
This module provides decorator functions which can be applied to test objects
in order to skip those objects when certain conditions occur. A sample use case
is to detect if the platform is missing ``matplotlib``. If so, any test objects
which require ``matplotlib`` and decorated with ``@td.skip_if_no("matplotlib")``
will be skipped by ``pytest`` during the execution of the test suite.
To illustrate, after importing this module:
import pandas.util._test_decorators as td
The decorators can be applied to classes:
@td.skip_if_no("package")
class Foo:
...
Or individual functions:
@td.skip_if_no("package")
def test_foo():
...
For more information, refer to the ``pytest`` documentation on ``skipif``.
"""
from __future__ import annotations
import locale
from typing import (
TYPE_CHECKING,
Callable,
)
import pytest
from pandas._config import get_option
if TYPE_CHECKING:
from pandas._typing import F
from pandas._config.config import _get_option
from pandas.compat import (
IS64,
is_platform_windows,
)
from pandas.compat._optional import import_optional_dependency
def skip_if_installed(package: str) -> pytest.MarkDecorator:
    """
    Skip a test if a package is installed.

    Parameters
    ----------
    package : str
        The name of the package.

    Returns
    -------
    pytest.MarkDecorator
        a pytest.mark.skipif to use as either a test decorator or a
        parametrization mark.
    """
    # With errors="ignore" a falsy result means the import did not succeed.
    is_installed = bool(import_optional_dependency(package, errors="ignore"))
    reason = f"Skipping because {package} is installed."
    return pytest.mark.skipif(is_installed, reason=reason)
def skip_if_no(package: str, min_version: str | None = None) -> pytest.MarkDecorator:
    """
    Generic function to help skip tests when required packages are not
    present on the testing system.

    The import of ``package`` (optionally checked against ``min_version``)
    is attempted when this function is called, and the outcome is stored as
    the skip condition of the returned mark.

    The mark can be used as either a decorator for a test class or to be
    applied to parameters in pytest.mark.parametrize calls or parametrized
    fixtures. Use pytest.importorskip if an imported module is later needed
    or for test functions.

    If the import and version check are unsuccessful, then the test function
    (or test case when used in conjunction with parametrization) will be
    skipped.

    Parameters
    ----------
    package: str
        The name of the required package.
    min_version: str or None, default None
        Optional minimum version of the package.

    Returns
    -------
    pytest.MarkDecorator
        a pytest.mark.skipif to use as either a test decorator or a
        parametrization mark.
    """
    reason = f"Could not import '{package}'"
    if min_version:
        reason = f"{reason} satisfying a min_version of {min_version}"

    module = import_optional_dependency(
        package, errors="ignore", min_version=min_version
    )
    is_missing = not bool(module)
    return pytest.mark.skipif(is_missing, reason=reason)
# Ready-made skip marks. Each condition is evaluated once, when this module
# is imported.

# Skips when ``IS64`` is false.
skip_if_32bit = pytest.mark.skipif(not IS64, reason="skipping for 32 bit")
# Skips when ``is_platform_windows()`` returns a true value.
skip_if_windows = pytest.mark.skipif(is_platform_windows(), reason="Running on Windows")
# Skips unless the first element returned by ``locale.getlocale()`` is
# exactly "en_US"; the value that was found is included in the reason.
skip_if_not_us_locale = pytest.mark.skipif(
    locale.getlocale()[0] != "en_US",
    reason=f"Set local {locale.getlocale()[0]} is not en_US",
)
def parametrize_fixture_doc(*args) -> Callable[[F], F]:
    """
    Decorator that fills the placeholders of a fixture's docstring.

    Intended for use on parametrized fixtures: the placeholders {0}, {1} etc
    in the fixture docstring are replaced with the parameters passed as
    arguments. The fixture itself is returned unchanged otherwise.

    Parameters
    ----------
    args: iterable
        Positional arguments for docstring.

    Returns
    -------
    function
        Decorator that formats the docstring of the function it is applied
        to and returns that same function.
    """

    def documented_fixture(fixture):
        # ``__doc__`` is None when the fixture has no docstring, e.g. when
        # running under ``python -OO``; there is nothing to format then.
        if fixture.__doc__ is not None:
            fixture.__doc__ = fixture.__doc__.format(*args)
        return fixture

    return documented_fixture
def mark_array_manager_not_yet_implemented(request) -> None:
    """
    Apply an xfail mark for ArrayManager to the requesting test.

    Parameters
    ----------
    request
        Object providing ``applymarker``; the new mark is handed to it.
    """
    request.applymarker(
        pytest.mark.xfail(reason="Not yet implemented for ArrayManager")
    )
# Marks that depend on pandas options. Every option is read once, when this
# module is imported. Note that the two ``*_not_yet_implemented`` marks are
# ``xfail`` marks despite their ``skip_`` prefix; the two ``*_invalid_test``
# marks are ``skipif`` marks.

# Active when the "mode.data_manager" option equals "array".
skip_array_manager_not_yet_implemented = pytest.mark.xfail(
    _get_option("mode.data_manager", silent=True) == "array",
    reason="Not yet implemented for ArrayManager",
)
# Active when the "mode.data_manager" option equals "array".
skip_array_manager_invalid_test = pytest.mark.skipif(
    _get_option("mode.data_manager", silent=True) == "array",
    reason="Test that relies on BlockManager internals or specific behaviour",
)
# Active only when the "mode.copy_on_write" option is exactly ``True``.
skip_copy_on_write_not_yet_implemented = pytest.mark.xfail(
    get_option("mode.copy_on_write") is True,
    reason="Not yet implemented/adapted for Copy-on-Write mode",
)
# Active only when the "mode.copy_on_write" option is exactly ``True``.
skip_copy_on_write_invalid_test = pytest.mark.skipif(
    get_option("mode.copy_on_write") is True,
    reason="Test not valid for Copy-on-Write mode",
)

View File

@ -0,0 +1,53 @@
"""
Entrypoint for testing from the top-level namespace.
"""
from __future__ import annotations
import os
import sys
from pandas.compat._optional import import_optional_dependency
PKG = os.path.dirname(os.path.dirname(__file__))
def test(extra_args: list[str] | None = None, run_doctests: bool = False) -> None:
    """
    Run the pandas test suite using pytest.

    By default, runs with the marks -m "not slow and not network and not db"

    Parameters
    ----------
    extra_args : list[str], default None
        Arguments passed to pytest in place of the default marks. A single
        non-list value is treated as a one-element list. The list passed in
        is not modified.
    run_doctests : bool, default False
        Whether to only run the Python and Cython doctests. If you would like to run
        both doctests/regular tests, just append "--doctest-modules"/"--doctest-cython"
        to extra_args.

    Notes
    -----
    This function does not return: it ends the process through ``sys.exit``
    with the exit status of ``pytest.main``.

    Examples
    --------
    >>> pd.test()  # doctest: +SKIP
    running: pytest...
    """
    pytest = import_optional_dependency("pytest")
    # imported only to make sure that hypothesis is available
    import_optional_dependency("hypothesis")
    cmd = ["-m not slow and not network and not db"]
    if extra_args:
        if not isinstance(extra_args, list):
            extra_args = [extra_args]
        # Copy the arguments: ``cmd`` is extended in place below, which
        # would otherwise append the package path to the caller's list.
        cmd = list(extra_args)
    if run_doctests:
        cmd = [
            "--doctest-modules",
            "--doctest-cython",
            f"--ignore={os.path.join(PKG, 'tests')}",
        ]
    cmd += [PKG]
    joined = " ".join(cmd)
    print(f"running: pytest {joined}")
    sys.exit(pytest.main(cmd))
__all__ = ["test"]

View File

@ -0,0 +1,456 @@
"""
Module that contains many useful utilities
for validating data or function arguments
"""
from __future__ import annotations
from collections.abc import (
Iterable,
Sequence,
)
from typing import (
TypeVar,
overload,
)
import numpy as np
from pandas._libs import lib
from pandas.core.dtypes.common import (
is_bool,
is_integer,
)
# Type variable constrained to ``bool`` or ``int``; used in the
# ``validate_ascending`` signatures.
BoolishT = TypeVar("BoolishT", bool, int)
# Same constraints plus ``None``; used in the ``validate_bool_kwarg`` signature.
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
def _check_arg_length(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Ensure that 'args' holds no more entries than 'compat_args'.

    Mirrors Python's own behaviour when a function receives too many
    arguments: a TypeError is raised whose message counts the arguments that
    'fname' accepts outside of 'args' ('max_fname_arg_count') as well.

    Raises
    ------
    ValueError
        If 'max_fname_arg_count' is negative.
    TypeError
        If 'args' is longer than 'compat_args'.
    """
    if max_fname_arg_count < 0:
        raise ValueError("'max_fname_arg_count' must be non-negative")

    n_given = len(args)
    n_allowed = len(compat_args)
    if n_given <= n_allowed:
        return

    # Report totals from the point of view of the caller of 'fname'.
    limit = n_allowed + max_fname_arg_count
    total_given = n_given + max_fname_arg_count
    noun = "arguments" if limit != 1 else "argument"
    raise TypeError(
        f"{fname}() takes at most {limit} {noun} "
        f"({total_given} given)"
    )
def _check_for_default_values(fname, arg_val_dict, compat_args) -> None:
    """
    Verify that every value in `arg_val_dict` equals its default.

    The defaults are looked up by key in `compat_args`.

    Note that this function is to be called only when it has been
    checked that arg_val_dict.keys() is a subset of compat_args

    Raises
    ------
    ValueError
        If any value differs from the default registered for its key.
    """
    for key, given in arg_val_dict.items():
        default = compat_args[key]
        try:
            # Exactly one of the two being None is a mismatch; deciding this up
            # front avoids comparing e.g. a numpy array against None.
            if (given is None) != (default is None):
                same = False
            else:
                # '==' may be overridden by the left-hand object
                same = given == default

            if not is_bool(same):
                raise ValueError("'match' is not a boolean")
        except ValueError:
            # '==' gave no usable boolean (or raised), so fall back to identity
            same = given is default

        if not same:
            raise ValueError(
                f"the '{key}' parameter is not supported in "
                f"the pandas implementation of {fname}()"
            )
def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Validate the ``*args`` passed to a function against `compat_args`.

    `args` may hold at most ``len(compat_args)`` values, and each value must
    equal the default of the `compat_args` entry at the same position.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `*args` parameter
    args : tuple
        The `*args` parameter passed into a function
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys and their associated default values.
        In order to accommodate buggy behaviour in some versions of `numpy`,
        where a signature displayed keyword arguments but then passed those
        arguments **positionally** internally when calling downstream
        implementations, a dict ensures that the original
        order of the keyword arguments is enforced.

    Raises
    ------
    TypeError
        If `args` contains more values than there are `compat_args`
    ValueError
        If `args` contains values that do not correspond to those
        of the default values specified in `compat_args`
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Name each positional value after the parameter it lines up with, so that
    # the error message can say which unsupported parameter was supplied.
    named_positionals = {name: value for name, value in zip(compat_args, args)}
    _check_for_default_values(fname, named_positionals, compat_args)
def _check_for_invalid_keys(fname, kwargs, compat_args) -> None:
    """
    Reject keyword names that 'compat_args' does not know about.

    Raises
    ------
    TypeError
        If 'kwargs' has a key missing from 'compat_args'. The message names
        one such key, worded like Python's own unexpected-keyword error.
    """
    # Building a set from a dict collects its keys.
    unknown = set(kwargs) - set(compat_args)

    # The loop body runs at most once: it raises for the first unknown key.
    for bad_arg in unknown:
        raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
def validate_kwargs(fname, kwargs, compat_args) -> None:
    """
    Validate the ``**kwargs`` passed to a function against `compat_args`.

    Every key must be present in `compat_args`, and every value must equal
    the default recorded there for its key.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to have and their
        associated default values

    Raises
    ------
    TypeError if `kwargs` contains keys not in `compat_args`
    ValueError if `kwargs` contains keys in `compat_args` that do not
    map to the default values specified in `compat_args`
    """
    # Taken before any check runs; the value check works on this copy.
    snapshot = kwargs.copy()

    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, snapshot, compat_args)
def validate_args_and_kwargs(
    fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `*compat_args`
    and whether or not they are set to their default values.

    Parameters
    ----------
    fname: str
        The name of the function being passed the `**kwargs` parameter
    args: tuple
        The `*args` parameter passed into a function
    kwargs: dict
        The `**kwargs` parameter passed into `fname`. It is not modified.
    max_fname_arg_count: int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values.

    Raises
    ------
    TypeError if `args` contains more values than there are
    `compat_args` OR `kwargs` contains keys not in `compat_args`
    OR a parameter is given both positionally and by keyword
    ValueError if `args` contains values not at the default value (`None`)
    `kwargs` contains keys in `compat_args` that do not map to the default
    value as specified in `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.
    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args
    _check_arg_length(
        fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Merge into a new dict instead of updating `kwargs` in place, so the
    # caller's mapping does not silently gain the positional arguments.
    # Key order (keywords first, then positionals) matches an in-place update.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)
def validate_bool_kwarg(
    value: BoolishNoneT,
    arg_name: str,
    none_allowed: bool = True,
    int_allowed: bool = False,
) -> BoolishNoneT:
    """
    Check that the value given for `arg_name` is acceptable as a boolean.

    Parameters
    ----------
    value : bool
        Value to be validated.
    arg_name : str
        Name of the argument. To be reflected in the error message.
    none_allowed : bool, default True
        Whether to consider None to be a valid boolean.
    int_allowed : bool, default False
        Whether to consider integer value to be a valid boolean.

    Returns
    -------
    value
        The same value as input.

    Raises
    ------
    ValueError
        If the value is not a valid boolean.
    """
    acceptable = (
        is_bool(value)
        or (none_allowed and value is None)
        or (int_allowed and isinstance(value, int))
    )
    if acceptable:
        return value  # pyright: ignore[reportGeneralTypeIssues]

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )
def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
    """
    Validate the keyword arguments to 'fillna'.

    Exactly one of 'value' and 'method' has to be given. A given 'method' is
    passed through ``clean_fill_method`` and the result is returned in its
    place.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        Whether to validate that 'value' is a scalar or dict. Specifically,
        validate that it is not a list or tuple.

    Returns
    -------
    value, method : object

    Raises
    ------
    ValueError
        If neither or both of 'value' and 'method' are given.
    TypeError
        If 'value' is a list or tuple while `validate_scalar_dict_value` is set.
    """
    from pandas.core.missing import clean_fill_method

    has_value = value is not None
    has_method = method is not None

    if not has_value and not has_method:
        raise ValueError("Must specify a fill 'value' or 'method'.")
    if has_value and has_method:
        raise ValueError("Cannot specify both 'value' and 'method'.")

    if has_method:
        method = clean_fill_method(method)
    elif validate_scalar_dict_value and isinstance(value, (list, tuple)):
        raise TypeError(
            '"value" parameter must be a scalar or dict, but '
            f'you passed a "{type(value).__name__}"'
        )

    return value, method
def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    The input is converted with ``np.asarray`` and every entry has to lie in
    the closed interval [0, 1].

    Parameters
    ----------
    q: float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError if percentiles are not in given interval([0, 1]).
    """
    percentiles = np.asarray(q)

    if percentiles.ndim == 0:
        # A zero-dimensional array is compared as a single number.
        in_range = 0 <= percentiles <= 1
    else:
        in_range = all(0 <= item <= 1 for item in percentiles)

    if not in_range:
        # Don't change this to an f-string. The string formatting
        # is too expensive for cases where we don't need it.
        raise ValueError("percentiles should all be in the interval [0, 1]")
    return percentiles
@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
    ...


@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
    ...


def validate_ascending(
    ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
    """
    Validate ``ascending`` kwargs for ``sort_index`` method.

    A ``Sequence`` is validated item by item and returned as a new list; any
    other value is validated and returned on its own. Booleans and integers
    are accepted, None is not.
    """

    def _checked(item):
        return validate_bool_kwarg(
            item, "ascending", none_allowed=False, int_allowed=True
        )

    if isinstance(ascending, Sequence):
        return [_checked(item) for item in ascending]
    return _checked(ascending)
def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
    """
    Translate the `closed` argument into a pair of endpoint flags.

    Parameters
    ----------
    closed : {None, "left", "right"}
        None closes both ends.

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True
    raise ValueError("Closed has to be either 'left', 'right' or None")
def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
    """
    Translate the `inclusive` argument into a pair of endpoint flags.

    Parameters
    ----------
    inclusive : {"both", "neither", "left", "right"}

    Returns
    -------
    left_right_inclusive : tuple[bool, bool]
        ``(left_inclusive, right_inclusive)``.

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    flags_by_name = {
        "both": (True, True),
        "left": (True, False),
        "right": (False, True),
        "neither": (False, False),
    }
    # Only strings are looked up; anything else (None included) is rejected.
    if not isinstance(inclusive, str) or inclusive not in flags_by_name:
        raise ValueError(
            "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
        )
    return flags_by_name[inclusive]
def validate_insert_loc(loc: int, length: int) -> int:
    """
    Validate an insertion position for a container of size `length`.

    `loc` has to be an integer between -length and length, inclusive.
    A negative `loc` is shifted by `length`, so the result lies in [0, length].

    The exceptions we raise on failure match np.insert.

    Raises
    ------
    TypeError
        If `loc` is not an integer.
    IndexError
        If `loc` is outside of [-length, length].
    """
    if not is_integer(loc):
        raise TypeError(f"loc must be an integer between -{length} and {length}")

    position = loc + length if loc < 0 else loc
    if position < 0 or position > length:
        raise IndexError(f"loc must be an integer between -{length} and {length}")
    return position  # pyright: ignore[reportGeneralTypeIssues]
def check_dtype_backend(dtype_backend) -> None:
    """
    Validate a ``dtype_backend`` argument.

    ``lib.no_default`` is always accepted; any other value has to be one of
    "numpy_nullable" or "pyarrow".

    Raises
    ------
    ValueError
        If `dtype_backend` is neither the no-default sentinel nor one of the
        two allowed names.
    """
    if dtype_backend is lib.no_default:
        return
    if dtype_backend in ["numpy_nullable", "pyarrow"]:
        return
    raise ValueError(
        f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
        f"'pyarrow' are allowed.",
    )

View File

@ -0,0 +1,579 @@
# Vendored from https://github.com/pypa/packaging/blob/main/packaging/_structures.py
# and https://github.com/pypa/packaging/blob/main/packaging/version.py
# changeset ae891fd74d6dd4c6063bb04f2faeadaac6fc6313
# 04/30/2021
# This file is dual licensed under the terms of the Apache License, Version
# 2.0, and the BSD License. Licence at LICENSES/PACKAGING_LICENSE
from __future__ import annotations
import collections
from collections.abc import Iterator
import itertools
import re
from typing import (
Callable,
SupportsInt,
Tuple,
Union,
)
import warnings
__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"]
class InfinityType:
    """
    Sentinel type whose instances order after every other object.

    All instances are equal to each other and share one hash. Negating an
    instance gives the module-level ``NegativeInfinity``.
    """

    def __repr__(self) -> str:
        return "Infinity"

    def __hash__(self) -> int:
        # Derived from the repr, so every instance hashes alike.
        return hash(repr(self))

    def __eq__(self, other: object) -> bool:
        return isinstance(other, self.__class__)

    def __ne__(self, other: object) -> bool:
        return not isinstance(other, self.__class__)

    # Ordering: never smaller than anything, always at least as large.
    def __lt__(self, other: object) -> bool:
        return False

    def __le__(self, other: object) -> bool:
        return False

    def __gt__(self, other: object) -> bool:
        return True

    def __ge__(self, other: object) -> bool:
        return True

    def __neg__(self: object) -> NegativeInfinityType:
        return NegativeInfinity


Infinity = InfinityType()
class NegativeInfinityType:
    """
    Sentinel type whose instances order before every other object.

    All instances are equal to each other and share one hash. Negating an
    instance gives the module-level ``Infinity``.
    """

    def __repr__(self) -> str:
        return "-Infinity"

    def __hash__(self) -> int:
        # Derived from the repr, so every instance hashes alike.
        return hash(repr(self))

    def __eq__(self, other: object) -> bool:
        return isinstance(other, self.__class__)

    def __ne__(self, other: object) -> bool:
        return not isinstance(other, self.__class__)

    # Ordering: always smaller than or equal to anything, never larger.
    def __lt__(self, other: object) -> bool:
        return True

    def __le__(self, other: object) -> bool:
        return True

    def __gt__(self, other: object) -> bool:
        return False

    def __ge__(self, other: object) -> bool:
        return False

    def __neg__(self: object) -> InfinityType:
        return Infinity


NegativeInfinity = NegativeInfinityType()
# Type aliases describing the sort keys built by `_cmpkey` and `_legacy_cmpkey`.

# Either of the two ordering sentinels.
InfiniteTypes = Union[InfinityType, NegativeInfinityType]
# A pre/post/dev key slot: a sentinel or a (letter, number) pair.
PrePostDevType = Union[InfiniteTypes, tuple[str, int]]
# One component of a local version segment.
SubLocalType = Union[InfiniteTypes, int, str]
# The local key slot: the negative sentinel, or a tuple with one entry per
# local component.
LocalType = Union[
    NegativeInfinityType,
    tuple[
        Union[
            SubLocalType,
            tuple[SubLocalType, str],
            tuple[NegativeInfinityType, SubLocalType],
        ],
        ...,
    ],
]
# Key returned by `_cmpkey`: (epoch, release, pre, post, dev, local).
CmpKey = tuple[
    int, tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType
]
# Key returned by `_legacy_cmpkey`: (epoch, parts).
LegacyCmpKey = tuple[int, tuple[str, ...]]
# Signature of a function comparing two keys of either kind.
VersionComparisonMethod = Callable[
    [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool
]

# Container for the parsed components of a `Version`. Note the field order
# (dev before pre); `Version.__init__` fills it by keyword.
_Version = collections.namedtuple(
    "_Version", ["epoch", "release", "dev", "pre", "post", "local"]
)
def parse(version: str) -> LegacyVersion | Version:
    """
    Build a version object from the given version string.

    A :class:`Version` is returned when the string is accepted by
    :class:`Version` (i.e. it is a valid PEP 440 version); otherwise the
    string is treated as a legacy version and a :class:`LegacyVersion` is
    returned.
    """
    try:
        parsed: LegacyVersion | Version = Version(version)
    except InvalidVersion:
        # Not PEP 440 compliant: fall back to the legacy scheme.
        parsed = LegacyVersion(version)
    return parsed
class InvalidVersion(ValueError):
    """
    An invalid version was found, users should refer to PEP 440.

    Raised by ``Version`` when the given string does not match the version
    pattern; ``parse`` catches it to fall back to ``LegacyVersion``.

    Examples
    --------
    >>> pd.util.version.Version('1.')
    Traceback (most recent call last):
    InvalidVersion: Invalid version: '1.'
    """
class _BaseVersion:
    """
    Common base of the version classes: hashing and rich comparison.

    Every operation delegates to the ``_key`` attribute, which subclasses are
    expected to assign (``LegacyVersion`` and ``Version`` both do so in
    ``__init__``). Comparing against anything that is not a ``_BaseVersion``
    returns ``NotImplemented`` so Python can try the reflected operation.
    """

    # Sort key that the hash and all six comparisons are computed from.
    _key: CmpKey | LegacyCmpKey

    def __hash__(self) -> int:
        return hash(self._key)

    # Please keep the duplicated `isinstance` check
    # in the six comparisons hereunder
    # unless you find a way to avoid adding overhead function calls.
    def __lt__(self, other: _BaseVersion) -> bool:
        if not isinstance(other, _BaseVersion):
            return NotImplemented

        return self._key < other._key

    def __le__(self, other: _BaseVersion) -> bool:
        if not isinstance(other, _BaseVersion):
            return NotImplemented

        return self._key <= other._key

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, _BaseVersion):
            return NotImplemented

        return self._key == other._key

    def __ge__(self, other: _BaseVersion) -> bool:
        if not isinstance(other, _BaseVersion):
            return NotImplemented

        return self._key >= other._key

    def __gt__(self, other: _BaseVersion) -> bool:
        if not isinstance(other, _BaseVersion):
            return NotImplemented

        return self._key > other._key

    def __ne__(self, other: object) -> bool:
        if not isinstance(other, _BaseVersion):
            return NotImplemented

        return self._key != other._key
class LegacyVersion(_BaseVersion):
    """
    Version object for strings that are not valid PEP 440 versions.

    The string is stored as given and ordered through the key built by
    ``_legacy_cmpkey``. Creating an instance emits a ``DeprecationWarning``.
    The properties mirror those of ``Version`` but carry no parsed
    information: they return fixed values.
    """

    def __init__(self, version: str) -> None:
        self._version = str(version)
        self._key = _legacy_cmpkey(self._version)

        warnings.warn(
            "Creating a LegacyVersion has been deprecated and will be "
            "removed in the next major release.",
            DeprecationWarning,
        )

    def __str__(self) -> str:
        return self._version

    def __repr__(self) -> str:
        return f"<LegacyVersion('{self}')>"

    @property
    def public(self) -> str:
        """The version string exactly as stored."""
        return self._version

    @property
    def base_version(self) -> str:
        """The version string exactly as stored."""
        return self._version

    @property
    def epoch(self) -> int:
        """Always -1, the same epoch that ``_legacy_cmpkey`` hard-codes."""
        return -1

    @property
    def release(self) -> None:
        """Always None."""
        return None

    @property
    def pre(self) -> None:
        """Always None."""
        return None

    @property
    def post(self) -> None:
        """Always None."""
        return None

    @property
    def dev(self) -> None:
        """Always None."""
        return None

    @property
    def local(self) -> None:
        """Always None."""
        return None

    @property
    def is_prerelease(self) -> bool:
        """Always False."""
        return False

    @property
    def is_postrelease(self) -> bool:
        """Always False."""
        return False

    @property
    def is_devrelease(self) -> bool:
        """Always False."""
        return False
# Splits a legacy version string into digit runs, lowercase letter runs,
# dots and dashes; the capturing group keeps the separators in the result.
_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE)

# Components that are swapped for another spelling before they are emitted.
_legacy_version_replacement_map = {
    "pre": "c",
    "preview": "c",
    "-": "final-",
    "rc": "c",
    "dev": "@",
}


def _parse_version_parts(s: str) -> Iterator[str]:
    """
    Yield the comparable parts of the legacy version string `s`.

    Empty pieces and dots are dropped. A piece that starts with a digit is
    zero-padded to eight characters, any other piece gets a "*" prefix, and a
    closing "*final" marker is always yielded last.
    """
    for raw in _legacy_version_component_re.split(s):
        token = _legacy_version_replacement_map.get(raw, raw)

        if token and token != ".":
            # pad for numeric comparison
            yield token.zfill(8) if token[:1] in "0123456789" else "*" + token

    # ensure that alpha/beta/candidate are before final
    yield "*final"
def _legacy_cmpkey(version: str) -> LegacyCmpKey:
    """
    Build the sort key of a legacy (non PEP 440) version string.

    The key is ``(-1, parts)``, with ``parts`` derived from the lower-cased
    string by ``_parse_version_parts``.
    """
    # This scheme is taken from pkg_resources.parse_version setuptools prior to
    # it's adoption of the packaging library.
    collected: list[str] = []

    def _drop_trailing(marker: str) -> None:
        # Remove every occurrence of `marker` from the end of the key so far.
        while collected and collected[-1] == marker:
            collected.pop()

    for token in _parse_version_parts(version.lower()):
        if token.startswith("*"):
            if token < "*final":
                # remove "-" before a prerelease tag
                _drop_trailing("*final-")

            # remove trailing zeros from each series of numeric parts
            _drop_trailing("00000000")

        collected.append(token)

    # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch
    # greater than or equal to 0. This will effectively put the LegacyVersion,
    # which uses the defacto standard originally implemented by setuptools,
    # as before all PEP 440 versions.
    return -1, tuple(collected)
# Regular expression source for version strings, written for ``re.VERBOSE``
# (whitespace and the inline ``#`` comments are part of the pattern text and
# are ignored only in that mode). ``Version`` compiles it with
# ``re.VERBOSE | re.IGNORECASE`` and wraps it in ``^\s*`` ... ``\s*$``.
# The named groups are the ones ``Version.__init__`` reads: epoch, release,
# pre_l/pre_n, post_n1/post_l/post_n2, dev_l/dev_n and local.
#
# Deliberately not anchored to the start and end of the string, to make it
# easier for 3rd party code to reuse
VERSION_PATTERN = r"""
v?
(?:
(?:(?P<epoch>[0-9]+)!)? # epoch
(?P<release>[0-9]+(?:\.[0-9]+)*) # release segment
(?P<pre> # pre-release
[-_\.]?
(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
[-_\.]?
(?P<pre_n>[0-9]+)?
)?
(?P<post> # post release
(?:-(?P<post_n1>[0-9]+))
|
(?:
[-_\.]?
(?P<post_l>post|rev|r)
[-_\.]?
(?P<post_n2>[0-9]+)?
)
)?
(?P<dev> # dev release
[-_\.]?
(?P<dev_l>dev)
[-_\.]?
(?P<dev_n>[0-9]+)?
)?
)
(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
"""
class Version(_BaseVersion):
    """
    A version parsed from a string that matches ``VERSION_PATTERN``.

    The parsed pieces are kept in ``_version`` and exposed through read-only
    properties; ``_key`` (built by ``_cmpkey``) drives the comparisons that
    are inherited from ``_BaseVersion``.

    Raises
    ------
    InvalidVersion
        From the constructor, if the string does not match the pattern.
    """

    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)

    def __init__(self, version: str) -> None:
        # Validate the version and parse it into pieces
        found = self._regex.search(version)
        if not found:
            raise InvalidVersion(f"Invalid version: '{version}'")

        group = found.group
        epoch_text = group("epoch")

        # Store the parsed out pieces of the version
        parsed = _Version(
            epoch=int(epoch_text) if epoch_text else 0,
            release=tuple(int(piece) for piece in group("release").split(".")),
            pre=_parse_letter_version(group("pre_l"), group("pre_n")),
            post=_parse_letter_version(
                group("post_l"), group("post_n1") or group("post_n2")
            ),
            dev=_parse_letter_version(group("dev_l"), group("dev_n")),
            local=_parse_local_version(group("local")),
        )
        self._version = parsed

        # Generate a key which will be used for sorting
        self._key = _cmpkey(
            parsed.epoch,
            parsed.release,
            parsed.pre,
            parsed.post,
            parsed.dev,
            parsed.local,
        )

    def __repr__(self) -> str:
        return f"<Version('{self}')>"

    def __str__(self) -> str:
        # Epoch (only when non-zero) followed by the release segment
        epoch_prefix = f"{self.epoch}!" if self.epoch != 0 else ""
        text = epoch_prefix + ".".join(map(str, self.release))

        # Pre-release
        pre = self.pre
        if pre is not None:
            text += "".join(map(str, pre))

        # Post-release
        post = self.post
        if post is not None:
            text += f".post{post}"

        # Development release
        dev = self.dev
        if dev is not None:
            text += f".dev{dev}"

        # Local version segment
        local = self.local
        if local is not None:
            text += f"+{local}"

        return text

    @property
    def epoch(self) -> int:
        """The epoch; 0 when the string did not specify one."""
        value: int = self._version.epoch
        return value

    @property
    def release(self) -> tuple[int, ...]:
        """The release segment as a tuple of integers."""
        value: tuple[int, ...] = self._version.release
        return value

    @property
    def pre(self) -> tuple[str, int] | None:
        """The pre-release as a (letter, number) pair, or None."""
        value: tuple[str, int] | None = self._version.pre
        return value

    @property
    def post(self) -> int | None:
        """The post-release number, or None."""
        stored = self._version.post
        return stored[1] if stored else None

    @property
    def dev(self) -> int | None:
        """The development-release number, or None."""
        stored = self._version.dev
        return stored[1] if stored else None

    @property
    def local(self) -> str | None:
        """The local segment with its parts joined by dots, or None."""
        stored = self._version.local
        if not stored:
            return None
        return ".".join(map(str, stored))

    @property
    def public(self) -> str:
        """The string form without anything from the first "+" onwards."""
        return str(self).split("+", 1)[0]

    @property
    def base_version(self) -> str:
        """The epoch (only when non-zero) plus the release segment."""
        # Epoch
        epoch_prefix = f"{self.epoch}!" if self.epoch != 0 else ""
        # Release segment
        return epoch_prefix + ".".join(map(str, self.release))

    @property
    def is_prerelease(self) -> bool:
        """Whether there is a development or a pre-release part."""
        return not (self.dev is None and self.pre is None)

    @property
    def is_postrelease(self) -> bool:
        """Whether there is a post-release part."""
        return self.post is not None

    @property
    def is_devrelease(self) -> bool:
        """Whether there is a development-release part."""
        return self.dev is not None

    @property
    def major(self) -> int:
        """First release number, or 0 if there is none."""
        numbers = self.release
        return numbers[0] if len(numbers) > 0 else 0

    @property
    def minor(self) -> int:
        """Second release number, or 0 if there is none."""
        numbers = self.release
        return numbers[1] if len(numbers) > 1 else 0

    @property
    def micro(self) -> int:
        """Third release number, or 0 if there is none."""
        numbers = self.release
        return numbers[2] if len(numbers) > 2 else 0
def _parse_letter_version(
    letter: str, number: str | bytes | SupportsInt
) -> tuple[str, int] | None:
    """
    Normalize a (letter, number) pair taken from a version string.

    Returns a ``(letter, number)`` tuple with the letter lower-cased and
    mapped to its preferred spelling, or None when there is neither a letter
    nor a number. A number without a letter is read as a post release.
    """
    # We consider some words to be alternate spellings of other words and
    # in those cases we want to normalize the spellings to our preferred
    # spelling.
    preferred_spelling = {
        "alpha": "a",
        "beta": "b",
        "c": "rc",
        "pre": "rc",
        "preview": "rc",
        "rev": "post",
        "r": "post",
    }

    if letter:
        # We normalize any letters to their lower case form
        lowered = letter.lower()
        # We consider there to be an implicit 0 in a pre-release if there is
        # not a numeral associated with it.
        count = 0 if number is None else number
        return preferred_spelling.get(lowered, lowered), int(count)

    if number:
        # We assume if we are given a number, but we are not given a letter
        # then this is using the implicit post release syntax (e.g. 1.0-1)
        return "post", int(number)

    return None
# A local version segment may be divided by ".", "_" or "-".
_local_version_separators = re.compile(r"[\._-]")


def _parse_local_version(local: str) -> LocalType | None:
    """
    Split a local version segment into its parts.

    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve"):
    all-digit parts become integers, every other part is lower-cased.
    None is passed through unchanged.
    """
    if local is None:
        return None

    return tuple(
        int(piece) if piece.isdigit() else piece.lower()
        for piece in _local_version_separators.split(local)
    )
def _cmpkey(
    epoch: int,
    release: tuple[int, ...],
    pre: tuple[str, int] | None,
    post: tuple[str, int] | None,
    dev: tuple[str, int] | None,
    local: tuple[SubLocalType] | None,
) -> CmpKey:
    """
    Build the sort key of a version from its parsed components.

    The result is ``(epoch, release, pre, post, dev, local)`` where trailing
    zeros are stripped from the release and missing components are replaced
    by ``Infinity`` / ``NegativeInfinity`` so that plain tuple comparison
    gives the intended order.
    """
    # When we compare a release version, we want to compare it with all of the
    # trailing zeros removed, so pop them off the end of a working copy.
    significant = list(release)
    while significant and significant[-1] == 0:
        significant.pop()
    _release = tuple(significant)

    if pre is not None:
        _pre: PrePostDevType = pre
    elif post is None and dev is not None:
        # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
        # We'll do this by abusing the pre segment, but we _only_ want to do this
        # if there is not a pre or a post segment. If we have one of those then
        # the normal sorting rules will handle this case correctly.
        _pre = NegativeInfinity
    else:
        # Versions without a pre-release (except as noted above) should sort
        # after those with one.
        _pre = Infinity

    # Versions without a post segment should sort before those with one.
    _post: PrePostDevType = NegativeInfinity if post is None else post

    # Versions without a development segment should sort after those with one.
    _dev: PrePostDevType = Infinity if dev is None else dev

    if local is not None:
        # Versions with a local segment need that segment parsed to implement
        # the sorting rules in PEP440.
        # - Alpha numeric segments sort before numeric segments
        # - Alpha numeric segments sort lexicographically
        # - Numeric segments sort numerically
        # - Shorter versions sort before longer versions when the prefixes
        #   match exactly
        _local: LocalType = tuple(
            (NegativeInfinity, piece) if not isinstance(piece, int) else (piece, "")
            for piece in local
        )
    else:
        # Versions without a local segment should sort before those with one.
        _local = NegativeInfinity

    return epoch, _release, _pre, _post, _dev, _local