Updated script so that it can be controlled by the Node.js web app
13 binary files changed (contents not shown)
lib/python3.13/site-packages/pandas/core/computation/align.py (new file, 213 lines)
@@ -0,0 +1,213 @@
"""
Core eval alignment algorithms.
"""
from __future__ import annotations

from functools import (
    partial,
    wraps,
)
from typing import (
    TYPE_CHECKING,
    Callable,
)
import warnings

import numpy as np

from pandas.errors import PerformanceWarning
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCSeries,
)

from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.computation.common import result_type_many

if TYPE_CHECKING:
    from collections.abc import Sequence

    from pandas._typing import F

    from pandas.core.generic import NDFrame
    from pandas.core.indexes.api import Index


def _align_core_single_unary_op(
    term,
) -> tuple[partial | type[NDFrame], dict[str, Index] | None]:
    typ: partial | type[NDFrame]
    axes: dict[str, Index] | None = None

    if isinstance(term.value, np.ndarray):
        typ = partial(np.asanyarray, dtype=term.value.dtype)
    else:
        typ = type(term.value)
        if hasattr(term.value, "axes"):
            axes = _zip_axes_from_type(typ, term.value.axes)

    return typ, axes


def _zip_axes_from_type(
    typ: type[NDFrame], new_axes: Sequence[Index]
) -> dict[str, Index]:
    return {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)}


def _any_pandas_objects(terms) -> bool:
    """
    Check a sequence of terms for instances of PandasObject.
    """
    return any(isinstance(term.value, PandasObject) for term in terms)


def _filter_special_cases(f) -> Callable[[F], F]:
    @wraps(f)
    def wrapper(terms):
        # single unary operand
        if len(terms) == 1:
            return _align_core_single_unary_op(terms[0])

        term_values = (term.value for term in terms)

        # we don't have any pandas objects
        if not _any_pandas_objects(terms):
            return result_type_many(*term_values), None

        return f(terms)

    return wrapper


@_filter_special_cases
def _align_core(terms):
    term_index = [i for i, term in enumerate(terms) if hasattr(term.value, "axes")]
    term_dims = [terms[i].value.ndim for i in term_index]

    from pandas import Series

    ndims = Series(dict(zip(term_index, term_dims)))

    # initial axes are the axes of the largest-axis'd term
    biggest = terms[ndims.idxmax()].value
    typ = biggest._constructor
    axes = biggest.axes
    naxes = len(axes)
    gt_than_one_axis = naxes > 1

    for value in (terms[i].value for i in term_index):
        is_series = isinstance(value, ABCSeries)
        is_series_and_gt_one_axis = is_series and gt_than_one_axis

        for axis, items in enumerate(value.axes):
            if is_series_and_gt_one_axis:
                ax, itm = naxes - 1, value.index
            else:
                ax, itm = axis, items

            if not axes[ax].is_(itm):
                axes[ax] = axes[ax].union(itm)

    for i, ndim in ndims.items():
        for axis, items in zip(range(ndim), axes):
            ti = terms[i].value

            if hasattr(ti, "reindex"):
                transpose = isinstance(ti, ABCSeries) and naxes > 1
                reindexer = axes[naxes - 1] if transpose else items

                term_axis_size = len(ti.axes[axis])
                reindexer_size = len(reindexer)

                ordm = np.log10(max(1, abs(reindexer_size - term_axis_size)))
                if ordm >= 1 and reindexer_size >= 10000:
                    w = (
                        f"Alignment difference on axis {axis} is larger "
                        f"than an order of magnitude on term {repr(terms[i].name)}, "
                        f"by more than {ordm:.4g}; performance may suffer."
                    )
                    warnings.warn(
                        w, category=PerformanceWarning, stacklevel=find_stack_level()
                    )

                obj = ti.reindex(reindexer, axis=axis, copy=False)
                terms[i].update(obj)

        terms[i].update(terms[i].value.values)

    return typ, _zip_axes_from_type(typ, axes)


def align_terms(terms):
    """
    Align a set of terms.
    """
    try:
        # flatten the parse tree (a nested list, really)
        terms = list(com.flatten(terms))
    except TypeError:
        # can't iterate so it must just be a constant or single variable
        if isinstance(terms.value, (ABCSeries, ABCDataFrame)):
            typ = type(terms.value)
            return typ, _zip_axes_from_type(typ, terms.value.axes)
        return np.result_type(terms.type), None

    # if all resolved variables are numeric scalars
    if all(term.is_scalar for term in terms):
        return result_type_many(*(term.value for term in terms)).type, None

    # perform the main alignment
    typ, axes = _align_core(terms)
    return typ, axes


def reconstruct_object(typ, obj, axes, dtype):
    """
    Reconstruct an object given its type, raw value, and possibly empty
    (None) axes.

    Parameters
    ----------
    typ : object
        A type
    obj : object
        The value to use in the type constructor
    axes : dict
        The axes to use to construct the resulting pandas object

    Returns
    -------
    ret : typ
        An object of type ``typ`` with the value `obj` and possible axes
        `axes`.
    """
    try:
        typ = typ.type
    except AttributeError:
        pass

    res_t = np.result_type(obj.dtype, dtype)

    if not isinstance(typ, partial) and issubclass(typ, PandasObject):
        return typ(obj, dtype=res_t, **axes)

    # special case for pathological things like ~True/~False
    if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_:
        ret_value = res_t.type(obj)
    else:
        ret_value = typ(obj).astype(res_t)
        # The condition is to distinguish 0-dim array (returned in case of
        # scalar) and 1 element array
        # e.g. np.array(0) and np.array([0])
        if (
            len(obj.shape) == 1
            and len(obj) == 1
            and not isinstance(ret_value, np.ndarray)
        ):
            ret_value = np.array([ret_value]).astype(res_t)

    return ret_value
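
Note (reviewer sketch, not part of the commit): the alignment pass above is what makes pd.eval union the indexes of its operands before evaluating. A minimal example of that behavior through the public API:

import pandas as pd

s1 = pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"])
s2 = pd.Series([10.0, 20.0, 30.0], index=["b", "c", "d"])

# align_terms unions the two indexes; labels missing on one side become NaN
res = pd.eval("s1 + s2")
print(res.index.tolist())  # ['a', 'b', 'c', 'd']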
@@ -0,0 +1,2 @@
__all__ = ["eval"]
from pandas.core.computation.eval import eval
@@ -0,0 +1,8 @@
from __future__ import annotations

from pandas.compat._optional import import_optional_dependency

ne = import_optional_dependency("numexpr", errors="warn")
NUMEXPR_INSTALLED = ne is not None

__all__ = ["NUMEXPR_INSTALLED"]
@@ -0,0 +1,48 @@
from __future__ import annotations

from functools import reduce

import numpy as np

from pandas._config import get_option


def ensure_decoded(s) -> str:
    """
    If we have bytes, decode them to unicode.
    """
    if isinstance(s, (np.bytes_, bytes)):
        s = s.decode(get_option("display.encoding"))
    return s


def result_type_many(*arrays_and_dtypes):
    """
    Wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32)
    argument limit.
    """
    try:
        return np.result_type(*arrays_and_dtypes)
    except ValueError:
        # we have > NPY_MAXARGS terms in our expression
        return reduce(np.result_type, arrays_and_dtypes)
    except TypeError:
        from pandas.core.dtypes.cast import find_common_type
        from pandas.core.dtypes.common import is_extension_array_dtype

        arr_and_dtypes = list(arrays_and_dtypes)
        ea_dtypes, non_ea_dtypes = [], []
        for arr_or_dtype in arr_and_dtypes:
            if is_extension_array_dtype(arr_or_dtype):
                ea_dtypes.append(arr_or_dtype)
            else:
                non_ea_dtypes.append(arr_or_dtype)

        if non_ea_dtypes:
            try:
                np_dtype = np.result_type(*non_ea_dtypes)
            except ValueError:
                np_dtype = reduce(np.result_type, arrays_and_dtypes)
            return find_common_type(ea_dtypes + [np_dtype])

        return find_common_type(ea_dtypes)
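
Note (reviewer sketch, not part of the commit): result_type_many exists because np.result_type accepts at most NPY_MAXARGS arguments in one call; the fallback folds the arguments pairwise, as below.

from functools import reduce
import numpy as np

dtypes = [np.dtype("int64")] * 40 + [np.dtype("float32")] * 40  # many terms

try:
    common = np.result_type(*dtypes)
except ValueError:  # more than NPY_MAXARGS terms in a single call
    common = reduce(np.result_type, dtypes)
print(common)  # float64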
lib/python3.13/site-packages/pandas/core/computation/engines.py (new file, 143 lines)
@@ -0,0 +1,143 @@
"""
Engine classes for :func:`~pandas.eval`
"""
from __future__ import annotations

import abc
from typing import TYPE_CHECKING

from pandas.errors import NumExprClobberingError

from pandas.core.computation.align import (
    align_terms,
    reconstruct_object,
)
from pandas.core.computation.ops import (
    MATHOPS,
    REDUCTIONS,
)

from pandas.io.formats import printing

if TYPE_CHECKING:
    from pandas.core.computation.expr import Expr

_ne_builtins = frozenset(MATHOPS + REDUCTIONS)


def _check_ne_builtin_clash(expr: Expr) -> None:
    """
    Attempt to prevent foot-shooting in a helpful way.

    Parameters
    ----------
    expr : Expr
        Terms can contain
    """
    names = expr.names
    overlap = names & _ne_builtins

    if overlap:
        s = ", ".join([repr(x) for x in overlap])
        raise NumExprClobberingError(
            f'Variables in expression "{expr}" overlap with builtins: ({s})'
        )


class AbstractEngine(metaclass=abc.ABCMeta):
    """Object serving as a base class for all engines."""

    has_neg_frac = False

    def __init__(self, expr) -> None:
        self.expr = expr
        self.aligned_axes = None
        self.result_type = None

    def convert(self) -> str:
        """
        Convert an expression for evaluation.

        Defaults to return the expression as a string.
        """
        return printing.pprint_thing(self.expr)

    def evaluate(self) -> object:
        """
        Run the engine on the expression.

        This method performs alignment which is necessary no matter what engine
        is being used, thus its implementation is in the base class.

        Returns
        -------
        object
            The result of the passed expression.
        """
        if not self._is_aligned:
            self.result_type, self.aligned_axes = align_terms(self.expr.terms)

        # make sure no names in resolvers and locals/globals clash
        res = self._evaluate()
        return reconstruct_object(
            self.result_type, res, self.aligned_axes, self.expr.terms.return_type
        )

    @property
    def _is_aligned(self) -> bool:
        return self.aligned_axes is not None and self.result_type is not None

    @abc.abstractmethod
    def _evaluate(self):
        """
        Return an evaluated expression.

        Parameters
        ----------
        env : Scope
            The local and global environment in which to evaluate an
            expression.

        Notes
        -----
        Must be implemented by subclasses.
        """


class NumExprEngine(AbstractEngine):
    """NumExpr engine class"""

    has_neg_frac = True

    def _evaluate(self):
        import numexpr as ne

        # convert the expression to a valid numexpr expression
        s = self.convert()

        env = self.expr.env
        scope = env.full_scope
        _check_ne_builtin_clash(self.expr)
        return ne.evaluate(s, local_dict=scope)


class PythonEngine(AbstractEngine):
    """
    Evaluate an expression in Python space.

    Mostly for testing purposes.
    """

    has_neg_frac = False

    def evaluate(self):
        return self.expr()

    def _evaluate(self) -> None:
        pass


ENGINES: dict[str, type[AbstractEngine]] = {
    "numexpr": NumExprEngine,
    "python": PythonEngine,
}
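
Note (reviewer sketch, not part of the commit): the ENGINES mapping above is what pd.eval dispatches on via its engine argument; "numexpr" only works when the optional numexpr package is installed.

import pandas as pd

s = pd.Series(range(5))
print(pd.eval("s + 1", engine="python").tolist())  # PythonEngine: [1, 2, 3, 4, 5]
# pd.eval("s + 1", engine="numexpr")  # NumExprEngine, if numexpr is installed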
lib/python3.13/site-packages/pandas/core/computation/eval.py (new file, 415 lines)
@@ -0,0 +1,415 @@
"""
Top level ``eval`` module.
"""
from __future__ import annotations

import tokenize
from typing import TYPE_CHECKING
import warnings

from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.common import is_extension_array_dtype

from pandas.core.computation.engines import ENGINES
from pandas.core.computation.expr import (
    PARSERS,
    Expr,
)
from pandas.core.computation.parsing import tokenize_string
from pandas.core.computation.scope import ensure_scope
from pandas.core.generic import NDFrame

from pandas.io.formats.printing import pprint_thing

if TYPE_CHECKING:
    from pandas.core.computation.ops import BinOp


def _check_engine(engine: str | None) -> str:
    """
    Make sure a valid engine is passed.

    Parameters
    ----------
    engine : str
        String to validate.

    Raises
    ------
    KeyError
      * If an invalid engine is passed.
    ImportError
      * If numexpr was requested but doesn't exist.

    Returns
    -------
    str
        Engine name.
    """
    from pandas.core.computation.check import NUMEXPR_INSTALLED
    from pandas.core.computation.expressions import USE_NUMEXPR

    if engine is None:
        engine = "numexpr" if USE_NUMEXPR else "python"

    if engine not in ENGINES:
        valid_engines = list(ENGINES.keys())
        raise KeyError(
            f"Invalid engine '{engine}' passed, valid engines are {valid_engines}"
        )

    # TODO: validate this in a more general way (thinking of future engines
    # that won't necessarily be import-able)
    # Could potentially be done on engine instantiation
    if engine == "numexpr" and not NUMEXPR_INSTALLED:
        raise ImportError(
            "'numexpr' is not installed or an unsupported version. Cannot use "
            "engine='numexpr' for query/eval if 'numexpr' is not installed"
        )

    return engine


def _check_parser(parser: str):
    """
    Make sure a valid parser is passed.

    Parameters
    ----------
    parser : str

    Raises
    ------
    KeyError
      * If an invalid parser is passed
    """
    if parser not in PARSERS:
        raise KeyError(
            f"Invalid parser '{parser}' passed, valid parsers are {PARSERS.keys()}"
        )


def _check_resolvers(resolvers):
    if resolvers is not None:
        for resolver in resolvers:
            if not hasattr(resolver, "__getitem__"):
                name = type(resolver).__name__
                raise TypeError(
                    f"Resolver of type '{name}' does not "
                    "implement the __getitem__ method"
                )


def _check_expression(expr):
    """
    Make sure an expression is not an empty string

    Parameters
    ----------
    expr : object
        An object that can be converted to a string

    Raises
    ------
    ValueError
      * If expr is an empty string
    """
    if not expr:
        raise ValueError("expr cannot be an empty string")


def _convert_expression(expr) -> str:
    """
    Convert an object to an expression.

    This function converts an object to an expression (a unicode string) and
    checks to make sure it isn't empty after conversion. This is used to
    convert operators to their string representation for recursive calls to
    :func:`~pandas.eval`.

    Parameters
    ----------
    expr : object
        The object to be converted to a string.

    Returns
    -------
    str
        The string representation of an object.

    Raises
    ------
    ValueError
      * If the expression is empty.
    """
    s = pprint_thing(expr)
    _check_expression(s)
    return s


def _check_for_locals(expr: str, stack_level: int, parser: str):
    at_top_of_stack = stack_level == 0
    not_pandas_parser = parser != "pandas"

    if not_pandas_parser:
        msg = "The '@' prefix is only supported by the pandas parser"
    elif at_top_of_stack:
        msg = (
            "The '@' prefix is not allowed in top-level eval calls.\n"
            "please refer to your variables by name without the '@' prefix."
        )

    if at_top_of_stack or not_pandas_parser:
        for toknum, tokval in tokenize_string(expr):
            if toknum == tokenize.OP and tokval == "@":
                raise SyntaxError(msg)


def eval(
    expr: str | BinOp,  # we leave BinOp out of the docstr bc it isn't for users
    parser: str = "pandas",
    engine: str | None = None,
    local_dict=None,
    global_dict=None,
    resolvers=(),
    level: int = 0,
    target=None,
    inplace: bool = False,
):
    """
    Evaluate a Python expression as a string using various backends.

    The following arithmetic operations are supported: ``+``, ``-``, ``*``,
    ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following
    boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not).
    Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`,
    :keyword:`or`, and :keyword:`not` with the same semantics as the
    corresponding bitwise operators. :class:`~pandas.Series` and
    :class:`~pandas.DataFrame` objects are supported and behave as they would
    with plain ol' Python evaluation.

    Parameters
    ----------
    expr : str
        The expression to evaluate. This string cannot contain any Python
        `statements
        <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__,
        only Python `expressions
        <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__.
    parser : {'pandas', 'python'}, default 'pandas'
        The parser to use to construct the syntax tree from the expression. The
        default of ``'pandas'`` parses code slightly different than standard
        Python. Alternatively, you can parse an expression using the
        ``'python'`` parser to retain strict Python semantics. See the
        :ref:`enhancing performance <enhancingperf.eval>` documentation for
        more details.
    engine : {'python', 'numexpr'}, default 'numexpr'

        The engine used to evaluate the expression. Supported engines are

        - None : tries to use ``numexpr``, falls back to ``python``
        - ``'numexpr'`` : This default engine evaluates pandas objects using
          numexpr for large speed ups in complex expressions with large frames.
        - ``'python'`` : Performs operations as if you had ``eval``'d in top
          level python. This engine is generally not that useful.

        More backends may be available in the future.
    local_dict : dict or None, optional
        A dictionary of local variables, taken from locals() by default.
    global_dict : dict or None, optional
        A dictionary of global variables, taken from globals() by default.
    resolvers : list of dict-like or None, optional
        A list of objects implementing the ``__getitem__`` special method that
        you can use to inject an additional collection of namespaces to use for
        variable lookup. For example, this is used in the
        :meth:`~DataFrame.query` method to inject the
        ``DataFrame.index`` and ``DataFrame.columns``
        variables that refer to their respective :class:`~pandas.DataFrame`
        instance attributes.
    level : int, optional
        The number of prior stack frames to traverse and add to the current
        scope. Most users will **not** need to change this parameter.
    target : object, optional, default None
        This is the target object for assignment. It is used when there is
        variable assignment in the expression. If so, then `target` must
        support item assignment with string keys, and if a copy is being
        returned, it must also support `.copy()`.
    inplace : bool, default False
        If `target` is provided, and the expression mutates `target`, whether
        to modify `target` inplace. Otherwise, return a copy of `target` with
        the mutation.

    Returns
    -------
    ndarray, numeric scalar, DataFrame, Series, or None
        The completion value of evaluating the given code or None if ``inplace=True``.

    Raises
    ------
    ValueError
        There are many instances where such an error can be raised:

        - `target=None`, but the expression is multiline.
        - The expression is multiline, but not all them have item assignment.
          An example of such an arrangement is this:

          a = b + 1
          a + 2

          Here, there are expressions on different lines, making it multiline,
          but the last line has no variable assigned to the output of `a + 2`.
        - `inplace=True`, but the expression is missing item assignment.
        - Item assignment is provided, but the `target` does not support
          string item assignment.
        - Item assignment is provided and `inplace=False`, but the `target`
          does not support the `.copy()` method

    See Also
    --------
    DataFrame.query : Evaluates a boolean expression to query the columns
        of a frame.
    DataFrame.eval : Evaluate a string describing operations on
        DataFrame columns.

    Notes
    -----
    The ``dtype`` of any objects involved in an arithmetic ``%`` operation are
    recursively cast to ``float64``.

    See the :ref:`enhancing performance <enhancingperf.eval>` documentation for
    more details.

    Examples
    --------
    >>> df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]})
    >>> df
      animal  age
    0    dog   10
    1    pig   20

    We can add a new column using ``pd.eval``:

    >>> pd.eval("double_age = df.age * 2", target=df)
      animal  age  double_age
    0    dog   10          20
    1    pig   20          40
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    exprs: list[str | BinOp]
    if isinstance(expr, str):
        _check_expression(expr)
        exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""]
    else:
        # ops.BinOp; for internal compat, not intended to be passed by users
        exprs = [expr]
    multi_line = len(exprs) > 1

    if multi_line and target is None:
        raise ValueError(
            "multi-line expressions are only valid in the "
            "context of data, use DataFrame.eval"
        )
    engine = _check_engine(engine)
    _check_parser(parser)
    _check_resolvers(resolvers)

    ret = None
    first_expr = True
    target_modified = False

    for expr in exprs:
        expr = _convert_expression(expr)
        _check_for_locals(expr, level, parser)

        # get our (possibly passed-in) scope
        env = ensure_scope(
            level + 1,
            global_dict=global_dict,
            local_dict=local_dict,
            resolvers=resolvers,
            target=target,
        )

        parsed_expr = Expr(expr, engine=engine, parser=parser, env=env)

        if engine == "numexpr" and (
            is_extension_array_dtype(parsed_expr.terms.return_type)
            or getattr(parsed_expr.terms, "operand_types", None) is not None
            and any(
                is_extension_array_dtype(elem)
                for elem in parsed_expr.terms.operand_types
            )
        ):
            warnings.warn(
                "Engine has switched to 'python' because numexpr does not support "
                "extension array dtypes. Please set your engine to python manually.",
                RuntimeWarning,
                stacklevel=find_stack_level(),
            )
            engine = "python"

        # construct the engine and evaluate the parsed expression
        eng = ENGINES[engine]
        eng_inst = eng(parsed_expr)
        ret = eng_inst.evaluate()

        if parsed_expr.assigner is None:
            if multi_line:
                raise ValueError(
                    "Multi-line expressions are only valid "
                    "if all expressions contain an assignment"
                )
            if inplace:
                raise ValueError("Cannot operate inplace if there is no assignment")

        # assign if needed
        assigner = parsed_expr.assigner
        if env.target is not None and assigner is not None:
            target_modified = True

            # if returning a copy, copy only on the first assignment
            if not inplace and first_expr:
                try:
                    target = env.target
                    if isinstance(target, NDFrame):
                        target = target.copy(deep=None)
                    else:
                        target = target.copy()
                except AttributeError as err:
                    raise ValueError("Cannot return a copy of the target") from err
            else:
                target = env.target

            # TypeError is most commonly raised (e.g. int, list), but you
            # get IndexError if you try to do this assignment on np.ndarray.
            # we will ignore numpy warnings here; e.g. if trying
            # to use a non-numeric indexer
            try:
                if inplace and isinstance(target, NDFrame):
                    target.loc[:, assigner] = ret
                else:
                    target[assigner] = ret  # pyright: ignore[reportGeneralTypeIssues]
            except (TypeError, IndexError) as err:
                raise ValueError("Cannot assign expression output to target") from err

            if not resolvers:
                resolvers = ({assigner: ret},)
            else:
                # existing resolver needs updated to handle
                # case of mutating existing column in copy
                for resolver in resolvers:
                    if assigner in resolver:
                        resolver[assigner] = ret
                        break
                else:
                    resolvers += ({assigner: ret},)

            ret = None
            first_expr = False

    # We want to exclude `inplace=None` as being False.
    if inplace is False:
        return target if target_modified else ret
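
Note (reviewer sketch, not part of the commit): the target/inplace branch at the end of eval() behaves as follows; this mirrors the docstring example above.

import pandas as pd

df = pd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]})

# inplace=False (default): a modified copy is returned, df itself is untouched
out = pd.eval("double_age = df.age * 2", target=df)
print("double_age" in df.columns, "double_age" in out.columns)  # False True

# inplace=True mutates the target and returns None
pd.eval("double_age = df.age * 2", target=df, inplace=True)
print(df["double_age"].tolist())  # [20, 40]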
lib/python3.13/site-packages/pandas/core/computation/expr.py (new file, 836 lines)
@@ -0,0 +1,836 @@
"""
:func:`~pandas.eval` parsers.
"""
from __future__ import annotations

import ast
from functools import (
    partial,
    reduce,
)
from keyword import iskeyword
import tokenize
from typing import (
    Callable,
    ClassVar,
    TypeVar,
)

import numpy as np

from pandas.errors import UndefinedVariableError

import pandas.core.common as com
from pandas.core.computation.ops import (
    ARITH_OPS_SYMS,
    BOOL_OPS_SYMS,
    CMP_OPS_SYMS,
    LOCAL_TAG,
    MATHOPS,
    REDUCTIONS,
    UNARY_OPS_SYMS,
    BinOp,
    Constant,
    FuncNode,
    Op,
    Term,
    UnaryOp,
    is_term,
)
from pandas.core.computation.parsing import (
    clean_backtick_quoted_toks,
    tokenize_string,
)
from pandas.core.computation.scope import Scope

from pandas.io.formats import printing


def _rewrite_assign(tok: tuple[int, str]) -> tuple[int, str]:
    """
    Rewrite the assignment operator for PyTables expressions that use ``=``
    as a substitute for ``==``.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tuple of int, str
        Either the input or token or the replacement values
    """
    toknum, tokval = tok
    return toknum, "==" if tokval == "=" else tokval


def _replace_booleans(tok: tuple[int, str]) -> tuple[int, str]:
    """
    Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise
    precedence is changed to boolean precedence.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tuple of int, str
        Either the input or token or the replacement values
    """
    toknum, tokval = tok
    if toknum == tokenize.OP:
        if tokval == "&":
            return tokenize.NAME, "and"
        elif tokval == "|":
            return tokenize.NAME, "or"
        return toknum, tokval
    return toknum, tokval


def _replace_locals(tok: tuple[int, str]) -> tuple[int, str]:
    """
    Replace local variables with a syntactically valid name.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tuple of int, str
        Either the input or token or the replacement values

    Notes
    -----
    This is somewhat of a hack in that we rewrite a string such as ``'@a'`` as
    ``'__pd_eval_local_a'`` by telling the tokenizer that ``__pd_eval_local_``
    is a ``tokenize.OP`` and to replace the ``'@'`` symbol with it.
    """
    toknum, tokval = tok
    if toknum == tokenize.OP and tokval == "@":
        return tokenize.OP, LOCAL_TAG
    return toknum, tokval


def _compose2(f, g):
    """
    Compose 2 callables.
    """
    return lambda *args, **kwargs: f(g(*args, **kwargs))


def _compose(*funcs):
    """
    Compose 2 or more callables.
    """
    assert len(funcs) > 1, "At least 2 callables must be passed to compose"
    return reduce(_compose2, funcs)


def _preparse(
    source: str,
    f=_compose(
        _replace_locals, _replace_booleans, _rewrite_assign, clean_backtick_quoted_toks
    ),
) -> str:
    """
    Compose a collection of tokenization functions.

    Parameters
    ----------
    source : str
        A Python source code string
    f : callable
        This takes a tuple of (toknum, tokval) as its argument and returns a
        tuple with the same structure but possibly different elements. Defaults
        to the composition of ``_rewrite_assign``, ``_replace_booleans``, and
        ``_replace_locals``.

    Returns
    -------
    str
        Valid Python source code

    Notes
    -----
    The `f` parameter can be any callable that takes *and* returns input of the
    form ``(toknum, tokval)``, where ``toknum`` is one of the constants from
    the ``tokenize`` module and ``tokval`` is a string.
    """
    assert callable(f), "f must be callable"
    return tokenize.untokenize(f(x) for x in tokenize_string(source))


def _is_type(t):
    """
    Factory for a type checking function of type ``t`` or tuple of types.
    """
    return lambda x: isinstance(x.value, t)


_is_list = _is_type(list)
_is_str = _is_type(str)


# partition all AST nodes
_all_nodes = frozenset(
    node
    for node in (getattr(ast, name) for name in dir(ast))
    if isinstance(node, type) and issubclass(node, ast.AST)
)


def _filter_nodes(superclass, all_nodes=_all_nodes):
    """
    Filter out AST nodes that are subclasses of ``superclass``.
    """
    node_names = (node.__name__ for node in all_nodes if issubclass(node, superclass))
    return frozenset(node_names)


_all_node_names = frozenset(x.__name__ for x in _all_nodes)
_mod_nodes = _filter_nodes(ast.mod)
_stmt_nodes = _filter_nodes(ast.stmt)
_expr_nodes = _filter_nodes(ast.expr)
_expr_context_nodes = _filter_nodes(ast.expr_context)
_boolop_nodes = _filter_nodes(ast.boolop)
_operator_nodes = _filter_nodes(ast.operator)
_unary_op_nodes = _filter_nodes(ast.unaryop)
_cmp_op_nodes = _filter_nodes(ast.cmpop)
_comprehension_nodes = _filter_nodes(ast.comprehension)
_handler_nodes = _filter_nodes(ast.excepthandler)
_arguments_nodes = _filter_nodes(ast.arguments)
_keyword_nodes = _filter_nodes(ast.keyword)
_alias_nodes = _filter_nodes(ast.alias)


# nodes that we don't support directly but are needed for parsing
_hacked_nodes = frozenset(["Assign", "Module", "Expr"])


_unsupported_expr_nodes = frozenset(
    [
        "Yield",
        "GeneratorExp",
        "IfExp",
        "DictComp",
        "SetComp",
        "Repr",
        "Lambda",
        "Set",
        "AST",
        "Is",
        "IsNot",
    ]
)

# these nodes are low priority or won't ever be supported (e.g., AST)
_unsupported_nodes = (
    _stmt_nodes
    | _mod_nodes
    | _handler_nodes
    | _arguments_nodes
    | _keyword_nodes
    | _alias_nodes
    | _expr_context_nodes
    | _unsupported_expr_nodes
) - _hacked_nodes

# we're adding a different assignment in some cases to be equality comparison
# and we don't want `stmt` and friends in their so get only the class whose
# names are capitalized
_base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes
intersection = _unsupported_nodes & _base_supported_nodes
_msg = f"cannot both support and not support {intersection}"
assert not intersection, _msg


def _node_not_implemented(node_name: str) -> Callable[..., None]:
    """
    Return a function that raises a NotImplementedError with a passed node name.
    """

    def f(self, *args, **kwargs):
        raise NotImplementedError(f"'{node_name}' nodes are not implemented")

    return f


# should be bound by BaseExprVisitor but that creates a circular dependency:
# _T is used in disallow, but disallow is used to define BaseExprVisitor
# https://github.com/microsoft/pyright/issues/2315
_T = TypeVar("_T")


def disallow(nodes: set[str]) -> Callable[[type[_T]], type[_T]]:
    """
    Decorator to disallow certain nodes from parsing. Raises a
    NotImplementedError instead.

    Returns
    -------
    callable
    """

    def disallowed(cls: type[_T]) -> type[_T]:
        # error: "Type[_T]" has no attribute "unsupported_nodes"
        cls.unsupported_nodes = ()  # type: ignore[attr-defined]
        for node in nodes:
            new_method = _node_not_implemented(node)
            name = f"visit_{node}"
            # error: "Type[_T]" has no attribute "unsupported_nodes"
            cls.unsupported_nodes += (name,)  # type: ignore[attr-defined]
            setattr(cls, name, new_method)
        return cls

    return disallowed


def _op_maker(op_class, op_symbol):
    """
    Return a function to create an op class with its symbol already passed.

    Returns
    -------
    callable
    """

    def f(self, node, *args, **kwargs):
        """
        Return a partial function with an Op subclass with an operator already passed.

        Returns
        -------
        callable
        """
        return partial(op_class, op_symbol, *args, **kwargs)

    return f


_op_classes = {"binary": BinOp, "unary": UnaryOp}


def add_ops(op_classes):
    """
    Decorator to add default implementation of ops.
    """

    def f(cls):
        for op_attr_name, op_class in op_classes.items():
            ops = getattr(cls, f"{op_attr_name}_ops")
            ops_map = getattr(cls, f"{op_attr_name}_op_nodes_map")
            for op in ops:
                op_node = ops_map[op]
                if op_node is not None:
                    made_op = _op_maker(op_class, op)
                    setattr(cls, f"visit_{op_node}", made_op)
        return cls

    return f


@disallow(_unsupported_nodes)
@add_ops(_op_classes)
class BaseExprVisitor(ast.NodeVisitor):
    """
    Custom ast walker. Parsers of other engines should subclass this class
    if necessary.

    Parameters
    ----------
    env : Scope
    engine : str
    parser : str
    preparser : callable
    """

    const_type: ClassVar[type[Term]] = Constant
    term_type: ClassVar[type[Term]] = Term

    binary_ops = CMP_OPS_SYMS + BOOL_OPS_SYMS + ARITH_OPS_SYMS
    binary_op_nodes = (
        "Gt",
        "Lt",
        "GtE",
        "LtE",
        "Eq",
        "NotEq",
        "In",
        "NotIn",
        "BitAnd",
        "BitOr",
        "And",
        "Or",
        "Add",
        "Sub",
        "Mult",
        "Div",
        "Pow",
        "FloorDiv",
        "Mod",
    )
    binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes))

    unary_ops = UNARY_OPS_SYMS
    unary_op_nodes = "UAdd", "USub", "Invert", "Not"
    unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes))

    rewrite_map = {
        ast.Eq: ast.In,
        ast.NotEq: ast.NotIn,
        ast.In: ast.In,
        ast.NotIn: ast.NotIn,
    }

    unsupported_nodes: tuple[str, ...]

    def __init__(self, env, engine, parser, preparser=_preparse) -> None:
        self.env = env
        self.engine = engine
        self.parser = parser
        self.preparser = preparser
        self.assigner = None

    def visit(self, node, **kwargs):
        if isinstance(node, str):
            clean = self.preparser(node)
            try:
                node = ast.fix_missing_locations(ast.parse(clean))
            except SyntaxError as e:
                if any(iskeyword(x) for x in clean.split()):
                    e.msg = "Python keyword not valid identifier in numexpr query"
                raise e

        method = f"visit_{type(node).__name__}"
        visitor = getattr(self, method)
        return visitor(node, **kwargs)

    def visit_Module(self, node, **kwargs):
        if len(node.body) != 1:
            raise SyntaxError("only a single expression is allowed")
        expr = node.body[0]
        return self.visit(expr, **kwargs)

    def visit_Expr(self, node, **kwargs):
        return self.visit(node.value, **kwargs)

    def _rewrite_membership_op(self, node, left, right):
        # the kind of the operator (is actually an instance)
        op_instance = node.op
        op_type = type(op_instance)

        # must be two terms and the comparison operator must be ==/!=/in/not in
        if is_term(left) and is_term(right) and op_type in self.rewrite_map:
            left_list, right_list = map(_is_list, (left, right))
            left_str, right_str = map(_is_str, (left, right))

            # if there are any strings or lists in the expression
            if left_list or right_list or left_str or right_str:
                op_instance = self.rewrite_map[op_type]()

            # pop the string variable out of locals and replace it with a list
            # of one string, kind of a hack
            if right_str:
                name = self.env.add_tmp([right.value])
                right = self.term_type(name, self.env)

            if left_str:
                name = self.env.add_tmp([left.value])
                left = self.term_type(name, self.env)

        op = self.visit(op_instance)
        return op, op_instance, left, right

    def _maybe_transform_eq_ne(self, node, left=None, right=None):
        if left is None:
            left = self.visit(node.left, side="left")
        if right is None:
            right = self.visit(node.right, side="right")
        op, op_class, left, right = self._rewrite_membership_op(node, left, right)
        return op, op_class, left, right

    def _maybe_downcast_constants(self, left, right):
        f32 = np.dtype(np.float32)
        if (
            left.is_scalar
            and hasattr(left, "value")
            and not right.is_scalar
            and right.return_type == f32
        ):
            # right is a float32 array, left is a scalar
            name = self.env.add_tmp(np.float32(left.value))
            left = self.term_type(name, self.env)
        if (
            right.is_scalar
            and hasattr(right, "value")
            and not left.is_scalar
            and left.return_type == f32
        ):
            # left is a float32 array, right is a scalar
            name = self.env.add_tmp(np.float32(right.value))
            right = self.term_type(name, self.env)

        return left, right

    def _maybe_eval(self, binop, eval_in_python):
        # eval `in` and `not in` (for now) in "partial" python space
        # things that can be evaluated in "eval" space will be turned into
        # temporary variables. for example,
        # [1,2] in a + 2 * b
        # in that case a + 2 * b will be evaluated using numexpr, and the "in"
        # call will be evaluated using isin (in python space)
        return binop.evaluate(
            self.env, self.engine, self.parser, self.term_type, eval_in_python
        )

    def _maybe_evaluate_binop(
        self,
        op,
        op_class,
        lhs,
        rhs,
        eval_in_python=("in", "not in"),
        maybe_eval_in_python=("==", "!=", "<", ">", "<=", ">="),
    ):
        res = op(lhs, rhs)

        if res.has_invalid_return_type:
            raise TypeError(
                f"unsupported operand type(s) for {res.op}: "
                f"'{lhs.type}' and '{rhs.type}'"
            )

        if self.engine != "pytables" and (
            res.op in CMP_OPS_SYMS
            and getattr(lhs, "is_datetime", False)
            or getattr(rhs, "is_datetime", False)
        ):
            # all date ops must be done in python bc numexpr doesn't work
            # well with NaT
            return self._maybe_eval(res, self.binary_ops)

        if res.op in eval_in_python:
            # "in"/"not in" ops are always evaluated in python
            return self._maybe_eval(res, eval_in_python)
        elif self.engine != "pytables":
            if (
                getattr(lhs, "return_type", None) == object
                or getattr(rhs, "return_type", None) == object
            ):
                # evaluate "==" and "!=" in python if either of our operands
                # has an object return type
                return self._maybe_eval(res, eval_in_python + maybe_eval_in_python)
        return res

    def visit_BinOp(self, node, **kwargs):
        op, op_class, left, right = self._maybe_transform_eq_ne(node)
        left, right = self._maybe_downcast_constants(left, right)
        return self._maybe_evaluate_binop(op, op_class, left, right)

    def visit_UnaryOp(self, node, **kwargs):
        op = self.visit(node.op)
        operand = self.visit(node.operand)
        return op(operand)

    def visit_Name(self, node, **kwargs) -> Term:
        return self.term_type(node.id, self.env, **kwargs)

    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
    def visit_NameConstant(self, node, **kwargs) -> Term:
        return self.const_type(node.value, self.env)

    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
    def visit_Num(self, node, **kwargs) -> Term:
        return self.const_type(node.value, self.env)

    def visit_Constant(self, node, **kwargs) -> Term:
        return self.const_type(node.value, self.env)

    # TODO(py314): deprecated since Python 3.8. Remove after Python 3.14 is min
    def visit_Str(self, node, **kwargs) -> Term:
        name = self.env.add_tmp(node.s)
        return self.term_type(name, self.env)

    def visit_List(self, node, **kwargs) -> Term:
        name = self.env.add_tmp([self.visit(e)(self.env) for e in node.elts])
        return self.term_type(name, self.env)

    visit_Tuple = visit_List

    def visit_Index(self, node, **kwargs):
        """df.index[4]"""
        return self.visit(node.value)

    def visit_Subscript(self, node, **kwargs) -> Term:
        from pandas import eval as pd_eval

        value = self.visit(node.value)
        slobj = self.visit(node.slice)
        result = pd_eval(
            slobj, local_dict=self.env, engine=self.engine, parser=self.parser
        )
        try:
            # a Term instance
            v = value.value[result]
        except AttributeError:
            # an Op instance
            lhs = pd_eval(
                value, local_dict=self.env, engine=self.engine, parser=self.parser
            )
            v = lhs[result]
        name = self.env.add_tmp(v)
        return self.term_type(name, env=self.env)

    def visit_Slice(self, node, **kwargs) -> slice:
        """df.index[slice(4,6)]"""
        lower = node.lower
        if lower is not None:
            lower = self.visit(lower).value
        upper = node.upper
        if upper is not None:
            upper = self.visit(upper).value
        step = node.step
        if step is not None:
            step = self.visit(step).value

        return slice(lower, upper, step)

    def visit_Assign(self, node, **kwargs):
        """
        support a single assignment node, like

        c = a + b

        set the assigner at the top level, must be a Name node which
        might or might not exist in the resolvers

        """
        if len(node.targets) != 1:
            raise SyntaxError("can only assign a single expression")
        if not isinstance(node.targets[0], ast.Name):
            raise SyntaxError("left hand side of an assignment must be a single name")
        if self.env.target is None:
            raise ValueError("cannot assign without a target object")

        try:
            assigner = self.visit(node.targets[0], **kwargs)
        except UndefinedVariableError:
            assigner = node.targets[0].id

        self.assigner = getattr(assigner, "name", assigner)
        if self.assigner is None:
            raise SyntaxError(
                "left hand side of an assignment must be a single resolvable name"
            )

        return self.visit(node.value, **kwargs)

    def visit_Attribute(self, node, **kwargs):
        attr = node.attr
        value = node.value

        ctx = node.ctx
        if isinstance(ctx, ast.Load):
            # resolve the value
            resolved = self.visit(value).value
            try:
                v = getattr(resolved, attr)
                name = self.env.add_tmp(v)
                return self.term_type(name, self.env)
            except AttributeError:
                # something like datetime.datetime where scope is overridden
                if isinstance(value, ast.Name) and value.id == attr:
                    return resolved
                raise

        raise ValueError(f"Invalid Attribute context {type(ctx).__name__}")

    def visit_Call(self, node, side=None, **kwargs):
        if isinstance(node.func, ast.Attribute) and node.func.attr != "__call__":
            res = self.visit_Attribute(node.func)
        elif not isinstance(node.func, ast.Name):
            raise TypeError("Only named functions are supported")
        else:
            try:
                res = self.visit(node.func)
            except UndefinedVariableError:
                # Check if this is a supported function name
                try:
                    res = FuncNode(node.func.id)
                except ValueError:
                    # Raise original error
                    raise

        if res is None:
            # error: "expr" has no attribute "id"
            raise ValueError(
                f"Invalid function call {node.func.id}"  # type: ignore[attr-defined]
            )
        if hasattr(res, "value"):
            res = res.value

        if isinstance(res, FuncNode):
            new_args = [self.visit(arg) for arg in node.args]

            if node.keywords:
                raise TypeError(
                    f'Function "{res.name}" does not support keyword arguments'
                )

            return res(*new_args)

        else:
            new_args = [self.visit(arg)(self.env) for arg in node.args]

            for key in node.keywords:
                if not isinstance(key, ast.keyword):
                    # error: "expr" has no attribute "id"
                    raise ValueError(
                        "keyword error in function call "
                        f"'{node.func.id}'"  # type: ignore[attr-defined]
                    )

                if key.arg:
                    kwargs[key.arg] = self.visit(key.value)(self.env)

            name = self.env.add_tmp(res(*new_args, **kwargs))
            return self.term_type(name=name, env=self.env)

    def translate_In(self, op):
        return op

    def visit_Compare(self, node, **kwargs):
        ops = node.ops
        comps = node.comparators

        # base case: we have something like a CMP b
        if len(comps) == 1:
            op = self.translate_In(ops[0])
            binop = ast.BinOp(op=op, left=node.left, right=comps[0])
            return self.visit(binop)

        # recursive case: we have a chained comparison, a CMP b CMP c, etc.
        left = node.left
        values = []
        for op, comp in zip(ops, comps):
            new_node = self.visit(
                ast.Compare(comparators=[comp], left=left, ops=[self.translate_In(op)])
            )
            left = comp
            values.append(new_node)
        return self.visit(ast.BoolOp(op=ast.And(), values=values))

    def _try_visit_binop(self, bop):
        if isinstance(bop, (Op, Term)):
            return bop
        return self.visit(bop)

    def visit_BoolOp(self, node, **kwargs):
        def visitor(x, y):
            lhs = self._try_visit_binop(x)
            rhs = self._try_visit_binop(y)

            op, op_class, lhs, rhs = self._maybe_transform_eq_ne(node, lhs, rhs)
            return self._maybe_evaluate_binop(op, node.op, lhs, rhs)

        operands = node.values
        return reduce(visitor, operands)


_python_not_supported = frozenset(["Dict", "BoolOp", "In", "NotIn"])
_numexpr_supported_calls = frozenset(REDUCTIONS + MATHOPS)


@disallow(
    (_unsupported_nodes | _python_not_supported)
    - (_boolop_nodes | frozenset(["BoolOp", "Attribute", "In", "NotIn", "Tuple"]))
)
class PandasExprVisitor(BaseExprVisitor):
    def __init__(
        self,
        env,
        engine,
        parser,
        preparser=partial(
            _preparse,
            f=_compose(_replace_locals, _replace_booleans, clean_backtick_quoted_toks),
        ),
    ) -> None:
        super().__init__(env, engine, parser, preparser)


@disallow(_unsupported_nodes | _python_not_supported | frozenset(["Not"]))
class PythonExprVisitor(BaseExprVisitor):
    def __init__(
        self, env, engine, parser, preparser=lambda source, f=None: source
    ) -> None:
        super().__init__(env, engine, parser, preparser=preparser)


class Expr:
    """
    Object encapsulating an expression.

    Parameters
    ----------
    expr : str
    engine : str, optional, default 'numexpr'
    parser : str, optional, default 'pandas'
    env : Scope, optional, default None
    level : int, optional, default 2
    """

    env: Scope
    engine: str
    parser: str

    def __init__(
        self,
        expr,
        engine: str = "numexpr",
        parser: str = "pandas",
        env: Scope | None = None,
        level: int = 0,
    ) -> None:
        self.expr = expr
        self.env = env or Scope(level=level + 1)
        self.engine = engine
        self.parser = parser
        self._visitor = PARSERS[parser](self.env, self.engine, self.parser)
        self.terms = self.parse()

    @property
    def assigner(self):
        return getattr(self._visitor, "assigner", None)

    def __call__(self):
        return self.terms(self.env)

    def __repr__(self) -> str:
        return printing.pprint_thing(self.terms)

    def __len__(self) -> int:
        return len(self.expr)

    def parse(self):
        """
        Parse an expression.
        """
        return self._visitor.visit(self.expr)

    @property
    def names(self):
        """
        Get the names in an expression.
        """
        if is_term(self.terms):
            return frozenset([self.terms.name])
        return frozenset(term.name for term in com.flatten(self.terms))


PARSERS = {"python": PythonExprVisitor, "pandas": PandasExprVisitor}
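
Note (reviewer sketch, not part of the commit): the two PARSERS differ only in preparsing. The 'pandas' visitor rewrites & and | to boolean precedence and resolves @-prefixed locals (via _replace_locals above); the 'python' visitor keeps strict Python semantics.

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
x = 2

# 'pandas' parser: & gets boolean precedence, i.e. (a > 1) & (b < 6)
print(df.query("a > 1 & b < 6").shape[0])  # 1

# @-locals are rewritten to resolvable names by the pandas parser
print(df.query("a > @x").shape[0])  # 1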
@@ -0,0 +1,286 @@
"""
Expressions
-----------

Offer fast expression evaluation through numexpr

"""
from __future__ import annotations

import operator
from typing import TYPE_CHECKING
import warnings

import numpy as np

from pandas._config import get_option

from pandas.util._exceptions import find_stack_level

from pandas.core import roperator
from pandas.core.computation.check import NUMEXPR_INSTALLED

if NUMEXPR_INSTALLED:
    import numexpr as ne

if TYPE_CHECKING:
    from pandas._typing import FuncType

_TEST_MODE: bool | None = None
_TEST_RESULT: list[bool] = []
USE_NUMEXPR = NUMEXPR_INSTALLED
_evaluate: FuncType | None = None
_where: FuncType | None = None

# the set of dtypes that we will allow pass to numexpr
_ALLOWED_DTYPES = {
    "evaluate": {"int64", "int32", "float64", "float32", "bool"},
    "where": {"int64", "float64", "bool"},
}

# the minimum prod shape that we will use numexpr
_MIN_ELEMENTS = 1_000_000


def set_use_numexpr(v: bool = True) -> None:
    # set/unset to use numexpr
    global USE_NUMEXPR
    if NUMEXPR_INSTALLED:
        USE_NUMEXPR = v

    # choose what we are going to do
    global _evaluate, _where

    _evaluate = _evaluate_numexpr if USE_NUMEXPR else _evaluate_standard
    _where = _where_numexpr if USE_NUMEXPR else _where_standard


def set_numexpr_threads(n=None) -> None:
    # if we are using numexpr, set the threads to n
    # otherwise reset
    if NUMEXPR_INSTALLED and USE_NUMEXPR:
        if n is None:
            n = ne.detect_number_of_cores()
        ne.set_num_threads(n)


def _evaluate_standard(op, op_str, a, b):
    """
    Standard evaluation.
    """
    if _TEST_MODE:
        _store_test_result(False)
    return op(a, b)


def _can_use_numexpr(op, op_str, a, b, dtype_check) -> bool:
    """return a boolean if we WILL be using numexpr"""
    if op_str is not None:
        # required min elements (otherwise we are adding overhead)
        if a.size > _MIN_ELEMENTS:
            # check for dtype compatibility
            dtypes: set[str] = set()
            for o in [a, b]:
                # ndarray and Series Case
                if hasattr(o, "dtype"):
                    dtypes |= {o.dtype.name}

            # allowed are a superset
            if not len(dtypes) or _ALLOWED_DTYPES[dtype_check] >= dtypes:
                return True

    return False


def _evaluate_numexpr(op, op_str, a, b):
    result = None

    if _can_use_numexpr(op, op_str, a, b, "evaluate"):
        is_reversed = op.__name__.strip("_").startswith("r")
        if is_reversed:
            # we were originally called by a reversed op method
            a, b = b, a

        a_value = a
        b_value = b

        try:
            result = ne.evaluate(
                f"a_value {op_str} b_value",
                local_dict={"a_value": a_value, "b_value": b_value},
                casting="safe",
            )
        except TypeError:
            # numexpr raises eg for array ** array with integers
            # (https://github.com/pydata/numexpr/issues/379)
            pass
        except NotImplementedError:
            if _bool_arith_fallback(op_str, a, b):
                pass
            else:
                raise

        if is_reversed:
            # reverse order to original for fallback
            a, b = b, a

    if _TEST_MODE:
        _store_test_result(result is not None)

    if result is None:
        result = _evaluate_standard(op, op_str, a, b)

    return result


_op_str_mapping = {
    operator.add: "+",
    roperator.radd: "+",
    operator.mul: "*",
    roperator.rmul: "*",
    operator.sub: "-",
    roperator.rsub: "-",
    operator.truediv: "/",
    roperator.rtruediv: "/",
    # floordiv not supported by numexpr 2.x
    operator.floordiv: None,
    roperator.rfloordiv: None,
    # we require Python semantics for mod of negative for backwards compatibility
    # see https://github.com/pydata/numexpr/issues/365
    # so sticking with unaccelerated for now GH#36552
    operator.mod: None,
    roperator.rmod: None,
    operator.pow: "**",
    roperator.rpow: "**",
    operator.eq: "==",
    operator.ne: "!=",
    operator.le: "<=",
    operator.lt: "<",
    operator.ge: ">=",
    operator.gt: ">",
    operator.and_: "&",
    roperator.rand_: "&",
    operator.or_: "|",
    roperator.ror_: "|",
    operator.xor: "^",
    roperator.rxor: "^",
    divmod: None,
    roperator.rdivmod: None,
}


def _where_standard(cond, a, b):
    # Caller is responsible for extracting ndarray if necessary
    return np.where(cond, a, b)


def _where_numexpr(cond, a, b):
    # Caller is responsible for extracting ndarray if necessary
    result = None

    if _can_use_numexpr(None, "where", a, b, "where"):
        result = ne.evaluate(
            "where(cond_value, a_value, b_value)",
            local_dict={"cond_value": cond, "a_value": a, "b_value": b},
            casting="safe",
        )

    if result is None:
        result = _where_standard(cond, a, b)

    return result


# turn myself on
set_use_numexpr(get_option("compute.use_numexpr"))


def _has_bool_dtype(x):
    try:
        return x.dtype == bool
    except AttributeError:
        return isinstance(x, (bool, np.bool_))


_BOOL_OP_UNSUPPORTED = {"+": "|", "*": "&", "-": "^"}


def _bool_arith_fallback(op_str, a, b) -> bool:
    """
    Check if we should fallback to the python `_evaluate_standard` in case
    of an unsupported operation by numexpr, which is the case for some
    boolean ops.
    """
    if _has_bool_dtype(a) and _has_bool_dtype(b):
        if op_str in _BOOL_OP_UNSUPPORTED:
            warnings.warn(
                f"evaluating in Python space because the {repr(op_str)} "
                "operator is not supported by numexpr for the bool dtype, "
                f"use {repr(_BOOL_OP_UNSUPPORTED[op_str])} instead.",
                stacklevel=find_stack_level(),
            )
            return True
    return False


def evaluate(op, a, b, use_numexpr: bool = True):
    """
    Evaluate and return the expression of the op on a and b.

    Parameters
    ----------
    op : the actual operand
    a : left operand
    b : right operand
    use_numexpr : bool, default True
        Whether to try to use numexpr.
    """
    op_str = _op_str_mapping[op]
    if op_str is not None:
        if use_numexpr:
|
||||
# error: "None" not callable
|
||||
return _evaluate(op, op_str, a, b) # type: ignore[misc]
|
||||
return _evaluate_standard(op, op_str, a, b)
|
||||
|
||||
|
||||
def where(cond, a, b, use_numexpr: bool = True):
|
||||
"""
|
||||
Evaluate the where condition cond on a and b.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cond : np.ndarray[bool]
|
||||
a : return if cond is True
|
||||
b : return if cond is False
|
||||
use_numexpr : bool, default True
|
||||
Whether to try to use numexpr.
|
||||
"""
|
||||
assert _where is not None
|
||||
return _where(cond, a, b) if use_numexpr else _where_standard(cond, a, b)
|
||||
|
||||
|
||||
def set_test_mode(v: bool = True) -> None:
|
||||
"""
|
||||
Keeps track of whether numexpr was used.
|
||||
|
||||
Stores an additional ``True`` for every successful use of evaluate with
|
||||
numexpr since the last ``get_test_result``.
|
||||
"""
|
||||
global _TEST_MODE, _TEST_RESULT
|
||||
_TEST_MODE = v
|
||||
_TEST_RESULT = []
|
||||
|
||||
|
||||
def _store_test_result(used_numexpr: bool) -> None:
|
||||
if used_numexpr:
|
||||
_TEST_RESULT.append(used_numexpr)
|
||||
|
||||
|
||||
def get_test_result() -> list[bool]:
|
||||
"""
|
||||
Get test result and reset test_results.
|
||||
"""
|
||||
global _TEST_RESULT
|
||||
res = _TEST_RESULT
|
||||
_TEST_RESULT = []
|
||||
return res
|
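
A minimal sketch of driving this module directly, assuming the internal layout matches the vendored copy above; in normal use these hooks are exercised indirectly through DataFrame arithmetic rather than called by hand:

import operator
import numpy as np
from pandas.core.computation import expressions as expr

# Arrays above _MIN_ELEMENTS so that _can_use_numexpr can return True.
a = np.random.rand(2_000_000)
b = np.random.rand(2_000_000)

expr.set_test_mode(True)
result = expr.evaluate(operator.add, a, b)
# Prints [True] if numexpr is installed and handled the op, [] otherwise.
print(expr.get_test_result())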
572
lib/python3.13/site-packages/pandas/core/computation/ops.py
Normal file
@@ -0,0 +1,572 @@
"""
|
||||
Operator classes for eval.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
import operator
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Callable,
|
||||
Literal,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.tslibs import Timestamp
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_list_like,
|
||||
is_scalar,
|
||||
)
|
||||
|
||||
import pandas.core.common as com
|
||||
from pandas.core.computation.common import (
|
||||
ensure_decoded,
|
||||
result_type_many,
|
||||
)
|
||||
from pandas.core.computation.scope import DEFAULT_GLOBALS
|
||||
|
||||
from pandas.io.formats.printing import (
|
||||
pprint_thing,
|
||||
pprint_thing_encoded,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import (
|
||||
Iterable,
|
||||
Iterator,
|
||||
)
|
||||
|
||||
REDUCTIONS = ("sum", "prod", "min", "max")
|
||||
|
||||
_unary_math_ops = (
|
||||
"sin",
|
||||
"cos",
|
||||
"exp",
|
||||
"log",
|
||||
"expm1",
|
||||
"log1p",
|
||||
"sqrt",
|
||||
"sinh",
|
||||
"cosh",
|
||||
"tanh",
|
||||
"arcsin",
|
||||
"arccos",
|
||||
"arctan",
|
||||
"arccosh",
|
||||
"arcsinh",
|
||||
"arctanh",
|
||||
"abs",
|
||||
"log10",
|
||||
"floor",
|
||||
"ceil",
|
||||
)
|
||||
_binary_math_ops = ("arctan2",)
|
||||
|
||||
MATHOPS = _unary_math_ops + _binary_math_ops
|
||||
|
||||
|
||||
LOCAL_TAG = "__pd_eval_local_"
|
||||
|
||||
|
||||
class Term:
|
||||
def __new__(cls, name, env, side=None, encoding=None):
|
||||
klass = Constant if not isinstance(name, str) else cls
|
||||
# error: Argument 2 for "super" not an instance of argument 1
|
||||
supr_new = super(Term, klass).__new__ # type: ignore[misc]
|
||||
return supr_new(klass)
|
||||
|
||||
is_local: bool
|
||||
|
||||
def __init__(self, name, env, side=None, encoding=None) -> None:
|
||||
# name is a str for Term, but may be something else for subclasses
|
||||
self._name = name
|
||||
self.env = env
|
||||
self.side = side
|
||||
tname = str(name)
|
||||
self.is_local = tname.startswith(LOCAL_TAG) or tname in DEFAULT_GLOBALS
|
||||
self._value = self._resolve_name()
|
||||
self.encoding = encoding
|
||||
|
||||
@property
|
||||
def local_name(self) -> str:
|
||||
return self.name.replace(LOCAL_TAG, "")
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return pprint_thing(self.name)
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
return self.value
|
||||
|
||||
def evaluate(self, *args, **kwargs) -> Term:
|
||||
return self
|
||||
|
||||
def _resolve_name(self):
|
||||
local_name = str(self.local_name)
|
||||
is_local = self.is_local
|
||||
if local_name in self.env.scope and isinstance(
|
||||
self.env.scope[local_name], type
|
||||
):
|
||||
is_local = False
|
||||
|
||||
res = self.env.resolve(local_name, is_local=is_local)
|
||||
self.update(res)
|
||||
|
||||
if hasattr(res, "ndim") and res.ndim > 2:
|
||||
raise NotImplementedError(
|
||||
"N-dimensional objects, where N > 2, are not supported with eval"
|
||||
)
|
||||
return res
|
||||
|
||||
def update(self, value) -> None:
|
||||
"""
|
||||
search order for local (i.e., @variable) variables:
|
||||
|
||||
scope, key_variable
|
||||
[('locals', 'local_name'),
|
||||
('globals', 'local_name'),
|
||||
('locals', 'key'),
|
||||
('globals', 'key')]
|
||||
"""
|
||||
key = self.name
|
||||
|
||||
# if it's a variable name (otherwise a constant)
|
||||
if isinstance(key, str):
|
||||
self.env.swapkey(self.local_name, key, new_value=value)
|
||||
|
||||
self.value = value
|
||||
|
||||
@property
|
||||
def is_scalar(self) -> bool:
|
||||
return is_scalar(self._value)
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
try:
|
||||
# potentially very slow for large, mixed dtype frames
|
||||
return self._value.values.dtype
|
||||
except AttributeError:
|
||||
try:
|
||||
# ndarray
|
||||
return self._value.dtype
|
||||
except AttributeError:
|
||||
# scalar
|
||||
return type(self._value)
|
||||
|
||||
return_type = type
|
||||
|
||||
@property
|
||||
def raw(self) -> str:
|
||||
return f"{type(self).__name__}(name={repr(self.name)}, type={self.type})"
|
||||
|
||||
@property
|
||||
def is_datetime(self) -> bool:
|
||||
try:
|
||||
t = self.type.type
|
||||
except AttributeError:
|
||||
t = self.type
|
||||
|
||||
return issubclass(t, (datetime, np.datetime64))
|
||||
|
||||
@property
|
||||
def value(self):
|
||||
return self._value
|
||||
|
||||
@value.setter
|
||||
def value(self, new_value) -> None:
|
||||
self._value = new_value
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self._name
|
||||
|
||||
@property
|
||||
def ndim(self) -> int:
|
||||
return self._value.ndim
|
||||
|
||||
|
||||
class Constant(Term):
|
||||
def _resolve_name(self):
|
||||
return self._name
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self.value
|
||||
|
||||
def __repr__(self) -> str:
|
||||
# in python 2 str() of float
|
||||
# can truncate shorter than repr()
|
||||
return repr(self.name)
|
||||
|
||||
|
||||
_bool_op_map = {"not": "~", "and": "&", "or": "|"}
|
||||
|
||||
|
||||
class Op:
|
||||
"""
|
||||
Hold an operator of arbitrary arity.
|
||||
"""
|
||||
|
||||
op: str
|
||||
|
||||
def __init__(self, op: str, operands: Iterable[Term | Op], encoding=None) -> None:
|
||||
self.op = _bool_op_map.get(op, op)
|
||||
self.operands = operands
|
||||
self.encoding = encoding
|
||||
|
||||
def __iter__(self) -> Iterator:
|
||||
return iter(self.operands)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
"""
|
||||
Print a generic n-ary operator and its operands using infix notation.
|
||||
"""
|
||||
# recurse over the operands
|
||||
parened = (f"({pprint_thing(opr)})" for opr in self.operands)
|
||||
return pprint_thing(f" {self.op} ".join(parened))
|
||||
|
||||
@property
|
||||
def return_type(self):
|
||||
# clobber types to bool if the op is a boolean operator
|
||||
if self.op in (CMP_OPS_SYMS + BOOL_OPS_SYMS):
|
||||
return np.bool_
|
||||
return result_type_many(*(term.type for term in com.flatten(self)))
|
||||
|
||||
@property
|
||||
def has_invalid_return_type(self) -> bool:
|
||||
types = self.operand_types
|
||||
obj_dtype_set = frozenset([np.dtype("object")])
|
||||
return self.return_type == object and types - obj_dtype_set
|
||||
|
||||
@property
|
||||
def operand_types(self):
|
||||
return frozenset(term.type for term in com.flatten(self))
|
||||
|
||||
@property
|
||||
def is_scalar(self) -> bool:
|
||||
return all(operand.is_scalar for operand in self.operands)
|
||||
|
||||
@property
|
||||
def is_datetime(self) -> bool:
|
||||
try:
|
||||
t = self.return_type.type
|
||||
except AttributeError:
|
||||
t = self.return_type
|
||||
|
||||
return issubclass(t, (datetime, np.datetime64))
|
||||
|
||||
|
||||
def _in(x, y):
|
||||
"""
|
||||
Compute the vectorized membership of ``x in y`` if possible, otherwise
|
||||
use Python.
|
||||
"""
|
||||
try:
|
||||
return x.isin(y)
|
||||
except AttributeError:
|
||||
if is_list_like(x):
|
||||
try:
|
||||
return y.isin(x)
|
||||
except AttributeError:
|
||||
pass
|
||||
return x in y
|
||||
|
||||
|
||||
def _not_in(x, y):
|
||||
"""
|
||||
Compute the vectorized membership of ``x not in y`` if possible,
|
||||
otherwise use Python.
|
||||
"""
|
||||
try:
|
||||
return ~x.isin(y)
|
||||
except AttributeError:
|
||||
if is_list_like(x):
|
||||
try:
|
||||
return ~y.isin(x)
|
||||
except AttributeError:
|
||||
pass
|
||||
return x not in y
|
||||
|
||||
|
||||
CMP_OPS_SYMS = (">", "<", ">=", "<=", "==", "!=", "in", "not in")
|
||||
_cmp_ops_funcs = (
|
||||
operator.gt,
|
||||
operator.lt,
|
||||
operator.ge,
|
||||
operator.le,
|
||||
operator.eq,
|
||||
operator.ne,
|
||||
_in,
|
||||
_not_in,
|
||||
)
|
||||
_cmp_ops_dict = dict(zip(CMP_OPS_SYMS, _cmp_ops_funcs))
|
||||
|
||||
BOOL_OPS_SYMS = ("&", "|", "and", "or")
|
||||
_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_)
|
||||
_bool_ops_dict = dict(zip(BOOL_OPS_SYMS, _bool_ops_funcs))
|
||||
|
||||
ARITH_OPS_SYMS = ("+", "-", "*", "/", "**", "//", "%")
|
||||
_arith_ops_funcs = (
|
||||
operator.add,
|
||||
operator.sub,
|
||||
operator.mul,
|
||||
operator.truediv,
|
||||
operator.pow,
|
||||
operator.floordiv,
|
||||
operator.mod,
|
||||
)
|
||||
_arith_ops_dict = dict(zip(ARITH_OPS_SYMS, _arith_ops_funcs))
|
||||
|
||||
SPECIAL_CASE_ARITH_OPS_SYMS = ("**", "//", "%")
|
||||
_special_case_arith_ops_funcs = (operator.pow, operator.floordiv, operator.mod)
|
||||
_special_case_arith_ops_dict = dict(
|
||||
zip(SPECIAL_CASE_ARITH_OPS_SYMS, _special_case_arith_ops_funcs)
|
||||
)
|
||||
|
||||
_binary_ops_dict = {}
|
||||
|
||||
for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict):
|
||||
_binary_ops_dict.update(d)
|
||||
|
||||
|
||||
def is_term(obj) -> bool:
|
||||
return isinstance(obj, Term)
|
||||
|
||||
|
||||
class BinOp(Op):
|
||||
"""
|
||||
Hold a binary operator and its operands.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
op : str
|
||||
lhs : Term or Op
|
||||
rhs : Term or Op
|
||||
"""
|
||||
|
||||
def __init__(self, op: str, lhs, rhs) -> None:
|
||||
super().__init__(op, (lhs, rhs))
|
||||
self.lhs = lhs
|
||||
self.rhs = rhs
|
||||
|
||||
self._disallow_scalar_only_bool_ops()
|
||||
|
||||
self.convert_values()
|
||||
|
||||
try:
|
||||
self.func = _binary_ops_dict[op]
|
||||
except KeyError as err:
|
||||
# has to be made a list for python3
|
||||
keys = list(_binary_ops_dict.keys())
|
||||
raise ValueError(
|
||||
f"Invalid binary operator {repr(op)}, valid operators are {keys}"
|
||||
) from err
|
||||
|
||||
def __call__(self, env):
|
||||
"""
|
||||
Recursively evaluate an expression in Python space.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
env : Scope
|
||||
|
||||
Returns
|
||||
-------
|
||||
object
|
||||
The result of an evaluated expression.
|
||||
"""
|
||||
# recurse over the left/right nodes
|
||||
left = self.lhs(env)
|
||||
right = self.rhs(env)
|
||||
|
||||
return self.func(left, right)
|
||||
|
||||
def evaluate(self, env, engine: str, parser, term_type, eval_in_python):
|
||||
"""
|
||||
Evaluate a binary operation *before* being passed to the engine.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
env : Scope
|
||||
engine : str
|
||||
parser : str
|
||||
term_type : type
|
||||
eval_in_python : list
|
||||
|
||||
Returns
|
||||
-------
|
||||
term_type
|
||||
The "pre-evaluated" expression as an instance of ``term_type``
|
||||
"""
|
||||
if engine == "python":
|
||||
res = self(env)
|
||||
else:
|
||||
# recurse over the left/right nodes
|
||||
|
||||
left = self.lhs.evaluate(
|
||||
env,
|
||||
engine=engine,
|
||||
parser=parser,
|
||||
term_type=term_type,
|
||||
eval_in_python=eval_in_python,
|
||||
)
|
||||
|
||||
right = self.rhs.evaluate(
|
||||
env,
|
||||
engine=engine,
|
||||
parser=parser,
|
||||
term_type=term_type,
|
||||
eval_in_python=eval_in_python,
|
||||
)
|
||||
|
||||
# base cases
|
||||
if self.op in eval_in_python:
|
||||
res = self.func(left.value, right.value)
|
||||
else:
|
||||
from pandas.core.computation.eval import eval
|
||||
|
||||
res = eval(self, local_dict=env, engine=engine, parser=parser)
|
||||
|
||||
name = env.add_tmp(res)
|
||||
return term_type(name, env=env)
|
||||
|
||||
def convert_values(self) -> None:
|
||||
"""
|
||||
Convert datetimes to a comparable value in an expression.
|
||||
"""
|
||||
|
||||
def stringify(value):
|
||||
encoder: Callable
|
||||
if self.encoding is not None:
|
||||
encoder = partial(pprint_thing_encoded, encoding=self.encoding)
|
||||
else:
|
||||
encoder = pprint_thing
|
||||
return encoder(value)
|
||||
|
||||
lhs, rhs = self.lhs, self.rhs
|
||||
|
||||
if is_term(lhs) and lhs.is_datetime and is_term(rhs) and rhs.is_scalar:
|
||||
v = rhs.value
|
||||
if isinstance(v, (int, float)):
|
||||
v = stringify(v)
|
||||
v = Timestamp(ensure_decoded(v))
|
||||
if v.tz is not None:
|
||||
v = v.tz_convert("UTC")
|
||||
self.rhs.update(v)
|
||||
|
||||
if is_term(rhs) and rhs.is_datetime and is_term(lhs) and lhs.is_scalar:
|
||||
v = lhs.value
|
||||
if isinstance(v, (int, float)):
|
||||
v = stringify(v)
|
||||
v = Timestamp(ensure_decoded(v))
|
||||
if v.tz is not None:
|
||||
v = v.tz_convert("UTC")
|
||||
self.lhs.update(v)
|
||||
|
||||
def _disallow_scalar_only_bool_ops(self):
|
||||
rhs = self.rhs
|
||||
lhs = self.lhs
|
||||
|
||||
# GH#24883 unwrap dtype if necessary to ensure we have a type object
|
||||
rhs_rt = rhs.return_type
|
||||
rhs_rt = getattr(rhs_rt, "type", rhs_rt)
|
||||
lhs_rt = lhs.return_type
|
||||
lhs_rt = getattr(lhs_rt, "type", lhs_rt)
|
||||
if (
|
||||
(lhs.is_scalar or rhs.is_scalar)
|
||||
and self.op in _bool_ops_dict
|
||||
and (
|
||||
not (
|
||||
issubclass(rhs_rt, (bool, np.bool_))
|
||||
and issubclass(lhs_rt, (bool, np.bool_))
|
||||
)
|
||||
)
|
||||
):
|
||||
raise NotImplementedError("cannot evaluate scalar only bool ops")
|
||||
|
||||
|
||||
def isnumeric(dtype) -> bool:
|
||||
return issubclass(np.dtype(dtype).type, np.number)
|
||||
|
||||
|
||||
UNARY_OPS_SYMS = ("+", "-", "~", "not")
|
||||
_unary_ops_funcs = (operator.pos, operator.neg, operator.invert, operator.invert)
|
||||
_unary_ops_dict = dict(zip(UNARY_OPS_SYMS, _unary_ops_funcs))
|
||||
|
||||
|
||||
class UnaryOp(Op):
|
||||
"""
|
||||
Hold a unary operator and its operands.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
op : str
|
||||
The token used to represent the operator.
|
||||
operand : Term or Op
|
||||
The Term or Op operand to the operator.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
* If no function associated with the passed operator token is found.
|
||||
"""
|
||||
|
||||
def __init__(self, op: Literal["+", "-", "~", "not"], operand) -> None:
|
||||
super().__init__(op, (operand,))
|
||||
self.operand = operand
|
||||
|
||||
try:
|
||||
self.func = _unary_ops_dict[op]
|
||||
except KeyError as err:
|
||||
raise ValueError(
|
||||
f"Invalid unary operator {repr(op)}, "
|
||||
f"valid operators are {UNARY_OPS_SYMS}"
|
||||
) from err
|
||||
|
||||
def __call__(self, env) -> MathCall:
|
||||
operand = self.operand(env)
|
||||
# error: Cannot call function of unknown type
|
||||
return self.func(operand) # type: ignore[operator]
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return pprint_thing(f"{self.op}({self.operand})")
|
||||
|
||||
@property
|
||||
def return_type(self) -> np.dtype:
|
||||
operand = self.operand
|
||||
if operand.return_type == np.dtype("bool"):
|
||||
return np.dtype("bool")
|
||||
if isinstance(operand, Op) and (
|
||||
operand.op in _cmp_ops_dict or operand.op in _bool_ops_dict
|
||||
):
|
||||
return np.dtype("bool")
|
||||
return np.dtype("int")
|
||||
|
||||
|
||||
class MathCall(Op):
|
||||
def __init__(self, func, args) -> None:
|
||||
super().__init__(func.name, args)
|
||||
self.func = func
|
||||
|
||||
def __call__(self, env):
|
||||
# error: "Op" not callable
|
||||
operands = [op(env) for op in self.operands] # type: ignore[operator]
|
||||
return self.func.func(*operands)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
operands = map(str, self.operands)
|
||||
return pprint_thing(f"{self.op}({','.join(operands)})")
|
||||
|
||||
|
||||
class FuncNode:
|
||||
def __init__(self, name: str) -> None:
|
||||
if name not in MATHOPS:
|
||||
raise ValueError(f'"{name}" is not a supported function')
|
||||
self.name = name
|
||||
self.func = getattr(np, name)
|
||||
|
||||
def __call__(self, *args) -> MathCall:
|
||||
return MathCall(self, args)
|
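
A minimal sketch of the Term/BinOp machinery above, assuming the internal Expr and Scope APIs match the vendored sources; these classes are normally built by pandas.eval rather than instantiated by hand:

from pandas.core.computation.expr import Expr
from pandas.core.computation.scope import Scope

env = Scope(level=0, local_dict={"x": 10, "y": 3})
parsed = Expr("x ** 2 + y", env=env)
print(parsed.terms)  # a BinOp tree, rendered infix by Op.__repr__
print(parsed())      # recursive evaluation via BinOp.__call__ -> 103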
198
lib/python3.13/site-packages/pandas/core/computation/parsing.py
Normal file
@@ -0,0 +1,198 @@
"""
|
||||
:func:`~pandas.eval` source string parsing functions
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from io import StringIO
|
||||
from keyword import iskeyword
|
||||
import token
|
||||
import tokenize
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import (
|
||||
Hashable,
|
||||
Iterator,
|
||||
)
|
||||
|
||||
# A token value Python's tokenizer probably will never use.
|
||||
BACKTICK_QUOTED_STRING = 100
|
||||
|
||||
|
||||
def create_valid_python_identifier(name: str) -> str:
|
||||
"""
|
||||
Create valid Python identifiers from any string.
|
||||
|
||||
Check if name contains any special characters. If it contains any
|
||||
special characters, the special characters will be replaced by
|
||||
a special string and a prefix is added.
|
||||
|
||||
Raises
|
||||
------
|
||||
SyntaxError
|
||||
If the returned name is not a Python valid identifier, raise an exception.
|
||||
This can happen if there is a hashtag in the name, as the tokenizer will
|
||||
than terminate and not find the backtick.
|
||||
But also for characters that fall out of the range of (U+0001..U+007F).
|
||||
"""
|
||||
if name.isidentifier() and not iskeyword(name):
|
||||
return name
|
||||
|
||||
# Create a dict with the special characters and their replacement string.
|
||||
# EXACT_TOKEN_TYPES contains these special characters
|
||||
# token.tok_name contains a readable description of the replacement string.
|
||||
special_characters_replacements = {
|
||||
char: f"_{token.tok_name[tokval]}_"
|
||||
for char, tokval in (tokenize.EXACT_TOKEN_TYPES.items())
|
||||
}
|
||||
special_characters_replacements.update(
|
||||
{
|
||||
" ": "_",
|
||||
"?": "_QUESTIONMARK_",
|
||||
"!": "_EXCLAMATIONMARK_",
|
||||
"$": "_DOLLARSIGN_",
|
||||
"€": "_EUROSIGN_",
|
||||
"°": "_DEGREESIGN_",
|
||||
# Including quotes works, but there are exceptions.
|
||||
"'": "_SINGLEQUOTE_",
|
||||
'"': "_DOUBLEQUOTE_",
|
||||
# Currently not possible. Terminates parser and won't find backtick.
|
||||
# "#": "_HASH_",
|
||||
}
|
||||
)
|
||||
|
||||
name = "".join([special_characters_replacements.get(char, char) for char in name])
|
||||
name = f"BACKTICK_QUOTED_STRING_{name}"
|
||||
|
||||
if not name.isidentifier():
|
||||
raise SyntaxError(f"Could not convert '{name}' to a valid Python identifier.")
|
||||
|
||||
return name
|
||||
|
||||
|
||||
def clean_backtick_quoted_toks(tok: tuple[int, str]) -> tuple[int, str]:
|
||||
"""
|
||||
Clean up a column name if surrounded by backticks.
|
||||
|
||||
Backtick quoted string are indicated by a certain tokval value. If a string
|
||||
is a backtick quoted token it will processed by
|
||||
:func:`_create_valid_python_identifier` so that the parser can find this
|
||||
string when the query is executed.
|
||||
In this case the tok will get the NAME tokval.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
tok : tuple of int, str
|
||||
ints correspond to the all caps constants in the tokenize module
|
||||
|
||||
Returns
|
||||
-------
|
||||
tok : Tuple[int, str]
|
||||
Either the input or token or the replacement values
|
||||
"""
|
||||
toknum, tokval = tok
|
||||
if toknum == BACKTICK_QUOTED_STRING:
|
||||
return tokenize.NAME, create_valid_python_identifier(tokval)
|
||||
return toknum, tokval
|
||||
|
||||
|
||||
def clean_column_name(name: Hashable) -> Hashable:
|
||||
"""
|
||||
Function to emulate the cleaning of a backtick quoted name.
|
||||
|
||||
The purpose for this function is to see what happens to the name of
|
||||
identifier if it goes to the process of being parsed a Python code
|
||||
inside a backtick quoted string and than being cleaned
|
||||
(removed of any special characters).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name : hashable
|
||||
Name to be cleaned.
|
||||
|
||||
Returns
|
||||
-------
|
||||
name : hashable
|
||||
Returns the name after tokenizing and cleaning.
|
||||
|
||||
Notes
|
||||
-----
|
||||
For some cases, a name cannot be converted to a valid Python identifier.
|
||||
In that case :func:`tokenize_string` raises a SyntaxError.
|
||||
In that case, we just return the name unmodified.
|
||||
|
||||
If this name was used in the query string (this makes the query call impossible)
|
||||
an error will be raised by :func:`tokenize_backtick_quoted_string` instead,
|
||||
which is not caught and propagates to the user level.
|
||||
"""
|
||||
try:
|
||||
tokenized = tokenize_string(f"`{name}`")
|
||||
tokval = next(tokenized)[1]
|
||||
return create_valid_python_identifier(tokval)
|
||||
except SyntaxError:
|
||||
return name
|
||||
|
||||
|
||||
def tokenize_backtick_quoted_string(
|
||||
token_generator: Iterator[tokenize.TokenInfo], source: str, string_start: int
|
||||
) -> tuple[int, str]:
|
||||
"""
|
||||
Creates a token from a backtick quoted string.
|
||||
|
||||
Moves the token_generator forwards till right after the next backtick.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
token_generator : Iterator[tokenize.TokenInfo]
|
||||
The generator that yields the tokens of the source string (Tuple[int, str]).
|
||||
The generator is at the first token after the backtick (`)
|
||||
|
||||
source : str
|
||||
The Python source code string.
|
||||
|
||||
string_start : int
|
||||
This is the start of backtick quoted string inside the source string.
|
||||
|
||||
Returns
|
||||
-------
|
||||
tok: Tuple[int, str]
|
||||
The token that represents the backtick quoted string.
|
||||
The integer is equal to BACKTICK_QUOTED_STRING (100).
|
||||
"""
|
||||
for _, tokval, start, _, _ in token_generator:
|
||||
if tokval == "`":
|
||||
string_end = start[1]
|
||||
break
|
||||
|
||||
return BACKTICK_QUOTED_STRING, source[string_start:string_end]
|
||||
|
||||
|
||||
def tokenize_string(source: str) -> Iterator[tuple[int, str]]:
|
||||
"""
|
||||
Tokenize a Python source code string.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str
|
||||
The Python source code string.
|
||||
|
||||
Returns
|
||||
-------
|
||||
tok_generator : Iterator[Tuple[int, str]]
|
||||
An iterator yielding all tokens with only toknum and tokval (Tuple[ing, str]).
|
||||
"""
|
||||
line_reader = StringIO(source).readline
|
||||
token_generator = tokenize.generate_tokens(line_reader)
|
||||
|
||||
# Loop over all tokens till a backtick (`) is found.
|
||||
# Then, take all tokens till the next backtick to form a backtick quoted string
|
||||
for toknum, tokval, start, _, _ in token_generator:
|
||||
if tokval == "`":
|
||||
try:
|
||||
yield tokenize_backtick_quoted_string(
|
||||
token_generator, source, string_start=start[1] + 1
|
||||
)
|
||||
except Exception as err:
|
||||
raise SyntaxError(f"Failed to parse backticks in '{source}'.") from err
|
||||
else:
|
||||
yield toknum, tokval
|
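
A minimal sketch of what these helpers do to a backtick-quoted column name; the frame and column names below are illustrative:

import pandas as pd
from pandas.core.computation.parsing import clean_column_name

df = pd.DataFrame({"total sales ($)": [100, 250], "region": ["north", "south"]})

# DataFrame.query routes the backtick-quoted name through the tokenizer
# above, so spaces and special characters become a valid identifier.
print(clean_column_name("total sales ($)"))
print(df.query("`total sales ($)` > 150"))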
666
lib/python3.13/site-packages/pandas/core/computation/pytables.py
Normal file
@@ -0,0 +1,666 @@
""" manage PyTables query interface via Expressions """
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
from decimal import (
|
||||
Decimal,
|
||||
InvalidOperation,
|
||||
)
|
||||
from functools import partial
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
ClassVar,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.tslibs import (
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
)
|
||||
from pandas.errors import UndefinedVariableError
|
||||
|
||||
from pandas.core.dtypes.common import is_list_like
|
||||
|
||||
import pandas.core.common as com
|
||||
from pandas.core.computation import (
|
||||
expr,
|
||||
ops,
|
||||
scope as _scope,
|
||||
)
|
||||
from pandas.core.computation.common import ensure_decoded
|
||||
from pandas.core.computation.expr import BaseExprVisitor
|
||||
from pandas.core.computation.ops import is_term
|
||||
from pandas.core.construction import extract_array
|
||||
from pandas.core.indexes.base import Index
|
||||
|
||||
from pandas.io.formats.printing import (
|
||||
pprint_thing,
|
||||
pprint_thing_encoded,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import (
|
||||
Self,
|
||||
npt,
|
||||
)
|
||||
|
||||
|
||||
class PyTablesScope(_scope.Scope):
|
||||
__slots__ = ("queryables",)
|
||||
|
||||
queryables: dict[str, Any]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
level: int,
|
||||
global_dict=None,
|
||||
local_dict=None,
|
||||
queryables: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
super().__init__(level + 1, global_dict=global_dict, local_dict=local_dict)
|
||||
self.queryables = queryables or {}
|
||||
|
||||
|
||||
class Term(ops.Term):
|
||||
env: PyTablesScope
|
||||
|
||||
def __new__(cls, name, env, side=None, encoding=None):
|
||||
if isinstance(name, str):
|
||||
klass = cls
|
||||
else:
|
||||
klass = Constant
|
||||
return object.__new__(klass)
|
||||
|
||||
def __init__(self, name, env: PyTablesScope, side=None, encoding=None) -> None:
|
||||
super().__init__(name, env, side=side, encoding=encoding)
|
||||
|
||||
def _resolve_name(self):
|
||||
# must be a queryables
|
||||
if self.side == "left":
|
||||
# Note: The behavior of __new__ ensures that self.name is a str here
|
||||
if self.name not in self.env.queryables:
|
||||
raise NameError(f"name {repr(self.name)} is not defined")
|
||||
return self.name
|
||||
|
||||
# resolve the rhs (and allow it to be None)
|
||||
try:
|
||||
return self.env.resolve(self.name, is_local=False)
|
||||
except UndefinedVariableError:
|
||||
return self.name
|
||||
|
||||
# read-only property overwriting read/write property
|
||||
@property # type: ignore[misc]
|
||||
def value(self):
|
||||
return self._value
|
||||
|
||||
|
||||
class Constant(Term):
|
||||
def __init__(self, name, env: PyTablesScope, side=None, encoding=None) -> None:
|
||||
assert isinstance(env, PyTablesScope), type(env)
|
||||
super().__init__(name, env, side=side, encoding=encoding)
|
||||
|
||||
def _resolve_name(self):
|
||||
return self._name
|
||||
|
||||
|
||||
class BinOp(ops.BinOp):
|
||||
_max_selectors = 31
|
||||
|
||||
op: str
|
||||
queryables: dict[str, Any]
|
||||
condition: str | None
|
||||
|
||||
def __init__(self, op: str, lhs, rhs, queryables: dict[str, Any], encoding) -> None:
|
||||
super().__init__(op, lhs, rhs)
|
||||
self.queryables = queryables
|
||||
self.encoding = encoding
|
||||
self.condition = None
|
||||
|
||||
def _disallow_scalar_only_bool_ops(self) -> None:
|
||||
pass
|
||||
|
||||
def prune(self, klass):
|
||||
def pr(left, right):
|
||||
"""create and return a new specialized BinOp from myself"""
|
||||
if left is None:
|
||||
return right
|
||||
elif right is None:
|
||||
return left
|
||||
|
||||
k = klass
|
||||
if isinstance(left, ConditionBinOp):
|
||||
if isinstance(right, ConditionBinOp):
|
||||
k = JointConditionBinOp
|
||||
elif isinstance(left, k):
|
||||
return left
|
||||
elif isinstance(right, k):
|
||||
return right
|
||||
|
||||
elif isinstance(left, FilterBinOp):
|
||||
if isinstance(right, FilterBinOp):
|
||||
k = JointFilterBinOp
|
||||
elif isinstance(left, k):
|
||||
return left
|
||||
elif isinstance(right, k):
|
||||
return right
|
||||
|
||||
return k(
|
||||
self.op, left, right, queryables=self.queryables, encoding=self.encoding
|
||||
).evaluate()
|
||||
|
||||
left, right = self.lhs, self.rhs
|
||||
|
||||
if is_term(left) and is_term(right):
|
||||
res = pr(left.value, right.value)
|
||||
elif not is_term(left) and is_term(right):
|
||||
res = pr(left.prune(klass), right.value)
|
||||
elif is_term(left) and not is_term(right):
|
||||
res = pr(left.value, right.prune(klass))
|
||||
elif not (is_term(left) or is_term(right)):
|
||||
res = pr(left.prune(klass), right.prune(klass))
|
||||
|
||||
return res
|
||||
|
||||
def conform(self, rhs):
|
||||
"""inplace conform rhs"""
|
||||
if not is_list_like(rhs):
|
||||
rhs = [rhs]
|
||||
if isinstance(rhs, np.ndarray):
|
||||
rhs = rhs.ravel()
|
||||
return rhs
|
||||
|
||||
@property
|
||||
def is_valid(self) -> bool:
|
||||
"""return True if this is a valid field"""
|
||||
return self.lhs in self.queryables
|
||||
|
||||
@property
|
||||
def is_in_table(self) -> bool:
|
||||
"""
|
||||
return True if this is a valid column name for generation (e.g. an
|
||||
actual column in the table)
|
||||
"""
|
||||
return self.queryables.get(self.lhs) is not None
|
||||
|
||||
@property
|
||||
def kind(self):
|
||||
"""the kind of my field"""
|
||||
return getattr(self.queryables.get(self.lhs), "kind", None)
|
||||
|
||||
@property
|
||||
def meta(self):
|
||||
"""the meta of my field"""
|
||||
return getattr(self.queryables.get(self.lhs), "meta", None)
|
||||
|
||||
@property
|
||||
def metadata(self):
|
||||
"""the metadata of my field"""
|
||||
return getattr(self.queryables.get(self.lhs), "metadata", None)
|
||||
|
||||
def generate(self, v) -> str:
|
||||
"""create and return the op string for this TermValue"""
|
||||
val = v.tostring(self.encoding)
|
||||
return f"({self.lhs} {self.op} {val})"
|
||||
|
||||
def convert_value(self, v) -> TermValue:
|
||||
"""
|
||||
convert the expression that is in the term to something that is
|
||||
accepted by pytables
|
||||
"""
|
||||
|
||||
def stringify(value):
|
||||
if self.encoding is not None:
|
||||
return pprint_thing_encoded(value, encoding=self.encoding)
|
||||
return pprint_thing(value)
|
||||
|
||||
kind = ensure_decoded(self.kind)
|
||||
meta = ensure_decoded(self.meta)
|
||||
if kind == "datetime" or (kind and kind.startswith("datetime64")):
|
||||
if isinstance(v, (int, float)):
|
||||
v = stringify(v)
|
||||
v = ensure_decoded(v)
|
||||
v = Timestamp(v).as_unit("ns")
|
||||
if v.tz is not None:
|
||||
v = v.tz_convert("UTC")
|
||||
return TermValue(v, v._value, kind)
|
||||
elif kind in ("timedelta64", "timedelta"):
|
||||
if isinstance(v, str):
|
||||
v = Timedelta(v)
|
||||
else:
|
||||
v = Timedelta(v, unit="s")
|
||||
v = v.as_unit("ns")._value
|
||||
return TermValue(int(v), v, kind)
|
||||
elif meta == "category":
|
||||
metadata = extract_array(self.metadata, extract_numpy=True)
|
||||
result: npt.NDArray[np.intp] | np.intp | int
|
||||
if v not in metadata:
|
||||
result = -1
|
||||
else:
|
||||
result = metadata.searchsorted(v, side="left")
|
||||
return TermValue(result, result, "integer")
|
||||
elif kind == "integer":
|
||||
try:
|
||||
v_dec = Decimal(v)
|
||||
except InvalidOperation:
|
||||
# GH 54186
|
||||
# convert v to float to raise float's ValueError
|
||||
float(v)
|
||||
else:
|
||||
v = int(v_dec.to_integral_exact(rounding="ROUND_HALF_EVEN"))
|
||||
return TermValue(v, v, kind)
|
||||
elif kind == "float":
|
||||
v = float(v)
|
||||
return TermValue(v, v, kind)
|
||||
elif kind == "bool":
|
||||
if isinstance(v, str):
|
||||
v = v.strip().lower() not in [
|
||||
"false",
|
||||
"f",
|
||||
"no",
|
||||
"n",
|
||||
"none",
|
||||
"0",
|
||||
"[]",
|
||||
"{}",
|
||||
"",
|
||||
]
|
||||
else:
|
||||
v = bool(v)
|
||||
return TermValue(v, v, kind)
|
||||
elif isinstance(v, str):
|
||||
# string quoting
|
||||
return TermValue(v, stringify(v), "string")
|
||||
else:
|
||||
raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column")
|
||||
|
||||
def convert_values(self) -> None:
|
||||
pass
|
||||
|
||||
|
||||
class FilterBinOp(BinOp):
|
||||
filter: tuple[Any, Any, Index] | None = None
|
||||
|
||||
def __repr__(self) -> str:
|
||||
if self.filter is None:
|
||||
return "Filter: Not Initialized"
|
||||
return pprint_thing(f"[Filter : [{self.filter[0]}] -> [{self.filter[1]}]")
|
||||
|
||||
def invert(self) -> Self:
|
||||
"""invert the filter"""
|
||||
if self.filter is not None:
|
||||
self.filter = (
|
||||
self.filter[0],
|
||||
self.generate_filter_op(invert=True),
|
||||
self.filter[2],
|
||||
)
|
||||
return self
|
||||
|
||||
def format(self):
|
||||
"""return the actual filter format"""
|
||||
return [self.filter]
|
||||
|
||||
# error: Signature of "evaluate" incompatible with supertype "BinOp"
|
||||
def evaluate(self) -> Self | None: # type: ignore[override]
|
||||
if not self.is_valid:
|
||||
raise ValueError(f"query term is not valid [{self}]")
|
||||
|
||||
rhs = self.conform(self.rhs)
|
||||
values = list(rhs)
|
||||
|
||||
if self.is_in_table:
|
||||
# if too many values to create the expression, use a filter instead
|
||||
if self.op in ["==", "!="] and len(values) > self._max_selectors:
|
||||
filter_op = self.generate_filter_op()
|
||||
self.filter = (self.lhs, filter_op, Index(values))
|
||||
|
||||
return self
|
||||
return None
|
||||
|
||||
# equality conditions
|
||||
if self.op in ["==", "!="]:
|
||||
filter_op = self.generate_filter_op()
|
||||
self.filter = (self.lhs, filter_op, Index(values))
|
||||
|
||||
else:
|
||||
raise TypeError(
|
||||
f"passing a filterable condition to a non-table indexer [{self}]"
|
||||
)
|
||||
|
||||
return self
|
||||
|
||||
def generate_filter_op(self, invert: bool = False):
|
||||
if (self.op == "!=" and not invert) or (self.op == "==" and invert):
|
||||
return lambda axis, vals: ~axis.isin(vals)
|
||||
else:
|
||||
return lambda axis, vals: axis.isin(vals)
|
||||
|
||||
|
||||
class JointFilterBinOp(FilterBinOp):
|
||||
def format(self):
|
||||
raise NotImplementedError("unable to collapse Joint Filters")
|
||||
|
||||
# error: Signature of "evaluate" incompatible with supertype "BinOp"
|
||||
def evaluate(self) -> Self: # type: ignore[override]
|
||||
return self
|
||||
|
||||
|
||||
class ConditionBinOp(BinOp):
|
||||
def __repr__(self) -> str:
|
||||
return pprint_thing(f"[Condition : [{self.condition}]]")
|
||||
|
||||
def invert(self):
|
||||
"""invert the condition"""
|
||||
# if self.condition is not None:
|
||||
# self.condition = "~(%s)" % self.condition
|
||||
# return self
|
||||
raise NotImplementedError(
|
||||
"cannot use an invert condition when passing to numexpr"
|
||||
)
|
||||
|
||||
def format(self):
|
||||
"""return the actual ne format"""
|
||||
return self.condition
|
||||
|
||||
# error: Signature of "evaluate" incompatible with supertype "BinOp"
|
||||
def evaluate(self) -> Self | None: # type: ignore[override]
|
||||
if not self.is_valid:
|
||||
raise ValueError(f"query term is not valid [{self}]")
|
||||
|
||||
# convert values if we are in the table
|
||||
if not self.is_in_table:
|
||||
return None
|
||||
|
||||
rhs = self.conform(self.rhs)
|
||||
values = [self.convert_value(v) for v in rhs]
|
||||
|
||||
# equality conditions
|
||||
if self.op in ["==", "!="]:
|
||||
# too many values to create the expression?
|
||||
if len(values) <= self._max_selectors:
|
||||
vs = [self.generate(v) for v in values]
|
||||
self.condition = f"({' | '.join(vs)})"
|
||||
|
||||
# use a filter after reading
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
self.condition = self.generate(values[0])
|
||||
|
||||
return self
|
||||
|
||||
|
||||
class JointConditionBinOp(ConditionBinOp):
|
||||
# error: Signature of "evaluate" incompatible with supertype "BinOp"
|
||||
def evaluate(self) -> Self: # type: ignore[override]
|
||||
self.condition = f"({self.lhs.condition} {self.op} {self.rhs.condition})"
|
||||
return self
|
||||
|
||||
|
||||
class UnaryOp(ops.UnaryOp):
|
||||
def prune(self, klass):
|
||||
if self.op != "~":
|
||||
raise NotImplementedError("UnaryOp only support invert type ops")
|
||||
|
||||
operand = self.operand
|
||||
operand = operand.prune(klass)
|
||||
|
||||
if operand is not None and (
|
||||
issubclass(klass, ConditionBinOp)
|
||||
and operand.condition is not None
|
||||
or not issubclass(klass, ConditionBinOp)
|
||||
and issubclass(klass, FilterBinOp)
|
||||
and operand.filter is not None
|
||||
):
|
||||
return operand.invert()
|
||||
return None
|
||||
|
||||
|
||||
class PyTablesExprVisitor(BaseExprVisitor):
|
||||
const_type: ClassVar[type[ops.Term]] = Constant
|
||||
term_type: ClassVar[type[Term]] = Term
|
||||
|
||||
def __init__(self, env, engine, parser, **kwargs) -> None:
|
||||
super().__init__(env, engine, parser)
|
||||
for bin_op in self.binary_ops:
|
||||
bin_node = self.binary_op_nodes_map[bin_op]
|
||||
setattr(
|
||||
self,
|
||||
f"visit_{bin_node}",
|
||||
lambda node, bin_op=bin_op: partial(BinOp, bin_op, **kwargs),
|
||||
)
|
||||
|
||||
def visit_UnaryOp(self, node, **kwargs) -> ops.Term | UnaryOp | None:
|
||||
if isinstance(node.op, (ast.Not, ast.Invert)):
|
||||
return UnaryOp("~", self.visit(node.operand))
|
||||
elif isinstance(node.op, ast.USub):
|
||||
return self.const_type(-self.visit(node.operand).value, self.env)
|
||||
elif isinstance(node.op, ast.UAdd):
|
||||
raise NotImplementedError("Unary addition not supported")
|
||||
# TODO: return None might never be reached
|
||||
return None
|
||||
|
||||
def visit_Index(self, node, **kwargs):
|
||||
return self.visit(node.value).value
|
||||
|
||||
def visit_Assign(self, node, **kwargs):
|
||||
cmpr = ast.Compare(
|
||||
ops=[ast.Eq()], left=node.targets[0], comparators=[node.value]
|
||||
)
|
||||
return self.visit(cmpr)
|
||||
|
||||
def visit_Subscript(self, node, **kwargs) -> ops.Term:
|
||||
# only allow simple subscripts
|
||||
|
||||
value = self.visit(node.value)
|
||||
slobj = self.visit(node.slice)
|
||||
try:
|
||||
value = value.value
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
if isinstance(slobj, Term):
|
||||
# In py39 np.ndarray lookups with Term containing int raise
|
||||
slobj = slobj.value
|
||||
|
||||
try:
|
||||
return self.const_type(value[slobj], self.env)
|
||||
except TypeError as err:
|
||||
raise ValueError(
|
||||
f"cannot subscript {repr(value)} with {repr(slobj)}"
|
||||
) from err
|
||||
|
||||
def visit_Attribute(self, node, **kwargs):
|
||||
attr = node.attr
|
||||
value = node.value
|
||||
|
||||
ctx = type(node.ctx)
|
||||
if ctx == ast.Load:
|
||||
# resolve the value
|
||||
resolved = self.visit(value)
|
||||
|
||||
# try to get the value to see if we are another expression
|
||||
try:
|
||||
resolved = resolved.value
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
return self.term_type(getattr(resolved, attr), self.env)
|
||||
except AttributeError:
|
||||
# something like datetime.datetime where scope is overridden
|
||||
if isinstance(value, ast.Name) and value.id == attr:
|
||||
return resolved
|
||||
|
||||
raise ValueError(f"Invalid Attribute context {ctx.__name__}")
|
||||
|
||||
def translate_In(self, op):
|
||||
return ast.Eq() if isinstance(op, ast.In) else op
|
||||
|
||||
def _rewrite_membership_op(self, node, left, right):
|
||||
return self.visit(node.op), node.op, left, right
|
||||
|
||||
|
||||
def _validate_where(w):
|
||||
"""
|
||||
Validate that the where statement is of the right type.
|
||||
|
||||
The type may either be String, Expr, or list-like of Exprs.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
w : String term expression, Expr, or list-like of Exprs.
|
||||
|
||||
Returns
|
||||
-------
|
||||
where : The original where clause if the check was successful.
|
||||
|
||||
Raises
|
||||
------
|
||||
TypeError : An invalid data type was passed in for w (e.g. dict).
|
||||
"""
|
||||
if not (isinstance(w, (PyTablesExpr, str)) or is_list_like(w)):
|
||||
raise TypeError(
|
||||
"where must be passed as a string, PyTablesExpr, "
|
||||
"or list-like of PyTablesExpr"
|
||||
)
|
||||
|
||||
return w
|
||||
|
||||
|
||||
class PyTablesExpr(expr.Expr):
|
||||
"""
|
||||
Hold a pytables-like expression, comprised of possibly multiple 'terms'.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
where : string term expression, PyTablesExpr, or list-like of PyTablesExprs
|
||||
queryables : a "kinds" map (dict of column name -> kind), or None if column
|
||||
is non-indexable
|
||||
encoding : an encoding that will encode the query terms
|
||||
|
||||
Returns
|
||||
-------
|
||||
a PyTablesExpr object
|
||||
|
||||
Examples
|
||||
--------
|
||||
'index>=date'
|
||||
"columns=['A', 'D']"
|
||||
'columns=A'
|
||||
'columns==A'
|
||||
"~(columns=['A','B'])"
|
||||
'index>df.index[3] & string="bar"'
|
||||
'(index>df.index[3] & index<=df.index[6]) | string="bar"'
|
||||
"ts>=Timestamp('2012-02-01')"
|
||||
"major_axis>=20130101"
|
||||
"""
|
||||
|
||||
_visitor: PyTablesExprVisitor | None
|
||||
env: PyTablesScope
|
||||
expr: str
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
where,
|
||||
queryables: dict[str, Any] | None = None,
|
||||
encoding=None,
|
||||
scope_level: int = 0,
|
||||
) -> None:
|
||||
where = _validate_where(where)
|
||||
|
||||
self.encoding = encoding
|
||||
self.condition = None
|
||||
self.filter = None
|
||||
self.terms = None
|
||||
self._visitor = None
|
||||
|
||||
# capture the environment if needed
|
||||
local_dict: _scope.DeepChainMap[Any, Any] | None = None
|
||||
|
||||
if isinstance(where, PyTablesExpr):
|
||||
local_dict = where.env.scope
|
||||
_where = where.expr
|
||||
|
||||
elif is_list_like(where):
|
||||
where = list(where)
|
||||
for idx, w in enumerate(where):
|
||||
if isinstance(w, PyTablesExpr):
|
||||
local_dict = w.env.scope
|
||||
else:
|
||||
where[idx] = _validate_where(w)
|
||||
_where = " & ".join([f"({w})" for w in com.flatten(where)])
|
||||
else:
|
||||
# _validate_where ensures we otherwise have a string
|
||||
_where = where
|
||||
|
||||
self.expr = _where
|
||||
self.env = PyTablesScope(scope_level + 1, local_dict=local_dict)
|
||||
|
||||
if queryables is not None and isinstance(self.expr, str):
|
||||
self.env.queryables.update(queryables)
|
||||
self._visitor = PyTablesExprVisitor(
|
||||
self.env,
|
||||
queryables=queryables,
|
||||
parser="pytables",
|
||||
engine="pytables",
|
||||
encoding=encoding,
|
||||
)
|
||||
self.terms = self.parse()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
if self.terms is not None:
|
||||
return pprint_thing(self.terms)
|
||||
return pprint_thing(self.expr)
|
||||
|
||||
def evaluate(self):
|
||||
"""create and return the numexpr condition and filter"""
|
||||
try:
|
||||
self.condition = self.terms.prune(ConditionBinOp)
|
||||
except AttributeError as err:
|
||||
raise ValueError(
|
||||
f"cannot process expression [{self.expr}], [{self}] "
|
||||
"is not a valid condition"
|
||||
) from err
|
||||
try:
|
||||
self.filter = self.terms.prune(FilterBinOp)
|
||||
except AttributeError as err:
|
||||
raise ValueError(
|
||||
f"cannot process expression [{self.expr}], [{self}] "
|
||||
"is not a valid filter"
|
||||
) from err
|
||||
|
||||
return self.condition, self.filter
|
||||
|
||||
|
||||
class TermValue:
|
||||
"""hold a term value the we use to construct a condition/filter"""
|
||||
|
||||
def __init__(self, value, converted, kind: str) -> None:
|
||||
assert isinstance(kind, str), kind
|
||||
self.value = value
|
||||
self.converted = converted
|
||||
self.kind = kind
|
||||
|
||||
def tostring(self, encoding) -> str:
|
||||
"""quote the string if not encoded else encode and return"""
|
||||
if self.kind == "string":
|
||||
if encoding is not None:
|
||||
return str(self.converted)
|
||||
return f'"{self.converted}"'
|
||||
elif self.kind == "float":
|
||||
# python 2 str(float) is not always
|
||||
# round-trippable so use repr()
|
||||
return repr(self.converted)
|
||||
return str(self.converted)
|
||||
|
||||
|
||||
def maybe_expression(s) -> bool:
|
||||
"""loose checking if s is a pytables-acceptable expression"""
|
||||
if not isinstance(s, str):
|
||||
return False
|
||||
operations = PyTablesExprVisitor.binary_ops + PyTablesExprVisitor.unary_ops + ("=",)
|
||||
|
||||
# make sure we have an op at least
|
||||
return any(op in s for op in operations)
|
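
A minimal end-to-end sketch of the query path this module implements, assuming the optional PyTables (tables) dependency is installed; the file and column names below are illustrative:

import pandas as pd

df = pd.DataFrame(
    {"A": range(100), "B": pd.date_range("2024-01-01", periods=100)}
)
with pd.HDFStore("demo.h5", mode="w") as store:
    store.put("df", df, format="table", data_columns=True)
    # The where string is parsed by PyTablesExpr into a numexpr condition
    # that is evaluated inside the HDF5 file, not in Python.
    subset = store.select("df", where="A > 90 & B >= Timestamp('2024-04-01')")
print(subset)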
355
lib/python3.13/site-packages/pandas/core/computation/scope.py
Normal file
@@ -0,0 +1,355 @@
"""
|
||||
Module for scope operations
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import ChainMap
|
||||
import datetime
|
||||
import inspect
|
||||
from io import StringIO
|
||||
import itertools
|
||||
import pprint
|
||||
import struct
|
||||
import sys
|
||||
from typing import TypeVar
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.tslibs import Timestamp
|
||||
from pandas.errors import UndefinedVariableError
|
||||
|
||||
_KT = TypeVar("_KT")
|
||||
_VT = TypeVar("_VT")
|
||||
|
||||
|
||||
# https://docs.python.org/3/library/collections.html#chainmap-examples-and-recipes
|
||||
class DeepChainMap(ChainMap[_KT, _VT]):
|
||||
"""
|
||||
Variant of ChainMap that allows direct updates to inner scopes.
|
||||
|
||||
Only works when all passed mapping are mutable.
|
||||
"""
|
||||
|
||||
def __setitem__(self, key: _KT, value: _VT) -> None:
|
||||
for mapping in self.maps:
|
||||
if key in mapping:
|
||||
mapping[key] = value
|
||||
return
|
||||
self.maps[0][key] = value
|
||||
|
||||
def __delitem__(self, key: _KT) -> None:
|
||||
"""
|
||||
Raises
|
||||
------
|
||||
KeyError
|
||||
If `key` doesn't exist.
|
||||
"""
|
||||
for mapping in self.maps:
|
||||
if key in mapping:
|
||||
del mapping[key]
|
||||
return
|
||||
raise KeyError(key)
|
||||
|
||||
|
||||
def ensure_scope(
|
||||
level: int, global_dict=None, local_dict=None, resolvers=(), target=None
|
||||
) -> Scope:
|
||||
"""Ensure that we are grabbing the correct scope."""
|
||||
return Scope(
|
||||
level + 1,
|
||||
global_dict=global_dict,
|
||||
local_dict=local_dict,
|
||||
resolvers=resolvers,
|
||||
target=target,
|
||||
)
|
||||
|
||||
|
||||
def _replacer(x) -> str:
|
||||
"""
|
||||
Replace a number with its hexadecimal representation. Used to tag
|
||||
temporary variables with their calling scope's id.
|
||||
"""
|
||||
# get the hex repr of the binary char and remove 0x and pad by pad_size
|
||||
# zeros
|
||||
try:
|
||||
hexin = ord(x)
|
||||
except TypeError:
|
||||
# bytes literals masquerade as ints when iterating in py3
|
||||
hexin = x
|
||||
|
||||
return hex(hexin)
|
||||
|
||||
|
||||
def _raw_hex_id(obj) -> str:
|
||||
"""Return the padded hexadecimal id of ``obj``."""
|
||||
# interpret as a pointer since that's what really what id returns
|
||||
packed = struct.pack("@P", id(obj))
|
||||
return "".join([_replacer(x) for x in packed])
|
||||
|
||||
|
||||
DEFAULT_GLOBALS = {
|
||||
"Timestamp": Timestamp,
|
||||
"datetime": datetime.datetime,
|
||||
"True": True,
|
||||
"False": False,
|
||||
"list": list,
|
||||
"tuple": tuple,
|
||||
"inf": np.inf,
|
||||
"Inf": np.inf,
|
||||
}
|
||||
|
||||
|
||||
def _get_pretty_string(obj) -> str:
|
||||
"""
|
||||
Return a prettier version of obj.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
obj : object
|
||||
Object to pretty print
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Pretty print object repr
|
||||
"""
|
||||
sio = StringIO()
|
||||
pprint.pprint(obj, stream=sio)
|
||||
return sio.getvalue()
|
||||
|
||||
|
||||
class Scope:
|
||||
"""
|
||||
Object to hold scope, with a few bells to deal with some custom syntax
|
||||
and contexts added by pandas.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
level : int
|
||||
global_dict : dict or None, optional, default None
|
||||
local_dict : dict or Scope or None, optional, default None
|
||||
resolvers : list-like or None, optional, default None
|
||||
target : object
|
||||
|
||||
Attributes
|
||||
----------
|
||||
level : int
|
||||
scope : DeepChainMap
|
||||
target : object
|
||||
temps : dict
|
||||
"""
|
||||
|
||||
__slots__ = ["level", "scope", "target", "resolvers", "temps"]
|
||||
level: int
|
||||
scope: DeepChainMap
|
||||
resolvers: DeepChainMap
|
||||
temps: dict
|
||||
|
||||
def __init__(
|
||||
self, level: int, global_dict=None, local_dict=None, resolvers=(), target=None
|
||||
) -> None:
|
||||
self.level = level + 1
|
||||
|
||||
# shallow copy because we don't want to keep filling this up with what
|
||||
# was there before if there are multiple calls to Scope/_ensure_scope
|
||||
self.scope = DeepChainMap(DEFAULT_GLOBALS.copy())
|
||||
self.target = target
|
||||
|
||||
if isinstance(local_dict, Scope):
|
||||
self.scope.update(local_dict.scope)
|
||||
if local_dict.target is not None:
|
||||
self.target = local_dict.target
|
||||
self._update(local_dict.level)
|
||||
|
||||
frame = sys._getframe(self.level)
|
||||
|
||||
try:
|
||||
# shallow copy here because we don't want to replace what's in
|
||||
# scope when we align terms (alignment accesses the underlying
|
||||
# numpy array of pandas objects)
|
||||
scope_global = self.scope.new_child(
|
||||
(global_dict if global_dict is not None else frame.f_globals).copy()
|
||||
)
|
||||
self.scope = DeepChainMap(scope_global)
|
||||
if not isinstance(local_dict, Scope):
|
||||
scope_local = self.scope.new_child(
|
||||
(local_dict if local_dict is not None else frame.f_locals).copy()
|
||||
)
|
||||
self.scope = DeepChainMap(scope_local)
|
||||
finally:
|
||||
del frame
|
||||
|
||||
# assumes that resolvers are going from outermost scope to inner
|
||||
if isinstance(local_dict, Scope):
|
||||
resolvers += tuple(local_dict.resolvers.maps)
|
||||
self.resolvers = DeepChainMap(*resolvers)
|
||||
self.temps = {}
|
||||
|
||||
def __repr__(self) -> str:
|
||||
scope_keys = _get_pretty_string(list(self.scope.keys()))
|
||||
res_keys = _get_pretty_string(list(self.resolvers.keys()))
|
||||
return f"{type(self).__name__}(scope={scope_keys}, resolvers={res_keys})"
|
||||
|
||||
@property
|
||||
def has_resolvers(self) -> bool:
|
||||
"""
|
||||
Return whether we have any extra scope.
|
||||
|
||||
For example, DataFrames pass Their columns as resolvers during calls to
|
||||
``DataFrame.eval()`` and ``DataFrame.query()``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
hr : bool
|
||||
"""
|
||||
return bool(len(self.resolvers))
|
||||
|
||||
def resolve(self, key: str, is_local: bool):
|
||||
"""
|
||||
Resolve a variable name in a possibly local context.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
key : str
|
||||
A variable name
|
||||
is_local : bool
|
||||
Flag indicating whether the variable is local or not (prefixed with
|
||||
the '@' symbol)
|
||||
|
||||
Returns
|
||||
-------
|
||||
value : object
|
||||
The value of a particular variable
|
||||
"""
|
||||
try:
|
||||
# only look for locals in outer scope
|
||||
if is_local:
|
||||
return self.scope[key]
|
||||
|
||||
# not a local variable so check in resolvers if we have them
|
||||
if self.has_resolvers:
|
||||
return self.resolvers[key]
|
||||
|
||||
# if we're here that means that we have no locals and we also have
|
||||
# no resolvers
|
||||
assert not is_local and not self.has_resolvers
|
||||
return self.scope[key]
|
||||
except KeyError:
|
||||
try:
|
||||
# last ditch effort we look in temporaries
|
||||
# these are created when parsing indexing expressions
|
||||
# e.g., df[df > 0]
|
||||
return self.temps[key]
|
||||
except KeyError as err:
|
||||
raise UndefinedVariableError(key, is_local) from err
|
||||
|
||||
def swapkey(self, old_key: str, new_key: str, new_value=None) -> None:
|
||||
"""
|
||||
Replace a variable name, with a potentially new value.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
old_key : str
|
||||
Current variable name to replace
|
||||
new_key : str
|
||||
New variable name to replace `old_key` with
|
||||
new_value : object
|
||||
Value to be replaced along with the possible renaming
|
||||
"""
|
||||
if self.has_resolvers:
|
||||
maps = self.resolvers.maps + self.scope.maps
|
||||
else:
|
||||
maps = self.scope.maps
|
||||
|
||||
maps.append(self.temps)
|
||||
|
||||
for mapping in maps:
|
||||
if old_key in mapping:
|
||||
mapping[new_key] = new_value
|
||||
return
|
||||
|
||||
def _get_vars(self, stack, scopes: list[str]) -> None:
|
||||
"""
|
||||
Get specifically scoped variables from a list of stack frames.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
stack : list
|
||||
A list of stack frames as returned by ``inspect.stack()``
|
||||
scopes : sequence of strings
|
||||
A sequence containing valid stack frame attribute names that
|
||||
evaluate to a dictionary. For example, ('locals', 'globals')
|
||||
"""
|
||||
variables = itertools.product(scopes, stack)
|
||||
for scope, (frame, _, _, _, _, _) in variables:
|
||||
try:
|
||||
d = getattr(frame, f"f_{scope}")
|
||||
self.scope = DeepChainMap(self.scope.new_child(d))
|
||||
finally:
|
||||
# won't remove it, but DECREF it
|
||||
# in Py3 this probably isn't necessary since frame won't be
|
||||
# scope after the loop
|
||||
del frame
|
||||
|
||||
def _update(self, level: int) -> None:
|
||||
"""
|
||||
Update the current scope by going back `level` levels.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
level : int
|
||||
"""
|
||||
sl = level + 1
|
||||
|
||||
# add sl frames to the scope starting with the
|
||||
# most distant and overwriting with more current
|
||||
# makes sure that we can capture variable scope
|
||||
stack = inspect.stack()
|
||||
|
||||
try:
|
||||
self._get_vars(stack[:sl], scopes=["locals"])
|
||||
finally:
|
||||
del stack[:], stack
|
||||
|
||||
def add_tmp(self, value) -> str:
|
||||
"""
|
||||
Add a temporary variable to the scope.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
value : object
|
||||
An arbitrary object to be assigned to a temporary variable.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
The name of the temporary variable created.
|
||||
"""
|
||||
name = f"{type(value).__name__}_{self.ntemps}_{_raw_hex_id(self)}"
|
||||
|
||||
# add to inner most scope
|
||||
assert name not in self.temps
|
||||
self.temps[name] = value
|
||||
assert name in self.temps
|
||||
|
||||
# only increment if the variable gets put in the scope
|
||||
return name
|
||||
|
||||
@property
|
||||
def ntemps(self) -> int:
|
||||
"""The number of temporary variables in this scope"""
|
||||
return len(self.temps)
|
||||
|
||||
@property
|
||||
def full_scope(self) -> DeepChainMap:
|
||||
"""
|
||||
Return the full scope for use with passing to engines transparently
|
||||
as a mapping.
|
||||
|
||||
Returns
|
||||
-------
|
||||
vars : DeepChainMap
|
||||
All variables in this scope.
|
||||
"""
|
||||
maps = [self.temps] + self.resolvers.maps + self.scope.maps
|
||||
return DeepChainMap(*maps)
|
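
A minimal sketch of the local-variable resolution that Scope enables; the '@' prefix in a query marks a variable to resolve from the caller's frame:

import pandas as pd

threshold = 4
df = pd.DataFrame({"a": [1, 3, 5, 7]})

# DataFrame.query builds a Scope chaining the caller's locals and globals
# (plus the frame's columns as resolvers), so '@threshold' resolves to the
# Python variable above.
print(df.query("a > @threshold"))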