Updated script that can be controlled by a Node.js web app

This commit is contained in:
mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,329 @@
from __future__ import annotations
from typing import (
TYPE_CHECKING,
Literal,
)
import warnings
import numpy as np
from pandas._libs import lib
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend
from pandas.core.dtypes.cast import maybe_downcast_numeric
from pandas.core.dtypes.common import (
ensure_object,
is_bool_dtype,
is_decimal,
is_integer_dtype,
is_number,
is_numeric_dtype,
is_scalar,
is_string_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.dtypes import ArrowDtype
from pandas.core.dtypes.generic import (
ABCIndex,
ABCSeries,
)
from pandas.core.arrays import BaseMaskedArray
from pandas.core.arrays.string_ import StringDtype
if TYPE_CHECKING:
from pandas._typing import (
DateTimeErrorChoices,
DtypeBackend,
npt,
)
def to_numeric(
    arg,
    errors: DateTimeErrorChoices = "raise",
    downcast: Literal["integer", "signed", "unsigned", "float"] | None = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
):
    """
    Convert argument to a numeric type.

    The default return dtype is `float64` or `int64`
    depending on the data supplied. Use the `downcast` parameter
    to obtain other dtypes.

    Please note that precision loss may occur if really large numbers
    are passed in. Due to the internal limitations of `ndarray`, if
    numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
    or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
    passed in, it is very likely they will be converted to float so that
    they can be stored in an `ndarray`. These warnings apply similarly to
    `Series` since it internally leverages `ndarray`.

    Parameters
    ----------
    arg : scalar, list, tuple, 1-d array, or Series
        Argument to be converted.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception.
        - If 'coerce', then invalid parsing will be set as NaN.
        - If 'ignore', then invalid parsing will return the input.

        .. versionchanged:: 2.2

        "ignore" is deprecated. Catch exceptions explicitly instead.

    downcast : str, default None
        Can be 'integer', 'signed', 'unsigned', or 'float'.
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, downcasting will only occur if the size
        of the resulting data's dtype is strictly larger than
        the dtype it is to be cast to, so if none of the dtypes
        checked satisfy that specification, no downcasting will be
        performed on the data.
    dtype_backend : {'numpy_nullable', 'pyarrow'}, optional
        Back-end data type applied to the result (still experimental).
        The signature default is ``lib.no_default``; when any backend is
        passed explicitly the result is wrapped in a nullable array:

        * ``"numpy_nullable"``: returns a masked (nullable-dtype) array.
        * ``"pyarrow"``: the masked result is additionally converted to
          an :class:`ArrowExtensionArray`.

        .. versionadded:: 2.0

    Returns
    -------
    ret
        Numeric if parsing succeeded.
        Return type depends on input: Series if Series, Index if Index,
        a scalar if a scalar was passed, otherwise the converted array.

    Raises
    ------
    ValueError
        If `downcast` or `errors` is not one of the accepted values, or
        (with ``errors="raise"``) if parsing fails.
    TypeError
        If `arg` has more than one dimension, or (with ``errors="raise"``)
        if parsing fails with a type error.

    See Also
    --------
    DataFrame.astype : Cast argument to a specified dtype.
    to_datetime : Convert argument to datetime.
    to_timedelta : Convert argument to timedelta.
    numpy.ndarray.astype : Cast a numpy array to a specified type.
    DataFrame.convert_dtypes : Convert dtypes.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64

    Downcasting of nullable integer and floating dtypes is supported:

    >>> s = pd.Series([1, 2, 3], dtype="Int64")
    >>> pd.to_numeric(s, downcast="integer")
    0    1
    1    2
    2    3
    dtype: Int8
    >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
    >>> pd.to_numeric(s, downcast="float")
    0    1.0
    1    2.1
    2    3.0
    dtype: Float32
    """
    # Validate the keyword arguments before touching the data.
    if downcast not in (None, "integer", "signed", "unsigned", "float"):
        raise ValueError("invalid downcasting method provided")

    if errors not in ("ignore", "raise", "coerce"):
        raise ValueError("invalid error value specified")
    if errors == "ignore":
        # GH#54467
        warnings.warn(
            "errors='ignore' is deprecated and will raise in a future version. "
            "Use to_numeric without passing `errors` and catch exceptions "
            "explicitly instead",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    check_dtype_backend(dtype_backend)

    # Remember what kind of container came in so the same kind can be
    # returned at the end.
    is_series = False
    is_index = False
    is_scalars = False

    if isinstance(arg, ABCSeries):
        is_series = True
        values = arg.values
    elif isinstance(arg, ABCIndex):
        is_index = True
        if needs_i8_conversion(arg.dtype):
            # dtypes flagged by needs_i8_conversion are reinterpreted as int64
            values = arg.view("i8")
        else:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype="O")
    elif is_scalar(arg):
        # Scalars that are already numeric short-circuit: decimals are
        # returned as float, other numbers are returned unchanged.
        if is_decimal(arg):
            return float(arg)
        if is_number(arg):
            return arg
        # Any other scalar is wrapped in a one-element object array and
        # unwrapped again just before returning.
        is_scalars = True
        values = np.array([arg], dtype="O")
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError("arg must be a list, tuple, 1-d array, or Series")
    else:
        values = arg

    # Kept so that a failed parse with a non-"raise" `errors` value can hand
    # the untouched input back.
    orig_values = values

    # GH33013: for IntegerArray & FloatingArray extract non-null values for casting
    # save mask to reconstruct the full array after casting
    mask: npt.NDArray[np.bool_] | None = None
    if isinstance(values, BaseMaskedArray):
        mask = values._mask
        values = values._data[~mask]

    values_dtype = getattr(values, "dtype", None)
    if isinstance(values_dtype, ArrowDtype):
        # Same idea for Arrow-backed data: remember the nulls, convert the rest.
        mask = values.isna()
        values = values.dropna().to_numpy()
    new_mask: np.ndarray | None = None
    if is_numeric_dtype(values_dtype):
        # Already numeric: nothing to parse.
        pass
    elif lib.is_np_dtype(values_dtype, "mM"):
        # numpy kinds "m"/"M" are reinterpreted as int64
        values = values.view(np.int64)
    else:
        values = ensure_object(values)
        coerce_numeric = errors not in ("ignore", "raise")
        try:
            # NOTE: `and` binds tighter than `or`, so the last keyword reads
            # ``A or (B and not C)``: masked output is requested when a
            # dtype_backend was given, or when the input is a StringDtype
            # whose storage is not "pyarrow_numpy".
            values, new_mask = lib.maybe_convert_numeric(  # type: ignore[call-overload]
                values,
                set(),
                coerce_numeric=coerce_numeric,
                convert_to_masked_nullable=dtype_backend is not lib.no_default
                or isinstance(values_dtype, StringDtype)
                and not values_dtype.storage == "pyarrow_numpy",
            )
        except (ValueError, TypeError):
            if errors == "raise":
                raise
            # Parsing failed and the caller did not ask for an exception:
            # fall back to the input as it was before any conversion.
            values = orig_values

    if new_mask is not None:
        # Remove unnecessary values, is expected later anyway and enables
        # downcasting
        values = values[~new_mask]
    elif (
        # Parsed as ``(A and new_mask is None) or (B and not C)``; in this
        # branch new_mask is always None, so an all-False mask is created
        # whenever a nullable result is wanted.
        dtype_backend is not lib.no_default
        and new_mask is None
        or isinstance(values_dtype, StringDtype)
        and not values_dtype.storage == "pyarrow_numpy"
    ):
        new_mask = np.zeros(values.shape, dtype=np.bool_)

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and is_numeric_dtype(values.dtype):
        typecodes: str | None = None

        if downcast in ("integer", "signed"):
            typecodes = np.typecodes["Integer"]
        elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0):
            # Only attempted for empty or non-negative data; otherwise
            # typecodes stays None and no downcast happens.
            typecodes = np.typecodes["UnsignedInteger"]
        elif downcast == "float":
            typecodes = np.typecodes["Float"]

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for typecode in typecodes:
                dtype = np.dtype(typecode)
                if dtype.itemsize <= values.dtype.itemsize:
                    values = maybe_downcast_numeric(values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    # GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct
    # masked array
    if (mask is not None or new_mask is not None) and not is_string_dtype(values.dtype):
        if mask is None or (new_mask is not None and new_mask.shape == mask.shape):
            # GH 52588
            mask = new_mask
        else:
            mask = mask.copy()
        assert isinstance(mask, np.ndarray)
        # Rebuild a full-length buffer: converted values go into the unmasked
        # slots, masked slots keep the zero placeholder.
        data = np.zeros(mask.shape, dtype=values.dtype)
        data[~mask] = values

        from pandas.core.arrays import (
            ArrowExtensionArray,
            BooleanArray,
            FloatingArray,
            IntegerArray,
        )

        # Pick the masked-array class matching the dtype of the rebuilt data.
        klass: type[IntegerArray | BooleanArray | FloatingArray]
        if is_integer_dtype(data.dtype):
            klass = IntegerArray
        elif is_bool_dtype(data.dtype):
            klass = BooleanArray
        else:
            klass = FloatingArray
        values = klass(data, mask)

        if dtype_backend == "pyarrow" or isinstance(values_dtype, ArrowDtype):
            values = ArrowExtensionArray(values.__arrow_array__())

    # Return the same kind of container that was passed in.
    if is_series:
        return arg._constructor(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy
        from pandas import Index

        return Index(values, name=arg.name)
    elif is_scalars:
        return values[0]
    else:
        return values

View File

@ -0,0 +1,283 @@
"""
timedelta support tools
"""
from __future__ import annotations
from typing import (
TYPE_CHECKING,
overload,
)
import warnings
import numpy as np
from pandas._libs import lib
from pandas._libs.tslibs import (
NaT,
NaTType,
)
from pandas._libs.tslibs.timedeltas import (
Timedelta,
disallow_ambiguous_unit,
parse_timedelta_unit,
)
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.dtypes import ArrowDtype
from pandas.core.dtypes.generic import (
ABCIndex,
ABCSeries,
)
from pandas.core.arrays.timedeltas import sequence_to_td64ns
if TYPE_CHECKING:
from collections.abc import Hashable
from datetime import timedelta
from pandas._libs.tslibs.timedeltas import UnitChoices
from pandas._typing import (
ArrayLike,
DateTimeErrorChoices,
)
from pandas import (
Index,
Series,
TimedeltaIndex,
)
@overload
def to_timedelta(
arg: str | float | timedelta,
unit: UnitChoices | None = ...,
errors: DateTimeErrorChoices = ...,
) -> Timedelta:
...
@overload
def to_timedelta(
arg: Series,
unit: UnitChoices | None = ...,
errors: DateTimeErrorChoices = ...,
) -> Series:
...
@overload
def to_timedelta(
arg: list | tuple | range | ArrayLike | Index,
unit: UnitChoices | None = ...,
errors: DateTimeErrorChoices = ...,
) -> TimedeltaIndex:
...
def to_timedelta(
arg: str
| int
| float
| timedelta
| list
| tuple
| range
| ArrayLike
| Index
| Series,
unit: UnitChoices | None = None,
errors: DateTimeErrorChoices = "raise",
) -> Timedelta | TimedeltaIndex | Series:
"""
Convert argument to timedelta.
Timedeltas are absolute differences in times, expressed in difference
units (e.g. days, hours, minutes, seconds). This method converts
an argument from a recognized timedelta format / value into
a Timedelta type.
Parameters
----------
arg : str, timedelta, list-like or Series
The data to be converted to timedelta.
.. versionchanged:: 2.0
Strings with units 'M', 'Y' and 'y' do not represent
unambiguous timedelta values and will raise an exception.
unit : str, optional
Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``.
Possible values:
* 'W'
* 'D' / 'days' / 'day'
* 'hours' / 'hour' / 'hr' / 'h' / 'H'
* 'm' / 'minute' / 'min' / 'minutes' / 'T'
* 's' / 'seconds' / 'sec' / 'second' / 'S'
* 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' / 'L'
* 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros' / 'U'
* 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond' / 'N'
Must not be specified when `arg` contains strings and ``errors="raise"``.
.. deprecated:: 2.2.0
Units 'H', 'T', 'S', 'L', 'U' and 'N' are deprecated and will be removed
in a future version. Please use 'h', 'min', 's', 'ms', 'us', and 'ns'
instead of 'H', 'T', 'S', 'L', 'U' and 'N'.
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
- If 'raise', then invalid parsing will raise an exception.
- If 'coerce', then invalid parsing will be set as NaT.
- If 'ignore', then invalid parsing will return the input.
Returns
-------
timedelta
If parsing succeeded.
Return type depends on input:
- list-like: TimedeltaIndex of timedelta64 dtype
- Series: Series of timedelta64 dtype
- scalar: Timedelta
See Also
--------
DataFrame.astype : Cast argument to a specified dtype.
to_datetime : Convert argument to datetime.
convert_dtypes : Convert dtypes.
Notes
-----
If the precision is higher than nanoseconds, the precision of the duration is
truncated to nanoseconds for string inputs.
Examples
--------
Parsing a single string to a Timedelta:
>>> pd.to_timedelta('1 days 06:05:01.00003')
Timedelta('1 days 06:05:01.000030')
>>> pd.to_timedelta('15.5us')
Timedelta('0 days 00:00:00.000015500')
Parsing a list or array of strings:
>>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan'])
TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015500', NaT],
dtype='timedelta64[ns]', freq=None)
Converting numbers by specifying the `unit` keyword argument:
>>> pd.to_timedelta(np.arange(5), unit='s')
TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02',
'0 days 00:00:03', '0 days 00:00:04'],
dtype='timedelta64[ns]', freq=None)
>>> pd.to_timedelta(np.arange(5), unit='d')
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq=None)
"""
if unit is not None:
unit = parse_timedelta_unit(unit)
disallow_ambiguous_unit(unit)
if errors not in ("ignore", "raise", "coerce"):
raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'.")
if errors == "ignore":
# GH#54467
warnings.warn(
"errors='ignore' is deprecated and will raise in a future version. "
"Use to_timedelta without passing `errors` and catch exceptions "
"explicitly instead",
FutureWarning,
stacklevel=find_stack_level(),
)
if arg is None:
return arg
elif isinstance(arg, ABCSeries):
values = _convert_listlike(arg._values, unit=unit, errors=errors)
return arg._constructor(values, index=arg.index, name=arg.name)
elif isinstance(arg, ABCIndex):
return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name)
elif isinstance(arg, np.ndarray) and arg.ndim == 0:
# extract array scalar and process below
# error: Incompatible types in assignment (expression has type "object",
# variable has type "Union[str, int, float, timedelta, List[Any],
# Tuple[Any, ...], Union[Union[ExtensionArray, ndarray[Any, Any]], Index,
# Series]]") [assignment]
arg = lib.item_from_zerodim(arg) # type: ignore[assignment]
elif is_list_like(arg) and getattr(arg, "ndim", 1) == 1:
return _convert_listlike(arg, unit=unit, errors=errors)
elif getattr(arg, "ndim", 1) > 1:
raise TypeError(
"arg must be a string, timedelta, list, tuple, 1-d array, or Series"
)
if isinstance(arg, str) and unit is not None:
raise ValueError("unit must not be specified if the input is/contains a str")
# ...so it must be a scalar value. Return scalar.
return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors)
def _coerce_scalar_to_timedelta_type(
    r, unit: UnitChoices | None = "ns", errors: DateTimeErrorChoices = "raise"
):
    """
    Build a Timedelta from the scalar ``r``, honouring the ``errors`` policy.

    Parameters
    ----------
    r : scalar
        Value handed to the ``Timedelta`` constructor.
    unit : str or None, default "ns"
        Unit forwarded to the ``Timedelta`` constructor.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        What to do when the constructor raises ``ValueError``: re-raise,
        return ``r`` unchanged ('ignore'), or return ``NaT`` (anything else).

    Returns
    -------
    Timedelta, NaT, or the original ``r``
    """
    try:
        converted = Timedelta(r, unit)
    except ValueError:
        if errors == "raise":
            raise
        # "ignore" hands the input back untouched; otherwise coerce to NaT
        return r if errors == "ignore" else NaT
    return converted
def _convert_listlike(
arg,
unit: UnitChoices | None = None,
errors: DateTimeErrorChoices = "raise",
name: Hashable | None = None,
):
"""Convert a list of objects to a timedelta index object."""
arg_dtype = getattr(arg, "dtype", None)
if isinstance(arg, (list, tuple)) or arg_dtype is None:
# This is needed only to ensure that in the case where we end up
# returning arg (errors == "ignore"), and where the input is a
# generator, we return a useful list-like instead of a
# used-up generator
if not hasattr(arg, "__array__"):
arg = list(arg)
arg = np.array(arg, dtype=object)
elif isinstance(arg_dtype, ArrowDtype) and arg_dtype.kind == "m":
return arg
try:
td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]
except ValueError:
if errors == "ignore":
return arg
else:
# This else-block accounts for the cases when errors='raise'
# and errors='coerce'. If errors == 'raise', these errors
# should be raised. If errors == 'coerce', we shouldn't
# expect any errors to be raised, since all parsing errors
# cause coercion to pd.NaT. However, if an error / bug is
# introduced that causes an Exception to be raised, we would
# like to surface it.
raise
from pandas import TimedeltaIndex
value = TimedeltaIndex(td64arr, name=name)
return value

View File

@ -0,0 +1,168 @@
from __future__ import annotations
from datetime import (
datetime,
time,
)
from typing import TYPE_CHECKING
import warnings
import numpy as np
from pandas._libs.lib import is_list_like
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.generic import (
ABCIndex,
ABCSeries,
)
from pandas.core.dtypes.missing import notna
if TYPE_CHECKING:
from pandas._typing import DateTimeErrorChoices
def to_time(
    arg,
    format: str | None = None,
    infer_time_format: bool = False,
    errors: DateTimeErrorChoices = "raise",
):
    """
    Parse time strings to time objects using fixed strptime formats ("%H:%M",
    "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
    "%I%M%S%p")

    When no `format` is given, each element is first tried with
    ``datetime.time.fromisoformat`` and then with the fixed formats above.

    Use infer_time_format if all the strings are in the same format to speed
    up conversion.

    Parameters
    ----------
    arg : string in time format, datetime.time, list, tuple, 1-d array, Series
    format : str, default None
        Format used to convert arg into a time object.  If None, fixed formats
        are used.
    infer_time_format: bool, default False
        Infer the time format based on the first non-NaN element.  If all
        strings are in the same format, this will speed up conversion.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as None
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    datetime.time
        For scalar input. List-like and Index input yield a list of
        ``datetime.time`` (or None) objects; a Series yields a Series.
        ``None`` and ``datetime.time`` inputs are returned unchanged.

    Raises
    ------
    ValueError
        If `errors` is not one of the accepted values, or if an element
        cannot be parsed and ``errors="raise"``.
    TypeError
        If `arg` has more than one dimension.
    """
    # Reject unknown `errors` values up front, as to_numeric and to_timedelta
    # do. Without this check a typo would silently behave like "coerce".
    if errors not in ("ignore", "raise", "coerce"):
        raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'.")
    if errors == "ignore":
        # GH#54467
        warnings.warn(
            "errors='ignore' is deprecated and will raise in a future version. "
            "Use to_time without passing `errors` and catch exceptions "
            "explicitly instead",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    def _convert_listlike(arg, format):
        """Convert a 1-d list-like to a list of datetime.time / None."""
        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype="O")

        elif getattr(arg, "ndim", 1) > 1:
            raise TypeError(
                "arg must be a string, datetime, list, tuple, 1-d array, or Series"
            )

        arg = np.asarray(arg, dtype="O")

        if infer_time_format and format is None:
            format = _guess_time_format_for_array(arg)

        times: list[time | None] = []
        if format is not None:
            # A single known format: every element must match it.
            for element in arg:
                try:
                    times.append(datetime.strptime(element, format).time())
                except (ValueError, TypeError) as err:
                    if errors == "raise":
                        msg = (
                            f"Cannot convert {element} to a time with given "
                            f"format {format}"
                        )
                        raise ValueError(msg) from err
                    if errors == "ignore":
                        return arg
                    else:
                        times.append(None)
        else:
            # Work on a copy so the module-level list is never reordered.
            formats = _time_formats[:]
            format_found = False
            for element in arg:
                time_object = None
                try:
                    time_object = time.fromisoformat(element)
                except (ValueError, TypeError):
                    for time_format in formats:
                        try:
                            time_object = datetime.strptime(element, time_format).time()
                            if not format_found:
                                # Put the found format in front
                                fmt = formats.pop(formats.index(time_format))
                                formats.insert(0, fmt)
                                format_found = True
                            break
                        except (ValueError, TypeError):
                            continue

                if time_object is not None:
                    times.append(time_object)
                elif errors == "raise":
                    raise ValueError(f"Cannot convert arg {arg} to a time")
                elif errors == "ignore":
                    return arg
                else:
                    times.append(None)

        return times

    if arg is None:
        return arg
    elif isinstance(arg, time):
        return arg
    elif isinstance(arg, ABCSeries):
        values = _convert_listlike(arg._values, format)
        return arg._constructor(values, index=arg.index, name=arg.name)
    elif isinstance(arg, ABCIndex):
        return _convert_listlike(arg, format)
    elif is_list_like(arg):
        return _convert_listlike(arg, format)

    # Scalar: wrap in a one-element array and unwrap the single result.
    return _convert_listlike(np.array([arg]), format)[0]
# Fixed time formats for time parsing, tried in this order
_time_formats = [
    "%H:%M",
    "%H%M",
    "%I:%M%p",
    "%I%M%p",
    "%H:%M:%S",
    "%H%M%S",
    "%I:%M:%S%p",
    "%I%M%S%p",
]


def _guess_time_format_for_array(arr):
    """
    Guess a strptime format from the first non-NaN element of ``arr``.

    Parameters
    ----------
    arr : np.ndarray
        Array of candidate time values.

    Returns
    -------
    str or None
        The first entry of ``_time_formats`` that parses the first non-NaN
        element, or None when the array has no non-NaN element or no format
        matches (including when that element is not a string).
    """
    # Try to guess the format based on the first non-NaN element
    non_nan_elements = notna(arr).nonzero()[0]
    if len(non_nan_elements):
        element = arr[non_nan_elements[0]]
        for time_format in _time_formats:
            try:
                datetime.strptime(element, time_format)
                return time_format
            except (ValueError, TypeError):
                # ValueError: the string does not match this format.
                # TypeError: the element is not a string at all; treat it as
                # "no guess" so the caller's `errors` policy decides what
                # happens, instead of crashing here.
                pass

    return None