Updated script that can be controlled by a Node.js web app

This commit is contained in:
mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

View File

@ -0,0 +1,48 @@
import numpy as np
import pytest
import pandas as pd
from pandas.core.arrays.floating import (
Float32Dtype,
Float64Dtype,
)
@pytest.fixture(params=[Float32Dtype, Float64Dtype])
def dtype(request):
    """Provide an instance of each nullable float dtype class, one per run."""
    dtype_cls = request.param
    return dtype_cls()
@pytest.fixture
def data(dtype):
    """Build a float array of the parametrized 'dtype' containing two NA entries."""
    values = [
        *np.arange(0.1, 0.9, 0.1),
        pd.NA,
        *np.arange(1, 9.8, 0.1),
        pd.NA,
        9.9,
        10.0,
    ]
    return pd.array(values, dtype=dtype)
@pytest.fixture
def data_missing(dtype):
    """
    Build a two-element array of the parametrized float 'dtype' from
    ``[np.nan, 0.1]``.
    """
    values = [np.nan, 0.1]
    return pd.array(values, dtype=dtype)
@pytest.fixture(params=["data", "data_missing"])
def all_data(request, data, data_missing):
    """Select the 'data' or the 'data_missing' float array by parameter name.

    Used to test dtype conversion with and without missing values.
    """
    by_name = {"data": data, "data_missing": data_missing}
    # An unrecognised parameter yields None.
    return by_name.get(request.param)

View File

@ -0,0 +1,244 @@
import operator
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import FloatingArray
# Basic test for the arithmetic array ops
# -----------------------------------------------------------------------------
@pytest.mark.parametrize(
    "opname, exp",
    [
        ("add", [1.1, 2.2, None, None, 5.5]),
        ("mul", [0.1, 0.4, None, None, 2.5]),
        ("sub", [0.9, 1.8, None, None, 4.5]),
        ("truediv", [10.0, 10.0, None, None, 10.0]),
        ("floordiv", [9.0, 9.0, None, None, 10.0]),
        ("mod", [0.1, 0.2, None, None, 0.0]),
    ],
    ids=["add", "mul", "sub", "div", "floordiv", "mod"],
)
def test_array_op(dtype, opname, exp):
    """Apply a binary operator to two float arrays and compare with ``exp``.

    Each operand has a missing value at a different position; the expected
    lists have None at both positions.
    """
    left = pd.array([1.0, 2.0, None, 4.0, 5.0], dtype=dtype)
    right = pd.array([0.1, 0.2, 0.3, None, 0.5], dtype=dtype)

    binary_op = getattr(operator, opname)
    result = binary_op(left, right)

    tm.assert_extension_array_equal(result, pd.array(exp, dtype=dtype))
@pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
def test_divide_by_zero(dtype, zero, negative):
    """Divide a float array by a zero scalar and check the nan/inf results.

    When ``negative`` is set the expected values are multiplied by -1.
    """
    # TODO pending NA/NaN discussion
    # https://github.com/pandas-dev/pandas/issues/32265/
    numerators = pd.array([0, 1, -1, None], dtype=dtype)
    result = numerators / zero

    raw_values = np.array([np.nan, np.inf, -np.inf, np.nan], dtype=dtype.numpy_dtype)
    na_mask = np.array([False, False, False, True])
    expected = FloatingArray(raw_values, na_mask)
    if negative:
        expected *= -1

    tm.assert_extension_array_equal(result, expected)
def test_pow_scalar(dtype):
    """Check ``array ** scalar`` and ``scalar ** array`` for 0, 1, pd.NA and np.nan."""
    check = tm.assert_extension_array_equal
    base = pd.array([-1, 0, 1, None, 2], dtype=dtype)

    # array ** scalar
    check(base**0, pd.array([1, 1, 1, 1, 1], dtype=dtype))
    check(base**1, pd.array([-1, 0, 1, None, 2], dtype=dtype))
    check(base**pd.NA, pd.array([None, None, 1, None, None], dtype=dtype))

    nan_result = base**np.nan
    # TODO np.nan should be converted to pd.NA / missing before operation?
    nan_expected = FloatingArray(
        np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype),
        mask=base._mask,
    )
    check(nan_result, nan_expected)

    # reversed: scalar ** array
    exponents = base[1:]  # Can't raise integers to negative powers.

    check(0**exponents, pd.array([1, 0, None, 0], dtype=dtype))
    check(1**exponents, pd.array([1, 1, 1, 1], dtype=dtype))
    check(pd.NA**exponents, pd.array([1, None, None, None], dtype=dtype))

    nan_result = np.nan**exponents
    nan_expected = FloatingArray(
        np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype),
        mask=exponents._mask,
    )
    check(nan_result, nan_expected)
def test_pow_array(dtype):
    """Raise one float array to another over every 0 / 1 / NA pairing."""
    bases = pd.array([0, 0, 0, 1, 1, 1, None, None, None], dtype=dtype)
    exponents = pd.array([0, 1, None, 0, 1, None, 0, 1, None], dtype=dtype)

    powered = bases**exponents

    expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None], dtype=dtype)
    tm.assert_extension_array_equal(powered, expected)
def test_rpow_one_to_na():
    """Raise a NumPy array to an all-missing Float64 array: 1 ** NA gives 1."""
    # https://github.com/pandas-dev/pandas/issues/22022
    # https://github.com/pandas-dev/pandas/issues/29997
    exponents = pd.array([np.nan, np.nan], dtype="Float64")
    bases = np.array([1.0, 2.0])

    result = bases**exponents

    tm.assert_extension_array_equal(result, pd.array([1.0, np.nan], dtype="Float64"))
@pytest.mark.parametrize("other", [0, 0.5])
def test_arith_zero_dim_ndarray(other):
    """Adding a zero-dimensional ndarray must match adding the plain scalar."""
    values = pd.array([1, None, 2], dtype="Float64")
    zero_dim = np.array(other)

    via_ndarray = values + zero_dim
    via_scalar = values + other

    tm.assert_equal(via_ndarray, via_scalar)
# Test generic characteristics / errors
# -----------------------------------------------------------------------------
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
    """Check that arithmetic with non-numeric operands raises.

    The operator method named by ``all_arithmetic_operators`` is looked up on a
    Series built from ``data`` and called with a string, a Timestamp, a string
    Series and a datetime Series. Every call must raise one of the accepted
    exception types with a message matching one of the listed patterns.
    """
    ser = pd.Series(data)
    bound_op = getattr(ser, all_arithmetic_operators)

    if not using_infer_string:
        errs = TypeError
    else:
        import pyarrow as pa

        errs = (TypeError, pa.lib.ArrowNotImplementedError, NotImplementedError)

    # invalid scalars and string array-likes share one set of messages
    string_patterns = [
        r"can only perform ops with numeric values",
        r"FloatingArray cannot perform the operation mod",
        "unsupported operand type",
        "not all arguments converted during string formatting",
        "can't multiply sequence by non-int of type 'float'",
        "ufunc 'subtract' cannot use operands with types dtype",
        r"can only concatenate str \(not \"float\"\) to str",
        "ufunc '.*' not supported for the input types, and the inputs could not",
        "ufunc '.*' did not contain a loop with signature matching types",
        "Concatenation operation is not implemented for NumPy arrays",
        "has no kernel",
        "not implemented",
    ]
    msg = "|".join(string_patterns)
    invalid_operands = (
        "foo",
        pd.Timestamp("20180101"),
        pd.Series("foo", index=ser.index),
    )
    for operand in invalid_operands:
        with pytest.raises(errs, match=msg):
            bound_op(operand)

    # datetime array-like
    datetime_patterns = [
        "can only perform ops with numeric values",
        "cannot perform .* with this index type: DatetimeArray",
        "Addition/subtraction of integers and integer-arrays "
        "with DatetimeArray is no longer supported. *",
        "unsupported operand type",
        "not all arguments converted during string formatting",
        "can't multiply sequence by non-int of type 'float'",
        "ufunc 'subtract' cannot use operands with types dtype",
        (
            "ufunc 'add' cannot use operands with types "
            rf"dtype\('{tm.ENDIAN}M8\[ns\]'\)"
        ),
        r"ufunc 'add' cannot use operands with types dtype\('float\d{2}'\)",
        "cannot subtract DatetimeArray from ndarray",
        "has no kernel",
        "not implemented",
    ]
    msg = "|".join(datetime_patterns)
    datetime_operand = pd.Series(pd.date_range("20180101", periods=len(ser)))
    with pytest.raises(errs, match=msg):
        bound_op(datetime_operand)
# Various
# -----------------------------------------------------------------------------
def test_cross_type_arithmetic():
    """Combine Float64, Float32 and NumPy float64 columns of one DataFrame."""
    columns = {
        "A": pd.array([1, 2, np.nan], dtype="Float64"),
        "B": pd.array([1, np.nan, 3], dtype="Float32"),
        "C": np.array([1, 2, 3], dtype="float64"),
    }
    frame = pd.DataFrame(columns)

    # Float64 + numpy float64
    summed = frame.A + frame.C
    tm.assert_series_equal(summed, pd.Series([2, 4, np.nan], dtype="Float64"))

    # a comparison on the sum gives a nullable boolean Series
    compared = (frame.A + frame.C) * 3 == 12
    tm.assert_series_equal(
        compared, pd.Series([False, True, None], dtype="boolean")
    )

    # Float64 + Float32
    mixed = frame.A + frame.B
    tm.assert_series_equal(mixed, pd.Series([2, np.nan, np.nan], dtype="Float64"))
@pytest.mark.parametrize(
    "source, neg_target, abs_target",
    [
        ([1.1, 2.2, 3.3], [-1.1, -2.2, -3.3], [1.1, 2.2, 3.3]),
        ([1.1, 2.2, None], [-1.1, -2.2, None], [1.1, 2.2, None]),
        ([-1.1, 0.0, 1.1], [1.1, 0.0, -1.1], [1.1, 0.0, 1.1]),
    ],
)
def test_unary_float_operators(float_ea_dtype, source, neg_target, abs_target):
    """Check unary ``-``, ``+`` and ``abs``; ``+`` must not share memory."""
    # GH38794
    dtype = float_ea_dtype
    arr = pd.array(source, dtype=dtype)

    negated = -arr
    positive = +arr
    absolute = abs(arr)

    expected_neg = pd.array(neg_target, dtype=dtype)
    expected_abs = pd.array(abs_target, dtype=dtype)

    tm.assert_extension_array_equal(negated, expected_neg)
    tm.assert_extension_array_equal(positive, arr)
    assert not tm.shares_memory(positive, arr)
    tm.assert_extension_array_equal(absolute, expected_abs)
def test_bitwise(dtype):
    """The ``|``, ``&`` and ``^`` operators between float arrays raise TypeError."""
    left = pd.array([1, None, 3, 4], dtype=dtype)
    right = pd.array([None, 3, 5, 4], dtype=dtype)

    for bitwise_op in (operator.or_, operator.and_, operator.xor):
        with pytest.raises(TypeError, match="unsupported operand type"):
            bitwise_op(left, right)

View File

@ -0,0 +1,128 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
def test_astype():
    """Cast Float64 arrays to NumPy int64, bool and float64 dtypes."""
    # with missing values: integer and bool targets are rejected
    with_na = pd.array([0.1, 0.2, None], dtype="Float64")
    rejected = (
        ("int64", "cannot convert NA to integer"),
        ("bool", "cannot convert float NaN to bool"),
    )
    for target, message in rejected:
        with pytest.raises(ValueError, match=message):
            with_na.astype(target)

    as_float = with_na.astype("float64")
    tm.assert_numpy_array_equal(
        as_float, np.array([0.1, 0.2, np.nan], dtype="float64")
    )

    # no missing values
    complete = pd.array([0.0, 1.0, 0.5], dtype="Float64")

    as_int = complete.astype("int64")
    tm.assert_numpy_array_equal(as_int, np.array([0, 1, 0], dtype="int64"))

    as_bool = complete.astype("bool")
    tm.assert_numpy_array_equal(
        as_bool, np.array([False, True, True], dtype="bool")
    )
def test_astype_to_floating_array():
    """Cast a Float64 array to Float64 (by name and by dtype object) and Float32."""
    # astype to FloatingArray
    original = pd.array([0.0, 1.0, None], dtype="Float64")

    for same_dtype in ("Float64", pd.Float64Dtype()):
        tm.assert_extension_array_equal(original.astype(same_dtype), original)

    narrowed = original.astype("Float32")
    tm.assert_extension_array_equal(
        narrowed, pd.array([0.0, 1.0, None], dtype="Float32")
    )
def test_astype_to_boolean_array():
    """Cast a Float64 array to the nullable boolean dtype, by name and by object."""
    # astype to BooleanArray
    floats = pd.array([0.0, 1.0, None], dtype="Float64")
    expected = pd.array([False, True, None], dtype="boolean")

    for target in ("boolean", pd.BooleanDtype()):
        tm.assert_extension_array_equal(floats.astype(target), expected)
def test_astype_to_integer_array():
    """Cast a Float64 array to Int64: 1.5 becomes 1 and the missing value stays."""
    # astype to IntegerArray
    floats = pd.array([0.0, 1.5, None], dtype="Float64")

    as_int = floats.astype("Int64")

    tm.assert_extension_array_equal(as_int, pd.array([0, 1, None], dtype="Int64"))
def test_astype_str():
    """Casting to ``str`` or ``"str"`` gives a U32 ndarray with "<NA>" for missing."""
    floats = pd.array([0.1, 0.2, None], dtype="Float64")
    expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")

    for target in (str, "str"):
        tm.assert_numpy_array_equal(floats.astype(target), expected)
def test_astype_copy():
    """Check whether ``astype`` shares memory with its source for each ``copy`` value."""
    source = pd.array([0.1, 0.2, None], dtype="Float64")
    pristine = pd.array([0.1, 0.2, None], dtype="Float64")

    # copy=True -> ensure both data and mask are actual copies
    copied = source.astype("Float64", copy=True)
    assert copied is not source
    assert not tm.shares_memory(copied, source)
    for new_value in (10, pd.NA):
        copied[0] = new_value
        tm.assert_extension_array_equal(source, pristine)

    # copy=False -> same object, so writes are visible through the source
    same = source.astype("Float64", copy=False)
    assert same is source
    assert np.shares_memory(same._data, source._data)
    assert np.shares_memory(same._mask, source._mask)
    same[0] = 10
    assert source[0] == 10
    same[0] = pd.NA
    assert source[0] is pd.NA

    # astype to different dtype -> always needs a copy -> even with copy=False
    # we need to ensure that also the mask is actually copied
    source = pd.array([0.1, 0.2, None], dtype="Float64")
    pristine = pd.array([0.1, 0.2, None], dtype="Float64")

    converted = source.astype("Float32", copy=False)
    assert not tm.shares_memory(converted, source)
    for new_value in (10, pd.NA):
        converted[0] = new_value
        tm.assert_extension_array_equal(source, pristine)
def test_astype_object(dtype):
    """Casting to object gives Python floats and keeps ``pd.NA`` itself."""
    floats = pd.array([1.0, pd.NA], dtype=dtype)

    as_object = floats.astype(object)

    tm.assert_numpy_array_equal(as_object, np.array([1.0, pd.NA], dtype=object))
    # check exact element types
    first, second = as_object[0], as_object[1]
    assert isinstance(first, float)
    assert second is pd.NA
def test_Float64_conversion():
    """Cast an object Series of numeric strings to Float64."""
    # GH#40729
    target = pd.Float64Dtype()
    strings = pd.Series(["1", "2", "3", "4"], dtype="object")

    converted = strings.astype(target)

    tm.assert_series_equal(
        converted, pd.Series([1.0, 2.0, 3.0, 4.0], dtype=pd.Float64Dtype())
    )

View File

@ -0,0 +1,65 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import FloatingArray
from pandas.tests.arrays.masked_shared import (
ComparisonOps,
NumericOps,
)
class TestComparisonOps(NumericOps, ComparisonOps):
    """Comparison tests for float arrays, built on the shared masked-array mixins."""

    @pytest.mark.parametrize("other", [True, False, pd.NA, -1.0, 0.0, 1])
    def test_scalar(self, other, comparison_op, dtype):
        """Run the shared scalar-comparison test with float-specific scalars."""
        ComparisonOps.test_scalar(self, other, comparison_op, dtype)

    def test_compare_with_integerarray(self, comparison_op):
        """Comparing Int64 with Float64 must match comparing Int64 with Int64."""
        compare = comparison_op
        ints = pd.array([0, 1, None] * 3, dtype="Int64")
        floats = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype="Float64")
        floats_as_int = floats.astype("Int64")

        # integer array on the left
        tm.assert_extension_array_equal(
            compare(ints, floats), compare(ints, floats_as_int)
        )

        # integer array on the right
        tm.assert_extension_array_equal(
            compare(floats, ints), compare(floats_as_int, ints)
        )
def test_equals():
    """Arrays with equal values but different float dtypes are not ``equals``."""
    # GH-30652
    # equals is generally tested in /tests/extension/base/methods, but this
    # specifically tests that two arrays of the same class but different dtype
    # do not evaluate equal
    values = [1, 2, None]
    wide = pd.array(values, dtype="Float64")
    narrow = pd.array(values, dtype="Float32")

    outcome = wide.equals(narrow)

    assert outcome is False
def test_equals_nan_vs_na():
    """``equals`` distinguishes an unmasked NaN from a masked (NA) entry."""
    # GH#44382
    nan_mask = np.zeros(3, dtype=bool)
    nan_data = np.array([1.0, np.nan, 3.0], dtype=np.float64)
    with_nan = FloatingArray(nan_data, nan_mask)

    assert with_nan.equals(with_nan)
    tm.assert_extension_array_equal(with_nan, with_nan)
    assert with_nan.equals(with_nan.copy())
    assert with_nan.equals(FloatingArray(nan_data.copy(), nan_mask.copy()))

    na_mask = np.array([False, True, False], dtype=bool)
    na_data = np.array([1.0, 2.0, 3.0], dtype=np.float64)
    with_na = FloatingArray(na_data, na_mask)

    assert with_na.equals(with_na)
    tm.assert_extension_array_equal(with_na, with_na)

    assert not with_nan.equals(with_na)

    # with mask[1] = True, the only difference is data[1], which should
    # not matter for equals
    nan_mask[1] = True
    assert with_nan.equals(with_na)

View File

@ -0,0 +1,20 @@
import pytest
import pandas as pd
import pandas._testing as tm
@pytest.mark.parametrize(
    "to_concat_dtypes, result_dtype",
    [
        (["Float64", "Float64"], "Float64"),
        (["Float32", "Float64"], "Float64"),
        (["Float32", "Float32"], "Float32"),
    ],
)
def test_concat_series(to_concat_dtypes, result_dtype):
    """Concatenating float Series must produce ``result_dtype``."""
    pieces = [pd.Series([1, 2, pd.NA], dtype=t) for t in to_concat_dtypes]
    result = pd.concat(pieces)

    # Reference: concatenate two object Series, then cast once.
    object_piece = pd.Series([1, 2, pd.NA], dtype=object)
    expected = pd.concat([object_piece, object_piece]).astype(result_dtype)

    tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,204 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import FloatingArray
from pandas.core.arrays.floating import (
Float32Dtype,
Float64Dtype,
)
def test_uses_pandas_na():
    """A None passed to a Float64 array reads back as ``pd.NA``."""
    arr = pd.array([1, None], dtype=Float64Dtype())
    missing = arr[1]
    assert missing is pd.NA
def test_floating_array_constructor():
    """Build a FloatingArray from values and mask; reject invalid arguments."""
    values = np.array([1, 2, 3, 4], dtype="float64")
    mask = np.array([False, False, False, True], dtype="bool")

    built = FloatingArray(values, mask)
    tm.assert_extension_array_equal(
        built, pd.array([1, 2, 3, np.nan], dtype="Float64")
    )
    tm.assert_numpy_array_equal(built._data, values)
    tm.assert_numpy_array_equal(built._mask, mask)

    # lists instead of ndarrays, and integer data, are rejected
    msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
    bad_arguments = (
        (values.tolist(), mask),
        (values, mask.tolist()),
        (values.astype(int), mask),
    )
    for bad_values, bad_mask in bad_arguments:
        with pytest.raises(TypeError, match=msg):
            FloatingArray(bad_values, bad_mask)

    # the mask is mandatory
    msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
    with pytest.raises(TypeError, match=msg):
        FloatingArray(values)
def test_floating_array_disallows_float16():
    """Constructing a FloatingArray from float16 data raises TypeError."""
    # GH#44715
    half_precision = np.array([1, 2], dtype=np.float16)
    no_missing = np.array([False, False])

    with pytest.raises(
        TypeError, match="FloatingArray does not support np.float16 dtype"
    ):
        FloatingArray(half_precision, no_missing)
def test_floating_array_disallows_Float16_dtype(request):
    """``pd.array`` rejects the dtype string "Float16" (``request`` is unused)."""
    # GH#44715
    expected_message = "data type 'Float16' not understood"
    with pytest.raises(TypeError, match=expected_message):
        pd.array([1.0, 2.0], dtype="Float16")
def test_floating_array_constructor_copy():
    """The constructor keeps its inputs by default and copies with ``copy=True``."""
    values = np.array([1, 2, 3, 4], dtype="float64")
    mask = np.array([False, False, False, True], dtype="bool")

    shared = FloatingArray(values, mask)
    assert shared._data is values
    assert shared._mask is mask

    copied = FloatingArray(values, mask, copy=True)
    assert copied._data is not values
    assert copied._mask is not mask
def test_to_array():
    """``pd.array`` on a list of floats, with no dtype, gives a Float64 array."""
    values = [0.1, 0.2, 0.3, 0.4]
    inferred = pd.array(values)
    explicit = pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float64")
    tm.assert_extension_array_equal(inferred, explicit)
@pytest.mark.parametrize(
    "a, b",
    [
        ([1, None], [1, pd.NA]),
        ([None], [pd.NA]),
        ([None, np.nan], [pd.NA, pd.NA]),
        ([1, np.nan], [1, pd.NA]),
        ([np.nan], [pd.NA]),
    ],
)
def test_to_array_none_is_nan(a, b):
    """None and np.nan inputs build the same Float64 array as ``pd.NA`` inputs."""
    from_none_or_nan = pd.array(a, dtype="Float64")
    from_na = pd.array(b, dtype="Float64")
    tm.assert_extension_array_equal(from_none_or_nan, from_na)
def test_to_array_mixed_integer_float():
    """A list mixing ints and floats is inferred as Float64, with or without None."""
    cases = (
        ([1, 2.0], [1.0, 2.0]),
        ([1, None, 2.0], [1.0, None, 2.0]),
    )
    for mixed, as_floats in cases:
        inferred = pd.array(mixed)
        tm.assert_extension_array_equal(
            inferred, pd.array(as_floats, dtype="Float64")
        )
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        "foo",
        1,
        1.0,
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        [[1, 2], [3, 4]],
        [np.nan, {"a": 1}],
        # GH#44514 all-NA case used to get quietly swapped out before checking ndim
        np.array([pd.NA] * 6, dtype=object).reshape(3, 2),
    ],
)
def test_to_array_error(values):
    """Inputs that cannot become a Float64 array raise TypeError or ValueError."""
    # error in converting existing arrays to FloatingArray
    accepted_messages = (
        "cannot be converted to FloatingDtype",
        "values must be a 1D list-like",
        "Cannot pass scalar",
        r"float\(\) argument must be a string or a (real )?number, not 'dict'",
        "could not convert string to float: 'foo'",
        r"could not convert string to float: np\.str_\('foo'\)",
    )
    msg = "|".join(accepted_messages)

    with pytest.raises((TypeError, ValueError), match=msg):
        pd.array(values, dtype="Float64")
@pytest.mark.parametrize("values", [["1", "2", None], ["1.5", "2", None]])
def test_construct_from_float_strings(values):
    """Numeric strings are parsed by ``pd.array`` and by ``_from_sequence``."""
    # see also test_to_integer_array_str
    first = float(values[0])
    expected = pd.array([first, 2, None], dtype="Float64")

    via_pd_array = pd.array(values, dtype="Float64")
    tm.assert_extension_array_equal(via_pd_array, expected)

    via_from_sequence = FloatingArray._from_sequence(values)
    tm.assert_extension_array_equal(via_from_sequence, expected)
def test_to_array_inferred_dtype():
    """Without a dtype argument, float32 ndarrays give Float32 and lists Float64."""
    # if values has dtype -> respect it
    float32_values = np.array([1, 2], dtype="float32")
    assert pd.array(float32_values).dtype == Float32Dtype()

    # if values have no dtype -> always float64
    assert pd.array([1.0, 2.0]).dtype == Float64Dtype()
def test_to_array_dtype_keyword():
    """An explicit ``dtype`` keyword determines the resulting dtype."""
    from_list = pd.array([1, 2], dtype="Float32")
    assert from_list.dtype == Float32Dtype()

    # if values has dtype -> override it
    float32_values = np.array([1, 2], dtype="float32")
    overridden = pd.array(float32_values, dtype="Float64")
    assert overridden.dtype == Float64Dtype()
def test_to_array_integer():
    """Integer inputs are converted when Float64 is requested."""
    from_ints = pd.array([1, 2], dtype="Float64")
    from_floats = pd.array([1.0, 2.0], dtype="Float64")
    tm.assert_extension_array_equal(from_ints, from_floats)

    # for integer dtypes, the itemsize is not preserved
    # TODO can we specify "floating" in general?
    int32_values = np.array([1, 2], dtype="int32")
    converted = pd.array(int32_values, dtype="Float64")
    assert converted.dtype == Float64Dtype()
@pytest.mark.parametrize(
    "bool_values, values, target_dtype, expected_dtype",
    [
        ([False, True], [0, 1], Float64Dtype(), Float64Dtype()),
        ([False, True], [0, 1], "Float64", Float64Dtype()),
        ([False, True, np.nan], [0, 1, np.nan], Float64Dtype(), Float64Dtype()),
    ],
)
def test_to_array_bool(bool_values, values, target_dtype, expected_dtype):
    """Boolean inputs build the same float array as their 0/1 counterparts."""
    from_bools = pd.array(bool_values, dtype=target_dtype)
    assert from_bools.dtype == expected_dtype

    from_numbers = pd.array(values, dtype=target_dtype)
    tm.assert_extension_array_equal(from_bools, from_numbers)
def test_series_from_float(data):
    """Rebuild a Series from NumPy floats and from a list, using the dtype's name."""
    # construct from our dtype & string dtype
    dtype_name = str(data.dtype)
    expected = pd.Series(data)

    # from float
    as_numpy = data.to_numpy(na_value=np.nan, dtype="float")
    tm.assert_series_equal(pd.Series(as_numpy, dtype=dtype_name), expected)

    # from list
    as_list = np.array(data).tolist()
    tm.assert_series_equal(pd.Series(as_list, dtype=dtype_name), pd.Series(data))

View File

@ -0,0 +1,12 @@
import numpy as np
import pandas as pd
def test_contains_nan():
# GH#52840
arr = pd.array(range(5)) / 0
assert np.isnan(arr._data[0])
assert not arr.isna()[0]
assert np.nan in arr

View File

@ -0,0 +1,194 @@
import numpy as np
import pytest
from pandas.compat import IS64
import pandas as pd
import pandas._testing as tm
@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
# np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
@pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning")
def test_ufuncs_single(ufunc):
    """A unary ufunc on a Float64 array or Series matches the NumPy float result."""
    arr = pd.array([1, 2, -3, np.nan], dtype="Float64")

    array_result = ufunc(arr)
    array_expected = pd.array(ufunc(arr.astype(float)), dtype="Float64")
    tm.assert_extension_array_equal(array_result, array_expected)

    ser = pd.Series(arr)
    series_result = ufunc(ser)
    tm.assert_series_equal(series_result, pd.Series(array_expected))
@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
def test_ufuncs_single_float(ufunc):
    """Float-valued unary ufuncs on a Float64 array or Series match NumPy."""
    arr = pd.array([1.0, 0.2, 3.0, np.nan], dtype="Float64")
    with np.errstate(invalid="ignore"):
        array_result = ufunc(arr)
    array_expected = pd.array(ufunc(arr.astype(float)), dtype="Float64")
    tm.assert_extension_array_equal(array_result, array_expected)

    ser = pd.Series(arr)
    with np.errstate(invalid="ignore"):
        series_result = ufunc(ser)
    series_expected = pd.Series(ufunc(ser.astype(float)), dtype="Float64")
    tm.assert_series_equal(series_result, series_expected)
@pytest.mark.parametrize("ufunc", [np.add, np.subtract])
def test_ufuncs_binary_float(ufunc):
    """Binary ufuncs with a FloatingArray operand match the NumPy float result.

    Covers two FloatingArrays, a FloatingArray with an ndarray and with a
    scalar, each in both operand orders.
    """
    masked = pd.array([1, 0.2, -3, np.nan], dtype="Float64")
    plain = masked.astype(float)
    ndarray = np.array([1, 2, 3, 4])

    # (left, right) as passed to the ufunc, and their NumPy-only stand-ins
    operand_pairs = (
        ((masked, masked), (plain, plain)),  # two FloatingArrays
        ((masked, ndarray), (plain, ndarray)),  # FloatingArray with numpy array
        ((ndarray, masked), (ndarray, plain)),
        ((masked, 1), (plain, 1)),  # FloatingArray with scalar
        ((1, masked), (1, plain)),
    )
    for (left, right), (np_left, np_right) in operand_pairs:
        result = ufunc(left, right)
        expected = pd.array(ufunc(np_left, np_right), dtype="Float64")
        tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
    """``np.add.reduce`` on a Float64 array equals ``sum(skipna=False)``."""
    floats = pd.array(values, dtype="Float64")

    reduced = np.add.reduce(floats)
    summed = floats.sum(skipna=False)

    tm.assert_almost_equal(reduced, summed)
@pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system")
@pytest.mark.parametrize(
    "pandasmethname, kwargs",
    [
        ("var", {"ddof": 0}),
        ("var", {"ddof": 1}),
        ("std", {"ddof": 0}),
        ("std", {"ddof": 1}),
        ("kurtosis", {}),
        ("skew", {}),
        ("sem", {}),
    ],
)
def test_stat_method(pandasmethname, kwargs):
    """A statistic on a Float64 Series with NaNs equals that of the plain values.

    The same method is run on a Float64 Series with two trailing NaNs and on
    a NumPy float64 Series holding only the six numbers.
    """
    numbers = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]

    nullable = pd.Series(data=[*numbers, np.nan, np.nan], dtype="Float64")
    result = getattr(nullable, pandasmethname)(**kwargs)

    plain = pd.Series(data=numbers, dtype="float64")
    expected = getattr(plain, pandasmethname)(**kwargs)

    assert expected == result
def test_value_counts_na():
    """``value_counts`` counts NA when ``dropna=False`` and omits it otherwise."""
    floats = pd.array([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
    index = pd.Index([0.1, 0.2, pd.NA], dtype=floats.dtype)
    assert index.dtype == floats.dtype

    with_na = floats.value_counts(dropna=False)
    tm.assert_series_equal(
        with_na, pd.Series([2, 1, 1], index=index, dtype="Int64", name="count")
    )

    without_na = floats.value_counts(dropna=True)
    tm.assert_series_equal(
        without_na,
        pd.Series([2, 1], index=index[:-1], dtype="Int64", name="count"),
    )
def test_value_counts_empty():
    """``value_counts`` on an empty Float64 Series gives an empty Int64 Series."""
    empty = pd.Series([], dtype="Float64")
    counts = empty.value_counts()

    empty_index = pd.Index([], dtype="Float64")
    assert empty_index.dtype == "Float64"

    tm.assert_series_equal(
        counts, pd.Series([], index=empty_index, dtype="Int64", name="count")
    )
def test_value_counts_with_normalize():
    """``value_counts(normalize=True)`` returns Float64 proportions, NA excluded."""
    ser = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
    proportions = ser.value_counts(normalize=True)

    counts = pd.Series([2, 1], index=ser[:2], dtype="Float64", name="proportion")
    expected = counts / 3
    assert expected.index.dtype == ser.dtype

    tm.assert_series_equal(proportions, expected)
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 4])
def test_floating_array_sum(skipna, min_count, dtype):
    """``sum`` gives 6.0 only when NA is skipped and ``min_count`` is 0."""
    floats = pd.array([1, 2, 3, None], dtype=dtype)
    total = floats.sum(skipna=skipna, min_count=min_count)

    if not skipna or min_count != 0:
        assert total is pd.NA
    else:
        assert total == 6.0
@pytest.mark.parametrize(
    "values, expected", [([1, 2, 3], 6.0), ([1, 2, 3, None], 6.0), ([None], 0.0)]
)
def test_floating_array_numpy_sum(values, expected):
    """``np.sum`` on a Float64 array returns ``expected``, including with None."""
    floats = pd.array(values, dtype="Float64")
    total = np.sum(floats)
    assert total == expected
@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
def test_preserve_dtypes(op):
    """Reductions give np.float64 on a column and keep Float64 under groupby."""
    frame = pd.DataFrame(
        {
            "A": ["a", "b", "b"],
            "B": [1, None, 3],
            "C": pd.array([0.1, None, 3.0], dtype="Float64"),
        }
    )

    # op
    reduce_column = getattr(frame.C, op)
    scalar = reduce_column()
    assert isinstance(scalar, np.float64)

    # groupby
    reduce_groups = getattr(frame.groupby("A"), op)
    grouped = reduce_groups()

    expected_index = pd.Index(["a", "b"], name="A")
    expected = pd.DataFrame(
        {"B": np.array([1.0, 3.0]), "C": pd.array([0.1, 3], dtype="Float64")},
        index=expected_index,
    )
    tm.assert_frame_equal(grouped, expected)
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("method", ["min", "max"])
def test_floating_array_min_max(skipna, method, dtype):
    """``min``/``max`` skip NA when asked and return ``pd.NA`` otherwise."""
    floats = pd.array([0.0, 1.0, None], dtype=dtype)
    extreme = getattr(floats, method)(skipna=skipna)

    if not skipna:
        assert extreme is pd.NA
    else:
        wanted = 0 if method == "min" else 1
        assert extreme == wanted
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 9])
def test_floating_array_prod(skipna, min_count, dtype):
    """``prod`` gives 2 only when NA is skipped and ``min_count`` is 0."""
    floats = pd.array([1.0, 2.0, None], dtype=dtype)
    product = floats.prod(skipna=skipna, min_count=min_count)

    if not skipna or min_count != 0:
        assert product is pd.NA
    else:
        assert product == 2

View File

@ -0,0 +1,47 @@
import numpy as np
import pytest
import pandas as pd
from pandas.core.arrays.floating import (
Float32Dtype,
Float64Dtype,
)
def test_dtypes(dtype):
    """Smoke-test the parametrized float dtype.

    Checks that the dtype's scalar type maps to a NumPy floating kind ("f")
    and that the dtype has a name. The kind comparison used to be a bare
    expression with no ``assert``, so it was evaluated and discarded.
    """
    # smoke tests on auto dtype construction
    assert np.dtype(dtype.type).kind == "f"
    assert dtype.name is not None
@pytest.mark.parametrize(
    "dtype, expected",
    [(Float32Dtype(), "Float32Dtype()"), (Float64Dtype(), "Float64Dtype()")],
)
def test_repr_dtype(dtype, expected):
    """The repr of a float dtype instance is its class name followed by ``()``."""
    rendered = repr(dtype)
    assert rendered == expected
def test_repr_array():
    """A short float array repr shows class, values, length and dtype."""
    floats = pd.array([1.0, None, 3.0])
    expected = "<FloatingArray>\n[1.0, <NA>, 3.0]\nLength: 3, dtype: Float64"
    assert repr(floats) == expected
def test_repr_array_long():
    """Check the truncated repr of a 3000-element float array.

    In the truncated form pandas right-justifies every element to a common
    width and indents the continuation lines by one space. The expected
    string must keep that padding exactly, otherwise the comparison cannot
    succeed.
    """
    data = pd.array([1.0, 2.0, None] * 1000)
    # NOTE: the whitespace inside this literal is significant.
    expected = """<FloatingArray>
[ 1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,
 ...
 <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>,  1.0,  2.0, <NA>]
Length: 3000, dtype: Float64"""
    result = repr(data)
    assert result == expected
def test_frame_repr(data_missing):
    """Check the repr of a one-column DataFrame built from ``data_missing``.

    DataFrame reprs are column-aligned: the header and each value are
    right-justified under the column, with two spaces after the index. The
    expected string must keep that padding for the comparison to succeed.
    """
    df = pd.DataFrame({"A": data_missing})
    result = repr(df)
    # NOTE: the runs of spaces are significant (right-aligned column).
    expected = "      A\n0  <NA>\n1   0.1"
    assert result == expected

View File

@ -0,0 +1,132 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import FloatingArray
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy(box):
con = pd.Series if box else pd.array
# default (with or without missing values) -> object dtype
arr = con([0.1, 0.2, 0.3], dtype="Float64")
result = arr.to_numpy()
expected = np.array([0.1, 0.2, 0.3], dtype="float64")
tm.assert_numpy_array_equal(result, expected)
arr = con([0.1, 0.2, None], dtype="Float64")
result = arr.to_numpy()
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_float(box):
con = pd.Series if box else pd.array
# no missing values -> can convert to float, otherwise raises
arr = con([0.1, 0.2, 0.3], dtype="Float64")
result = arr.to_numpy(dtype="float64")
expected = np.array([0.1, 0.2, 0.3], dtype="float64")
tm.assert_numpy_array_equal(result, expected)
arr = con([0.1, 0.2, None], dtype="Float64")
result = arr.to_numpy(dtype="float64")
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
tm.assert_numpy_array_equal(result, expected)
result = arr.to_numpy(dtype="float64", na_value=np.nan)
expected = np.array([0.1, 0.2, np.nan], dtype="float64")
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_int(box):
    """``to_numpy(dtype="int64")`` works without missing values and raises with them."""
    make = pd.array
    if box:
        make = pd.Series

    # no missing values -> can convert to int, otherwise raises
    whole = make([1.0, 2.0, 3.0], dtype="Float64")
    tm.assert_numpy_array_equal(
        whole.to_numpy(dtype="int64"), np.array([1, 2, 3], dtype="int64")
    )

    with_na = make([1.0, 2.0, None], dtype="Float64")
    with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
        with_na.to_numpy(dtype="int64")

    # automatic casting: the fractional parts are dropped
    fractional = make([0.1, 0.9, 1.1], dtype="Float64")
    tm.assert_numpy_array_equal(
        fractional.to_numpy(dtype="int64"), np.array([0, 0, 1], dtype="int64")
    )
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_na_value(box):
    """``na_value`` replaces the missing entry for object, bool and int64 targets."""
    make = pd.array
    if box:
        make = pd.Series

    floats = make([0.0, 1.0, None], dtype="Float64")

    # (target dtype, fill value, expected ndarray)
    cases = (
        (object, None, np.array([0.0, 1.0, None], dtype="object")),
        (bool, False, np.array([False, True, False], dtype="bool")),
        ("int64", -99, np.array([0, 1, -99], dtype="int64")),
    )
    for target, fill, expected in cases:
        converted = floats.to_numpy(dtype=target, na_value=fill)
        tm.assert_numpy_array_equal(converted, expected)
def test_to_numpy_na_value_with_nan():
    """``na_value`` fills only the masked entry; an unmasked NaN stays NaN."""
    # array with both NaN and NA -> only fill NA with `na_value`
    raw = np.array([0.0, np.nan, 0.0])
    mask = np.array([False, False, True])
    floats = FloatingArray(raw, mask)

    converted = floats.to_numpy(dtype="float64", na_value=-1)

    tm.assert_numpy_array_equal(
        converted, np.array([0.0, np.nan, -1.0], dtype="float64")
    )
@pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"])
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_dtype(box, dtype):
    """Data without missing values converts to each requested NumPy dtype."""
    make = pd.array
    if box:
        make = pd.Series

    floats = make([0.0, 1.0], dtype="Float64")
    converted = floats.to_numpy(dtype=dtype)

    tm.assert_numpy_array_equal(converted, np.array([0, 1], dtype=dtype))
@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"])
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_na_raises(box, dtype):
    """Converting data with a missing value to an int or bool dtype raises.

    The error message is expected to contain the target dtype name.
    """
    make = pd.array
    if box:
        make = pd.Series

    with_na = make([0.0, 1.0, None], dtype="Float64")
    with pytest.raises(ValueError, match=dtype):
        with_na.to_numpy(dtype=dtype)
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy_string(box, dtype):
    """``to_numpy(dtype="str")`` gives a U32 string ndarray.

    The ``dtype`` fixture is requested but not used in the body.
    """
    make = pd.array
    if box:
        make = pd.Series

    floats = make([0.0, 1.0, None], dtype="Float64")
    as_strings = floats.to_numpy(dtype="str")

    string_dtype = f"{tm.ENDIAN}U32"
    tm.assert_numpy_array_equal(
        as_strings, np.array([0.0, 1.0, pd.NA], dtype=string_dtype)
    )
def test_to_numpy_copy():
    """Writes to ``to_numpy`` output reach the source only when ``copy`` is not set."""
    # to_numpy can be zero-copy if no missing values
    source = pd.array([0.1, 0.2, 0.3], dtype="Float64")
    view = source.to_numpy(dtype="float64")
    view[0] = 10
    tm.assert_extension_array_equal(
        source, pd.array([10, 0.2, 0.3], dtype="Float64")
    )

    # copy=True leaves the source untouched
    source = pd.array([0.1, 0.2, 0.3], dtype="Float64")
    duplicate = source.to_numpy(dtype="float64", copy=True)
    duplicate[0] = 10
    tm.assert_extension_array_equal(
        source, pd.array([0.1, 0.2, 0.3], dtype="Float64")
    )