Updated script that can be controled by Nodejs web app

2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions
--- a/lib/python3.13/site-packages/pandas/tests/arrays/string_/test_string_arrow.py
+++ b/lib/python3.13/site-packages/pandas/tests/arrays/string_/test_string_arrow.py
@@ -0,0 +1,265 @@
+import pickle
+import re
+
+import numpy as np
+import pytest
+
+import pandas.util._test_decorators as td
+
+import pandas as pd
+import pandas._testing as tm
+from pandas.core.arrays.string_ import (
+    StringArray,
+    StringDtype,
+)
+from pandas.core.arrays.string_arrow import (
+    ArrowStringArray,
+    ArrowStringArrayNumpySemantics,
+)
+
+
+def test_eq_all_na():
+    pytest.importorskip("pyarrow")
+    a = pd.array([pd.NA, pd.NA], dtype=StringDtype("pyarrow"))
+    result = a == a
+    expected = pd.array([pd.NA, pd.NA], dtype="boolean[pyarrow]")
+    tm.assert_extension_array_equal(result, expected)
+
+
+def test_config(string_storage, request, using_infer_string):
+    if using_infer_string and string_storage != "pyarrow_numpy":
+        request.applymarker(pytest.mark.xfail(reason="infer string takes precedence"))
+    with pd.option_context("string_storage", string_storage):
+        assert StringDtype().storage == string_storage
+        result = pd.array(["a", "b"])
+        assert result.dtype.storage == string_storage
+
+    dtype = StringDtype(string_storage)
+    expected = dtype.construct_array_type()._from_sequence(["a", "b"], dtype=dtype)
+    tm.assert_equal(result, expected)
+
+
+def test_config_bad_storage_raises():
+    msg = re.escape("Value must be one of python|pyarrow")
+    with pytest.raises(ValueError, match=msg):
+        pd.options.mode.string_storage = "foo"
+
+
+@pytest.mark.parametrize("chunked", [True, False])
+@pytest.mark.parametrize("array", ["numpy", "pyarrow"])
+def test_constructor_not_string_type_raises(array, chunked, arrow_string_storage):
+    pa = pytest.importorskip("pyarrow")
+
+    array = pa if array in arrow_string_storage else np
+
+    arr = array.array([1, 2, 3])
+    if chunked:
+        if array is np:
+            pytest.skip("chunked not applicable to numpy array")
+        arr = pa.chunked_array(arr)
+    if array is np:
+        msg = "Unsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArray"
+    else:
+        msg = re.escape(
+            "ArrowStringArray requires a PyArrow (chunked) array of large_string type"
+        )
+    with pytest.raises(ValueError, match=msg):
+        ArrowStringArray(arr)
+
+
+@pytest.mark.parametrize("chunked", [True, False])
+def test_constructor_not_string_type_value_dictionary_raises(chunked):
+    pa = pytest.importorskip("pyarrow")
+
+    arr = pa.array([1, 2, 3], pa.dictionary(pa.int32(), pa.int32()))
+    if chunked:
+        arr = pa.chunked_array(arr)
+
+    msg = re.escape(
+        "ArrowStringArray requires a PyArrow (chunked) array of large_string type"
+    )
+    with pytest.raises(ValueError, match=msg):
+        ArrowStringArray(arr)
+
+
+@pytest.mark.xfail(
+    reason="dict conversion does not seem to be implemented for large string in arrow"
+)
+@pytest.mark.parametrize("chunked", [True, False])
+def test_constructor_valid_string_type_value_dictionary(chunked):
+    pa = pytest.importorskip("pyarrow")
+
+    arr = pa.array(["1", "2", "3"], pa.large_string()).dictionary_encode()
+    if chunked:
+        arr = pa.chunked_array(arr)
+
+    arr = ArrowStringArray(arr)
+    assert pa.types.is_string(arr._pa_array.type.value_type)
+
+
+def test_constructor_from_list():
+    # GH#27673
+    pytest.importorskip("pyarrow")
+    result = pd.Series(["E"], dtype=StringDtype(storage="pyarrow"))
+    assert isinstance(result.dtype, StringDtype)
+    assert result.dtype.storage == "pyarrow"
+
+
+def test_from_sequence_wrong_dtype_raises(using_infer_string):
+    pytest.importorskip("pyarrow")
+    with pd.option_context("string_storage", "python"):
+        ArrowStringArray._from_sequence(["a", None, "c"], dtype="string")
+
+    with pd.option_context("string_storage", "pyarrow"):
+        ArrowStringArray._from_sequence(["a", None, "c"], dtype="string")
+
+    with pytest.raises(AssertionError, match=None):
+        ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[python]")
+
+    ArrowStringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]")
+
+    if not using_infer_string:
+        with pytest.raises(AssertionError, match=None):
+            with pd.option_context("string_storage", "python"):
+                ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
+
+    with pd.option_context("string_storage", "pyarrow"):
+        ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
+
+    if not using_infer_string:
+        with pytest.raises(AssertionError, match=None):
+            ArrowStringArray._from_sequence(
+                ["a", None, "c"], dtype=StringDtype("python")
+            )
+
+    ArrowStringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow"))
+
+    with pd.option_context("string_storage", "python"):
+        StringArray._from_sequence(["a", None, "c"], dtype="string")
+
+    with pd.option_context("string_storage", "pyarrow"):
+        StringArray._from_sequence(["a", None, "c"], dtype="string")
+
+    StringArray._from_sequence(["a", None, "c"], dtype="string[python]")
+
+    with pytest.raises(AssertionError, match=None):
+        StringArray._from_sequence(["a", None, "c"], dtype="string[pyarrow]")
+
+    if not using_infer_string:
+        with pd.option_context("string_storage", "python"):
+            StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
+
+    if not using_infer_string:
+        with pytest.raises(AssertionError, match=None):
+            with pd.option_context("string_storage", "pyarrow"):
+                StringArray._from_sequence(["a", None, "c"], dtype=StringDtype())
+
+    StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("python"))
+
+    with pytest.raises(AssertionError, match=None):
+        StringArray._from_sequence(["a", None, "c"], dtype=StringDtype("pyarrow"))
+
+
+@td.skip_if_installed("pyarrow")
+def test_pyarrow_not_installed_raises():
+    msg = re.escape("pyarrow>=10.0.1 is required for PyArrow backed")
+
+    with pytest.raises(ImportError, match=msg):
+        StringDtype(storage="pyarrow")
+
+    with pytest.raises(ImportError, match=msg):
+        ArrowStringArray([])
+
+    with pytest.raises(ImportError, match=msg):
+        ArrowStringArrayNumpySemantics([])
+
+    with pytest.raises(ImportError, match=msg):
+        ArrowStringArray._from_sequence(["a", None, "b"])
+
+
+@pytest.mark.parametrize("multiple_chunks", [False, True])
+@pytest.mark.parametrize(
+    "key, value, expected",
+    [
+        (-1, "XX", ["a", "b", "c", "d", "XX"]),
+        (1, "XX", ["a", "XX", "c", "d", "e"]),
+        (1, None, ["a", None, "c", "d", "e"]),
+        (1, pd.NA, ["a", None, "c", "d", "e"]),
+        ([1, 3], "XX", ["a", "XX", "c", "XX", "e"]),
+        ([1, 3], ["XX", "YY"], ["a", "XX", "c", "YY", "e"]),
+        ([1, 3], ["XX", None], ["a", "XX", "c", None, "e"]),
+        ([1, 3], ["XX", pd.NA], ["a", "XX", "c", None, "e"]),
+        ([0, -1], ["XX", "YY"], ["XX", "b", "c", "d", "YY"]),
+        ([-1, 0], ["XX", "YY"], ["YY", "b", "c", "d", "XX"]),
+        (slice(3, None), "XX", ["a", "b", "c", "XX", "XX"]),
+        (slice(2, 4), ["XX", "YY"], ["a", "b", "XX", "YY", "e"]),
+        (slice(3, 1, -1), ["XX", "YY"], ["a", "b", "YY", "XX", "e"]),
+        (slice(None), "XX", ["XX", "XX", "XX", "XX", "XX"]),
+        ([False, True, False, True, False], ["XX", "YY"], ["a", "XX", "c", "YY", "e"]),
+    ],
+)
+def test_setitem(multiple_chunks, key, value, expected):
+    pa = pytest.importorskip("pyarrow")
+
+    result = pa.array(list("abcde"))
+    expected = pa.array(expected)
+
+    if multiple_chunks:
+        result = pa.chunked_array([result[:3], result[3:]])
+        expected = pa.chunked_array([expected[:3], expected[3:]])
+
+    result = ArrowStringArray(result)
+    expected = ArrowStringArray(expected)
+
+    result[key] = value
+    tm.assert_equal(result, expected)
+
+
+def test_setitem_invalid_indexer_raises():
+    pa = pytest.importorskip("pyarrow")
+
+    arr = ArrowStringArray(pa.array(list("abcde")))
+
+    with pytest.raises(IndexError, match=None):
+        arr[5] = "foo"
+
+    with pytest.raises(IndexError, match=None):
+        arr[-6] = "foo"
+
+    with pytest.raises(IndexError, match=None):
+        arr[[0, 5]] = "foo"
+
+    with pytest.raises(IndexError, match=None):
+        arr[[0, -6]] = "foo"
+
+    with pytest.raises(IndexError, match=None):
+        arr[[True, True, False]] = "foo"
+
+    with pytest.raises(ValueError, match=None):
+        arr[[0, 1]] = ["foo", "bar", "baz"]
+
+
+@pytest.mark.parametrize("dtype", ["string[pyarrow]", "string[pyarrow_numpy]"])
+def test_pickle_roundtrip(dtype):
+    # GH 42600
+    pytest.importorskip("pyarrow")
+    expected = pd.Series(range(10), dtype=dtype)
+    expected_sliced = expected.head(2)
+    full_pickled = pickle.dumps(expected)
+    sliced_pickled = pickle.dumps(expected_sliced)
+
+    assert len(full_pickled) > len(sliced_pickled)
+
+    result = pickle.loads(full_pickled)
+    tm.assert_series_equal(result, expected)
+
+    result_sliced = pickle.loads(sliced_pickled)
+    tm.assert_series_equal(result_sliced, expected_sliced)
+
+
+def test_string_dtype_error_message():
+    # GH#55051
+    pytest.importorskip("pyarrow")
+    msg = "Storage must be 'python', 'pyarrow' or 'pyarrow_numpy'."
+    with pytest.raises(ValueError, match=msg):
+        StringDtype("bla")