Updated script that can be controlled by a Node.js web app
This commit is contained in:
@ -0,0 +1,69 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
pytestmark = pytest.mark.filterwarnings(
|
||||
"ignore:Setting a value on a view:FutureWarning"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"cons",
|
||||
[
|
||||
lambda x: DatetimeIndex(x),
|
||||
lambda x: DatetimeIndex(DatetimeIndex(x)),
|
||||
],
|
||||
)
|
||||
def test_datetimeindex(using_copy_on_write, cons):
|
||||
dt = date_range("2019-12-31", periods=3, freq="D")
|
||||
ser = Series(dt)
|
||||
idx = cons(ser)
|
||||
expected = idx.copy(deep=True)
|
||||
ser.iloc[0] = Timestamp("2020-12-31")
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_datetimeindex_tz_convert(using_copy_on_write):
|
||||
dt = date_range("2019-12-31", periods=3, freq="D", tz="Europe/Berlin")
|
||||
ser = Series(dt)
|
||||
idx = DatetimeIndex(ser).tz_convert("US/Eastern")
|
||||
expected = idx.copy(deep=True)
|
||||
ser.iloc[0] = Timestamp("2020-12-31", tz="Europe/Berlin")
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_datetimeindex_tz_localize(using_copy_on_write):
|
||||
dt = date_range("2019-12-31", periods=3, freq="D")
|
||||
ser = Series(dt)
|
||||
idx = DatetimeIndex(ser).tz_localize("Europe/Berlin")
|
||||
expected = idx.copy(deep=True)
|
||||
ser.iloc[0] = Timestamp("2020-12-31")
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_datetimeindex_isocalendar(using_copy_on_write):
|
||||
dt = date_range("2019-12-31", periods=3, freq="D")
|
||||
ser = Series(dt)
|
||||
df = DatetimeIndex(ser).isocalendar()
|
||||
expected = df.index.copy(deep=True)
|
||||
ser.iloc[0] = Timestamp("2020-12-31")
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(df.index, expected)
|
||||
|
||||
|
||||
def test_index_values(using_copy_on_write):
|
||||
idx = date_range("2019-12-31", periods=3, freq="D")
|
||||
result = idx.values
|
||||
if using_copy_on_write:
|
||||
assert result.flags.writeable is False
|
||||
else:
|
||||
assert result.flags.writeable is True
|
@ -0,0 +1,184 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def index_view(index_data=None):
    """Return an index together with a slice of the frame it was built from.

    Parameters
    ----------
    index_data : list-like, optional
        Values for column ``"a"``, which is turned into the index.
        Defaults to ``[1, 2]``.

    Returns
    -------
    tuple
        ``(idx, view)`` where ``idx`` is the index of the frame after
        ``set_index("a", drop=True)`` and ``view`` is a ``df[:]`` slice taken
        before the index was set.
    """
    if index_data is None:
        # Build the default per call; a list literal in the signature would be
        # one object shared by every call.
        index_data = [1, 2]
    df = DataFrame({"a": index_data, "b": 1.5})
    view = df[:]
    df = df.set_index("a", drop=True)
    idx = df.index
    return idx, view
|
||||
|
||||
|
||||
def test_set_index_update_column(using_copy_on_write, warn_copy_on_write):
|
||||
df = DataFrame({"a": [1, 2], "b": 1})
|
||||
df = df.set_index("a", drop=False)
|
||||
expected = df.index.copy(deep=True)
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
df.iloc[0, 0] = 100
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(df.index, expected)
|
||||
else:
|
||||
tm.assert_index_equal(df.index, Index([100, 2], name="a"))
|
||||
|
||||
|
||||
def test_set_index_drop_update_column(using_copy_on_write):
|
||||
df = DataFrame({"a": [1, 2], "b": 1.5})
|
||||
view = df[:]
|
||||
df = df.set_index("a", drop=True)
|
||||
expected = df.index.copy(deep=True)
|
||||
view.iloc[0, 0] = 100
|
||||
tm.assert_index_equal(df.index, expected)
|
||||
|
||||
|
||||
def test_set_index_series(using_copy_on_write, warn_copy_on_write):
|
||||
df = DataFrame({"a": [1, 2], "b": 1.5})
|
||||
ser = Series([10, 11])
|
||||
df = df.set_index(ser)
|
||||
expected = df.index.copy(deep=True)
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
ser.iloc[0] = 100
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(df.index, expected)
|
||||
else:
|
||||
tm.assert_index_equal(df.index, Index([100, 11]))
|
||||
|
||||
|
||||
def test_assign_index_as_series(using_copy_on_write, warn_copy_on_write):
|
||||
df = DataFrame({"a": [1, 2], "b": 1.5})
|
||||
ser = Series([10, 11])
|
||||
df.index = ser
|
||||
expected = df.index.copy(deep=True)
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
ser.iloc[0] = 100
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(df.index, expected)
|
||||
else:
|
||||
tm.assert_index_equal(df.index, Index([100, 11]))
|
||||
|
||||
|
||||
def test_assign_index_as_index(using_copy_on_write, warn_copy_on_write):
|
||||
df = DataFrame({"a": [1, 2], "b": 1.5})
|
||||
ser = Series([10, 11])
|
||||
rhs_index = Index(ser)
|
||||
df.index = rhs_index
|
||||
rhs_index = None # overwrite to clear reference
|
||||
expected = df.index.copy(deep=True)
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
ser.iloc[0] = 100
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(df.index, expected)
|
||||
else:
|
||||
tm.assert_index_equal(df.index, Index([100, 11]))
|
||||
|
||||
|
||||
def test_index_from_series(using_copy_on_write, warn_copy_on_write):
|
||||
ser = Series([1, 2])
|
||||
idx = Index(ser)
|
||||
expected = idx.copy(deep=True)
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
ser.iloc[0] = 100
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(idx, expected)
|
||||
else:
|
||||
tm.assert_index_equal(idx, Index([100, 2]))
|
||||
|
||||
|
||||
def test_index_from_series_copy(using_copy_on_write):
|
||||
ser = Series([1, 2])
|
||||
idx = Index(ser, copy=True) # noqa: F841
|
||||
arr = get_array(ser)
|
||||
ser.iloc[0] = 100
|
||||
assert np.shares_memory(get_array(ser), arr)
|
||||
|
||||
|
||||
def test_index_from_index(using_copy_on_write, warn_copy_on_write):
|
||||
ser = Series([1, 2])
|
||||
idx = Index(ser)
|
||||
idx = Index(idx)
|
||||
expected = idx.copy(deep=True)
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
ser.iloc[0] = 100
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(idx, expected)
|
||||
else:
|
||||
tm.assert_index_equal(idx, Index([100, 2]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        pytest.param(lambda x: x._shallow_copy(x._values), id="_shallow_copy"),
        pytest.param(lambda x: x.view(), id="view"),
        pytest.param(lambda x: x.take([0, 1]), id="take"),
        pytest.param(lambda x: x.repeat([1, 1]), id="repeat"),
        pytest.param(lambda x: x[slice(0, 2)], id="getitem_slice"),
        pytest.param(lambda x: x[[0, 1]], id="getitem_list"),
        pytest.param(lambda x: x._getitem_slice(slice(0, 2)), id="_getitem_slice"),
        pytest.param(lambda x: x.delete([]), id="delete"),
        pytest.param(lambda x: x.rename("b"), id="rename"),
        pytest.param(lambda x: x.astype("Int64", copy=False), id="astype"),
    ],
)
def test_index_ops(using_copy_on_write, func, request):
    """Apply an Index operation, write through the frame slice, compare values.

    For the ``astype`` case the expected index is converted to ``Int64`` first.
    The comparison (names ignored) runs only when copy-on-write is enabled.
    """
    idx, view_ = index_view()
    wanted = idx.copy(deep=True)
    case_id = request.node.callspec.id
    if "astype" in case_id:
        wanted = wanted.astype("Int64")
    idx = func(idx)
    view_.iloc[0, 0] = 100
    if not using_copy_on_write:
        return
    tm.assert_index_equal(idx, wanted, check_names=False)
|
||||
|
||||
|
||||
def test_infer_objects(using_copy_on_write):
    """``infer_objects(copy=False)`` result must not follow writes to the slice.

    The comparison (names ignored) runs only when copy-on-write is enabled.
    """
    idx, view_ = index_view(["a", "b"])
    snapshot = idx.copy(deep=True)
    idx = idx.infer_objects(copy=False)
    view_.iloc[0, 0] = "aaaa"
    if not using_copy_on_write:
        return
    tm.assert_index_equal(idx, snapshot, check_names=False)
|
||||
|
||||
|
||||
def test_index_to_frame(using_copy_on_write):
|
||||
idx = Index([1, 2, 3], name="a")
|
||||
expected = idx.copy(deep=True)
|
||||
df = idx.to_frame()
|
||||
if using_copy_on_write:
|
||||
assert np.shares_memory(get_array(df, "a"), idx._values)
|
||||
assert not df._mgr._has_no_reference(0)
|
||||
else:
|
||||
assert not np.shares_memory(get_array(df, "a"), idx._values)
|
||||
|
||||
df.iloc[0, 0] = 100
|
||||
tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_index_values(using_copy_on_write):
|
||||
idx = Index([1, 2, 3])
|
||||
result = idx.values
|
||||
if using_copy_on_write:
|
||||
assert result.flags.writeable is False
|
||||
else:
|
||||
assert result.flags.writeable is True
|
@ -0,0 +1,30 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Period,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
pytestmark = pytest.mark.filterwarnings(
|
||||
"ignore:Setting a value on a view:FutureWarning"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"cons",
|
||||
[
|
||||
lambda x: PeriodIndex(x),
|
||||
lambda x: PeriodIndex(PeriodIndex(x)),
|
||||
],
|
||||
)
|
||||
def test_periodindex(using_copy_on_write, cons):
|
||||
dt = period_range("2019-12-31", periods=3, freq="D")
|
||||
ser = Series(dt)
|
||||
idx = cons(ser)
|
||||
expected = idx.copy(deep=True)
|
||||
ser.iloc[0] = Period("2020-12-31")
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(idx, expected)
|
@ -0,0 +1,30 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
Timedelta,
|
||||
TimedeltaIndex,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
pytestmark = pytest.mark.filterwarnings(
|
||||
"ignore:Setting a value on a view:FutureWarning"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"cons",
|
||||
[
|
||||
lambda x: TimedeltaIndex(x),
|
||||
lambda x: TimedeltaIndex(TimedeltaIndex(x)),
|
||||
],
|
||||
)
|
||||
def test_timedeltaindex(using_copy_on_write, cons):
|
||||
dt = timedelta_range("1 day", periods=3)
|
||||
ser = Series(dt)
|
||||
idx = cons(ser)
|
||||
expected = idx.copy(deep=True)
|
||||
ser.iloc[0] = Timedelta("5 days")
|
||||
if using_copy_on_write:
|
||||
tm.assert_index_equal(idx, expected)
|
@ -0,0 +1,190 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Copy/view behaviour for accessing underlying array of Series/DataFrame
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[lambda ser: ser.values, lambda ser: np.asarray(ser)],
|
||||
ids=["values", "asarray"],
|
||||
)
|
||||
def test_series_values(using_copy_on_write, method):
|
||||
ser = Series([1, 2, 3], name="name")
|
||||
ser_orig = ser.copy()
|
||||
|
||||
arr = method(ser)
|
||||
|
||||
if using_copy_on_write:
|
||||
# .values still gives a view but is read-only
|
||||
assert np.shares_memory(arr, get_array(ser, "name"))
|
||||
assert arr.flags.writeable is False
|
||||
|
||||
# mutating series through arr therefore doesn't work
|
||||
with pytest.raises(ValueError, match="read-only"):
|
||||
arr[0] = 0
|
||||
tm.assert_series_equal(ser, ser_orig)
|
||||
|
||||
# mutating the series itself still works
|
||||
ser.iloc[0] = 0
|
||||
assert ser.values[0] == 0
|
||||
else:
|
||||
assert arr.flags.writeable is True
|
||||
arr[0] = 0
|
||||
assert ser.iloc[0] == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[lambda df: df.values, lambda df: np.asarray(df)],
|
||||
ids=["values", "asarray"],
|
||||
)
|
||||
def test_dataframe_values(using_copy_on_write, using_array_manager, method):
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
||||
df_orig = df.copy()
|
||||
|
||||
arr = method(df)
|
||||
|
||||
if using_copy_on_write:
|
||||
# .values still gives a view but is read-only
|
||||
assert np.shares_memory(arr, get_array(df, "a"))
|
||||
assert arr.flags.writeable is False
|
||||
|
||||
# mutating series through arr therefore doesn't work
|
||||
with pytest.raises(ValueError, match="read-only"):
|
||||
arr[0, 0] = 0
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
# mutating the series itself still works
|
||||
df.iloc[0, 0] = 0
|
||||
assert df.values[0, 0] == 0
|
||||
else:
|
||||
assert arr.flags.writeable is True
|
||||
arr[0, 0] = 0
|
||||
if not using_array_manager:
|
||||
assert df.iloc[0, 0] == 0
|
||||
else:
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_series_to_numpy(using_copy_on_write):
|
||||
ser = Series([1, 2, 3], name="name")
|
||||
ser_orig = ser.copy()
|
||||
|
||||
# default: copy=False, no dtype or NAs
|
||||
arr = ser.to_numpy()
|
||||
if using_copy_on_write:
|
||||
# to_numpy still gives a view but is read-only
|
||||
assert np.shares_memory(arr, get_array(ser, "name"))
|
||||
assert arr.flags.writeable is False
|
||||
|
||||
# mutating series through arr therefore doesn't work
|
||||
with pytest.raises(ValueError, match="read-only"):
|
||||
arr[0] = 0
|
||||
tm.assert_series_equal(ser, ser_orig)
|
||||
|
||||
# mutating the series itself still works
|
||||
ser.iloc[0] = 0
|
||||
assert ser.values[0] == 0
|
||||
else:
|
||||
assert arr.flags.writeable is True
|
||||
arr[0] = 0
|
||||
assert ser.iloc[0] == 0
|
||||
|
||||
# specify copy=False gives a writeable array
|
||||
ser = Series([1, 2, 3], name="name")
|
||||
arr = ser.to_numpy(copy=True)
|
||||
assert not np.shares_memory(arr, get_array(ser, "name"))
|
||||
assert arr.flags.writeable is True
|
||||
|
||||
# specifying a dtype that already causes a copy also gives a writeable array
|
||||
ser = Series([1, 2, 3], name="name")
|
||||
arr = ser.to_numpy(dtype="float64")
|
||||
assert not np.shares_memory(arr, get_array(ser, "name"))
|
||||
assert arr.flags.writeable is True
|
||||
|
||||
|
||||
@pytest.mark.parametrize("order", ["F", "C"])
|
||||
def test_ravel_read_only(using_copy_on_write, order):
|
||||
ser = Series([1, 2, 3])
|
||||
with tm.assert_produces_warning(FutureWarning, match="is deprecated"):
|
||||
arr = ser.ravel(order=order)
|
||||
if using_copy_on_write:
|
||||
assert arr.flags.writeable is False
|
||||
assert np.shares_memory(get_array(ser), arr)
|
||||
|
||||
|
||||
def test_series_array_ea_dtypes(using_copy_on_write):
|
||||
ser = Series([1, 2, 3], dtype="Int64")
|
||||
arr = np.asarray(ser, dtype="int64")
|
||||
assert np.shares_memory(arr, get_array(ser))
|
||||
if using_copy_on_write:
|
||||
assert arr.flags.writeable is False
|
||||
else:
|
||||
assert arr.flags.writeable is True
|
||||
|
||||
arr = np.asarray(ser)
|
||||
assert np.shares_memory(arr, get_array(ser))
|
||||
if using_copy_on_write:
|
||||
assert arr.flags.writeable is False
|
||||
else:
|
||||
assert arr.flags.writeable is True
|
||||
|
||||
|
||||
def test_dataframe_array_ea_dtypes(using_copy_on_write):
|
||||
df = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
|
||||
arr = np.asarray(df, dtype="int64")
|
||||
assert np.shares_memory(arr, get_array(df, "a"))
|
||||
if using_copy_on_write:
|
||||
assert arr.flags.writeable is False
|
||||
else:
|
||||
assert arr.flags.writeable is True
|
||||
|
||||
arr = np.asarray(df)
|
||||
assert np.shares_memory(arr, get_array(df, "a"))
|
||||
if using_copy_on_write:
|
||||
assert arr.flags.writeable is False
|
||||
else:
|
||||
assert arr.flags.writeable is True
|
||||
|
||||
|
||||
def test_dataframe_array_string_dtype(using_copy_on_write, using_array_manager):
|
||||
df = DataFrame({"a": ["a", "b"]}, dtype="string")
|
||||
arr = np.asarray(df)
|
||||
if not using_array_manager:
|
||||
assert np.shares_memory(arr, get_array(df, "a"))
|
||||
if using_copy_on_write:
|
||||
assert arr.flags.writeable is False
|
||||
else:
|
||||
assert arr.flags.writeable is True
|
||||
|
||||
|
||||
def test_dataframe_multiple_numpy_dtypes():
    """``np.asarray`` of a mixed int/float frame is a separate, writeable array."""
    frame = DataFrame({"a": [1, 2, 3], "b": 1.5})
    combined = np.asarray(frame)
    column_a = get_array(frame, "a")
    assert not np.shares_memory(combined, column_a)
    assert combined.flags.writeable is True
|
||||
|
||||
|
||||
def test_values_is_ea(using_copy_on_write):
|
||||
df = DataFrame({"a": date_range("2012-01-01", periods=3)})
|
||||
arr = np.asarray(df)
|
||||
if using_copy_on_write:
|
||||
assert arr.flags.writeable is False
|
||||
else:
|
||||
assert arr.flags.writeable is True
|
||||
|
||||
|
||||
def test_empty_dataframe():
    """``np.asarray`` of an empty DataFrame yields a writeable array."""
    empty = DataFrame()
    as_array = np.asarray(empty)
    assert as_array.flags.writeable is True
|
@ -0,0 +1,260 @@
|
||||
import pickle
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.pyarrow import pa_version_under12p0
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_astype_single_dtype(using_copy_on_write):
|
||||
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": 1.5})
|
||||
df_orig = df.copy()
|
||||
df2 = df.astype("float64")
|
||||
|
||||
if using_copy_on_write:
|
||||
assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
else:
|
||||
assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
|
||||
# mutating df2 triggers a copy-on-write for that column/block
|
||||
df2.iloc[0, 2] = 5.5
|
||||
if using_copy_on_write:
|
||||
assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
# mutating parent also doesn't update result
|
||||
df2 = df.astype("float64")
|
||||
df.iloc[0, 2] = 5.5
|
||||
tm.assert_frame_equal(df2, df_orig.astype("float64"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
|
||||
@pytest.mark.parametrize("new_dtype", ["int64", "Int64", "int64[pyarrow]"])
|
||||
def test_astype_avoids_copy(using_copy_on_write, dtype, new_dtype):
|
||||
if new_dtype == "int64[pyarrow]":
|
||||
pytest.importorskip("pyarrow")
|
||||
df = DataFrame({"a": [1, 2, 3]}, dtype=dtype)
|
||||
df_orig = df.copy()
|
||||
df2 = df.astype(new_dtype)
|
||||
|
||||
if using_copy_on_write:
|
||||
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
else:
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
|
||||
# mutating df2 triggers a copy-on-write for that column/block
|
||||
df2.iloc[0, 0] = 10
|
||||
if using_copy_on_write:
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
# mutating parent also doesn't update result
|
||||
df2 = df.astype(new_dtype)
|
||||
df.iloc[0, 0] = 100
|
||||
tm.assert_frame_equal(df2, df_orig.astype(new_dtype))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float64", "int32", "Int32", "int32[pyarrow]"])
|
||||
def test_astype_different_target_dtype(using_copy_on_write, dtype):
|
||||
if dtype == "int32[pyarrow]":
|
||||
pytest.importorskip("pyarrow")
|
||||
df = DataFrame({"a": [1, 2, 3]})
|
||||
df_orig = df.copy()
|
||||
df2 = df.astype(dtype)
|
||||
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
if using_copy_on_write:
|
||||
assert df2._mgr._has_no_reference(0)
|
||||
|
||||
df2.iloc[0, 0] = 5
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
# mutating parent also doesn't update result
|
||||
df2 = df.astype(dtype)
|
||||
df.iloc[0, 0] = 100
|
||||
tm.assert_frame_equal(df2, df_orig.astype(dtype))
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test
def test_astype_numpy_to_ea():
    """With copy-on-write switched on, int64 -> ``Int64`` shares memory."""
    source = Series([1, 2, 3])
    with pd.option_context("mode.copy_on_write", True):
        converted = source.astype("Int64")
    assert np.shares_memory(get_array(source), get_array(converted))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype, new_dtype", [("object", "string"), ("string", "object")]
|
||||
)
|
||||
def test_astype_string_and_object(using_copy_on_write, dtype, new_dtype):
|
||||
df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
|
||||
df_orig = df.copy()
|
||||
df2 = df.astype(new_dtype)
|
||||
|
||||
if using_copy_on_write:
|
||||
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
else:
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
|
||||
df2.iloc[0, 0] = "x"
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype, new_dtype", [("object", "string"), ("string", "object")]
|
||||
)
|
||||
def test_astype_string_and_object_update_original(
|
||||
using_copy_on_write, dtype, new_dtype
|
||||
):
|
||||
df = DataFrame({"a": ["a", "b", "c"]}, dtype=dtype)
|
||||
df2 = df.astype(new_dtype)
|
||||
df_orig = df2.copy()
|
||||
|
||||
if using_copy_on_write:
|
||||
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
else:
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
|
||||
df.iloc[0, 0] = "x"
|
||||
tm.assert_frame_equal(df2, df_orig)
|
||||
|
||||
|
||||
def test_astype_string_copy_on_pickle_roundrip():
    """``astype(str)`` on an unpickled Series must not modify that Series.

    See https://github.com/pandas-dev/pandas/issues/54654: the string
    conversion may alter the array it is given in place.
    """
    values = np.array([(1, 2), None, 1], dtype="object")
    base = Series(values)
    payload = pickle.dumps(base)
    base_copy = pickle.loads(payload)
    base_copy.astype(str)
    tm.assert_series_equal(base, base_copy)
|
||||
|
||||
|
||||
def test_astype_dict_dtypes(using_copy_on_write):
|
||||
df = DataFrame(
|
||||
{"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")}
|
||||
)
|
||||
df_orig = df.copy()
|
||||
df2 = df.astype({"a": "float64", "c": "float64"})
|
||||
|
||||
if using_copy_on_write:
|
||||
assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
else:
|
||||
assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
|
||||
# mutating df2 triggers a copy-on-write for that column/block
|
||||
df2.iloc[0, 2] = 5.5
|
||||
if using_copy_on_write:
|
||||
assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
|
||||
df2.iloc[0, 1] = 10
|
||||
if using_copy_on_write:
|
||||
assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
|
||||
tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_astype_different_datetime_resos(using_copy_on_write):
|
||||
df = DataFrame({"a": date_range("2019-12-31", periods=2, freq="D")})
|
||||
result = df.astype("datetime64[ms]")
|
||||
|
||||
assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
|
||||
if using_copy_on_write:
|
||||
assert result._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_astype_different_timezones(using_copy_on_write):
|
||||
df = DataFrame(
|
||||
{"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")}
|
||||
)
|
||||
result = df.astype("datetime64[ns, Europe/Berlin]")
|
||||
if using_copy_on_write:
|
||||
assert not result._mgr._has_no_reference(0)
|
||||
assert np.shares_memory(get_array(df, "a"), get_array(result, "a"))
|
||||
|
||||
|
||||
def test_astype_different_timezones_different_reso(using_copy_on_write):
|
||||
df = DataFrame(
|
||||
{"a": date_range("2019-12-31", periods=5, freq="D", tz="US/Pacific")}
|
||||
)
|
||||
result = df.astype("datetime64[ms, Europe/Berlin]")
|
||||
if using_copy_on_write:
|
||||
assert result._mgr._has_no_reference(0)
|
||||
assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
|
||||
|
||||
|
||||
def test_astype_arrow_timestamp(using_copy_on_write):
|
||||
pytest.importorskip("pyarrow")
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": [
|
||||
Timestamp("2020-01-01 01:01:01.000001"),
|
||||
Timestamp("2020-01-01 01:01:01.000001"),
|
||||
]
|
||||
},
|
||||
dtype="M8[ns]",
|
||||
)
|
||||
result = df.astype("timestamp[ns][pyarrow]")
|
||||
if using_copy_on_write:
|
||||
assert not result._mgr._has_no_reference(0)
|
||||
if pa_version_under12p0:
|
||||
assert not np.shares_memory(
|
||||
get_array(df, "a"), get_array(result, "a")._pa_array
|
||||
)
|
||||
else:
|
||||
assert np.shares_memory(
|
||||
get_array(df, "a"), get_array(result, "a")._pa_array
|
||||
)
|
||||
|
||||
|
||||
def test_convert_dtypes_infer_objects(using_copy_on_write):
|
||||
ser = Series(["a", "b", "c"])
|
||||
ser_orig = ser.copy()
|
||||
result = ser.convert_dtypes(
|
||||
convert_integer=False,
|
||||
convert_boolean=False,
|
||||
convert_floating=False,
|
||||
convert_string=False,
|
||||
)
|
||||
|
||||
if using_copy_on_write:
|
||||
assert np.shares_memory(get_array(ser), get_array(result))
|
||||
else:
|
||||
assert not np.shares_memory(get_array(ser), get_array(result))
|
||||
|
||||
result.iloc[0] = "x"
|
||||
tm.assert_series_equal(ser, ser_orig)
|
||||
|
||||
|
||||
def test_convert_dtypes(using_copy_on_write):
|
||||
df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]})
|
||||
df_orig = df.copy()
|
||||
df2 = df.convert_dtypes()
|
||||
|
||||
if using_copy_on_write:
|
||||
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
assert np.shares_memory(get_array(df2, "d"), get_array(df, "d"))
|
||||
assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
|
||||
assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
else:
|
||||
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
|
||||
assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
|
||||
assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
|
||||
assert not np.shares_memory(get_array(df2, "d"), get_array(df, "d"))
|
||||
|
||||
df2.iloc[0, 0] = "x"
|
||||
tm.assert_frame_equal(df, df_orig)
|
@ -0,0 +1,174 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import PY311
|
||||
from pandas.errors import (
|
||||
ChainedAssignmentError,
|
||||
SettingWithCopyWarning,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_methods_iloc_warn(using_copy_on_write):
|
||||
if not using_copy_on_write:
|
||||
df = DataFrame({"a": [1, 2, 3], "b": 1})
|
||||
with tm.assert_cow_warning(match="A value"):
|
||||
df.iloc[:, 0].replace(1, 5, inplace=True)
|
||||
|
||||
with tm.assert_cow_warning(match="A value"):
|
||||
df.iloc[:, 0].fillna(1, inplace=True)
|
||||
|
||||
with tm.assert_cow_warning(match="A value"):
|
||||
df.iloc[:, 0].interpolate(inplace=True)
|
||||
|
||||
with tm.assert_cow_warning(match="A value"):
|
||||
df.iloc[:, 0].ffill(inplace=True)
|
||||
|
||||
with tm.assert_cow_warning(match="A value"):
|
||||
df.iloc[:, 0].bfill(inplace=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func, args",
|
||||
[
|
||||
("replace", (4, 5)),
|
||||
("fillna", (1,)),
|
||||
("interpolate", ()),
|
||||
("bfill", ()),
|
||||
("ffill", ()),
|
||||
],
|
||||
)
|
||||
def test_methods_iloc_getitem_item_cache(
|
||||
func, args, using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# ensure we don't incorrectly raise chained assignment warning because
|
||||
# of the item cache / iloc not setting the item cache
|
||||
df_orig = DataFrame({"a": [1, 2, 3], "b": 1})
|
||||
|
||||
df = df_orig.copy()
|
||||
ser = df.iloc[:, 0]
|
||||
getattr(ser, func)(*args, inplace=True)
|
||||
|
||||
# parent that holds item_cache is dead, so don't increase ref count
|
||||
df = df_orig.copy()
|
||||
ser = df.copy()["a"]
|
||||
getattr(ser, func)(*args, inplace=True)
|
||||
|
||||
df = df_orig.copy()
|
||||
df["a"] # populate the item_cache
|
||||
ser = df.iloc[:, 0] # iloc creates a new object
|
||||
getattr(ser, func)(*args, inplace=True)
|
||||
|
||||
df = df_orig.copy()
|
||||
df["a"] # populate the item_cache
|
||||
ser = df["a"]
|
||||
getattr(ser, func)(*args, inplace=True)
|
||||
|
||||
df = df_orig.copy()
|
||||
df["a"] # populate the item_cache
|
||||
# TODO(CoW-warn) because of the usage of *args, this doesn't warn on Py3.11+
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error(not PY311):
|
||||
getattr(df["a"], func)(*args, inplace=True)
|
||||
else:
|
||||
with tm.assert_cow_warning(not PY311, match="A value"):
|
||||
getattr(df["a"], func)(*args, inplace=True)
|
||||
|
||||
df = df_orig.copy()
|
||||
ser = df["a"] # populate the item_cache and keep ref
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error(not PY311):
|
||||
getattr(df["a"], func)(*args, inplace=True)
|
||||
else:
|
||||
# ideally also warns on the default mode, but the ser' _cacher
|
||||
# messes up the refcount + even in warning mode this doesn't trigger
|
||||
# the warning of Py3.1+ (see above)
|
||||
with tm.assert_cow_warning(warn_copy_on_write and not PY311, match="A value"):
|
||||
getattr(df["a"], func)(*args, inplace=True)
|
||||
|
||||
|
||||
def test_methods_iloc_getitem_item_cache_fillna(
|
||||
using_copy_on_write, warn_copy_on_write
|
||||
):
|
||||
# ensure we don't incorrectly raise chained assignment warning because
|
||||
# of the item cache / iloc not setting the item cache
|
||||
df_orig = DataFrame({"a": [1, 2, 3], "b": 1})
|
||||
|
||||
df = df_orig.copy()
|
||||
ser = df.iloc[:, 0]
|
||||
ser.fillna(1, inplace=True)
|
||||
|
||||
# parent that holds item_cache is dead, so don't increase ref count
|
||||
df = df_orig.copy()
|
||||
ser = df.copy()["a"]
|
||||
ser.fillna(1, inplace=True)
|
||||
|
||||
df = df_orig.copy()
|
||||
df["a"] # populate the item_cache
|
||||
ser = df.iloc[:, 0] # iloc creates a new object
|
||||
ser.fillna(1, inplace=True)
|
||||
|
||||
df = df_orig.copy()
|
||||
df["a"] # populate the item_cache
|
||||
ser = df["a"]
|
||||
ser.fillna(1, inplace=True)
|
||||
|
||||
df = df_orig.copy()
|
||||
df["a"] # populate the item_cache
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["a"].fillna(1, inplace=True)
|
||||
else:
|
||||
with tm.assert_cow_warning(match="A value"):
|
||||
df["a"].fillna(1, inplace=True)
|
||||
|
||||
df = df_orig.copy()
|
||||
ser = df["a"] # populate the item_cache and keep ref
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["a"].fillna(1, inplace=True)
|
||||
else:
|
||||
# TODO(CoW-warn) ideally also warns on the default mode, but the ser' _cacher
|
||||
# messes up the refcount
|
||||
with tm.assert_cow_warning(warn_copy_on_write, match="A value"):
|
||||
df["a"].fillna(1, inplace=True)
|
||||
|
||||
|
||||
# TODO(CoW-warn) expand the cases
|
||||
@pytest.mark.parametrize(
|
||||
"indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])]
|
||||
)
|
||||
def test_series_setitem(indexer, using_copy_on_write, warn_copy_on_write):
|
||||
# ensure we only get a single warning for those typical cases of chained
|
||||
# assignment
|
||||
df = DataFrame({"a": [1, 2, 3], "b": 1})
|
||||
|
||||
# using custom check instead of tm.assert_produces_warning because that doesn't
|
||||
# fail if multiple warnings are raised
|
||||
with pytest.warns() as record:
|
||||
df["a"][indexer] = 0
|
||||
assert len(record) == 1
|
||||
if using_copy_on_write:
|
||||
assert record[0].category == ChainedAssignmentError
|
||||
else:
|
||||
assert record[0].category == FutureWarning
|
||||
assert "ChainedAssignmentError" in record[0].message.args[0]
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::pandas.errors.SettingWithCopyWarning")
@pytest.mark.parametrize(
    "indexer", ["a", ["a", "b"], slice(0, 2), np.array([True, False, True])]
)
def test_frame_setitem(indexer, using_copy_on_write):
    """Chained ``df[0:3][indexer] = 10`` is reported as chained assignment."""
    frame = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1})

    # Outside of CoW mode a SettingWithCopyWarning is tolerated in addition.
    if using_copy_on_write:
        extra_warnings = ()
    else:
        extra_warnings = (SettingWithCopyWarning,)

    with option_context("chained_assignment", "warn"), \
            tm.raises_chained_assignment_error(extra_warnings=extra_warnings):
        frame[0:3][indexer] = 10
|
101
lib/python3.13/site-packages/pandas/tests/copy_view/test_clip.py
Normal file
101
lib/python3.13/site-packages/pandas/tests/copy_view/test_clip.py
Normal file
@ -0,0 +1,101 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_clip_inplace_reference(using_copy_on_write, warn_copy_on_write):
    """In-place clip that changes values while ``df[:]`` is still alive."""
    df = DataFrame({"a": [1.5, 2, 3]})
    snapshot = df.copy()
    values_before = get_array(df, "a")
    view = df[:]

    if warn_copy_on_write:
        with tm.assert_cow_warning():
            df.clip(lower=2, inplace=True)
    else:
        df.clip(lower=2, inplace=True)

    still_shared = np.shares_memory(get_array(df, "a"), values_before)
    if not using_copy_on_write:
        assert still_shared
        return

    # Under CoW the write went to a fresh array and `view` keeps the old data.
    assert not still_shared
    assert df._mgr._has_no_reference(0)
    assert view._mgr._has_no_reference(0)
    tm.assert_frame_equal(snapshot, view)
|
||||
|
||||
|
||||
def test_clip_inplace_reference_no_op(using_copy_on_write):
    """In-place clip with ``lower=0`` (nothing to clip) keeps the same array."""
    df = DataFrame({"a": [1.5, 2, 3]})
    snapshot = df.copy()
    values_before = get_array(df, "a")
    view = df[:]

    df.clip(lower=0, inplace=True)

    assert np.shares_memory(get_array(df, "a"), values_before)

    if not using_copy_on_write:
        return

    # Both objects still report a reference on block 0.
    assert not df._mgr._has_no_reference(0)
    assert not view._mgr._has_no_reference(0)
    tm.assert_frame_equal(snapshot, view)
|
||||
|
||||
|
||||
def test_clip_inplace(using_copy_on_write):
    """In-place clip on a frame without other references reuses its array."""
    df = DataFrame({"a": [1.5, 2, 3]})
    values_before = get_array(df, "a")

    df.clip(lower=2, inplace=True)
    values_after = get_array(df, "a")

    assert np.shares_memory(values_after, values_before)

    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_clip(using_copy_on_write):
    """Non-inplace clip that changes values returns independent data."""
    df = DataFrame({"a": [1.5, 2, 3]})
    untouched = df.copy()

    clipped = df.clip(lower=2)

    shared = np.shares_memory(get_array(clipped, "a"), get_array(df, "a"))
    assert not shared

    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
    # The input frame is unchanged in every mode.
    tm.assert_frame_equal(untouched, df)
|
||||
|
||||
|
||||
def test_clip_no_op(using_copy_on_write):
    """Non-inplace clip with ``lower=0``: result shares memory only under CoW."""
    df = DataFrame({"a": [1.5, 2, 3]})
    clipped = df.clip(lower=0)

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(clipped, "a"), get_array(df, "a"))
        return

    assert not df._mgr._has_no_reference(0)
    assert np.shares_memory(get_array(clipped, "a"), get_array(df, "a"))
|
||||
|
||||
|
||||
def test_clip_chained_inplace(using_copy_on_write):
    """In-place clip called on a temporary selection of ``df``."""
    df = DataFrame({"a": [1, 4, 2], "b": 1})
    df_orig = df.copy()

    if not using_copy_on_write:
        # Column selection: a FutureWarning about the inplace method.
        with tm.assert_produces_warning(FutureWarning, match="inplace method"):
            df["a"].clip(1, 2, inplace=True)

        # Frame selections: silent once chained_assignment checks are off.
        with tm.assert_produces_warning(None), \
                option_context("mode.chained_assignment", None):
            df[["a"]].clip(1, 2, inplace=True)

        with tm.assert_produces_warning(None), \
                option_context("mode.chained_assignment", None):
            df[df["a"] > 1].clip(1, 2, inplace=True)
        return

    # CoW: both selections are flagged and the parent frame is unchanged.
    with tm.raises_chained_assignment_error():
        df["a"].clip(1, 2, inplace=True)
    tm.assert_frame_equal(df, df_orig)

    with tm.raises_chained_assignment_error():
        df[["a"]].clip(1, 2, inplace=True)
    tm.assert_frame_equal(df, df_orig)
|
@ -0,0 +1,382 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Period,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
Timedelta,
|
||||
TimedeltaIndex,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Copy/view behaviour for Series / DataFrame constructors
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "int64"])
def test_series_from_series(dtype, using_copy_on_write, warn_copy_on_write):
    """Series built from another Series: shared data, CoW rules on writes."""
    ser = Series([1, 2, 3], name="name")

    # copy=False is the default, so the new Series begins as a shallow
    # copy / view of the original and shares its memory
    result = Series(ser, dtype=dtype)
    assert np.shares_memory(get_array(ser), get_array(result))

    if using_copy_on_write:
        assert result._mgr.blocks[0].refs.has_reference()

        # writing to the new Series does not reach the original ...
        result.iloc[0] = 0
        assert ser.iloc[0] == 1
        # ... because the write triggered a copy
        assert not np.shares_memory(get_array(ser), get_array(result))
    else:
        # writing to the shallow copy is visible in the original
        with tm.assert_cow_warning(warn_copy_on_write):
            result.iloc[0] = 0
        assert ser.iloc[0] == 0
        # and the two keep sharing memory
        assert np.shares_memory(get_array(ser), get_array(result))

    # same check in the other direction: write through the parent
    result = Series(ser, dtype=dtype)

    if using_copy_on_write:
        ser.iloc[0] = 0
        expected_first = 1
    else:
        with tm.assert_cow_warning(warn_copy_on_write):
            ser.iloc[0] = 0
        expected_first = 0
    assert result.iloc[0] == expected_first
|
||||
|
||||
|
||||
def test_series_from_series_with_reindex(using_copy_on_write, warn_copy_on_write):
    """Series built from a Series plus an explicit ``index`` argument."""
    ser = Series([1, 2, 3], name="name")

    # Without CoW the original sees the write (0); with CoW it does not (1).
    first_after_write = 1 if using_copy_on_write else 0

    # Indexes that need no actual reindex of the values: the result is a view.
    for index in (
        ser.index,
        ser.index.copy(),
        list(ser.index),
        ser.index.rename("idx"),
    ):
        result = Series(ser, index=index)
        assert np.shares_memory(ser.values, result.values)
        with tm.assert_cow_warning(warn_copy_on_write):
            result.iloc[0] = 0
        assert ser.iloc[0] == first_after_write

    # A real reindex produces new data, so no references may be tracked
    # (a write to the result must not trigger CoW).
    result = Series(ser, index=[0, 1, 2, 3])
    assert not np.shares_memory(ser.values, result.values)
    if using_copy_on_write:
        assert not result._mgr.blocks[0].refs.has_reference()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fastpath", [False, True])
@pytest.mark.parametrize("dtype", [None, "int64"])
@pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)])
@pytest.mark.parametrize(
    "arr", [np.array([1, 2, 3], dtype="int64"), pd.array([1, 2, 3], dtype="Int64")]
)
def test_series_from_array(using_copy_on_write, idx, dtype, fastpath, arr):
    """Series built from a NumPy / masked array: copied under CoW, else a view.

    Parameters
    ----------
    using_copy_on_write : bool
        Fixture telling whether Copy-on-Write mode is active.
    idx : RangeIndex or None
        Index passed to the Series constructor.
    dtype : str or None
        dtype passed to the Series constructor.
    fastpath : bool
        Value for the deprecated ``fastpath`` keyword; forced to False when
        no index is given or a dtype is requested.
    arr : numpy.ndarray or pandas array
        Source values.
    """
    # pytest hands the very same parametrized object to every parameter
    # combination, and this test writes into it below. Work on a private copy
    # so the mutation cannot leak into the other test cases.
    arr = arr.copy()

    if idx is None or dtype is not None:
        fastpath = False
    msg = "The 'fastpath' keyword in pd.Series is deprecated"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        ser = Series(arr, dtype=dtype, index=idx, fastpath=fastpath)
    ser_orig = ser.copy()

    # Masked arrays keep their values in ``_data``; plain ndarrays are used as is.
    data = getattr(arr, "_data", arr)
    if using_copy_on_write:
        assert not np.shares_memory(get_array(ser), data)
    else:
        assert np.shares_memory(get_array(ser), data)

    arr[0] = 100
    if using_copy_on_write:
        # The Series owns its data, so the write to ``arr`` is not visible.
        tm.assert_series_equal(ser, ser_orig)
    else:
        expected = Series([100, 2, 3], dtype=dtype if dtype is not None else arr.dtype)
        tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy", [True, False, None])
def test_series_from_array_different_dtype(using_copy_on_write, copy):
    """int64 array converted to an int32 Series never shares memory."""
    source = np.array([1, 2, 3], dtype="int64")
    converted = Series(source, dtype="int32", copy=copy)
    shared = np.shares_memory(get_array(converted), source)
    assert not shared
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "idx",
    [
        Index([1, 2]),
        DatetimeIndex([Timestamp("2019-12-31"), Timestamp("2020-12-31")]),
        PeriodIndex([Period("2019-12-31"), Period("2020-12-31")]),
        TimedeltaIndex([Timedelta("1 days"), Timedelta("2 days")]),
    ],
)
def test_series_from_index(using_copy_on_write, idx):
    """Writing to a Series built from an Index must leave the Index intact."""
    ser = Series(idx)
    expected = idx.copy(deep=True)

    shared = np.shares_memory(get_array(ser), get_array(idx))
    if not using_copy_on_write:
        assert not shared
    else:
        assert shared
        assert not ser._mgr._has_no_reference(0)

    ser.iloc[0] = ser.iloc[1]
    tm.assert_index_equal(idx, expected)
|
||||
|
||||
|
||||
def test_series_from_index_different_dtypes(using_copy_on_write):
    """int64 Index converted to an int32 Series: new data, no tracked reference."""
    source = Index([1, 2, 3], dtype="int64")
    converted = Series(source, dtype="int32")

    shared = np.shares_memory(get_array(converted), get_array(source))
    assert not shared
    if using_copy_on_write:
        assert converted._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
@pytest.mark.parametrize("fastpath", [False, True])
@pytest.mark.parametrize("dtype", [None, "int64"])
@pytest.mark.parametrize("idx", [None, pd.RangeIndex(start=0, stop=3, step=1)])
def test_series_from_block_manager(using_copy_on_write, idx, dtype, fastpath):
    """Series built from another Series' manager shares data until written to."""
    ser = Series([1, 2, 3], dtype="int64")
    ser_orig = ser.copy()

    msg = "The 'fastpath' keyword in pd.Series is deprecated"
    with tm.assert_produces_warning(DeprecationWarning, match=msg):
        ser2 = Series(ser._mgr, dtype=dtype, fastpath=fastpath, index=idx)

    assert np.shares_memory(get_array(ser), get_array(ser2))
    if using_copy_on_write:
        assert not ser2._mgr._has_no_reference(0)

    ser2.iloc[0] = 100

    # Without CoW the write is visible through the original Series.
    if not using_copy_on_write:
        tm.assert_series_equal(ser, Series([100, 2, 3]))
    else:
        tm.assert_series_equal(ser, ser_orig)
|
||||
|
||||
|
||||
def test_series_from_block_manager_different_dtype(using_copy_on_write):
    """Manager-based construction with a dtype change yields independent data."""
    ser = Series([1, 2, 3], dtype="int64")

    deprecation = "Passing a SingleBlockManager to Series"
    with tm.assert_produces_warning(DeprecationWarning, match=deprecation):
        ser2 = Series(ser._mgr, dtype="int32")

    shared = np.shares_memory(get_array(ser), get_array(ser2))
    assert not shared
    if using_copy_on_write:
        assert ser2._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_mgr", [True, False])
@pytest.mark.parametrize("columns", [None, ["a"]])
def test_dataframe_constructor_mgr_or_df(
    using_copy_on_write, warn_copy_on_write, columns, use_mgr
):
    """DataFrame built from a DataFrame or from its manager shares data."""
    # NOTE(review): `columns` is parametrized but never passed to the
    # constructor below, so both parameter values run the same test body.
    df = DataFrame({"a": [1, 2, 3]})
    df_orig = df.copy()

    # Only the manager input is expected to raise a deprecation warning.
    data = df._mgr if use_mgr else df
    warn = DeprecationWarning if use_mgr else None

    msg = "Passing a BlockManager to DataFrame"
    with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
        new_df = DataFrame(data)

    assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
    with tm.assert_cow_warning(warn_copy_on_write and not use_mgr):
        new_df.iloc[0] = 100

    still_shared = np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
    if not using_copy_on_write:
        assert still_shared
        tm.assert_frame_equal(df, new_df)
    else:
        assert not still_shared
        tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
@pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]])
def test_dataframe_from_dict_of_series(
    request, using_copy_on_write, warn_copy_on_write, columns, index, dtype
):
    """DataFrame from a dict of Series with ``copy=False`` is a lazy copy.

    (The default for DataFrame(dict) is still to copy, to ensure
    consolidation.)
    """
    ctor_kwargs = {"index": index, "columns": columns, "dtype": dtype}

    s1 = Series([1, 2, 3])
    s2 = Series([4, 5, 6])
    s1_orig = s1.copy()
    expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, **ctor_kwargs)

    result = DataFrame({"a": s1, "b": s2}, copy=False, **ctor_kwargs)

    # the shallow copy starts out sharing memory with its input
    assert np.shares_memory(get_array(result, "a"), get_array(s1))

    # write through the new frame
    with tm.assert_cow_warning(warn_copy_on_write):
        result.iloc[0, 0] = 10
    if not using_copy_on_write:
        assert s1.iloc[0] == 10
    else:
        assert not np.shares_memory(get_array(result, "a"), get_array(s1))
        tm.assert_series_equal(s1, s1_orig)

    # rebuild everything and write through the parent Series instead
    s1 = Series([1, 2, 3])
    s2 = Series([4, 5, 6])
    result = DataFrame({"a": s1, "b": s2}, copy=False, **ctor_kwargs)
    with tm.assert_cow_warning(warn_copy_on_write):
        s1.iloc[0] = 10
    if not using_copy_on_write:
        assert result.iloc[0, 0] == 10
    else:
        assert not np.shares_memory(get_array(result, "a"), get_array(s1))
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [None, "int64"])
def test_dataframe_from_dict_of_series_with_reindex(dtype):
    """``copy=False`` plus an index that forces a real reindex.

    The result must not carry references that would make a later write
    copy unnecessarily.
    """
    s1 = Series([1, 2, 3])
    s2 = Series([4, 5, 6])
    df = DataFrame({"a": s1, "b": s2}, index=[1, 2, 3], dtype=dtype, copy=False)

    # df owns its data: it is distinct from s1 and a write stays in place
    values_before = get_array(df, "a")
    assert not np.shares_memory(values_before, get_array(s1))

    df.iloc[0, 0] = 100

    values_after = get_array(df, "a")
    assert np.shares_memory(values_before, values_after)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cons", [Series, Index])
@pytest.mark.parametrize(
    "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
)
def test_dataframe_from_series_or_index(
    using_copy_on_write, warn_copy_on_write, data, dtype, cons
):
    """DataFrame built from a Series or Index shares data with its input."""
    source = cons(data, dtype=dtype)
    source_orig = source.copy()
    df = DataFrame(source, dtype=dtype)

    assert np.shares_memory(get_array(source), get_array(df, 0))
    if using_copy_on_write:
        assert not df._mgr._has_no_reference(0)

    # overwrite the first cell with the last input value
    with tm.assert_cow_warning(warn_copy_on_write):
        df.iloc[0, 0] = data[-1]
    if using_copy_on_write:
        tm.assert_equal(source, source_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("cons", [Series, Index])
def test_dataframe_from_series_or_index_different_dtype(using_copy_on_write, cons):
    """int64 input converted to an int32 frame: new data, no tracked reference."""
    source = cons([1, 2], dtype="int64")
    converted = DataFrame(source, dtype="int32")

    shared = np.shares_memory(get_array(source), get_array(converted, 0))
    assert not shared
    if using_copy_on_write:
        assert converted._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_dataframe_from_series_infer_datetime(using_copy_on_write):
    """Object Series of Timestamps: frame construction warns and copies."""
    stamps = [Timestamp("2019-12-31"), Timestamp("2020-12-31")]
    ser = Series(stamps, dtype=object)

    with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
        df = DataFrame(ser)

    shared = np.shares_memory(get_array(ser), get_array(df, 0))
    assert not shared
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [None, [0, 1, 2]])
def test_dataframe_from_dict_of_series_with_dtype(index):
    """``copy=False`` plus a dtype that forces a conversion of column "a".

    The result must not carry references that would make a later write
    copy unnecessarily.
    """
    s1 = Series([1.0, 2.0, 3.0])
    s2 = Series([4, 5, 6])
    df = DataFrame({"a": s1, "b": s2}, index=index, dtype="int64", copy=False)

    # df owns its data: it is distinct from s1 and a write stays in place
    values_before = get_array(df, "a")
    assert not np.shares_memory(values_before, get_array(s1))

    df.iloc[0, 0] = 100

    values_after = get_array(df, "a")
    assert np.shares_memory(values_before, values_after)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy", [False, None, True])
def test_frame_from_numpy_array(using_copy_on_write, copy, using_array_manager):
    """Which ``copy`` / mode combinations copy a 2D ndarray on construction."""
    arr = np.array([[1, 2], [3, 4]])
    df = DataFrame(arr, copy=copy)

    # Same condition as before, with the operator precedence spelled out.
    expects_copy = (
        (using_copy_on_write and copy is not False)
        or copy is True
        or (using_array_manager and copy is None)
    )

    shared = np.shares_memory(get_array(df, 0), arr)
    if expects_copy:
        assert not shared
    else:
        assert shared
|
||||
|
||||
|
||||
def test_dataframe_from_records_with_dataframe(using_copy_on_write, warn_copy_on_write):
    """``DataFrame.from_records(df)`` warns and returns data shared with ``df``."""
    df = DataFrame({"a": [1, 2, 3]})
    df_orig = df.copy()

    with tm.assert_produces_warning(FutureWarning):
        df2 = DataFrame.from_records(df)

    if using_copy_on_write:
        assert not df._mgr._has_no_reference(0)
    assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))

    with tm.assert_cow_warning(warn_copy_on_write):
        df2.iloc[0, 0] = 100

    # Under CoW the input is unchanged; otherwise both frames saw the write.
    reference = df_orig if using_copy_on_write else df2
    tm.assert_frame_equal(df, reference)
|
||||
|
||||
|
||||
def test_frame_from_dict_of_index(using_copy_on_write):
    """DataFrame from a dict holding an Index, built with ``copy=False``."""
    idx = Index([1, 2, 3])
    expected = idx.copy(deep=True)
    df = DataFrame({"a": idx}, copy=False)

    assert np.shares_memory(get_array(df, "a"), idx._values)

    if not using_copy_on_write:
        return

    assert not df._mgr._has_no_reference(0)

    # A write to the frame must leave the Index as it was.
    df.iloc[0, 0] = 100
    tm.assert_index_equal(idx, expected)
|
@ -0,0 +1,106 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_assigning_to_same_variable_removes_references(using_copy_on_write):
    """Rebinding ``df`` to ``df.reset_index()`` leaves no reference behind."""
    df = DataFrame({"a": [1, 2, 3]})
    df = df.reset_index()
    if using_copy_on_write:
        assert df._mgr._has_no_reference(1)

    values_before = get_array(df, "a")
    # position 1 is column "a" (position 0 is the former index)
    df.iloc[0, 1] = 100

    values_after = get_array(df, "a")
    assert np.shares_memory(values_before, values_after)
|
||||
|
||||
|
||||
def test_setitem_dont_track_unnecessary_references(using_copy_on_write):
    """A write after ``df["b"] = 100`` must not copy column "a"."""
    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})

    df["b"] = 100
    values_before = get_array(df, "a")

    # setitem splits the block; if the new blocks referenced each other,
    # this write would trigger a copy
    df.iloc[0, 0] = 100

    values_after = get_array(df, "a")
    assert np.shares_memory(values_before, values_after)
|
||||
|
||||
|
||||
def test_setitem_with_view_copies(using_copy_on_write, warn_copy_on_write):
    """With ``df[:]`` alive, a write after ``df["b"] = 100`` must copy."""
    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
    view = df[:]
    expected = df.copy()

    df["b"] = 100
    values_before = get_array(df, "a")

    # the reference held by `view` has to be tracked correctly here
    with tm.assert_cow_warning(warn_copy_on_write):
        df.iloc[0, 0] = 100

    if not using_copy_on_write:
        return
    assert not np.shares_memory(values_before, get_array(df, "a"))
    tm.assert_frame_equal(view, expected)
|
||||
|
||||
|
||||
def test_setitem_with_view_invalidated_does_not_copy(
    using_copy_on_write, warn_copy_on_write, request
):
    """Once the view is dropped, a later write should not need a copy."""
    df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
    view = df[:]

    df["b"] = 100
    values_before = get_array(df, "a")
    view = None  # noqa: F841

    # TODO(CoW-warn) false positive? -> the block gets split because of
    # `df["b"] = 100`, which introduces additional refs, even when those of
    # `view` go out of scope
    with tm.assert_cow_warning(warn_copy_on_write):
        df.iloc[0, 0] = 100

    if using_copy_on_write:
        # Setitem split the block. Since the old block shared data with view,
        # all the new blocks reference view and each other. When view goes
        # out of scope they no longer share data with any other block, so no
        # copy should be triggered.
        request.applymarker(
            pytest.mark.xfail(reason="blk.delete does not track references correctly")
        )
        assert np.shares_memory(values_before, get_array(df, "a"))
|
||||
|
||||
|
||||
def test_out_of_scope(using_copy_on_write):
    """A column subset whose parent frame is gone holds no references."""

    def build_subset():
        parent = DataFrame({"a": [1, 2], "b": 1.5, "c": 1})
        # return a subset; `parent` is released when this function exits
        return parent[["a", "b"]]

    result = build_subset()
    if not using_copy_on_write:
        return
    for position in (0, 1):
        assert not result._mgr.blocks[position].refs.has_reference()
|
||||
|
||||
|
||||
def test_delete(using_copy_on_write):
    """Deleting a column, then subsetting, leaves no references on the blocks."""
    df = DataFrame(
        np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"]
    )

    del df["b"]
    if using_copy_on_write:
        for position in (0, 1):
            assert not df._mgr.blocks[position].refs.has_reference()

    # rebinding to the subset drops the previous frame
    df = df[["a"]]
    if using_copy_on_write:
        assert not df._mgr.blocks[0].refs.has_reference()
|
||||
|
||||
|
||||
def test_delete_reference(using_copy_on_write):
    """Deleting a column while ``df[:]`` is alive keeps the references."""
    df = DataFrame(
        np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"]
    )
    x = df[:]

    del df["b"]

    if not using_copy_on_write:
        return
    assert df._mgr.blocks[0].refs.has_reference()
    assert df._mgr.blocks[1].refs.has_reference()
    assert x._mgr.blocks[0].refs.has_reference()
|
@ -0,0 +1,396 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
concat,
|
||||
merge,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_concat_frames(using_copy_on_write):
    """Column-wise concat of two frames, then writes to the result."""
    df = DataFrame({"b": ["a"] * 3})
    df2 = DataFrame({"a": ["a"] * 3})
    df_orig = df.copy()
    result = concat([df, df2], axis=1)

    def shares(column, source):
        # True when `result[column]` and `source[column]` share memory.
        return np.shares_memory(get_array(result, column), get_array(source, column))

    if using_copy_on_write:
        assert shares("b", df)
        assert shares("a", df2)
    else:
        assert not shares("b", df)
        assert not shares("a", df2)

    # write into column "b": only that column is copied
    result.iloc[0, 0] = "d"
    if using_copy_on_write:
        assert not shares("b", df)
        assert shares("a", df2)

    # write into column "a"
    result.iloc[0, 1] = "d"
    if using_copy_on_write:
        assert not shares("a", df2)
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_concat_frames_updating_input(using_copy_on_write):
    """Column-wise concat of two frames, then writes to the inputs."""
    df = DataFrame({"b": ["a"] * 3})
    df2 = DataFrame({"a": ["a"] * 3})
    result = concat([df, df2], axis=1)

    def shares(column, source):
        # True when `result[column]` and `source[column]` share memory.
        return np.shares_memory(get_array(result, column), get_array(source, column))

    if using_copy_on_write:
        assert shares("b", df)
        assert shares("a", df2)
    else:
        assert not shares("b", df)
        assert not shares("a", df2)

    expected = result.copy()

    # write into the first input
    df.iloc[0, 0] = "d"
    if using_copy_on_write:
        assert not shares("b", df)
        assert shares("a", df2)

    # write into the second input
    df2.iloc[0, 0] = "d"
    if using_copy_on_write:
        assert not shares("a", df2)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_series(using_copy_on_write):
    """Column-wise concat of two Series, then writes to the result."""
    ser = Series([1, 2], name="a")
    ser2 = Series([3, 4], name="b")
    ser_orig = ser.copy()
    ser2_orig = ser2.copy()
    result = concat([ser, ser2], axis=1)

    def shares(column, source):
        # True when `result[column]` shares memory with `source.values`.
        return np.shares_memory(get_array(result, column), source.values)

    if using_copy_on_write:
        assert shares("a", ser)
        assert shares("b", ser2)
    else:
        assert not shares("a", ser)
        assert not shares("b", ser2)

    # write into column "a": only that column is copied
    result.iloc[0, 0] = 100
    if using_copy_on_write:
        assert not shares("a", ser)
        assert shares("b", ser2)

    # write into column "b"
    result.iloc[0, 1] = 1000
    if using_copy_on_write:
        assert not shares("b", ser2)
    tm.assert_series_equal(ser, ser_orig)
    tm.assert_series_equal(ser2, ser2_orig)
|
||||
|
||||
|
||||
def test_concat_frames_chained(using_copy_on_write):
    """Nested column-wise concat of three frames, then a write to an input."""
    df1 = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    df2 = DataFrame({"c": [4, 5, 6]})
    df3 = DataFrame({"d": [4, 5, 6]})
    result = concat([concat([df1, df2], axis=1), df3], axis=1)
    expected = result.copy()

    def shares(column, source):
        # True when `result[column]` and `source[column]` share memory.
        return np.shares_memory(get_array(result, column), get_array(source, column))

    if using_copy_on_write:
        assert shares("a", df1)
        assert shares("c", df2)
        assert shares("d", df3)
    else:
        assert not shares("a", df1)
        assert not shares("c", df2)
        assert not shares("d", df3)

    df1.iloc[0, 0] = 100
    if using_copy_on_write:
        assert not shares("a", df1)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_series_chained(using_copy_on_write):
    """Nested column-wise concat of three Series, then a write to an input."""
    ser1 = Series([1, 2, 3], name="a")
    ser2 = Series([4, 5, 6], name="c")
    ser3 = Series([4, 5, 6], name="d")
    result = concat([concat([ser1, ser2], axis=1), ser3], axis=1)
    expected = result.copy()

    def shares(column, source):
        # True when `result[column]` and `source` share memory.
        return np.shares_memory(get_array(result, column), get_array(source, column))

    if using_copy_on_write:
        assert shares("a", ser1)
        assert shares("c", ser2)
        assert shares("d", ser3)
    else:
        assert not shares("a", ser1)
        assert not shares("c", ser2)
        assert not shares("d", ser3)

    ser1.iloc[0] = 100
    if using_copy_on_write:
        assert not shares("a", ser1)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_series_updating_input(using_copy_on_write):
    """Column-wise concat of two Series, then writes to the inputs."""
    ser = Series([1, 2], name="a")
    ser2 = Series([3, 4], name="b")
    expected = DataFrame({"a": [1, 2], "b": [3, 4]})
    result = concat([ser, ser2], axis=1)

    def shares(column, source):
        # True when `result[column]` and `source` share memory.
        return np.shares_memory(get_array(result, column), get_array(source, column))

    if using_copy_on_write:
        assert shares("a", ser)
        assert shares("b", ser2)
    else:
        assert not shares("a", ser)
        assert not shares("b", ser2)

    # write into the first input
    ser.iloc[0] = 100
    if using_copy_on_write:
        assert not shares("a", ser)
        assert shares("b", ser2)
    tm.assert_frame_equal(result, expected)

    # write into the second input
    ser2.iloc[0] = 1000
    if using_copy_on_write:
        assert not shares("b", ser2)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_concat_mixed_series_frame(using_copy_on_write):
    """Column-wise concat of a frame and a Series, then writes to the inputs."""
    df = DataFrame({"a": [1, 2, 3], "c": 1})
    ser = Series([4, 5, 6], name="d")
    result = concat([df, ser], axis=1)
    expected = result.copy()

    def shares(column, source):
        # True when `result[column]` and `source` share memory.
        return np.shares_memory(get_array(result, column), get_array(source, column))

    if using_copy_on_write:
        assert shares("a", df)
        assert shares("c", df)
        assert shares("d", ser)
    else:
        assert not shares("a", df)
        assert not shares("c", df)
        assert not shares("d", ser)

    # write into the Series input
    ser.iloc[0] = 100
    if using_copy_on_write:
        assert not shares("d", ser)

    # write into the frame input
    df.iloc[0, 0] = 100
    if using_copy_on_write:
        assert not shares("a", df)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy", [True, None, False])
def test_concat_copy_keyword(using_copy_on_write, copy):
    """Effect of concat's ``copy`` keyword on memory sharing with the inputs."""
    df = DataFrame({"a": [1, 2]})
    df2 = DataFrame({"b": [1.5, 2.5]})

    result = concat([df, df2], axis=1, copy=copy)

    shares_a = np.shares_memory(get_array(df, "a"), get_array(result, "a"))
    shares_b = np.shares_memory(get_array(df2, "b"), get_array(result, "b"))

    # Data is shared under CoW, or when copy=False is passed explicitly.
    if using_copy_on_write or copy is False:
        assert shares_a
        assert shares_b
    else:
        assert not shares_a
        assert not shares_b
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        lambda df1, df2, **kwargs: df1.merge(df2, **kwargs),
        lambda df1, df2, **kwargs: merge(df1, df2, **kwargs),
    ],
)
def test_merge_on_key(using_copy_on_write, func):
    """Merge on a shared key column, then writes to the result."""
    df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
    df2 = DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]})
    df1_orig = df1.copy()
    df2_orig = df2.copy()

    result = func(df1, df2, on="key")

    def shares(column, source):
        # True when `result[column]` and `source[column]` share memory.
        return np.shares_memory(get_array(result, column), get_array(source, column))

    if using_copy_on_write:
        assert shares("a", df1)
        assert shares("b", df2)
        # the key column is shared with the left frame only
        assert shares("key", df1)
        assert not shares("key", df2)
    else:
        assert not shares("a", df1)
        assert not shares("b", df2)

    # write into column "a": only that column is copied
    result.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not shares("a", df1)
        assert shares("b", df2)

    # write into column "b"
    result.iloc[0, 2] = 0
    if using_copy_on_write:
        assert not shares("b", df2)
    tm.assert_frame_equal(df1, df1_orig)
    tm.assert_frame_equal(df2, df2_orig)
|
||||
|
||||
|
||||
def test_merge_on_index(using_copy_on_write):
    """Merge two frames on their indexes and check memory sharing."""
    left = DataFrame({"a": [1, 2, 3]})
    right = DataFrame({"b": [4, 5, 6]})
    left_orig, right_orig = left.copy(), right.copy()

    result = merge(left, right, left_index=True, right_index=True)

    def shares(parent, col):
        # Looked up at call time so each check sees the current arrays.
        return np.shares_memory(get_array(result, col), get_array(parent, col))

    # Right after the merge, sharing depends only on the CoW mode.
    assert shares(left, "a") == bool(using_copy_on_write)
    assert shares(right, "b") == bool(using_copy_on_write)

    # Writing to "a" must only detach "a".
    result.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not shares(left, "a")
        assert shares(right, "b")

    # Writing to "b" detaches the remaining shared column.
    result.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not shares(right, "b")

    tm.assert_frame_equal(left, left_orig)
    tm.assert_frame_equal(right, right_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func, how",
    [
        (lambda df1, df2, **kwargs: merge(df2, df1, on="key", **kwargs), "right"),
        (lambda df1, df2, **kwargs: merge(df1, df2, on="key", **kwargs), "left"),
    ],
)
def test_merge_on_key_enlarging_one(using_copy_on_write, func, how):
    """Merge where the shorter frame has to be enlarged to match the longer one.

    ``df1`` has three keys and ``df2`` only two; ``how`` is chosen so that
    ``df1`` is always the side whose rows are all kept.
    """
    df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
    df2 = DataFrame({"key": ["a", "b"], "b": [4, 5]})
    df1_orig, df2_orig = df1.copy(), df2.copy()

    result = func(df1, df2, how=how)

    def shares(parent, col):
        # Looked up at call time so each check sees the current arrays.
        return np.shares_memory(get_array(result, col), get_array(parent, col))

    if using_copy_on_write:
        assert shares(df1, "a")
        # df2's data is not shared and df2 must not carry any reference.
        assert not shares(df2, "b")
        assert df2._mgr._has_no_reference(1)
        assert df2._mgr._has_no_reference(0)
        # The key column is shared with df1 only for the "left" variant.
        assert shares(df1, "key") is (how == "left")
        assert not shares(df2, "key")
    else:
        assert not shares(df1, "a")
        assert not shares(df2, "b")

    # Column "a" sits at position 1 for "left" and at position 2 for "right".
    a_position = 1 if how == "left" else 2
    result.iloc[0, a_position] = 0
    if using_copy_on_write:
        assert not shares(df1, "a")

    tm.assert_frame_equal(df1, df1_orig)
    tm.assert_frame_equal(df2, df2_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy", [True, None, False])
def test_merge_copy_keyword(using_copy_on_write, copy):
    """Check how the ``copy`` keyword of ``DataFrame.merge`` affects sharing."""
    left = DataFrame({"a": [1, 2]})
    right = DataFrame({"b": [3, 4.5]})

    result = left.merge(right, copy=copy, left_index=True, right_index=True)

    # Data is expected to be shared under CoW, or when copy=False is requested.
    expect_shared = bool(using_copy_on_write or copy is False)

    assert (
        np.shares_memory(get_array(left, "a"), get_array(result, "a"))
        == expect_shared
    )
    assert (
        np.shares_memory(get_array(right, "b"), get_array(result, "b"))
        == expect_shared
    )
|
||||
|
||||
|
||||
def test_join_on_key(using_copy_on_write):
    """Join two frames that have equal, independently copied ``key`` indexes."""
    key_index = Index(["a", "b", "c"], name="key")

    # Each frame gets its own deep copy so the index buffers start unrelated.
    left = DataFrame({"a": [1, 2, 3]}, index=key_index.copy(deep=True))
    right = DataFrame({"b": [4, 5, 6]}, index=key_index.copy(deep=True))

    left_orig, right_orig = left.copy(), right.copy()

    result = left.join(right, on="key")

    def shares(parent, col):
        # Looked up at call time so each check sees the current arrays.
        return np.shares_memory(get_array(result, col), get_array(parent, col))

    if using_copy_on_write:
        assert shares(left, "a")
        assert shares(right, "b")
        # The result index is expected to be shared with the left frame only.
        assert np.shares_memory(get_array(result.index), get_array(left.index))
        assert not np.shares_memory(get_array(result.index), get_array(right.index))
    else:
        assert not shares(left, "a")
        assert not shares(right, "b")

    # Writing to "a" must only detach "a".
    result.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not shares(left, "a")
        assert shares(right, "b")

    # Writing to "b" detaches the remaining shared column.
    result.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not shares(right, "b")

    tm.assert_frame_equal(left, left_orig)
    tm.assert_frame_equal(right, right_orig)
|
||||
|
||||
|
||||
def test_join_multiple_dataframes_on_key(using_copy_on_write):
    """Join one frame with a list of two others and track sharing per column."""
    key_index = Index(["a", "b", "c"], name="key")

    base = DataFrame({"a": [1, 2, 3]}, index=key_index.copy(deep=True))
    others = [
        DataFrame({"b": [4, 5, 6]}, index=key_index.copy(deep=True)),
        DataFrame({"c": [7, 8, 9]}, index=key_index.copy(deep=True)),
    ]
    other_b, other_c = others

    base_orig = base.copy()
    others_orig = [frame.copy() for frame in others]

    result = base.join(others)

    def shares(parent, col):
        # Looked up at call time so each check sees the current arrays.
        return np.shares_memory(get_array(result, col), get_array(parent, col))

    def shares_index(parent):
        return np.shares_memory(get_array(result.index), get_array(parent.index))

    if using_copy_on_write:
        assert shares(base, "a")
        assert shares(other_b, "b")
        assert shares(other_c, "c")
        # The result index is expected to be shared with the calling frame only.
        assert shares_index(base)
        assert not shares_index(other_b)
        assert not shares_index(other_c)
    else:
        assert not shares(base, "a")
        assert not shares(other_b, "b")
        assert not shares(other_c, "c")

    # Detach the columns one at a time; the others must stay shared.
    result.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not shares(base, "a")
        assert shares(other_b, "b")
        assert shares(other_c, "c")

    result.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not shares(other_b, "b")
        assert shares(other_c, "c")

    result.iloc[0, 2] = 0
    if using_copy_on_write:
        assert not shares(other_c, "c")

    # None of the inputs may have been modified through the result.
    tm.assert_frame_equal(base, base_orig)
    for frame, frame_orig in zip(others, others_orig):
        tm.assert_frame_equal(frame, frame_orig)
|
1266
lib/python3.13/site-packages/pandas/tests/copy_view/test_indexing.py
Normal file
1266
lib/python3.13/site-packages/pandas/tests/copy_view/test_indexing.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,151 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test
def test_consolidate(using_copy_on_write):
    """Check block reference tracking across an in-place consolidation."""
    # Adding "c" after construction leaves the frame unconsolidated.
    df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    df["c"] = [4, 5, 6]

    # A full slice gives a subset that views the parent.
    subset = df[:]
    subset_blocks = subset._mgr.blocks

    # Every block of the subset references a block of df.
    for blk in subset_blocks:
        assert blk.refs.has_reference()

    # Merge the two int64 blocks of the subset.
    subset._consolidate_inplace()

    # The float64 block is still a view, so it keeps referencing the parent.
    assert subset._mgr.blocks[0].refs.has_reference()
    # Same check as np.shares_memory(df["b"].values, subset["b"].values),
    # written this way to avoid caching df["b"].
    assert np.shares_memory(get_array(df, "b"), get_array(subset, "b"))

    # The newly consolidated int64 block references nothing else.
    assert not subset._mgr.blocks[1].refs.has_reference()

    # On the parent side only the float column remains linked.
    parent_blocks = df._mgr.blocks
    assert not parent_blocks[0].refs.has_reference()
    assert parent_blocks[1].refs.has_reference()
    assert not parent_blocks[2].refs.has_reference()

    # Writing to the subset still must not change the parent.
    if using_copy_on_write:
        subset.iloc[0, 1] = 0.0
        assert not df._mgr.blocks[1].refs.has_reference()
        assert df.loc[0, "b"] == 0.1
|
||||
|
||||
|
||||
@pytest.mark.single_cpu
@td.skip_array_manager_invalid_test
def test_switch_options():
    """Check the CoW option can be toggled within one session.

    This assumes the data is constructed after each switch.
    """

    def first_value_after_subset_write():
        # Build a fresh frame, write through a full slice and report what
        # the parent frame holds afterwards.
        df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
        subset = df[:]
        subset.iloc[0, 0] = 0
        return df.iloc[0, 0]

    # option_context makes sure the global option value is restored once
    # the test is done.
    with pd.option_context("mode.copy_on_write", False):
        # CoW disabled: the parent is updated.
        assert first_value_after_subset_write() == 0

        pd.options.mode.copy_on_write = True
        # CoW enabled: the parent is left untouched.
        assert first_value_after_subset_write() == 1

        pd.options.mode.copy_on_write = False
        # CoW disabled again: the parent is updated.
        assert first_value_after_subset_write() == 0
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test
@pytest.mark.parametrize("dtype", [np.intp, np.int8])
@pytest.mark.parametrize(
    "locs, arr",
    [
        ([0], np.array([-1, -2, -3])),
        ([1], np.array([-1, -2, -3])),
        ([5], np.array([-1, -2, -3])),
        ([0, 1], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
        ([0, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
        ([0, 1, 2], np.array([[-1, -2, -3], [-4, -5, -6], [-4, -5, -6]]).T),
        ([1, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
        # The ([1, 3], ...) case used to be listed twice; the exact
        # duplicate was dropped since it only re-ran an identical test.
        ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
    ],
)
def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype):
    """Call the manager's ``iset`` with ``inplace=True`` on a shallow copy.

    Nothing currently calls ``iset`` with more than one loc together with
    ``inplace=True`` (that only happens with ``inplace=False``), but this
    makes sure it works anyway.

    Parameters
    ----------
    locs : list of int
        Column positions handed to ``iset``.
    arr : np.ndarray
        Replacement values, cast to ``dtype`` before being set.
    dtype : numpy integer type
        Dtype the replacement values are converted to.
    """
    df = DataFrame(
        {
            "a": [1, 2, 3],
            "b": [4, 5, 6],
            "c": [7, 8, 9],
            "d": [10, 11, 12],
            "e": [13, 14, 15],
            "f": ["a", "b", "c"],
        },
    )
    arr = arr.astype(dtype)
    df_orig = df.copy()
    df2 = df.copy(deep=None)  # Trigger a CoW (if enabled, otherwise makes copy)
    df2._mgr.iset(locs, arr, inplace=True)

    # The frame the copy was taken from must be unaffected.
    tm.assert_frame_equal(df, df_orig)

    if using_copy_on_write:
        # Columns that were not set must still share data with the parent.
        for i, col in enumerate(df.columns):
            if i not in locs:
                assert np.shares_memory(get_array(df, col), get_array(df2, col))
    else:
        # Without CoW the copy is a real copy, so nothing is shared.
        for col in df.columns:
            assert not np.shares_memory(get_array(df, col), get_array(df2, col))
|
||||
|
||||
|
||||
def test_exponential_backoff():
    """Follow ``referenced_blocks`` and ``clear_counter`` while copies churn.

    See GH#55518.
    """

    def churn(frame, times):
        # Create shallow copies that are thrown away immediately.
        for _ in range(times):
            frame.copy(deep=False)

    df = DataFrame({"a": [1, 2, 3]})
    churn(df, 490)

    assert len(df._mgr.blocks[0].refs.referenced_blocks) == 491

    df = DataFrame({"a": [1, 2, 3]})
    # These copies are deliberately kept alive in a list.
    dfs = [df.copy(deep=False) for _ in range(510)]

    churn(df, 20)
    refs = df._mgr.blocks[0].refs
    assert len(refs.referenced_blocks) == 531
    assert refs.clear_counter == 1000

    churn(df, 500)

    # Don't reduce since we still have over 500 objects alive
    assert df._mgr.blocks[0].refs.clear_counter == 1000

    # Release all but 300 of the kept copies.
    dfs = dfs[:300]
    churn(df, 500)

    # Reduce since there are less than 500 objects alive
    assert df._mgr.blocks[0].refs.clear_counter == 500
|
@ -0,0 +1,432 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
ArrowDtype,
|
||||
DataFrame,
|
||||
Interval,
|
||||
NaT,
|
||||
Series,
|
||||
Timestamp,
|
||||
interval_range,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["pad", "nearest", "linear"])
def test_interpolate_no_op(using_copy_on_write, method):
    """Interpolating a frame that has no missing values."""
    df = DataFrame({"a": [1, 2]})
    df_orig = df.copy()

    # Only method="pad" is expected to emit the deprecation warning.
    warn = FutureWarning if method == "pad" else None
    msg = "DataFrame.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        result = df.interpolate(method=method)

    def shares():
        return np.shares_memory(get_array(result, "a"), get_array(df, "a"))

    assert shares() == bool(using_copy_on_write)

    result.iloc[0, 0] = 100

    if using_copy_on_write:
        # The write has to detach the result from the parent.
        assert not shares()
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["ffill", "bfill"])
def test_interp_fill_functions(using_copy_on_write, func):
    """``ffill``/``bfill`` on a frame that has no missing values.

    Check that these take the same code paths as interpolate.
    """
    df = DataFrame({"a": [1, 2]})
    df_orig = df.copy()

    fill = getattr(df, func)
    result = fill()

    def shares():
        return np.shares_memory(get_array(result, "a"), get_array(df, "a"))

    assert shares() == bool(using_copy_on_write)

    result.iloc[0, 0] = 100

    if using_copy_on_write:
        # The write has to detach the result from the parent.
        assert not shares()
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["ffill", "bfill"])
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_triggers_copy(using_copy_on_write, vals, func):
    """Filling data that has a missing value must yield an unshared result."""
    df = DataFrame({"a": vals})
    fill = getattr(df, func)
    result = fill()

    result_arr = get_array(result, "a")
    assert not np.shares_memory(result_arr, get_array(df, "a"))
    if using_copy_on_write:
        # Check that we don't have references when triggering a copy
        assert result._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals):
    """In-place interpolate on an unreferenced frame keeps the same buffer."""
    df = DataFrame({"a": vals})
    arr_before = get_array(df, "a")
    df.interpolate(method="linear", inplace=True)

    # The frame must still use the array it started with.
    assert np.shares_memory(arr_before, get_array(df, "a"))
    if using_copy_on_write:
        # No reference may be attached to the block afterwards.
        assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_with_refs(using_copy_on_write, vals, warn_copy_on_write):
    """In-place interpolate while a view of the frame is alive."""
    # NOTE(review): ``vals`` is parametrized but never used; the frame is
    # always built from the float data below. Confirm whether that is intended.
    df = DataFrame({"a": [1, np.nan, 2]})
    df_orig = df.copy()
    arr_before = get_array(df, "a")
    view = df[:]
    with tm.assert_cow_warning(warn_copy_on_write):
        df.interpolate(method="linear", inplace=True)

    still_shared = np.shares_memory(arr_before, get_array(df, "a"))
    if not using_copy_on_write:
        assert still_shared
    else:
        # Check that copy was triggered in interpolate and that we don't
        # have any references left
        assert not still_shared
        tm.assert_frame_equal(df_orig, view)
        assert df._mgr._has_no_reference(0)
        assert view._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["ffill", "bfill"])
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
def test_interp_fill_functions_inplace(
    using_copy_on_write, func, warn_copy_on_write, dtype
):
    """In-place ``ffill``/``bfill`` while a view of the frame is alive.

    Check that these take the same code paths as interpolate.
    """
    df = DataFrame({"a": [1, np.nan, 2]}, dtype=dtype)
    df_orig = df.copy()
    arr_before = get_array(df, "a")
    view = df[:]

    # The CoW warning is only expected for the numpy float64 dtype.
    expect_warning = warn_copy_on_write and dtype == "float64"
    with tm.assert_cow_warning(expect_warning):
        getattr(df, func)(inplace=True)

    still_shared = np.shares_memory(arr_before, get_array(df, "a"))
    if not using_copy_on_write:
        # Without CoW only the numpy dtype keeps using the original buffer.
        assert still_shared is (dtype == "float64")
    else:
        # Check that copy was triggered in interpolate and that we don't
        # have any references left
        assert not still_shared
        tm.assert_frame_equal(df_orig, view)
        assert df._mgr._has_no_reference(0)
        assert view._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_interpolate_cleaned_fill_method(using_copy_on_write):
    """Linear interpolate on a frame holding an object column.

    Check that the "method is set to None" case works correctly.
    """
    df = DataFrame({"a": ["a", np.nan, "c"], "b": 1})
    df_orig = df.copy()

    msg = "DataFrame.interpolate with object dtype"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = df.interpolate(method="linear")

    def shares():
        return np.shares_memory(get_array(result, "a"), get_array(df, "a"))

    assert shares() == bool(using_copy_on_write)

    result.iloc[0, 0] = Timestamp("2021-12-31")

    if using_copy_on_write:
        # The write has to detach the result from the parent.
        assert not shares()
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_interpolate_object_convert_no_op(using_copy_on_write):
    """In-place pad-interpolate on an object column without missing values."""
    df = DataFrame({"a": ["a", "b", "c"], "b": 1})
    arr_before = get_array(df, "a")
    msg = "DataFrame.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.interpolate(method="pad", inplace=True)

    # NOTE(review): an earlier comment here read "Now CoW makes a copy, it
    # should not!", yet the assertion below expects the buffer to be kept.
    # The comment looks stale - confirm.
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
        assert np.shares_memory(arr_before, get_array(df, "a"))
|
||||
|
||||
|
||||
def test_interpolate_object_convert_copies(using_copy_on_write):
    """In-place pad-interpolate on an object column that holds integers."""
    obj_ints = Series([1, 2], dtype=object)
    df = DataFrame({"a": obj_ints, "b": 1})
    arr_before = get_array(df, "a")
    msg = "DataFrame.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.interpolate(method="pad", inplace=True)

    if using_copy_on_write:
        # The column no longer uses its original buffer and is unreferenced.
        assert df._mgr._has_no_reference(0)
        assert not np.shares_memory(arr_before, get_array(df, "a"))
|
||||
|
||||
|
||||
def test_interpolate_downcast(using_copy_on_write):
    """In-place pad-interpolate with ``downcast="infer"`` and no references."""
    df = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    arr_before = get_array(df, "a")
    msg = "DataFrame.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.interpolate(method="pad", inplace=True, downcast="infer")

    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
    # The column must still live in the buffer it started with.
    assert np.shares_memory(arr_before, get_array(df, "a"))
|
||||
|
||||
|
||||
def test_interpolate_downcast_reference_triggers_copy(using_copy_on_write):
    """In-place pad-interpolate with downcast while a view is alive."""
    df = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    df_orig = df.copy()
    arr_before = get_array(df, "a")
    view = df[:]
    msg = "DataFrame.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.interpolate(method="pad", inplace=True, downcast="infer")

    if not using_copy_on_write:
        # Without CoW the view sees the same data as the frame.
        tm.assert_frame_equal(df, view)
    else:
        # With CoW the frame moved to a new buffer and the view is untouched.
        assert df._mgr._has_no_reference(0)
        assert not np.shares_memory(arr_before, get_array(df, "a"))
        tm.assert_frame_equal(df_orig, view)
|
||||
|
||||
|
||||
def test_fillna(using_copy_on_write):
    """``fillna`` with a scalar: the column without NaN is checked for sharing."""
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    df_orig = df.copy()

    filled = df.fillna(5.5)
    b_shared = np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
    assert b_shared == bool(using_copy_on_write)

    # Writing to the result must not leak into the original frame.
    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(df_orig, df)
|
||||
|
||||
|
||||
def test_fillna_dict(using_copy_on_write):
    """``fillna`` with a per-column dict that only names column "a"."""
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    df_orig = df.copy()

    filled = df.fillna({"a": 100.5})

    def shares(col):
        return np.shares_memory(get_array(df, col), get_array(filled, col))

    if not using_copy_on_write:
        assert not shares("b")
    else:
        # "b" is expected to stay shared, "a" is not.
        assert shares("b")
        assert not shares("a")

    # Writing to the result must not leak into the original frame.
    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(df_orig, df)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("downcast", [None, False])
def test_fillna_inplace(using_copy_on_write, downcast):
    """In-place ``fillna`` on an unreferenced frame keeps both buffers."""
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    buffers = {col: get_array(df, col) for col in ("a", "b")}

    msg = "The 'downcast' keyword in fillna is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.fillna(5.5, inplace=True, downcast=downcast)

    # Both columns must still use the arrays they started with.
    assert np.shares_memory(get_array(df, "a"), buffers["a"])
    assert np.shares_memory(get_array(df, "b"), buffers["b"])
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
        assert df._mgr._has_no_reference(1)
|
||||
|
||||
|
||||
def test_fillna_inplace_reference(using_copy_on_write, warn_copy_on_write):
    """In-place ``fillna`` while a view of the frame is alive."""
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    df_orig = df.copy()
    arr_a = get_array(df, "a")
    arr_b = get_array(df, "b")
    view = df[:]

    with tm.assert_cow_warning(warn_copy_on_write):
        df.fillna(5.5, inplace=True)

    a_kept = np.shares_memory(get_array(df, "a"), arr_a)
    b_kept = np.shares_memory(get_array(df, "b"), arr_b)
    if using_copy_on_write:
        # Only the column that was filled moved to a new buffer.
        assert not a_kept
        assert b_kept
        assert view._mgr._has_no_reference(0)
        assert df._mgr._has_no_reference(0)
        tm.assert_frame_equal(view, df_orig)
    else:
        assert a_kept
        assert b_kept

    # In either mode the frame itself holds the filled values.
    expected = DataFrame({"a": [1.5, 5.5], "b": 1})
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_fillna_interval_inplace_reference(using_copy_on_write, warn_copy_on_write):
    """In-place ``fillna`` on an interval Series while a view is alive."""
    # Set dtype explicitly to avoid implicit cast when setting nan
    intervals = interval_range(start=0, end=5)
    ser = Series(intervals, name="a", dtype="interval[float64, right]")
    ser.iloc[1] = np.nan

    ser_orig = ser.copy()
    view = ser[:]
    fill_value = Interval(left=0, right=5)
    with tm.assert_cow_warning(warn_copy_on_write):
        ser.fillna(value=fill_value, inplace=True)

    # Sharing is judged on the left-bound arrays of the two interval arrays.
    left_shared = np.shares_memory(
        get_array(ser, "a").left.values, get_array(view, "a").left.values
    )
    if using_copy_on_write:
        assert not left_shared
        tm.assert_series_equal(view, ser_orig)
    else:
        assert left_shared
|
||||
|
||||
|
||||
def test_fillna_series_empty_arg(using_copy_on_write):
    """``Series.fillna`` with an empty dict as the fill value."""
    ser = Series([1, np.nan, 2])
    ser_orig = ser.copy()
    result = ser.fillna({})

    shared = np.shares_memory(get_array(ser), get_array(result))
    assert shared == bool(using_copy_on_write)

    # Changing the source afterwards must not show up in the result.
    ser.iloc[0] = 100.5
    tm.assert_series_equal(ser_orig, result)
|
||||
|
||||
|
||||
def test_fillna_series_empty_arg_inplace(using_copy_on_write):
    """In-place ``Series.fillna`` with an empty dict keeps the same buffer."""
    ser = Series([1, np.nan, 2])
    arr_before = get_array(ser)
    ser.fillna({}, inplace=True)

    assert np.shares_memory(get_array(ser), arr_before)
    if using_copy_on_write:
        # No reference may be attached to the block afterwards.
        assert ser._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_fillna_ea_noop_shares_memory(
    using_copy_on_write, any_numeric_ea_and_arrow_dtype
):
    """``fillna`` on extension dtypes where column "b" has nothing to fill."""
    df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
    df_orig = df.copy()
    df2 = df.fillna(100)

    def shares(col):
        # Looked up at call time so each check sees the current arrays.
        return np.shares_memory(get_array(df, col), get_array(df2, col))

    # Column "a" had a missing value, so its data is never shared.
    assert not shares("a")

    if using_copy_on_write:
        assert shares("b")
        assert not df2._mgr._has_no_reference(1)
    elif isinstance(df.dtypes.iloc[0], ArrowDtype):
        # arrow is immutable, so no-ops do not need to copy underlying array
        assert shares("b")
    else:
        assert not shares("b")

    tm.assert_frame_equal(df_orig, df)

    # Writing to "b" of the result must detach it and drop the references.
    df2.iloc[0, 1] = 100
    if using_copy_on_write:
        assert not shares("b")
        assert df2._mgr._has_no_reference(1)
        assert df._mgr._has_no_reference(1)
    tm.assert_frame_equal(df_orig, df)
|
||||
|
||||
|
||||
def test_fillna_inplace_ea_noop_shares_memory(
    using_copy_on_write, warn_copy_on_write, any_numeric_ea_and_arrow_dtype
):
    """In-place ``fillna`` on extension dtypes while a view is alive."""
    df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
    df_orig = df.copy()
    view = df[:]
    with tm.assert_cow_warning(warn_copy_on_write):
        df.fillna(100, inplace=True)

    def shares(col):
        # Looked up at call time so each check sees the current arrays.
        return np.shares_memory(get_array(df, col), get_array(view, col))

    if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write:
        assert not shares("a")
    else:
        # MaskedArray can actually respect inplace=True
        assert shares("a")

    # Column "b" had nothing to fill and stays shared with the view.
    assert shares("b")
    if using_copy_on_write:
        assert not df._mgr._has_no_reference(1)
        assert not view._mgr._has_no_reference(1)

    # No CoW warning is expected for the pyarrow-backed dtypes.
    expect_warning = (
        warn_copy_on_write and "pyarrow" not in any_numeric_ea_and_arrow_dtype
    )
    with tm.assert_cow_warning(expect_warning):
        df.iloc[0, 1] = 100

    if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write:
        tm.assert_frame_equal(df_orig, view)
    else:
        # we actually have a view
        tm.assert_frame_equal(df, view)
|
||||
|
||||
|
||||
def test_fillna_chained_assignment(using_copy_on_write):
    """In-place ``fillna`` called on a temporary selection of a frame."""
    df = DataFrame({"a": [1, np.nan, 2], "b": 1})
    df_orig = df.copy()

    if not using_copy_on_write:
        # Selections that are frames must stay silent once the
        # chained-assignment option is switched off.
        with tm.assert_produces_warning(None), option_context(
            "mode.chained_assignment", None
        ):
            df[["a"]].fillna(100, inplace=True)

        with tm.assert_produces_warning(None), option_context(
            "mode.chained_assignment", None
        ):
            df[df.a > 5].fillna(100, inplace=True)

        # The single-column selection emits the FutureWarning instead.
        with tm.assert_produces_warning(FutureWarning, match="inplace method"):
            df["a"].fillna(100, inplace=True)
        return

    # Under CoW both forms report chained assignment and leave df unchanged.
    with tm.raises_chained_assignment_error():
        df["a"].fillna(100, inplace=True)
    tm.assert_frame_equal(df, df_orig)

    with tm.raises_chained_assignment_error():
        df[["a"]].fillna(100, inplace=True)
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["interpolate", "ffill", "bfill"])
def test_interpolate_chained_assignment(using_copy_on_write, func):
    """In-place interpolate/ffill/bfill called on a temporary selection."""
    df = DataFrame({"a": [1, np.nan, 2], "b": 1})
    df_orig = df.copy()

    if not using_copy_on_write:
        # The single-column selection emits the FutureWarning.
        with tm.assert_produces_warning(FutureWarning, match="inplace method"):
            getattr(df["a"], func)(inplace=True)

        # Selections that are frames must stay silent once the
        # chained-assignment option is switched off.
        with tm.assert_produces_warning(None), option_context(
            "mode.chained_assignment", None
        ):
            getattr(df[["a"]], func)(inplace=True)

        with tm.assert_produces_warning(None), option_context(
            "mode.chained_assignment", None
        ):
            getattr(df[df["a"] > 1], func)(inplace=True)
        return

    # Under CoW both forms report chained assignment and leave df unchanged.
    with tm.raises_chained_assignment_error():
        getattr(df["a"], func)(inplace=True)
    tm.assert_frame_equal(df, df_orig)

    with tm.raises_chained_assignment_error():
        getattr(df[["a"]], func)(inplace=True)
    tm.assert_frame_equal(df, df_orig)
|
2055
lib/python3.13/site-packages/pandas/tests/copy_view/test_methods.py
Normal file
2055
lib/python3.13/site-packages/pandas/tests/copy_view/test_methods.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,481 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "replace_kwargs",
    [
        {"to_replace": {"a": 1, "b": 4}, "value": -1},
        # Test CoW splits blocks to avoid copying unchanged columns
        {"to_replace": {"a": 1}, "value": -1},
        {"to_replace": {"b": 4}, "value": -1},
        {"to_replace": {"b": {4: 1}}},
        # TODO: Add these in a further optimization
        # We would need to see which columns got replaced in the mask
        # which could be expensive
        # {"to_replace": {"b": 1}},
        # 1
    ],
)
def test_replace(using_copy_on_write, replace_kwargs):
    """``DataFrame.replace`` with per-column ``to_replace`` dicts."""
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": ["foo", "bar", "baz"]})
    df_orig = df.copy()

    df_replaced = df.replace(**replace_kwargs)

    def shares(col):
        # Looked up at call time so each check sees the current arrays.
        return np.shares_memory(get_array(df_replaced, col), get_array(df, col))

    if using_copy_on_write:
        # "b" is only required to be shared when no value in it changed.
        if (df_replaced["b"] == df["b"]).all():
            assert shares("b")
        assert shares("c")

    # Writing to the replaced frame triggers a copy-on-write for that
    # column/block.
    df_replaced.loc[0, "c"] = -1
    if using_copy_on_write:
        assert not shares("c")

    if "a" in replace_kwargs["to_replace"]:
        # Writing to "a" afterwards must keep using the same buffer.
        arr_a = get_array(df_replaced, "a")
        df_replaced.loc[0, "a"] = 100
        assert np.shares_memory(get_array(df_replaced, "a"), arr_a)
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_replace_regex_inplace_refs(using_copy_on_write, warn_copy_on_write):
    """In-place regex ``replace`` while a view of the frame is alive."""
    df = DataFrame({"a": ["aaa", "bbb"]})
    df_orig = df.copy()
    view = df[:]
    arr_before = get_array(df, "a")
    with tm.assert_cow_warning(warn_copy_on_write):
        df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)

    buffer_kept = np.shares_memory(arr_before, get_array(df, "a"))
    if not using_copy_on_write:
        assert buffer_kept
    else:
        # The frame moved to a new buffer and the view is untouched.
        assert not buffer_kept
        assert df._mgr._has_no_reference(0)
        tm.assert_frame_equal(view, df_orig)
|
||||
|
||||
|
||||
def test_replace_regex_inplace(using_copy_on_write):
    """Regex ``replace``: in place without references, then as a new frame."""
    df = DataFrame({"a": ["aaa", "bbb"]})
    arr_before = get_array(df, "a")
    df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True)
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
    # The in-place call must keep using the original buffer.
    assert np.shares_memory(arr_before, get_array(df, "a"))

    # A non-inplace replace that matches must return unshared data and
    # leave df as it was.
    df_orig = df.copy()
    replaced = df.replace(to_replace=r"^b.*$", value="new", regex=True)
    tm.assert_frame_equal(df_orig, df)
    assert not np.shares_memory(get_array(replaced, "a"), get_array(df, "a"))
|
||||
|
||||
|
||||
def test_replace_regex_inplace_no_op(using_copy_on_write):
    # Case: regex replace against an integer column (inplace, then not
    # inplace).
    df = DataFrame({"a": [1, 2]})
    original_values = get_array(df, "a")

    df.replace(to_replace=r"^a.$", value="new", inplace=True, regex=True)
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
    assert np.shares_memory(original_values, get_array(df, "a"))

    snapshot = df.copy()
    result = df.replace(to_replace=r"^x.$", value="new", regex=True)
    tm.assert_frame_equal(snapshot, df)

    result_values = get_array(result, "a")
    current_values = get_array(df, "a")
    if using_copy_on_write:
        # the result is expected to share the parent's data
        assert np.shares_memory(result_values, current_values)
    else:
        assert not np.shares_memory(result_values, current_values)
|
||||
|
||||
|
||||
def test_replace_mask_all_false_second_block(using_copy_on_write):
    # Case: the replaced value (1.5) only occurs in column "a"; columns "c"
    # and "d" hold integers.
    df = DataFrame({"a": [1.5, 2, 3], "b": 100.5, "c": 1, "d": 2})
    snapshot = df.copy()

    replaced = df.replace(to_replace=1.5, value=55.5)

    if using_copy_on_write:
        # TODO: Block splitting would allow us to avoid copying b
        assert np.shares_memory(get_array(df, "c"), get_array(replaced, "c"))
    else:
        assert not np.shares_memory(get_array(df, "c"), get_array(replaced, "c"))
    # column "a" was modified, so it never shares data with the parent
    assert not np.shares_memory(get_array(df, "a"), get_array(replaced, "a"))

    replaced.loc[0, "c"] = 1
    tm.assert_frame_equal(df, snapshot)  # Original is unchanged

    if using_copy_on_write:
        assert not np.shares_memory(get_array(df, "c"), get_array(replaced, "c"))
        # TODO: This should split and not copy the whole block
        # assert np.shares_memory(get_array(df, "d"), get_array(df2, "d"))
|
||||
|
||||
|
||||
def test_replace_coerce_single_column(using_copy_on_write, using_array_manager):
    # Case: replacing a float with a string in column "a"; column "b" does not
    # contain the replaced value.
    df = DataFrame({"a": [1.5, 2, 3], "b": 100.5})
    snapshot = df.copy()

    coerced = df.replace(to_replace=1.5, value="a")

    # Same expectations under CoW and under the default (non-array) manager;
    # nothing is checked for the array manager without CoW.
    if using_copy_on_write or not using_array_manager:
        assert np.shares_memory(get_array(df, "b"), get_array(coerced, "b"))
        assert not np.shares_memory(get_array(df, "a"), get_array(coerced, "a"))

    if not using_copy_on_write:
        return

    # writing into the shared column must detach it from the parent
    coerced.loc[0, "b"] = 0.5
    tm.assert_frame_equal(df, snapshot)  # Original is unchanged
    assert not np.shares_memory(get_array(df, "b"), get_array(coerced, "b"))
|
||||
|
||||
|
||||
def test_replace_to_replace_wrong_dtype(using_copy_on_write):
    # Case: ``to_replace`` is a string while both columns are float.
    df = DataFrame({"a": [1.5, 2, 3], "b": 100.5})
    snapshot = df.copy()

    result = df.replace(to_replace="xxx", value=1.5)

    for column in ("b", "a"):
        parent_values = get_array(df, column)
        result_values = get_array(result, column)
        if using_copy_on_write:
            assert np.shares_memory(parent_values, result_values)
        else:
            assert not np.shares_memory(parent_values, result_values)

    result.loc[0, "b"] = 0.5
    tm.assert_frame_equal(df, snapshot)  # Original is unchanged

    if using_copy_on_write:
        # the write above must have detached column "b"
        assert not np.shares_memory(get_array(df, "b"), get_array(result, "b"))
|
||||
|
||||
|
||||
def test_replace_list_categorical(using_copy_on_write):
    # Case: list-based replace on a categorical column, first inplace and
    # then returning a new frame.
    df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
    categorical = get_array(df, "a")
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )

    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.replace(["c"], value="a", inplace=True)
    # the inplace call keeps using the captured codes buffer
    assert np.shares_memory(categorical.codes, get_array(df, "a").codes)
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)

    snapshot = df.copy()
    with tm.assert_produces_warning(FutureWarning, match=msg):
        replaced = df.replace(["b"], value="a")
    assert not np.shares_memory(categorical.codes, get_array(replaced, "a").codes)

    tm.assert_frame_equal(df, snapshot)
|
||||
|
||||
|
||||
def test_replace_list_inplace_refs_categorical(using_copy_on_write):
    # Case: inplace list-based replace on a categorical column while
    # ``df[:]`` is alive.
    df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
    view = df[:]
    snapshot = df.copy()
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        df.replace(["c"], value="a", inplace=True)

    # Expected in both modes (without CoW: "This could be inplace").
    view_codes = get_array(view, "a").codes
    df_codes = get_array(df, "a").codes
    assert not np.shares_memory(view_codes, df_codes)

    if using_copy_on_write:
        tm.assert_frame_equal(snapshot, view)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", [1.5, [1.5], []])
def test_replace_inplace(using_copy_on_write, to_replace):
    # Case: inplace replace on a frame nobody else refers to, for a scalar,
    # a one-element list and an empty list as ``to_replace``.
    df = DataFrame({"a": [1.5, 2, 3]})
    arr_a = get_array(df, "a")
    # Pass the parametrized value through; previously ``1.5`` was hard-coded,
    # so every parametrization exercised the same scalar code path.
    df.replace(to_replace=to_replace, value=15.5, inplace=True)

    # the operation must reuse the buffer captured before the call
    assert np.shares_memory(get_array(df, "a"), arr_a)
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", [1.5, [1.5]])
def test_replace_inplace_reference(using_copy_on_write, to_replace, warn_copy_on_write):
    # Case: inplace replace that changes a value while ``df[:]`` is alive.
    df = DataFrame({"a": [1.5, 2, 3]})
    values_before = get_array(df, "a")
    view = df[:]

    with tm.assert_cow_warning(warn_copy_on_write):
        df.replace(to_replace=to_replace, value=15.5, inplace=True)

    if not using_copy_on_write:
        assert np.shares_memory(get_array(df, "a"), values_before)
        return

    # with CoW df moves to new data and neither object keeps a reference
    assert not np.shares_memory(get_array(df, "a"), values_before)
    assert df._mgr._has_no_reference(0)
    assert view._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", ["a", 100.5])
def test_replace_inplace_reference_no_op(using_copy_on_write, to_replace):
    # Case: inplace replace of a value that is not in the column while
    # ``df[:]`` is alive.
    df = DataFrame({"a": [1.5, 2, 3]})
    values_before = get_array(df, "a")
    view = df[:]

    df.replace(to_replace=to_replace, value=15.5, inplace=True)

    assert np.shares_memory(get_array(df, "a"), values_before)
    if using_copy_on_write:
        # both objects are expected to still reference each other
        for frame in (df, view):
            assert not frame._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("to_replace", [1, [1]])
@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical_inplace_reference(using_copy_on_write, val, to_replace):
    # Case: inplace replace on a categorical column while ``df[:]`` is alive.
    df = DataFrame({"a": Categorical([1, 2, 3])})
    snapshot = df.copy()
    categorical_before = get_array(df, "a")
    view = df[:]
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    # a FutureWarning is only expected for the 1.5 replacement value
    if val == 1.5:
        warn = FutureWarning
    else:
        warn = None
    with tm.assert_produces_warning(warn, match=msg):
        df.replace(to_replace=to_replace, value=val, inplace=True)

    if not using_copy_on_write:
        assert np.shares_memory(get_array(df, "a").codes, categorical_before.codes)
        return

    assert not np.shares_memory(get_array(df, "a").codes, categorical_before.codes)
    assert df._mgr._has_no_reference(0)
    assert view._mgr._has_no_reference(0)
    tm.assert_frame_equal(view, snapshot)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical_inplace(using_copy_on_write, val):
    # Case: inplace replace on a categorical column nobody else refers to.
    df = DataFrame({"a": Categorical([1, 2, 3])})
    categorical_before = get_array(df, "a")
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    # a FutureWarning is only expected for the 1.5 replacement value
    if val == 1.5:
        warn = FutureWarning
    else:
        warn = None
    with tm.assert_produces_warning(warn, match=msg):
        df.replace(to_replace=1, value=val, inplace=True)

    assert np.shares_memory(get_array(df, "a").codes, categorical_before.codes)
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)

    tm.assert_frame_equal(df, DataFrame({"a": Categorical([val, 2, 3])}))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical(using_copy_on_write, val):
    # Case: non-inplace replace on a categorical column.
    df = DataFrame({"a": Categorical([1, 2, 3])})
    snapshot = df.copy()
    msg = (
        r"The behavior of Series\.replace \(and DataFrame.replace\) "
        "with CategoricalDtype"
    )
    # a FutureWarning is only expected for the 1.5 replacement value
    if val == 1.5:
        warn = FutureWarning
    else:
        warn = None
    with tm.assert_produces_warning(warn, match=msg):
        replaced = df.replace(to_replace=1, value=val)

    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
        assert replaced._mgr._has_no_reference(0)
    assert not np.shares_memory(
        get_array(df, "a").codes, get_array(replaced, "a").codes
    )
    tm.assert_frame_equal(df, snapshot)

    # writing into the result must not move it to a new codes buffer
    codes_before = get_array(replaced, "a").codes
    replaced.iloc[0, 0] = 2.0
    assert np.shares_memory(get_array(replaced, "a").codes, codes_before)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["where", "mask"])
def test_masking_inplace(using_copy_on_write, method, warn_copy_on_write):
    # Case: inplace ``where`` / ``mask`` while ``df[:]`` is alive.
    df = DataFrame({"a": [1.5, 2, 3]})
    snapshot = df.copy()
    values_before = get_array(df, "a")
    view = df[:]

    masking_func = getattr(df, method)
    # the context manager only checks for a warning when the flag is set
    with tm.assert_cow_warning(warn_copy_on_write):
        masking_func(df["a"] > 1.6, -1, inplace=True)

    if not using_copy_on_write:
        assert np.shares_memory(get_array(df, "a"), values_before)
        return

    assert not np.shares_memory(get_array(df, "a"), values_before)
    assert df._mgr._has_no_reference(0)
    assert view._mgr._has_no_reference(0)
    tm.assert_frame_equal(view, snapshot)
|
||||
|
||||
|
||||
def test_replace_empty_list(using_copy_on_write):
    # Case: replace with empty ``to_replace`` and ``value`` lists.
    df = DataFrame({"a": [1, 2]})

    result = df.replace([], [])
    if using_copy_on_write:
        assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
        assert not df._mgr._has_no_reference(0)
    else:
        assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))

    # second call whose return value is discarded
    values_before = get_array(df, "a")
    df.replace([], [])
    if not using_copy_on_write:
        return
    assert np.shares_memory(get_array(df, "a"), values_before)
    assert not df._mgr._has_no_reference(0)
    assert not result._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("value", ["d", None])
def test_replace_object_list_inplace(using_copy_on_write, value):
    # Case: inplace list-based replace on an object column nobody else
    # refers to.
    df = DataFrame({"a": ["a", "b", "c"]})
    values_before = get_array(df, "a")
    df.replace(["c"], value, inplace=True)

    expect_same_buffer = using_copy_on_write or value is None
    if expect_same_buffer:
        assert np.shares_memory(values_before, get_array(df, "a"))
    else:
        # This could be inplace
        assert not np.shares_memory(values_before, get_array(df, "a"))

    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_replace_list_multiple_elements_inplace(using_copy_on_write):
    # Case: inplace replace of several values by one scalar.
    df = DataFrame({"a": [1, 2, 3]})
    values_before = get_array(df, "a")
    df.replace([1, 2], 4, inplace=True)

    # the buffer is expected to be reused with and without CoW
    assert np.shares_memory(values_before, get_array(df, "a"))
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_replace_list_none(using_copy_on_write):
    # Case: list-based replace with ``value=None`` returning a new frame.
    df = DataFrame({"a": ["a", "b", "c"]})
    snapshot = df.copy()

    replaced = df.replace(["b"], value=None)

    # the parent stays as it was and the result owns separate data
    tm.assert_frame_equal(df, snapshot)
    parent_values = get_array(df, "a")
    assert not np.shares_memory(parent_values, get_array(replaced, "a"))
|
||||
|
||||
|
||||
def test_replace_list_none_inplace_refs(using_copy_on_write, warn_copy_on_write):
    # Case: inplace list-based replace with ``value=None`` while ``df[:]``
    # is alive.
    df = DataFrame({"a": ["a", "b", "c"]})
    values_before = get_array(df, "a")
    snapshot = df.copy()
    view = df[:]

    with tm.assert_cow_warning(warn_copy_on_write):
        df.replace(["a"], value=None, inplace=True)

    if not using_copy_on_write:
        assert np.shares_memory(values_before, get_array(df, "a"))
        return

    assert df._mgr._has_no_reference(0)
    assert not np.shares_memory(values_before, get_array(df, "a"))
    tm.assert_frame_equal(snapshot, view)
|
||||
|
||||
|
||||
def test_replace_columnwise_no_op_inplace(using_copy_on_write):
    # Case: inplace dict-based replace of a value (10) that column "a" does
    # not contain, while ``df[:]`` is alive.
    df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    view = df[:]
    snapshot = df.copy()

    df.replace({"a": 10}, 100, inplace=True)

    if not using_copy_on_write:
        return
    assert np.shares_memory(get_array(view, "a"), get_array(df, "a"))
    # a later write into df must leave the view untouched
    df.iloc[0, 0] = 100
    tm.assert_frame_equal(view, snapshot)
|
||||
|
||||
|
||||
def test_replace_columnwise_no_op(using_copy_on_write):
    # Case: dict-based replace of a value (10) that column "a" does not
    # contain, returning a new frame.
    df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    snapshot = df.copy()

    result = df.replace({"a": 10}, 100)
    if using_copy_on_write:
        assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))

    # writing into the result must not leak into the parent
    result.iloc[0, 0] = 100
    tm.assert_frame_equal(df, snapshot)
|
||||
|
||||
|
||||
def test_replace_chained_assignment(using_copy_on_write):
    # Case: inplace replace called on the result of an indexing operation.
    df = DataFrame({"a": [1, np.nan, 2], "b": 1})
    snapshot = df.copy()

    if using_copy_on_write:
        # both chained forms must raise the chained-assignment error and
        # leave df as it was
        with tm.raises_chained_assignment_error():
            df["a"].replace(1, 100, inplace=True)
        tm.assert_frame_equal(df, snapshot)

        with tm.raises_chained_assignment_error():
            df[["a"]].replace(1, 100, inplace=True)
        tm.assert_frame_equal(df, snapshot)
        return

    # no warning expected for these two forms once the option is switched off
    with tm.assert_produces_warning(None), option_context(
        "mode.chained_assignment", None
    ):
        df[["a"]].replace(1, 100, inplace=True)

    with tm.assert_produces_warning(None), option_context(
        "mode.chained_assignment", None
    ):
        df[df.a > 5].replace(1, 100, inplace=True)

    with tm.assert_produces_warning(FutureWarning, match="inplace method"):
        df["a"].replace(1, 100, inplace=True)
|
||||
|
||||
|
||||
def test_replace_listlike(using_copy_on_write):
    # Case: list-to-list replace, first with values that are absent from the
    # frame, then with one value (2) that is present.
    df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    df_orig = df.copy()

    result = df.replace([200, 201], [11, 11])
    if using_copy_on_write:
        assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
    else:
        assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))

    result.iloc[0, 0] = 100
    # Compare against the snapshot taken before the write. The previous
    # ``assert_frame_equal(df, df)`` compared the frame with itself and could
    # never detect a change leaking into the parent.
    tm.assert_frame_equal(df, df_orig)

    result = df.replace([200, 2], [10, 10])
    assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
    tm.assert_frame_equal(df, df_orig)
|
||||
|
||||
|
||||
def test_replace_listlike_inplace(using_copy_on_write, warn_copy_on_write):
    # Case: inplace list-to-list replace, first without and then with a live
    # ``df[:]``.
    df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    values_before = get_array(df, "a")

    df.replace([200, 2], [10, 11], inplace=True)
    assert np.shares_memory(get_array(df, "a"), values_before)

    view = df[:]
    snapshot = df.copy()
    with tm.assert_cow_warning(warn_copy_on_write):
        df.replace([200, 3], [10, 11], inplace=True)

    if using_copy_on_write:
        # df moved to new data; the view keeps the old values
        assert not np.shares_memory(get_array(df, "a"), values_before)
        tm.assert_frame_equal(view, snapshot)
    else:
        # df and the view still hold the same values
        assert np.shares_memory(get_array(df, "a"), values_before)
        tm.assert_frame_equal(df, view)
|
@ -0,0 +1,156 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Copy/view behaviour for the values that are set in a DataFrame
|
||||
|
||||
|
||||
def test_set_column_with_array():
    # Case: setting an array as a new column (df[col] = arr) copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    source = np.array([1, 2, 3], dtype="int64")

    df["c"] = source

    # the column must not point at the array's buffer ...
    stored = get_array(df, "c")
    assert not np.shares_memory(stored, source)

    # ... so a later write into the array leaves the column as it was
    source[0] = 0
    expected = Series([1, 2, 3], name="c")
    tm.assert_series_equal(df["c"], expected)
|
||||
|
||||
|
||||
def test_set_column_with_series(using_copy_on_write):
    # Case: setting a series as a new column (df[col] = s) copies that data
    # (with delayed copy with CoW)
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    ser = Series([1, 2, 3])

    df["c"] = ser

    shares = np.shares_memory(get_array(df, "c"), get_array(ser))
    if using_copy_on_write:
        assert shares
    else:
        # the series data is copied
        assert not shares

    # and modifying the series does not modify the DataFrame
    ser.iloc[0] = 0
    assert ser.iloc[0] == 0
    expected = Series([1, 2, 3], name="c")
    tm.assert_series_equal(df["c"], expected)
|
||||
|
||||
|
||||
def test_set_column_with_index(using_copy_on_write):
    # Case: setting an index as a new column (df[col] = idx) copies that data
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    plain_index = Index([1, 2, 3])
    df["c"] = plain_index
    # the index data is copied
    assert not np.shares_memory(get_array(df, "c"), plain_index.values)

    # same check for a RangeIndex; its values are read before the assignment
    range_index = RangeIndex(1, 4)
    range_values = range_index.values
    df["d"] = range_index
    assert not np.shares_memory(get_array(df, "d"), range_values)
|
||||
|
||||
|
||||
def test_set_columns_with_dataframe(using_copy_on_write):
    # Case: setting a DataFrame as new columns copies that data
    # (with delayed copy with CoW)
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    other = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})

    df[["c", "d"]] = other

    shares = np.shares_memory(get_array(df, "c"), get_array(other, "c"))
    if using_copy_on_write:
        assert shares
    else:
        # the data is copied
        assert not shares

    # and modifying the set DataFrame does not modify the original DataFrame
    other.iloc[0, 0] = 0
    expected = Series([7, 8, 9], name="c")
    tm.assert_series_equal(df["c"], expected)
|
||||
|
||||
|
||||
def test_setitem_series_no_copy(using_copy_on_write):
    # Case: setting a Series as column into a DataFrame can delay copying that data
    df = DataFrame({"a": [1, 2, 3]})
    rhs = Series([4, 5, 6])
    rhs_snapshot = rhs.copy()

    # adding a new column
    df["b"] = rhs
    if using_copy_on_write:
        assert np.shares_memory(get_array(rhs), get_array(df, "b"))

    # writing into the new column must leave the Series as it was
    df.iloc[0, 1] = 100
    tm.assert_series_equal(rhs, rhs_snapshot)
|
||||
|
||||
|
||||
def test_setitem_series_no_copy_single_block(using_copy_on_write):
    # Overwriting an existing column that is a single block
    df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    rhs = Series([4, 5, 6])
    rhs_snapshot = rhs.copy()

    df["a"] = rhs
    if using_copy_on_write:
        assert np.shares_memory(get_array(rhs), get_array(df, "a"))

    # writing into the overwritten column must leave the Series as it was
    df.iloc[0, 0] = 100
    tm.assert_series_equal(rhs, rhs_snapshot)
|
||||
|
||||
|
||||
def test_setitem_series_no_copy_split_block(using_copy_on_write):
    # Overwriting an existing column that is part of a larger block
    df = DataFrame({"a": [1, 2, 3], "b": 1})
    rhs = Series([4, 5, 6])
    rhs_snapshot = rhs.copy()

    df["b"] = rhs
    if using_copy_on_write:
        assert np.shares_memory(get_array(rhs), get_array(df, "b"))

    # writing into the overwritten column must leave the Series as it was
    df.iloc[0, 1] = 100
    tm.assert_series_equal(rhs, rhs_snapshot)
|
||||
|
||||
|
||||
def test_setitem_series_column_midx_broadcasting(using_copy_on_write):
    # Setting a Series to multiple columns will repeat the data
    # (currently copying the data eagerly)
    columns = MultiIndex.from_arrays([["a", "a", "b"], [1, 2, 3]])
    df = DataFrame([[1, 2, 3], [3, 4, 5]], columns=columns)
    rhs = Series([10, 11])

    df["a"] = rhs

    first_column = df._get_column_array(0)
    assert not np.shares_memory(get_array(rhs), first_column)
    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
|
||||
|
||||
|
||||
def test_set_column_with_inplace_operator(using_copy_on_write, warn_copy_on_write):
    data = {"a": [1, 2, 3], "b": [4, 5, 6]}
    df = DataFrame(data)

    # this should not raise any warning
    with tm.assert_produces_warning(None):
        df["a"] += 1

    # when it is not in a chain, then it should produce a warning
    df = DataFrame(data)
    column = df["a"]
    with tm.assert_cow_warning(warn_copy_on_write):
        column += 1
|
@ -0,0 +1,14 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import DataFrame
|
||||
from pandas.tests.copy_view.util import get_array
|
||||
|
||||
|
||||
def test_get_array_numpy():
    # Two lookups of the same default-dtype column must share one buffer.
    df = DataFrame({"a": [1, 2, 3]})
    first = get_array(df, "a")
    second = get_array(df, "a")
    assert np.shares_memory(first, second)
|
||||
|
||||
|
||||
def test_get_array_masked():
    # Two lookups of the same "Int64" column must share one buffer.
    df = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
    first = get_array(df, "a")
    second = get_array(df, "a")
    assert np.shares_memory(first, second)
|
30
lib/python3.13/site-packages/pandas/tests/copy_view/util.py
Normal file
30
lib/python3.13/site-packages/pandas/tests/copy_view/util.py
Normal file
@ -0,0 +1,30 @@
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
from pandas.core.arrays import BaseMaskedArray
|
||||
|
||||
|
||||
def get_array(obj, col=None):
    """
    Return the array behind an Index, a Series, or one DataFrame column.

    Serves as a stand-in for ``df[col].values`` that avoids the regular
    getitem path, which triggers tracking references / CoW (something the
    calling test may itself be checking).

    For an Index, or a Series when ``col`` is None or equals its name, the
    object's own ``_values`` are used. Otherwise ``col`` is required and is
    resolved to an integer position through ``obj.columns.get_loc``.

    Masked arrays are unwrapped to their ``_data``, Categoricals are returned
    unchanged, and any other array is unwrapped to its ``_ndarray`` attribute
    when it has one.
    """
    uses_own_values = isinstance(obj, Index) or (
        isinstance(obj, Series) and (col is None or obj.name == col)
    )
    if uses_own_values:
        values = obj._values
    else:
        assert col is not None
        position = obj.columns.get_loc(col)
        # get_loc must resolve to a single integer position here
        assert isinstance(position, int)
        values = obj._get_column_array(position)

    if isinstance(values, BaseMaskedArray):
        return values._data
    if isinstance(values, Categorical):
        return values
    return getattr(values, "_ndarray", values)
|
Reference in New Issue
Block a user