Updated script that can be controlled by a Node.js web app
lib/python3.13/site-packages/pandas/tests/io/test_pickle.py (new file, 652 lines)
@@ -0,0 +1,652 @@
"""
manage legacy pickle tests

How to add pickle tests:

1. Install the pandas version intended to output the pickle.

2. Execute "generate_legacy_storage_files.py" to create the pickle:

   $ python generate_legacy_storage_files.py <output_dir> pickle

3. Move the created pickle to the "data/legacy_pickle/<version>" directory.
"""

from __future__ import annotations

from array import array
import bz2
import datetime
import functools
from functools import partial
import gzip
import io
import os
from pathlib import Path
import pickle
import shutil
import tarfile
from typing import Any
import uuid
import zipfile

import numpy as np
import pytest

from pandas.compat import (
    get_lzma_file,
    is_platform_little_endian,
)
from pandas.compat._optional import import_optional_dependency
from pandas.compat.compressors import flatten_buffer
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    Series,
    period_range,
)
import pandas._testing as tm
from pandas.tests.io.generate_legacy_storage_files import create_pickle_data

import pandas.io.common as icom
from pandas.tseries.offsets import (
    Day,
    MonthEnd,
)


# ---------------------
# comparison functions
# ---------------------
def compare_element(result, expected, typ):
    if isinstance(expected, Index):
        tm.assert_index_equal(expected, result)
        return

    if typ.startswith("sp_"):
        tm.assert_equal(result, expected)
    elif typ == "timestamp":
        if expected is pd.NaT:
            assert result is pd.NaT
        else:
            assert result == expected
    else:
        comparator = getattr(tm, f"assert_{typ}_equal", tm.assert_almost_equal)
        comparator(result, expected)
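
# The getattr fallback above means typ == "series" dispatches to
# tm.assert_series_equal, while a typ without a dedicated
# tm.assert_<typ>_equal helper is checked with tm.assert_almost_equal.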


# ---------------------
# tests
# ---------------------


@pytest.mark.parametrize(
    "data",
    [
        b"123",
        b"123456",
        bytearray(b"123"),
        memoryview(b"123"),
        pickle.PickleBuffer(b"123"),
        array("I", [1, 2, 3]),
        memoryview(b"123456").cast("B", (3, 2)),
        memoryview(b"123456").cast("B", (3, 2))[::2],
        np.arange(12).reshape((3, 4), order="C"),
        np.arange(12).reshape((3, 4), order="F"),
        np.arange(12).reshape((3, 4), order="C")[:, ::2],
    ],
)
def test_flatten_buffer(data):
    result = flatten_buffer(data)
    expected = memoryview(data).tobytes("A")
    assert result == expected
    if isinstance(data, (bytes, bytearray)):
        assert result is data
    elif isinstance(result, memoryview):
        assert result.ndim == 1
        assert result.format == "B"
        assert result.contiguous
        assert result.shape == (result.nbytes,)
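
# Illustrative example for the strided case above:
# memoryview(b"123456").cast("B", (3, 2))[::2] selects the rows b"12" and
# b"56", which cannot be exposed zero-copy as one contiguous buffer, so
# flatten_buffer falls back to an in-memory copy and the result equals
# b"1256" (the "A"-order bytes of the view, matching `expected`).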


def test_pickles(datapath):
    if not is_platform_little_endian():
        pytest.skip("known failure on non-little endian")

    # For loop for compat with --strict-data-files
    for legacy_pickle in Path(__file__).parent.glob("data/legacy_pickle/*/*.p*kl*"):
        legacy_pickle = datapath(legacy_pickle)

        data = pd.read_pickle(legacy_pickle)

        for typ, dv in data.items():
            for dt, result in dv.items():
                expected = data[typ][dt]

                if typ == "series" and dt == "ts":
                    # GH 7748
                    tm.assert_series_equal(result, expected)
                    assert result.index.freq == expected.index.freq
                    assert not result.index.freq.normalize
                    tm.assert_series_equal(result > 0, expected > 0)

                    # GH 9291
                    freq = result.index.freq
                    assert freq + Day(1) == Day(2)

                    res = freq + pd.Timedelta(hours=1)
                    assert isinstance(res, pd.Timedelta)
                    assert res == pd.Timedelta(days=1, hours=1)

                    res = freq + pd.Timedelta(nanoseconds=1)
                    assert isinstance(res, pd.Timedelta)
                    assert res == pd.Timedelta(days=1, nanoseconds=1)
                elif typ == "index" and dt == "period":
                    tm.assert_index_equal(result, expected)
                    assert isinstance(result.freq, MonthEnd)
                    assert result.freq == MonthEnd()
                    assert result.freqstr == "M"
                    tm.assert_index_equal(result.shift(2), expected.shift(2))
                elif typ == "series" and dt in ("dt_tz", "cat"):
                    tm.assert_series_equal(result, expected)
                elif typ == "frame" and dt in (
                    "dt_mixed_tzs",
                    "cat_onecol",
                    "cat_and_float",
                ):
                    tm.assert_frame_equal(result, expected)
                else:
                    compare_element(result, expected, typ)
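
# Each legacy pickle stores a two-level dict of {typ: {dt: object}}, e.g.
# {"series": {"ts": <Series>}, "index": {"period": <PeriodIndex>}}, which is
# why the loops above walk data.items() and then index data[typ][dt].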


def python_pickler(obj, path):
    with open(path, "wb") as fh:
        pickle.dump(obj, fh, protocol=-1)


def python_unpickler(path):
    with open(path, "rb") as fh:
        fh.seek(0)
        return pickle.load(fh)


def flatten(data: dict) -> list[tuple[str, Any]]:
    """Flatten create_pickle_data"""
    return [
        (typ, example)
        for typ, examples in data.items()
        for example in examples.values()
    ]
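
# For example, flatten({"series": {"ts": s1, "float": s2}}) returns
# [("series", s1), ("series", s2)]: one (typ, example) pair per stored
# object, ready for the parametrization below. Note that protocol=-1 in
# python_pickler selects the highest protocol the interpreter supports.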


@pytest.mark.parametrize(
    "pickle_writer",
    [
        pytest.param(python_pickler, id="python"),
        pytest.param(pd.to_pickle, id="pandas_proto_default"),
        pytest.param(
            functools.partial(pd.to_pickle, protocol=pickle.HIGHEST_PROTOCOL),
            id="pandas_proto_highest",
        ),
        pytest.param(functools.partial(pd.to_pickle, protocol=4), id="pandas_proto_4"),
        pytest.param(
            functools.partial(pd.to_pickle, protocol=5),
            id="pandas_proto_5",
        ),
    ],
)
@pytest.mark.parametrize("writer", [pd.to_pickle, python_pickler])
@pytest.mark.parametrize("typ, expected", flatten(create_pickle_data()))
def test_round_trip_current(typ, expected, pickle_writer, writer):
    with tm.ensure_clean() as path:
        # test writing with each pickler
        pickle_writer(expected, path)

        # test reading with each unpickler
        result = pd.read_pickle(path)
        compare_element(result, expected, typ)

        result = python_unpickler(path)
        compare_element(result, expected, typ)

        # and the same for file objects (GH 35679)
        with open(path, mode="wb") as handle:
            writer(expected, path)
            handle.seek(0)  # shouldn't close file handle
        with open(path, mode="rb") as handle:
            result = pd.read_pickle(handle)
            handle.seek(0)  # shouldn't close file handle
        compare_element(result, expected, typ)
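
# The stacked parametrize decorators form a full matrix: every example from
# create_pickle_data is written with each pickler (plain pickle.dump and
# pd.to_pickle at several protocols) and read back with both pd.read_pickle
# and the plain pickle.load inside python_unpickler.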


def test_pickle_path_pathlib():
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result)


def test_pickle_path_localpath():
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle)
    tm.assert_frame_equal(df, result)
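
# tm.round_trip_pathlib and tm.round_trip_localpath run the same write/read
# pair but hand the I/O functions a pathlib.Path (respectively a py.path
# LocalPath) instead of a str, verifying that path-like objects are accepted.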


# ---------------------
# test pickle compression
# ---------------------


@pytest.fixture
def get_random_path():
    return f"__{uuid.uuid4()}__.pickle"


class TestCompression:
    _extension_to_compression = icom.extension_to_compression

    def compress_file(self, src_path, dest_path, compression):
        if compression is None:
            shutil.copyfile(src_path, dest_path)
            return

        if compression == "gzip":
            f = gzip.open(dest_path, "w")
        elif compression == "bz2":
            f = bz2.BZ2File(dest_path, "w")
        elif compression == "zip":
            with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f:
                f.write(src_path, os.path.basename(src_path))
        elif compression == "tar":
            with open(src_path, "rb") as fh:
                with tarfile.open(dest_path, mode="w") as tar:
                    tarinfo = tar.gettarinfo(src_path, os.path.basename(src_path))
                    tar.addfile(tarinfo, fh)
        elif compression == "xz":
            f = get_lzma_file()(dest_path, "w")
        elif compression == "zstd":
            f = import_optional_dependency("zstandard").open(dest_path, "wb")
        else:
            msg = f"Unrecognized compression type: {compression}"
            raise ValueError(msg)

        if compression not in ["zip", "tar"]:
            with open(src_path, "rb") as fh:
                with f:
                    f.write(fh.read())
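
    # Usage sketch (hypothetical filenames): compress_file("df.raw",
    # "df.raw.gz", compression="gzip") wraps the raw pickle bytes in a gzip
    # stream, whereas "zip" and "tar" build an archive containing the source
    # file as a single member, which is why those two are written inside
    # their own context managers above instead of via the stream writer.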

    def test_write_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".compressed"
        path2 = base + ".raw"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = DataFrame(
                1.1 * np.arange(120).reshape((30, 4)),
                columns=Index(list("ABCD"), dtype=object),
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
            )

            # write to compressed file
            df.to_pickle(p1, compression=compression)

            # decompress
            with tm.decompress_file(p1, compression=compression) as f:
                with open(p2, "wb") as fh:
                    fh.write(f.read())

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2)

    @pytest.mark.parametrize("compression", ["", "None", "bad", "7z"])
    def test_write_explicit_bad(self, compression, get_random_path):
        with pytest.raises(ValueError, match="Unrecognized compression type"):
            with tm.ensure_clean(get_random_path) as path:
                df = DataFrame(
                    1.1 * np.arange(120).reshape((30, 4)),
                    columns=Index(list("ABCD"), dtype=object),
                    index=Index([f"i-{i}" for i in range(30)], dtype=object),
                )
                df.to_pickle(path, compression=compression)

    def test_write_infer(self, compression_ext, get_random_path):
        base = get_random_path
        path1 = base + compression_ext
        path2 = base + ".raw"
        compression = self._extension_to_compression.get(compression_ext.lower())

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = DataFrame(
                1.1 * np.arange(120).reshape((30, 4)),
                columns=Index(list("ABCD"), dtype=object),
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
            )

            # write to compressed file by inferred compression method
            df.to_pickle(p1)

            # decompress
            with tm.decompress_file(p1, compression=compression) as f:
                with open(p2, "wb") as fh:
                    fh.write(f.read())

            # read decompressed file
            df2 = pd.read_pickle(p2, compression=None)

            tm.assert_frame_equal(df, df2)
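
    # Inference goes through icom.extension_to_compression, which maps e.g.
    # ".gz" -> "gzip", ".bz2" -> "bz2" and ".zst" -> "zstd"; to_pickle and
    # read_pickle pick the codec from the filename when compression="infer"
    # (the default).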

    def test_read_explicit(self, compression, get_random_path):
        base = get_random_path
        path1 = base + ".raw"
        path2 = base + ".compressed"

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = DataFrame(
                1.1 * np.arange(120).reshape((30, 4)),
                columns=Index(list("ABCD"), dtype=object),
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
            )

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file
            df2 = pd.read_pickle(p2, compression=compression)
            tm.assert_frame_equal(df, df2)

    def test_read_infer(self, compression_ext, get_random_path):
        base = get_random_path
        path1 = base + ".raw"
        path2 = base + compression_ext
        compression = self._extension_to_compression.get(compression_ext.lower())

        with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
            df = DataFrame(
                1.1 * np.arange(120).reshape((30, 4)),
                columns=Index(list("ABCD"), dtype=object),
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
            )

            # write to uncompressed file
            df.to_pickle(p1, compression=None)

            # compress
            self.compress_file(p1, p2, compression=compression)

            # read compressed file by inferred compression method
            df2 = pd.read_pickle(p2)
            tm.assert_frame_equal(df, df2)


# ---------------------
# test pickle protocol
# ---------------------


class TestProtocol:
    @pytest.mark.parametrize("protocol", [-1, 0, 1, 2])
    def test_read(self, protocol, get_random_path):
        with tm.ensure_clean(get_random_path) as path:
            df = DataFrame(
                1.1 * np.arange(120).reshape((30, 4)),
                columns=Index(list("ABCD"), dtype=object),
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
            )
            df.to_pickle(path, protocol=protocol)
            df2 = pd.read_pickle(path)
            tm.assert_frame_equal(df, df2)
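
    # read_pickle takes no protocol argument: a pickle stream records its own
    # protocol, so files written at -1 (an alias for the highest available
    # protocol) or at the legacy protocols 0-2 all load through the same call.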


@pytest.mark.parametrize(
    ["pickle_file", "excols"],
    [
        ("test_py27.pkl", Index(["a", "b", "c"])),
        (
            "test_mi_py27.pkl",
            pd.MultiIndex.from_arrays([["a", "b", "c"], ["A", "B", "C"]]),
        ),
    ],
)
def test_unicode_decode_error(datapath, pickle_file, excols):
    # pickle file written with py27, should be readable without raising
    # UnicodeDecodeError, see GH#28645 and GH#31988
    path = datapath("io", "data", "pickle", pickle_file)
    df = pd.read_pickle(path)

    # just test the columns are correct since the values are random
    tm.assert_index_equal(df.columns, excols)


# ---------------------
# tests for buffer I/O
# ---------------------


def test_pickle_buffer_roundtrip():
    with tm.ensure_clean() as path:
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        with open(path, "wb") as fh:
            df.to_pickle(fh)
        with open(path, "rb") as fh:
            result = pd.read_pickle(fh)
        tm.assert_frame_equal(df, result)


# ---------------------
# tests for URL I/O
# ---------------------


@pytest.mark.parametrize(
    "mockurl", ["http://url.com", "ftp://test.com", "http://gzip.com"]
)
def test_pickle_generalurl_read(monkeypatch, mockurl):
    def python_pickler(obj, path):
        with open(path, "wb") as fh:
            pickle.dump(obj, fh, protocol=-1)

    class MockReadResponse:
        def __init__(self, path) -> None:
            self.file = open(path, "rb")
            if "gzip" in path:
                self.headers = {"Content-Encoding": "gzip"}
            else:
                self.headers = {"Content-Encoding": ""}

        def __enter__(self):
            return self

        def __exit__(self, *args):
            self.close()

        def read(self):
            return self.file.read()

        def close(self):
            return self.file.close()

    with tm.ensure_clean() as path:

        def mock_urlopen_read(*args, **kwargs):
            return MockReadResponse(path)

        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        python_pickler(df, path)
        monkeypatch.setattr("urllib.request.urlopen", mock_urlopen_read)
        result = pd.read_pickle(mockurl)
        tm.assert_frame_equal(df, result)
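
# read_pickle routes http:// and ftp:// URLs through urllib.request.urlopen,
# so patching that single entry point is enough to serve the local pickle for
# every mocked URL; MockReadResponse mimics the context-manager, read() and
# headers interface of a real urlopen response.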


def test_pickle_fsspec_roundtrip():
    pytest.importorskip("fsspec")
    with tm.ensure_clean():
        mockurl = "memory://mockfile"
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        df.to_pickle(mockurl)
        result = pd.read_pickle(mockurl)
        tm.assert_frame_equal(df, result)
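
# "memory://" is fsspec's in-memory filesystem, so this round trip never
# touches the local disk; pandas hands any path with a registered fsspec
# protocol to fsspec for opening.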


class MyTz(datetime.tzinfo):
    def __init__(self) -> None:
        pass


def test_read_pickle_with_subclass():
    # GH 12163
    expected = Series(dtype=object), MyTz()
    result = tm.round_trip_pickle(expected)

    tm.assert_series_equal(result[0], expected[0])
    assert isinstance(result[1], MyTz)
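
# pickle serializes the instance by reference to its module-qualified class
# name and reimports that class on load, which is why the unpickled object
# comes back as a genuine MyTz instance rather than a plain datetime.tzinfo.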


def test_pickle_binary_object_compression(compression):
    """
    Read/write from binary file-objects w/wo compression.

    GH 26237, GH 29054, and GH 29570
    """
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    # reference for compression
    with tm.ensure_clean() as path:
        df.to_pickle(path, compression=compression)
        reference = Path(path).read_bytes()

    # write
    buffer = io.BytesIO()
    df.to_pickle(buffer, compression=compression)
    buffer.seek(0)

    # gzip, zip and tar embed metadata such as the filename or mtime, so the
    # compressed content cannot be compared byte-for-byte
    assert buffer.getvalue() == reference or compression in ("gzip", "zip", "tar")

    # read
    read_df = pd.read_pickle(buffer, compression=compression)
    buffer.seek(0)
    tm.assert_frame_equal(df, read_df)


def test_pickle_dataframe_with_multilevel_index(
    multiindex_year_month_day_dataframe_random_data,
    multiindex_dataframe_random_data,
):
    ymd = multiindex_year_month_day_dataframe_random_data
    frame = multiindex_dataframe_random_data

    def _test_roundtrip(frame):
        unpickled = tm.round_trip_pickle(frame)
        tm.assert_frame_equal(frame, unpickled)

    _test_roundtrip(frame)
    _test_roundtrip(frame.T)
    _test_roundtrip(ymd)
    _test_roundtrip(ymd.T)


def test_pickle_timeseries_periodindex():
    # GH#2891
    prng = period_range("1/1/2011", "1/1/2012", freq="M")
    ts = Series(np.random.default_rng(2).standard_normal(len(prng)), prng)
    new_ts = tm.round_trip_pickle(ts)
    assert new_ts.index.freqstr == "M"


@pytest.mark.parametrize(
    "name", [777, 777.0, "name", datetime.datetime(2001, 11, 11), (1, 2)]
)
def test_pickle_preserve_name(name):
    unpickled = tm.round_trip_pickle(Series(np.arange(10, dtype=np.float64), name=name))
    assert unpickled.name == name


def test_pickle_datetimes(datetime_series):
    unp_ts = tm.round_trip_pickle(datetime_series)
    tm.assert_series_equal(unp_ts, datetime_series)


def test_pickle_strings(string_series):
    unp_series = tm.round_trip_pickle(string_series)
    tm.assert_series_equal(unp_series, string_series)


@td.skip_array_manager_invalid_test
def test_pickle_preserves_block_ndim():
    # GH#37631
    ser = Series(list("abc")).astype("category").iloc[[0]]
    res = tm.round_trip_pickle(ser)

    assert res._mgr.blocks[0].ndim == 1
    assert res._mgr.blocks[0].shape == (1,)

    # GH#37631 OP issue was about indexing, underlying problem was pickle
    tm.assert_series_equal(res[[True]], ser)


@pytest.mark.parametrize("protocol", [pickle.DEFAULT_PROTOCOL, pickle.HIGHEST_PROTOCOL])
def test_pickle_big_dataframe_compression(protocol, compression):
    # GH#39002
    df = DataFrame(range(100000))
    result = tm.round_trip_pathlib(
        partial(df.to_pickle, protocol=protocol, compression=compression),
        partial(pd.read_pickle, compression=compression),
    )
    tm.assert_frame_equal(df, result)


def test_pickle_frame_v124_unpickle_130(datapath):
    # GH#42345 DataFrame created in 1.2.x, unpickle in 1.3.x
    path = datapath(
        Path(__file__).parent,
        "data",
        "legacy_pickle",
        "1.2.4",
        "empty_frame_v1_2_4-GH#42345.pkl",
    )
    with open(path, "rb") as fd:
        df = pickle.load(fd)

    expected = DataFrame(index=[], columns=[])
    tm.assert_frame_equal(df, expected)


def test_pickle_pos_args_deprecation():
    # GH-54229
    df = DataFrame({"a": [1, 2, 3]})
    msg = (
        r"Starting with pandas version 3.0 all arguments of to_pickle except for the "
        r"argument 'path' will be keyword-only."
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        buffer = io.BytesIO()
        df.to_pickle(buffer, "infer")
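
# The second positional argument of to_pickle is compression, so
# df.to_pickle(buffer, "infer") passes it positionally and triggers the
# FutureWarning; spelling it df.to_pickle(buffer, compression="infer")
# avoids the deprecation.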