Updated script so that it can be controlled by a Node.js web app
@@ -0,0 +1,50 @@
from collections.abc import Generator
from contextlib import contextmanager
import pathlib
import tempfile

import pytest

from pandas.io.pytables import HDFStore

tables = pytest.importorskip("tables")
# set these parameters so we don't have file sharing
tables.parameters.MAX_NUMEXPR_THREADS = 1
tables.parameters.MAX_BLOSC_THREADS = 1
tables.parameters.MAX_THREADS = 1


def safe_close(store):
    try:
        if store is not None:
            store.close()
    except OSError:
        pass


# contextmanager to ensure the file cleanup
@contextmanager
def ensure_clean_store(
    path, mode="a", complevel=None, complib=None, fletcher32=False
) -> Generator[HDFStore, None, None]:
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmp_path = pathlib.Path(tmpdirname, path)
        with HDFStore(
            tmp_path,
            mode=mode,
            complevel=complevel,
            complib=complib,
            fletcher32=fletcher32,
        ) as store:
            yield store


def _maybe_remove(store, key):
    """
    For tests using tables, try removing the table to be sure there is
    no content from previous tests using the same table name.
    """
    try:
        store.remove(key)
    except (ValueError, KeyError):
        pass
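# Illustrative usage sketch (an editorial example, not part of this commit):
# how a test module is expected to combine the two helpers above.
# ensure_clean_store yields an HDFStore backed by a throwaway temporary
# directory, and _maybe_remove clears a possibly leftover key before writing.
if __name__ == "__main__":
    import pandas as pd

    with ensure_clean_store("demo.h5") as store:
        _maybe_remove(store, "df")
        store.append("df", pd.DataFrame({"a": [1.0, 2.0, 3.0]}))
        print(store["df"])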
@@ -0,0 +1,9 @@
import uuid

import pytest


@pytest.fixture
def setup_path():
    """Fixture for setup path"""
    return f"tmp.__{uuid.uuid4()}__.h5"
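# Usage note (editorial, not part of this commit): pytest injects the fixture
# by parameter name, so a test only declares it, e.g.
#
#     def test_roundtrip(setup_path):
#         with ensure_clean_store(setup_path) as store:
#             ...
#
# The embedded uuid4 keeps concurrently running tests from colliding on the
# same file name.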
@@ -0,0 +1,986 @@
import datetime
from datetime import timedelta
import re

import numpy as np
import pytest

from pandas._libs.tslibs import Timestamp
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    Series,
    _testing as tm,
    concat,
    date_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)

pytestmark = pytest.mark.single_cpu

tables = pytest.importorskip("tables")

@pytest.mark.filterwarnings("ignore::tables.NaturalNameWarning")
def test_append(setup_path):
    with ensure_clean_store(setup_path) as store:
        # this is allowed but you almost always don't want to do it
        # tables.NaturalNameWarning):
        df = DataFrame(
            np.random.default_rng(2).standard_normal((20, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=20, freq="B"),
        )
        _maybe_remove(store, "df1")
        store.append("df1", df[:10])
        store.append("df1", df[10:])
        tm.assert_frame_equal(store["df1"], df)

        _maybe_remove(store, "df2")
        store.put("df2", df[:10], format="table")
        store.append("df2", df[10:])
        tm.assert_frame_equal(store["df2"], df)

        _maybe_remove(store, "df3")
        store.append("/df3", df[:10])
        store.append("/df3", df[10:])
        tm.assert_frame_equal(store["df3"], df)

        # this is allowed but you almost always don't want to do it
        # tables.NaturalNameWarning
        _maybe_remove(store, "/df3 foo")
        store.append("/df3 foo", df[:10])
        store.append("/df3 foo", df[10:])
        tm.assert_frame_equal(store["df3 foo"], df)

        # dtype issues - mixed type in a single object column
        df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
        df["mixed_column"] = "testing"
        df.loc[2, "mixed_column"] = np.nan
        _maybe_remove(store, "df")
        store.append("df", df)
        tm.assert_frame_equal(store["df"], df)

        # uints - test storage of uints
        uint_data = DataFrame(
            {
                "u08": Series(
                    np.random.default_rng(2).integers(0, high=255, size=5),
                    dtype=np.uint8,
                ),
                "u16": Series(
                    np.random.default_rng(2).integers(0, high=65535, size=5),
                    dtype=np.uint16,
                ),
                "u32": Series(
                    np.random.default_rng(2).integers(0, high=2**30, size=5),
                    dtype=np.uint32,
                ),
                "u64": Series(
                    [2**58, 2**59, 2**60, 2**61, 2**62],
                    dtype=np.uint64,
                ),
            },
            index=np.arange(5),
        )
        _maybe_remove(store, "uints")
        store.append("uints", uint_data)
        tm.assert_frame_equal(store["uints"], uint_data, check_index_type=True)

        # uints - test storage of uints in indexable columns
        _maybe_remove(store, "uints")
        # 64-bit indices not yet supported
        store.append("uints", uint_data, data_columns=["u08", "u16", "u32"])
        tm.assert_frame_equal(store["uints"], uint_data, check_index_type=True)

def test_append_series(setup_path):
    with ensure_clean_store(setup_path) as store:
        # basic
        ss = Series(range(20), dtype=np.float64, index=[f"i_{i}" for i in range(20)])
        ts = Series(
            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
        )
        ns = Series(np.arange(100))

        store.append("ss", ss)
        result = store["ss"]
        tm.assert_series_equal(result, ss)
        assert result.name is None

        store.append("ts", ts)
        result = store["ts"]
        tm.assert_series_equal(result, ts)
        assert result.name is None

        ns.name = "foo"
        store.append("ns", ns)
        result = store["ns"]
        tm.assert_series_equal(result, ns)
        assert result.name == ns.name

        # select on the values
        expected = ns[ns > 60]
        result = store.select("ns", "foo>60")
        tm.assert_series_equal(result, expected)

        # select on the index and values
        expected = ns[(ns > 70) & (ns.index < 90)]
        result = store.select("ns", "foo>70 and index<90")
        tm.assert_series_equal(result, expected, check_index_type=True)

        # multi-index
        mi = DataFrame(np.random.default_rng(2).standard_normal((5, 1)), columns=["A"])
        mi["B"] = np.arange(len(mi))
        mi["C"] = "foo"
        mi.loc[3:5, "C"] = "bar"
        mi.set_index(["C", "B"], inplace=True)
        s = mi.stack(future_stack=True)
        s.index = s.index.droplevel(2)
        store.append("mi", s)
        tm.assert_series_equal(store["mi"], s, check_index_type=True)


def test_append_some_nans(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            {
                "A": Series(np.random.default_rng(2).standard_normal(20)).astype(
                    "int32"
                ),
                "A1": np.random.default_rng(2).standard_normal(20),
                "A2": np.random.default_rng(2).standard_normal(20),
                "B": "foo",
                "C": "bar",
                "D": Timestamp("2001-01-01").as_unit("ns"),
                "E": Timestamp("2001-01-02").as_unit("ns"),
            },
            index=np.arange(20),
        )
        # some nans
        _maybe_remove(store, "df1")
        df.loc[0:15, ["A1", "B", "D", "E"]] = np.nan
        store.append("df1", df[:10])
        store.append("df1", df[10:])
        tm.assert_frame_equal(store["df1"], df, check_index_type=True)

        # first column
        df1 = df.copy()
        df1["A1"] = np.nan
        _maybe_remove(store, "df1")
        store.append("df1", df1[:10])
        store.append("df1", df1[10:])
        tm.assert_frame_equal(store["df1"], df1, check_index_type=True)

        # 2nd column
        df2 = df.copy()
        df2["A2"] = np.nan
        _maybe_remove(store, "df2")
        store.append("df2", df2[:10])
        store.append("df2", df2[10:])
        tm.assert_frame_equal(store["df2"], df2, check_index_type=True)

        # datetimes
        df3 = df.copy()
        df3["E"] = np.nan
        _maybe_remove(store, "df3")
        store.append("df3", df3[:10])
        store.append("df3", df3[10:])
        tm.assert_frame_equal(store["df3"], df3, check_index_type=True)

def test_append_all_nans(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            {
                "A1": np.random.default_rng(2).standard_normal(20),
                "A2": np.random.default_rng(2).standard_normal(20),
            },
            index=np.arange(20),
        )
        df.loc[0:15, :] = np.nan

        # nan some entire rows (dropna=True)
        _maybe_remove(store, "df")
        store.append("df", df[:10], dropna=True)
        store.append("df", df[10:], dropna=True)
        tm.assert_frame_equal(store["df"], df[-4:], check_index_type=True)

        # nan some entire rows (dropna=False)
        _maybe_remove(store, "df2")
        store.append("df2", df[:10], dropna=False)
        store.append("df2", df[10:], dropna=False)
        tm.assert_frame_equal(store["df2"], df, check_index_type=True)

        # tests the option io.hdf.dropna_table
        with pd.option_context("io.hdf.dropna_table", False):
            _maybe_remove(store, "df3")
            store.append("df3", df[:10])
            store.append("df3", df[10:])
            tm.assert_frame_equal(store["df3"], df)

        with pd.option_context("io.hdf.dropna_table", True):
            _maybe_remove(store, "df4")
            store.append("df4", df[:10])
            store.append("df4", df[10:])
            tm.assert_frame_equal(store["df4"], df[-4:])

        # nan some entire rows (strings are still written!)
        df = DataFrame(
            {
                "A1": np.random.default_rng(2).standard_normal(20),
                "A2": np.random.default_rng(2).standard_normal(20),
                "B": "foo",
                "C": "bar",
            },
            index=np.arange(20),
        )

        df.loc[0:15, :] = np.nan

        _maybe_remove(store, "df")
        store.append("df", df[:10], dropna=True)
        store.append("df", df[10:], dropna=True)
        tm.assert_frame_equal(store["df"], df, check_index_type=True)

        _maybe_remove(store, "df2")
        store.append("df2", df[:10], dropna=False)
        store.append("df2", df[10:], dropna=False)
        tm.assert_frame_equal(store["df2"], df, check_index_type=True)

        # nan some entire rows (but since we have dates they are still
        # written!)
        df = DataFrame(
            {
                "A1": np.random.default_rng(2).standard_normal(20),
                "A2": np.random.default_rng(2).standard_normal(20),
                "B": "foo",
                "C": "bar",
                "D": Timestamp("2001-01-01").as_unit("ns"),
                "E": Timestamp("2001-01-02").as_unit("ns"),
            },
            index=np.arange(20),
        )

        df.loc[0:15, :] = np.nan

        _maybe_remove(store, "df")
        store.append("df", df[:10], dropna=True)
        store.append("df", df[10:], dropna=True)
        tm.assert_frame_equal(store["df"], df, check_index_type=True)

        _maybe_remove(store, "df2")
        store.append("df2", df[:10], dropna=False)
        store.append("df2", df[10:], dropna=False)
        tm.assert_frame_equal(store["df2"], df, check_index_type=True)

def test_append_frame_column_oriented(setup_path):
    with ensure_clean_store(setup_path) as store:
        # column oriented
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        df.index = df.index._with_freq(None)  # freq doesn't round-trip

        _maybe_remove(store, "df1")
        store.append("df1", df.iloc[:, :2], axes=["columns"])
        store.append("df1", df.iloc[:, 2:])
        tm.assert_frame_equal(store["df1"], df)

        result = store.select("df1", "columns=A")
        expected = df.reindex(columns=["A"])
        tm.assert_frame_equal(expected, result)

        # selection on the non-indexable
        result = store.select("df1", ("columns=A", "index=df.index[0:4]"))
        expected = df.reindex(columns=["A"], index=df.index[0:4])
        tm.assert_frame_equal(expected, result)

        # this isn't supported
        msg = re.escape(
            "passing a filterable condition to a non-table indexer "
            "[Filter: Not Initialized]"
        )
        with pytest.raises(TypeError, match=msg):
            store.select("df1", "columns=A and index>df.index[4]")


def test_append_with_different_block_ordering(setup_path):
    # GH 4096; using same frames, but different block orderings
    with ensure_clean_store(setup_path) as store:
        for i in range(10):
            df = DataFrame(
                np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB")
            )
            df["index"] = range(10)
            df["index"] += i * 10
            df["int64"] = Series([1] * len(df), dtype="int64")
            df["int16"] = Series([1] * len(df), dtype="int16")

            if i % 2 == 0:
                del df["int64"]
                df["int64"] = Series([1] * len(df), dtype="int64")
            if i % 3 == 0:
                a = df.pop("A")
                df["A"] = a

            df.set_index("index", inplace=True)

            store.append("df", df)

    # test a different ordering but with more fields (like invalid
    # combinations)
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 2)),
            columns=list("AB"),
            dtype="float64",
        )
        df["int64"] = Series([1] * len(df), dtype="int64")
        df["int16"] = Series([1] * len(df), dtype="int16")
        store.append("df", df)

        # store additional fields in different blocks
        df["int16_2"] = Series([1] * len(df), dtype="int16")
        msg = re.escape(
            "cannot match existing table structure for [int16] on appending data"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # store multiple additional fields in different blocks
        df["float_3"] = Series([1.0] * len(df), dtype="float64")
        msg = re.escape(
            "cannot match existing table structure for [A,B] on appending data"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)


def test_append_with_strings(setup_path):
    with ensure_clean_store(setup_path) as store:

        def check_col(key, name, size):
            assert (
                getattr(store.get_storer(key).table.description, name).itemsize == size
            )

        # avoid truncation on elements
        df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]])
        store.append("df_big", df)
        tm.assert_frame_equal(store.select("df_big"), df)
        check_col("df_big", "values_block_1", 15)

        # appending smaller string ok
        df2 = DataFrame([[124, "asdqy"], [346, "dggnhefbdfb"]])
        store.append("df_big", df2)
        expected = concat([df, df2])
        tm.assert_frame_equal(store.select("df_big"), expected)
        check_col("df_big", "values_block_1", 15)

        # avoid truncation on elements
        df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]])
        store.append("df_big2", df, min_itemsize={"values": 50})
        tm.assert_frame_equal(store.select("df_big2"), df)
        check_col("df_big2", "values_block_1", 50)

        # bigger string on next append
        store.append("df_new", df)
        df_new = DataFrame([[124, "abcdefqhij"], [346, "abcdefghijklmnopqrtsuvwxyz"]])
        msg = (
            r"Trying to store a string with len \[26\] in "
            r"\[values_block_1\] column but\n"
            r"this column has a limit of \[15\]!\n"
            "Consider using min_itemsize to preset the sizes on these "
            "columns"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_new", df_new)

        # min_itemsize on Series index (GH 11412)
        df = DataFrame(
            {
                "A": [0.0, 1.0, 2.0, 3.0, 4.0],
                "B": [0.0, 1.0, 0.0, 1.0, 0.0],
                "C": Index(["foo1", "foo2", "foo3", "foo4", "foo5"], dtype=object),
                "D": date_range("20130101", periods=5),
            }
        ).set_index("C")
        store.append("ss", df["B"], min_itemsize={"index": 4})
        tm.assert_series_equal(store.select("ss"), df["B"])

        # same as above, with data_columns=True
        store.append("ss2", df["B"], data_columns=True, min_itemsize={"index": 4})
        tm.assert_series_equal(store.select("ss2"), df["B"])

        # min_itemsize in index without appending (GH 10381)
        store.put("ss3", df, format="table", min_itemsize={"index": 6})
        # just make sure there is a longer string:
        df2 = df.copy().reset_index().assign(C="longer").set_index("C")
        store.append("ss3", df2)
        tm.assert_frame_equal(store.select("ss3"), concat([df, df2]))

        # same as above, with a Series
        store.put("ss4", df["B"], format="table", min_itemsize={"index": 6})
        store.append("ss4", df2["B"])
        tm.assert_series_equal(store.select("ss4"), concat([df["B"], df2["B"]]))

        # with nans
        _maybe_remove(store, "df")
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        df["string"] = "foo"
        df.loc[df.index[1:4], "string"] = np.nan
        df["string2"] = "bar"
        df.loc[df.index[4:8], "string2"] = np.nan
        df["string3"] = "bah"
        df.loc[df.index[1:], "string3"] = np.nan
        store.append("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

    with ensure_clean_store(setup_path) as store:
        df = DataFrame({"A": "foo", "B": "bar"}, index=range(10))

        # a min_itemsize that creates a data_column
        _maybe_remove(store, "df")
        store.append("df", df, min_itemsize={"A": 200})
        check_col("df", "A", 200)
        assert store.get_storer("df").data_columns == ["A"]

        # a min_itemsize that creates a data_column2
        _maybe_remove(store, "df")
        store.append("df", df, data_columns=["B"], min_itemsize={"A": 200})
        check_col("df", "A", 200)
        assert store.get_storer("df").data_columns == ["B", "A"]

        # a min_itemsize that creates a data_column2
        _maybe_remove(store, "df")
        store.append("df", df, data_columns=["B"], min_itemsize={"values": 200})
        check_col("df", "B", 200)
        check_col("df", "values_block_0", 200)
        assert store.get_storer("df").data_columns == ["B"]

        # infer the .typ on subsequent appends
        _maybe_remove(store, "df")
        store.append("df", df[:5], min_itemsize=200)
        store.append("df", df[5:], min_itemsize=200)
        tm.assert_frame_equal(store["df"], df)

        # invalid min_itemsize keys
        df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"])
        _maybe_remove(store, "df")
        msg = re.escape(
            "min_itemsize has the key [foo] which is not an axis or data_column"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df, min_itemsize={"foo": 20, "foobar": 20})


def test_append_with_empty_string(setup_path):
    with ensure_clean_store(setup_path) as store:
        # with all empty strings (GH 12242)
        df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]})
        store.append("df", df[:-1], min_itemsize={"x": 1})
        store.append("df", df[-1:], min_itemsize={"x": 1})
        tm.assert_frame_equal(store.select("df"), df)


def test_append_with_data_columns(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        df.iloc[0, df.columns.get_loc("B")] = 1.0
        _maybe_remove(store, "df")
        store.append("df", df[:2], data_columns=["B"])
        store.append("df", df[2:])
        tm.assert_frame_equal(store["df"], df)

        # check that we have indices created
        assert store._handle.root.df.table.cols.index.is_indexed is True
        assert store._handle.root.df.table.cols.B.is_indexed is True

        # data column searching
        result = store.select("df", "B>0")
        expected = df[df.B > 0]
        tm.assert_frame_equal(result, expected)

        # data column searching (with an indexable and a data_columns)
        result = store.select("df", "B>0 and index>df.index[3]")
        df_new = df.reindex(index=df.index[4:])
        expected = df_new[df_new.B > 0]
        tm.assert_frame_equal(result, expected)

        # data column selection with a string data_column
        df_new = df.copy()
        df_new["string"] = "foo"
        df_new.loc[df_new.index[1:4], "string"] = np.nan
        df_new.loc[df_new.index[5:6], "string"] = "bar"
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"])
        result = store.select("df", "string='foo'")
        expected = df_new[df_new.string == "foo"]
        tm.assert_frame_equal(result, expected)

    # using min_itemsize and a data column
    def check_col(key, name, size):
        assert (
            getattr(store.get_storer(key).table.description, name).itemsize == size
        )

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"], min_itemsize={"string": 30})
        check_col("df", "string", 30)
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"], min_itemsize=30)
        check_col("df", "string", 30)
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["string"], min_itemsize={"values": 30})
        check_col("df", "string", 30)

    with ensure_clean_store(setup_path) as store:
        df_new["string2"] = "foobarbah"
        df_new["string_block1"] = "foobarbah1"
        df_new["string_block2"] = "foobarbah2"
        _maybe_remove(store, "df")
        store.append(
            "df",
            df_new,
            data_columns=["string", "string2"],
            min_itemsize={"string": 30, "string2": 40, "values": 50},
        )
        check_col("df", "string", 30)
        check_col("df", "string2", 40)
        check_col("df", "values_block_1", 50)

    with ensure_clean_store(setup_path) as store:
        # multiple data columns
        df_new = df.copy()
        df_new.iloc[0, df_new.columns.get_loc("A")] = 1.0
        df_new.iloc[0, df_new.columns.get_loc("B")] = -1.0
        df_new["string"] = "foo"

        sl = df_new.columns.get_loc("string")
        df_new.iloc[1:4, sl] = np.nan
        df_new.iloc[5:6, sl] = "bar"

        df_new["string2"] = "foo"
        sl = df_new.columns.get_loc("string2")
        df_new.iloc[2:5, sl] = np.nan
        df_new.iloc[7:8, sl] = "bar"
        _maybe_remove(store, "df")
        store.append("df", df_new, data_columns=["A", "B", "string", "string2"])
        result = store.select("df", "string='foo' and string2='foo' and A>0 and B<0")
        expected = df_new[
            (df_new.string == "foo")
            & (df_new.string2 == "foo")
            & (df_new.A > 0)
            & (df_new.B < 0)
        ]
        tm.assert_frame_equal(result, expected, check_freq=False)
        # FIXME: 2020-05-07 freq check randomly fails in the CI

        # yield an empty frame
        result = store.select("df", "string='foo' and string2='cool'")
        expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")]
        tm.assert_frame_equal(result, expected)

    with ensure_clean_store(setup_path) as store:
        # doc example
        df_dc = df.copy()
        df_dc["string"] = "foo"
        df_dc.loc[df_dc.index[4:6], "string"] = np.nan
        df_dc.loc[df_dc.index[7:9], "string"] = "bar"
        df_dc["string2"] = "cool"
        df_dc["datetime"] = Timestamp("20010102").as_unit("ns")
        df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan

        _maybe_remove(store, "df_dc")
        store.append(
            "df_dc", df_dc, data_columns=["B", "C", "string", "string2", "datetime"]
        )
        result = store.select("df_dc", "B>0")

        expected = df_dc[df_dc.B > 0]
        tm.assert_frame_equal(result, expected)

        result = store.select("df_dc", ["B > 0", "C > 0", "string == foo"])
        expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]
        tm.assert_frame_equal(result, expected, check_freq=False)
        # FIXME: 2020-12-07 intermittent build failures here with freq of
        # None instead of BDay(4)

    with ensure_clean_store(setup_path) as store:
        # doc example part 2

        index = date_range("1/1/2000", periods=8)
        df_dc = DataFrame(
            np.random.default_rng(2).standard_normal((8, 3)),
            index=index,
            columns=["A", "B", "C"],
        )
        df_dc["string"] = "foo"
        df_dc.loc[df_dc.index[4:6], "string"] = np.nan
        df_dc.loc[df_dc.index[7:9], "string"] = "bar"
        df_dc[["B", "C"]] = df_dc[["B", "C"]].abs()
        df_dc["string2"] = "cool"

        # on-disk operations
        store.append("df_dc", df_dc, data_columns=["B", "C", "string", "string2"])

        result = store.select("df_dc", "B>0")
        expected = df_dc[df_dc.B > 0]
        tm.assert_frame_equal(result, expected)

        result = store.select("df_dc", ["B > 0", "C > 0", 'string == "foo"'])
        expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]
        tm.assert_frame_equal(result, expected)


def test_append_hierarchical(tmp_path, setup_path, multiindex_dataframe_random_data):
    df = multiindex_dataframe_random_data
    df.columns.name = None

    with ensure_clean_store(setup_path) as store:
        store.append("mi", df)
        result = store.select("mi")
        tm.assert_frame_equal(result, df)

        # GH 3748
        result = store.select("mi", columns=["A", "B"])
        expected = df.reindex(columns=["A", "B"])
        tm.assert_frame_equal(result, expected)

    path = tmp_path / "test.hdf"
    df.to_hdf(path, key="df", format="table")
    result = read_hdf(path, "df", columns=["A", "B"])
    expected = df.reindex(columns=["A", "B"])
    tm.assert_frame_equal(result, expected)


def test_append_misc(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        store.append("df", df, chunksize=1)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        store.append("df1", df, expectedrows=10)
        result = store.select("df1")
        tm.assert_frame_equal(result, df)


@pytest.mark.parametrize("chunksize", [10, 200, 1000])
def test_append_misc_chunksize(setup_path, chunksize):
    # more chunksize in append tests
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    df["string"] = "foo"
    df["float322"] = 1.0
    df["float322"] = df["float322"].astype("float32")
    df["bool"] = df["float322"] > 0
    df["time1"] = Timestamp("20130101").as_unit("ns")
    df["time2"] = Timestamp("20130102").as_unit("ns")
    with ensure_clean_store(setup_path, mode="w") as store:
        store.append("obj", df, chunksize=chunksize)
        result = store.select("obj")
        tm.assert_frame_equal(result, df)

def test_append_misc_empty_frame(setup_path):
    # empty frame, GH4273
    with ensure_clean_store(setup_path) as store:
        # 0 len
        df_empty = DataFrame(columns=list("ABC"))
        store.append("df", df_empty)
        with pytest.raises(KeyError, match="'No object named df in the file'"):
            store.select("df")

        # repeated append of 0/non-zero frames
        df = DataFrame(np.random.default_rng(2).random((10, 3)), columns=list("ABC"))
        store.append("df", df)
        tm.assert_frame_equal(store.select("df"), df)
        store.append("df", df_empty)
        tm.assert_frame_equal(store.select("df"), df)

        # store
        df = DataFrame(columns=list("ABC"))
        store.put("df2", df)
        tm.assert_frame_equal(store.select("df2"), df)


# TODO(ArrayManager) currently we rely on falling back to BlockManager, but
# the conversion from AM->BM converts the invalid object dtype column into
# a datetime64 column no longer raising an error
@td.skip_array_manager_not_yet_implemented
def test_append_raise(setup_path):
    with ensure_clean_store(setup_path) as store:
        # test append with invalid input to get good error messages

        # list in column
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        df["invalid"] = [["a"]] * len(df)
        assert df.dtypes["invalid"] == np.object_
        msg = re.escape(
            """Cannot serialize the column [invalid]
because its data contents are not [string] but [mixed] object dtype"""
        )
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # multiple invalid columns
        df["invalid2"] = [["a"]] * len(df)
        df["invalid3"] = [["a"]] * len(df)
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # datetime with embedded nans as object
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        s = Series(datetime.datetime(2001, 1, 2), index=df.index)
        s = s.astype(object)
        s[0:5] = np.nan
        df["invalid"] = s
        assert df.dtypes["invalid"] == np.object_
        msg = "too many timezones in this block, create separate data columns"
        with pytest.raises(TypeError, match=msg):
            store.append("df", df)

        # directly ndarray
        msg = "value must be None, Series, or DataFrame"
        with pytest.raises(TypeError, match=msg):
            store.append("df", np.arange(10))

        # series directly
        msg = re.escape(
            "cannot properly create the storer for: "
            "[group->df,value-><class 'pandas.core.series.Series'>]"
        )
        with pytest.raises(TypeError, match=msg):
            store.append("df", Series(np.arange(10)))

        # appending an incompatible table
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        store.append("df", df)

        df["foo"] = "foo"
        msg = re.escape(
            "invalid combination of [non_index_axes] on appending data "
            "[(1, ['A', 'B', 'C', 'D', 'foo'])] vs current table "
            "[(1, ['A', 'B', 'C', 'D'])]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # incompatible type (GH 41897)
        _maybe_remove(store, "df")
        df["foo"] = Timestamp("20130101")
        store.append("df", df)
        df["foo"] = "bar"
        msg = re.escape(
            "invalid combination of [values_axes] on appending data "
            "[name->values_block_1,cname->values_block_1,"
            "dtype->bytes24,kind->string,shape->(1, 30)] "
            "vs current table "
            "[name->values_block_1,cname->values_block_1,"
            "dtype->datetime64[s],kind->datetime64[s],shape->None]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)


def test_append_with_timedelta(setup_path):
    # GH 3577
    # append timedelta

    ts = Timestamp("20130101").as_unit("ns")
    df = DataFrame(
        {
            "A": ts,
            "B": [ts + timedelta(days=i, seconds=10) for i in range(10)],
        }
    )
    df["C"] = df["A"] - df["B"]
    df.loc[3:5, "C"] = np.nan

    with ensure_clean_store(setup_path) as store:
        # table
        _maybe_remove(store, "df")
        store.append("df", df, data_columns=True)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        result = store.select("df", where="C<100000")
        tm.assert_frame_equal(result, df)

        result = store.select("df", where="C<pd.Timedelta('-3D')")
        tm.assert_frame_equal(result, df.iloc[3:])

        result = store.select("df", "C<'-3D'")
        tm.assert_frame_equal(result, df.iloc[3:])

        # a bit hacky here as we don't really deal with the NaT properly

        result = store.select("df", "C<'-500000s'")
        result = result.dropna(subset=["C"])
        tm.assert_frame_equal(result, df.iloc[6:])

        result = store.select("df", "C<'-3.5D'")
        result = result.iloc[1:]
        tm.assert_frame_equal(result, df.iloc[4:])

        # fixed
        _maybe_remove(store, "df2")
        store.put("df2", df)
        result = store.select("df2")
        tm.assert_frame_equal(result, df)


def test_append_to_multiple(setup_path):
    df1 = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    df2 = df1.copy().rename(columns="{}_2".format)
    df2["foo"] = "bar"
    df = concat([df1, df2], axis=1)

    with ensure_clean_store(setup_path) as store:
        # exceptions
        msg = "append_to_multiple requires a selector that is in passed dict"
        with pytest.raises(ValueError, match=msg):
            store.append_to_multiple(
                {"df1": ["A", "B"], "df2": None}, df, selector="df3"
            )

        with pytest.raises(ValueError, match=msg):
            store.append_to_multiple({"df1": None, "df2": None}, df, selector="df3")

        msg = (
            "append_to_multiple must have a dictionary specified as the way to "
            "split the value"
        )
        with pytest.raises(ValueError, match=msg):
            store.append_to_multiple("df1", df, "df1")

        # regular operation
        store.append_to_multiple({"df1": ["A", "B"], "df2": None}, df, selector="df1")
        result = store.select_as_multiple(
            ["df1", "df2"], where=["A>0", "B>0"], selector="df1"
        )
        expected = df[(df.A > 0) & (df.B > 0)]
        tm.assert_frame_equal(result, expected)


def test_append_to_multiple_dropna(setup_path):
    df1 = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    df2 = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    ).rename(columns="{}_2".format)
    df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
    df = concat([df1, df2], axis=1)

    with ensure_clean_store(setup_path) as store:
        # dropna=True should guarantee rows are synchronized
        store.append_to_multiple(
            {"df1": ["A", "B"], "df2": None}, df, selector="df1", dropna=True
        )
        result = store.select_as_multiple(["df1", "df2"])
        expected = df.dropna()
        tm.assert_frame_equal(result, expected, check_index_type=True)
        tm.assert_index_equal(store.select("df1").index, store.select("df2").index)


def test_append_to_multiple_dropna_false(setup_path):
    df1 = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    df2 = df1.copy().rename(columns="{}_2".format)
    df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
    df = concat([df1, df2], axis=1)

    with ensure_clean_store(setup_path) as store, pd.option_context(
        "io.hdf.dropna_table", True
    ):
        # dropna=False shouldn't synchronize row indexes
        store.append_to_multiple(
            {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False
        )

        msg = "all tables must have exactly the same nrows!"
        with pytest.raises(ValueError, match=msg):
            store.select_as_multiple(["df1a", "df2a"])

        assert not store.select("df1a").index.equals(store.select("df2a").index)


def test_append_to_multiple_min_itemsize(setup_path):
    # GH 11238
    df = DataFrame(
        {
            "IX": np.arange(1, 21),
            "Num": np.arange(1, 21),
            "BigNum": np.arange(1, 21) * 88,
            "Str": ["a" for _ in range(20)],
            "LongStr": ["abcde" for _ in range(20)],
        }
    )
    expected = df.iloc[[0]]

    with ensure_clean_store(setup_path) as store:
        store.append_to_multiple(
            {
                "index": ["IX"],
                "nums": ["Num", "BigNum"],
                "strs": ["Str", "LongStr"],
            },
            df.iloc[[0]],
            "index",
            min_itemsize={"Str": 10, "LongStr": 100, "Num": 2},
        )
        result = store.select_as_multiple(["index", "nums", "strs"])
        tm.assert_frame_equal(result, expected, check_index_type=True)
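# Illustrative sketch (editorial, not part of this commit): the min_itemsize
# behaviour exercised by the string tests above, in one runnable example --
# preallocating a wider string column so later, longer values still fit.
if __name__ == "__main__":
    with ensure_clean_store("min_itemsize_demo.h5") as store:
        # reserve 30 bytes for data column "s"; without this, appending a
        # string wider than the first chunk's widest value raises ValueError
        store.append(
            "demo",
            DataFrame({"s": ["short"]}),
            data_columns=["s"],
            min_itemsize={"s": 30},
        )
        store.append("demo", DataFrame({"s": ["a-considerably-longer-value"]}))
        print(store.select("demo"))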
@@ -0,0 +1,214 @@
import numpy as np
import pytest

from pandas import (
    Categorical,
    DataFrame,
    Series,
    _testing as tm,
    concat,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)

pytestmark = pytest.mark.single_cpu


def test_categorical(setup_path):
    with ensure_clean_store(setup_path) as store:
        # Basic
        _maybe_remove(store, "s")
        s = Series(
            Categorical(
                ["a", "b", "b", "a", "a", "c"],
                categories=["a", "b", "c", "d"],
                ordered=False,
            )
        )
        store.append("s", s, format="table")
        result = store.select("s")
        tm.assert_series_equal(s, result)

        _maybe_remove(store, "s_ordered")
        s = Series(
            Categorical(
                ["a", "b", "b", "a", "a", "c"],
                categories=["a", "b", "c", "d"],
                ordered=True,
            )
        )
        store.append("s_ordered", s, format="table")
        result = store.select("s_ordered")
        tm.assert_series_equal(s, result)

        _maybe_remove(store, "df")
        df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
        store.append("df", df, format="table")
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        # Dtypes
        _maybe_remove(store, "si")
        s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category")
        store.append("si", s)
        result = store.select("si")
        tm.assert_series_equal(result, s)

        _maybe_remove(store, "si2")
        s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category")
        store.append("si2", s)
        result = store.select("si2")
        tm.assert_series_equal(result, s)

        # Multiple
        _maybe_remove(store, "df2")
        df2 = df.copy()
        df2["s2"] = Series(list("abcdefg")).astype("category")
        store.append("df2", df2)
        result = store.select("df2")
        tm.assert_frame_equal(result, df2)

        # Make sure the metadata is OK
        info = store.info()
        assert "/df2 " in info
        # df2._mgr.blocks[0] and df2._mgr.blocks[2] are Categorical
        assert "/df2/meta/values_block_0/meta" in info
        assert "/df2/meta/values_block_2/meta" in info

        # unordered
        _maybe_remove(store, "s2")
        s = Series(
            Categorical(
                ["a", "b", "b", "a", "a", "c"],
                categories=["a", "b", "c", "d"],
                ordered=False,
            )
        )
        store.append("s2", s, format="table")
        result = store.select("s2")
        tm.assert_series_equal(result, s)

        # Query
        _maybe_remove(store, "df3")
        store.append("df3", df, data_columns=["s"])
        expected = df[df.s.isin(["b", "c"])]
        result = store.select("df3", where=['s in ["b","c"]'])
        tm.assert_frame_equal(result, expected)

        expected = df[df.s.isin(["b", "c"])]
        result = store.select("df3", where=['s = ["b","c"]'])
        tm.assert_frame_equal(result, expected)

        expected = df[df.s.isin(["d"])]
        result = store.select("df3", where=['s in ["d"]'])
        tm.assert_frame_equal(result, expected)

        expected = df[df.s.isin(["f"])]
        result = store.select("df3", where=['s in ["f"]'])
        tm.assert_frame_equal(result, expected)

        # Appending with same categories is ok
        store.append("df3", df)

        df = concat([df, df])
        expected = df[df.s.isin(["b", "c"])]
        result = store.select("df3", where=['s in ["b","c"]'])
        tm.assert_frame_equal(result, expected)

        # Appending must have the same categories
        df3 = df.copy()
        df3["s"] = df3["s"].cat.remove_unused_categories()

        msg = "cannot append a categorical with different categories to the existing"
        with pytest.raises(ValueError, match=msg):
            store.append("df3", df3)

        # Remove, and make sure the metadata is removed (it's a recursive
        # removal, so it should be).
        result = store.select("df3/meta/s/meta")
        assert result is not None
        store.remove("df3")

        with pytest.raises(
            KeyError, match="'No object named df3/meta/s/meta in the file'"
        ):
            store.select("df3/meta/s/meta")


def test_categorical_conversion(tmp_path, setup_path):
    # GH13322
    # Check that read_hdf with categorical columns doesn't return rows if
    # where criteria isn't met.
    obsids = ["ESP_012345_6789", "ESP_987654_3210"]
    imgids = ["APF00006np", "APF0001imm"]
    data = [4.3, 9.8]

    # Test without categories
    df = DataFrame({"obsids": obsids, "imgids": imgids, "data": data})

    # We are expecting an empty DataFrame matching types of df
    expected = df.iloc[[], :]
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table", data_columns=True)
    result = read_hdf(path, "df", where="obsids=B")
    tm.assert_frame_equal(result, expected)

    # Test with categories
    df.obsids = df.obsids.astype("category")
    df.imgids = df.imgids.astype("category")

    # We are expecting an empty DataFrame matching types of df
    expected = df.iloc[[], :]
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table", data_columns=True)
    result = read_hdf(path, "df", where="obsids=B")
    tm.assert_frame_equal(result, expected)


def test_categorical_nan_only_columns(tmp_path, setup_path):
    # GH18413
    # Check that read_hdf with categorical columns with NaN-only values can
    # be read back.
    df = DataFrame(
        {
            "a": ["a", "b", "c", np.nan],
            "b": [np.nan, np.nan, np.nan, np.nan],
            "c": [1, 2, 3, 4],
            "d": Series([None] * 4, dtype=object),
        }
    )
    df["a"] = df.a.astype("category")
    df["b"] = df.b.astype("category")
    df["d"] = df.b.astype("category")
    expected = df
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table", data_columns=True)
    result = read_hdf(path, "df")
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "where, df, expected",
    [
        ('col=="q"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": []})),
        ('col=="a"', DataFrame({"col": ["a", "b", "s"]}), DataFrame({"col": ["a"]})),
    ],
)
def test_convert_value(
    tmp_path, setup_path, where: str, df: DataFrame, expected: DataFrame
):
    # GH39420
    # Check that read_hdf with categorical columns can filter by where condition.
    df.col = df.col.astype("category")
    max_widths = {"col": 1}
    categorical_values = sorted(df.col.unique())
    expected.col = expected.col.astype("category")
    expected.col = expected.col.cat.set_categories(categorical_values)

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table", min_itemsize=max_widths)
    result = read_hdf(path, where=where)
    tm.assert_frame_equal(result, expected)
@@ -0,0 +1,75 @@
import pytest

import pandas as pd
import pandas._testing as tm

tables = pytest.importorskip("tables")


@pytest.fixture
def pytables_hdf5_file(tmp_path):
    """
    Use PyTables to create a simple HDF5 file.
    """
    table_schema = {
        "c0": tables.Time64Col(pos=0),
        "c1": tables.StringCol(5, pos=1),
        "c2": tables.Int64Col(pos=2),
    }

    t0 = 1_561_105_000.0

    testsamples = [
        {"c0": t0, "c1": "aaaaa", "c2": 1},
        {"c0": t0 + 1, "c1": "bbbbb", "c2": 2},
        {"c0": t0 + 2, "c1": "ccccc", "c2": 10**5},
        {"c0": t0 + 3, "c1": "ddddd", "c2": 4_294_967_295},
    ]

    objname = "pandas_test_timeseries"

    path = tmp_path / "written_with_pytables.h5"
    with tables.open_file(path, mode="w") as f:
        t = f.create_table("/", name=objname, description=table_schema)
        for sample in testsamples:
            for key, value in sample.items():
                t.row[key] = value
            t.row.append()

    yield path, objname, pd.DataFrame(testsamples)


class TestReadPyTablesHDF5:
    """
    A group of tests which covers reading HDF5 files written by plain PyTables
    (not written by pandas).

    Was introduced for regression-testing issue 11188.
    """

    def test_read_complete(self, pytables_hdf5_file):
        path, objname, df = pytables_hdf5_file
        result = pd.read_hdf(path, key=objname)
        expected = df
        tm.assert_frame_equal(result, expected, check_index_type=True)

    def test_read_with_start(self, pytables_hdf5_file):
        path, objname, df = pytables_hdf5_file
        # This is a regression test for pandas-dev/pandas/issues/11188
        result = pd.read_hdf(path, key=objname, start=1)
        expected = df[1:].reset_index(drop=True)
        tm.assert_frame_equal(result, expected, check_index_type=True)

    def test_read_with_stop(self, pytables_hdf5_file):
        path, objname, df = pytables_hdf5_file
        # This is a regression test for pandas-dev/pandas/issues/11188
        result = pd.read_hdf(path, key=objname, stop=1)
        expected = df[:1].reset_index(drop=True)
        tm.assert_frame_equal(result, expected, check_index_type=True)

    def test_read_with_startstop(self, pytables_hdf5_file):
        path, objname, df = pytables_hdf5_file
        # This is a regression test for pandas-dev/pandas/issues/11188
        result = pd.read_hdf(path, key=objname, start=1, stop=2)
        expected = df[1:2].reset_index(drop=True)
        tm.assert_frame_equal(result, expected, check_index_type=True)
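# Note (editorial, not part of this commit): read_hdf can open these files
# even though pandas never wrote them, because HDFStore falls back to a
# generic table reader for plain PyTables tables; start/stop above are
# therefore raw row offsets into the table, which is why the expected frames
# reset their index.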
@@ -0,0 +1,195 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    Series,
)
import pandas._testing as tm
from pandas.tests.io.pytables.common import ensure_clean_store

from pandas.io.pytables import read_hdf


def test_complex_fixed(tmp_path, setup_path):
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)).astype(np.complex64),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    path = tmp_path / setup_path
    df.to_hdf(path, key="df")
    reread = read_hdf(path, "df")
    tm.assert_frame_equal(df, reread)

    df = DataFrame(
        np.random.default_rng(2).random((4, 5)).astype(np.complex128),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    path = tmp_path / setup_path
    df.to_hdf(path, key="df")
    reread = read_hdf(path, "df")
    tm.assert_frame_equal(df, reread)


def test_complex_table(tmp_path, setup_path):
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)).astype(np.complex64),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table")
    reread = read_hdf(path, key="df")
    tm.assert_frame_equal(df, reread)

    df = DataFrame(
        np.random.default_rng(2).random((4, 5)).astype(np.complex128),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table", mode="w")
    reread = read_hdf(path, "df")
    tm.assert_frame_equal(df, reread)


def test_complex_mixed_fixed(tmp_path, setup_path):
    complex64 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
    )
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "d"],
            "C": complex64,
            "D": complex128,
            "E": [1.0, 2.0, 3.0, 4.0],
        },
        index=list("abcd"),
    )
    path = tmp_path / setup_path
    df.to_hdf(path, key="df")
    reread = read_hdf(path, "df")
    tm.assert_frame_equal(df, reread)


def test_complex_mixed_table(tmp_path, setup_path):
    complex64 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
    )
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "d"],
            "C": complex64,
            "D": complex128,
            "E": [1.0, 2.0, 3.0, 4.0],
        },
        index=list("abcd"),
    )

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["A", "B"])
        result = store.select("df", where="A>2")
        tm.assert_frame_equal(df.loc[df.A > 2], result)

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table")
    reread = read_hdf(path, "df")
    tm.assert_frame_equal(df, reread)


def test_complex_across_dimensions_fixed(tmp_path, setup_path):
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    s = Series(complex128, index=list("abcd"))
    df = DataFrame({"A": s, "B": s})

    objs = [s, df]
    comps = [tm.assert_series_equal, tm.assert_frame_equal]
    for obj, comp in zip(objs, comps):
        path = tmp_path / setup_path
        obj.to_hdf(path, key="obj", format="fixed")
        reread = read_hdf(path, "obj")
        comp(obj, reread)


def test_complex_across_dimensions(tmp_path, setup_path):
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    s = Series(complex128, index=list("abcd"))
    df = DataFrame({"A": s, "B": s})

    path = tmp_path / setup_path
    df.to_hdf(path, key="obj", format="table")
    reread = read_hdf(path, "obj")
    tm.assert_frame_equal(df, reread)


def test_complex_indexing_error(setup_path):
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128},
        index=list("abcd"),
    )

    msg = (
        "Columns containing complex values can be stored "
        "but cannot be indexed when using table format. "
        "Either use fixed format, set index=False, "
        "or do not include the columns containing complex "
        "values to data_columns when initializing the table."
    )

    with ensure_clean_store(setup_path) as store:
        with pytest.raises(TypeError, match=msg):
            store.append("df", df, data_columns=["C"])


def test_complex_series_error(tmp_path, setup_path):
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    s = Series(complex128, index=list("abcd"))

    msg = (
        "Columns containing complex values can be stored "
        "but cannot be indexed when using table format. "
        "Either use fixed format, set index=False, "
        "or do not include the columns containing complex "
        "values to data_columns when initializing the table."
    )

    path = tmp_path / setup_path
    with pytest.raises(TypeError, match=msg):
        s.to_hdf(path, key="obj", format="t")

    path = tmp_path / setup_path
    s.to_hdf(path, key="obj", format="t", index=False)
    reread = read_hdf(path, "obj")
    tm.assert_series_equal(s, reread)


def test_complex_append(setup_path):
    df = DataFrame(
        {
            "a": np.random.default_rng(2).standard_normal(100).astype(np.complex128),
            "b": np.random.default_rng(2).standard_normal(100),
        }
    )

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["b"])
        store.append("df", df)
        result = store.select("df")
        tm.assert_frame_equal(pd.concat([df, df], axis=0), result)
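# Illustrative sketch (editorial, not part of this commit): the workaround the
# error message above suggests. Complex values round-trip in table format as
# long as they are kept out of the indexed/data columns, e.g. via index=False.
if __name__ == "__main__":
    s = Series(np.array([1.0 + 1.0j, 2.0 - 2.0j]), index=list("ab"))
    with ensure_clean_store("complex_demo.h5") as store:
        store.put("demo", s, format="table", index=False)  # skip index creation
        tm.assert_series_equal(store.select("demo"), s)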
@@ -0,0 +1,251 @@
import datetime
from io import BytesIO
import re

import numpy as np
import pytest

from pandas import (
    CategoricalIndex,
    DataFrame,
    HDFStore,
    Index,
    MultiIndex,
    _testing as tm,
    date_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import ensure_clean_store

from pandas.io.pytables import (
    Term,
    _maybe_adjust_name,
)

pytestmark = pytest.mark.single_cpu


def test_pass_spec_to_storer(setup_path):
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    with ensure_clean_store(setup_path) as store:
        store.put("df", df)
        msg = (
            "cannot pass a column specification when reading a Fixed format "
            "store. this store must be selected in its entirety"
        )
        with pytest.raises(TypeError, match=msg):
            store.select("df", columns=["A"])
        msg = (
            "cannot pass a where specification when reading from a Fixed "
            "format store. this store must be selected in its entirety"
        )
        with pytest.raises(TypeError, match=msg):
            store.select("df", where=[("columns=A")])


def test_table_index_incompatible_dtypes(setup_path):
    df1 = DataFrame({"a": [1, 2, 3]})
    df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3))

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df1, format="table")
        msg = re.escape("incompatible kind in col [integer - datetime64[ns]]")
        with pytest.raises(TypeError, match=msg):
            store.put("frame", df2, format="table", append=True)


def test_unimplemented_dtypes_table_columns(setup_path):
    with ensure_clean_store(setup_path) as store:
        dtypes = [("date", datetime.date(2001, 1, 2))]

        # currently not supported dtypes ####
        for n, f in dtypes:
            df = DataFrame(
                1.1 * np.arange(120).reshape((30, 4)),
                columns=Index(list("ABCD"), dtype=object),
                index=Index([f"i-{i}" for i in range(30)], dtype=object),
            )
            df[n] = f
            msg = re.escape(f"[{n}] is not implemented as a table column")
            with pytest.raises(TypeError, match=msg):
                store.append(f"df1_{n}", df)

    # frame
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    df["obj1"] = "foo"
    df["obj2"] = "bar"
    df["datetime1"] = datetime.date(2001, 1, 2)
    df = df._consolidate()

    with ensure_clean_store(setup_path) as store:
        # this fails because we have a date in the object block......
        msg = re.escape(
            """Cannot serialize the column [datetime1]
because its data contents are not [string] but [date] object dtype"""
        )
        with pytest.raises(TypeError, match=msg):
            store.append("df_unimplemented", df)


def test_invalid_terms(tmp_path, setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )
        df["string"] = "foo"
        df.loc[df.index[0:4], "string"] = "bar"

        store.put("df", df, format="table")

        # some invalid terms
        msg = re.escape("__init__() missing 1 required positional argument: 'where'")
        with pytest.raises(TypeError, match=msg):
            Term()

        # more invalid
        msg = re.escape(
            "cannot process expression [df.index[3]], "
            "[2000-01-06 00:00:00] is not a valid condition"
        )
        with pytest.raises(ValueError, match=msg):
            store.select("df", "df.index[3]")

        msg = "invalid syntax"
        with pytest.raises(SyntaxError, match=msg):
            store.select("df", "index>")

    # from the docs
    path = tmp_path / setup_path
    dfq = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=list("ABCD"),
        index=date_range("20130101", periods=10),
    )
    dfq.to_hdf(path, key="dfq", format="table", data_columns=True)

    # check ok
    read_hdf(path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']")
    read_hdf(path, "dfq", where="A>0 or C>0")

    # catch the invalid reference
    path = tmp_path / setup_path
    dfq = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=list("ABCD"),
        index=date_range("20130101", periods=10),
    )
    dfq.to_hdf(path, key="dfq", format="table")

    msg = (
        r"The passed where expression: A>0 or C>0\n\s*"
        r"contains an invalid variable reference\n\s*"
        r"all of the variable references must be a reference to\n\s*"
        r"an axis \(e.g. 'index' or 'columns'\), or a data_column\n\s*"
        r"The currently defined references are: index,columns\n"
    )
    with pytest.raises(ValueError, match=msg):
        read_hdf(path, "dfq", where="A>0 or C>0")


def test_append_with_diff_col_name_types_raises_value_error(setup_path):
    df = DataFrame(np.random.default_rng(2).standard_normal((10, 1)))
    df2 = DataFrame({"a": np.random.default_rng(2).standard_normal(10)})
    df3 = DataFrame({(1, 2): np.random.default_rng(2).standard_normal(10)})
    df4 = DataFrame({("1", 2): np.random.default_rng(2).standard_normal(10)})
    df5 = DataFrame({("1", 2, object): np.random.default_rng(2).standard_normal(10)})

    with ensure_clean_store(setup_path) as store:
        name = "df_diff_valerror"
        store.append(name, df)

        for d in (df2, df3, df4, df5):
            msg = re.escape(
                "cannot match existing table structure for [0] on appending data"
            )
            with pytest.raises(ValueError, match=msg):
                store.append(name, d)


def test_invalid_complib(setup_path):
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    with tm.ensure_clean(setup_path) as path:
        msg = r"complib only supports \[.*\] compression."
        with pytest.raises(ValueError, match=msg):
            df.to_hdf(path, key="df", complib="foolib")


@pytest.mark.parametrize(
    "idx",
    [
        date_range("2019", freq="D", periods=3, tz="UTC"),
        CategoricalIndex(list("abc")),
    ],
)
def test_to_hdf_multiindex_extension_dtype(idx, tmp_path, setup_path):
    # GH 7775
    mi = MultiIndex.from_arrays([idx, idx])
    df = DataFrame(0, index=mi, columns=["a"])
    path = tmp_path / setup_path
    with pytest.raises(NotImplementedError, match="Saving a MultiIndex"):
        df.to_hdf(path, key="df")


def test_unsuppored_hdf_file_error(datapath):
    # GH 9539
    data_path = datapath("io", "data", "legacy_hdf/incompatible_dataset.h5")
    message = (
        r"Dataset\(s\) incompatible with Pandas data types, "
        "not table, or no datasets found in HDF5 file."
    )

    with pytest.raises(ValueError, match=message):
        read_hdf(data_path)


def test_read_hdf_errors(setup_path, tmp_path):
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    path = tmp_path / setup_path
    msg = r"File [\S]* does not exist"
    with pytest.raises(OSError, match=msg):
        read_hdf(path, "key")

    df.to_hdf(path, key="df")
    store = HDFStore(path, mode="r")
    store.close()

    msg = "The HDFStore must be open for reading."
    with pytest.raises(OSError, match=msg):
        read_hdf(store, "df")


def test_read_hdf_generic_buffer_errors():
    msg = "Support for generic buffers has not been implemented."
    with pytest.raises(NotImplementedError, match=msg):
        read_hdf(BytesIO(b""), "df")


@pytest.mark.parametrize("bad_version", [(1, 2), (1,), [], "12", "123"])
def test_maybe_adjust_name_bad_version_raises(bad_version):
    msg = "Version is incorrect, expected sequence of 3 integers"
    with pytest.raises(ValueError, match=msg):
        _maybe_adjust_name("values_block_0", version=bad_version)
@ -0,0 +1,495 @@
import os

import numpy as np
import pytest

from pandas.compat import (
    PY311,
    is_ci_environment,
    is_platform_linux,
    is_platform_little_endian,
)
from pandas.errors import (
    ClosedFileError,
    PossibleDataLossError,
)

from pandas import (
    DataFrame,
    HDFStore,
    Index,
    Series,
    _testing as tm,
    date_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
    tables,
)

from pandas.io import pytables
from pandas.io.pytables import Term

pytestmark = pytest.mark.single_cpu


@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
def test_mode(setup_path, tmp_path, mode):
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    msg = r"[\S]* does not exist"
    path = tmp_path / setup_path

    # constructor
    if mode in ["r", "r+"]:
        with pytest.raises(OSError, match=msg):
            HDFStore(path, mode=mode)

    else:
        with HDFStore(path, mode=mode) as store:
            assert store._handle.mode == mode

    path = tmp_path / setup_path

    # context
    if mode in ["r", "r+"]:
        with pytest.raises(OSError, match=msg):
            with HDFStore(path, mode=mode) as store:
                pass
    else:
        with HDFStore(path, mode=mode) as store:
            assert store._handle.mode == mode

    path = tmp_path / setup_path

    # conv write
    if mode in ["r", "r+"]:
        with pytest.raises(OSError, match=msg):
            df.to_hdf(path, key="df", mode=mode)
        df.to_hdf(path, key="df", mode="w")
    else:
        df.to_hdf(path, key="df", mode=mode)

    # conv read
    if mode in ["w"]:
        msg = (
            "mode w is not allowed while performing a read. "
            r"Allowed modes are r, r\+ and a."
        )
        with pytest.raises(ValueError, match=msg):
            read_hdf(path, "df", mode=mode)
    else:
        result = read_hdf(path, "df", mode=mode)
        tm.assert_frame_equal(result, df)
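

# A compact, illustrative sketch (not part of the original change set) of the
# default-mode behaviour exercised above; the key name "probe" is hypothetical.
# mode="a" is the default and creates the file when it is missing, which is
# why only "r"/"r+" raise OSError for a nonexistent path.
def _sketch_default_append_mode(tmp_path, setup_path):
    df = DataFrame({"x": range(3)})
    path = tmp_path / setup_path
    df.to_hdf(path, key="probe")  # no mode given: "a" creates the file
    tm.assert_frame_equal(read_hdf(path, "probe"), df)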


def test_default_mode(tmp_path, setup_path):
    # read_hdf uses default mode
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="w")
    result = read_hdf(path, "df")
    tm.assert_frame_equal(result, df)


def test_reopen_handle(tmp_path, setup_path):
    path = tmp_path / setup_path

    store = HDFStore(path, mode="a")
    store["a"] = Series(
        np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
    )

    msg = (
        r"Re-opening the file \[[\S]*\] with mode \[a\] will delete the "
        "current file!"
    )
    # invalid mode change
    with pytest.raises(PossibleDataLossError, match=msg):
        store.open("w")

    store.close()
    assert not store.is_open

    # truncation ok here
    store.open("w")
    assert store.is_open
    assert len(store) == 0
    store.close()
    assert not store.is_open

    store = HDFStore(path, mode="a")
    store["a"] = Series(
        np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
    )

    # reopen as read
    store.open("r")
    assert store.is_open
    assert len(store) == 1
    assert store._mode == "r"
    store.close()
    assert not store.is_open

    # reopen as append
    store.open("a")
    assert store.is_open
    assert len(store) == 1
    assert store._mode == "a"
    store.close()
    assert not store.is_open

    # reopen as append (again)
    store.open("a")
    assert store.is_open
    assert len(store) == 1
    assert store._mode == "a"
    store.close()
    assert not store.is_open


def test_open_args(setup_path):
    with tm.ensure_clean(setup_path) as path:
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )

        # create an in memory store
        store = HDFStore(
            path, mode="a", driver="H5FD_CORE", driver_core_backing_store=0
        )
        store["df"] = df
        store.append("df2", df)

        tm.assert_frame_equal(store["df"], df)
        tm.assert_frame_equal(store["df2"], df)

        store.close()

    # the file should not have actually been written
    assert not os.path.exists(path)


def test_flush(setup_path):
    with ensure_clean_store(setup_path) as store:
        store["a"] = Series(range(5))
        store.flush()
        store.flush(fsync=True)


def test_complibs_default_settings(tmp_path, setup_path):
    # GH15943
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    # Set complevel and check if complib is automatically set to
    # default value
    tmpfile = tmp_path / setup_path
    df.to_hdf(tmpfile, key="df", complevel=9)
    result = read_hdf(tmpfile, "df")
    tm.assert_frame_equal(result, df)

    with tables.open_file(tmpfile, mode="r") as h5file:
        for node in h5file.walk_nodes(where="/df", classname="Leaf"):
            assert node.filters.complevel == 9
            assert node.filters.complib == "zlib"

    # Set complib and check to see if compression is disabled
    tmpfile = tmp_path / setup_path
    df.to_hdf(tmpfile, key="df", complib="zlib")
    result = read_hdf(tmpfile, "df")
    tm.assert_frame_equal(result, df)

    with tables.open_file(tmpfile, mode="r") as h5file:
        for node in h5file.walk_nodes(where="/df", classname="Leaf"):
            assert node.filters.complevel == 0
            assert node.filters.complib is None

    # Check if not setting complib or complevel results in no compression
    tmpfile = tmp_path / setup_path
    df.to_hdf(tmpfile, key="df")
    result = read_hdf(tmpfile, "df")
    tm.assert_frame_equal(result, df)

    with tables.open_file(tmpfile, mode="r") as h5file:
        for node in h5file.walk_nodes(where="/df", classname="Leaf"):
            assert node.filters.complevel == 0
            assert node.filters.complib is None


def test_complibs_default_settings_override(tmp_path, setup_path):
    # Check if file-defaults can be overridden on a per table basis
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    tmpfile = tmp_path / setup_path
    store = HDFStore(tmpfile)
    store.append("dfc", df, complevel=9, complib="blosc")
    store.append("df", df)
    store.close()

    with tables.open_file(tmpfile, mode="r") as h5file:
        for node in h5file.walk_nodes(where="/df", classname="Leaf"):
            assert node.filters.complevel == 0
            assert node.filters.complib is None
        for node in h5file.walk_nodes(where="/dfc", classname="Leaf"):
            assert node.filters.complevel == 9
            assert node.filters.complib == "blosc"


@pytest.mark.parametrize("lvl", range(10))
@pytest.mark.parametrize("lib", tables.filters.all_complibs)
@pytest.mark.filterwarnings("ignore:object name is not a valid")
@pytest.mark.skipif(
    not PY311 and is_ci_environment() and is_platform_linux(),
    reason="Segfaulting in a CI environment"
    # with xfail, would sometimes raise UnicodeDecodeError
    # invalid state byte
)
def test_complibs(tmp_path, lvl, lib, request):
    # GH14478
    if PY311 and is_platform_linux() and lib == "blosc2" and lvl != 0:
        request.applymarker(
            pytest.mark.xfail(reason=f"Fails for {lib} on Linux and PY > 3.11")
        )
    df = DataFrame(
        np.ones((30, 4)), columns=list("ABCD"), index=np.arange(30).astype(np.str_)
    )

    # Remove lzo if it's not available on this platform
    if not tables.which_lib_version("lzo"):
        pytest.skip("lzo not available")
    # Remove bzip2 if it's not available on this platform
    if not tables.which_lib_version("bzip2"):
        pytest.skip("bzip2 not available")

    tmpfile = tmp_path / f"{lvl}_{lib}.h5"
    gname = f"{lvl}_{lib}"

    # Write and read file to see if data is consistent
    df.to_hdf(tmpfile, key=gname, complib=lib, complevel=lvl)
    result = read_hdf(tmpfile, gname)
    tm.assert_frame_equal(result, df)

    # Open file and check metadata for correct amount of compression
    with tables.open_file(tmpfile, mode="r") as h5table:
        for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"):
            assert node.filters.complevel == lvl
            if lvl == 0:
                assert node.filters.complib is None
            else:
                assert node.filters.complib == lib


@pytest.mark.skipif(
    not is_platform_little_endian(), reason="reason platform is not little endian"
)
def test_encoding(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame({"A": "foo", "B": "bar"}, index=range(5))
        df.loc[2, "A"] = np.nan
        df.loc[3, "B"] = np.nan
        _maybe_remove(store, "df")
        store.append("df", df, encoding="ascii")
        tm.assert_frame_equal(store["df"], df)

        expected = df.reindex(columns=["A"])
        result = store.select("df", Term("columns=A", encoding="ascii"))
        tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "val",
    [
        [b"E\xc9, 17", b"", b"a", b"b", b"c"],
        [b"E\xc9, 17", b"a", b"b", b"c"],
        [b"EE, 17", b"", b"a", b"b", b"c"],
        [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],
        [b"", b"a", b"b", b"c"],
        [b"\xf8\xfc", b"a", b"b", b"c"],
        [b"A\xf8\xfc", b"", b"a", b"b", b"c"],
        [np.nan, b"", b"b", b"c"],
        [b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
    ],
)
@pytest.mark.parametrize("dtype", ["category", object])
def test_latin_encoding(tmp_path, setup_path, dtype, val):
    enc = "latin-1"
    nan_rep = ""
    key = "data"

    val = [x.decode(enc) if isinstance(x, bytes) else x for x in val]
    ser = Series(val, dtype=dtype)

    store = tmp_path / setup_path
    ser.to_hdf(store, key=key, format="table", encoding=enc, nan_rep=nan_rep)
    retr = read_hdf(store, key)

    # TODO:(3.0): once Categorical replace deprecation is enforced,
    # we may be able to re-simplify the construction of s_nan
    if dtype == "category":
        if nan_rep in ser.cat.categories:
            s_nan = ser.cat.remove_categories([nan_rep])
        else:
            s_nan = ser
    else:
        s_nan = ser.replace(nan_rep, np.nan)

    tm.assert_series_equal(s_nan, retr)


def test_multiple_open_close(tmp_path, setup_path):
    # gh-4409: open & close multiple times

    path = tmp_path / setup_path

    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    df.to_hdf(path, key="df", mode="w", format="table")

    # single
    store = HDFStore(path)
    assert "CLOSED" not in store.info()
    assert store.is_open

    store.close()
    assert "CLOSED" in store.info()
    assert not store.is_open

    path = tmp_path / setup_path

    if pytables._table_file_open_policy_is_strict:
        # multiples
        store1 = HDFStore(path)
        msg = (
            r"The file [\S]* is already opened\. Please close it before "
            r"reopening in write mode\."
        )
        with pytest.raises(ValueError, match=msg):
            HDFStore(path)

        store1.close()
    else:
        # multiples
        store1 = HDFStore(path)
        store2 = HDFStore(path)

        assert "CLOSED" not in store1.info()
        assert "CLOSED" not in store2.info()
        assert store1.is_open
        assert store2.is_open

        store1.close()
        assert "CLOSED" in store1.info()
        assert not store1.is_open
        assert "CLOSED" not in store2.info()
        assert store2.is_open

        store2.close()
        assert "CLOSED" in store1.info()
        assert "CLOSED" in store2.info()
        assert not store1.is_open
        assert not store2.is_open

        # nested close
        store = HDFStore(path, mode="w")
        store.append("df", df)

        store2 = HDFStore(path)
        store2.append("df2", df)
        store2.close()
        assert "CLOSED" in store2.info()
        assert not store2.is_open

        store.close()
        assert "CLOSED" in store.info()
        assert not store.is_open

        # double closing
        store = HDFStore(path, mode="w")
        store.append("df", df)

        store2 = HDFStore(path)
        store.close()
        assert "CLOSED" in store.info()
        assert not store.is_open

        store2.close()
        assert "CLOSED" in store2.info()
        assert not store2.is_open

    # ops on a closed store
    path = tmp_path / setup_path

    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )
    df.to_hdf(path, key="df", mode="w", format="table")

    store = HDFStore(path)
    store.close()

    msg = r"[\S]* file is not open!"
    with pytest.raises(ClosedFileError, match=msg):
        store.keys()

    with pytest.raises(ClosedFileError, match=msg):
        "df" in store

    with pytest.raises(ClosedFileError, match=msg):
        len(store)

    with pytest.raises(ClosedFileError, match=msg):
        store["df"]

    with pytest.raises(ClosedFileError, match=msg):
        store.select("df")

    with pytest.raises(ClosedFileError, match=msg):
        store.get("df")

    with pytest.raises(ClosedFileError, match=msg):
        store.append("df2", df)

    with pytest.raises(ClosedFileError, match=msg):
        store.put("df3", df)

    with pytest.raises(ClosedFileError, match=msg):
        store.get_storer("df2")

    with pytest.raises(ClosedFileError, match=msg):
        store.remove("df2")

    with pytest.raises(ClosedFileError, match=msg):
        store.select("df")

    msg = "'HDFStore' object has no attribute 'df'"
    with pytest.raises(AttributeError, match=msg):
        store.df


def test_fspath():
    with tm.ensure_clean("foo.h5") as path:
        with HDFStore(path) as store:
            assert os.fspath(store) == str(path)
@ -0,0 +1,87 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    HDFStore,
    Index,
    Series,
    date_range,
)
from pandas.tests.io.pytables.common import (
    ensure_clean_store,
    tables,
)

pytestmark = pytest.mark.single_cpu


def test_keys(setup_path):
    with ensure_clean_store(setup_path) as store:
        store["a"] = Series(
            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
        )
        store["b"] = Series(
            range(10), dtype="float64", index=[f"i_{i}" for i in range(10)]
        )
        store["c"] = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )

        assert len(store) == 3
        expected = {"/a", "/b", "/c"}
        assert set(store.keys()) == expected
        assert set(store) == expected
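
        # Dict-like behaviour, as asserted above: len() counts the stored
        # objects and iterating the store yields the same "/"-prefixed
        # paths as keys().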


def test_non_pandas_keys(tmp_path, setup_path):
    class Table1(tables.IsDescription):
        value1 = tables.Float32Col()

    class Table2(tables.IsDescription):
        value2 = tables.Float32Col()

    class Table3(tables.IsDescription):
        value3 = tables.Float32Col()

    path = tmp_path / setup_path
    with tables.open_file(path, mode="w") as h5file:
        group = h5file.create_group("/", "group")
        h5file.create_table(group, "table1", Table1, "Table 1")
        h5file.create_table(group, "table2", Table2, "Table 2")
        h5file.create_table(group, "table3", Table3, "Table 3")
    with HDFStore(path) as store:
        assert len(store.keys(include="native")) == 3
        expected = {"/group/table1", "/group/table2", "/group/table3"}
        assert set(store.keys(include="native")) == expected
        assert set(store.keys(include="pandas")) == set()
        for name in expected:
            df = store.get(name)
            assert len(df.columns) == 1


def test_keys_illegal_include_keyword_value(setup_path):
    with ensure_clean_store(setup_path) as store:
        with pytest.raises(
            ValueError,
            match="`include` should be either 'pandas' or 'native' but is 'illegal'",
        ):
            store.keys(include="illegal")


def test_keys_ignore_hdf_softlink(setup_path):
    # GH 20523
    # Puts a softlink into HDF file and rereads

    with ensure_clean_store(setup_path) as store:
        df = DataFrame({"A": range(5), "B": range(5)})
        store.put("df", df)

        assert store.keys() == ["/df"]

        store._handle.create_soft_link(store._handle.root, "symlink", "df")

        # Should ignore the softlink
        assert store.keys() == ["/df"]
@ -0,0 +1,374 @@
import re

import numpy as np
import pytest

from pandas._libs.tslibs import Timestamp

import pandas as pd
from pandas import (
    DataFrame,
    HDFStore,
    Index,
    MultiIndex,
    Series,
    _testing as tm,
    concat,
    date_range,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)
from pandas.util import _test_decorators as td

pytestmark = pytest.mark.single_cpu


def test_format_type(tmp_path, setup_path):
    df = DataFrame({"A": [1, 2]})
    with HDFStore(tmp_path / setup_path) as store:
        store.put("a", df, format="fixed")
        store.put("b", df, format="table")

        assert store.get_storer("a").format_type == "fixed"
        assert store.get_storer("b").format_type == "table"


def test_format_kwarg_in_constructor(tmp_path, setup_path):
    # GH 13291

    msg = "format is not a defined argument for HDFStore"

    with pytest.raises(ValueError, match=msg):
        HDFStore(tmp_path / setup_path, format="table")


def test_api_default_format(tmp_path, setup_path):
    # default_format option
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )

        with pd.option_context("io.hdf.default_format", "fixed"):
            _maybe_remove(store, "df")
            store.put("df", df)
            assert not store.get_storer("df").is_table

            msg = "Can only append to Tables"
            with pytest.raises(ValueError, match=msg):
                store.append("df2", df)

        with pd.option_context("io.hdf.default_format", "table"):
            _maybe_remove(store, "df")
            store.put("df", df)
            assert store.get_storer("df").is_table

            _maybe_remove(store, "df2")
            store.append("df2", df)
            assert store.get_storer("df").is_table

    path = tmp_path / setup_path
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    with pd.option_context("io.hdf.default_format", "fixed"):
        df.to_hdf(path, key="df")
        with HDFStore(path) as store:
            assert not store.get_storer("df").is_table
        with pytest.raises(ValueError, match=msg):
            df.to_hdf(path, key="df2", append=True)

    with pd.option_context("io.hdf.default_format", "table"):
        df.to_hdf(path, key="df3")
        with HDFStore(path) as store:
            assert store.get_storer("df3").is_table
        df.to_hdf(path, key="df4", append=True)
        with HDFStore(path) as store:
            assert store.get_storer("df4").is_table


def test_put(setup_path):
    with ensure_clean_store(setup_path) as store:
        ts = Series(
            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
        )
        df = DataFrame(
            np.random.default_rng(2).standard_normal((20, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=20, freq="B"),
        )
        store["a"] = ts
        store["b"] = df[:10]
        store["foo/bar/bah"] = df[:10]
        store["foo"] = df[:10]
        store["/foo"] = df[:10]
        store.put("c", df[:10], format="table")

        # not OK, not a table
        msg = "Can only append to Tables"
        with pytest.raises(ValueError, match=msg):
            store.put("b", df[10:], append=True)

        # node does not currently exist, test _is_table_type returns False
        # in this case
        _maybe_remove(store, "f")
        with pytest.raises(ValueError, match=msg):
            store.put("f", df[10:], append=True)

        # can't put to a table (use append instead)
        with pytest.raises(ValueError, match=msg):
            store.put("c", df[10:], append=True)

        # overwrite table
        store.put("c", df[:10], format="table", append=False)
        tm.assert_frame_equal(df[:10], store["c"])
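
        # The split exercised above: put() (re)writes a whole node, while
        # append() is valid solely for table-format nodes; fixed-format
        # nodes only support wholesale replacement, never appends.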


def test_put_string_index(setup_path):
    with ensure_clean_store(setup_path) as store:
        index = Index([f"I am a very long string index: {i}" for i in range(20)])
        s = Series(np.arange(20), index=index)
        df = DataFrame({"A": s, "B": s})

        store["a"] = s
        tm.assert_series_equal(store["a"], s)

        store["b"] = df
        tm.assert_frame_equal(store["b"], df)

        # mixed length
        index = Index(
            ["abcdefghijklmnopqrstuvwxyz1234567890"]
            + [f"I am a very long string index: {i}" for i in range(20)]
        )
        s = Series(np.arange(21), index=index)
        df = DataFrame({"A": s, "B": s})
        store["a"] = s
        tm.assert_series_equal(store["a"], s)

        store["b"] = df
        tm.assert_frame_equal(store["b"], df)


def test_put_compression(setup_path):
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=date_range("2000-01-01", periods=10, freq="B"),
        )

        store.put("c", df, format="table", complib="zlib")
        tm.assert_frame_equal(store["c"], df)

        # can't compress if format='fixed'
        msg = "Compression not supported on Fixed format stores"
        with pytest.raises(ValueError, match=msg):
            store.put("b", df, format="fixed", complib="zlib")


@td.skip_if_windows
def test_put_compression_blosc(setup_path):
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )

    with ensure_clean_store(setup_path) as store:
        # can't compress if format='fixed'
        msg = "Compression not supported on Fixed format stores"
        with pytest.raises(ValueError, match=msg):
            store.put("b", df, format="fixed", complib="blosc")

        store.put("c", df, format="table", complib="blosc")
        tm.assert_frame_equal(store["c"], df)


def test_put_mixed_type(setup_path):
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    df["obj1"] = "foo"
    df["obj2"] = "bar"
    df["bool1"] = df["A"] > 0
    df["bool2"] = df["B"] > 0
    df["bool3"] = True
    df["int1"] = 1
    df["int2"] = 2
    df["timestamp1"] = Timestamp("20010102").as_unit("ns")
    df["timestamp2"] = Timestamp("20010103").as_unit("ns")
    df["datetime1"] = Timestamp("20010102").as_unit("ns")
    df["datetime2"] = Timestamp("20010103").as_unit("ns")
    df.loc[df.index[3:6], ["obj1"]] = np.nan
    df = df._consolidate()

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df")

        with tm.assert_produces_warning(pd.errors.PerformanceWarning):
            store.put("df", df)

        expected = store.get("df")
        tm.assert_frame_equal(expected, df)


@pytest.mark.parametrize("format", ["table", "fixed"])
@pytest.mark.parametrize(
    "index",
    [
        Index([str(i) for i in range(10)]),
        Index(np.arange(10, dtype=float)),
        Index(np.arange(10)),
        date_range("2020-01-01", periods=10),
        pd.period_range("2020-01-01", periods=10),
    ],
)
def test_store_index_types(setup_path, format, index):
    # GH5386
    # test storing various index types

    with ensure_clean_store(setup_path) as store:
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 2)),
            columns=list("AB"),
            index=index,
        )
        _maybe_remove(store, "df")
        store.put("df", df, format=format)
        tm.assert_frame_equal(df, store["df"])


def test_column_multiindex(setup_path):
    # GH 4710
    # recreate multi-indexes properly

    index = MultiIndex.from_tuples(
        [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"]
    )
    df = DataFrame(np.arange(12).reshape(3, 4), columns=index)
    expected = df.set_axis(df.index.to_numpy())

    with ensure_clean_store(setup_path) as store:
        store.put("df", df)
        tm.assert_frame_equal(
            store["df"], expected, check_index_type=True, check_column_type=True
        )

        store.put("df1", df, format="table")
        tm.assert_frame_equal(
            store["df1"], expected, check_index_type=True, check_column_type=True
        )

        msg = re.escape("cannot use a multi-index on axis [1] with data_columns ['A']")
        with pytest.raises(ValueError, match=msg):
            store.put("df2", df, format="table", data_columns=["A"])
        msg = re.escape("cannot use a multi-index on axis [1] with data_columns True")
        with pytest.raises(ValueError, match=msg):
            store.put("df3", df, format="table", data_columns=True)

    # appending multi-column on existing table (see GH 6167)
    with ensure_clean_store(setup_path) as store:
        store.append("df2", df)
        store.append("df2", df)

        tm.assert_frame_equal(store["df2"], concat((df, df)))

    # non_index_axes name
    df = DataFrame(np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo"))
    expected = df.set_axis(df.index.to_numpy())

    with ensure_clean_store(setup_path) as store:
        store.put("df1", df, format="table")
        tm.assert_frame_equal(
            store["df1"], expected, check_index_type=True, check_column_type=True
        )


def test_store_multiindex(setup_path):
    # validate multi-index names
    # GH 5527
    with ensure_clean_store(setup_path) as store:

        def make_index(names=None):
            dti = date_range("2013-12-01", "2013-12-02")
            mi = MultiIndex.from_product([dti, range(2), range(3)], names=names)
            return mi

        # no names
        _maybe_remove(store, "df")
        df = DataFrame(np.zeros((12, 2)), columns=["a", "b"], index=make_index())
        store.append("df", df)
        tm.assert_frame_equal(store.select("df"), df)

        # partial names
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", None, None]),
        )
        store.append("df", df)
        tm.assert_frame_equal(store.select("df"), df)

        # series
        _maybe_remove(store, "ser")
        ser = Series(np.zeros(12), index=make_index(["date", None, None]))
        store.append("ser", ser)
        xp = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"]))
        tm.assert_series_equal(store.select("ser"), xp)

        # dup with column
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", "a", "t"]),
        )
        msg = "duplicate names/columns in the multi-index when storing as a table"
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # dup within level
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", "date", "date"]),
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # fully named
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", "s", "t"]),
        )
        store.append("df", df)
        tm.assert_frame_equal(store.select("df"), df)


@pytest.mark.parametrize("format", ["fixed", "table"])
def test_store_periodindex(tmp_path, setup_path, format):
    # GH 7796
    # test of PeriodIndex in HDFStore
    df = DataFrame(
        np.random.default_rng(2).standard_normal((5, 1)),
        index=pd.period_range("20220101", freq="M", periods=5),
    )

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="w", format=format)
    expected = pd.read_hdf(path, "df")
    tm.assert_frame_equal(df, expected)
@ -0,0 +1,14 @@
import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm


@td.skip_if_installed("tables")
def test_pytables_raises():
    df = pd.DataFrame({"A": [1, 2]})
    with pytest.raises(ImportError, match="tables"):
        with tm.ensure_clean("foo.h5") as path:
            df.to_hdf(path, key="df")
@ -0,0 +1,412 @@
from contextlib import closing
from pathlib import Path
import re

import numpy as np
import pytest

from pandas._libs.tslibs import Timestamp
from pandas.compat import is_platform_windows

import pandas as pd
from pandas import (
    DataFrame,
    HDFStore,
    Index,
    Series,
    _testing as tm,
    date_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)
from pandas.util import _test_decorators as td

from pandas.io.pytables import TableIterator

pytestmark = pytest.mark.single_cpu


def test_read_missing_key_close_store(tmp_path, setup_path):
    # GH 25766
    path = tmp_path / setup_path
    df = DataFrame({"a": range(2), "b": range(2)})
    df.to_hdf(path, key="k1")

    with pytest.raises(KeyError, match="'No object named k2 in the file'"):
        read_hdf(path, "k2")

    # smoke test to test that file is properly closed after
    # read with KeyError before another write
    df.to_hdf(path, key="k2")


def test_read_index_error_close_store(tmp_path, setup_path):
    # GH 25766
    path = tmp_path / setup_path
    df = DataFrame({"A": [], "B": []}, index=[])
    df.to_hdf(path, key="k1")

    with pytest.raises(IndexError, match=r"list index out of range"):
        read_hdf(path, "k1", stop=0)

    # smoke test to test that file is properly closed after
    # read with IndexError before another write
    df.to_hdf(path, key="k1")


def test_read_missing_key_opened_store(tmp_path, setup_path):
    # GH 28699
    path = tmp_path / setup_path
    df = DataFrame({"a": range(2), "b": range(2)})
    df.to_hdf(path, key="k1")

    with HDFStore(path, "r") as store:
        with pytest.raises(KeyError, match="'No object named k2 in the file'"):
            read_hdf(store, "k2")

        # Test that the file is still open after a KeyError and that we can
        # still read from it.
        read_hdf(store, "k1")


def test_read_column(setup_path):
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df")

        # GH 17912
        # HDFStore.select_column should raise a KeyError
        # exception if the key is not a valid store
        with pytest.raises(KeyError, match="No object named df in the file"):
            store.select_column("df", "index")

        store.append("df", df)
        # error
        with pytest.raises(
            KeyError, match=re.escape("'column [foo] not found in the table'")
        ):
            store.select_column("df", "foo")

        msg = re.escape("select_column() got an unexpected keyword argument 'where'")
        with pytest.raises(TypeError, match=msg):
            store.select_column("df", "index", where=["index>5"])

        # valid
        result = store.select_column("df", "index")
        tm.assert_almost_equal(result.values, Series(df.index).values)
        assert isinstance(result, Series)

        # not a data indexable column
        msg = re.escape(
            "column [values_block_0] can not be extracted individually; "
            "it is not data indexable"
        )
        with pytest.raises(ValueError, match=msg):
            store.select_column("df", "values_block_0")

        # a data column
        df2 = df.copy()
        df2["string"] = "foo"
        store.append("df2", df2, data_columns=["string"])
        result = store.select_column("df2", "string")
        tm.assert_almost_equal(result.values, df2["string"].values)

        # a data column with NaNs, result excludes the NaNs
        df3 = df.copy()
        df3["string"] = "foo"
        df3.loc[df3.index[4:6], "string"] = np.nan
        store.append("df3", df3, data_columns=["string"])
        result = store.select_column("df3", "string")
        tm.assert_almost_equal(result.values, df3["string"].values)

        # start/stop
        result = store.select_column("df3", "string", start=2)
        tm.assert_almost_equal(result.values, df3["string"].values[2:])

        result = store.select_column("df3", "string", start=-2)
        tm.assert_almost_equal(result.values, df3["string"].values[-2:])

        result = store.select_column("df3", "string", stop=2)
        tm.assert_almost_equal(result.values, df3["string"].values[:2])

        result = store.select_column("df3", "string", stop=-2)
        tm.assert_almost_equal(result.values, df3["string"].values[:-2])

        result = store.select_column("df3", "string", start=2, stop=-2)
        tm.assert_almost_equal(result.values, df3["string"].values[2:-2])

        result = store.select_column("df3", "string", start=-2, stop=2)
        tm.assert_almost_equal(result.values, df3["string"].values[-2:2])

        # GH 10392 - make sure column name is preserved
        df4 = DataFrame({"A": np.random.default_rng(2).standard_normal(10), "B": "foo"})
        store.append("df4", df4, data_columns=True)
        expected = df4["B"]
        result = store.select_column("df4", "B")
        tm.assert_series_equal(result, expected)
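
        # Only data-indexable columns (the index plus anything listed in
        # data_columns) can be pulled out with select_column(); block-stored
        # values such as values_block_0 raise ValueError, as checked above.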


def test_pytables_native_read(datapath):
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r"
    ) as store:
        d2 = store["detector/readout"]
        assert isinstance(d2, DataFrame)


@pytest.mark.skipif(is_platform_windows(), reason="native2 read fails oddly on windows")
def test_pytables_native2_read(datapath):
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "pytables_native2.h5"), mode="r"
    ) as store:
        str(store)
        d1 = store["detector"]
        assert isinstance(d1, DataFrame)


def test_legacy_table_fixed_format_read_py2(datapath):
    # GH 24510
    # legacy table with fixed format written in Python 2
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "legacy_table_fixed_py2.h5"), mode="r"
    ) as store:
        result = store.select("df")
        expected = DataFrame(
            [[1, 2, 3, "D"]],
            columns=["A", "B", "C", "D"],
            index=Index(["ABC"], name="INDEX_NAME"),
        )
        tm.assert_frame_equal(expected, result)


def test_legacy_table_fixed_format_read_datetime_py2(datapath):
    # GH 31750
    # legacy table with fixed format and datetime64 column written in Python 2
    expected = DataFrame(
        [[Timestamp("2020-02-06T18:00")]],
        columns=["A"],
        index=Index(["date"]),
        dtype="M8[ns]",
    )
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "legacy_table_fixed_datetime_py2.h5"),
        mode="r",
    ) as store:
        result = store.select("df")
        tm.assert_frame_equal(expected, result)


def test_legacy_table_read_py2(datapath):
    # issue: 24925
    # legacy table written in Python 2
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "legacy_table_py2.h5"), mode="r"
    ) as store:
        result = store.select("table")

    expected = DataFrame({"a": ["a", "b"], "b": [2, 3]})
    tm.assert_frame_equal(expected, result)


def test_read_hdf_open_store(tmp_path, setup_path):
    # GH10330
    # No check for non-string path_or_buf, and no test of open store
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    df.index.name = "letters"
    df = df.set_index(keys="E", append=True)

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="w")
    direct = read_hdf(path, "df")
    with HDFStore(path, mode="r") as store:
        indirect = read_hdf(store, "df")
        tm.assert_frame_equal(direct, indirect)
        assert store.is_open


def test_read_hdf_index_not_view(tmp_path, setup_path):
    # GH 37441
    # Ensure that the index of the DataFrame is not a view
    # into the original recarray that pytables reads in
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=[0, 1, 2, 3],
        columns=list("ABCDE"),
    )

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="w", format="table")

    df2 = read_hdf(path, "df")
    assert df2.index._data.base is None
    tm.assert_frame_equal(df, df2)


def test_read_hdf_iterator(tmp_path, setup_path):
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    df.index.name = "letters"
    df = df.set_index(keys="E", append=True)

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="w", format="t")
    direct = read_hdf(path, "df")
    iterator = read_hdf(path, "df", iterator=True)
    with closing(iterator.store):
        assert isinstance(iterator, TableIterator)
        indirect = next(iterator.__iter__())
        tm.assert_frame_equal(direct, indirect)


def test_read_nokey(tmp_path, setup_path):
    # GH10443
    df = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    # Categorical dtype not supported for "fixed" format. So no need
    # to test with that dtype in the dataframe here.
    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="a")
    reread = read_hdf(path)
    tm.assert_frame_equal(df, reread)
    df.to_hdf(path, key="df2", mode="a")

    msg = "key must be provided when HDF5 file contains multiple datasets."
    with pytest.raises(ValueError, match=msg):
        read_hdf(path)


def test_read_nokey_table(tmp_path, setup_path):
    # GH13231
    df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")})

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", mode="a", format="table")
    reread = read_hdf(path)
    tm.assert_frame_equal(df, reread)
    df.to_hdf(path, key="df2", mode="a", format="table")

    msg = "key must be provided when HDF5 file contains multiple datasets."
    with pytest.raises(ValueError, match=msg):
        read_hdf(path)


def test_read_nokey_empty(tmp_path, setup_path):
    path = tmp_path / setup_path
    store = HDFStore(path)
    store.close()
    msg = re.escape(
        "Dataset(s) incompatible with Pandas data types, not table, or no "
        "datasets found in HDF5 file."
    )
    with pytest.raises(ValueError, match=msg):
        read_hdf(path)


def test_read_from_pathlib_path(tmp_path, setup_path):
    # GH11773
    expected = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    filename = tmp_path / setup_path
    path_obj = Path(filename)

    expected.to_hdf(path_obj, key="df", mode="a")
    actual = read_hdf(path_obj, key="df")

    tm.assert_frame_equal(expected, actual)


@td.skip_if_no("py.path")
def test_read_from_py_localpath(tmp_path, setup_path):
    # GH11773
    from py.path import local as LocalPath

    expected = DataFrame(
        np.random.default_rng(2).random((4, 5)),
        index=list("abcd"),
        columns=list("ABCDE"),
    )
    filename = tmp_path / setup_path
    path_obj = LocalPath(filename)

    expected.to_hdf(path_obj, key="df", mode="a")
    actual = read_hdf(path_obj, key="df")

    tm.assert_frame_equal(expected, actual)


@pytest.mark.parametrize("format", ["fixed", "table"])
def test_read_hdf_series_mode_r(tmp_path, format, setup_path):
    # GH 16583
    # Tests that reading a Series saved to an HDF file
    # still works if a mode='r' argument is supplied
    series = Series(range(10), dtype=np.float64)
    path = tmp_path / setup_path
    series.to_hdf(path, key="data", format=format)
    result = read_hdf(path, key="data", mode="r")
    tm.assert_series_equal(result, series)


@pytest.mark.filterwarnings(r"ignore:Period with BDay freq is deprecated:FutureWarning")
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_read_py2_hdf_file_in_py3(datapath):
    # GH 16781

    # tests reading a PeriodIndex DataFrame written in Python2 in Python3

    # the file was generated in Python 2.7 like so:
    #
    # df = DataFrame([1.,2,3], index=pd.PeriodIndex(
    #     ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
    # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')

    expected = DataFrame(
        [1.0, 2, 3],
        index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"),
    )

    with ensure_clean_store(
        datapath(
            "io", "data", "legacy_hdf", "periodindex_0.20.1_x86_64_darwin_2.7.13.h5"
        ),
        mode="r",
    ) as store:
        result = store["p"]
        tm.assert_frame_equal(result, expected)


def test_read_infer_string(tmp_path, setup_path):
    # GH#54431
    pytest.importorskip("pyarrow")
    df = DataFrame({"a": ["a", "b", None]})
    path = tmp_path / setup_path
    df.to_hdf(path, key="data", format="table")
    with pd.option_context("future.infer_string", True):
        result = read_hdf(path, key="data", mode="r")
    expected = DataFrame(
        {"a": ["a", "b", None]},
        dtype="string[pyarrow_numpy]",
        columns=Index(["a"], dtype="string[pyarrow_numpy]"),
    )
    tm.assert_frame_equal(result, expected)
@ -0,0 +1,92 @@
import pytest

from pandas import (
    DataFrame,
    DatetimeIndex,
    Series,
    _testing as tm,
    date_range,
    errors,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)

pytestmark = pytest.mark.single_cpu


def test_retain_index_attributes(setup_path, unit):
    # GH 3499, losing frequency info on index recreation
    dti = date_range("2000-1-1", periods=3, freq="h", unit=unit)
    df = DataFrame({"A": Series(range(3), index=dti)})

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "data")
        store.put("data", df, format="table")

        result = store.get("data")
        tm.assert_frame_equal(df, result)

        for attr in ["freq", "tz", "name"]:
            for idx in ["index", "columns"]:
                assert getattr(getattr(df, idx), attr, None) == getattr(
                    getattr(result, idx), attr, None
                )

        dti2 = date_range("2002-1-1", periods=3, freq="D", unit=unit)
        # try to append a table with a different frequency
        with tm.assert_produces_warning(errors.AttributeConflictWarning):
            df2 = DataFrame({"A": Series(range(3), index=dti2)})
            store.append("data", df2)

        assert store.get_storer("data").info["index"]["freq"] is None

        # this is ok
        _maybe_remove(store, "df2")
        dti3 = DatetimeIndex(
            ["2001-01-01", "2001-01-02", "2002-01-01"], dtype=f"M8[{unit}]"
        )
        df2 = DataFrame(
            {
                "A": Series(
                    range(3),
                    index=dti3,
                )
            }
        )
        store.append("df2", df2)
        dti4 = date_range("2002-1-1", periods=3, freq="D", unit=unit)
        df3 = DataFrame({"A": Series(range(3), index=dti4)})
        store.append("df2", df3)


def test_retain_index_attributes2(tmp_path, setup_path):
    path = tmp_path / setup_path

    with tm.assert_produces_warning(errors.AttributeConflictWarning):
        df = DataFrame(
            {"A": Series(range(3), index=date_range("2000-1-1", periods=3, freq="h"))}
        )
        df.to_hdf(path, key="data", mode="w", append=True)
        df2 = DataFrame(
            {"A": Series(range(3), index=date_range("2002-1-1", periods=3, freq="D"))}
        )

        df2.to_hdf(path, key="data", append=True)

    idx = date_range("2000-1-1", periods=3, freq="h")
    idx.name = "foo"
    df = DataFrame({"A": Series(range(3), index=idx)})
    df.to_hdf(path, key="data", mode="w", append=True)

    assert read_hdf(path, key="data").index.name == "foo"

    with tm.assert_produces_warning(errors.AttributeConflictWarning):
        idx2 = date_range("2001-1-1", periods=3, freq="h")
        idx2.name = "bar"
        df2 = DataFrame({"A": Series(range(3), index=idx2)})
        df2.to_hdf(path, key="data", append=True)

    assert read_hdf(path, "data").index.name is None
@ -0,0 +1,578 @@
import datetime
import re

import numpy as np
import pytest

from pandas._libs.tslibs import Timestamp
from pandas.compat import is_platform_windows

import pandas as pd
from pandas import (
    DataFrame,
    DatetimeIndex,
    Index,
    Series,
    _testing as tm,
    bdate_range,
    date_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)
from pandas.util import _test_decorators as td

pytestmark = pytest.mark.single_cpu


def test_conv_read_write():
    with tm.ensure_clean() as path:

        def roundtrip(key, obj, **kwargs):
            obj.to_hdf(path, key=key, **kwargs)
            return read_hdf(path, key)

        o = Series(
            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
        )
        tm.assert_series_equal(o, roundtrip("series", o))

        o = Series(range(10), dtype="float64", index=[f"i_{i}" for i in range(10)])
        tm.assert_series_equal(o, roundtrip("string_series", o))

        o = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        tm.assert_frame_equal(o, roundtrip("frame", o))

        # table
        df = DataFrame({"A": range(5), "B": range(5)})
        df.to_hdf(path, key="table", append=True)
        result = read_hdf(path, "table", where=["index>2"])
        tm.assert_frame_equal(df[df.index > 2], result)

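
# A minimal sketch of the pattern test_conv_read_write ends with (illustrative
# only; ``_where_query_sketch`` is a hypothetical helper, not part of the
# suite): where-queries on read require the appendable "table" format, which
# ``append=True`` implies.
def _where_query_sketch(path):
    df = DataFrame({"A": range(5), "B": range(5)})
    df.to_hdf(path, key="table", append=True)  # append forces the table format
    return read_hdf(path, "table", where=["index>2"])  # rows where index > 2

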
def test_long_strings(setup_path):
    # GH6166
    data = ["a" * 50] * 10
    df = DataFrame({"a": data}, index=data)

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["a"])

        result = store.select("df")
        tm.assert_frame_equal(df, result)


def test_api(tmp_path, setup_path):
    # GH4584
    # API issue when to_hdf doesn't accept append AND format args
    path = tmp_path / setup_path

    df = DataFrame(range(20))
    df.iloc[:10].to_hdf(path, key="df", append=True, format="table")
    df.iloc[10:].to_hdf(path, key="df", append=True, format="table")
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    # append to False
    df.iloc[:10].to_hdf(path, key="df", append=False, format="table")
    df.iloc[10:].to_hdf(path, key="df", append=True, format="table")
    tm.assert_frame_equal(read_hdf(path, "df"), df)


def test_api_append(tmp_path, setup_path):
    path = tmp_path / setup_path

    df = DataFrame(range(20))
    df.iloc[:10].to_hdf(path, key="df", append=True)
    df.iloc[10:].to_hdf(path, key="df", append=True, format="table")
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    # append to False
    df.iloc[:10].to_hdf(path, key="df", append=False, format="table")
    df.iloc[10:].to_hdf(path, key="df", append=True)
    tm.assert_frame_equal(read_hdf(path, "df"), df)


def test_api_2(tmp_path, setup_path):
    path = tmp_path / setup_path

    df = DataFrame(range(20))
    df.to_hdf(path, key="df", append=False, format="fixed")
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    df.to_hdf(path, key="df", append=False, format="f")
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    df.to_hdf(path, key="df", append=False)
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    df.to_hdf(path, key="df")
    tm.assert_frame_equal(read_hdf(path, "df"), df)

    with ensure_clean_store(setup_path) as store:
        df = DataFrame(range(20))

        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=True, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)

        # append to False
        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)

        # formats
        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)

        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format=None)
        tm.assert_frame_equal(store.select("df"), df)

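
# A minimal sketch of the format choices exercised above (illustrative only;
# ``_format_choice_sketch`` is a hypothetical helper, not part of the suite):
# "fixed" (alias "f") is the default write format, fast but neither appendable
# nor queryable; "table" supports append and where-queries.
def _format_choice_sketch(path, df):
    df.to_hdf(path, key="df_fixed", format="fixed")  # same as format="f" / default
    df.to_hdf(path, key="df_table", format="table")  # appendable, queryable

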
def test_api_invalid(tmp_path, setup_path):
    path = tmp_path / setup_path
    # Invalid.
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    msg = "Can only append to Tables"

    with pytest.raises(ValueError, match=msg):
        df.to_hdf(path, key="df", append=True, format="f")

    with pytest.raises(ValueError, match=msg):
        df.to_hdf(path, key="df", append=True, format="fixed")

    msg = r"invalid HDFStore format specified \[foo\]"

    with pytest.raises(TypeError, match=msg):
        df.to_hdf(path, key="df", append=True, format="foo")

    with pytest.raises(TypeError, match=msg):
        df.to_hdf(path, key="df", append=False, format="foo")

    # File path doesn't exist
    path = ""
    msg = f"File {path} does not exist"

    with pytest.raises(FileNotFoundError, match=msg):
        read_hdf(path, "df")


def test_get(setup_path):
    with ensure_clean_store(setup_path) as store:
        store["a"] = Series(
            np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
        )
        left = store.get("a")
        right = store["a"]
        tm.assert_series_equal(left, right)

        left = store.get("/a")
        right = store["/a"]
        tm.assert_series_equal(left, right)

        with pytest.raises(KeyError, match="'No object named b in the file'"):
            store.get("b")


def test_put_integer(setup_path):
    # non-date, non-string index
    df = DataFrame(np.random.default_rng(2).standard_normal((50, 100)))
    _check_roundtrip(df, tm.assert_frame_equal, setup_path)


def test_table_values_dtypes_roundtrip(setup_path):
    with ensure_clean_store(setup_path) as store:
        df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8")
        store.append("df_f8", df1)
        tm.assert_series_equal(df1.dtypes, store["df_f8"].dtypes)

        df2 = DataFrame({"a": [1, 2, 3]}, dtype="i8")
        store.append("df_i8", df2)
        tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes)

        # incompatible dtype
        msg = re.escape(
            "invalid combination of [values_axes] on appending data "
            "[name->values_block_0,cname->values_block_0,"
            "dtype->float64,kind->float,shape->(1, 3)] vs "
            "current table [name->values_block_0,"
            "cname->values_block_0,dtype->int64,kind->integer,"
            "shape->None]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_i8", df1)

        # check creation/storage/retrieval of float32 (a bit hacky to
        # actually create them though)
        df1 = DataFrame(np.array([[1], [2], [3]], dtype="f4"), columns=["A"])
        store.append("df_f4", df1)
        tm.assert_series_equal(df1.dtypes, store["df_f4"].dtypes)
        assert df1.dtypes.iloc[0] == "float32"

        # check with mixed dtypes
        df1 = DataFrame(
            {
                c: Series(np.random.default_rng(2).integers(5), dtype=c)
                for c in ["float32", "float64", "int32", "int64", "int16", "int8"]
            }
        )
        df1["string"] = "foo"
        df1["float322"] = 1.0
        df1["float322"] = df1["float322"].astype("float32")
        df1["bool"] = df1["float32"] > 0
        df1["time1"] = Timestamp("20130101")
        df1["time2"] = Timestamp("20130102")

        store.append("df_mixed_dtypes1", df1)
        result = store.select("df_mixed_dtypes1").dtypes.value_counts()
        result.index = [str(i) for i in result.index]
        expected = Series(
            {
                "float32": 2,
                "float64": 1,
                "int32": 1,
                "bool": 1,
                "int16": 1,
                "int8": 1,
                "int64": 1,
                "object": 1,
                "datetime64[ns]": 2,
            },
            name="count",
        )
        result = result.sort_index()
        expected = expected.sort_index()
        tm.assert_series_equal(result, expected)

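
# A minimal sketch of the dtype rule the test above pins down (illustrative
# only; ``_dtype_append_sketch`` is a hypothetical helper, not part of the
# suite): a table store records each value block's dtype, so appending data of
# an incompatible dtype to an existing key raises ValueError.
def _dtype_append_sketch(store):
    store.append("ints", DataFrame({"a": [1, 2, 3]}, dtype="i8"))
    return store["ints"].dtypes  # column "a" comes back as int64

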
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_series(setup_path):
    s = Series(range(10), dtype="float64", index=[f"i_{i}" for i in range(10)])
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)

    ts = Series(
        np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
    )
    _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)

    ts2 = Series(ts.index, Index(ts.index, dtype=object))
    _check_roundtrip(ts2, tm.assert_series_equal, path=setup_path)

    ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object))
    _check_roundtrip(
        ts3, tm.assert_series_equal, path=setup_path, check_index_type=False
    )


def test_float_index(setup_path):
    # GH #454
    index = np.random.default_rng(2).standard_normal(10)
    s = Series(np.random.default_rng(2).standard_normal(10), index=index)
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)


def test_tuple_index(setup_path):
    # GH #492
    col = np.arange(10)
    idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)]
    data = np.random.default_rng(2).standard_normal(30).reshape((3, 10))
    DF = DataFrame(data, index=idx, columns=col)

    with tm.assert_produces_warning(pd.errors.PerformanceWarning):
        _check_roundtrip(DF, tm.assert_frame_equal, path=setup_path)


@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_index_types(setup_path):
    values = np.random.default_rng(2).standard_normal(2)

    func = lambda lhs, rhs: tm.assert_series_equal(lhs, rhs, check_index_type=True)

    ser = Series(values, [0, "y"])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [datetime.datetime.today(), 0])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, ["y", 0])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [datetime.date.today(), "a"])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [0, "y"])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [datetime.datetime.today(), 0])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, ["y", 0])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [datetime.date.today(), "a"])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [1.23, "b"])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [1, 1.53])
    _check_roundtrip(ser, func, path=setup_path)

    ser = Series(values, [1, 5])
    _check_roundtrip(ser, func, path=setup_path)

    dti = DatetimeIndex(["2012-01-01", "2012-01-02"], dtype="M8[ns]")
    ser = Series(values, index=dti)
    _check_roundtrip(ser, func, path=setup_path)

    ser.index = ser.index.as_unit("s")
    _check_roundtrip(ser, func, path=setup_path)


def test_timeseries_preepoch(setup_path, request):
    dr = bdate_range("1/1/1940", "1/1/1960")
    ts = Series(np.random.default_rng(2).standard_normal(len(dr)), index=dr)
    try:
        _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)
    except OverflowError:
        if is_platform_windows():
            request.applymarker(
                pytest.mark.xfail("known failure on some windows platforms")
            )
        raise


@pytest.mark.parametrize(
    "compression", [False, pytest.param(True, marks=td.skip_if_windows)]
)
def test_frame(compression, setup_path):
    df = DataFrame(
        1.1 * np.arange(120).reshape((30, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=Index([f"i-{i}" for i in range(30)], dtype=object),
    )

    # put in some random NAs
    df.iloc[0, 0] = np.nan
    df.iloc[5, 3] = np.nan

    _check_roundtrip_table(
        df, tm.assert_frame_equal, path=setup_path, compression=compression
    )
    _check_roundtrip(
        df, tm.assert_frame_equal, path=setup_path, compression=compression
    )

    tdf = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    _check_roundtrip(
        tdf, tm.assert_frame_equal, path=setup_path, compression=compression
    )

    with ensure_clean_store(setup_path) as store:
        # not consolidated
        df["foo"] = np.random.default_rng(2).standard_normal(len(df))
        store["df"] = df
        recons = store["df"]
        assert recons._mgr.is_consolidated()

    # empty
    _check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path)


def test_empty_series_frame(setup_path):
    s0 = Series(dtype=object)
    s1 = Series(name="myseries", dtype=object)
    df0 = DataFrame()
    df1 = DataFrame(index=["a", "b", "c"])
    df2 = DataFrame(columns=["d", "e", "f"])

    _check_roundtrip(s0, tm.assert_series_equal, path=setup_path)
    _check_roundtrip(s1, tm.assert_series_equal, path=setup_path)
    _check_roundtrip(df0, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)


@pytest.mark.parametrize("dtype", [np.int64, np.float64, object, "m8[ns]", "M8[ns]"])
def test_empty_series(dtype, setup_path):
    s = Series(dtype=dtype)
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)


def test_can_serialize_dates(setup_path):
    rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")]
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
    )

    _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)


def test_store_hierarchical(setup_path, multiindex_dataframe_random_data):
    frame = multiindex_dataframe_random_data

    _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path)

    # check that the names are stored
    with ensure_clean_store(setup_path) as store:
        store["frame"] = frame
        recons = store["frame"]
        tm.assert_frame_equal(recons, frame)


@pytest.mark.parametrize(
    "compression", [False, pytest.param(True, marks=td.skip_if_windows)]
)
def test_store_mixed(compression, setup_path):
    def _make_one():
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )
        df["obj1"] = "foo"
        df["obj2"] = "bar"
        df["bool1"] = df["A"] > 0
        df["bool2"] = df["B"] > 0
        df["int1"] = 1
        df["int2"] = 2
        return df._consolidate()

    df1 = _make_one()
    df2 = _make_one()

    _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)

    with ensure_clean_store(setup_path) as store:
        store["obj"] = df1
        tm.assert_frame_equal(store["obj"], df1)
        store["obj"] = df2
        tm.assert_frame_equal(store["obj"], df2)

    # check that can store Series of all of these types
    _check_roundtrip(
        df1["obj1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )
    _check_roundtrip(
        df1["bool1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )
    _check_roundtrip(
        df1["int1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )


def _check_roundtrip(obj, comparator, path, compression=False, **kwargs):
    options = {}
    if compression:
        options["complib"] = "blosc"

    with ensure_clean_store(path, "w", **options) as store:
        store["obj"] = obj
        retrieved = store["obj"]
        comparator(retrieved, obj, **kwargs)


def _check_roundtrip_table(obj, comparator, path, compression=False):
    options = {}
    if compression:
        options["complib"] = "blosc"

    with ensure_clean_store(path, "w", **options) as store:
        store.put("obj", obj, format="table")
        retrieved = store["obj"]

        comparator(retrieved, obj)

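
# The two helpers above are the round-trip core of this module: the first
# writes via the fixed-format setitem path (store["obj"] = obj), the second
# via store.put(..., format="table"), so callers exercise both storage
# formats. Passing compression=True swaps in the blosc complib for the store.

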
def test_unicode_index(setup_path):
    unicode_values = ["\u03c3", "\u03c3\u03c3"]

    s = Series(
        np.random.default_rng(2).standard_normal(len(unicode_values)),
        unicode_values,
    )
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)


def test_unicode_longer_encoded(setup_path):
    # GH 11234
    char = "\u0394"
    df = DataFrame({"A": [char]})
    with ensure_clean_store(setup_path) as store:
        store.put("df", df, format="table", encoding="utf-8")
        result = store.get("df")
        tm.assert_frame_equal(result, df)

    df = DataFrame({"A": ["a", char], "B": ["b", "b"]})
    with ensure_clean_store(setup_path) as store:
        store.put("df", df, format="table", encoding="utf-8")
        result = store.get("df")
        tm.assert_frame_equal(result, df)


def test_store_datetime_mixed(setup_path):
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]})
    ts = Series(
        np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
    )
    df["d"] = ts.index[:3]
    _check_roundtrip(df, tm.assert_frame_equal, path=setup_path)


def test_round_trip_equals(tmp_path, setup_path):
    # GH 9330
    df = DataFrame({"B": [1, 2], "A": ["x", "y"]})

    path = tmp_path / setup_path
    df.to_hdf(path, key="df", format="table")
    other = read_hdf(path, "df")
    tm.assert_frame_equal(df, other)
    assert df.equals(other)
    assert other.equals(df)


def test_infer_string_columns(tmp_path, setup_path):
    # GH#
    pytest.importorskip("pyarrow")
    path = tmp_path / setup_path
    with pd.option_context("future.infer_string", True):
        df = DataFrame(1, columns=list("ABCD"), index=list(range(10))).set_index(
            ["A", "B"]
        )
        expected = df.copy()
        df.to_hdf(path, key="df", format="table")

        result = read_hdf(path, "df")
        tm.assert_frame_equal(result, expected)
1047 lib/python3.13/site-packages/pandas/tests/io/pytables/test_select.py (new file; diff suppressed because it is too large)
1119 lib/python3.13/site-packages/pandas/tests/io/pytables/test_store.py (new file; diff suppressed because it is too large)
@ -0,0 +1,52 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    Series,
)
import pandas._testing as tm

from pandas.io.pytables import (
    HDFStore,
    read_hdf,
)

pytest.importorskip("tables")


class TestHDFStoreSubclass:
    # GH 33748
    def test_supported_for_subclass_dataframe(self, tmp_path):
        data = {"a": [1, 2], "b": [3, 4]}
        sdf = tm.SubclassedDataFrame(data, dtype=np.intp)

        expected = DataFrame(data, dtype=np.intp)

        path = tmp_path / "temp.h5"
        sdf.to_hdf(path, key="df")
        result = read_hdf(path, "df")
        tm.assert_frame_equal(result, expected)

        path = tmp_path / "temp.h5"
        with HDFStore(path) as store:
            store.put("df", sdf)
        result = read_hdf(path, "df")
        tm.assert_frame_equal(result, expected)

    def test_supported_for_subclass_series(self, tmp_path):
        data = [1, 2, 3]
        sser = tm.SubclassedSeries(data, dtype=np.intp)

        expected = Series(data, dtype=np.intp)

        path = tmp_path / "temp.h5"
        sser.to_hdf(path, key="ser")
        result = read_hdf(path, "ser")
        tm.assert_series_equal(result, expected)

        path = tmp_path / "temp.h5"
        with HDFStore(path) as store:
            store.put("ser", sser)
        result = read_hdf(path, "ser")
        tm.assert_series_equal(result, expected)
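
# Note on the class above (editorial summary of GH 33748): the HDF round trip
# does not preserve the subclass itself; tm.SubclassedDataFrame and
# tm.SubclassedSeries come back as plain DataFrame/Series, which is why both
# tests compare against plainly constructed ``expected`` objects.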
@ -0,0 +1,72 @@
import datetime

import numpy as np
import pytest

from pandas import (
    DataFrame,
    DatetimeIndex,
    Series,
    _testing as tm,
    date_range,
    period_range,
)
from pandas.tests.io.pytables.common import ensure_clean_store

pytestmark = pytest.mark.single_cpu


@pytest.mark.parametrize("unit", ["us", "ns"])
def test_store_datetime_fractional_secs(setup_path, unit):
    dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
    dti = DatetimeIndex([dt], dtype=f"M8[{unit}]")
    series = Series([0], index=dti)
    with ensure_clean_store(setup_path) as store:
        store["a"] = series
        assert store["a"].index[0] == dt


@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_tseries_indices_series(setup_path):
    with ensure_clean_store(setup_path) as store:
        idx = date_range("2020-01-01", periods=10)
        ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx)
        store["a"] = ser
        result = store["a"]

        tm.assert_series_equal(result, ser)
        assert result.index.freq == ser.index.freq
        tm.assert_class_equal(result.index, ser.index, obj="series index")

        idx = period_range("2020-01-01", periods=10, freq="D")
        ser = Series(np.random.default_rng(2).standard_normal(len(idx)), idx)
        store["a"] = ser
        result = store["a"]

        tm.assert_series_equal(result, ser)
        assert result.index.freq == ser.index.freq
        tm.assert_class_equal(result.index, ser.index, obj="series index")


@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_tseries_indices_frame(setup_path):
    with ensure_clean_store(setup_path) as store:
        idx = date_range("2020-01-01", periods=10)
        df = DataFrame(
            np.random.default_rng(2).standard_normal((len(idx), 3)), index=idx
        )
        store["a"] = df
        result = store["a"]

        tm.assert_frame_equal(result, df)
        assert result.index.freq == df.index.freq
        tm.assert_class_equal(result.index, df.index, obj="dataframe index")

        idx = period_range("2020-01-01", periods=10, freq="D")
        df = DataFrame(np.random.default_rng(2).standard_normal((len(idx), 3)), idx)
        store["a"] = df
        result = store["a"]

        tm.assert_frame_equal(result, df)
        assert result.index.freq == df.index.freq
        tm.assert_class_equal(result.index, df.index, obj="dataframe index")
@ -0,0 +1,378 @@
from datetime import (
    date,
    timedelta,
)

import numpy as np
import pytest

from pandas._libs.tslibs.timezones import maybe_get_tz
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    DataFrame,
    DatetimeIndex,
    Series,
    Timestamp,
    date_range,
)
import pandas._testing as tm
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_store,
)


def _compare_with_tz(a, b):
    tm.assert_frame_equal(a, b)

    # compare the zones on each element
    for c in a.columns:
        for i in a.index:
            a_e = a.loc[i, c]
            b_e = b.loc[i, c]
            if not (a_e == b_e and a_e.tz == b_e.tz):
                raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]")


# use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
# filename issues.
gettz_dateutil = lambda x: maybe_get_tz("dateutil/" + x)
gettz_pytz = lambda x: x


@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
def test_append_with_timezones(setup_path, gettz):
    # as columns

    # Single-tzinfo, no DST transition
    df_est = DataFrame(
        {
            "A": [
                Timestamp("20130102 2:00:00", tz=gettz("US/Eastern")).as_unit("ns")
                + timedelta(hours=1) * i
                for i in range(5)
            ]
        }
    )

    # frame with all columns having same tzinfo, but different sides
    # of DST transition
    df_crosses_dst = DataFrame(
        {
            "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"),
            "B": Timestamp("20130603", tz=gettz("US/Eastern")).as_unit("ns"),
        },
        index=range(5),
    )

    df_mixed_tz = DataFrame(
        {
            "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"),
            "B": Timestamp("20130102", tz=gettz("EET")).as_unit("ns"),
        },
        index=range(5),
    )

    df_different_tz = DataFrame(
        {
            "A": Timestamp("20130102", tz=gettz("US/Eastern")).as_unit("ns"),
            "B": Timestamp("20130102", tz=gettz("CET")).as_unit("ns"),
        },
        index=range(5),
    )

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df_est, data_columns=["A"])
        result = store["df_tz"]
        _compare_with_tz(result, df_est)
        tm.assert_frame_equal(result, df_est)

        # select with tz aware
        expected = df_est[df_est.A >= df_est.A[3]]
        result = store.select("df_tz", where="A>=df_est.A[3]")
        _compare_with_tz(result, expected)

        # ensure we include dates in DST and STD time here.
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df_crosses_dst)
        result = store["df_tz"]
        _compare_with_tz(result, df_crosses_dst)
        tm.assert_frame_equal(result, df_crosses_dst)

        msg = (
            r"invalid info for \[values_block_1\] for \[tz\], "
            r"existing_value \[(dateutil/.*)?(US/Eastern|America/New_York)\] "
            r"conflicts with new value \[(dateutil/.*)?EET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df_mixed_tz)

        # this is ok
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df_mixed_tz, data_columns=["A", "B"])
        result = store["df_tz"]
        _compare_with_tz(result, df_mixed_tz)
        tm.assert_frame_equal(result, df_mixed_tz)

        # can't append with diff timezone
        msg = (
            r"invalid info for \[B\] for \[tz\], "
            r"existing_value \[(dateutil/.*)?EET\] "
            r"conflicts with new value \[(dateutil/.*)?CET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df_different_tz)

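
# A minimal sketch of the rule the test above pins down (illustrative only;
# ``_mixed_tz_append_sketch`` is a hypothetical helper, not part of the
# suite): tz-aware columns stored as data columns keep their zone per column,
# so a frame whose columns carry different timezones is storable that way,
# while the block-wise default raises ValueError on the tz conflict.
def _mixed_tz_append_sketch(store, df):
    store.append("df_tz_sketch", df, data_columns=list(df.columns))
    return store["df_tz_sketch"]

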
@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
def test_append_with_timezones_as_index(setup_path, gettz):
    # GH#4098 example

    dti = date_range("2000-1-1", periods=3, freq="h", tz=gettz("US/Eastern"))
    dti = dti._with_freq(None)  # freq doesn't round-trip

    df = DataFrame({"A": Series(range(3), index=dti)})

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df")
        store.put("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        _maybe_remove(store, "df")
        store.append("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)


def test_roundtrip_tz_aware_index(setup_path, unit):
    # GH 17618
    ts = Timestamp("2000-01-01 01:00:00", tz="US/Eastern")
    dti = DatetimeIndex([ts]).as_unit(unit)
    df = DataFrame(data=[0], index=dti)

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="fixed")
        recons = store["frame"]
        tm.assert_frame_equal(recons, df)

        value = recons.index[0]._value
        denom = {"ns": 1, "us": 1000, "ms": 10**6, "s": 10**9}[unit]
        assert value == 946706400000000000 // denom


def test_store_index_name_with_tz(setup_path):
    # GH 13884
    df = DataFrame({"A": [1, 2]})
    df.index = DatetimeIndex([1234567890123456787, 1234567890123456788])
    df.index = df.index.tz_localize("UTC")
    df.index.name = "foo"

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="table")
        recons = store["frame"]
        tm.assert_frame_equal(recons, df)


def test_tseries_select_index_column(setup_path):
    # GH7777
    # selecting a UTC datetimeindex column did
    # not preserve UTC tzinfo set before storing

    # check that no tz still works
    rng = date_range("1/1/2000", "1/30/2000")
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
    )

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == DatetimeIndex(result.values).tz

    # check utc
    rng = date_range("1/1/2000", "1/30/2000", tz="UTC")
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
    )

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == result.dt.tz

    # double check non-utc
    rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
    )

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == result.dt.tz


def test_timezones_fixed_format_frame_non_empty(setup_path):
    with ensure_clean_store(setup_path) as store:
        # index
        rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
        rng = rng._with_freq(None)  # freq doesn't round-trip
        df = DataFrame(
            np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
        )
        store["df"] = df
        result = store["df"]
        tm.assert_frame_equal(result, df)

        # as data
        # GH11411
        _maybe_remove(store, "df")
        df = DataFrame(
            {
                "A": rng,
                "B": rng.tz_convert("UTC").tz_localize(None),
                "C": rng.tz_convert("CET"),
                "D": range(len(rng)),
            },
            index=rng,
        )
        store["df"] = df
        result = store["df"]
        tm.assert_frame_equal(result, df)


def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series):
    # GH 20594

    dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    obj = Series(dtype=dtype, name="A")
    if frame_or_series is DataFrame:
        obj = obj.to_frame()

    with ensure_clean_store(setup_path) as store:
        store["obj"] = obj
        result = store["obj"]
        tm.assert_equal(result, obj)


def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture):
    # GH 20594

    dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        s = Series([0], dtype=dtype)
        store["s"] = s
        result = store["s"]
        tm.assert_series_equal(result, s)


def test_fixed_offset_tz(setup_path):
    rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
    frame = DataFrame(
        np.random.default_rng(2).standard_normal((len(rng), 4)), index=rng
    )

    with ensure_clean_store(setup_path) as store:
        store["frame"] = frame
        recons = store["frame"]
        tm.assert_index_equal(recons.index, rng)
        assert rng.tz == recons.index.tz


@td.skip_if_windows
def test_store_timezone(setup_path):
    # GH2852
    # issue storing datetime.date with a timezone as it resets when read
    # back in a new timezone

    # original method
    with ensure_clean_store(setup_path) as store:
        today = date(2013, 9, 10)
        df = DataFrame([1, 2, 3], index=[today, today, today])
        store["obj1"] = df
        result = store["obj1"]
        tm.assert_frame_equal(result, df)

    # with tz setting
    with ensure_clean_store(setup_path) as store:
        with tm.set_timezone("EST5EDT"):
            today = date(2013, 9, 10)
            df = DataFrame([1, 2, 3], index=[today, today, today])
            store["obj1"] = df

        with tm.set_timezone("CST6CDT"):
            result = store["obj1"]

        tm.assert_frame_equal(result, df)

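
# Note on the test above (editorial summary of GH2852): datetime.date values
# carry no tzinfo, so the check is that reading the store back under a
# different process-level timezone (via tm.set_timezone) does not shift the
# stored dates.

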
def test_legacy_datetimetz_object(datapath):
    # legacy from < 0.17.0
    # 8260
    expected = DataFrame(
        {
            "A": Timestamp("20130102", tz="US/Eastern").as_unit("ns"),
            "B": Timestamp("20130603", tz="CET").as_unit("ns"),
        },
        index=range(5),
    )
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r"
    ) as store:
        result = store["df"]
        tm.assert_frame_equal(result, expected)


def test_dst_transitions(setup_path):
    # make sure we are not failing on transitions
    with ensure_clean_store(setup_path) as store:
        times = date_range(
            "2013-10-26 23:00",
            "2013-10-27 01:00",
            tz="Europe/London",
            freq="h",
            ambiguous="infer",
        )
        times = times._with_freq(None)  # freq doesn't round-trip

        for i in [times, times + pd.Timedelta("10min")]:
            _maybe_remove(store, "df")
            df = DataFrame({"A": range(len(i)), "B": i}, index=i)
            store.append("df", df)
            result = store.select("df")
            tm.assert_frame_equal(result, df)


def test_read_with_where_tz_aware_index(tmp_path, setup_path):
    # GH 11926
    periods = 10
    dts = date_range("20151201", periods=periods, freq="D", tz="UTC")
    mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"])
    expected = DataFrame({"MYCOL": 0}, index=mi)

    key = "mykey"
    path = tmp_path / setup_path
    with pd.HDFStore(path) as store:
        store.append(key, expected, format="table", append=True)
    result = pd.read_hdf(path, key, where="DATE > 20151130")
    tm.assert_frame_equal(result, expected)

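
# A minimal sketch of the query the GH 11926 test above relies on
# (illustrative only; ``_tz_where_sketch`` is a hypothetical helper, not part
# of the suite): a where-clause can filter on a tz-aware MultiIndex level by
# its name.
def _tz_where_sketch(path, key):
    return pd.read_hdf(path, key, where="DATE > 20151130")

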
def test_py2_created_with_datetimez(datapath):
    # The test HDF5 file was created in Python 2, but could not be read in
    # Python 3.
    #
    # GH26443
    index = DatetimeIndex(["2019-01-01T18:00"], dtype="M8[ns, America/New_York]")
    expected = DataFrame({"data": 123}, index=index)
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r"
    ) as store:
        result = store["key"]
        tm.assert_frame_equal(result, expected)