Updated script that can be controled by Nodejs web app
This commit is contained in:
@ -0,0 +1,374 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import Timestamp
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
HDFStore,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
_testing as tm,
|
||||
concat,
|
||||
date_range,
|
||||
)
|
||||
from pandas.tests.io.pytables.common import (
|
||||
_maybe_remove,
|
||||
ensure_clean_store,
|
||||
)
|
||||
from pandas.util import _test_decorators as td
|
||||
|
||||
pytestmark = pytest.mark.single_cpu
|
||||
|
||||
|
||||
def test_format_type(tmp_path, setup_path):
|
||||
df = DataFrame({"A": [1, 2]})
|
||||
with HDFStore(tmp_path / setup_path) as store:
|
||||
store.put("a", df, format="fixed")
|
||||
store.put("b", df, format="table")
|
||||
|
||||
assert store.get_storer("a").format_type == "fixed"
|
||||
assert store.get_storer("b").format_type == "table"
|
||||
|
||||
|
||||
def test_format_kwarg_in_constructor(tmp_path, setup_path):
|
||||
# GH 13291
|
||||
|
||||
msg = "format is not a defined argument for HDFStore"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
HDFStore(tmp_path / setup_path, format="table")
|
||||
|
||||
|
||||
def test_api_default_format(tmp_path, setup_path):
|
||||
# default_format option
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
df = DataFrame(
|
||||
1.1 * np.arange(120).reshape((30, 4)),
|
||||
columns=Index(list("ABCD"), dtype=object),
|
||||
index=Index([f"i-{i}" for i in range(30)], dtype=object),
|
||||
)
|
||||
|
||||
with pd.option_context("io.hdf.default_format", "fixed"):
|
||||
_maybe_remove(store, "df")
|
||||
store.put("df", df)
|
||||
assert not store.get_storer("df").is_table
|
||||
|
||||
msg = "Can only append to Tables"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
store.append("df2", df)
|
||||
|
||||
with pd.option_context("io.hdf.default_format", "table"):
|
||||
_maybe_remove(store, "df")
|
||||
store.put("df", df)
|
||||
assert store.get_storer("df").is_table
|
||||
|
||||
_maybe_remove(store, "df2")
|
||||
store.append("df2", df)
|
||||
assert store.get_storer("df").is_table
|
||||
|
||||
path = tmp_path / setup_path
|
||||
df = DataFrame(
|
||||
1.1 * np.arange(120).reshape((30, 4)),
|
||||
columns=Index(list("ABCD"), dtype=object),
|
||||
index=Index([f"i-{i}" for i in range(30)], dtype=object),
|
||||
)
|
||||
|
||||
with pd.option_context("io.hdf.default_format", "fixed"):
|
||||
df.to_hdf(path, key="df")
|
||||
with HDFStore(path) as store:
|
||||
assert not store.get_storer("df").is_table
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.to_hdf(path, key="df2", append=True)
|
||||
|
||||
with pd.option_context("io.hdf.default_format", "table"):
|
||||
df.to_hdf(path, key="df3")
|
||||
with HDFStore(path) as store:
|
||||
assert store.get_storer("df3").is_table
|
||||
df.to_hdf(path, key="df4", append=True)
|
||||
with HDFStore(path) as store:
|
||||
assert store.get_storer("df4").is_table
|
||||
|
||||
|
||||
def test_put(setup_path):
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
ts = Series(
|
||||
np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
|
||||
)
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((20, 4)),
|
||||
columns=Index(list("ABCD"), dtype=object),
|
||||
index=date_range("2000-01-01", periods=20, freq="B"),
|
||||
)
|
||||
store["a"] = ts
|
||||
store["b"] = df[:10]
|
||||
store["foo/bar/bah"] = df[:10]
|
||||
store["foo"] = df[:10]
|
||||
store["/foo"] = df[:10]
|
||||
store.put("c", df[:10], format="table")
|
||||
|
||||
# not OK, not a table
|
||||
msg = "Can only append to Tables"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
store.put("b", df[10:], append=True)
|
||||
|
||||
# node does not currently exist, test _is_table_type returns False
|
||||
# in this case
|
||||
_maybe_remove(store, "f")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
store.put("f", df[10:], append=True)
|
||||
|
||||
# can't put to a table (use append instead)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
store.put("c", df[10:], append=True)
|
||||
|
||||
# overwrite table
|
||||
store.put("c", df[:10], format="table", append=False)
|
||||
tm.assert_frame_equal(df[:10], store["c"])
|
||||
|
||||
|
||||
def test_put_string_index(setup_path):
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
index = Index([f"I am a very long string index: {i}" for i in range(20)])
|
||||
s = Series(np.arange(20), index=index)
|
||||
df = DataFrame({"A": s, "B": s})
|
||||
|
||||
store["a"] = s
|
||||
tm.assert_series_equal(store["a"], s)
|
||||
|
||||
store["b"] = df
|
||||
tm.assert_frame_equal(store["b"], df)
|
||||
|
||||
# mixed length
|
||||
index = Index(
|
||||
["abcdefghijklmnopqrstuvwxyz1234567890"]
|
||||
+ [f"I am a very long string index: {i}" for i in range(20)]
|
||||
)
|
||||
s = Series(np.arange(21), index=index)
|
||||
df = DataFrame({"A": s, "B": s})
|
||||
store["a"] = s
|
||||
tm.assert_series_equal(store["a"], s)
|
||||
|
||||
store["b"] = df
|
||||
tm.assert_frame_equal(store["b"], df)
|
||||
|
||||
|
||||
def test_put_compression(setup_path):
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 4)),
|
||||
columns=Index(list("ABCD"), dtype=object),
|
||||
index=date_range("2000-01-01", periods=10, freq="B"),
|
||||
)
|
||||
|
||||
store.put("c", df, format="table", complib="zlib")
|
||||
tm.assert_frame_equal(store["c"], df)
|
||||
|
||||
# can't compress if format='fixed'
|
||||
msg = "Compression not supported on Fixed format stores"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
store.put("b", df, format="fixed", complib="zlib")
|
||||
|
||||
|
||||
@td.skip_if_windows
|
||||
def test_put_compression_blosc(setup_path):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 4)),
|
||||
columns=Index(list("ABCD"), dtype=object),
|
||||
index=date_range("2000-01-01", periods=10, freq="B"),
|
||||
)
|
||||
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
# can't compress if format='fixed'
|
||||
msg = "Compression not supported on Fixed format stores"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
store.put("b", df, format="fixed", complib="blosc")
|
||||
|
||||
store.put("c", df, format="table", complib="blosc")
|
||||
tm.assert_frame_equal(store["c"], df)
|
||||
|
||||
|
||||
def test_put_mixed_type(setup_path):
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 4)),
|
||||
columns=Index(list("ABCD"), dtype=object),
|
||||
index=date_range("2000-01-01", periods=10, freq="B"),
|
||||
)
|
||||
df["obj1"] = "foo"
|
||||
df["obj2"] = "bar"
|
||||
df["bool1"] = df["A"] > 0
|
||||
df["bool2"] = df["B"] > 0
|
||||
df["bool3"] = True
|
||||
df["int1"] = 1
|
||||
df["int2"] = 2
|
||||
df["timestamp1"] = Timestamp("20010102").as_unit("ns")
|
||||
df["timestamp2"] = Timestamp("20010103").as_unit("ns")
|
||||
df["datetime1"] = Timestamp("20010102").as_unit("ns")
|
||||
df["datetime2"] = Timestamp("20010103").as_unit("ns")
|
||||
df.loc[df.index[3:6], ["obj1"]] = np.nan
|
||||
df = df._consolidate()
|
||||
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
_maybe_remove(store, "df")
|
||||
|
||||
with tm.assert_produces_warning(pd.errors.PerformanceWarning):
|
||||
store.put("df", df)
|
||||
|
||||
expected = store.get("df")
|
||||
tm.assert_frame_equal(expected, df)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("format", ["table", "fixed"])
|
||||
@pytest.mark.parametrize(
|
||||
"index",
|
||||
[
|
||||
Index([str(i) for i in range(10)]),
|
||||
Index(np.arange(10, dtype=float)),
|
||||
Index(np.arange(10)),
|
||||
date_range("2020-01-01", periods=10),
|
||||
pd.period_range("2020-01-01", periods=10),
|
||||
],
|
||||
)
|
||||
def test_store_index_types(setup_path, format, index):
|
||||
# GH5386
|
||||
# test storing various index types
|
||||
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 2)),
|
||||
columns=list("AB"),
|
||||
index=index,
|
||||
)
|
||||
_maybe_remove(store, "df")
|
||||
store.put("df", df, format=format)
|
||||
tm.assert_frame_equal(df, store["df"])
|
||||
|
||||
|
||||
def test_column_multiindex(setup_path):
|
||||
# GH 4710
|
||||
# recreate multi-indexes properly
|
||||
|
||||
index = MultiIndex.from_tuples(
|
||||
[("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"]
|
||||
)
|
||||
df = DataFrame(np.arange(12).reshape(3, 4), columns=index)
|
||||
expected = df.set_axis(df.index.to_numpy())
|
||||
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
store.put("df", df)
|
||||
tm.assert_frame_equal(
|
||||
store["df"], expected, check_index_type=True, check_column_type=True
|
||||
)
|
||||
|
||||
store.put("df1", df, format="table")
|
||||
tm.assert_frame_equal(
|
||||
store["df1"], expected, check_index_type=True, check_column_type=True
|
||||
)
|
||||
|
||||
msg = re.escape("cannot use a multi-index on axis [1] with data_columns ['A']")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
store.put("df2", df, format="table", data_columns=["A"])
|
||||
msg = re.escape("cannot use a multi-index on axis [1] with data_columns True")
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
store.put("df3", df, format="table", data_columns=True)
|
||||
|
||||
# appending multi-column on existing table (see GH 6167)
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
store.append("df2", df)
|
||||
store.append("df2", df)
|
||||
|
||||
tm.assert_frame_equal(store["df2"], concat((df, df)))
|
||||
|
||||
# non_index_axes name
|
||||
df = DataFrame(np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo"))
|
||||
expected = df.set_axis(df.index.to_numpy())
|
||||
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
store.put("df1", df, format="table")
|
||||
tm.assert_frame_equal(
|
||||
store["df1"], expected, check_index_type=True, check_column_type=True
|
||||
)
|
||||
|
||||
|
||||
def test_store_multiindex(setup_path):
|
||||
# validate multi-index names
|
||||
# GH 5527
|
||||
with ensure_clean_store(setup_path) as store:
|
||||
|
||||
def make_index(names=None):
|
||||
dti = date_range("2013-12-01", "2013-12-02")
|
||||
mi = MultiIndex.from_product([dti, range(2), range(3)], names=names)
|
||||
return mi
|
||||
|
||||
# no names
|
||||
_maybe_remove(store, "df")
|
||||
df = DataFrame(np.zeros((12, 2)), columns=["a", "b"], index=make_index())
|
||||
store.append("df", df)
|
||||
tm.assert_frame_equal(store.select("df"), df)
|
||||
|
||||
# partial names
|
||||
_maybe_remove(store, "df")
|
||||
df = DataFrame(
|
||||
np.zeros((12, 2)),
|
||||
columns=["a", "b"],
|
||||
index=make_index(["date", None, None]),
|
||||
)
|
||||
store.append("df", df)
|
||||
tm.assert_frame_equal(store.select("df"), df)
|
||||
|
||||
# series
|
||||
_maybe_remove(store, "ser")
|
||||
ser = Series(np.zeros(12), index=make_index(["date", None, None]))
|
||||
store.append("ser", ser)
|
||||
xp = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"]))
|
||||
tm.assert_series_equal(store.select("ser"), xp)
|
||||
|
||||
# dup with column
|
||||
_maybe_remove(store, "df")
|
||||
df = DataFrame(
|
||||
np.zeros((12, 2)),
|
||||
columns=["a", "b"],
|
||||
index=make_index(["date", "a", "t"]),
|
||||
)
|
||||
msg = "duplicate names/columns in the multi-index when storing as a table"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
store.append("df", df)
|
||||
|
||||
# dup within level
|
||||
_maybe_remove(store, "df")
|
||||
df = DataFrame(
|
||||
np.zeros((12, 2)),
|
||||
columns=["a", "b"],
|
||||
index=make_index(["date", "date", "date"]),
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
store.append("df", df)
|
||||
|
||||
# fully names
|
||||
_maybe_remove(store, "df")
|
||||
df = DataFrame(
|
||||
np.zeros((12, 2)),
|
||||
columns=["a", "b"],
|
||||
index=make_index(["date", "s", "t"]),
|
||||
)
|
||||
store.append("df", df)
|
||||
tm.assert_frame_equal(store.select("df"), df)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("format", ["fixed", "table"])
|
||||
def test_store_periodindex(tmp_path, setup_path, format):
|
||||
# GH 7796
|
||||
# test of PeriodIndex in HDFStore
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((5, 1)),
|
||||
index=pd.period_range("20220101", freq="M", periods=5),
|
||||
)
|
||||
|
||||
path = tmp_path / setup_path
|
||||
df.to_hdf(path, key="df", mode="w", format=format)
|
||||
expected = pd.read_hdf(path, "df")
|
||||
tm.assert_frame_equal(df, expected)
|
Reference in New Issue
Block a user