Updated script that can be controled by Nodejs web app
This commit is contained in:
@@ -0,0 +1,271 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesValueCounts:
|
||||
def test_value_counts_datetime(self, unit):
|
||||
# most dtypes are tested in tests/base
|
||||
values = [
|
||||
pd.Timestamp("2011-01-01 09:00"),
|
||||
pd.Timestamp("2011-01-01 10:00"),
|
||||
pd.Timestamp("2011-01-01 11:00"),
|
||||
pd.Timestamp("2011-01-01 09:00"),
|
||||
pd.Timestamp("2011-01-01 09:00"),
|
||||
pd.Timestamp("2011-01-01 11:00"),
|
||||
]
|
||||
|
||||
exp_idx = pd.DatetimeIndex(
|
||||
["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"],
|
||||
name="xxx",
|
||||
).as_unit(unit)
|
||||
exp = Series([3, 2, 1], index=exp_idx, name="count")
|
||||
|
||||
ser = Series(values, name="xxx").dt.as_unit(unit)
|
||||
tm.assert_series_equal(ser.value_counts(), exp)
|
||||
# check DatetimeIndex outputs the same result
|
||||
idx = pd.DatetimeIndex(values, name="xxx").as_unit(unit)
|
||||
tm.assert_series_equal(idx.value_counts(), exp)
|
||||
|
||||
# normalize
|
||||
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
|
||||
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
|
||||
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
|
||||
|
||||
def test_value_counts_datetime_tz(self, unit):
|
||||
values = [
|
||||
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
|
||||
pd.Timestamp("2011-01-01 10:00", tz="US/Eastern"),
|
||||
pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"),
|
||||
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
|
||||
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
|
||||
pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"),
|
||||
]
|
||||
|
||||
exp_idx = pd.DatetimeIndex(
|
||||
["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"],
|
||||
tz="US/Eastern",
|
||||
name="xxx",
|
||||
).as_unit(unit)
|
||||
exp = Series([3, 2, 1], index=exp_idx, name="count")
|
||||
|
||||
ser = Series(values, name="xxx").dt.as_unit(unit)
|
||||
tm.assert_series_equal(ser.value_counts(), exp)
|
||||
idx = pd.DatetimeIndex(values, name="xxx").as_unit(unit)
|
||||
tm.assert_series_equal(idx.value_counts(), exp)
|
||||
|
||||
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
|
||||
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
|
||||
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
|
||||
|
||||
def test_value_counts_period(self):
|
||||
values = [
|
||||
pd.Period("2011-01", freq="M"),
|
||||
pd.Period("2011-02", freq="M"),
|
||||
pd.Period("2011-03", freq="M"),
|
||||
pd.Period("2011-01", freq="M"),
|
||||
pd.Period("2011-01", freq="M"),
|
||||
pd.Period("2011-03", freq="M"),
|
||||
]
|
||||
|
||||
exp_idx = pd.PeriodIndex(
|
||||
["2011-01", "2011-03", "2011-02"], freq="M", name="xxx"
|
||||
)
|
||||
exp = Series([3, 2, 1], index=exp_idx, name="count")
|
||||
|
||||
ser = Series(values, name="xxx")
|
||||
tm.assert_series_equal(ser.value_counts(), exp)
|
||||
# check DatetimeIndex outputs the same result
|
||||
idx = pd.PeriodIndex(values, name="xxx")
|
||||
tm.assert_series_equal(idx.value_counts(), exp)
|
||||
|
||||
# normalize
|
||||
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
|
||||
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
|
||||
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
|
||||
|
||||
def test_value_counts_categorical_ordered(self):
|
||||
# most dtypes are tested in tests/base
|
||||
values = Categorical([1, 2, 3, 1, 1, 3], ordered=True)
|
||||
|
||||
exp_idx = CategoricalIndex(
|
||||
[1, 3, 2], categories=[1, 2, 3], ordered=True, name="xxx"
|
||||
)
|
||||
exp = Series([3, 2, 1], index=exp_idx, name="count")
|
||||
|
||||
ser = Series(values, name="xxx")
|
||||
tm.assert_series_equal(ser.value_counts(), exp)
|
||||
# check CategoricalIndex outputs the same result
|
||||
idx = CategoricalIndex(values, name="xxx")
|
||||
tm.assert_series_equal(idx.value_counts(), exp)
|
||||
|
||||
# normalize
|
||||
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
|
||||
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
|
||||
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
|
||||
|
||||
def test_value_counts_categorical_not_ordered(self):
|
||||
values = Categorical([1, 2, 3, 1, 1, 3], ordered=False)
|
||||
|
||||
exp_idx = CategoricalIndex(
|
||||
[1, 3, 2], categories=[1, 2, 3], ordered=False, name="xxx"
|
||||
)
|
||||
exp = Series([3, 2, 1], index=exp_idx, name="count")
|
||||
|
||||
ser = Series(values, name="xxx")
|
||||
tm.assert_series_equal(ser.value_counts(), exp)
|
||||
# check CategoricalIndex outputs the same result
|
||||
idx = CategoricalIndex(values, name="xxx")
|
||||
tm.assert_series_equal(idx.value_counts(), exp)
|
||||
|
||||
# normalize
|
||||
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
|
||||
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
|
||||
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
|
||||
|
||||
def test_value_counts_categorical(self):
|
||||
# GH#12835
|
||||
cats = Categorical(list("abcccb"), categories=list("cabd"))
|
||||
ser = Series(cats, name="xxx")
|
||||
res = ser.value_counts(sort=False)
|
||||
|
||||
exp_index = CategoricalIndex(
|
||||
list("cabd"), categories=cats.categories, name="xxx"
|
||||
)
|
||||
exp = Series([3, 1, 2, 0], name="count", index=exp_index)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
res = ser.value_counts(sort=True)
|
||||
|
||||
exp_index = CategoricalIndex(
|
||||
list("cbad"), categories=cats.categories, name="xxx"
|
||||
)
|
||||
exp = Series([3, 2, 1, 0], name="count", index=exp_index)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# check object dtype handles the Series.name as the same
|
||||
# (tested in tests/base)
|
||||
ser = Series(["a", "b", "c", "c", "c", "b"], name="xxx")
|
||||
res = ser.value_counts()
|
||||
exp = Series([3, 2, 1], name="count", index=Index(["c", "b", "a"], name="xxx"))
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_value_counts_categorical_with_nan(self):
|
||||
# see GH#9443
|
||||
|
||||
# sanity check
|
||||
ser = Series(["a", "b", "a"], dtype="category")
|
||||
exp = Series([2, 1], index=CategoricalIndex(["a", "b"]), name="count")
|
||||
|
||||
res = ser.value_counts(dropna=True)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
res = ser.value_counts(dropna=True)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# same Series via two different constructions --> same behaviour
|
||||
series = [
|
||||
Series(["a", "b", None, "a", None, None], dtype="category"),
|
||||
Series(
|
||||
Categorical(["a", "b", None, "a", None, None], categories=["a", "b"])
|
||||
),
|
||||
]
|
||||
|
||||
for ser in series:
|
||||
# None is a NaN value, so we exclude its count here
|
||||
exp = Series([2, 1], index=CategoricalIndex(["a", "b"]), name="count")
|
||||
res = ser.value_counts(dropna=True)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# we don't exclude the count of None and sort by counts
|
||||
exp = Series(
|
||||
[3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"]), name="count"
|
||||
)
|
||||
res = ser.value_counts(dropna=False)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
# When we aren't sorting by counts, and np.nan isn't a
|
||||
# category, it should be last.
|
||||
exp = Series(
|
||||
[2, 1, 3], index=CategoricalIndex(["a", "b", np.nan]), name="count"
|
||||
)
|
||||
res = ser.value_counts(dropna=False, sort=False)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ser, dropna, exp",
|
||||
[
|
||||
(
|
||||
Series([False, True, True, pd.NA]),
|
||||
False,
|
||||
Series([2, 1, 1], index=[True, False, pd.NA], name="count"),
|
||||
),
|
||||
(
|
||||
Series([False, True, True, pd.NA]),
|
||||
True,
|
||||
Series([2, 1], index=Index([True, False], dtype=object), name="count"),
|
||||
),
|
||||
(
|
||||
Series(range(3), index=[True, False, np.nan]).index,
|
||||
False,
|
||||
Series([1, 1, 1], index=[True, False, np.nan], name="count"),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_value_counts_bool_with_nan(self, ser, dropna, exp):
|
||||
# GH32146
|
||||
out = ser.value_counts(dropna=dropna)
|
||||
tm.assert_series_equal(out, exp)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"input_array,expected",
|
||||
[
|
||||
(
|
||||
[1 + 1j, 1 + 1j, 1, 3j, 3j, 3j],
|
||||
Series(
|
||||
[3, 2, 1],
|
||||
index=Index([3j, 1 + 1j, 1], dtype=np.complex128),
|
||||
name="count",
|
||||
),
|
||||
),
|
||||
(
|
||||
np.array([1 + 1j, 1 + 1j, 1, 3j, 3j, 3j], dtype=np.complex64),
|
||||
Series(
|
||||
[3, 2, 1],
|
||||
index=Index([3j, 1 + 1j, 1], dtype=np.complex64),
|
||||
name="count",
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_value_counts_complex_numbers(self, input_array, expected):
|
||||
# GH 17927
|
||||
result = Series(input_array).value_counts()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_value_counts_masked(self):
|
||||
# GH#54984
|
||||
dtype = "Int64"
|
||||
ser = Series([1, 2, None, 2, None, 3], dtype=dtype)
|
||||
result = ser.value_counts(dropna=False)
|
||||
expected = Series(
|
||||
[2, 2, 1, 1],
|
||||
index=Index([2, None, 1, 3], dtype=dtype),
|
||||
dtype=dtype,
|
||||
name="count",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser.value_counts(dropna=True)
|
||||
expected = Series(
|
||||
[2, 1, 1], index=Index([2, 1, 3], dtype=dtype), dtype=dtype, name="count"
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
Reference in New Issue
Block a user