Updated script that can be controlled by the Node.js web app

This commit is contained in:
mac OS
2024-11-25 12:24:18 +07:00
parent c440eda1f4
commit 8b0ab2bd3a
8662 changed files with 1803808 additions and 34 deletions

View File

@ -0,0 +1,258 @@
import numpy as np
import pytest
from pandas import (
Categorical,
DataFrame,
Index,
Series,
Timestamp,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays.categorical import CategoricalAccessor
from pandas.core.indexes.accessors import Properties
class TestCatAccessor:
@pytest.mark.parametrize(
"method",
[
lambda x: x.cat.set_categories([1, 2, 3]),
lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True),
lambda x: x.cat.rename_categories([1, 2, 3]),
lambda x: x.cat.remove_unused_categories(),
lambda x: x.cat.remove_categories([2]),
lambda x: x.cat.add_categories([4]),
lambda x: x.cat.as_ordered(),
lambda x: x.cat.as_unordered(),
],
)
def test_getname_categorical_accessor(self, method):
# GH#17509
ser = Series([1, 2, 3], name="A").astype("category")
expected = "A"
result = method(ser).name
assert result == expected
def test_cat_accessor(self):
ser = Series(Categorical(["a", "b", np.nan, "a"]))
tm.assert_index_equal(ser.cat.categories, Index(["a", "b"]))
assert not ser.cat.ordered, False
exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
res = ser.cat.set_categories(["b", "a"])
tm.assert_categorical_equal(res.values, exp)
ser[:] = "a"
ser = ser.cat.remove_unused_categories()
tm.assert_index_equal(ser.cat.categories, Index(["a"]))
def test_cat_accessor_api(self):
# GH#9322
assert Series.cat is CategoricalAccessor
ser = Series(list("aabbcde")).astype("category")
assert isinstance(ser.cat, CategoricalAccessor)
invalid = Series([1])
with pytest.raises(AttributeError, match="only use .cat accessor"):
invalid.cat
assert not hasattr(invalid, "cat")
def test_cat_accessor_no_new_attributes(self):
# https://github.com/pandas-dev/pandas/issues/10673
cat = Series(list("aabbcde")).astype("category")
with pytest.raises(AttributeError, match="You cannot add any new attribute"):
cat.cat.xlabel = "a"
def test_categorical_delegations(self):
# invalid accessor
msg = r"Can only use \.cat accessor with a 'category' dtype"
with pytest.raises(AttributeError, match=msg):
Series([1, 2, 3]).cat
with pytest.raises(AttributeError, match=msg):
Series([1, 2, 3]).cat()
with pytest.raises(AttributeError, match=msg):
Series(["a", "b", "c"]).cat
with pytest.raises(AttributeError, match=msg):
Series(np.arange(5.0)).cat
with pytest.raises(AttributeError, match=msg):
Series([Timestamp("20130101")]).cat
# Series should delegate calls to '.categories', '.codes', '.ordered'
# and the methods '.set_categories()' 'drop_unused_categories()' to the
# categorical
ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
exp_categories = Index(["a", "b", "c"])
tm.assert_index_equal(ser.cat.categories, exp_categories)
ser = ser.cat.rename_categories([1, 2, 3])
exp_categories = Index([1, 2, 3])
tm.assert_index_equal(ser.cat.categories, exp_categories)
exp_codes = Series([0, 1, 2, 0], dtype="int8")
tm.assert_series_equal(ser.cat.codes, exp_codes)
assert ser.cat.ordered
ser = ser.cat.as_unordered()
assert not ser.cat.ordered
ser = ser.cat.as_ordered()
assert ser.cat.ordered
# reorder
ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
exp_categories = Index(["c", "b", "a"])
exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
ser = ser.cat.set_categories(["c", "b", "a"])
tm.assert_index_equal(ser.cat.categories, exp_categories)
tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
tm.assert_numpy_array_equal(ser.__array__(), exp_values)
# remove unused categories
ser = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
exp_categories = Index(["a", "b"])
exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
ser = ser.cat.remove_unused_categories()
tm.assert_index_equal(ser.cat.categories, exp_categories)
tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
tm.assert_numpy_array_equal(ser.__array__(), exp_values)
# This method is likely to be confused, so test that it raises an error
# on wrong inputs:
msg = "'Series' object has no attribute 'set_categories'"
with pytest.raises(AttributeError, match=msg):
ser.set_categories([4, 3, 2, 1])
# right: ser.cat.set_categories([4,3,2,1])
# GH#18862 (let Series.cat.rename_categories take callables)
ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
result = ser.cat.rename_categories(lambda x: x.upper())
expected = Series(
Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True)
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"idx",
[
date_range("1/1/2015", periods=5),
date_range("1/1/2015", periods=5, tz="MET"),
period_range("1/1/2015", freq="D", periods=5),
timedelta_range("1 days", "10 days"),
],
)
def test_dt_accessor_api_for_categorical(self, idx):
# https://github.com/pandas-dev/pandas/issues/10661
ser = Series(idx)
cat = ser.astype("category")
# only testing field (like .day)
# and bool (is_month_start)
attr_names = type(ser._values)._datetimelike_ops
assert isinstance(cat.dt, Properties)
special_func_defs = [
("strftime", ("%Y-%m-%d",), {}),
("round", ("D",), {}),
("floor", ("D",), {}),
("ceil", ("D",), {}),
("asfreq", ("D",), {}),
("as_unit", ("s"), {}),
]
if idx.dtype == "M8[ns]":
# exclude dt64tz since that is already localized and would raise
tup = ("tz_localize", ("UTC",), {})
special_func_defs.append(tup)
elif idx.dtype.kind == "M":
# exclude dt64 since that is not localized so would raise
tup = ("tz_convert", ("EST",), {})
special_func_defs.append(tup)
_special_func_names = [f[0] for f in special_func_defs]
_ignore_names = ["components", "tz_localize", "tz_convert"]
func_names = [
fname
for fname in dir(ser.dt)
if not (
fname.startswith("_")
or fname in attr_names
or fname in _special_func_names
or fname in _ignore_names
)
]
func_defs = [(fname, (), {}) for fname in func_names]
func_defs.extend(
f_def for f_def in special_func_defs if f_def[0] in dir(ser.dt)
)
for func, args, kwargs in func_defs:
warn_cls = []
if func == "to_period" and getattr(idx, "tz", None) is not None:
# dropping TZ
warn_cls.append(UserWarning)
if func == "to_pydatetime":
# deprecated to return Index[object]
warn_cls.append(FutureWarning)
if warn_cls:
warn_cls = tuple(warn_cls)
else:
warn_cls = None
with tm.assert_produces_warning(warn_cls):
res = getattr(cat.dt, func)(*args, **kwargs)
exp = getattr(ser.dt, func)(*args, **kwargs)
tm.assert_equal(res, exp)
for attr in attr_names:
res = getattr(cat.dt, attr)
exp = getattr(ser.dt, attr)
tm.assert_equal(res, exp)
def test_dt_accessor_api_for_categorical_invalid(self):
invalid = Series([1, 2, 3]).astype("category")
msg = "Can only use .dt accessor with datetimelike"
with pytest.raises(AttributeError, match=msg):
invalid.dt
assert not hasattr(invalid, "str")
def test_set_categories_setitem(self):
# GH#43334
df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category")
df["Survived"] = df["Survived"].cat.rename_categories(["No", "Yes"])
df["Sex"] = df["Sex"].cat.rename_categories(["female", "male"])
# values should not be coerced to NaN
assert list(df["Sex"]) == ["female", "male", "male"]
assert list(df["Survived"]) == ["Yes", "No", "Yes"]
df["Sex"] = Categorical(df["Sex"], categories=["female", "male"], ordered=False)
df["Survived"] = Categorical(
df["Survived"], categories=["No", "Yes"], ordered=False
)
# values should not be coerced to NaN
assert list(df["Sex"]) == ["female", "male", "male"]
assert list(df["Survived"]) == ["Yes", "No", "Yes"]
def test_categorical_of_booleans_is_boolean(self):
# https://github.com/pandas-dev/pandas/issues/46313
df = DataFrame(
{"int_cat": [1, 2, 3], "bool_cat": [True, False, False]}, dtype="category"
)
value = df["bool_cat"].cat.categories.dtype
expected = np.dtype(np.bool_)
assert value is expected

View File

@ -0,0 +1,843 @@
import calendar
from datetime import (
date,
datetime,
time,
)
import locale
import unicodedata
import numpy as np
import pytest
import pytz
from pandas._libs.tslibs.timezones import maybe_get_tz
from pandas.errors import SettingWithCopyError
from pandas.core.dtypes.common import (
is_integer_dtype,
is_list_like,
)
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Index,
Period,
PeriodIndex,
Series,
TimedeltaIndex,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import (
DatetimeArray,
PeriodArray,
TimedeltaArray,
)
ok_for_period = PeriodArray._datetimelike_ops
ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]
ok_for_dt = DatetimeArray._datetimelike_ops
ok_for_dt_methods = [
"to_period",
"to_pydatetime",
"tz_localize",
"tz_convert",
"normalize",
"strftime",
"round",
"floor",
"ceil",
"day_name",
"month_name",
"isocalendar",
"as_unit",
]
ok_for_td = TimedeltaArray._datetimelike_ops
ok_for_td_methods = [
"components",
"to_pytimedelta",
"total_seconds",
"round",
"floor",
"ceil",
"as_unit",
]
def get_dir(ser):
    """Return the sorted, de-duplicated public names exposed by ``ser.dt``.

    Used to check the limited display API of the accessor.
    """
    public_names = {name for name in ser.dt.__dir__() if not name.startswith("_")}
    return sorted(public_names)
class TestSeriesDatetimeValues:
def _compare(self, ser, name):
# GH 7207, 11128
# test .dt namespace accessor
def get_expected(ser, prop):
result = getattr(Index(ser._values), prop)
if isinstance(result, np.ndarray):
if is_integer_dtype(result):
result = result.astype("int64")
elif not is_list_like(result) or isinstance(result, DataFrame):
return result
return Series(result, index=ser.index, name=ser.name)
left = getattr(ser.dt, name)
right = get_expected(ser, name)
if not (is_list_like(left) and is_list_like(right)):
assert left == right
elif isinstance(left, DataFrame):
tm.assert_frame_equal(left, right)
else:
tm.assert_series_equal(left, right)
@pytest.mark.parametrize("freq", ["D", "s", "ms"])
def test_dt_namespace_accessor_datetime64(self, freq):
# GH#7207, GH#11128
# test .dt namespace accessor
# datetimeindex
dti = date_range("20130101", periods=5, freq=freq)
ser = Series(dti, name="xxx")
for prop in ok_for_dt:
# we test freq below
if prop != "freq":
self._compare(ser, prop)
for prop in ok_for_dt_methods:
getattr(ser.dt, prop)
msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.dt.to_pydatetime()
assert isinstance(result, np.ndarray)
assert result.dtype == object
result = ser.dt.tz_localize("US/Eastern")
exp_values = DatetimeIndex(ser.values).tz_localize("US/Eastern")
expected = Series(exp_values, index=ser.index, name="xxx")
tm.assert_series_equal(result, expected)
tz_result = result.dt.tz
assert str(tz_result) == "US/Eastern"
freq_result = ser.dt.freq
assert freq_result == DatetimeIndex(ser.values, freq="infer").freq
# let's localize, then convert
result = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
exp_values = (
DatetimeIndex(ser.values).tz_localize("UTC").tz_convert("US/Eastern")
)
expected = Series(exp_values, index=ser.index, name="xxx")
tm.assert_series_equal(result, expected)
def test_dt_namespace_accessor_datetime64tz(self):
# GH#7207, GH#11128
# test .dt namespace accessor
# datetimeindex with tz
dti = date_range("20130101", periods=5, tz="US/Eastern")
ser = Series(dti, name="xxx")
for prop in ok_for_dt:
# we test freq below
if prop != "freq":
self._compare(ser, prop)
for prop in ok_for_dt_methods:
getattr(ser.dt, prop)
msg = "The behavior of DatetimeProperties.to_pydatetime is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.dt.to_pydatetime()
assert isinstance(result, np.ndarray)
assert result.dtype == object
result = ser.dt.tz_convert("CET")
expected = Series(ser._values.tz_convert("CET"), index=ser.index, name="xxx")
tm.assert_series_equal(result, expected)
tz_result = result.dt.tz
assert str(tz_result) == "CET"
freq_result = ser.dt.freq
assert freq_result == DatetimeIndex(ser.values, freq="infer").freq
def test_dt_namespace_accessor_timedelta(self):
# GH#7207, GH#11128
# test .dt namespace accessor
# timedelta index
cases = [
Series(
timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"
),
Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"),
Series(
timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"),
name="xxx",
),
]
for ser in cases:
for prop in ok_for_td:
# we test freq below
if prop != "freq":
self._compare(ser, prop)
for prop in ok_for_td_methods:
getattr(ser.dt, prop)
result = ser.dt.components
assert isinstance(result, DataFrame)
tm.assert_index_equal(result.index, ser.index)
result = ser.dt.to_pytimedelta()
assert isinstance(result, np.ndarray)
assert result.dtype == object
result = ser.dt.total_seconds()
assert isinstance(result, Series)
assert result.dtype == "float64"
freq_result = ser.dt.freq
assert freq_result == TimedeltaIndex(ser.values, freq="infer").freq
def test_dt_namespace_accessor_period(self):
# GH#7207, GH#11128
# test .dt namespace accessor
# periodindex
pi = period_range("20130101", periods=5, freq="D")
ser = Series(pi, name="xxx")
for prop in ok_for_period:
# we test freq below
if prop != "freq":
self._compare(ser, prop)
for prop in ok_for_period_methods:
getattr(ser.dt, prop)
freq_result = ser.dt.freq
assert freq_result == PeriodIndex(ser.values).freq
def test_dt_namespace_accessor_index_and_values(self):
# both
index = date_range("20130101", periods=3, freq="D")
dti = date_range("20140204", periods=3, freq="s")
ser = Series(dti, index=index, name="xxx")
exp = Series(
np.array([2014, 2014, 2014], dtype="int32"), index=index, name="xxx"
)
tm.assert_series_equal(ser.dt.year, exp)
exp = Series(np.array([2, 2, 2], dtype="int32"), index=index, name="xxx")
tm.assert_series_equal(ser.dt.month, exp)
exp = Series(np.array([0, 1, 2], dtype="int32"), index=index, name="xxx")
tm.assert_series_equal(ser.dt.second, exp)
exp = Series([ser.iloc[0]] * 3, index=index, name="xxx")
tm.assert_series_equal(ser.dt.normalize(), exp)
def test_dt_accessor_limited_display_api(self):
# tznaive
ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
results = get_dir(ser)
tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods)))
# tzaware
ser = Series(date_range("2015-01-01", "2016-01-01", freq="min"), name="xxx")
ser = ser.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
results = get_dir(ser)
tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods)))
# Period
idx = period_range("20130101", periods=5, freq="D", name="xxx").astype(object)
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
ser = Series(idx)
results = get_dir(ser)
tm.assert_almost_equal(
results, sorted(set(ok_for_period + ok_for_period_methods))
)
def test_dt_accessor_ambiguous_freq_conversions(self):
# GH#11295
# ambiguous time error on the conversions
ser = Series(date_range("2015-01-01", "2016-01-01", freq="min"), name="xxx")
ser = ser.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
exp_values = date_range(
"2015-01-01", "2016-01-01", freq="min", tz="UTC"
).tz_convert("America/Chicago")
# freq not preserved by tz_localize above
exp_values = exp_values._with_freq(None)
expected = Series(exp_values, name="xxx")
tm.assert_series_equal(ser, expected)
def test_dt_accessor_not_writeable(self, using_copy_on_write, warn_copy_on_write):
# no setting allowed
ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
with pytest.raises(ValueError, match="modifications"):
ser.dt.hour = 5
# trying to set a copy
msg = "modifications to a property of a datetimelike.+not supported"
with pd.option_context("chained_assignment", "raise"):
if using_copy_on_write:
with tm.raises_chained_assignment_error():
ser.dt.hour[0] = 5
elif warn_copy_on_write:
with tm.assert_produces_warning(
FutureWarning, match="ChainedAssignmentError"
):
ser.dt.hour[0] = 5
else:
with pytest.raises(SettingWithCopyError, match=msg):
ser.dt.hour[0] = 5
@pytest.mark.parametrize(
"method, dates",
[
["round", ["2012-01-02", "2012-01-02", "2012-01-01"]],
["floor", ["2012-01-01", "2012-01-01", "2012-01-01"]],
["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]],
],
)
def test_dt_round(self, method, dates):
# round
ser = Series(
pd.to_datetime(
["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
),
name="xxx",
)
result = getattr(ser.dt, method)("D")
expected = Series(pd.to_datetime(dates), name="xxx")
tm.assert_series_equal(result, expected)
def test_dt_round_tz(self):
ser = Series(
pd.to_datetime(
["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
),
name="xxx",
)
result = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round("D")
exp_values = pd.to_datetime(
["2012-01-01", "2012-01-01", "2012-01-01"]
).tz_localize("US/Eastern")
expected = Series(exp_values, name="xxx")
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("method", ["ceil", "round", "floor"])
def test_dt_round_tz_ambiguous(self, method):
# GH 18946 round near "fall back" DST
df1 = DataFrame(
[
pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True),
pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True),
pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True),
],
columns=["date"],
)
df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid")
# infer
result = getattr(df1.date.dt, method)("h", ambiguous="infer")
expected = df1["date"]
tm.assert_series_equal(result, expected)
# bool-array
result = getattr(df1.date.dt, method)("h", ambiguous=[True, False, False])
tm.assert_series_equal(result, expected)
# NaT
result = getattr(df1.date.dt, method)("h", ambiguous="NaT")
expected = df1["date"].copy()
expected.iloc[0:2] = pd.NaT
tm.assert_series_equal(result, expected)
# raise
with tm.external_error_raised(pytz.AmbiguousTimeError):
getattr(df1.date.dt, method)("h", ambiguous="raise")
@pytest.mark.parametrize(
"method, ts_str, freq",
[
["ceil", "2018-03-11 01:59:00-0600", "5min"],
["round", "2018-03-11 01:59:00-0600", "5min"],
["floor", "2018-03-11 03:01:00-0500", "2h"],
],
)
def test_dt_round_tz_nonexistent(self, method, ts_str, freq):
# GH 23324 round near "spring forward" DST
ser = Series([pd.Timestamp(ts_str, tz="America/Chicago")])
result = getattr(ser.dt, method)(freq, nonexistent="shift_forward")
expected = Series([pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")])
tm.assert_series_equal(result, expected)
result = getattr(ser.dt, method)(freq, nonexistent="NaT")
expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz)
tm.assert_series_equal(result, expected)
with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"):
getattr(ser.dt, method)(freq, nonexistent="raise")
@pytest.mark.parametrize("freq", ["ns", "us", "1000us"])
def test_dt_round_nonnano_higher_resolution_no_op(self, freq):
# GH 52761
ser = Series(
["2020-05-31 08:00:00", "2000-12-31 04:00:05", "1800-03-14 07:30:20"],
dtype="datetime64[ms]",
)
expected = ser.copy()
result = ser.dt.round(freq)
tm.assert_series_equal(result, expected)
assert not np.shares_memory(ser.array._ndarray, result.array._ndarray)
def test_dt_namespace_accessor_categorical(self):
# GH 19468
dti = DatetimeIndex(["20171111", "20181212"]).repeat(2)
ser = Series(pd.Categorical(dti), name="foo")
result = ser.dt.year
expected = Series([2017, 2017, 2018, 2018], dtype="int32", name="foo")
tm.assert_series_equal(result, expected)
def test_dt_tz_localize_categorical(self, tz_aware_fixture):
# GH 27952
tz = tz_aware_fixture
datetimes = Series(
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]"
)
categorical = datetimes.astype("category")
result = categorical.dt.tz_localize(tz)
expected = datetimes.dt.tz_localize(tz)
tm.assert_series_equal(result, expected)
def test_dt_tz_convert_categorical(self, tz_aware_fixture):
# GH 27952
tz = tz_aware_fixture
datetimes = Series(
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]"
)
categorical = datetimes.astype("category")
result = categorical.dt.tz_convert(tz)
expected = datetimes.dt.tz_convert(tz)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("accessor", ["year", "month", "day"])
def test_dt_other_accessors_categorical(self, accessor):
# GH 27952
datetimes = Series(
["2018-01-01", "2018-01-01", "2019-01-02"], dtype="datetime64[ns]"
)
categorical = datetimes.astype("category")
result = getattr(categorical.dt, accessor)
expected = getattr(datetimes.dt, accessor)
tm.assert_series_equal(result, expected)
def test_dt_accessor_no_new_attributes(self):
# https://github.com/pandas-dev/pandas/issues/10673
ser = Series(date_range("20130101", periods=5, freq="D"))
with pytest.raises(AttributeError, match="You cannot add any new attribute"):
ser.dt.xlabel = "a"
# error: Unsupported operand types for + ("List[None]" and "List[str]")
@pytest.mark.parametrize(
"time_locale", [None] + tm.get_locales() # type: ignore[operator]
)
def test_dt_accessor_datetime_name_accessors(self, time_locale):
# Test Monday -> Sunday and January -> December, in that sequence
if time_locale is None:
# If the time_locale is None, day-name and month_name should
# return the english attributes
expected_days = [
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday",
]
expected_months = [
"January",
"February",
"March",
"April",
"May",
"June",
"July",
"August",
"September",
"October",
"November",
"December",
]
else:
with tm.set_locale(time_locale, locale.LC_TIME):
expected_days = calendar.day_name[:]
expected_months = calendar.month_name[1:]
ser = Series(date_range(freq="D", start=datetime(1998, 1, 1), periods=365))
english_days = [
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday",
]
for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
name = name.capitalize()
assert ser.dt.day_name(locale=time_locale)[day] == name
assert ser.dt.day_name(locale=None)[day] == eng_name
ser = pd.concat([ser, Series([pd.NaT])])
assert np.isnan(ser.dt.day_name(locale=time_locale).iloc[-1])
ser = Series(date_range(freq="ME", start="2012", end="2013"))
result = ser.dt.month_name(locale=time_locale)
expected = Series([month.capitalize() for month in expected_months])
# work around https://github.com/pandas-dev/pandas/issues/22342
result = result.str.normalize("NFD")
expected = expected.str.normalize("NFD")
tm.assert_series_equal(result, expected)
for s_date, expected in zip(ser, expected_months):
result = s_date.month_name(locale=time_locale)
expected = expected.capitalize()
result = unicodedata.normalize("NFD", result)
expected = unicodedata.normalize("NFD", expected)
assert result == expected
ser = pd.concat([ser, Series([pd.NaT])])
assert np.isnan(ser.dt.month_name(locale=time_locale).iloc[-1])
def test_strftime(self):
# GH 10086
ser = Series(date_range("20130101", periods=5))
result = ser.dt.strftime("%Y/%m/%d")
expected = Series(
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
)
tm.assert_series_equal(result, expected)
ser = Series(date_range("2015-02-03 11:22:33.4567", periods=5))
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
expected = Series(
[
"2015/02/03 11-22-33",
"2015/02/04 11-22-33",
"2015/02/05 11-22-33",
"2015/02/06 11-22-33",
"2015/02/07 11-22-33",
]
)
tm.assert_series_equal(result, expected)
ser = Series(period_range("20130101", periods=5))
result = ser.dt.strftime("%Y/%m/%d")
expected = Series(
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
)
tm.assert_series_equal(result, expected)
ser = Series(period_range("2015-02-03 11:22:33.4567", periods=5, freq="s"))
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
expected = Series(
[
"2015/02/03 11-22-33",
"2015/02/03 11-22-34",
"2015/02/03 11-22-35",
"2015/02/03 11-22-36",
"2015/02/03 11-22-37",
]
)
tm.assert_series_equal(result, expected)
def test_strftime_dt64_days(self):
ser = Series(date_range("20130101", periods=5))
ser.iloc[0] = pd.NaT
result = ser.dt.strftime("%Y/%m/%d")
expected = Series(
[np.nan, "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
)
tm.assert_series_equal(result, expected)
datetime_index = date_range("20150301", periods=5)
result = datetime_index.strftime("%Y/%m/%d")
expected = Index(
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
dtype=np.object_,
)
# dtype may be S10 or U10 depending on python version
tm.assert_index_equal(result, expected)
def test_strftime_period_days(self, using_infer_string):
period_index = period_range("20150301", periods=5)
result = period_index.strftime("%Y/%m/%d")
expected = Index(
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
dtype="=U10",
)
if using_infer_string:
expected = expected.astype("string[pyarrow_numpy]")
tm.assert_index_equal(result, expected)
def test_strftime_dt64_microsecond_resolution(self):
ser = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)])
result = ser.dt.strftime("%Y-%m-%d %H:%M:%S")
expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
tm.assert_series_equal(result, expected)
def test_strftime_period_hours(self):
ser = Series(period_range("20130101", periods=4, freq="h"))
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S")
expected = Series(
[
"2013/01/01 00:00:00",
"2013/01/01 01:00:00",
"2013/01/01 02:00:00",
"2013/01/01 03:00:00",
]
)
tm.assert_series_equal(result, expected)
def test_strftime_period_minutes(self):
ser = Series(period_range("20130101", periods=4, freq="ms"))
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S.%l")
expected = Series(
[
"2013/01/01 00:00:00.000",
"2013/01/01 00:00:00.001",
"2013/01/01 00:00:00.002",
"2013/01/01 00:00:00.003",
]
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data",
[
DatetimeIndex(["2019-01-01", pd.NaT]),
PeriodIndex(["2019-01-01", pd.NaT], dtype="period[D]"),
],
)
def test_strftime_nat(self, data):
# GH 29578
ser = Series(data)
result = ser.dt.strftime("%Y-%m-%d")
expected = Series(["2019-01-01", np.nan])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"data", [DatetimeIndex([pd.NaT]), PeriodIndex([pd.NaT], dtype="period[D]")]
)
def test_strftime_all_nat(self, data):
# https://github.com/pandas-dev/pandas/issues/45858
ser = Series(data)
with tm.assert_produces_warning(None):
result = ser.dt.strftime("%Y-%m-%d")
expected = Series([np.nan], dtype=object)
tm.assert_series_equal(result, expected)
def test_valid_dt_with_missing_values(self):
# GH 8689
ser = Series(date_range("20130101", periods=5, freq="D"))
ser.iloc[2] = pd.NaT
for attr in ["microsecond", "nanosecond", "second", "minute", "hour", "day"]:
expected = getattr(ser.dt, attr).copy()
expected.iloc[2] = np.nan
result = getattr(ser.dt, attr)
tm.assert_series_equal(result, expected)
result = ser.dt.date
expected = Series(
[
date(2013, 1, 1),
date(2013, 1, 2),
pd.NaT,
date(2013, 1, 4),
date(2013, 1, 5),
],
dtype="object",
)
tm.assert_series_equal(result, expected)
result = ser.dt.time
expected = Series([time(0), time(0), pd.NaT, time(0), time(0)], dtype="object")
tm.assert_series_equal(result, expected)
def test_dt_accessor_api(self):
# GH 9322
from pandas.core.indexes.accessors import (
CombinedDatetimelikeProperties,
DatetimeProperties,
)
assert Series.dt is CombinedDatetimelikeProperties
ser = Series(date_range("2000-01-01", periods=3))
assert isinstance(ser.dt, DatetimeProperties)
@pytest.mark.parametrize(
"ser",
[
Series(np.arange(5)),
Series(list("abcde")),
Series(np.random.default_rng(2).standard_normal(5)),
],
)
def test_dt_accessor_invalid(self, ser):
# GH#9322 check that series with incorrect dtypes don't have attr
with pytest.raises(AttributeError, match="only use .dt accessor"):
ser.dt
assert not hasattr(ser, "dt")
def test_dt_accessor_updates_on_inplace(self):
ser = Series(date_range("2018-01-01", periods=10))
ser[2] = None
return_value = ser.fillna(pd.Timestamp("2018-01-01"), inplace=True)
assert return_value is None
result = ser.dt.date
assert result[0] == result[2]
def test_date_tz(self):
# GH11757
rng = DatetimeIndex(
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"],
tz="US/Eastern",
)
ser = Series(rng)
expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)])
tm.assert_series_equal(ser.dt.date, expected)
tm.assert_series_equal(ser.apply(lambda x: x.date()), expected)
def test_dt_timetz_accessor(self, tz_naive_fixture):
# GH21358
tz = maybe_get_tz(tz_naive_fixture)
dtindex = DatetimeIndex(
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz=tz
)
ser = Series(dtindex)
expected = Series(
[time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)]
)
result = ser.dt.timetz
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"input_series, expected_output",
[
[["2020-01-01"], [[2020, 1, 3]]],
[[pd.NaT], [[np.nan, np.nan, np.nan]]],
[["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]],
[["2010-01-01", pd.NaT], [[2009, 53, 5], [np.nan, np.nan, np.nan]]],
# see GH#36032
[["2016-01-08", "2016-01-04"], [[2016, 1, 5], [2016, 1, 1]]],
[["2016-01-07", "2016-01-01"], [[2016, 1, 4], [2015, 53, 5]]],
],
)
def test_isocalendar(self, input_series, expected_output):
result = pd.to_datetime(Series(input_series)).dt.isocalendar()
expected_frame = DataFrame(
expected_output, columns=["year", "week", "day"], dtype="UInt32"
)
tm.assert_frame_equal(result, expected_frame)
def test_hour_index(self):
dt_series = Series(
date_range(start="2021-01-01", periods=5, freq="h"),
index=[2, 6, 7, 8, 11],
dtype="category",
)
result = dt_series.dt.hour
expected = Series(
[0, 1, 2, 3, 4],
dtype="int32",
index=[2, 6, 7, 8, 11],
)
tm.assert_series_equal(result, expected)
class TestSeriesPeriodValuesDtAccessor:
    """Tests for .dt on Series backed by PeriodArray values."""

    @pytest.mark.parametrize(
        "input_vals",
        [
            [Period("2016-01", freq="M"), Period("2016-02", freq="M")],
            [Period("2016-01-01", freq="D"), Period("2016-01-02", freq="D")],
            [
                Period("2016-01-01 00:00:00", freq="h"),
                Period("2016-01-01 01:00:00", freq="h"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="M"),
                Period("2016-01-01 00:01:00", freq="M"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="s"),
                Period("2016-01-01 00:00:01", freq="s"),
            ],
        ],
    )
    def test_end_time_timevalues(self, input_vals):
        # GH#17157: the time part of each Period must be adjusted by
        # end_time when using the .dt accessor on a Series
        periods = PeriodArray._from_sequence(np.asarray(input_vals))
        ser = Series(periods)
        result = ser.dt.end_time
        expected = ser.apply(lambda x: x.end_time)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("input_vals", ["2001", "NaT"])
    def test_to_period(self, input_vals):
        # GH#21205: datetime64 -> Period[D] round-trip of a single value
        result = Series([input_vals], dtype="datetime64[ns]").dt.to_period("D")
        expected = Series([input_vals], dtype="Period[D]")
        tm.assert_series_equal(result, expected)
def test_normalize_pre_epoch_dates():
    """normalize() must work for timestamps before the Unix epoch.

    GH: 36294
    """
    ser = pd.to_datetime(Series(["1969-01-01 09:00:00", "2016-01-01 09:00:00"]))
    normalized = ser.dt.normalize()
    expected = pd.to_datetime(Series(["1969-01-01", "2016-01-01"]))
    tm.assert_series_equal(normalized, expected)
def test_day_attribute_non_nano_beyond_int32():
    """.dt.days on second-resolution timedeltas must not overflow int32.

    GH 52386
    """
    seconds = [
        136457654736252,
        134736784364431,
        245345345545332,
        223432411,
        2343241,
        3634548734,
        23234,
    ]
    ser = Series(np.array(seconds, dtype="timedelta64[s]"))
    expected = Series([1579371003, 1559453522, 2839645203, 2586, 27, 42066, 0])
    tm.assert_series_equal(ser.dt.days, expected)

View File

@ -0,0 +1,129 @@
import re
import pytest
from pandas import (
ArrowDtype,
Series,
)
import pandas._testing as tm
pa = pytest.importorskip("pyarrow")
from pandas.compat import pa_version_under11p0
@pytest.mark.parametrize(
    "list_dtype",
    (
        pa.list_(pa.int64()),
        pa.list_(pa.int64(), list_size=3),
        pa.large_list(pa.int64()),
    ),
)
def test_list_getitem(list_dtype):
    """Scalar indexing selects one element per row; nulls propagate."""
    ser = Series([[1, 2, 3], [4, None, 5], None], dtype=ArrowDtype(list_dtype))
    result = ser.list[1]
    expected = Series([2, None, None], dtype="int64[pyarrow]")
    tm.assert_series_equal(result, expected)
def test_list_getitem_slice():
    """Series.list[i:j:k] slices each row's list; needs pyarrow >= 11."""
    ser = Series(
        [[1, 2, 3], [4, None, 5], None],
        dtype=ArrowDtype(pa.list_(pa.int64())),
    )
    if pa_version_under11p0:
        # older pyarrow lacks the list_slice kernel
        with pytest.raises(
            NotImplementedError, match="List slice not supported by pyarrow "
        ):
            ser.list[1:None:None]
    else:
        actual = ser.list[1:None:None]
        expected = Series(
            [[2, 3], [None, 5], None], dtype=ArrowDtype(pa.list_(pa.int64()))
        )
        tm.assert_series_equal(actual, expected)
def test_list_len():
    """Series.list.len() returns per-row list lengths, NA for missing rows."""
    ser = Series(
        [[1, 2, 3], [4, None], None],
        dtype=ArrowDtype(pa.list_(pa.int64())),
    )
    actual = ser.list.len()
    # the test expects pyarrow's int32 length type
    expected = Series([3, 2, None], dtype=ArrowDtype(pa.int32()))
    tm.assert_series_equal(actual, expected)
def test_list_flatten():
    """Series.list.flatten() concatenates list elements, skipping missing rows."""
    ser = Series(
        [[1, 2, 3], [4, None], None],
        dtype=ArrowDtype(pa.list_(pa.int64())),
    )
    actual = ser.list.flatten()
    expected = Series([1, 2, 3, 4, None], dtype=ArrowDtype(pa.int64()))
    tm.assert_series_equal(actual, expected)
def test_list_getitem_slice_invalid():
    """Slicing with step 0 raises: NotImplementedError pre-pyarrow-11, ArrowInvalid after."""
    ser = Series(
        [[1, 2, 3], [4, None, 5], None],
        dtype=ArrowDtype(pa.list_(pa.int64())),
    )
    if pa_version_under11p0:
        with pytest.raises(
            NotImplementedError, match="List slice not supported by pyarrow "
        ):
            ser.list[1:None:0]
    else:
        with pytest.raises(pa.lib.ArrowInvalid, match=re.escape("`step` must be >= 1")):
            ser.list[1:None:0]
def test_list_accessor_non_list_dtype():
    """Using .list on a non-list ArrowDtype raises AttributeError."""
    ser = Series(
        [1, 2, 4],
        dtype=ArrowDtype(pa.int64()),
    )
    with pytest.raises(
        AttributeError,
        match=re.escape(
            "Can only use the '.list' accessor with 'list[pyarrow]' dtype, "
            "not int64[pyarrow]."
        ),
    ):
        ser.list[1:None:0]
@pytest.mark.parametrize(
    "list_dtype",
    (
        pa.list_(pa.int64()),
        pa.list_(pa.int64(), list_size=3),
        pa.large_list(pa.int64()),
    ),
)
def test_list_getitem_invalid_index(list_dtype):
    """Out-of-bounds indices and non-int/slice keys raise for Series.list[...]."""
    ser = Series(
        [[1, 2, 3], [4, None, 5], None],
        dtype=ArrowDtype(list_dtype),
    )
    with pytest.raises(pa.lib.ArrowInvalid, match="Index -1 is out of bounds"):
        ser.list[-1]
    with pytest.raises(pa.lib.ArrowInvalid, match="Index 5 is out of bounds"):
        ser.list[5]
    with pytest.raises(ValueError, match="key must be an int or slice, got str"):
        ser.list["abc"]
def test_list_accessor_not_iterable():
    """The ListAccessor object itself is not iterable."""
    ser = Series(
        [[1, 2, 3], [4, None], None],
        dtype=ArrowDtype(pa.list_(pa.int64())),
    )
    with pytest.raises(TypeError, match="'ListAccessor' object is not iterable"):
        iter(ser.list)

View File

@ -0,0 +1,9 @@
from pandas import Series
class TestSparseAccessor:
    """Tests for the Series.sparse accessor."""

    def test_sparse_accessor_updates_on_inplace(self):
        """The .sparse accessor reflects an in-place drop."""
        sparse_ser = Series([1, 1, 2, 3], dtype="Sparse[int]")
        result = sparse_ser.drop([0, 1], inplace=True)
        # inplace=True mutates the Series and returns None
        assert result is None
        # after dropping, every stored value is non-fill -> full density
        assert sparse_ser.sparse.density == 1.0

View File

@ -0,0 +1,25 @@
import pytest
from pandas import Series
import pandas._testing as tm
class TestStrAccessor:
def test_str_attribute(self):
# GH#9068
methods = ["strip", "rstrip", "lstrip"]
ser = Series([" jack", "jill ", " jesse ", "frank"])
for method in methods:
expected = Series([getattr(str, method)(x) for x in ser.values])
tm.assert_series_equal(getattr(Series.str, method)(ser.str), expected)
# str accessor only valid with string values
ser = Series(range(5))
with pytest.raises(AttributeError, match="only use .str accessor"):
ser.str.repeat(2)
def test_str_accessor_updates_on_inplace(self):
ser = Series(list("abc"))
return_value = ser.drop([0], inplace=True)
assert return_value is None
assert len(ser.str.lower()) == 2

View File

@ -0,0 +1,196 @@
import re
import pytest
from pandas.compat.pyarrow import (
pa_version_under11p0,
pa_version_under13p0,
)
from pandas import (
ArrowDtype,
DataFrame,
Index,
Series,
)
import pandas._testing as tm
pa = pytest.importorskip("pyarrow")
pc = pytest.importorskip("pyarrow.compute")
def test_struct_accessor_dtypes():
ser = Series(
[],
dtype=ArrowDtype(
pa.struct(
[
("int_col", pa.int64()),
("string_col", pa.string()),
(
"struct_col",
pa.struct(
[
("int_col", pa.int64()),
("float_col", pa.float64()),
]
),
),
]
)
),
)
actual = ser.struct.dtypes
expected = Series(
[
ArrowDtype(pa.int64()),
ArrowDtype(pa.string()),
ArrowDtype(
pa.struct(
[
("int_col", pa.int64()),
("float_col", pa.float64()),
]
)
),
],
index=Index(["int_col", "string_col", "struct_col"]),
)
tm.assert_series_equal(actual, expected)
@pytest.mark.skipif(pa_version_under13p0, reason="pyarrow>=13.0.0 required")
def test_struct_accessor_field():
index = Index([-100, 42, 123])
ser = Series(
[
{"rice": 1.0, "maize": -1, "wheat": "a"},
{"rice": 2.0, "maize": 0, "wheat": "b"},
{"rice": 3.0, "maize": 1, "wheat": "c"},
],
dtype=ArrowDtype(
pa.struct(
[
("rice", pa.float64()),
("maize", pa.int64()),
("wheat", pa.string()),
]
)
),
index=index,
)
by_name = ser.struct.field("maize")
by_name_expected = Series(
[-1, 0, 1],
dtype=ArrowDtype(pa.int64()),
index=index,
name="maize",
)
tm.assert_series_equal(by_name, by_name_expected)
by_index = ser.struct.field(2)
by_index_expected = Series(
["a", "b", "c"],
dtype=ArrowDtype(pa.string()),
index=index,
name="wheat",
)
tm.assert_series_equal(by_index, by_index_expected)
def test_struct_accessor_field_with_invalid_name_or_index():
ser = Series([], dtype=ArrowDtype(pa.struct([("field", pa.int64())])))
with pytest.raises(ValueError, match="name_or_index must be an int, str,"):
ser.struct.field(1.1)
@pytest.mark.skipif(pa_version_under11p0, reason="pyarrow>=11.0.0 required")
def test_struct_accessor_explode():
index = Index([-100, 42, 123])
ser = Series(
[
{"painted": 1, "snapping": {"sea": "green"}},
{"painted": 2, "snapping": {"sea": "leatherback"}},
{"painted": 3, "snapping": {"sea": "hawksbill"}},
],
dtype=ArrowDtype(
pa.struct(
[
("painted", pa.int64()),
("snapping", pa.struct([("sea", pa.string())])),
]
)
),
index=index,
)
actual = ser.struct.explode()
expected = DataFrame(
{
"painted": Series([1, 2, 3], index=index, dtype=ArrowDtype(pa.int64())),
"snapping": Series(
[{"sea": "green"}, {"sea": "leatherback"}, {"sea": "hawksbill"}],
index=index,
dtype=ArrowDtype(pa.struct([("sea", pa.string())])),
),
},
)
tm.assert_frame_equal(actual, expected)
@pytest.mark.parametrize(
"invalid",
[
pytest.param(Series([1, 2, 3], dtype="int64"), id="int64"),
pytest.param(
Series(["a", "b", "c"], dtype="string[pyarrow]"), id="string-pyarrow"
),
],
)
def test_struct_accessor_api_for_invalid(invalid):
with pytest.raises(
AttributeError,
match=re.escape(
"Can only use the '.struct' accessor with 'struct[pyarrow]' dtype, "
f"not {invalid.dtype}."
),
):
invalid.struct
@pytest.mark.parametrize(
["indices", "name"],
[
(0, "int_col"),
([1, 2], "str_col"),
(pc.field("int_col"), "int_col"),
("int_col", "int_col"),
(b"string_col", b"string_col"),
([b"string_col"], "string_col"),
],
)
@pytest.mark.skipif(pa_version_under13p0, reason="pyarrow>=13.0.0 required")
def test_struct_accessor_field_expanded(indices, name):
arrow_type = pa.struct(
[
("int_col", pa.int64()),
(
"struct_col",
pa.struct(
[
("int_col", pa.int64()),
("float_col", pa.float64()),
("str_col", pa.string()),
]
),
),
(b"string_col", pa.string()),
]
)
data = pa.array([], type=arrow_type)
ser = Series(data, dtype=ArrowDtype(arrow_type))
expected = pc.struct_field(data, indices)
result = ser.struct.field(indices)
tm.assert_equal(result.array._pa_array.combine_chunks(), expected)
assert result.name == name

View File

@ -0,0 +1,499 @@
"""
Also test support for datetime64[ns] in Series / DataFrame
"""
from datetime import (
datetime,
timedelta,
)
import re
from dateutil.tz import (
gettz,
tzutc,
)
import numpy as np
import pytest
import pytz
from pandas._libs import index as libindex
import pandas as pd
from pandas import (
DataFrame,
Series,
Timestamp,
date_range,
period_range,
)
import pandas._testing as tm
def test_fancy_getitem():
    """__getitem__ on a datetime-indexed Series: positions (deprecated), strings, datetimes, slices."""
    dti = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )
    s = Series(np.arange(len(dti)), index=dti)
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert s[48] == 48
    # several equivalent datetime-like labels resolve to the same element
    assert s["1/2/2009"] == 48
    assert s["2009-1-2"] == 48
    assert s[datetime(2009, 1, 2)] == 48
    assert s[Timestamp(datetime(2009, 1, 2))] == 48
    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        s["2009-1-3"]
    # string slices and datetime slices select the same rows
    tm.assert_series_equal(
        s["3/6/2009":"2009-06-05"], s[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
    )
def test_fancy_setitem():
    """__setitem__ with positional ints (deprecated), string labels, and string slices."""
    dti = date_range(
        freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
    )
    s = Series(np.arange(len(dti)), index=dti)
    msg = "Series.__setitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        s[48] = -1
    assert s.iloc[48] == -1
    s["1/2/2009"] = -2
    assert s.iloc[48] == -2
    # a string slice assigns to every row in the range
    s["1/2/2009":"2009-06-05"] = -3
    assert (s[48:54] == -3).all()
@pytest.mark.parametrize("tz_source", ["pytz", "dateutil"])
def test_getitem_setitem_datetime_tz(tz_source):
    """Set/get on a tz-aware Series using labels expressed in other timezones (GH#2785, GH#2789)."""
    if tz_source == "pytz":
        tzget = pytz.timezone
    else:
        # handle special case for utc in dateutil
        tzget = lambda x: tzutc() if x == "UTC" else gettz(x)
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="h", tz=tzget("US/Eastern"))
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
    # also test Timestamp tz handling, GH #2789
    # a UTC-offset label string selects the same instant in the index tz
    result = ts.copy()
    result["1990-01-01 09:00:00+00:00"] = 0
    result["1990-01-01 09:00:00+00:00"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)
    result = ts.copy()
    result["1990-01-01 03:00:00-06:00"] = 0
    result["1990-01-01 03:00:00-06:00"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)
    # repeat with datetimes
    result = ts.copy()
    result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = 0
    result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = ts.iloc[4]
    tm.assert_series_equal(result, ts)
    result = ts.copy()
    dt = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central"))
    dt = dt.to_pydatetime()
    result[dt] = 0
    result[dt] = ts.iloc[4]
    tm.assert_series_equal(result, ts)
def test_getitem_setitem_datetimeindex():
    """Get/set on a tz-aware DatetimeIndex: strings get a tz-compat pass, datetimes do not.

    Covers GH#2785, GH#18435 (string keys), and GH#36148 (tz-naive datetime
    keys raise on lookup and append as an object index on assignment).
    """
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="h", tz="US/Eastern")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
    result = ts["1990-01-01 04:00:00"]
    expected = ts.iloc[4]
    assert result == expected
    result = ts.copy()
    result["1990-01-01 04:00:00"] = 0
    result["1990-01-01 04:00:00"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)
    result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)
    result = ts.copy()
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
    tm.assert_series_equal(result, ts)
    lb = "1990-01-01 04:00:00"
    rb = "1990-01-01 07:00:00"
    # GH#18435 strings get a pass from tzawareness compat
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)
    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)
    # But we do not give datetimes a pass on tzawareness compat
    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    naive = datetime(1990, 1, 1, 4)
    for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]:
        with pytest.raises(KeyError, match=re.escape(repr(key))):
            # GH#36148 as of 2.0 we require tzawareness-compat
            ts[key]
    result = ts.copy()
    # GH#36148 as of 2.0 we do not ignore tzawareness mismatch in indexing,
    # so setting it as a new key casts to object rather than matching
    # rng[4]
    result[naive] = ts.iloc[4]
    assert result.index.dtype == object
    tm.assert_index_equal(result.index[:-1], rng.astype(object))
    assert result.index[-1] == naive
    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        ts[naive : datetime(1990, 1, 1, 7)]
    result = ts.copy()
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        result[naive : datetime(1990, 1, 1, 7)] = 0
    with pytest.raises(TypeError, match=msg):
        # GH#36148 require tzawareness compat as of 2.0
        result[naive : datetime(1990, 1, 1, 7)] = 99
    # the __setitems__ here failed, so result should still match ts
    tm.assert_series_equal(result, ts)
    lb = naive
    rb = datetime(1990, 1, 1, 7)
    msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= lb) & (ts.index <= rb)]
    lb = Timestamp(naive).tz_localize(rng.tzinfo)
    rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)
    result = ts[ts.index[4]]
    expected = ts.iloc[4]
    assert result == expected
    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)
    result = ts.copy()
    result[ts.index[4:8]] = 0
    result.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(result, ts)
    # also test partial date slicing
    result = ts["1990-01-02"]
    expected = ts[24:48]
    tm.assert_series_equal(result, expected)
    result = ts.copy()
    result["1990-01-02"] = 0
    result["1990-01-02"] = ts[24:48]
    tm.assert_series_equal(result, ts)
def test_getitem_setitem_periodindex():
    """Get/set on a PeriodIndex via partial strings, string slices, and labels (GH 2782)."""
    N = 50
    rng = period_range("1/1/1990", periods=N, freq="h")
    ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
    result = ts["1990-01-01 04"]
    expected = ts.iloc[4]
    assert result == expected
    result = ts.copy()
    result["1990-01-01 04"] = 0
    result["1990-01-01 04"] = ts.iloc[4]
    tm.assert_series_equal(result, ts)
    result = ts["1990-01-01 04":"1990-01-01 07"]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)
    result = ts.copy()
    result["1990-01-01 04":"1990-01-01 07"] = 0
    result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
    tm.assert_series_equal(result, ts)
    lb = "1990-01-01 04"
    rb = "1990-01-01 07"
    # boolean mask built from string comparisons selects the same window
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)
    # GH 2782
    result = ts[ts.index[4]]
    expected = ts.iloc[4]
    assert result == expected
    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)
    result = ts.copy()
    result[ts.index[4:8]] = 0
    result.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(result, ts)
def test_datetime_indexing():
    """Missing Timestamp key raises KeyError; assignment appends — monotonic or not."""
    index = date_range("1/1/2000", "1/7/2000")
    index = index.repeat(3)
    s = Series(len(index), index=index)
    stamp = Timestamp("1/8/2000")
    with pytest.raises(KeyError, match=re.escape(repr(stamp))):
        s[stamp]
    s[stamp] = 0
    assert s[stamp] == 0
    # not monotonic
    s = Series(len(index), index=index)
    s = s[::-1]
    with pytest.raises(KeyError, match=re.escape(repr(stamp))):
        s[stamp]
    s[stamp] = 0
    assert s[stamp] == 0
# test duplicates in time series
def test_indexing_with_duplicate_datetimeindex(
    rand_series_with_duplicate_datetimeindex,
):
    """Label lookup/assignment on a DatetimeIndex containing duplicate entries.

    For each unique label: lookup returns a sub-Series when the label is
    duplicated, a scalar when unique; assignment hits every duplicate row.
    A missing datetime key raises KeyError on lookup but appends on set.
    """
    ts = rand_series_with_duplicate_datetimeindex
    uniques = ts.index.unique()
    for date in uniques:
        mask = ts.index == date
        result = ts[date]
        # reuse the already-computed mask instead of re-running the
        # O(n) index comparison a second time
        total = mask.sum()
        expected = ts[mask]
        if total > 1:
            # duplicated label -> Series of all matching rows
            tm.assert_series_equal(result, expected)
        else:
            # unique label -> scalar
            tm.assert_almost_equal(result, expected.iloc[0])
        cp = ts.copy()
        cp[date] = 0
        expected = Series(np.where(mask, 0, ts), index=ts.index)
        tm.assert_series_equal(cp, expected)
    key = datetime(2000, 1, 6)
    with pytest.raises(KeyError, match=re.escape(repr(key))):
        ts[key]
    # new index
    ts[datetime(2000, 1, 6)] = 0
    assert ts[datetime(2000, 1, 6)] == 0
def test_loc_getitem_over_size_cutoff(monkeypatch):
    """.loc works on a large, duplicated, non-periodic DatetimeIndex (GH#1821)."""
    # #1821
    # lower the cutoff so the index engine takes its over-size code path
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)
    # create large list of non periodic datetime
    dates = []
    sec = timedelta(seconds=1)
    half_sec = timedelta(microseconds=500000)
    d = datetime(2011, 12, 5, 20, 30)
    n = 1100
    for i in range(n):
        dates.append(d)
        dates.append(d + sec)
        dates.append(d + sec + half_sec)
        dates.append(d + sec + sec + half_sec)
        d += 3 * sec
    # duplicate some values in the list
    duplicate_positions = np.random.default_rng(2).integers(0, len(dates) - 1, 20)
    for p in duplicate_positions:
        dates[p + 1] = dates[p]
    df = DataFrame(
        np.random.default_rng(2).standard_normal((len(dates), 4)),
        index=dates,
        columns=list("ABCD"),
    )
    pos = n * 3
    timestamp = df.index[pos]
    assert timestamp in df.index
    # it works!
    df.loc[timestamp]
    assert len(df.loc[[timestamp]]) > 0
def test_indexing_over_size_cutoff_period_index(monkeypatch):
    """PeriodIndex lookups work with the over-size index engine (GH 27136)."""
    # GH 27136
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)
    n = 1100
    idx = period_range("1/1/2000", freq="min", periods=n)
    # confirm the lowered cutoff actually triggered the over-size path
    assert idx._engine.over_size_threshold
    s = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx)
    pos = n - 1
    timestamp = idx[pos]
    assert timestamp in s.index
    # it works!
    s[timestamp]
    assert len(s.loc[[timestamp]]) > 0
def test_indexing_unordered():
    """Label lookup works on an unordered DatetimeIndex; partial slicing raises (GH 2437, GH 3448)."""
    # GH 2437
    rng = date_range(start="2011-01-01", end="2011-01-15")
    ts = Series(np.random.default_rng(2).random(len(rng)), index=rng)
    # ts2 has the same labels as ts but out of order
    ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])
    for t in ts.index:
        expected = ts[t]
        result = ts2[t]
        assert expected == result
    # GH 3448 (ranges)
    def compare(slobj):
        result = ts2[slobj].copy()
        result = result.sort_index()
        expected = ts[slobj]
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    for key in [
        slice("2011-01-01", "2011-01-15"),
        slice("2010-12-30", "2011-01-15"),
        slice("2011-01-01", "2011-01-16"),
        # partial ranges
        slice("2011-01-01", "2011-01-6"),
        slice("2011-01-06", "2011-01-8"),
        slice("2011-01-06", "2011-01-12"),
    ]:
        # value-based partial slicing is only defined on monotonic indexes
        with pytest.raises(
            KeyError, match="Value based partial slicing on non-monotonic"
        ):
            compare(key)
    # single values
    result = ts2["2011"].sort_index()
    expected = ts["2011"]
    expected.index = expected.index._with_freq(None)
    tm.assert_series_equal(result, expected)
def test_indexing_unordered2():
    """Year-string lookup on a shuffled monthly index returns only that year's rows."""
    # diff freq
    rng = date_range(datetime(2005, 1, 1), periods=20, freq="ME")
    ts = Series(np.arange(len(rng)), index=rng)
    # shuffle so the index is not monotonic
    ts = ts.take(np.random.default_rng(2).permutation(20))
    result = ts["2005"]
    for t in result.index:
        assert t.year == 2005
def test_indexing():
    """Year-string get/set on Series; DataFrame __getitem__ no longer row-slices (GH 3070, GH#36179)."""
    idx = date_range("2001-1-1", periods=20, freq="ME")
    ts = Series(np.random.default_rng(2).random(len(idx)), index=idx)
    # getting
    # GH 3070, make sure semantics work on Series/Frame
    result = ts["2001"]
    tm.assert_series_equal(result, ts.iloc[:12])
    df = DataFrame({"A": ts.copy()})
    # GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves
    # like any other key, so raises
    with pytest.raises(KeyError, match="2001"):
        df["2001"]
    # setting
    ts = Series(np.random.default_rng(2).random(len(idx)), index=idx)
    expected = ts.copy()
    expected.iloc[:12] = 1
    ts["2001"] = 1
    tm.assert_series_equal(ts, expected)
    expected = df.copy()
    expected.iloc[:12, 0] = 1
    df.loc["2001", "A"] = 1
    tm.assert_frame_equal(df, expected)
def test_getitem_str_month_with_datetimeindex():
    """A month string selects all rows of that month, including last-day times (GH3546)."""
    # GH3546 (not including times on the last day)
    idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="h")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    tm.assert_series_equal(expected, ts)
    idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="s")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    tm.assert_series_equal(expected, ts)
def test_getitem_str_year_with_datetimeindex():
    """A year string selects every timestamp within that year."""
    stamps = [
        Timestamp("2013-05-31 00:00"),
        Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
    ]
    ts = Series(range(len(stamps)), index=stamps)
    # both timestamps fall inside 2013, so the whole Series comes back
    tm.assert_series_equal(ts["2013"], ts)
def test_getitem_str_second_with_datetimeindex():
    """Second-resolution keys on a DataFrame hit column lookup and raise KeyError (GH14826)."""
    # GH14826, indexing with a seconds resolution string / datetime object
    df = DataFrame(
        np.random.default_rng(2).random((5, 5)),
        columns=["open", "high", "low", "close", "volume"],
        index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"),
    )
    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df["2012-01-02 18:01:02"]
    msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central'\)"
    with pytest.raises(KeyError, match=msg):
        df[df.index[2]]
def test_compare_datetime_with_all_none():
    """Comparing datetimes against an all-None Series yields all-False (GH#54870)."""
    dt_ser = Series(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
    none_ser = Series([None, None])
    comparison = dt_ser > none_ser
    tm.assert_series_equal(comparison, Series([False, False]))

View File

@ -0,0 +1,70 @@
import pytest
from pandas import (
Index,
Series,
date_range,
)
import pandas._testing as tm
class TestSeriesDelItem:
    """Tests for ``del ser[key]``."""

    def test_delitem(self):
        """del removes an item in place on an integer index (GH#5542)."""
        # GH#5542
        # should delete the item inplace
        s = Series(range(5))
        del s[0]
        expected = Series(range(1, 5), index=range(1, 5))
        tm.assert_series_equal(s, expected)
        del s[1]
        expected = Series(range(2, 5), index=range(2, 5))
        tm.assert_series_equal(s, expected)
        # only 1 left, del, add, del
        s = Series(1)
        del s[0]
        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))
        s[0] = 1
        tm.assert_series_equal(s, Series(1))
        del s[0]
        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64")))

    def test_delitem_object_index(self, using_infer_string):
        """del with a string label on an object/string index."""
        # Index(dtype=object)
        dtype = "string[pyarrow_numpy]" if using_infer_string else object
        s = Series(1, index=Index(["a"], dtype=dtype))
        del s["a"]
        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))
        s["a"] = 1
        tm.assert_series_equal(s, Series(1, index=Index(["a"], dtype=dtype)))
        del s["a"]
        tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype=dtype)))

    def test_delitem_missing_key(self):
        """del with an absent key raises KeyError rather than being a no-op."""
        # empty
        s = Series(dtype=object)
        with pytest.raises(KeyError, match=r"^0$"):
            del s[0]

    def test_delitem_extension_dtype(self):
        """del preserves extension dtypes (DatetimeTZ, Period) (GH#40386)."""
        # GH#40386
        # DatetimeTZDtype
        dti = date_range("2016-01-01", periods=3, tz="US/Pacific")
        ser = Series(dti)
        expected = ser[[0, 2]]
        del ser[1]
        assert ser.dtype == dti.dtype
        tm.assert_series_equal(ser, expected)
        # PeriodDtype
        pi = dti.tz_localize(None).to_period("D")
        ser = Series(pi)
        expected = ser[:2]
        del ser[2]
        assert ser.dtype == pi.dtype
        tm.assert_series_equal(ser, expected)

View File

@ -0,0 +1,238 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DatetimeIndex,
Index,
Series,
date_range,
)
import pandas._testing as tm
def test_get():
    """Series.get returns the default for missing labels and the value otherwise.

    GH 6383: integer key against default vs float indexes; GH 7407:
    value_counts results with integer and boolean indexes.
    """
    # GH 6383
    s = Series(
        np.array(
            [
                43,
                48,
                60,
                48,
                50,
                51,
                50,
                45,
                57,
                48,
                56,
                45,
                51,
                39,
                55,
                43,
                54,
                52,
                51,
                54,
            ]
        )
    )
    # default RangeIndex has no label 25 -> the default is returned
    result = s.get(25, 0)
    expected = 0
    assert result == expected
    s = Series(
        np.array(
            [
                43,
                48,
                60,
                48,
                50,
                51,
                50,
                45,
                57,
                48,
                56,
                45,
                51,
                39,
                55,
                43,
                54,
                52,
                51,
                54,
            ]
        ),
        index=Index(
            [
                25.0,
                36.0,
                49.0,
                64.0,
                81.0,
                100.0,
                121.0,
                144.0,
                169.0,
                196.0,
                1225.0,
                1296.0,
                1369.0,
                1444.0,
                1521.0,
                1600.0,
                1681.0,
                1764.0,
                1849.0,
                1936.0,
            ],
            dtype=np.float64,
        ),
    )
    # the float index does contain 25.0 -> the value, not the default
    result = s.get(25, 0)
    expected = 43
    assert result == expected
    # GH 7407
    # with a boolean accessor
    df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3})
    vc = df.i.value_counts()
    result = vc.get(99, default="Missing")
    assert result == "Missing"
    vc = df.b.value_counts()
    result = vc.get(False, default="Missing")
    assert result == 3
    result = vc.get(True, default="Missing")
    assert result == "Missing"
def test_get_nan(float_numpy_dtype):
# GH 8569
s = Index(range(10), dtype=float_numpy_dtype).to_series()
assert s.get(np.nan) is None
assert s.get(np.nan, default="Missing") == "Missing"
def test_get_nan_multiple(float_numpy_dtype):
# GH 8569
# ensure that fixing "test_get_nan" above hasn't broken get
# with multiple elements
s = Index(range(10), dtype=float_numpy_dtype).to_series()
idx = [2, 30]
assert s.get(idx) is None
idx = [2, np.nan]
assert s.get(idx) is None
# GH 17295 - all missing keys
idx = [20, 30]
assert s.get(idx) is None
idx = [np.nan, np.nan]
assert s.get(idx) is None
def test_get_with_default():
    """Series.get honors an explicit default for present and missing keys (GH#7725)."""
    # GH#7725
    d0 = ["a", "b", "c", "d"]
    d1 = np.arange(4, dtype="int64")
    for data, index in ((d0, d1), (d1, d0)):
        s = Series(data, index=index)
        for i, d in zip(index, data):
            assert s.get(i) == d
            assert s.get(i, d) == d
            assert s.get(i, "z") == d
        assert s.get("e", "z") == "z"
        assert s.get("e", "e") == "e"
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        warn = None
        if index is d0:
            # integer key on a string index falls back to positions (deprecated)
            warn = FutureWarning
        with tm.assert_produces_warning(warn, match=msg):
            assert s.get(10, "z") == "z"
            assert s.get(10, 10) == 10
@pytest.mark.parametrize(
    "arr",
    [
        np.random.default_rng(2).standard_normal(10),
        DatetimeIndex(date_range("2020-01-01", periods=10), name="a").tz_localize(
            tz="US/Eastern"
        ),
    ],
)
def test_get_with_ea(arr):
    """Series.get with EA-backed values: labels, lists, slices, positional fallback.

    GH#21260; GH#21257 for the sliced-Series case at the end.
    """
    # GH#21260
    ser = Series(arr, index=[2 * i for i in range(len(arr))])
    assert ser.get(4) == ser.iloc[2]
    result = ser.get([4, 6])
    expected = ser.iloc[[2, 3]]
    tm.assert_series_equal(result, expected)
    result = ser.get(slice(2))
    expected = ser.iloc[[0, 1]]
    tm.assert_series_equal(result, expected)
    assert ser.get(-1) is None
    assert ser.get(ser.index.max() + 1) is None
    ser = Series(arr[:6], index=list("abcdef"))
    assert ser.get("c") == ser.iloc[2]
    result = ser.get(slice("b", "d"))
    expected = ser.iloc[[1, 2, 3]]
    tm.assert_series_equal(result, expected)
    result = ser.get("Z")
    assert result is None
    # integer keys on a non-integer index fall back to positions (deprecated)
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(4) == ser.iloc[4]
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(-1) == ser.iloc[-1]
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert ser.get(len(ser)) is None
    # GH#21257
    ser = Series(arr)
    ser2 = ser[::2]
    assert ser2.get(1) is None
def test_getitem_get(string_series, object_series):
    """obj[label] and obj.get(label) agree; positional int .get is deprecated."""
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    for obj in [string_series, object_series]:
        idx = obj.index[5]
        assert obj[idx] == obj.get(idx)
        assert obj[idx] == obj.iloc[5]
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert string_series.get(-1) == string_series.get(string_series.index[-1])
    assert string_series.iloc[5] == string_series.get(string_series.index[5])
def test_get_none():
    """Series.get(None) returns None rather than raising (GH#5652)."""
    empty = Series(dtype=object)
    labeled = Series(dtype=object, index=list("abc"))
    for ser in (empty, labeled):
        assert ser.get(None) is None

View File

@ -0,0 +1,735 @@
"""
Series.__getitem__ test classes are organized by the type of key passed.
"""
from datetime import (
date,
datetime,
time,
)
import numpy as np
import pytest
from pandas._libs.tslibs import (
conversion,
timezones,
)
from pandas.core.dtypes.common import is_scalar
import pandas as pd
from pandas import (
Categorical,
DataFrame,
DatetimeIndex,
Index,
Series,
Timestamp,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.indexing import IndexingError
from pandas.tseries.offsets import BDay
class TestSeriesGetitemScalars:
    """Scalar-key ``__getitem__`` on Series across index types."""

    def test_getitem_object_index_float_string(self):
        """Mixed string/float labels on an object index both resolve."""
        # GH#17286
        ser = Series([1] * 4, index=Index(["a", "b", "c", 1.0]))
        assert ser["a"] == 1
        assert ser[1.0] == 1

    def test_getitem_float_keys_tuple_values(self):
        """Float keys select tuple values, scalar or sub-Series for duplicates."""
        # see GH#13509
        # unique Index
        ser = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name="foo")
        result = ser[0.0]
        assert result == (1, 1)
        # non-unique Index
        expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo")
        ser = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name="foo")
        result = ser[0.0]
        tm.assert_series_equal(result, expected)

    def test_getitem_unrecognized_scalar(self):
        """A key that lib.is_scalar does not recognize still works as a label."""
        # GH#32684 a scalar key that is not recognized by lib.is_scalar
        # a series that might be produced via `frame.dtypes`
        ser = Series([1, 2], index=[np.dtype("O"), np.dtype("i8")])
        key = ser.index[1]
        result = ser[key]
        assert result == 2

    def test_getitem_negative_out_of_bounds(self):
        """A too-negative positional key raises IndexError (after the deprecation warning)."""
        ser = Series(["a"] * 10, index=["a"] * 10)
        msg = "index -11 is out of bounds for axis 0 with size 10|index out of bounds"
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with pytest.raises(IndexError, match=msg):
            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                ser[-11]

    def test_getitem_out_of_bounds_indexerror(self, datetime_series):
        # don't segfault, GH#495
        msg = r"index \d+ is out of bounds for axis 0 with size \d+"
        warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
        with pytest.raises(IndexError, match=msg):
            with tm.assert_produces_warning(FutureWarning, match=warn_msg):
                datetime_series[len(datetime_series)]

    def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self):
        # GH#917
        # With a RangeIndex, an int key gives a KeyError
        ser = Series([], dtype=object)
        with pytest.raises(KeyError, match="-1"):
            ser[-1]

    def test_getitem_keyerror_with_integer_index(self, any_int_numpy_dtype):
        """Missing int and str keys raise KeyError on integer indexes, monotonic or not."""
        dtype = any_int_numpy_dtype
        ser = Series(
            np.random.default_rng(2).standard_normal(6),
            index=Index([0, 0, 1, 1, 2, 2], dtype=dtype),
        )
        with pytest.raises(KeyError, match=r"^5$"):
            ser[5]
        with pytest.raises(KeyError, match=r"^'c'$"):
            ser["c"]
        # not monotonic
        ser = Series(
            np.random.default_rng(2).standard_normal(6), index=[2, 2, 0, 0, 1, 1]
        )
        with pytest.raises(KeyError, match=r"^5$"):
            ser[5]
        with pytest.raises(KeyError, match=r"^'c'$"):
            ser["c"]

    def test_getitem_int64(self, datetime_series):
        """np.int64 keys take the (deprecated) positional path on a DatetimeIndex."""
        idx = np.int64(5)
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = datetime_series[idx]
        assert res == datetime_series.iloc[5]

    def test_getitem_full_range(self):
        """Selecting every label with a list key round-trips the Series."""
        # github.com/pandas-dev/pandas/commit/4f433773141d2eb384325714a2776bcc5b2e20f7
        ser = Series(range(5), index=list(range(5)))
        result = ser[list(range(5))]
        tm.assert_series_equal(result, ser)

    # ------------------------------------------------------------------
    # Series with DatetimeIndex

    @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"])
    def test_getitem_pydatetime_tz(self, tzstr):
        """A localized pydatetime key matches the equivalent Timestamp key."""
        tz = timezones.maybe_get_tz(tzstr)
        index = date_range(
            start="2012-12-24 16:00", end="2012-12-24 18:00", freq="h", tz=tzstr
        )
        ts = Series(index=index, data=index.hour)
        time_pandas = Timestamp("2012-12-24 17:00", tz=tzstr)
        dt = datetime(2012, 12, 24, 17, 0)
        time_datetime = conversion.localize_pydatetime(dt, tz)
        assert ts[time_pandas] == ts[time_datetime]

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_string_index_alias_tz_aware(self, tz):
        """A date string selects from a tz-aware index."""
        rng = date_range("1/1/2000", periods=10, tz=tz)
        ser = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
        result = ser["1/3/2000"]
        tm.assert_almost_equal(result, ser.iloc[2])

    def test_getitem_time_object(self):
        """A datetime.time key selects every row at that time of day."""
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
        mask = (rng.hour == 9) & (rng.minute == 30)
        result = ts[time(9, 30)]
        expected = ts[mask]
        result.index = result.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    # ------------------------------------------------------------------
    # Series with CategoricalIndex

    def test_getitem_scalar_categorical_index(self):
        """A category value used as a key resolves on a CategoricalIndex."""
        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
        ser = Series([1, 2], index=cats)
        expected = ser.iloc[0]
        result = ser[cats[0]]
        assert result == expected

    def test_getitem_numeric_categorical_listlike_matches_scalar(self):
        # GH#15470
        ser = Series(["a", "b", "c"], index=pd.CategoricalIndex([2, 1, 0]))
        # 0 is treated as a label
        assert ser[0] == "c"
        # the listlike analogue should also be treated as labels
        res = ser[[0]]
        expected = ser.iloc[-1:]
        tm.assert_series_equal(res, expected)
        res2 = ser[[0, 1, 2]]
        tm.assert_series_equal(res2, ser.iloc[::-1])

    def test_getitem_integer_categorical_not_positional(self):
        """Integer keys on an integer-categorical index are labels, not positions."""
        # GH#14865
        ser = Series(["a", "b", "c"], index=Index([1, 2, 3], dtype="category"))
        assert ser.get(3) == "c"
        assert ser[3] == "c"

    def test_getitem_str_with_timedeltaindex(self):
        """Timedelta strings resolve as labels; out-of-range strings raise KeyError."""
        rng = timedelta_range("1 day 10:11:12", freq="h", periods=500)
        ser = Series(np.arange(len(rng)), index=rng)
        key = "6 days, 23:11:12"
        indexer = rng.get_loc(key)
        assert indexer == 133
        result = ser[key]
        assert result == ser.iloc[133]
        msg = r"^Timedelta\('50 days 00:00:00'\)$"
        with pytest.raises(KeyError, match=msg):
            rng.get_loc("50 days")
        with pytest.raises(KeyError, match=msg):
            ser["50 days"]

    def test_getitem_bool_index_positional(self):
        """Int key on a boolean index is positional (deprecated) (GH#48653)."""
        # GH#48653
        ser = Series({True: 1, False: 0})
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = ser[0]
        assert result == 1
class TestSeriesGetitemSlices:
    """Slicing through Series.__getitem__: partial strings, dates, ints."""

    def test_getitem_partial_str_slice_with_datetimeindex(self):
        # GH#34860: a bare year string selects the whole year
        arr = date_range("1/1/2008", "1/1/2009")
        ser = arr.to_series()
        result = ser["2008"]

        rng = date_range(start="2008-01-01", end="2008-12-31")
        expected = Series(rng, index=rng)

        tm.assert_series_equal(result, expected)

    def test_getitem_slice_strings_with_datetimeindex(self):
        # string endpoints are parsed and slice inclusively by label
        idx = DatetimeIndex(
            ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]
        )

        ts = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx)

        result = ts["1/2/2000":]
        expected = ts[1:]
        tm.assert_series_equal(result, expected)

        result = ts["1/2/2000":"1/3/2000"]
        expected = ts[1:4]
        tm.assert_series_equal(result, expected)

    def test_getitem_partial_str_slice_with_timedeltaindex(self):
        # partial timedelta strings slice by label, endpoints inclusive
        rng = timedelta_range("1 day 10:11:12", freq="h", periods=500)
        ser = Series(np.arange(len(rng)), index=rng)

        result = ser["5 day":"6 day"]
        expected = ser.iloc[86:134]
        tm.assert_series_equal(result, expected)

        result = ser["5 day":]
        expected = ser.iloc[86:]
        tm.assert_series_equal(result, expected)

        result = ser[:"6 day"]
        expected = ser.iloc[:134]
        tm.assert_series_equal(result, expected)

    def test_getitem_partial_str_slice_high_reso_with_timedeltaindex(self):
        # higher reso
        rng = timedelta_range("1 day 10:11:12", freq="us", periods=2000)
        ser = Series(np.arange(len(rng)), index=rng)

        result = ser["1 day 10:11:12":]
        expected = ser.iloc[0:]
        tm.assert_series_equal(result, expected)

        result = ser["1 day 10:11:12.001":]
        expected = ser.iloc[1000:]
        tm.assert_series_equal(result, expected)

        # a full-resolution string is a scalar lookup, not a slice
        result = ser["1 days, 10:11:12.001001"]
        assert result == ser.iloc[1001]

    def test_getitem_slice_2d(self, datetime_series):
        # GH#30588 multi-dimensional indexing deprecated
        with pytest.raises(ValueError, match="Multi-dimensional indexing"):
            datetime_series[:, np.newaxis]

    def test_getitem_median_slice_bug(self):
        # single-item list of slices raises; 1-tuple of a slice is fine
        index = date_range("20090415", "20090519", freq="2B")
        ser = Series(np.random.default_rng(2).standard_normal(13), index=index)

        indexer = [slice(6, 7, None)]
        msg = "Indexing with a single-item list"
        with pytest.raises(ValueError, match=msg):
            # GH#31299
            ser[indexer]
        # but we're OK with a single-element tuple
        result = ser[(indexer[0],)]
        expected = ser[indexer[0]]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "slc, positions",
        [
            [slice(date(2018, 1, 1), None), [0, 1, 2]],
            [slice(date(2019, 1, 2), None), [2]],
            [slice(date(2020, 1, 1), None), []],
            [slice(None, date(2020, 1, 1)), [0, 1, 2]],
            [slice(None, date(2019, 1, 1)), [0]],
        ],
    )
    def test_getitem_slice_date(self, slc, positions):
        # https://github.com/pandas-dev/pandas/issues/31501
        # datetime.date slice bounds work against a DatetimeIndex
        ser = Series(
            [0, 1, 2],
            DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]),
        )
        result = ser[slc]
        expected = ser.take(positions)
        tm.assert_series_equal(result, expected)

    def test_getitem_slice_float_raises(self, datetime_series):
        # float slice bounds are rejected on a DatetimeIndex
        msg = (
            "cannot do slice indexing on DatetimeIndex with these indexers "
            r"\[{key}\] of type float"
        )
        with pytest.raises(TypeError, match=msg.format(key=r"4\.0")):
            datetime_series[4.0:10.0]

        with pytest.raises(TypeError, match=msg.format(key=r"4\.5")):
            datetime_series[4.5:10.0]

    def test_getitem_slice_bug(self):
        # out-of-bounds slice endpoints are clipped, not errors
        ser = Series(range(10), index=list(range(10)))

        result = ser[-12:]
        tm.assert_series_equal(result, ser)

        result = ser[-7:]
        tm.assert_series_equal(result, ser[3:])

        result = ser[:-12]
        tm.assert_series_equal(result, ser[:0])

    def test_getitem_slice_integers(self):
        # an integer slice is positional here (4 rows returned, not the
        # label-based 2) even though the index holds integers
        ser = Series(
            np.random.default_rng(2).standard_normal(8),
            index=[2, 4, 6, 8, 10, 12, 14, 16],
        )

        result = ser[:4]
        expected = Series(ser.values[:4], index=[2, 4, 6, 8])
        tm.assert_series_equal(result, expected)
class TestSeriesGetitemListLike:
    """List-like keys: list, ndarray, Index and Series of labels."""

    @pytest.mark.parametrize("box", [list, np.array, Index, Series])
    def test_getitem_no_matches(self, box):
        # GH#33462 we expect the same behavior for list/ndarray/Index/Series
        ser = Series(["A", "B"])

        key = Series(["C"], dtype=object)
        key = box(key)

        msg = (
            r"None of \[Index\(\['C'\], dtype='object|string'\)\] are in the \[index\]"
        )
        with pytest.raises(KeyError, match=msg):
            ser[key]

    def test_getitem_intlist_intindex_periodvalues(self):
        # integer-list selection preserves the Period dtype of the values
        ser = Series(period_range("2000-01-01", periods=10, freq="D"))

        result = ser[[2, 4]]
        exp = Series(
            [pd.Period("2000-01-03", freq="D"), pd.Period("2000-01-05", freq="D")],
            index=[2, 4],
            dtype="Period[D]",
        )
        tm.assert_series_equal(result, exp)
        assert result.dtype == "Period[D]"

    @pytest.mark.parametrize("box", [list, np.array, Index])
    def test_getitem_intlist_intervalindex_non_int(self, box):
        # GH#33404 fall back to positional since ints are unambiguous
        dti = date_range("2000-01-03", periods=3)._with_freq(None)
        ii = pd.IntervalIndex.from_breaks(dti)
        ser = Series(range(len(ii)), index=ii)

        expected = ser.iloc[:1]
        key = box([0])
        msg = "Series.__getitem__ treating keys as positions is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = ser[key]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("box", [list, np.array, Index])
    @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64])
    def test_getitem_intlist_multiindex_numeric_level(self, dtype, box):
        # GH#33404 do _not_ fall back to positional since ints are ambiguous
        idx = Index(range(4)).astype(dtype)
        dti = date_range("2000-01-03", periods=3)
        mi = pd.MultiIndex.from_product([idx, dti])
        ser = Series(range(len(mi))[::-1], index=mi)

        key = box([5])
        with pytest.raises(KeyError, match="5"):
            ser[key]

    def test_getitem_uint_array_key(self, any_unsigned_int_numpy_dtype):
        # GH #37218
        # unsigned-integer array keys are labels; a missing label raises
        ser = Series([1, 2, 3])
        key = np.array([4], dtype=any_unsigned_int_numpy_dtype)

        with pytest.raises(KeyError, match="4"):
            ser[key]
        with pytest.raises(KeyError, match="4"):
            ser.loc[key]
class TestGetitemBooleanMask:
    """Boolean-mask getitem: lists, object-dtype masks, alignment, freq."""

    def test_getitem_boolean(self, string_series):
        ser = string_series
        mask = ser > ser.median()

        # passing list is OK
        result = ser[list(mask)]
        expected = ser[mask]
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.index, ser.index[mask])

    def test_getitem_boolean_empty(self):
        # empty masks work and preserve dtype and index name
        ser = Series([], dtype=np.int64)
        ser.index.name = "index_name"
        ser = ser[ser.isna()]
        assert ser.index.name == "index_name"
        assert ser.dtype == np.int64

        # GH#5877
        # indexing with empty series
        ser = Series(["A", "B"], dtype=object)
        expected = Series(dtype=object, index=Index([], dtype="int64"))
        result = ser[Series([], dtype=object)]
        tm.assert_series_equal(result, expected)

        # invalid because of the boolean indexer
        # that's empty or not-aligned
        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        with pytest.raises(IndexingError, match=msg):
            ser[Series([], dtype=bool)]

        with pytest.raises(IndexingError, match=msg):
            ser[Series([True], dtype=bool)]

    def test_getitem_boolean_object(self, string_series):
        # using column from DataFrame
        # object-dtype masks of booleans behave like bool masks, but a
        # NaN inside one is an error
        ser = string_series
        mask = ser > ser.median()
        omask = mask.astype(object)

        # getitem
        result = ser[omask]
        expected = ser[mask]
        tm.assert_series_equal(result, expected)

        # setitem
        s2 = ser.copy()
        cop = ser.copy()
        cop[omask] = 5
        s2[mask] = 5
        tm.assert_series_equal(cop, s2)

        # nans raise exception
        omask[5:10] = np.nan
        msg = "Cannot mask with non-boolean array containing NA / NaN values"
        with pytest.raises(ValueError, match=msg):
            ser[omask]
        with pytest.raises(ValueError, match=msg):
            ser[omask] = 5

    def test_getitem_boolean_dt64_copies(self):
        # GH#36210
        # boolean getitem returns a copy, never a view
        dti = date_range("2016-01-01", periods=4, tz="US/Pacific")
        key = np.array([True, True, False, False])

        ser = Series(dti._data)

        res = ser[key]
        assert res._values._ndarray.base is None

        # compare with numeric case for reference
        ser2 = Series(range(4))
        res2 = ser2[key]
        assert res2._values.base is None

    def test_getitem_boolean_corner(self, datetime_series):
        # a mask whose index does not align with the target raises
        ts = datetime_series
        mask_shifted = ts.shift(1, freq=BDay()) > ts.median()

        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        with pytest.raises(IndexingError, match=msg):
            ts[mask_shifted]

        with pytest.raises(IndexingError, match=msg):
            ts.loc[mask_shifted]

    def test_getitem_boolean_different_order(self, string_series):
        # a boolean-Series mask aligns on index, so its order is irrelevant
        ordered = string_series.sort_values()

        sel = string_series[ordered > 0]
        exp = string_series[string_series > 0]
        tm.assert_series_equal(sel, exp)

    def test_getitem_boolean_contiguous_preserve_freq(self):
        # a contiguous mask preserves the index freq; a gap drops it
        rng = date_range("1/1/2000", "3/1/2000", freq="B")

        mask = np.zeros(len(rng), dtype=bool)
        mask[10:20] = True

        masked = rng[mask]
        expected = rng[10:20]
        assert expected.freq == rng.freq
        tm.assert_index_equal(masked, expected)

        mask[22] = True
        masked = rng[mask]
        assert masked.freq is None
class TestGetitemCallable:
    """Series.__getitem__ with a callable key (GH#12533)."""

    def test_getitem_callable(self):
        # A callable key is evaluated against the Series and its return
        # value is then used as the actual indexer.
        ser = Series(4, index=list("ABCD"))

        # scalar label
        assert ser[lambda x: "A"] == ser.loc["A"]

        # list of labels
        tm.assert_series_equal(ser[lambda x: ["A", "B"]], ser.loc[["A", "B"]])

        # boolean mask
        mask = [True, False, True, True]
        tm.assert_series_equal(ser[lambda x: mask], ser.iloc[[0, 2, 3]])
def test_getitem_generator(string_series):
    # generators and iterators of booleans act like boolean masks
    gen = (x > 0 for x in string_series)
    result = string_series[gen]
    result2 = string_series[iter(string_series > 0)]
    expected = string_series[string_series > 0]
    tm.assert_series_equal(result, expected)
    tm.assert_series_equal(result2, expected)


@pytest.mark.parametrize(
    "series",
    [
        Series([0, 1]),
        Series(date_range("2012-01-01", periods=2)),
        Series(date_range("2012-01-01", periods=2, tz="CET")),
    ],
)
def test_getitem_ndim_deprecated(series):
    # multi-dimensional indexing on a Series is disallowed
    with pytest.raises(ValueError, match="Multi-dimensional indexing"):
        series[:, None]


def test_getitem_multilevel_scalar_slice_not_implemented(
    multiindex_year_month_day_dataframe_random_data,
):
    # not implementing this for now
    # mixing a scalar level key with a positional slice raises
    df = multiindex_year_month_day_dataframe_random_data
    ser = df["A"]

    msg = r"\(2000, slice\(3, 4, None\)\)"
    with pytest.raises(TypeError, match=msg):
        ser[2000, 3:4]
def test_getitem_dataframe_raises():
    # Indexing a Series with a (boolean) DataFrame is ambiguous and raises.
    labels = list(range(10))
    ser = Series(10, index=labels)
    frame = DataFrame(labels, index=labels)

    expected_msg = (
        "Indexing a Series with DataFrame is not supported, "
        "use the appropriate DataFrame column"
    )
    with pytest.raises(TypeError, match=expected_msg):
        ser[frame > 5]
def test_getitem_assignment_series_alignment():
    # https://github.com/pandas-dev/pandas/issues/37427
    # Assigning a Series via __setitem__ with an array key does NOT align
    # on index first; the rhs values are taken positionally.
    ser = Series(range(10))
    positions = np.array([2, 4, 9])
    ser[positions] = Series([10, 11, 12])
    tm.assert_series_equal(ser, Series([0, 1, 10, 3, 11, 5, 6, 7, 8, 12]))
def test_getitem_duplicate_index_mistyped_key_raises_keyerror():
    # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError
    ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])
    with pytest.raises(KeyError, match="None"):
        ser[None]

    with pytest.raises(KeyError, match="None"):
        ser.index.get_loc(None)

    with pytest.raises(KeyError, match="None"):
        ser.index._engine.get_loc(None)
def test_getitem_1tuple_slice_without_multiindex():
    # A 1-tuple containing a slice behaves exactly like the bare slice,
    # even without a MultiIndex.
    ser = Series(range(5))
    sl = slice(3)
    tm.assert_series_equal(ser[(sl,)], ser[sl])
def test_getitem_preserve_name(datetime_series):
    # every getitem path should propagate the Series name
    result = datetime_series[datetime_series > 0]
    assert result.name == datetime_series.name

    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = datetime_series[[0, 2, 4]]
    assert result.name == datetime_series.name

    result = datetime_series[5:10]
    assert result.name == datetime_series.name
def test_getitem_with_integer_labels():
    # integer indexes, be careful: list-like integer keys are labels, so
    # missing labels raise instead of falling back to positions
    ser = Series(
        np.random.default_rng(2).standard_normal(10), index=list(range(0, 20, 2))
    )
    for key in ([0, 2, 5, 7, 8], np.array([0, 2, 5, 7, 8])):
        with pytest.raises(KeyError, match="not in index"):
            ser[key]
def test_getitem_missing(datetime_series):
    # missing
    # a timestamp outside the index raises KeyError naming the Timestamp
    d = datetime_series.index[0] - BDay()
    msg = r"Timestamp\('1999-12-31 00:00:00'\)"
    with pytest.raises(KeyError, match=msg):
        datetime_series[d]


def test_getitem_fancy(string_series, object_series):
    # positional list keys warn but still index by position
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        slice1 = string_series[[1, 2, 3]]
        slice2 = object_series[[1, 2, 3]]
    assert string_series.index[2] == slice1.index[1]
    assert object_series.index[2] == slice2.index[1]
    assert string_series.iloc[2] == slice1.iloc[1]
    assert object_series.iloc[2] == slice2.iloc[1]


def test_getitem_box_float64(datetime_series):
    # scalar access boxes a float64 value as np.float64
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        value = datetime_series[5]
    assert isinstance(value, np.float64)
def test_getitem_unordered_dup():
    # A unique label in an unsorted index with duplicates still returns
    # a scalar, not a sub-Series.
    obj = Series(range(5), index=["c", "a", "a", "b", "b"])
    value = obj["c"]
    assert is_scalar(value)
    assert value == 0
def test_getitem_dups():
    # A duplicated label returns all matching rows as a Series.
    ser = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64)
    result = ser["C"]
    tm.assert_series_equal(result, Series([3, 4], index=["C", "C"], dtype=np.int64))
def test_getitem_categorical_str():
    # GH#31765: a string label on a Categorical index selects every match
    ser = Series(range(5), index=Categorical(["a", "b", "c", "a", "b"]))
    tm.assert_series_equal(ser["a"], ser.iloc[[0, 3]])
def test_slice_can_reorder_not_uniquely_indexed():
    # Reversed slicing must not choke on duplicate index labels.
    ser = Series(1, index=["a", "a", "b", "b", "c"])
    _ = ser[::-1]  # it works!
@pytest.mark.parametrize("index_vals", ["aabcd", "aadcb"])
def test_duplicated_index_getitem_positional_indexer(index_vals):
    # GH 11747
    # an int key on a string index is positional (deprecated), even when
    # the index has duplicates
    s = Series(range(5), index=list(index_vals))
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s[3]
    assert result == 3
class TestGetitemDeprecatedIndexers:
    """dict and set keys are rejected outright (GH#42825, enforced in 2.0)."""

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        # GH#42825 enforced in 2.0
        ser = Series([1, 2, 3])
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            ser[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_disallowed(self, key):
        # GH#42825 enforced in 2.0
        ser = Series([1, 2, 3])
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            ser[key] = 1

View File

@ -0,0 +1,518 @@
""" test get/set & misc """
from datetime import timedelta
import re
import numpy as np
import pytest
from pandas.errors import IndexingError
from pandas import (
NA,
DataFrame,
Index,
IndexSlice,
MultiIndex,
NaT,
Series,
Timedelta,
Timestamp,
concat,
date_range,
isna,
period_range,
timedelta_range,
)
import pandas._testing as tm
def test_basic_indexing():
    # int keys on a string index are positional (deprecated) and
    # bounds-checked; unknown string labels raise KeyError
    s = Series(
        np.random.default_rng(2).standard_normal(5), index=["a", "b", "a", "a", "b"]
    )

    warn_msg = "Series.__[sg]etitem__ treating keys as positions is deprecated"
    msg = "index 5 is out of bounds for axis 0 with size 5"
    with pytest.raises(IndexError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            s[5]
    with pytest.raises(IndexError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            s[5] = 0

    with pytest.raises(KeyError, match=r"^'c'$"):
        s["c"]

    s = s.sort_index()

    with pytest.raises(IndexError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            s[5]
    msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
    with pytest.raises(IndexError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            s[5] = 0
def test_getitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
# GH51053
dtype = any_numeric_dtype
idx = Index([1, 0, 1], dtype=dtype)
ser = Series(range(3), index=idx)
result = ser[1]
expected = Series([0, 2], index=Index([1, 1], dtype=dtype))
tm.assert_series_equal(result, expected, check_exact=True)
def test_setitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
# GH51053
dtype = any_numeric_dtype
idx = Index([1, 0, 1], dtype=dtype)
ser = Series(range(3), index=idx)
ser[1] = 10
expected = Series([10, 1, 10], index=idx)
tm.assert_series_equal(ser, expected, check_exact=True)
def test_basic_getitem_with_labels(datetime_series):
    # label-array getitem matches reindex; label-slice matches .loc
    indices = datetime_series.index[[5, 10, 15]]

    result = datetime_series[indices]
    expected = datetime_series.reindex(indices)
    tm.assert_series_equal(result, expected)

    result = datetime_series[indices[0] : indices[2]]
    expected = datetime_series.loc[indices[0] : indices[2]]
    tm.assert_series_equal(result, expected)
def test_basic_getitem_dt64tz_values():
    # GH12089
    # with tz for values: label, positional and plain __getitem__ access
    # all box the value as a tz-aware Timestamp
    ser = Series(
        date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"]
    )
    expected = Timestamp("2011-01-01", tz="US/Eastern")

    for result in (ser.loc["a"], ser.iloc[0], ser["a"]):
        assert result == expected
def test_getitem_setitem_ellipsis(using_copy_on_write, warn_copy_on_write):
    # Ellipsis selects the whole Series; assignment through it mutates
    # the original view only pre-CoW
    s = Series(np.random.default_rng(2).standard_normal(10))

    result = s[...]
    tm.assert_series_equal(result, s)

    with tm.assert_cow_warning(warn_copy_on_write):
        s[...] = 5
    if not using_copy_on_write:
        assert (result == 5).all()


@pytest.mark.parametrize(
    "result_1, duplicate_item, expected_1",
    [
        [
            Series({1: 12, 2: [1, 2, 2, 3]}),
            Series({1: 313}),
            Series({1: 12}, dtype=object),
        ],
        [
            Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
            Series({1: [1, 2, 3]}),
            Series({1: [1, 2, 3]}),
        ],
    ],
)
def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1):
    # GH 17610
    # a duplicated label returns all matches; a unique one stays scalar
    result = result_1._append(duplicate_item)
    expected = expected_1._append(duplicate_item)
    tm.assert_series_equal(result[1], expected)
    assert result[2] == result_1[2]
def test_getitem_setitem_integers():
    # caused bug without test: positional and label access must agree,
    # and a positional write must be visible through the label
    ser = Series([1, 2, 3], ["a", "b", "c"])

    assert ser.iloc[0] == ser["a"]
    ser.iloc[0] = 5
    tm.assert_almost_equal(ser["a"], 5)
def test_series_box_timestamp():
    # every scalar-access path boxes a datetime64 value as Timestamp
    rng = date_range("20090415", "20090519", freq="B")
    ser = Series(rng)
    assert isinstance(ser[0], Timestamp)
    assert isinstance(ser.at[1], Timestamp)
    assert isinstance(ser.iat[2], Timestamp)
    assert isinstance(ser.loc[3], Timestamp)
    assert isinstance(ser.iloc[4], Timestamp)

    ser = Series(rng, index=rng)
    # an int key on a DatetimeIndex falls back to positional (deprecated)
    msg = "Series.__getitem__ treating keys as positions is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        assert isinstance(ser[0], Timestamp)
    assert isinstance(ser.at[rng[1]], Timestamp)
    assert isinstance(ser.iat[2], Timestamp)
    assert isinstance(ser.loc[rng[3]], Timestamp)
    assert isinstance(ser.iloc[4], Timestamp)
def test_series_box_timedelta():
    # every scalar-access path boxes a timedelta64 value as Timedelta
    rng = timedelta_range("1 day 1 s", periods=5, freq="h")
    ser = Series(rng)

    accessed = [ser[0], ser.at[1], ser.iat[2], ser.loc[3], ser.iloc[4]]
    assert all(isinstance(value, Timedelta) for value in accessed)
def test_getitem_ambiguous_keyerror(indexer_sl):
    # a label missing from an int index raises; no positional fallback
    ser = Series(range(10), index=list(range(0, 20, 2)))
    with pytest.raises(KeyError, match=r"^1$"):
        indexer_sl(ser)[1]


def test_getitem_dups_with_missing(indexer_sl):
    # breaks reindex, so need to use .loc internally
    # GH 4246
    ser = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"])
    with pytest.raises(KeyError, match=re.escape("['bam'] not in index")):
        indexer_sl(ser)[["foo", "bar", "bah", "bam"]]


def test_setitem_ambiguous_keyerror(indexer_sl):
    # assignment to a missing label enlarges the Series
    s = Series(range(10), index=list(range(0, 20, 2)))

    # equivalent of an append
    s2 = s.copy()
    indexer_sl(s2)[1] = 5
    expected = concat([s, Series([5], index=[1])])
    tm.assert_series_equal(s2, expected)


def test_setitem(datetime_series):
    # NaN assignment via label, iloc-list and iloc-scalar; a boolean-mask
    # setitem then replaces the NaNs again
    datetime_series[datetime_series.index[5]] = np.nan
    datetime_series.iloc[[1, 2, 17]] = np.nan
    datetime_series.iloc[6] = np.nan
    assert np.isnan(datetime_series.iloc[6])
    assert np.isnan(datetime_series.iloc[2])
    datetime_series[np.isnan(datetime_series)] = 5
    assert not np.isnan(datetime_series.iloc[2])


def test_setslice(datetime_series):
    # slicing a unique index yields a unique index
    sl = datetime_series[5:20]
    assert len(sl) == len(sl.index)
    assert sl.index.is_unique is True
def test_basic_getitem_setitem_corner(datetime_series):
    # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
    msg = "key of type tuple not found and not a MultiIndex"
    with pytest.raises(KeyError, match=msg):
        datetime_series[:, 2]
    with pytest.raises(KeyError, match=msg):
        datetime_series[:, 2] = 2

    # weird lists. [slice(0, 5)] raises but not two slices
    msg = "Indexing with a single-item list"
    with pytest.raises(ValueError, match=msg):
        # GH#31299
        datetime_series[[slice(None, 5)]]

    # but we're OK with a single-element tuple
    result = datetime_series[(slice(None, 5),)]
    expected = datetime_series[:5]
    tm.assert_series_equal(result, expected)

    # OK
    msg = r"unhashable type(: 'slice')?"
    with pytest.raises(TypeError, match=msg):
        datetime_series[[5, [None, None]]]
    with pytest.raises(TypeError, match=msg):
        datetime_series[[5, [None, None]]] = 2
def test_slice(string_series, object_series, using_copy_on_write, warn_copy_on_write):
    # basic positional slicing; a slice is a view pre-CoW, a copy under CoW
    original = string_series.copy()
    numSlice = string_series[10:20]
    numSliceEnd = string_series[-10:]
    objSlice = object_series[10:20]

    assert string_series.index[9] not in numSlice.index
    assert object_series.index[9] not in objSlice.index

    assert len(numSlice) == len(numSlice.index)
    assert string_series[numSlice.index[0]] == numSlice[numSlice.index[0]]

    assert numSlice.index[1] == string_series.index[11]
    tm.assert_numpy_array_equal(np.array(numSliceEnd), np.array(string_series)[-10:])

    # Test return view.
    sl = string_series[10:20]
    with tm.assert_cow_warning(warn_copy_on_write):
        sl[:] = 0

    if using_copy_on_write:
        # Doesn't modify parent (CoW)
        tm.assert_series_equal(string_series, original)
    else:
        assert (string_series[10:20] == 0).all()
def test_timedelta_assignment():
    # GH 8209: loc-setitem of a datetime.timedelta coerces to Timedelta,
    # enlarging the Series as needed
    ser = Series([], dtype=object)
    ser.loc["B"] = timedelta(1)
    tm.assert_series_equal(ser, Series(Timedelta("1 days"), index=["B"]))

    ser = ser.reindex(ser.index.insert(0, "A"))
    tm.assert_series_equal(
        ser, Series([np.nan, Timedelta("1 days")], index=["A", "B"])
    )

    ser.loc["A"] = timedelta(1)
    tm.assert_series_equal(ser, Series(Timedelta("1 days"), index=["A", "B"]))
def test_underlying_data_conversion(using_copy_on_write):
    # GH 4080
    # Series.update through a column view: under CoW the chained
    # assignment raises and the parent frame is untouched
    df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
    return_value = df.set_index(["a", "b", "c"], inplace=True)
    assert return_value is None

    s = Series([1], index=[(2, 2, 2)])
    df["val"] = 0
    df_original = df.copy()
    df

    if using_copy_on_write:
        with tm.raises_chained_assignment_error():
            df["val"].update(s)
        expected = df_original
    else:
        with tm.assert_produces_warning(FutureWarning, match="inplace method"):
            df["val"].update(s)

        expected = DataFrame(
            {"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3], "val": [0, 1, 0]}
        )
        return_value = expected.set_index(["a", "b", "c"], inplace=True)
        assert return_value is None
    tm.assert_frame_equal(df, expected)


def test_preserve_refs(datetime_series):
    # a fancy-indexed result is a copy; mutating it leaves the parent intact
    seq = datetime_series.iloc[[5, 10, 15]]
    seq.iloc[1] = np.nan
    assert not np.isnan(datetime_series.iloc[10])


def test_multilevel_preserve_name(lexsorted_two_level_string_multiindex, indexer_sl):
    # partial-level selection on a MultiIndex keeps the Series name
    index = lexsorted_two_level_string_multiindex
    ser = Series(
        np.random.default_rng(2).standard_normal(len(index)), index=index, name="sth"
    )

    result = indexer_sl(ser)["foo"]
    assert result.name == ser.name
# miscellaneous methods


@pytest.mark.parametrize(
    "index",
    [
        date_range("2014-01-01", periods=20, freq="MS"),
        period_range("2014-01", periods=20, freq="M"),
        timedelta_range("0", periods=20, freq="h"),
    ],
)
def test_slice_with_negative_step(index):
    # string and scalar slice bounds must behave identically with step -1
    keystr1 = str(index[9])
    keystr2 = str(index[13])

    ser = Series(np.arange(20), index)
    SLC = IndexSlice

    for key in [keystr1, index[9]]:
        tm.assert_indexing_slices_equivalent(ser, SLC[key::-1], SLC[9::-1])
        tm.assert_indexing_slices_equivalent(ser, SLC[:key:-1], SLC[:8:-1])
        for key2 in [keystr2, index[13]]:
            tm.assert_indexing_slices_equivalent(ser, SLC[key2:key:-1], SLC[13:8:-1])
            tm.assert_indexing_slices_equivalent(ser, SLC[key:key2:-1], SLC[0:0:-1])
def test_tuple_index():
    # GH 35534 - Selecting values when a Series has an Index of tuples
    key_a, key_b = ("a",), ("b",)
    ser = Series([1, 2], index=[key_a, key_b])

    assert ser[key_a] == 1
    assert ser[key_b] == 2
    ser[key_b] = 3
    assert ser[key_b] == 3
def test_frozenset_index():
    # GH35747 - Selecting values when a Series has an Index of frozenset
    key0, key1 = frozenset("a"), frozenset("b")
    ser = Series([1, 2], index=[key0, key1])

    assert ser[key0] == 1
    assert ser[key1] == 2
    ser[key1] = 3
    assert ser[key1] == 3
def test_loc_setitem_all_false_indexer():
    # GH#45778: assignment through an all-False boolean mask is a no-op
    ser = Series([1, 2], index=["a", "b"])
    unchanged = ser.copy()

    ser.loc[ser > 100] = Series([6, 7], index=["a", "b"])
    tm.assert_series_equal(ser, unchanged)
def test_loc_boolean_indexer_non_matching_index():
    # GH#46551: pd.NA in a nullable-boolean mask is treated as False by .loc
    ser = Series([1])
    result = ser.loc[Series([NA, False], dtype="boolean")]
    tm.assert_series_equal(result, Series([], dtype="int64"))
def test_loc_boolean_indexer_miss_matching_index():
    # GH#46551: a boolean mask whose index does not align with the target
    # raises instead of silently reindexing
    ser = Series([1])
    mask = Series([NA, False], dtype="boolean", index=[1, 2])
    with pytest.raises(IndexingError, match="Unalignable"):
        ser.loc[mask]
def test_loc_setitem_nested_data_enlargement():
    # GH#48614
    # enlarging loc-setitem works even when the values are DataFrames
    df = DataFrame({"a": [1]})
    ser = Series({"label": df})
    ser.loc["new_label"] = df
    expected = Series({"label": df, "new_label": df})
    tm.assert_series_equal(ser, expected)
def test_loc_ea_numeric_index_oob_slice_end():
    # GH#50161: a slice end beyond the last label is allowed on an
    # extension-array (Int64) index
    ser = Series(1, index=Index([0, 1, 2], dtype="Int64"))
    expected = Series(1, index=Index([2], dtype="Int64"))
    tm.assert_series_equal(ser.loc[2:3], expected)
def test_getitem_bool_int_key():
    # GH#48653: .loc with int 0 on a bool-labeled index does not coerce
    # the key to False — it raises
    ser = Series({True: 1, False: 0})
    with pytest.raises(KeyError, match="0"):
        ser.loc[0]
@pytest.mark.parametrize("val", [{}, {"b": "x"}])
@pytest.mark.parametrize("indexer", [[], [False, False], slice(0, -1), np.array([])])
def test_setitem_empty_indexer(indexer, val):
    # GH#45981
    # assignment through an empty indexer must be a no-op
    df = DataFrame({"a": [1, 2], **val})
    expected = df.copy()
    df.loc[indexer] = 1.5
    tm.assert_frame_equal(df, expected)
class TestDeprecatedIndexers:
    """dict and set keys are rejected by .loc (GH#42825, enforced in 2.0)."""

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        # GH#42825 enforced in 2.0
        ser = Series([1, 2])
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            ser.loc[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
    def test_getitem_dict_and_set_deprecated_multiindex(self, key):
        # GH#42825 enforced in 2.0
        ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)]))
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            ser.loc[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_disallowed(self, key):
        # GH#42825 enforced in 2.0
        ser = Series([1, 2])
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            ser.loc[key] = 1

    @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
    def test_setitem_dict_and_set_disallowed_multiindex(self, key):
        # GH#42825 enforced in 2.0
        ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)]))
        with pytest.raises(TypeError, match="as an indexer is not supported"):
            ser.loc[key] = 1
class TestSetitemValidation:
    # This is adapted from pandas/tests/arrays/masked/test_indexing.py
    # but checks for warnings instead of errors.

    def _check_setitem_invalid(self, ser, invalid, indexer, warn):
        """Assert assigning ``invalid`` warns (or not) on every setitem path."""
        msg = "Setting an item of incompatible dtype is deprecated"
        msg = re.escape(msg)

        orig_ser = ser.copy()

        with tm.assert_produces_warning(warn, match=msg):
            ser[indexer] = invalid

        ser = orig_ser.copy()
        with tm.assert_produces_warning(warn, match=msg):
            ser.iloc[indexer] = invalid

        ser = orig_ser.copy()
        with tm.assert_produces_warning(warn, match=msg):
            ser.loc[indexer] = invalid

        ser = orig_ser.copy()
        with tm.assert_produces_warning(warn, match=msg):
            ser[:] = invalid

    # scalars that a bool/int/float Series cannot hold losslessly
    _invalid_scalars = [
        1 + 2j,
        "True",
        "1",
        "1.0",
        NaT,
        np.datetime64("NaT"),
        np.timedelta64("NaT"),
    ]
    _indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]

    @pytest.mark.parametrize(
        "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
    )
    @pytest.mark.parametrize("indexer", _indexers)
    def test_setitem_validation_scalar_bool(self, invalid, indexer):
        ser = Series([True, False, False], dtype="bool")
        self._check_setitem_invalid(ser, invalid, indexer, FutureWarning)

    @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)])
    @pytest.mark.parametrize("indexer", _indexers)
    def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
        ser = Series([1, 2, 3], dtype=any_int_numpy_dtype)
        # float NaN (isna but not NaT-like) is the one value that does
        # not warn here
        if isna(invalid) and invalid is not NaT and not np.isnat(invalid):
            warn = None
        else:
            warn = FutureWarning
        self._check_setitem_invalid(ser, invalid, indexer, warn)

    @pytest.mark.parametrize("invalid", _invalid_scalars + [True])
    @pytest.mark.parametrize("indexer", _indexers)
    def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer):
        ser = Series([1, 2, None], dtype=float_numpy_dtype)
        self._check_setitem_invalid(ser, invalid, indexer, FutureWarning)

View File

@ -0,0 +1,69 @@
import numpy as np
import pytest
from pandas import Series
import pandas._testing as tm
def test_mask():
    # compare with tested results in test_where
    # Series.mask(cond) must equal Series.where(~cond) on every variant
    s = Series(np.random.default_rng(2).standard_normal(5))
    cond = s > 0

    rs = s.where(~cond, np.nan)
    tm.assert_series_equal(rs, s.mask(cond))

    rs = s.where(~cond)
    rs2 = s.mask(cond)
    tm.assert_series_equal(rs, rs2)

    rs = s.where(~cond, -s)
    rs2 = s.mask(cond, -s)
    tm.assert_series_equal(rs, rs2)

    cond = Series([True, False, False, True, False], index=s.index)
    s2 = -(s.abs())
    rs = s2.where(~cond[:3])
    rs2 = s2.mask(cond[:3])
    tm.assert_series_equal(rs, rs2)

    rs = s2.where(~cond[:3], -s2)
    rs2 = s2.mask(cond[:3], -s2)
    tm.assert_series_equal(rs, rs2)

    # non-array conditions and shape mismatches are rejected
    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        s.mask(1)
    with pytest.raises(ValueError, match=msg):
        s.mask(cond[:3].values, -s)
def test_mask_casts():
    # dtype changes: masking with NaN upcasts an integer Series to float
    ser = Series([1, 2, 3, 4])
    result = ser.mask(ser > 2, np.nan)
    tm.assert_series_equal(result, Series([1, 2, np.nan, np.nan]))
def test_mask_casts2():
    # see gh-21891: list condition with no replacement inserts NaN
    ser = Series([1, 2])
    tm.assert_series_equal(ser.mask([True, False]), Series([np.nan, 2]))
def test_mask_inplace():
    # inplace=True mutates the caller and matches the out-of-place result
    s = Series(np.random.default_rng(2).standard_normal(5))
    cond = s > 0

    rs = s.copy()
    rs.mask(cond, inplace=True)
    tm.assert_series_equal(rs.dropna(), s[~cond])
    tm.assert_series_equal(rs, s.mask(cond))

    rs = s.copy()
    rs.mask(cond, -s, inplace=True)
    tm.assert_series_equal(rs, s.mask(cond, -s))

View File

@ -0,0 +1,45 @@
from datetime import datetime
import numpy as np
from pandas import (
DatetimeIndex,
Series,
)
import pandas._testing as tm
def test_series_set_value():
    # GH#1561: _set_value enlarges an empty object Series, preserving
    # insertion order of the labels
    dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]

    ser = Series(dtype=object)
    ser._set_value(dates[0], 1.0)
    ser._set_value(dates[1], np.nan)

    expected = Series([1.0, np.nan], index=DatetimeIndex(dates))
    tm.assert_series_equal(ser, expected)
def test_set_value_dt64(datetime_series):
    # _set_value on an existing label mutates in place and returns None
    idx = datetime_series.index[10]
    res = datetime_series._set_value(idx, 0)
    assert res is None
    assert datetime_series[idx] == 0


def test_set_value_str_index(string_series):
    # equiv
    # _set_value on a new label enlarges, same as .loc assignment
    ser = string_series.copy()
    res = ser._set_value("foobar", 0)
    assert res is None
    assert ser.index[-1] == "foobar"
    assert ser["foobar"] == 0

    ser2 = string_series.copy()
    ser2.loc["foobar"] = 0
    assert ser2.index[-1] == "foobar"
    assert ser2["foobar"] == 0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,50 @@
import pytest
import pandas as pd
from pandas import Series
import pandas._testing as tm
def test_take_validate_axis():
# GH#51022
ser = Series([-1, 5, 6, 2, 4])
msg = "No axis named foo for object type Series"
with pytest.raises(ValueError, match=msg):
ser.take([1, 2], axis="foo")
def test_take():
    # positional take keeps original labels; negative indices count from
    # the end; out-of-bounds indices raise
    ser = Series([-1, 5, 6, 2, 4])

    actual = ser.take([1, 3, 4])
    expected = Series([5, 2, 4], index=[1, 3, 4])
    tm.assert_series_equal(actual, expected)

    actual = ser.take([-1, 3, 4])
    expected = Series([4, 2, 4], index=[4, 3, 4])
    tm.assert_series_equal(actual, expected)

    msg = "indices are out-of-bounds"
    with pytest.raises(IndexError, match=msg):
        ser.take([1, 10])
    with pytest.raises(IndexError, match=msg):
        ser.take([2, 5])
def test_take_categorical():
    # https://github.com/pandas-dev/pandas/issues/20664
    # take must preserve the full category set
    ser = Series(pd.Categorical(["a", "b", "c"]))
    taken = ser.take([-2, -2, 0])
    expected = Series(
        pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]), index=[1, 1, 0]
    )
    tm.assert_series_equal(taken, expected)
def test_take_slice_raises():
ser = Series([-1, 5, 6, 2, 4])
msg = "Series.take requires a sequence of integers, not slice"
with pytest.raises(TypeError, match=msg):
ser.take(slice(0, 3, 1))

View File

@ -0,0 +1,481 @@
import numpy as np
import pytest
from pandas._config import using_pyarrow_string_dtype
from pandas.core.dtypes.common import is_integer
import pandas as pd
from pandas import (
Series,
Timestamp,
date_range,
isna,
)
import pandas._testing as tm
def test_where_unsafe_int(any_signed_int_numpy_dtype):
s = Series(np.arange(10), dtype=any_signed_int_numpy_dtype)
mask = s < 5
s[mask] = range(2, 7)
expected = Series(
list(range(2, 7)) + list(range(5, 10)),
dtype=any_signed_int_numpy_dtype,
)
tm.assert_series_equal(s, expected)
def test_where_unsafe_float(float_numpy_dtype):
s = Series(np.arange(10), dtype=float_numpy_dtype)
mask = s < 5
s[mask] = range(2, 7)
data = list(range(2, 7)) + list(range(5, 10))
expected = Series(data, dtype=float_numpy_dtype)
tm.assert_series_equal(s, expected)
@pytest.mark.parametrize(
    "dtype,expected_dtype",
    [
        (np.int8, np.float64),
        (np.int16, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
        (np.float32, np.float32),
        (np.float64, np.float64),
    ],
)
def test_where_unsafe_upcast(dtype, expected_dtype):
    # see gh-9743
    # Setting float values into an integer Series upcasts to float64;
    # the implicit upcast on setitem emits a FutureWarning.
    s = Series(np.arange(10), dtype=dtype)
    values = [2.5, 3.5, 4.5, 5.5, 6.5]
    mask = s < 5
    expected = Series(values + list(range(5, 10)), dtype=expected_dtype)
    # float -> float assignment is lossless, so no warning in that case
    warn = (
        None
        if np.dtype(dtype).kind == np.dtype(expected_dtype).kind == "f"
        else FutureWarning
    )
    with tm.assert_produces_warning(warn, match="incompatible dtype"):
        s[mask] = values
    tm.assert_series_equal(s, expected)
def test_where_unsafe():
    # see gh-9731
    # int64 -> float64 upcast via masked setitem warns
    s = Series(np.arange(10), dtype="int64")
    values = [2.5, 3.5, 4.5, 5.5]
    mask = s > 5
    expected = Series(list(range(6)) + values, dtype="float64")
    with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
        s[mask] = values
    tm.assert_series_equal(s, expected)
    # see gh-3235: same-kind assignment keeps int64
    s = Series(np.arange(10), dtype="int64")
    mask = s < 5
    s[mask] = range(2, 7)
    expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64")
    tm.assert_series_equal(s, expected)
    assert s.dtype == expected.dtype
    s = Series(np.arange(10), dtype="int64")
    mask = s > 5
    s[mask] = [0] * 4
    expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64")
    tm.assert_series_equal(s, expected)
    # value length must match the number of True positions
    s = Series(np.arange(10))
    mask = s > 5
    msg = "cannot set using a list-like indexer with a different length than the value"
    with pytest.raises(ValueError, match=msg):
        s[mask] = [5, 4, 3, 2, 1]
    with pytest.raises(ValueError, match=msg):
        s[mask] = [0] * 5
    # dtype changes
    s = Series([1, 2, 3, 4])
    result = s.where(s > 2, np.nan)
    expected = Series([np.nan, np.nan, 3, 4])
    tm.assert_series_equal(result, expected)
    # GH 4667
    # setting with None changes dtype
    s = Series(range(10)).astype(float)
    s[8] = None
    result = s[8]
    assert isna(result)
    s = Series(range(10)).astype(float)
    s[s > 8] = None
    result = s[isna(s)]
    expected = Series(np.nan, index=[9])
    tm.assert_series_equal(result, expected)
def test_where():
    ser = Series(np.random.default_rng(2).standard_normal(5))
    positive = ser > 0

    # where + dropna is equivalent to boolean indexing
    tm.assert_series_equal(ser.where(positive).dropna(), ser[positive])

    # replacing the negatives with their negation gives abs()
    tm.assert_series_equal(ser.where(positive, -ser), ser.abs())

    masked = ser.where(positive)
    assert masked.shape == ser.shape
    assert masked is not ser

    # a condition shorter than the Series aligns on the index
    cond = Series([True, False, False, True, False], index=ser.index)
    neg = -(ser.abs())
    expected = neg[cond].reindex(neg.index[:3]).reindex(neg.index)
    tm.assert_series_equal(neg.where(cond[:3]), expected)

    expected = neg.abs()
    expected.iloc[0] = neg[0]
    tm.assert_series_equal(neg.where(cond[:3], -neg), expected)
def test_where_error():
    ser = Series(np.random.default_rng(2).standard_normal(5))
    cond = ser > 0
    shape_msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=shape_msg):
        ser.where(1)
    with pytest.raises(ValueError, match=shape_msg):
        ser.where(cond[:3].values, -ser)

    # GH 2745: boolean-mask setitem with a value matching the mask length
    ser = Series([1, 2])
    ser[[True, False]] = [0, 1]
    tm.assert_series_equal(ser, Series([0, 2]))

    # failures: value length mismatched with the indexer
    length_msg = (
        "cannot set using a list-like indexer with a different length than the value"
    )
    for bad_value in ([0, 2, 3], []):
        with pytest.raises(ValueError, match=length_msg):
            ser[[True, False]] = bad_value
@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where_array_like(klass):
    # see gh-15414: the condition may be any boolean array-like
    ser = Series([1, 2, 3])
    result = ser.where(klass([False, True, True]))
    tm.assert_series_equal(result, Series([np.nan, 2, 3]))
@pytest.mark.parametrize(
    "cond",
    [
        [1, 0, 1],
        Series([2, 5, 7]),
        ["True", "False", "True"],
        [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")],
    ],
)
def test_where_invalid_input(cond):
    # see gh-15414: only boolean arrays are accepted as a condition
    ser = Series([1, 2, 3])
    with pytest.raises(ValueError, match="Boolean array expected for the condition"):
        ser.where(cond)
    # and the condition must match the Series' shape
    with pytest.raises(ValueError, match="Array conditional must be same shape as self"):
        ser.where([True])
def test_where_ndframe_align():
    msg = "Array conditional must be same shape as self"
    ser = Series([1, 2, 3])

    # a too-short raw list raises, but the same cond as a Series aligns
    short_cond = [True]
    with pytest.raises(ValueError, match=msg):
        ser.where(short_cond)
    tm.assert_series_equal(
        ser.where(Series(short_cond)), Series([1, np.nan, np.nan])
    )

    # a too-long ndarray raises, but as a Series it aligns on the index
    long_cond = np.array([False, True, False, True])
    with pytest.raises(ValueError, match=msg):
        ser.where(long_cond)
    tm.assert_series_equal(
        ser.where(Series(long_cond)), Series([np.nan, 2, np.nan])
    )
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't set ints into string")
def test_where_setitem_invalid():
    # GH 2702
    # make sure correct exceptions are raised on invalid list assignment
    # msg builds the indexer-specific "length mismatch" error message
    msg = (
        lambda x: f"cannot set using a {x} indexer with a "
        "different length than the value"
    )
    # slice
    s = Series(list("abc"))
    with pytest.raises(ValueError, match=msg("slice")):
        s[0:3] = list(range(27))
    s[0:3] = list(range(3))
    expected = Series([0, 1, 2])
    tm.assert_series_equal(s.astype(np.int64), expected)
    # slice with step
    s = Series(list("abcdef"))
    with pytest.raises(ValueError, match=msg("slice")):
        s[0:4:2] = list(range(27))
    s = Series(list("abcdef"))
    s[0:4:2] = list(range(2))
    expected = Series([0, "b", 1, "d", "e", "f"])
    tm.assert_series_equal(s, expected)
    # neg slices
    s = Series(list("abcdef"))
    with pytest.raises(ValueError, match=msg("slice")):
        s[:-1] = list(range(27))
    s[-3:-1] = list(range(2))
    expected = Series(["a", "b", "c", 0, 1, "f"])
    tm.assert_series_equal(s, expected)
    # list indexer: length must match exactly in both directions
    s = Series(list("abc"))
    with pytest.raises(ValueError, match=msg("list-like")):
        s[[0, 1, 2]] = list(range(27))
    s = Series(list("abc"))
    with pytest.raises(ValueError, match=msg("list-like")):
        s[[0, 1, 2]] = list(range(2))
    # scalar indexer: a list value is stored as a single object
    s = Series(list("abc"))
    s[0] = list(range(10))
    expected = Series([list(range(10)), "b", "c"])
    tm.assert_series_equal(s, expected)
@pytest.mark.parametrize("size", range(2, 6))
@pytest.mark.parametrize(
    "mask", [[True, False, False, False, False], [True, False], [False]]
)
@pytest.mark.parametrize(
    "item", [2.0, np.nan, np.finfo(float).max, np.finfo(float).min]
)
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize(
    "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
)
def test_broadcast(size, mask, item, box):
    # GH#8801, GH#4195
    selection = np.resize(mask, size)
    data = np.arange(size, dtype=float)

    # build the expected result by choosing item or source per position
    expected = Series(
        [item if chosen else value for value, chosen in zip(data, selection)]
    )

    # in-place masked assignment
    ser = Series(data)
    ser[selection] = item
    tm.assert_series_equal(ser, expected)

    # where with the inverted mask broadcasts the boxed item
    tm.assert_series_equal(Series(data).where(~selection, box(item)), expected)

    # mask is where's complement
    tm.assert_series_equal(Series(data).mask(selection, box(item)), expected)
def test_where_inplace():
    ser = Series(np.random.default_rng(2).standard_normal(5))
    cond = ser > 0

    # inplace where without 'other' fills failing positions with NaN
    result = ser.copy()
    result.where(cond, inplace=True)
    tm.assert_series_equal(result.dropna(), ser[cond])
    tm.assert_series_equal(result, ser.where(cond))

    # inplace where with 'other' matches the out-of-place call
    result = ser.copy()
    result.where(cond, -ser, inplace=True)
    tm.assert_series_equal(result, ser.where(cond, -ser))
def test_where_dups():
    # GH 4550: where must not crash with duplicate index labels
    part = Series(list(range(3)))
    comb = pd.concat([part, part.copy()])
    result = comb.where(comb < 2)
    expected = Series([0, 1, np.nan, 0, 1, np.nan], index=[0, 1, 2, 0, 1, 2])
    tm.assert_series_equal(result, expected)

    # GH 4548: in-place masked update with duplicate labels
    comb[comb < 1] = 5
    tm.assert_series_equal(
        comb, Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2])
    )

    comb[comb < 2] += 10
    tm.assert_series_equal(
        comb, Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2])
    )
def test_where_numeric_with_string():
    # GH 9280: replacing with strings upcasts to object dtype
    ser = Series([1, 2, 3])

    def check(result):
        # the replaced slot holds a string, kept slots stay integers
        assert not is_integer(result[0])
        assert is_integer(result[1])
        assert is_integer(result[2])
        assert isinstance(result[0], str)
        assert result.dtype == "object"

    check(ser.where(ser > 1, "X"))
    check(ser.where(ser > 1, ["X", "Y", "Z"]))
    check(ser.where(ser > 1, np.array(["X", "Y", "Z"])))
@pytest.mark.parametrize("dtype", ["timedelta64[ns]", "datetime64[ns]"])
def test_where_datetimelike_coerce(dtype):
    # Replacing every value of a datetime-like Series with integers/floats
    # downcasts to int64; that implicit downcast emits a FutureWarning.
    ser = Series([1, 2], dtype=dtype)
    expected = Series([10, 10])
    mask = np.array([False, False])
    msg = "Downcasting behavior in Series and DataFrame methods 'where'"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.where(mask, [10, 10])
    tm.assert_series_equal(rs, expected)
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.where(mask, 10)
    tm.assert_series_equal(rs, expected)
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.where(mask, 10.0)
    tm.assert_series_equal(rs, expected)
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.where(mask, [10.0, 10.0])
    tm.assert_series_equal(rs, expected)
    # a NaN in 'other' keeps object dtype, so no downcast warning
    rs = ser.where(mask, [10.0, np.nan])
    expected = Series([10, np.nan], dtype="object")
    tm.assert_series_equal(rs, expected)
def test_where_datetimetz():
    # GH 15701: tz-aware values survive where; masked slots become NaT
    stamps = ["2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00"]
    ser = Series([Timestamp(t) for t in stamps], dtype="datetime64[ns, UTC]")
    result = ser.where(Series([False, True]))
    expected = Series([pd.NaT, ser[1]], dtype="datetime64[ns, UTC]")
    tm.assert_series_equal(result, expected)
def test_where_sparse():
    # GH#17198: make sure we dont get an AttributeError for sp_index
    ser = Series(pd.arrays.SparseArray([1, 2]))
    result = ser.where(ser >= 2, 0)
    tm.assert_series_equal(result, Series(pd.arrays.SparseArray([0, 2])))
def test_where_empty_series_and_empty_cond_having_non_bool_dtypes():
    # https://github.com/pandas-dev/pandas/issues/34592
    # an empty (hence non-bool-dtyped) condition is accepted for an empty Series
    ser = Series([], dtype=float)
    tm.assert_series_equal(ser.where([]), ser)
def test_where_categorical(frame_or_series):
# https://github.com/pandas-dev/pandas/issues/18888
exp = frame_or_series(
pd.Categorical(["A", "A", "B", "B", np.nan], categories=["A", "B", "C"]),
dtype="category",
)
df = frame_or_series(["A", "A", "B", "B", "C"], dtype="category")
res = df.where(df != "C")
tm.assert_equal(exp, res)
def test_where_datetimelike_categorical(tz_naive_fixture):
# GH#37682
tz = tz_naive_fixture
dr = date_range("2001-01-01", periods=3, tz=tz)._with_freq(None)
lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT])
rvals = pd.Categorical([dr[0], pd.NaT, dr[2]])
mask = np.array([True, True, False])
# DatetimeIndex.where
res = lvals.where(mask, rvals)
tm.assert_index_equal(res, dr)
# DatetimeArray.where
res = lvals._data._where(mask, rvals)
tm.assert_datetime_array_equal(res, dr._data)
# Series.where
res = Series(lvals).where(mask, rvals)
tm.assert_series_equal(res, Series(dr))
# DataFrame.where
res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals))
tm.assert_frame_equal(res, pd.DataFrame(dr))

View File

@ -0,0 +1,82 @@
import numpy as np
import pytest
from pandas import (
MultiIndex,
Series,
date_range,
)
import pandas._testing as tm
def test_xs_datetimelike_wrapping():
    # GH#31630: xs on an object Series must not wrap datetime64 in Timestamp
    arr = date_range("2016-01-01", periods=3)._data._ndarray
    ser = Series(arr, dtype=object)
    # overwrite each slot so the stored values are raw np.datetime64
    for pos in range(len(ser)):
        ser.iloc[pos] = arr[pos]
    assert ser.dtype == object
    assert isinstance(ser[0], np.datetime64)

    assert isinstance(ser.xs(0), np.datetime64)
class TestXSWithMultiIndex:
def test_xs_level_series(self, multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
ser = df["A"]
expected = ser[:, "two"]
result = df.xs("two", level=1)["A"]
tm.assert_series_equal(result, expected)
def test_series_getitem_multiindex_xs_by_label(self):
# GH#5684
idx = MultiIndex.from_tuples(
[("a", "one"), ("a", "two"), ("b", "one"), ("b", "two")]
)
ser = Series([1, 2, 3, 4], index=idx)
return_value = ser.index.set_names(["L1", "L2"], inplace=True)
assert return_value is None
expected = Series([1, 3], index=["a", "b"])
return_value = expected.index.set_names(["L1"], inplace=True)
assert return_value is None
result = ser.xs("one", level="L2")
tm.assert_series_equal(result, expected)
def test_series_getitem_multiindex_xs(self):
# GH#6258
dt = list(date_range("20130903", periods=3))
idx = MultiIndex.from_product([list("AB"), dt])
ser = Series([1, 3, 4, 1, 3, 4], index=idx)
expected = Series([1, 1], index=list("AB"))
result = ser.xs("20130903", level=1)
tm.assert_series_equal(result, expected)
def test_series_xs_droplevel_false(self):
# GH: 19056
mi = MultiIndex.from_tuples(
[("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"]
)
ser = Series([1, 1, 1], index=mi)
result = ser.xs("a", axis=0, drop_level=False)
expected = Series(
[1, 1],
index=MultiIndex.from_tuples(
[("a", "x"), ("a", "y")], names=["level1", "level2"]
),
)
tm.assert_series_equal(result, expected)
def test_xs_key_as_list(self):
# GH#41760
mi = MultiIndex.from_tuples([("a", "x")], names=["level1", "level2"])
ser = Series([1], index=mi)
with pytest.raises(TypeError, match="list keys are not supported"):
ser.xs(["a", "x"], axis=0, drop_level=False)
with pytest.raises(TypeError, match="list keys are not supported"):
ser.xs(["a"], axis=0, drop_level=False)

View File

@ -0,0 +1,7 @@
"""
Test files dedicated to individual (stand-alone) Series methods
Ideally these files/tests should correspond 1-to-1 with tests.frame.methods
These may also present opportunities for sharing/de-duplicating test code.
"""

View File

@ -0,0 +1,41 @@
import pytest
from pandas import Index
import pandas._testing as tm
def test_add_prefix_suffix(string_series):
with_prefix = string_series.add_prefix("foo#")
expected = Index([f"foo#{c}" for c in string_series.index])
tm.assert_index_equal(with_prefix.index, expected)
with_suffix = string_series.add_suffix("#foo")
expected = Index([f"{c}#foo" for c in string_series.index])
tm.assert_index_equal(with_suffix.index, expected)
with_pct_prefix = string_series.add_prefix("%")
expected = Index([f"%{c}" for c in string_series.index])
tm.assert_index_equal(with_pct_prefix.index, expected)
with_pct_suffix = string_series.add_suffix("%")
expected = Index([f"{c}%" for c in string_series.index])
tm.assert_index_equal(with_pct_suffix.index, expected)
def test_add_prefix_suffix_axis(string_series):
# GH 47819
with_prefix = string_series.add_prefix("foo#", axis=0)
expected = Index([f"foo#{c}" for c in string_series.index])
tm.assert_index_equal(with_prefix.index, expected)
with_pct_suffix = string_series.add_suffix("#foo", axis=0)
expected = Index([f"{c}#foo" for c in string_series.index])
tm.assert_index_equal(with_pct_suffix.index, expected)
def test_add_prefix_suffix_invalid_axis(string_series):
with pytest.raises(ValueError, match="No axis named 1 for object type Series"):
string_series.add_prefix("foo#", axis=1)
with pytest.raises(ValueError, match="No axis named 1 for object type Series"):
string_series.add_suffix("foo#", axis=1)

View File

@ -0,0 +1,249 @@
from datetime import timezone
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
date_range,
period_range,
)
import pandas._testing as tm
@pytest.mark.parametrize(
"first_slice,second_slice",
[
[[2, None], [None, -5]],
[[None, 0], [None, -5]],
[[None, -5], [None, 0]],
[[None, 0], [None, 0]],
],
)
@pytest.mark.parametrize("fill", [None, -1])
def test_align(datetime_series, first_slice, second_slice, join_type, fill):
a = datetime_series[slice(*first_slice)]
b = datetime_series[slice(*second_slice)]
aa, ab = a.align(b, join=join_type, fill_value=fill)
join_index = a.index.join(b.index, how=join_type)
if fill is not None:
diff_a = aa.index.difference(join_index)
diff_b = ab.index.difference(join_index)
if len(diff_a) > 0:
assert (aa.reindex(diff_a) == fill).all()
if len(diff_b) > 0:
assert (ab.reindex(diff_b) == fill).all()
ea = a.reindex(join_index)
eb = b.reindex(join_index)
if fill is not None:
ea = ea.fillna(fill)
eb = eb.fillna(fill)
tm.assert_series_equal(aa, ea)
tm.assert_series_equal(ab, eb)
assert aa.name == "ts"
assert ea.name == "ts"
assert ab.name == "ts"
assert eb.name == "ts"
@pytest.mark.parametrize(
    "first_slice,second_slice",
    [
        [[2, None], [None, -5]],
        [[None, 0], [None, -5]],
        [[None, -5], [None, 0]],
        [[None, 0], [None, 0]],
    ],
)
@pytest.mark.parametrize("method", ["pad", "bfill"])
@pytest.mark.parametrize("limit", [None, 1])
def test_align_fill_method(
    datetime_series, first_slice, second_slice, join_type, method, limit
):
    # align with a fill method must equal reindex-then-fillna; both keyword
    # paths are deprecated, hence the warning assertions.
    a = datetime_series[slice(*first_slice)]
    b = datetime_series[slice(*second_slice)]
    msg = (
        "The 'method', 'limit', and 'fill_axis' keywords in Series.align "
        "are deprecated"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        aa, ab = a.align(b, join=join_type, method=method, limit=limit)
    join_index = a.index.join(b.index, how=join_type)
    ea = a.reindex(join_index)
    eb = b.reindex(join_index)
    msg2 = "Series.fillna with 'method' is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg2):
        ea = ea.fillna(method=method, limit=limit)
        eb = eb.fillna(method=method, limit=limit)
    tm.assert_series_equal(aa, ea)
    tm.assert_series_equal(ab, eb)
def test_align_nocopy(datetime_series, using_copy_on_write):
    # copy=False shares data with the input, unless Copy-on-Write is active,
    # in which case the later write triggers a copy and the input is untouched.
    b = datetime_series[:5].copy()
    # do copy
    a = datetime_series.copy()
    ra, _ = a.align(b, join="left")
    ra[:5] = 5
    assert not (a[:5] == 5).any()
    # do not copy
    a = datetime_series.copy()
    ra, _ = a.align(b, join="left", copy=False)
    ra[:5] = 5
    if using_copy_on_write:
        assert not (a[:5] == 5).any()
    else:
        assert (a[:5] == 5).all()
    # do copy
    a = datetime_series.copy()
    b = datetime_series[:5].copy()
    _, rb = a.align(b, join="right")
    rb[:3] = 5
    assert not (b[:3] == 5).any()
    # do not copy
    a = datetime_series.copy()
    b = datetime_series[:5].copy()
    _, rb = a.align(b, join="right", copy=False)
    rb[:2] = 5
    if using_copy_on_write:
        assert not (b[:2] == 5).any()
    else:
        assert (b[:2] == 5).all()
def test_align_same_index(datetime_series, using_copy_on_write):
    # aligning with an identically-indexed series: with copy=False the index
    # object is reused (under CoW a new-but-identical index object is returned)
    a, b = datetime_series.align(datetime_series, copy=False)
    if not using_copy_on_write:
        assert a.index is datetime_series.index
        assert b.index is datetime_series.index
    else:
        assert a.index.is_(datetime_series.index)
        assert b.index.is_(datetime_series.index)
    # with copy=True a distinct (but equivalent) index object is always returned
    a, b = datetime_series.align(datetime_series, copy=True)
    assert a.index is not datetime_series.index
    assert b.index is not datetime_series.index
    assert a.index.is_(datetime_series.index)
    assert b.index.is_(datetime_series.index)
def test_align_multiindex():
    # GH 10665: align a 3-level MultiIndex series with a 1-level series
    midx = pd.MultiIndex.from_product(
        [range(2), range(3), range(2)], names=("a", "b", "c")
    )
    flat_idx = pd.Index(range(2), name="b")
    s_multi = Series(np.arange(12, dtype="int64"), index=midx)
    s_flat = Series(np.arange(2, dtype="int64"), index=flat_idx)

    # left-join one way must equal right-join the other way (flipped)
    res1l, res1r = s_multi.align(s_flat, join="left")
    res2l, res2r = s_flat.align(s_multi, join="right")

    expl = s_multi
    tm.assert_series_equal(expl, res1l)
    tm.assert_series_equal(expl, res2r)
    expr = Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
    tm.assert_series_equal(expr, res1r)
    tm.assert_series_equal(expr, res2l)

    res1l, res1r = s_multi.align(s_flat, join="right")
    res2l, res2r = s_flat.align(s_multi, join="left")

    exp_idx = pd.MultiIndex.from_product(
        [range(2), range(2), range(2)], names=("a", "b", "c")
    )
    expl = Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
    tm.assert_series_equal(expl, res1l)
    tm.assert_series_equal(expl, res2r)
    expr = Series([0, 0, 1, 1] * 2, index=exp_idx)
    tm.assert_series_equal(expr, res1r)
    tm.assert_series_equal(expr, res2l)
@pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None])
def test_align_with_dataframe_method(method):
    # GH31788: the (deprecated) 'method' keyword must still work when
    # aligning a Series with a DataFrame
    ser = Series(range(3), index=range(3))
    df = pd.DataFrame(0.0, index=range(3), columns=range(3))
    msg = (
        "The 'method', 'limit', and 'fill_axis' keywords in Series.align "
        "are deprecated"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result_ser, result_df = ser.align(df, method=method)
    # identical indexes, so both inputs come back unchanged
    tm.assert_series_equal(result_ser, ser)
    tm.assert_frame_equal(result_df, df)
def test_align_dt64tzindex_mismatched_tzs():
    # aligning series with different timezones converts both indexes to UTC
    idx_eastern = date_range("2001", periods=5, freq="h", tz="US/Eastern")
    ser = Series(
        np.random.default_rng(2).standard_normal(len(idx_eastern)), index=idx_eastern
    )
    new1, new2 = ser.align(ser.tz_convert("US/Central"))
    assert new1.index.tz is timezone.utc
    assert new2.index.tz is timezone.utc
def test_align_periodindex(join_type):
rng = period_range("1/1/2000", "1/1/2010", freq="Y")
ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
# TODO: assert something?
ts.align(ts[::2], join=join_type)
def test_align_left_fewer_levels():
    # GH#45224: the level missing on the left ('b') is appended to its index
    left = Series([2], index=pd.MultiIndex.from_tuples([(1, 3)], names=["a", "c"]))
    right = Series(
        [1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
    )
    result_left, result_right = left.align(right)

    exp_index = pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
    tm.assert_series_equal(result_left, Series([2], index=exp_index))
    tm.assert_series_equal(result_right, Series([1], index=exp_index))
def test_align_left_different_named_levels():
# GH#45224
left = Series(
[2], index=pd.MultiIndex.from_tuples([(1, 4, 3)], names=["a", "d", "c"])
)
right = Series(
[1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
)
result_left, result_right = left.align(right)
expected_left = Series(
[2], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
)
expected_right = Series(
[1], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
)
tm.assert_series_equal(result_left, expected_left)
tm.assert_series_equal(result_right, expected_right)

View File

@ -0,0 +1,84 @@
import numpy as np
import pytest
from pandas import (
Series,
Timestamp,
isna,
)
import pandas._testing as tm
class TestSeriesArgsort:
    def test_argsort_axis(self):
        # GH#54257: Series only has axis 0
        ser = Series(range(3))
        msg = "No axis named 2 for object type Series"
        with pytest.raises(ValueError, match=msg):
            ser.argsort(axis=2)

    def test_argsort_numpy(self, datetime_series):
        ser = datetime_series
        res = np.argsort(ser).values
        expected = np.argsort(np.array(ser))
        tm.assert_numpy_array_equal(res, expected)
        # with missing values: argsort's NA handling is deprecated
        ts = ser.copy()
        ts[::2] = np.nan
        msg = "The behavior of Series.argsort in the presence of NA values"
        with tm.assert_produces_warning(
            FutureWarning, match=msg, check_stacklevel=False
        ):
            result = np.argsort(ts)[1::2]
        expected = np.argsort(np.array(ts.dropna()))
        tm.assert_numpy_array_equal(result.values, expected)

    def test_argsort(self, datetime_series):
        argsorted = datetime_series.argsort()
        assert issubclass(argsorted.dtype.type, np.integer)

    def test_argsort_dt64(self, unit):
        # GH#2967 (introduced bug in 0.11-dev I think)
        ser = Series(
            [Timestamp(f"201301{i:02d}") for i in range(1, 6)], dtype=f"M8[{unit}]"
        )
        assert ser.dtype == f"datetime64[{unit}]"
        shifted = ser.shift(-1)
        assert shifted.dtype == f"datetime64[{unit}]"
        assert isna(shifted[4])
        result = ser.argsort()
        expected = Series(range(5), dtype=np.intp)
        tm.assert_series_equal(result, expected)
        # NaT positions get -1, with a deprecation warning
        msg = "The behavior of Series.argsort in the presence of NA values"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = shifted.argsort()
        expected = Series(list(range(4)) + [-1], dtype=np.intp)
        tm.assert_series_equal(result, expected)

    def test_argsort_stable(self):
        # mergesort (stable) and the default quicksort must both match numpy
        ser = Series(np.random.default_rng(2).integers(0, 100, size=10000))
        mindexer = ser.argsort(kind="mergesort")
        qindexer = ser.argsort()
        mexpected = np.argsort(ser.values, kind="mergesort")
        qexpected = np.argsort(ser.values, kind="quicksort")
        tm.assert_series_equal(mindexer.astype(np.intp), Series(mexpected))
        tm.assert_series_equal(qindexer.astype(np.intp), Series(qexpected))
        # the two orderings differ for ties, and comparing a Series with
        # assert_numpy_array_equal must itself raise
        msg = (
            r"ndarray Expected type <class 'numpy\.ndarray'>, "
            r"found <class 'pandas\.core\.series\.Series'> instead"
        )
        with pytest.raises(AssertionError, match=msg):
            tm.assert_numpy_array_equal(qindexer, mindexer)

    def test_argsort_preserve_name(self, datetime_series):
        result = datetime_series.argsort()
        assert result.name == datetime_series.name

View File

@ -0,0 +1,205 @@
import numpy as np
import pytest
from pandas._libs.tslibs import IncompatibleFrequency
from pandas import (
DatetimeIndex,
PeriodIndex,
Series,
Timestamp,
date_range,
isna,
notna,
offsets,
period_range,
)
import pandas._testing as tm
class TestSeriesAsof:
    def test_asof_nanosecond_index_access(self):
        # asof and label lookups must work at nanosecond resolution
        ts = Timestamp("20130101").as_unit("ns")._value
        dti = DatetimeIndex([ts + 50 + i for i in range(100)])
        ser = Series(np.random.default_rng(2).standard_normal(100), index=dti)
        first_value = ser.asof(ser.index[0])
        # GH#46903 previously incorrectly was "day"
        assert dti.resolution == "nanosecond"
        # this used to not work bc parsing was done by dateutil that didn't
        # handle nanoseconds
        assert first_value == ser["2013-01-01 00:00:00.000000050"]
        expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns")
        assert first_value == ser[Timestamp(expected_ts)]

    def test_basic(self):
        # array or list or dates
        N = 50
        rng = date_range("1/1/1990", periods=N, freq="53s")
        ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
        ts.iloc[15:30] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="25s")
        result = ts.asof(dates)
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]
        result = ts.asof(list(dates))
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]
        # inside the NaN gap every asof value equals the last valid value
        mask = (result.index >= lb) & (result.index < ub)
        rs = result[mask]
        assert (rs == ts[lb]).all()
        val = result[result.index[result.index >= ub][0]]
        assert ts[ub] == val

    def test_scalar(self):
        N = 30
        rng = date_range("1/1/1990", periods=N, freq="53s")
        # Explicit cast to float avoid implicit cast when setting nan
        ts = Series(np.arange(N), index=rng, dtype="float")
        ts.iloc[5:10] = np.nan
        ts.iloc[15:20] = np.nan
        # lookups inside a NaN run return the last valid value before it
        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])
        assert val1 == ts.iloc[4]
        assert val2 == ts.iloc[14]
        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts.iloc[4]
        # in there
        result = ts.asof(ts.index[3])
        assert result == ts.iloc[3]
        # no as of value
        d = ts.index[0] - offsets.BDay()
        assert np.isnan(ts.asof(d))

    def test_with_nan(self):
        # basic asof test
        rng = date_range("1/1/2000", "1/2/2000", freq="4h")
        s = Series(np.arange(len(rng)), index=rng)
        r = s.resample("2h").mean()
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)
        # NaNs in the middle are forward-filled by asof
        r.iloc[3:5] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)
        # trailing NaNs are forward-filled from the last valid value
        r.iloc[-3:] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

    def test_periodindex(self):
        # array or list or dates
        N = 50
        rng = period_range("1/1/1990", periods=N, freq="h")
        ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
        ts.iloc[15:30] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="37min")
        result = ts.asof(dates)
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]
        result = ts.asof(list(dates))
        assert notna(result).all()
        lb = ts.index[14]
        ub = ts.index[30]
        # convert the datetime results index back to periods for comparison
        pix = PeriodIndex(result.index.values, freq="h")
        mask = (pix >= lb) & (pix < ub)
        rs = result[mask]
        assert (rs == ts[lb]).all()
        ts.iloc[5:10] = np.nan
        ts.iloc[15:20] = np.nan
        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])
        assert val1 == ts.iloc[4]
        assert val2 == ts.iloc[14]
        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts.iloc[4]
        # in there
        assert ts.asof(ts.index[3]) == ts.iloc[3]
        # no as of value
        d = ts.index[0].to_timestamp() - offsets.BDay()
        assert isna(ts.asof(d))
        # Mismatched freq
        msg = "Input has different freq"
        with pytest.raises(IncompatibleFrequency, match=msg):
            ts.asof(rng.asfreq("D"))

    def test_errors(self):
        s = Series(
            [1, 2, 3],
            index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")],
        )
        # non-monotonic
        assert not s.index.is_monotonic_increasing
        with pytest.raises(ValueError, match="requires a sorted index"):
            s.asof(s.index[0])
        # subset with Series: 'where' subsets only apply to DataFrame
        N = 10
        rng = date_range("1/1/1990", periods=N, freq="53s")
        s = Series(np.random.default_rng(2).standard_normal(N), index=rng)
        with pytest.raises(ValueError, match="not valid for Series"):
            s.asof(s.index[0], subset="foo")

    def test_all_nans(self):
        # GH 15713
        # series is all nans
        # testing non-default indexes
        N = 50
        rng = date_range("1/1/1990", periods=N, freq="53s")
        dates = date_range("1/1/1990", periods=N * 3, freq="25s")
        result = Series(np.nan, index=rng).asof(dates)
        expected = Series(np.nan, index=dates)
        tm.assert_series_equal(result, expected)
        # testing scalar input
        date = date_range("1/1/1990", periods=N * 3, freq="25s")[0]
        result = Series(np.nan, index=rng).asof(date)
        assert isna(result)
        # test name is propagated
        result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5])
        expected = Series(np.nan, index=[4, 5], name="test")
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,683 @@
from datetime import (
datetime,
timedelta,
)
from importlib import reload
import string
import sys
import numpy as np
import pytest
from pandas._libs.tslibs import iNaT
import pandas.util._test_decorators as td
from pandas import (
NA,
Categorical,
CategoricalDtype,
DatetimeTZDtype,
Index,
Interval,
NaT,
Series,
Timedelta,
Timestamp,
cut,
date_range,
to_datetime,
)
import pandas._testing as tm
def rand_str(nchars: int) -> str:
    """
    Return one random ASCII string of length ``nchars``.

    Characters are drawn from the ASCII letters and digits using a
    fixed-seed generator (seed 2), so repeated calls with the same
    ``nchars`` are deterministic.  (The previous docstring incorrectly
    described the result as a byte string; a ``str`` is returned.)
    """
    # alphabet of single-character strings to sample from
    alphabet = np.array(
        list(string.ascii_letters + string.digits), dtype=(np.str_, 1)
    )
    return "".join(np.random.default_rng(2).choice(alphabet, nchars))
class TestAstypeAPI:
    def test_astype_unitless_dt64_raises(self):
        # GH#47844: casting to "datetime64" without a unit is rejected
        ser = Series(["1970-01-01", "1970-01-01", "1970-01-01"], dtype="datetime64[ns]")
        df = ser.to_frame()
        msg = "Casting to unit-less dtype 'datetime64' is not supported"
        with pytest.raises(TypeError, match=msg):
            ser.astype(np.datetime64)
        with pytest.raises(TypeError, match=msg):
            df.astype(np.datetime64)
        with pytest.raises(TypeError, match=msg):
            ser.astype("datetime64")
        with pytest.raises(TypeError, match=msg):
            df.astype("datetime64")

    def test_arg_for_errors_in_astype(self):
        # see GH#14878: 'errors' only accepts 'raise' or 'ignore'
        ser = Series([1, 2, 3])
        msg = (
            r"Expected value of kwarg 'errors' to be one of \['raise', "
            r"'ignore'\]\. Supplied value is 'False'"
        )
        with pytest.raises(ValueError, match=msg):
            ser.astype(np.float64, errors=False)
        ser.astype(np.int8, errors="raise")

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        # see GH#7271: dict-like dtype maps are keyed by the Series name only
        ser = Series(range(0, 10, 2), name="abc")
        dt1 = dtype_class({"abc": str})
        result = ser.astype(dt1)
        expected = Series(["0", "2", "4", "6", "8"], name="abc", dtype=object)
        tm.assert_series_equal(result, expected)
        dt2 = dtype_class({"abc": "float64"})
        result = ser.astype(dt2)
        expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc")
        tm.assert_series_equal(result, expected)
        # any key other than the Series name is rejected
        dt3 = dtype_class({"abc": str, "def": str})
        msg = (
            "Only the Series name can be used for the key in Series dtype "
            r"mappings\."
        )
        with pytest.raises(KeyError, match=msg):
            ser.astype(dt3)
        dt4 = dtype_class({0: str})
        with pytest.raises(KeyError, match=msg):
            ser.astype(dt4)
        # GH#16717
        # if dtypes provided is empty, it should error
        if dtype_class is Series:
            dt5 = dtype_class({}, dtype=object)
        else:
            dt5 = dtype_class({})
        with pytest.raises(KeyError, match=msg):
            ser.astype(dt5)
class TestAstype:
    """Behavioral tests for Series.astype across numeric, datetime, string
    and extension dtypes."""

    @pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
    def test_astype_object_to_dt64_non_nano(self, tz):
        # GH#55756, GH#54620 — object -> non-nanosecond datetime64[us].
        ts = Timestamp("2999-01-01")
        dtype = "M8[us]"
        if tz is not None:
            dtype = f"M8[us, {tz}]"
        vals = [ts, "2999-01-02 03:04:05.678910", 2500]
        ser = Series(vals, dtype=object)
        result = ser.astype(dtype)

        # The 2500 is interpreted as microseconds, consistent with what
        # we would get if we created DatetimeIndexes from vals[:2] and vals[2:]
        # and concated the results.
        pointwise = [
            vals[0].tz_localize(tz),
            Timestamp(vals[1], tz=tz),
            to_datetime(vals[2], unit="us", utc=True).tz_convert(tz),
        ]
        exp_vals = [x.as_unit("us").asm8 for x in pointwise]
        exp_arr = np.array(exp_vals, dtype="M8[us]")
        expected = Series(exp_arr, dtype="M8[us]")
        if tz is not None:
            expected = expected.dt.tz_localize("UTC").dt.tz_convert(tz)
        tm.assert_series_equal(result, expected)

    def test_astype_mixed_object_to_dt64tz(self):
        # pre-2.0 this raised ValueError bc of tz mismatch
        # xref GH#32581
        ts = Timestamp("2016-01-04 05:06:07", tz="US/Pacific")
        ts2 = ts.tz_convert("Asia/Tokyo")

        ser = Series([ts, ts2], dtype=object)
        res = ser.astype("datetime64[ns, Europe/Brussels]")
        expected = Series(
            [ts.tz_convert("Europe/Brussels"), ts2.tz_convert("Europe/Brussels")],
            dtype="datetime64[ns, Europe/Brussels]",
        )
        tm.assert_series_equal(res, expected)

    @pytest.mark.parametrize("dtype", np.typecodes["All"])
    def test_astype_empty_constructor_equality(self, dtype):
        # see GH#15524 — empty construction and empty astype must agree.
        if dtype not in (
            "S",
            "V",  # poor support (if any) currently
            "M",
            "m",  # Generic timestamps raise a ValueError. Already tested.
        ):
            init_empty = Series([], dtype=dtype)
            as_type_empty = Series([]).astype(dtype)
            tm.assert_series_equal(init_empty, as_type_empty)

    @pytest.mark.parametrize("dtype", [str, np.str_])
    @pytest.mark.parametrize(
        "series",
        [
            Series([string.digits * 10, rand_str(63), rand_str(64), rand_str(1000)]),
            Series([string.digits * 10, rand_str(63), rand_str(64), np.nan, 1.0]),
        ],
    )
    def test_astype_str_map(self, dtype, series, using_infer_string):
        # see GH#4405 — astype(str) matches element-wise map(str).
        result = series.astype(dtype)
        expected = series.map(str)
        if using_infer_string:
            expected = expected.astype(object)
        tm.assert_series_equal(result, expected)

    def test_astype_float_to_period(self):
        # NaN float -> NaT in a period dtype.
        result = Series([np.nan]).astype("period[D]")
        expected = Series([NaT], dtype="period[D]")
        tm.assert_series_equal(result, expected)

    def test_astype_no_pandas_dtype(self):
        # https://github.com/pandas-dev/pandas/pull/24866
        ser = Series([1, 2], dtype="int64")
        # Don't have NumpyEADtype in the public API, so we use `.array.dtype`,
        # which is a NumpyEADtype.
        result = ser.astype(ser.array.dtype)
        tm.assert_series_equal(result, ser)

    @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
    def test_astype_generic_timestamp_no_frequency(self, dtype, request):
        # see GH#15524, GH#15987 — unit-less generic timestamps must raise.
        data = [1]
        ser = Series(data)

        if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
            mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
            request.applymarker(mark)

        msg = (
            rf"The '{dtype.__name__}' dtype has no unit\. "
            rf"Please pass in '{dtype.__name__}\[ns\]' instead."
        )
        with pytest.raises(ValueError, match=msg):
            ser.astype(dtype)

    def test_astype_dt64_to_str(self):
        # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
        dti = date_range("2012-01-01", periods=3)
        result = Series(dti).astype(str)
        expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object)
        tm.assert_series_equal(result, expected)

    def test_astype_dt64tz_to_str(self):
        # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
        dti_tz = date_range("2012-01-01", periods=3, tz="US/Eastern")
        result = Series(dti_tz).astype(str)
        expected = Series(
            [
                "2012-01-01 00:00:00-05:00",
                "2012-01-02 00:00:00-05:00",
                "2012-01-03 00:00:00-05:00",
            ],
            dtype=object,
        )
        tm.assert_series_equal(result, expected)

    def test_astype_datetime(self, unit):
        # `unit` is a fixture supplying a datetime resolution string.
        ser = Series(iNaT, dtype=f"M8[{unit}]", index=range(5))

        ser = ser.astype("O")
        assert ser.dtype == np.object_

        ser = Series([datetime(2001, 1, 2, 0, 0)])

        ser = ser.astype("O")
        assert ser.dtype == np.object_

        ser = Series(
            [datetime(2001, 1, 2, 0, 0) for i in range(3)], dtype=f"M8[{unit}]"
        )

        # Assigning NaN keeps the datetime dtype (becomes NaT).
        ser[1] = np.nan
        assert ser.dtype == f"M8[{unit}]"

        ser = ser.astype("O")
        assert ser.dtype == np.object_

    def test_astype_datetime64tz(self):
        ser = Series(date_range("20130101", periods=3, tz="US/Eastern"))

        # astype
        result = ser.astype(object)
        expected = Series(ser.astype(object), dtype=object)
        tm.assert_series_equal(result, expected)

        result = Series(ser.values).dt.tz_localize("UTC").dt.tz_convert(ser.dt.tz)
        tm.assert_series_equal(result, ser)

        # astype - object, preserves on construction
        result = Series(ser.astype(object))
        expected = ser.astype(object)
        tm.assert_series_equal(result, expected)

        # astype - datetime64[ns, tz]
        msg = "Cannot use .astype to convert from timezone-naive"
        with pytest.raises(TypeError, match=msg):
            # dt64->dt64tz astype deprecated
            Series(ser.values).astype("datetime64[ns, US/Eastern]")

        with pytest.raises(TypeError, match=msg):
            # dt64->dt64tz astype deprecated
            Series(ser.values).astype(ser.dtype)

        result = ser.astype("datetime64[ns, CET]")
        expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
        tm.assert_series_equal(result, expected)

    def test_astype_str_cast_dt64(self):
        # see GH#9757
        ts = Series([Timestamp("2010-01-04 00:00:00")])
        res = ts.astype(str)

        expected = Series(["2010-01-04"], dtype=object)
        tm.assert_series_equal(res, expected)

        ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
        res = ts.astype(str)
        expected = Series(["2010-01-04 00:00:00-05:00"], dtype=object)
        tm.assert_series_equal(res, expected)

    def test_astype_str_cast_td64(self):
        # see GH#9757
        td = Series([Timedelta(1, unit="d")])
        ser = td.astype(str)

        expected = Series(["1 days"], dtype=object)
        tm.assert_series_equal(ser, expected)

    def test_dt64_series_astype_object(self):
        dt64ser = Series(date_range("20130101", periods=3))
        result = dt64ser.astype(object)
        assert isinstance(result.iloc[0], datetime)
        assert result.dtype == np.object_

    def test_td64_series_astype_object(self):
        tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
        result = tdser.astype(object)
        assert isinstance(result.iloc[0], timedelta)
        assert result.dtype == np.object_

    @pytest.mark.parametrize(
        "data, dtype",
        [
            (["x", "y", "z"], "string[python]"),
            pytest.param(
                ["x", "y", "z"],
                "string[pyarrow]",
                marks=td.skip_if_no("pyarrow"),
            ),
            (["x", "y", "z"], "category"),
            (3 * [Timestamp("2020-01-01", tz="UTC")], None),
            (3 * [Interval(0, 1)], None),
        ],
    )
    @pytest.mark.parametrize("errors", ["raise", "ignore"])
    def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
        # https://github.com/pandas-dev/pandas/issues/35471
        ser = Series(data, dtype=dtype)
        if errors == "ignore":
            # errors="ignore" returns the original data unchanged.
            expected = ser
            result = ser.astype(float, errors="ignore")
            tm.assert_series_equal(result, expected)
        else:
            msg = "(Cannot cast)|(could not convert)"
            with pytest.raises((ValueError, TypeError), match=msg):
                ser.astype(float, errors=errors)

    @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
    def test_astype_from_float_to_str(self, dtype):
        # https://github.com/pandas-dev/pandas/issues/36451
        ser = Series([0.1], dtype=dtype)
        result = ser.astype(str)
        expected = Series(["0.1"], dtype=object)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "value, string_value",
        [
            (None, "None"),
            (np.nan, "nan"),
            (NA, "<NA>"),
        ],
    )
    def test_astype_to_str_preserves_na(self, value, string_value):
        # https://github.com/pandas-dev/pandas/issues/36904
        ser = Series(["a", "b", value], dtype=object)
        result = ser.astype(str)
        expected = Series(["a", "b", string_value], dtype=object)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
    def test_astype(self, dtype):
        # Dtype changes; name is preserved.
        ser = Series(np.random.default_rng(2).standard_normal(5), name="foo")
        as_typed = ser.astype(dtype)

        assert as_typed.dtype == dtype
        assert as_typed.name == ser.name

    @pytest.mark.parametrize("value", [np.nan, np.inf])
    @pytest.mark.parametrize("dtype", [np.int32, np.int64])
    def test_astype_cast_nan_inf_int(self, dtype, value):
        # gh-14265: check NaN and inf raise error when converting to int
        msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
        ser = Series([value])

        with pytest.raises(ValueError, match=msg):
            ser.astype(dtype)

    @pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
    def test_astype_cast_object_int_fail(self, dtype):
        arr = Series(["car", "house", "tree", "1"])
        msg = r"invalid literal for int\(\) with base 10: 'car'"
        with pytest.raises(ValueError, match=msg):
            arr.astype(dtype)

    def test_astype_float_to_uint_negatives_raise(
        self, float_numpy_dtype, any_unsigned_int_numpy_dtype
    ):
        # GH#45151 We don't cast negative numbers to nonsense values
        # TODO: same for EA float/uint dtypes, signed integers?
        arr = np.arange(5).astype(float_numpy_dtype) - 3  # includes negatives
        ser = Series(arr)

        msg = "Cannot losslessly cast from .* to .*"
        with pytest.raises(ValueError, match=msg):
            ser.astype(any_unsigned_int_numpy_dtype)

        with pytest.raises(ValueError, match=msg):
            ser.to_frame().astype(any_unsigned_int_numpy_dtype)

        with pytest.raises(ValueError, match=msg):
            # We currently catch and re-raise in Index.astype
            Index(ser).astype(any_unsigned_int_numpy_dtype)

        with pytest.raises(ValueError, match=msg):
            ser.array.astype(any_unsigned_int_numpy_dtype)

    def test_astype_cast_object_int(self):
        arr = Series(["1", "2", "3", "4"], dtype=object)
        result = arr.astype(int)

        tm.assert_series_equal(result, Series(np.arange(1, 5)))

    def test_astype_unicode(self, using_infer_string):
        # see GH#7758: A bit of magic is required to set
        # default encoding to utf-8
        digits = string.digits
        test_series = [
            Series([digits * 10, rand_str(63), rand_str(64), rand_str(1000)]),
            Series(["データーサイエンス、お前はもう死んでいる"]),
        ]

        former_encoding = None

        if sys.getdefaultencoding() == "utf-8":
            # GH#45326 as of 2.0 Series.astype matches Index.astype by handling
            # bytes with obj.decode() instead of str(obj)
            item = "野菜食べないとやばい"
            ser = Series([item.encode()])
            result = ser.astype(np.str_)
            expected = Series([item], dtype=object)
            tm.assert_series_equal(result, expected)

        for ser in test_series:
            res = ser.astype(np.str_)
            expec = ser.map(str)
            if using_infer_string:
                expec = expec.astype(object)
            tm.assert_series_equal(res, expec)

        # Restore the former encoding
        # NOTE(review): former_encoding is never reassigned above, so this
        # branch appears unreachable as written — kept as-is.
        if former_encoding is not None and former_encoding != "utf-8":
            reload(sys)
            sys.setdefaultencoding(former_encoding)

    def test_astype_bytes(self):
        # GH#39474
        result = Series(["foo", "bar", "baz"]).astype(bytes)
        assert result.dtypes == np.dtype("S3")

    def test_astype_nan_to_bool(self):
        # GH#43018 — NaN in object dtype is truthy, so becomes True.
        ser = Series(np.nan, dtype="object")
        result = ser.astype("bool")
        expected = Series(True, dtype="bool")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype",
        tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES,
    )
    def test_astype_ea_to_datetimetzdtype(self, dtype):
        # GH37553 — integers interpreted as nanoseconds since the epoch.
        ser = Series([4, 0, 9], dtype=dtype)
        result = ser.astype(DatetimeTZDtype(tz="US/Pacific"))

        expected = Series(
            {
                0: Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific"),
                1: Timestamp("1969-12-31 16:00:00.000000000-08:00", tz="US/Pacific"),
                2: Timestamp("1969-12-31 16:00:00.000000009-08:00", tz="US/Pacific"),
            }
        )

        tm.assert_series_equal(result, expected)

    def test_astype_retain_attrs(self, any_numpy_dtype):
        # GH#44414 — .attrs must survive astype.
        ser = Series([0, 1, 2, 3])
        ser.attrs["Location"] = "Michigan"

        result = ser.astype(any_numpy_dtype).attrs
        expected = ser.attrs

        tm.assert_dict_equal(expected, result)
class TestAstypeString:
    """Round-trips through the nullable string dtype back to the original
    extension dtype."""

    @pytest.mark.parametrize(
        "data, dtype",
        [
            ([True, NA], "boolean"),
            (["A", NA], "category"),
            (["2020-10-10", "2020-10-10"], "datetime64[ns]"),
            (["2020-10-10", "2020-10-10", NaT], "datetime64[ns]"),
            (
                ["2012-01-01 00:00:00-05:00", NaT],
                "datetime64[ns, US/Eastern]",
            ),
            ([1, None], "UInt16"),
            (["1/1/2021", "2/1/2021"], "period[M]"),
            (["1/1/2021", "2/1/2021", NaT], "period[M]"),
            (["1 Day", "59 Days", NaT], "timedelta64[ns]"),
            # currently no way to parse IntervalArray from a list of strings
        ],
    )
    def test_astype_string_to_extension_dtype_roundtrip(
        self, data, dtype, request, nullable_string_dtype
    ):
        if dtype == "boolean":
            mark = pytest.mark.xfail(
                reason="TODO StringArray.astype() with missing values #GH40566"
            )
            request.applymarker(mark)
        # GH-40351
        ser = Series(data, dtype=dtype)

        # Note: just passing .astype(dtype) fails for dtype="category"
        # with bc ser.dtype.categories will be object dtype whereas
        # result.dtype.categories will have string dtype
        result = ser.astype(nullable_string_dtype).astype(ser.dtype)
        tm.assert_series_equal(result, ser)
class TestAstypeCategorical:
    """Casts to and from categorical dtypes, including CategoricalDtype
    variants with explicit categories/ordering."""

    def test_astype_categorical_to_other(self):
        cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
        ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values()
        ser = cut(ser, range(0, 10500, 500), right=False, labels=cat)

        # Casting a categorical to category is a no-op.
        expected = ser
        tm.assert_series_equal(ser.astype("category"), expected)
        tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
        msg = r"Cannot cast object|string dtype to float64"
        with pytest.raises(ValueError, match=msg):
            ser.astype("float64")

        cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"], dtype=object)
        tm.assert_series_equal(cat.astype("str"), exp)
        s2 = Series(Categorical(["1", "2", "3", "4"]))
        exp2 = Series([1, 2, 3, 4]).astype("int")
        tm.assert_series_equal(s2.astype("int"), exp2)

        # object don't sort correctly, so just compare that we have the same
        # values
        def cmp(a, b):
            tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b)))

        expected = Series(np.array(ser.values), name="value_group")
        cmp(ser.astype("object"), expected)
        cmp(ser.astype(np.object_), expected)

        # array conversion
        tm.assert_almost_equal(np.array(ser), np.array(ser.values))

        tm.assert_series_equal(ser.astype("category"), ser)
        tm.assert_series_equal(ser.astype(CategoricalDtype()), ser)

        # A round-trip through object re-infers categories in sorted order
        # and drops any unused ones.
        roundtrip_expected = ser.cat.set_categories(
            ser.cat.categories.sort_values()
        ).cat.remove_unused_categories()
        result = ser.astype("object").astype("category")
        tm.assert_series_equal(result, roundtrip_expected)
        result = ser.astype("object").astype(CategoricalDtype())
        tm.assert_series_equal(result, roundtrip_expected)

    def test_astype_categorical_invalid_conversions(self):
        # invalid conversion (these are NOT a dtype)
        cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
        ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values()
        ser = cut(ser, range(0, 10500, 500), right=False, labels=cat)

        msg = (
            "dtype '<class 'pandas.core.arrays.categorical.Categorical'>' "
            "not understood"
        )
        with pytest.raises(TypeError, match=msg):
            ser.astype(Categorical)
        with pytest.raises(TypeError, match=msg):
            ser.astype("object").astype(Categorical)

    def test_astype_categoricaldtype(self):
        ser = Series(["a", "b", "a"])
        result = ser.astype(CategoricalDtype(["a", "b"], ordered=True))
        expected = Series(Categorical(["a", "b", "a"], ordered=True))
        tm.assert_series_equal(result, expected)

        result = ser.astype(CategoricalDtype(["a", "b"], ordered=False))
        expected = Series(Categorical(["a", "b", "a"], ordered=False))
        tm.assert_series_equal(result, expected)

        # Extra declared categories are retained even if unused in the data.
        result = ser.astype(CategoricalDtype(["a", "b", "c"], ordered=False))
        expected = Series(
            Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False)
        )
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"]))

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("series_ordered", [True, False])
    def test_astype_categorical_to_categorical(
        self, name, dtype_ordered, series_ordered
    ):
        # GH#10696, GH#18593
        s_data = list("abcaacbab")
        s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
        ser = Series(s_data, dtype=s_dtype, name=name)

        # unspecified categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = ser.astype(dtype)
        exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
        expected = Series(s_data, name=name, dtype=exp_dtype)
        tm.assert_series_equal(result, expected)

        # different categories
        dtype = CategoricalDtype(list("adc"), dtype_ordered)
        result = ser.astype(dtype)
        expected = Series(s_data, name=name, dtype=dtype)
        tm.assert_series_equal(result, expected)

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            expected = ser
            result = ser.astype("category")
            tm.assert_series_equal(result, expected)

    def test_astype_bool_missing_to_categorical(self):
        # GH-19182
        ser = Series([True, False, np.nan])
        assert ser.dtypes == np.object_

        result = ser.astype(CategoricalDtype(categories=[True, False]))
        expected = Series(Categorical([True, False, np.nan], categories=[True, False]))
        tm.assert_series_equal(result, expected)

    def test_astype_categories_raises(self):
        # deprecated GH#17636, removed in GH#27141
        ser = Series(["a", "b", "a"])
        with pytest.raises(TypeError, match="got an unexpected"):
            ser.astype("category", categories=["a", "b"], ordered=True)

    @pytest.mark.parametrize("items", [["a", "b", "c", "a"], [1, 2, 3, 1]])
    def test_astype_from_categorical(self, items):
        ser = Series(items)
        exp = Series(Categorical(items))
        res = ser.astype("category")
        tm.assert_series_equal(res, exp)

    def test_astype_from_categorical_with_keywords(self):
        # with keywords
        lst = ["a", "b", "c", "a"]
        ser = Series(lst)
        exp = Series(Categorical(lst, ordered=True))
        res = ser.astype(CategoricalDtype(None, ordered=True))
        tm.assert_series_equal(res, exp)

        exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True))
        res = ser.astype(CategoricalDtype(list("abcdef"), ordered=True))
        tm.assert_series_equal(res, exp)

    def test_astype_timedelta64_with_np_nan(self):
        # GH45798 — np.nan is accepted as NaT at construction time.
        result = Series([Timedelta(1), np.nan], dtype="timedelta64[ns]")
        expected = Series([Timedelta(1), NaT], dtype="timedelta64[ns]")
        tm.assert_series_equal(result, expected)

    @td.skip_if_no("pyarrow")
    def test_astype_int_na_string(self):
        # GH#57418 — NA survives the int -> string cast.
        ser = Series([12, NA], dtype="Int64[pyarrow]")
        result = ser.astype("string[pyarrow]")
        expected = Series(["12", NA], dtype="string[pyarrow]")
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,30 @@
import numpy as np
class TestAutoCorr:
def test_autocorr(self, datetime_series):
# Just run the function
corr1 = datetime_series.autocorr()
# Now run it with the lag parameter
corr2 = datetime_series.autocorr(lag=1)
# corr() with lag needs Series of at least length 2
if len(datetime_series) <= 2:
assert np.isnan(corr1)
assert np.isnan(corr2)
else:
assert corr1 == corr2
# Choose a random lag between 1 and length of Series - 2
# and compare the result with the Series corr() function
n = 1 + np.random.default_rng(2).integers(max(1, len(datetime_series) - 2))
corr1 = datetime_series.corr(datetime_series.shift(n))
corr2 = datetime_series.autocorr(lag=n)
# corr() with lag needs Series of at least length 2
if len(datetime_series) <= 2:
assert np.isnan(corr1)
assert np.isnan(corr2)
else:
assert corr1 == corr2

View File

@ -0,0 +1,75 @@
import numpy as np
import pytest
from pandas import (
Series,
bdate_range,
date_range,
period_range,
)
import pandas._testing as tm
class TestBetween:
    """Tests for Series.between over datetime/period values and the
    ``inclusive`` argument."""

    def test_between(self):
        # between(left, right) defaults to closed on both sides.
        series = Series(date_range("1/1/2000", periods=10))
        left, right = series[[2, 7]]

        result = series.between(left, right)
        expected = (series >= left) & (series <= right)
        tm.assert_series_equal(result, expected)

    def test_between_datetime_object_dtype(self):
        # object-dtype datetimes with interleaved NaNs.
        ser = Series(bdate_range("1/1/2000", periods=20), dtype=object)
        ser[::2] = np.nan

        result = ser[ser.between(ser[3], ser[17])]
        expected = ser[3:18].dropna()
        tm.assert_series_equal(result, expected)

        result = ser[ser.between(ser[3], ser[17], inclusive="neither")]
        expected = ser[5:16].dropna()
        tm.assert_series_equal(result, expected)

    def test_between_period_values(self):
        ser = Series(period_range("2000-01-01", periods=10, freq="D"))
        left, right = ser[[2, 7]]
        result = ser.between(left, right)
        expected = (ser >= left) & (ser <= right)
        tm.assert_series_equal(result, expected)

    def test_between_inclusive_string(self):
        # GH 40628 — all four `inclusive` spellings.
        series = Series(date_range("1/1/2000", periods=10))
        left, right = series[[2, 7]]

        result = series.between(left, right, inclusive="both")
        expected = (series >= left) & (series <= right)
        tm.assert_series_equal(result, expected)

        result = series.between(left, right, inclusive="left")
        expected = (series >= left) & (series < right)
        tm.assert_series_equal(result, expected)

        result = series.between(left, right, inclusive="right")
        expected = (series > left) & (series <= right)
        tm.assert_series_equal(result, expected)

        result = series.between(left, right, inclusive="neither")
        expected = (series > left) & (series < right)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("inclusive", ["yes", True, False])
    def test_between_error_args(self, inclusive):
        # GH 40628 — invalid `inclusive` values raise ValueError.
        series = Series(date_range("1/1/2000", periods=10))
        left, right = series[[2, 7]]

        value_error_msg = (
            "Inclusive has to be either string of 'both',"
            "'left', 'right', or 'neither'."
        )

        # Only the `between` call belongs inside the raises block;
        # previously the Series was redundantly rebuilt here, which could
        # have masked a constructor failure as the expected ValueError.
        with pytest.raises(ValueError, match=value_error_msg):
            series.between(left, right, inclusive=inclusive)

View File

@ -0,0 +1,148 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
array as pd_array,
date_range,
)
import pandas._testing as tm
@pytest.fixture
def df():
    """
    Base DataFrame shared by the case_when tests.
    """
    data = {"a": [1, 2, 3], "b": [4, 5, 6]}
    return DataFrame(data)
def test_case_when_caselist_is_not_a_list(df):
    """
    Raise TypeError if caselist is not a list.

    The previous docstring said ValueError, but the assertion (correctly)
    expects TypeError.
    """
    msg = "The caselist argument should be a list; "
    msg += "instead got.+"
    with pytest.raises(TypeError, match=msg):  # GH39154
        df["a"].case_when(caselist=())
def test_case_when_no_caselist(df):
    """
    Raise ValueError if no caselist is provided.
    """
    msg = (
        "provide at least one boolean condition, "
        "with a corresponding replacement."
    )
    with pytest.raises(ValueError, match=msg):  # GH39154
        df["a"].case_when([])
def test_case_when_odd_caselist(df):
    """
    Raise ValueError if an entry in caselist is not a (condition,
    replacement) pair.
    """
    msg = (
        "Argument 0 must have length 2; "
        "a condition and replacement; instead got length 3."
    )
    with pytest.raises(ValueError, match=msg):
        df["a"].case_when([(df["a"].eq(1), 1, df.a.gt(1))])
def test_case_when_raise_error_from_mask(df):
    """
    Errors raised inside the underlying Series.mask call propagate,
    annotated with the failing condition/replacement position.
    """
    msg = "Failed to apply condition0 and replacement0."
    with pytest.raises(ValueError, match=msg):
        df["a"].case_when([(df["a"].eq(1), [1, 2])])
def test_case_when_single_condition(df):
"""
Test output on a single condition.
"""
result = Series([np.nan, np.nan, np.nan]).case_when([(df.a.eq(1), 1)])
expected = Series([1, np.nan, np.nan])
tm.assert_series_equal(result, expected)
def test_case_when_multiple_conditions(df):
"""
Test output when booleans are derived from a computation
"""
result = Series([np.nan, np.nan, np.nan]).case_when(
[(df.a.eq(1), 1), (Series([False, True, False]), 2)]
)
expected = Series([1, 2, np.nan])
tm.assert_series_equal(result, expected)
def test_case_when_multiple_conditions_replacement_list(df):
"""
Test output when replacement is a list
"""
result = Series([np.nan, np.nan, np.nan]).case_when(
[([True, False, False], 1), (df["a"].gt(1) & df["b"].eq(5), [1, 2, 3])]
)
expected = Series([1, 2, np.nan])
tm.assert_series_equal(result, expected)
def test_case_when_multiple_conditions_replacement_extension_dtype(df):
"""
Test output when replacement has an extension dtype
"""
result = Series([np.nan, np.nan, np.nan]).case_when(
[
([True, False, False], 1),
(df["a"].gt(1) & df["b"].eq(5), pd_array([1, 2, 3], dtype="Int64")),
],
)
expected = Series([1, 2, np.nan], dtype="Float64")
tm.assert_series_equal(result, expected)
def test_case_when_multiple_conditions_replacement_series(df):
"""
Test output when replacement is a Series
"""
result = Series([np.nan, np.nan, np.nan]).case_when(
[
(np.array([True, False, False]), 1),
(df["a"].gt(1) & df["b"].eq(5), Series([1, 2, 3])),
],
)
expected = Series([1, 2, np.nan])
tm.assert_series_equal(result, expected)
def test_case_when_non_range_index():
"""
Test output if index is not RangeIndex
"""
rng = np.random.default_rng(seed=123)
dates = date_range("1/1/2000", periods=8)
df = DataFrame(
rng.standard_normal(size=(8, 4)), index=dates, columns=["A", "B", "C", "D"]
)
result = Series(5, index=df.index, name="A").case_when([(df.A.gt(0), df.B)])
expected = df.A.mask(df.A.gt(0), df.B).where(df.A.gt(0), 5)
tm.assert_series_equal(result, expected)
def test_case_when_callable():
"""
Test output on a callable
"""
# https://numpy.org/doc/stable/reference/generated/numpy.piecewise.html
x = np.linspace(-2.5, 2.5, 6)
ser = Series(x)
result = ser.case_when(
caselist=[
(lambda df: df < 0, lambda df: -df),
(lambda df: df >= 0, lambda df: df),
]
)
expected = np.piecewise(x, [x < 0, x >= 0], [lambda x: -x, lambda x: x])
tm.assert_series_equal(result, Series(expected))

View File

@ -0,0 +1,146 @@
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
Timestamp,
isna,
notna,
)
import pandas._testing as tm
class TestSeriesClip:
    """Tests for Series.clip with scalar, array-like, NA and datetime bounds."""

    def test_clip(self, datetime_series):
        val = datetime_series.median()

        # Clipping at the median pins min/max to it.
        assert datetime_series.clip(lower=val).min() == val
        assert datetime_series.clip(upper=val).max() == val

        # Matches the NumPy ufunc applied to the Series.
        result = datetime_series.clip(-0.5, 0.5)
        expected = np.clip(datetime_series, -0.5, 0.5)
        tm.assert_series_equal(result, expected)
        assert isinstance(expected, Series)

    def test_clip_types_and_nulls(self):
        # Clipping preserves the null mask across float, object and
        # datetime dtypes.
        sers = [
            Series([np.nan, 1.0, 2.0, 3.0]),
            Series([None, "a", "b", "c"]),
            Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")),
        ]

        for s in sers:
            thresh = s[2]
            lower = s.clip(lower=thresh)
            upper = s.clip(upper=thresh)
            assert lower[notna(lower)].min() == thresh
            assert upper[notna(upper)].max() == thresh
            assert list(isna(s)) == list(isna(lower))
            assert list(isna(s)) == list(isna(upper))

    def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixture):
        # Ensure that clipping method can handle NA values with out failing
        # GH#40581
        if nulls_fixture is pd.NaT:
            # constructor will raise, see
            # test_constructor_mismatched_null_nullable_dtype
            pytest.skip("See test_constructor_mismatched_null_nullable_dtype")

        ser = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype)
        s_clipped_upper = ser.clip(upper=2.0)
        s_clipped_lower = ser.clip(lower=2.0)

        expected_upper = Series([nulls_fixture, 1.0, 2.0], dtype=any_numeric_ea_dtype)
        expected_lower = Series([nulls_fixture, 2.0, 3.0], dtype=any_numeric_ea_dtype)

        tm.assert_series_equal(s_clipped_upper, expected_upper)
        tm.assert_series_equal(s_clipped_lower, expected_lower)

    def test_clip_with_na_args(self):
        """Should process np.nan argument as None"""
        # GH#17276
        s = Series([1, 2, 3])

        # NaN bounds are treated as "no bound".
        tm.assert_series_equal(s.clip(np.nan), Series([1, 2, 3]))
        tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))

        # GH#19992
        msg = "Downcasting behavior in Series and DataFrame methods 'where'"
        # TODO: avoid this warning here? seems like we should never be upcasting
        # in the first place?
        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = s.clip(lower=[0, 4, np.nan])
        tm.assert_series_equal(res, Series([1, 4, 3]))
        with tm.assert_produces_warning(FutureWarning, match=msg):
            res = s.clip(upper=[1, np.nan, 1])
        tm.assert_series_equal(res, Series([1, 2, 1]))

        # GH#40420 — an all-NaN array bound leaves the data untouched.
        s = Series([1, 2, 3])
        result = s.clip(0, [np.nan, np.nan, np.nan])
        tm.assert_series_equal(s, result)

    def test_clip_against_series(self):
        # GH#6966 — element-wise Series bounds.
        s = Series([1.0, 1.0, 4.0])

        lower = Series([1.0, 2.0, 3.0])
        upper = Series([1.5, 2.5, 3.5])

        tm.assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
        tm.assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))

    @pytest.mark.parametrize("inplace", [True, False])
    @pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])])
    def test_clip_against_list_like(self, inplace, upper):
        # GH#15390 — list/ndarray bounds, both in place and copying.
        original = Series([5, 6, 7])
        result = original.clip(upper=upper, inplace=inplace)
        expected = Series([1, 2, 3])

        if inplace:
            # inplace=True returns None; the original holds the result.
            result = original
        tm.assert_series_equal(result, expected, check_exact=True)

    def test_clip_with_datetimes(self):
        # GH#11838
        # naive and tz-aware datetimes
        t = Timestamp("2015-12-01 09:30:30")
        s = Series([Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:31:00")])
        result = s.clip(upper=t)
        expected = Series(
            [Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:30:30")]
        )
        tm.assert_series_equal(result, expected)

        t = Timestamp("2015-12-01 09:30:30", tz="US/Eastern")
        s = Series(
            [
                Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
                Timestamp("2015-12-01 09:31:00", tz="US/Eastern"),
            ]
        )
        result = s.clip(upper=t)
        expected = Series(
            [
                Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
                Timestamp("2015-12-01 09:30:30", tz="US/Eastern"),
            ]
        )
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [object, "M8[us]"])
    def test_clip_with_timestamps_and_oob_datetimes(self, dtype):
        # GH-42794 — datetimes outside the Timestamp ns-range still clip.
        ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype)

        result = ser.clip(lower=Timestamp.min, upper=Timestamp.max)
        expected = Series([Timestamp.min, Timestamp.max], dtype=dtype)

        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,17 @@
from pandas import Series
import pandas._testing as tm
class TestCombine:
    """Tests for Series.combine with a scalar second operand."""

    def test_combine_scalar(self):
        # GH#21248
        # Note - combine() with another Series is tested elsewhere because
        # it is used when testing operators
        base = Series([i * 10 for i in range(5)])

        added = base.combine(3, lambda x, y: x + y)
        tm.assert_series_equal(added, Series([i * 10 + 3 for i in range(5)]))

        capped = base.combine(22, lambda x, y: min(x, y))
        tm.assert_series_equal(capped, Series([min(i * 10, 22) for i in range(5)]))

View File

@ -0,0 +1,149 @@
from datetime import datetime
import numpy as np
import pandas as pd
from pandas import (
Period,
Series,
date_range,
period_range,
to_datetime,
)
import pandas._testing as tm
class TestCombineFirst:
    """Tests for Series.combine_first: fill NA positions in self from another Series."""
    def test_combine_first_period_datetime(self):
        # GH#3367
        didx = date_range(start="1950-01-31", end="1950-07-31", freq="ME")
        pidx = period_range(start=Period("1950-1"), end=Period("1950-7"), freq="M")
        # check to be consistent with DatetimeIndex
        for idx in [didx, pidx]:
            a = Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx)
            b = Series([9, 9, 9, 9, 9, 9, 9], index=idx)
            result = a.combine_first(b)
            expected = Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64)
            tm.assert_series_equal(result, expected)
    def test_combine_first_name(self, datetime_series):
        # The result keeps the name of the calling Series.
        result = datetime_series.combine_first(datetime_series[:5])
        assert result.name == datetime_series.name
    def test_combine_first(self):
        values = np.arange(20, dtype=np.float64)
        series = Series(values, index=np.arange(20, dtype=np.int64))
        series_copy = series * 2
        series_copy[::2] = np.nan
        # nothing used from the input
        combined = series.combine_first(series_copy)
        tm.assert_series_equal(combined, series)
        # Holes filled from input
        combined = series_copy.combine_first(series)
        assert np.isfinite(combined).all()
        tm.assert_series_equal(combined[::2], series[::2])
        tm.assert_series_equal(combined[1::2], series_copy[1::2])
        # mixed types
        index = pd.Index([str(i) for i in range(20)])
        floats = Series(np.random.default_rng(2).standard_normal(20), index=index)
        strings = Series([str(i) for i in range(10)], index=index[::2], dtype=object)
        combined = strings.combine_first(floats)
        tm.assert_series_equal(strings, combined.loc[index[::2]])
        tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]])
        # corner case
        ser = Series([1.0, 2, 3], index=[0, 1, 2])
        empty = Series([], index=[], dtype=object)
        msg = "The behavior of array concatenation with empty entries is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = ser.combine_first(empty)
        ser.index = ser.index.astype("O")
        tm.assert_series_equal(ser, result)
    def test_combine_first_dt64(self, unit):
        # Filling NaT from another datetime64 Series preserves the unit.
        s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit)
        s1 = to_datetime(Series([np.nan, "2011"])).dt.as_unit(unit)
        rs = s0.combine_first(s1)
        xp = to_datetime(Series(["2010", "2011"])).dt.as_unit(unit)
        tm.assert_series_equal(rs, xp)
        # Filling from an object-dtype Series of strings.
        s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit)
        s1 = Series([np.nan, "2011"])
        rs = s0.combine_first(s1)
        xp = Series([datetime(2010, 1, 1), "2011"], dtype="datetime64[ns]")
        tm.assert_series_equal(rs, xp)
    def test_combine_first_dt_tz_values(self, tz_naive_fixture):
        # tz-aware values: non-overlapping labels come in from ser2, tz kept.
        ser1 = Series(
            pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture),
            name="ser1",
        )
        ser2 = Series(
            pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture),
            index=[2, 3, 4],
            name="ser2",
        )
        result = ser1.combine_first(ser2)
        exp_vals = pd.DatetimeIndex(
            ["20150101", "20150102", "20150103", "20160515", "20160516"],
            tz=tz_naive_fixture,
        )
        exp = Series(exp_vals, name="ser1")
        tm.assert_series_equal(exp, result)
    def test_combine_first_timezone_series_with_empty_series(self):
        # GH 41800
        time_index = date_range(
            datetime(2021, 1, 1, 1),
            datetime(2021, 1, 1, 10),
            freq="h",
            tz="Europe/Rome",
        )
        s1 = Series(range(10), index=time_index)
        s2 = Series(index=time_index)
        msg = "The behavior of array concatenation with empty entries is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = s1.combine_first(s2)
        tm.assert_series_equal(result, s1)
    def test_combine_first_preserves_dtype(self):
        # GH51764: large int64 values must not be cast to float (lossy).
        s1 = Series([1666880195890293744, 1666880195890293837])
        s2 = Series([1, 2, 3])
        result = s1.combine_first(s2)
        expected = Series([1666880195890293744, 1666880195890293837, 3])
        tm.assert_series_equal(result, expected)
    def test_combine_mixed_timezone(self):
        # GH 26283
        uniform_tz = Series({pd.Timestamp("2019-05-01", tz="UTC"): 1.0})
        multi_tz = Series(
            {
                pd.Timestamp("2019-05-01 01:00:00+0100", tz="Europe/London"): 2.0,
                pd.Timestamp("2019-05-02", tz="UTC"): 3.0,
            }
        )
        result = uniform_tz.combine_first(multi_tz)
        # Mixed timezones fall back to an object-dtype index of UTC stamps.
        expected = Series(
            [1.0, 3.0],
            index=pd.Index(
                [
                    pd.Timestamp("2019-05-01 00:00:00+00:00", tz="UTC"),
                    pd.Timestamp("2019-05-02 00:00:00+00:00", tz="UTC"),
                ],
                dtype="object",
            ),
        )
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,141 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
def test_compare_axis(align_axis):
    # GH#30429
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", "z"])

    result = left.compare(right, align_axis=align_axis)

    if align_axis in (1, "columns"):
        # Differences spread across a "self"/"other" column pair.
        expected = pd.DataFrame(
            [["a", "x"], ["c", "z"]],
            index=pd.Index([0, 2]),
            columns=pd.Index(["self", "other"]),
        )
        tm.assert_frame_equal(result, expected)
    else:
        # Row alignment stacks "self"/"other" into a MultiIndex level.
        stacked = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
        tm.assert_series_equal(result, pd.Series(["a", "x", "c", "z"], index=stacked))
@pytest.mark.parametrize(
    "keep_shape, keep_equal",
    [
        (True, False),
        (False, True),
        (True, True),
        # False, False case is already covered in test_compare_axis
    ],
)
def test_compare_various_formats(keep_shape, keep_equal):
    # keep_shape retains all rows; keep_equal retains equal values
    # instead of masking them with NaN.
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", "z"])

    result = left.compare(right, keep_shape=keep_shape, keep_equal=keep_equal)

    cols = pd.Index(["self", "other"])
    if not keep_shape:
        expected = pd.DataFrame(
            [["a", "x"], ["c", "z"]], index=pd.Index([0, 2]), columns=cols
        )
    elif keep_equal:
        expected = pd.DataFrame(
            [["a", "x"], ["b", "b"], ["c", "z"]],
            index=pd.Index([0, 1, 2]),
            columns=cols,
        )
    else:
        expected = pd.DataFrame(
            [["a", "x"], [np.nan, np.nan], ["c", "z"]],
            index=pd.Index([0, 1, 2]),
            columns=cols,
        )
    tm.assert_frame_equal(result, expected)
def test_compare_with_equal_nulls():
    # Two NaNs compare as equal for .compare purposes, so the shared
    # NaN row is dropped from the result.
    left = pd.Series(["a", "b", np.nan])
    right = pd.Series(["x", "b", np.nan])

    expected = pd.DataFrame([["a", "x"]], columns=["self", "other"])
    tm.assert_frame_equal(left.compare(right), expected)
def test_compare_with_non_equal_nulls():
    # A NaN paired with a real value is a genuine difference and must
    # survive in the stacked (align_axis=0) output.
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", np.nan])

    result = left.compare(right, align_axis=0)

    stacked = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
    tm.assert_series_equal(result, pd.Series(["a", "x", "c", np.nan], index=stacked))
def test_compare_multi_index():
    # Differences under a MultiIndex gain an extra "self"/"other" level.
    idx = pd.MultiIndex.from_arrays([[0, 0, 1], [0, 1, 2]])
    left = pd.Series(["a", "b", "c"], index=idx)
    right = pd.Series(["x", "b", "z"], index=idx)

    result = left.compare(right, align_axis=0)

    expected_idx = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], [0, 0, 2, 2], ["self", "other", "self", "other"]]
    )
    tm.assert_series_equal(result, pd.Series(["a", "x", "c", "z"], index=expected_idx))
def test_compare_unaligned_objects():
    # Series.compare must raise when the operands are not
    # identically labeled (different index values or lengths).
    #
    # Fix: the Series construction used to sit inside the
    # ``pytest.raises`` blocks; keep the raises body minimal so a
    # construction-time error cannot masquerade as the expected failure.
    msg = "Can only compare identically-labeled Series objects"

    # Same length, different index labels.
    ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
    ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"])
    with pytest.raises(ValueError, match=msg):
        ser1.compare(ser2)

    # Different lengths.
    ser1 = pd.Series([1, 2, 3])
    ser2 = pd.Series([1, 2, 3, 4])
    with pytest.raises(ValueError, match=msg):
        ser1.compare(ser2)
def test_compare_datetime64_and_string():
    # Issue https://github.com/pandas-dev/pandas/issues/45506
    # Catch OverflowError when comparing datetime64 and string
    df = pd.DataFrame(
        data=[
            {"a": "2015-07-01", "b": "08335394550"},
            {"a": "2015-07-02", "b": "+49 (0) 0345 300033"},
            {"a": "2015-07-03", "b": "+49(0)2598 04457"},
            {"a": "2015-07-04", "b": "0741470003"},
            {"a": "2015-07-05", "b": "04181 83668"},
        ]
    ).astype({"a": "datetime64[ns]", "b": "string"})

    # Datetimes never equal non-datetime strings; no error is raised.
    all_false = pd.Series([False] * 5)
    all_true = pd.Series([True] * 5)
    tm.assert_series_equal(df["a"].eq(df["b"]), all_false)
    tm.assert_series_equal(df["a"] == df["b"], all_false)
    tm.assert_series_equal(df["a"] != df["b"], all_true)

View File

@ -0,0 +1,306 @@
from itertools import product
import numpy as np
import pytest
from pandas._libs import lib
import pandas as pd
import pandas._testing as tm
# Each test case consists of a tuple with the data and dtype to create the
# test Series, the default dtype for the expected result (which is valid
# for most cases), and the specific cases where the result deviates from
# this default. Those overrides are defined as a dict with (keyword, val) as
# dictionary key. In case of multiple items, the last override takes precedence.
@pytest.fixture(
    params=[
        (
            # data
            [1, 2, 3],
            # original dtype
            np.dtype("int32"),
            # default expected dtype
            "Int32",
            # exceptions on expected dtype
            {("convert_integer", False): np.dtype("int32")},
        ),
        (
            [1, 2, 3],
            np.dtype("int64"),
            "Int64",
            {("convert_integer", False): np.dtype("int64")},
        ),
        (
            ["x", "y", "z"],
            np.dtype("O"),
            pd.StringDtype(),
            {("convert_string", False): np.dtype("O")},
        ),
        (
            [True, False, np.nan],
            np.dtype("O"),
            pd.BooleanDtype(),
            {("convert_boolean", False): np.dtype("O")},
        ),
        (
            ["h", "i", np.nan],
            np.dtype("O"),
            pd.StringDtype(),
            {("convert_string", False): np.dtype("O")},
        ),
        ( # GH32117
            ["h", "i", 1],
            np.dtype("O"),
            np.dtype("O"),
            {},
        ),
        (
            [10, np.nan, 20],
            np.dtype("float"),
            "Int64",
            {
                ("convert_integer", False, "convert_floating", True): "Float64",
                ("convert_integer", False, "convert_floating", False): np.dtype(
                    "float"
                ),
            },
        ),
        (
            [np.nan, 100.5, 200],
            np.dtype("float"),
            "Float64",
            {("convert_floating", False): np.dtype("float")},
        ),
        (
            [3, 4, 5],
            "Int8",
            "Int8",
            {},
        ),
        (
            [[1, 2], [3, 4], [5]],
            None,
            np.dtype("O"),
            {},
        ),
        (
            [4, 5, 6],
            np.dtype("uint32"),
            "UInt32",
            {("convert_integer", False): np.dtype("uint32")},
        ),
        (
            [-10, 12, 13],
            np.dtype("i1"),
            "Int8",
            {("convert_integer", False): np.dtype("i1")},
        ),
        (
            [1.2, 1.3],
            np.dtype("float32"),
            "Float32",
            {("convert_floating", False): np.dtype("float32")},
        ),
        (
            [1, 2.0],
            object,
            "Int64",
            {
                ("convert_integer", False): "Float64",
                ("convert_integer", False, "convert_floating", False): np.dtype(
                    "float"
                ),
                ("infer_objects", False): np.dtype("object"),
            },
        ),
        (
            [1, 2.5],
            object,
            "Float64",
            {
                ("convert_floating", False): np.dtype("float"),
                ("infer_objects", False): np.dtype("object"),
            },
        ),
        (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}),
        (
            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("s"),
            pd.DatetimeTZDtype(tz="UTC"),
            pd.DatetimeTZDtype(tz="UTC"),
            {},
        ),
        (
            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ms"),
            pd.DatetimeTZDtype(tz="UTC"),
            pd.DatetimeTZDtype(tz="UTC"),
            {},
        ),
        (
            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("us"),
            pd.DatetimeTZDtype(tz="UTC"),
            pd.DatetimeTZDtype(tz="UTC"),
            {},
        ),
        (
            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"),
            pd.DatetimeTZDtype(tz="UTC"),
            pd.DatetimeTZDtype(tz="UTC"),
            {},
        ),
        (
            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"),
            "datetime64[ns]",
            np.dtype("datetime64[ns]"),
            {},
        ),
        (
            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"),
            object,
            np.dtype("datetime64[ns]"),
            {("infer_objects", False): np.dtype("object")},
        ),
        (
            pd.period_range("1/1/2011", freq="M", periods=3),
            None,
            pd.PeriodDtype("M"),
            {},
        ),
        (
            pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
            None,
            pd.IntervalDtype("int64", "right"),
            {},
        ),
    ]
)
def test_cases(request):
    """Yield one convert_dtypes case: (data, original dtype, default expected
    dtype, overrides keyed by flattened (keyword, value, ...) tuples)."""
    return request.param
class TestSeriesConvertDtypes:
    """Tests for Series.convert_dtypes across its full keyword matrix."""
    # Exercise every combination of the five boolean keyword arguments.
    @pytest.mark.parametrize("params", product(*[(True, False)] * 5))
    def test_convert_dtypes(
        self,
        test_cases,
        params,
        using_infer_string,
    ):
        data, maindtype, expected_default, expected_other = test_cases
        if (
            hasattr(data, "dtype")
            and lib.is_np_dtype(data.dtype, "M")
            and isinstance(maindtype, pd.DatetimeTZDtype)
        ):
            # this astype is deprecated in favor of tz_localize
            msg = "Cannot use .astype to convert from timezone-naive dtype"
            with pytest.raises(TypeError, match=msg):
                pd.Series(data, dtype=maindtype)
            return
        if maindtype is not None:
            series = pd.Series(data, dtype=maindtype)
        else:
            series = pd.Series(data)
        result = series.convert_dtypes(*params)
        param_names = [
            "infer_objects",
            "convert_string",
            "convert_integer",
            "convert_boolean",
            "convert_floating",
        ]
        params_dict = dict(zip(param_names, params))
        expected_dtype = expected_default
        # Apply case-specific overrides; a spec matches when every
        # (keyword, value) pair in its flattened tuple matches the
        # current params. Later matches win.
        for spec, dtype in expected_other.items():
            if all(params_dict[key] is val for key, val in zip(spec[::2], spec[1::2])):
                expected_dtype = dtype
        if (
            using_infer_string
            and expected_default == "string"
            and expected_dtype == object
            and params[0]
            and not params[1]
        ):
            # If we would convert with convert strings then infer_objects converts
            # with the option
            expected_dtype = "string[pyarrow_numpy]"
        expected = pd.Series(data, dtype=expected_dtype)
        tm.assert_series_equal(result, expected)
        # Test that it is a copy
        copy = series.copy(deep=True)
        if result.notna().sum() > 0 and result.dtype in ["interval[int64, right]"]:
            # Setting NaN into an interval dtype warns about the cast.
            with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
                result[result.notna()] = np.nan
        else:
            result[result.notna()] = np.nan
        # Make sure original not changed
        tm.assert_series_equal(series, copy)
    def test_convert_string_dtype(self, nullable_string_dtype):
        # https://github.com/pandas-dev/pandas/issues/31731 -> converting columns
        # that are already string dtype
        df = pd.DataFrame(
            {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype=nullable_string_dtype
        )
        result = df.convert_dtypes()
        tm.assert_frame_equal(df, result)
    def test_convert_bool_dtype(self):
        # GH32287
        df = pd.DataFrame({"A": pd.array([True])})
        tm.assert_frame_equal(df, df.convert_dtypes())
    def test_convert_byte_string_dtype(self):
        # GH-43183
        byte_str = b"binary-string"
        df = pd.DataFrame(data={"A": byte_str}, index=[0])
        result = df.convert_dtypes()
        expected = df
        tm.assert_frame_equal(result, expected)
    @pytest.mark.parametrize(
        "infer_objects, dtype", [(True, "Int64"), (False, "object")]
    )
    def test_convert_dtype_object_with_na(self, infer_objects, dtype):
        # GH#48791
        ser = pd.Series([1, pd.NA])
        result = ser.convert_dtypes(infer_objects=infer_objects)
        expected = pd.Series([1, pd.NA], dtype=dtype)
        tm.assert_series_equal(result, expected)
    @pytest.mark.parametrize(
        "infer_objects, dtype", [(True, "Float64"), (False, "object")]
    )
    def test_convert_dtype_object_with_na_float(self, infer_objects, dtype):
        # GH#48791
        ser = pd.Series([1.5, pd.NA])
        result = ser.convert_dtypes(infer_objects=infer_objects)
        expected = pd.Series([1.5, pd.NA], dtype=dtype)
        tm.assert_series_equal(result, expected)
    def test_convert_dtypes_pyarrow_to_np_nullable(self):
        # GH 53648
        pytest.importorskip("pyarrow")
        ser = pd.Series(range(2), dtype="int32[pyarrow]")
        result = ser.convert_dtypes(dtype_backend="numpy_nullable")
        expected = pd.Series(range(2), dtype="Int32")
        tm.assert_series_equal(result, expected)
    def test_convert_dtypes_pyarrow_null(self):
        # GH#55346
        pa = pytest.importorskip("pyarrow")
        ser = pd.Series([None, None])
        result = ser.convert_dtypes(dtype_backend="pyarrow")
        expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,91 @@
import numpy as np
import pytest
from pandas import (
Series,
Timestamp,
)
import pandas._testing as tm
class TestCopy:
    """Tests for Series.copy under legacy, CoW-warning and CoW modes."""
    @pytest.mark.parametrize("deep", ["default", None, False, True])
    def test_copy(self, deep, using_copy_on_write, warn_copy_on_write):
        ser = Series(np.arange(10), dtype="float64")
        # default deep is True
        if deep == "default":
            ser2 = ser.copy()
        else:
            ser2 = ser.copy(deep=deep)
        if using_copy_on_write:
            # INFO(CoW) a shallow copy doesn't yet copy the data
            # but parent will not be modified (CoW)
            if deep is None or deep is False:
                assert np.may_share_memory(ser.values, ser2.values)
            else:
                assert not np.may_share_memory(ser.values, ser2.values)
        with tm.assert_cow_warning(warn_copy_on_write and deep is False):
            ser2[::2] = np.nan
        if deep is not False or using_copy_on_write:
            # Did not modify original Series
            assert np.isnan(ser2[0])
            assert not np.isnan(ser[0])
        else:
            # we DID modify the original Series
            assert np.isnan(ser2[0])
            assert np.isnan(ser[0])
    @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
    @pytest.mark.parametrize("deep", ["default", None, False, True])
    def test_copy_tzaware(self, deep, using_copy_on_write):
        # GH#11794
        # copy of tz-aware
        expected = Series([Timestamp("2012/01/01", tz="UTC")])
        expected2 = Series([Timestamp("1999/01/01", tz="UTC")])
        ser = Series([Timestamp("2012/01/01", tz="UTC")])
        if deep == "default":
            ser2 = ser.copy()
        else:
            ser2 = ser.copy(deep=deep)
        if using_copy_on_write:
            # INFO(CoW) a shallow copy doesn't yet copy the data
            # but parent will not be modified (CoW)
            if deep is None or deep is False:
                assert np.may_share_memory(ser.values, ser2.values)
            else:
                assert not np.may_share_memory(ser.values, ser2.values)
        ser2[0] = Timestamp("1999/01/01", tz="UTC")
        # default deep is True
        if deep is not False or using_copy_on_write:
            # Did not modify original Series
            tm.assert_series_equal(ser2, expected2)
            tm.assert_series_equal(ser, expected)
        else:
            # we DID modify the original Series
            tm.assert_series_equal(ser2, expected2)
            tm.assert_series_equal(ser, expected2)
    def test_copy_name(self, datetime_series):
        # A copy keeps the Series name.
        result = datetime_series.copy()
        assert result.name == datetime_series.name
    def test_copy_index_name_checking(self, datetime_series):
        # don't want to be able to modify the index stored elsewhere after
        # making a copy
        datetime_series.index.name = None
        assert datetime_series.index.name is None
        # NOTE(review): this assert is tautologically true as written;
        # presumably it was meant to compare against a copy — confirm.
        assert datetime_series is datetime_series
        cp = datetime_series.copy()
        cp.index.name = "foo"
        # Renaming the copy's index must not leak back to the original.
        assert datetime_series.index.name is None

View File

@ -0,0 +1,34 @@
import numpy as np
import pandas as pd
from pandas import (
Categorical,
Series,
)
import pandas._testing as tm
class TestSeriesCount:
def test_count(self, datetime_series):
assert datetime_series.count() == len(datetime_series)
datetime_series[::2] = np.nan
assert datetime_series.count() == np.isfinite(datetime_series).sum()
def test_count_inf_as_na(self):
# GH#29478
ser = Series([pd.Timestamp("1990/1/1")])
msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pd.option_context("use_inf_as_na", True):
assert ser.count() == 1
def test_count_categorical(self):
ser = Series(
Categorical(
[np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
)
)
result = ser.count()
assert result == 2

View File

@ -0,0 +1,185 @@
import math
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
date_range,
isna,
)
import pandas._testing as tm
class TestSeriesCov:
def test_cov(self, datetime_series):
# full overlap
tm.assert_almost_equal(
datetime_series.cov(datetime_series), datetime_series.std() ** 2
)
# partial overlap
tm.assert_almost_equal(
datetime_series[:15].cov(datetime_series[5:]),
datetime_series[5:15].std() ** 2,
)
# No overlap
assert np.isnan(datetime_series[::2].cov(datetime_series[1::2]))
# all NA
cp = datetime_series[:10].copy()
cp[:] = np.nan
assert isna(cp.cov(cp))
# min_periods
assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12))
ts1 = datetime_series[:15].reindex(datetime_series.index)
ts2 = datetime_series[5:].reindex(datetime_series.index)
assert isna(ts1.cov(ts2, min_periods=12))
@pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3])
@pytest.mark.parametrize("dtype", ["float64", "Float64"])
def test_cov_ddof(self, test_ddof, dtype):
# GH#34611
np_array1 = np.random.default_rng(2).random(10)
np_array2 = np.random.default_rng(2).random(10)
s1 = Series(np_array1, dtype=dtype)
s2 = Series(np_array2, dtype=dtype)
result = s1.cov(s2, ddof=test_ddof)
expected = np.cov(np_array1, np_array2, ddof=test_ddof)[0][1]
assert math.isclose(expected, result)
class TestSeriesCorr:
    """Tests for Series.corr with pearson, rank, and callable methods."""
    @pytest.mark.parametrize("dtype", ["float64", "Float64"])
    def test_corr(self, datetime_series, dtype):
        stats = pytest.importorskip("scipy.stats")
        datetime_series = datetime_series.astype(dtype)
        # full overlap
        tm.assert_almost_equal(datetime_series.corr(datetime_series), 1)
        # partial overlap
        tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1)
        # min_periods larger than the overlap -> NaN
        assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12))
        ts1 = datetime_series[:15].reindex(datetime_series.index)
        ts2 = datetime_series[5:].reindex(datetime_series.index)
        assert isna(ts1.corr(ts2, min_periods=12))
        # No overlap
        assert np.isnan(datetime_series[::2].corr(datetime_series[1::2]))
        # all NA
        cp = datetime_series[:10].copy()
        cp[:] = np.nan
        assert isna(cp.corr(cp))
        # Cross-check the pearson result against scipy.
        A = Series(
            np.arange(10, dtype=np.float64),
            index=date_range("2020-01-01", periods=10),
            name="ts",
        )
        B = A.copy()
        result = A.corr(B)
        expected, _ = stats.pearsonr(A, B)
        tm.assert_almost_equal(result, expected)
    def test_corr_rank(self):
        stats = pytest.importorskip("scipy.stats")
        # kendall and spearman
        A = Series(
            np.arange(10, dtype=np.float64),
            index=date_range("2020-01-01", periods=10),
            name="ts",
        )
        B = A.copy()
        A[-5:] = A[:5].copy()
        result = A.corr(B, method="kendall")
        expected = stats.kendalltau(A, B)[0]
        tm.assert_almost_equal(result, expected)
        result = A.corr(B, method="spearman")
        expected = stats.spearmanr(A, B)[0]
        tm.assert_almost_equal(result, expected)
        # results from R
        A = Series(
            [
                -0.89926396,
                0.94209606,
                -1.03289164,
                -0.95445587,
                0.76910310,
                -0.06430576,
                -2.09704447,
                0.40660407,
                -0.89926396,
                0.94209606,
            ]
        )
        B = Series(
            [
                -1.01270225,
                -0.62210117,
                -1.56895827,
                0.59592943,
                -0.01680292,
                1.17258718,
                -1.06009347,
                -0.10222060,
                -0.89076239,
                0.89372375,
            ]
        )
        kexp = 0.4319297
        sexp = 0.5853767
        tm.assert_almost_equal(A.corr(B, method="kendall"), kexp)
        tm.assert_almost_equal(A.corr(B, method="spearman"), sexp)
    def test_corr_invalid_method(self):
        # GH PR #22298
        s1 = Series(np.random.default_rng(2).standard_normal(10))
        s2 = Series(np.random.default_rng(2).standard_normal(10))
        msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, "
        with pytest.raises(ValueError, match=msg):
            s1.corr(s2, method="____")
    def test_corr_callable_method(self, datetime_series):
        # simple correlation example
        # returns 1 if exact equality, 0 otherwise
        my_corr = lambda a, b: 1.0 if (a == b).all() else 0.0
        # simple example
        s1 = Series([1, 2, 3, 4, 5])
        s2 = Series([5, 4, 3, 2, 1])
        expected = 0
        tm.assert_almost_equal(s1.corr(s2, method=my_corr), expected)
        # full overlap
        tm.assert_almost_equal(
            datetime_series.corr(datetime_series, method=my_corr), 1.0
        )
        # partial overlap
        tm.assert_almost_equal(
            datetime_series[:15].corr(datetime_series[5:], method=my_corr), 1.0
        )
        # No overlap
        assert np.isnan(
            datetime_series[::2].corr(datetime_series[1::2], method=my_corr)
        )
        # dataframe example
        df = pd.DataFrame([s1, s2])
        expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}])
        tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected)

View File

@ -0,0 +1,203 @@
import numpy as np
import pytest
from pandas.compat.numpy import np_version_gte1p25
from pandas.core.dtypes.common import (
is_complex_dtype,
is_extension_array_dtype,
)
from pandas import (
NA,
Period,
Series,
Timedelta,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestSeriesDescribe:
    """Tests for Series.describe across numeric, object, datetime and EA dtypes."""
    def test_describe_ints(self):
        ser = Series([0, 1, 2, 3, 4], name="int_data")
        result = ser.describe()
        expected = Series(
            [5, 2, ser.std(), 0, 1, 2, 3, 4],
            name="int_data",
            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
        )
        tm.assert_series_equal(result, expected)
    def test_describe_bools(self):
        # Booleans get the categorical-style summary.
        ser = Series([True, True, False, False, False], name="bool_data")
        result = ser.describe()
        expected = Series(
            [5, 2, False, 3], name="bool_data", index=["count", "unique", "top", "freq"]
        )
        tm.assert_series_equal(result, expected)
    def test_describe_strs(self):
        ser = Series(["a", "a", "b", "c", "d"], name="str_data")
        result = ser.describe()
        expected = Series(
            [5, 4, "a", 2], name="str_data", index=["count", "unique", "top", "freq"]
        )
        tm.assert_series_equal(result, expected)
    def test_describe_timedelta64(self):
        ser = Series(
            [
                Timedelta("1 days"),
                Timedelta("2 days"),
                Timedelta("3 days"),
                Timedelta("4 days"),
                Timedelta("5 days"),
            ],
            name="timedelta_data",
        )
        result = ser.describe()
        expected = Series(
            [5, ser[2], ser.std(), ser[0], ser[1], ser[2], ser[3], ser[4]],
            name="timedelta_data",
            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
        )
        tm.assert_series_equal(result, expected)
    def test_describe_period(self):
        ser = Series(
            [Period("2020-01", "M"), Period("2020-01", "M"), Period("2019-12", "M")],
            name="period_data",
        )
        result = ser.describe()
        expected = Series(
            [3, 2, ser[0], 2],
            name="period_data",
            index=["count", "unique", "top", "freq"],
        )
        tm.assert_series_equal(result, expected)
    def test_describe_empty_object(self):
        # https://github.com/pandas-dev/pandas/issues/27183
        s = Series([None, None], dtype=object)
        result = s.describe()
        expected = Series(
            [0, 0, np.nan, np.nan],
            dtype=object,
            index=["count", "unique", "top", "freq"],
        )
        tm.assert_series_equal(result, expected)
        result = s[:0].describe()
        tm.assert_series_equal(result, expected)
        # ensure NaN, not None
        assert np.isnan(result.iloc[2])
        assert np.isnan(result.iloc[3])
    def test_describe_with_tz(self, tz_naive_fixture):
        # GH 21332
        tz = tz_naive_fixture
        name = str(tz_naive_fixture)
        start = Timestamp(2018, 1, 1)
        end = Timestamp(2018, 1, 5)
        s = Series(date_range(start, end, tz=tz), name=name)
        result = s.describe()
        expected = Series(
            [
                5,
                Timestamp(2018, 1, 3).tz_localize(tz),
                start.tz_localize(tz),
                s[1],
                s[2],
                s[3],
                end.tz_localize(tz),
            ],
            name=name,
            index=["count", "mean", "min", "25%", "50%", "75%", "max"],
        )
        tm.assert_series_equal(result, expected)
    def test_describe_with_tz_numeric(self):
        name = tz = "CET"
        start = Timestamp(2018, 1, 1)
        end = Timestamp(2018, 1, 5)
        s = Series(date_range(start, end, tz=tz), name=name)
        result = s.describe()
        expected = Series(
            [
                5,
                Timestamp("2018-01-03 00:00:00", tz=tz),
                Timestamp("2018-01-01 00:00:00", tz=tz),
                Timestamp("2018-01-02 00:00:00", tz=tz),
                Timestamp("2018-01-03 00:00:00", tz=tz),
                Timestamp("2018-01-04 00:00:00", tz=tz),
                Timestamp("2018-01-05 00:00:00", tz=tz),
            ],
            name=name,
            index=["count", "mean", "min", "25%", "50%", "75%", "max"],
        )
        tm.assert_series_equal(result, expected)
    def test_datetime_is_numeric_includes_datetime(self):
        # Datetimes are summarized numerically (mean/quantiles), no std.
        s = Series(date_range("2012", periods=3))
        result = s.describe()
        expected = Series(
            [
                3,
                Timestamp("2012-01-02"),
                Timestamp("2012-01-01"),
                Timestamp("2012-01-01T12:00:00"),
                Timestamp("2012-01-02"),
                Timestamp("2012-01-02T12:00:00"),
                Timestamp("2012-01-03"),
            ],
            index=["count", "mean", "min", "25%", "50%", "75%", "max"],
        )
        tm.assert_series_equal(result, expected)
    @pytest.mark.filterwarnings("ignore:Casting complex values to real discards")
    def test_numeric_result_dtype(self, any_numeric_dtype):
        # GH#48340 - describe should always return float on non-complex numeric input
        if is_extension_array_dtype(any_numeric_dtype):
            dtype = "Float64"
        else:
            dtype = "complex128" if is_complex_dtype(any_numeric_dtype) else None
        ser = Series([0, 1], dtype=any_numeric_dtype)
        if dtype == "complex128" and np_version_gte1p25:
            # Newer numpy rejects complex input to the quantile machinery.
            with pytest.raises(
                TypeError, match=r"^a must be an array of real numbers$"
            ):
                ser.describe()
            return
        result = ser.describe()
        expected = Series(
            [
                2.0,
                0.5,
                ser.std(),
                0,
                0.25,
                0.5,
                0.75,
                1.0,
            ],
            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
            dtype=dtype,
        )
        tm.assert_series_equal(result, expected)
    def test_describe_one_element_ea(self):
        # GH#52515
        ser = Series([0.0], dtype="Float64")
        with tm.assert_produces_warning(None):
            result = ser.describe()
        # std of a single observation is NA, not an error or warning.
        expected = Series(
            [1, 0, NA, 0, 0, 0, 0, 0],
            dtype="Float64",
            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
        )
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,88 @@
import numpy as np
import pytest
from pandas import (
Series,
TimedeltaIndex,
date_range,
)
import pandas._testing as tm
class TestSeriesDiff:
    """Tests for Series.diff across dtypes."""

    def test_diff_np(self):
        # TODO(__array_function__): could make np.diff return a Series
        # matching ser.diff()
        ser = Series(np.arange(5))
        tm.assert_numpy_array_equal(np.diff(ser), np.array([1, 1, 1, 1]))

    def test_diff_int(self):
        # Large int64 values must not lose precision when differenced.
        base = 10000000000000000
        ser = Series([base, base + 1])
        assert ser.diff()[1] == 1

    def test_diff_tz(self):
        # Combined datetime diff, normal diff and boolean diff test
        ts = Series(
            np.arange(10, dtype=np.float64),
            index=date_range("2020-01-01", periods=10),
            name="ts",
        )
        ts.diff()
        # negative periods look forward
        tm.assert_series_equal(ts.diff(-1), ts - ts.shift(-1))
        # zero periods differences against itself
        tm.assert_series_equal(ts.diff(0), ts - ts)

    def test_diff_dt64(self):
        # datetime diff (GH#3100)
        ser = Series(date_range("20130102", periods=5))
        result = ser.diff()
        expected = ser - ser.shift(1)
        tm.assert_series_equal(result, expected)
        # differencing the resulting timedeltas also works
        tm.assert_series_equal(result - result.shift(1), expected.diff())

    def test_diff_dt64tz(self):
        # tz-aware datetimes difference into timedeltas
        ser = Series(
            date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo"
        )
        expected = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
        tm.assert_series_equal(ser.diff(), expected)

    @pytest.mark.parametrize(
        "input,output,diff",
        [([False, True, True, False, False], [np.nan, True, False, True, False], 1)],
    )
    def test_diff_bool(self, input, output, diff):
        # boolean series (test for fixing #17294)
        tm.assert_series_equal(Series(input).diff(), Series(output))

    def test_diff_object_dtype(self):
        # object series fall back to elementwise subtraction
        ser = Series([False, True, 5.0, np.nan, True, False])
        tm.assert_series_equal(ser.diff(), ser - ser.shift(1))

View File

@ -0,0 +1,99 @@
import pytest
from pandas import (
Index,
Series,
)
import pandas._testing as tm
from pandas.api.types import is_bool_dtype
@pytest.mark.parametrize(
    "data, index, drop_labels, axis, expected_data, expected_index",
    [
        # Unique Index
        ([1, 2], ["one", "two"], ["two"], 0, [1], ["one"]),
        ([1, 2], ["one", "two"], ["two"], "rows", [1], ["one"]),
        ([1, 1, 2], ["one", "two", "one"], ["two"], 0, [1, 2], ["one", "one"]),
        # GH 5248 Non-Unique Index
        ([1, 1, 2], ["one", "two", "one"], "two", 0, [1, 2], ["one", "one"]),
        ([1, 1, 2], ["one", "two", "one"], ["one"], 0, [1], ["two"]),
        ([1, 1, 2], ["one", "two", "one"], "one", 0, [1], ["two"]),
    ],
)
def test_drop_unique_and_non_unique_index(
    data, index, axis, drop_labels, expected_data, expected_index
):
    # Dropping by label removes every row carrying that label,
    # whether the index is unique or not.
    result = Series(data=data, index=index).drop(drop_labels, axis=axis)
    tm.assert_series_equal(result, Series(data=expected_data, index=expected_index))
@pytest.mark.parametrize(
    "data, index, drop_labels, axis, error_type, error_desc",
    [
        # single string/tuple-like
        (range(3), list("abc"), "bc", 0, KeyError, "not found in axis"),
        # bad axis
        (range(3), list("abc"), ("a",), 0, KeyError, "not found in axis"),
        (range(3), list("abc"), "one", "columns", ValueError, "No axis named columns"),
    ],
)
def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc):
    # Missing labels (or an invalid axis) must raise with a clear message.
    series = Series(data, index=index)
    with pytest.raises(error_type, match=error_desc):
        series.drop(drop_labels, axis=axis)
def test_drop_with_ignore_errors():
    # errors="ignore" silently skips labels that are absent.
    ser = Series(range(3), index=list("abc"))
    tm.assert_series_equal(ser.drop("bc", errors="ignore"), ser)
    tm.assert_series_equal(ser.drop(["a", "d"], errors="ignore"), ser.iloc[1:])

    # GH 8522: a boolean index keeps its dtype and drops by boolean label.
    bool_ser = Series([2, 3], index=[True, False])
    assert is_bool_dtype(bool_ser.index)
    assert bool_ser.index.dtype == bool
    tm.assert_series_equal(bool_ser.drop(True), Series([3], index=[False]))
@pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 3]])
@pytest.mark.parametrize("drop_labels", [[], [1], [3]])
def test_drop_empty_list(index, drop_labels):
    # GH 21494: dropping by a (possibly empty) label list keeps exactly
    # the labels that were not dropped.
    remaining = [label for label in index if label not in drop_labels]
    result = Series(index=index, dtype=object).drop(drop_labels)
    tm.assert_series_equal(result, Series(index=remaining, dtype=object))
@pytest.mark.parametrize(
    "data, index, drop_labels",
    [
        (None, [1, 2, 3], [1, 4]),
        (None, [1, 2, 2], [1, 4]),
        ([2, 3], [0, 1], [False, True]),
    ],
)
def test_drop_non_empty_list(data, index, drop_labels):
    # GH 21494 and GH 16877: any label absent from the axis must raise.
    series = Series(data=data, index=index, dtype=object if data is None else None)
    with pytest.raises(KeyError, match="not found in axis"):
        series.drop(drop_labels)
def test_drop_index_ea_dtype(any_numeric_ea_dtype):
# GH#45860
df = Series(100, index=Index([1, 2, 2], dtype=any_numeric_ea_dtype))
idx = Index([df.index[1]])
result = df.drop(idx)
expected = Series(100, index=Index([1], dtype=any_numeric_ea_dtype))
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,267 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Categorical,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, False, False, True, True, False])),
        ("last", Series([False, True, True, False, False, False, False])),
        (False, Series([False, True, True, False, True, True, False])),
    ],
)
def test_drop_duplicates(any_numpy_dtype, keep, expected):
    """duplicated and drop_duplicates agree, both out-of-place and inplace."""
    ser = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype))
    if ser.dtype == "bool":
        pytest.skip("tested separately in test_drop_duplicates_bool")
    tm.assert_series_equal(ser.duplicated(keep=keep), expected)
    tm.assert_series_equal(ser.drop_duplicates(keep=keep), ser[~expected])

    # inplace variant mutates in place and returns None per pandas convention
    mutated = ser.copy()
    assert mutated.drop_duplicates(keep=keep, inplace=True) is None
    tm.assert_series_equal(mutated, ser[~expected])
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, True])),
        ("last", Series([True, True, False, False])),
        (False, Series([True, True, True, True])),
    ],
)
def test_drop_duplicates_bool(keep, expected):
    """Boolean Series: duplicated/drop_duplicates agree, inplace included."""
    ser = Series([True, False, True, False])
    tm.assert_series_equal(ser.duplicated(keep=keep), expected)
    tm.assert_series_equal(ser.drop_duplicates(keep=keep), ser[~expected])

    mutated = ser.copy()
    assert mutated.drop_duplicates(keep=keep, inplace=True) is None
    tm.assert_series_equal(mutated, ser[~expected])
@pytest.mark.parametrize("values", [[], list(range(5))])
def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values):
    """Without duplicates, drop_duplicates returns a shallow copy of the input."""
    ser = Series(values, dtype=np.dtype(any_numpy_dtype))
    expected = Series([False] * len(ser), dtype="bool")
    if ser.dtype == "bool":
        # 0 -> False and 1 -> True; any other value would be duplicated
        ser = ser[:2]
        expected = expected[:2]
    tm.assert_series_equal(ser.duplicated(keep=keep), expected)

    result = ser.drop_duplicates(keep=keep)
    tm.assert_series_equal(result, ser)
    # validate shallow copy
    assert result is not ser
class TestSeriesDropDuplicates:
    """duplicated/drop_duplicates on Categorical- and Arrow-backed Series.

    Fix: the local in ``test_drop_duplicates_arrow_strings`` was misspelled
    ``expecetd``; renamed to ``expected`` for consistency with the rest of
    the suite.  ``ordered`` and ``nulls_fixture`` come from conftest.
    """

    @pytest.fixture(
        params=["int_", "uint", "float64", "str_", "timedelta64[h]", "datetime64[D]"]
    )
    def dtype(self, request):
        # dtype of the values stored inside the Categorical
        return request.param

    @pytest.fixture
    def cat_series_unused_category(self, dtype, ordered):
        # Test case 1: categories include values never present in the data
        cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype))
        input1 = np.array([1, 2, 3, 3], dtype=np.dtype(dtype))
        cat = Categorical(input1, categories=cat_array, ordered=ordered)
        tc1 = Series(cat)
        return tc1

    def test_drop_duplicates_categorical_non_bool(self, cat_series_unused_category):
        tc1 = cat_series_unused_category
        expected = Series([False, False, False, True])

        result = tc1.duplicated()
        tm.assert_series_equal(result, expected)

        result = tc1.drop_duplicates()
        tm.assert_series_equal(result, tc1[~expected])

        # inplace mutates and returns None
        sc = tc1.copy()
        return_value = sc.drop_duplicates(inplace=True)
        assert return_value is None
        tm.assert_series_equal(sc, tc1[~expected])

    def test_drop_duplicates_categorical_non_bool_keeplast(
        self, cat_series_unused_category
    ):
        tc1 = cat_series_unused_category
        expected = Series([False, False, True, False])

        result = tc1.duplicated(keep="last")
        tm.assert_series_equal(result, expected)

        result = tc1.drop_duplicates(keep="last")
        tm.assert_series_equal(result, tc1[~expected])

        sc = tc1.copy()
        return_value = sc.drop_duplicates(keep="last", inplace=True)
        assert return_value is None
        tm.assert_series_equal(sc, tc1[~expected])

    def test_drop_duplicates_categorical_non_bool_keepfalse(
        self, cat_series_unused_category
    ):
        tc1 = cat_series_unused_category
        expected = Series([False, False, True, True])

        result = tc1.duplicated(keep=False)
        tm.assert_series_equal(result, expected)

        result = tc1.drop_duplicates(keep=False)
        tm.assert_series_equal(result, tc1[~expected])

        sc = tc1.copy()
        return_value = sc.drop_duplicates(keep=False, inplace=True)
        assert return_value is None
        tm.assert_series_equal(sc, tc1[~expected])

    @pytest.fixture
    def cat_series(self, dtype, ordered):
        # no unused categories, unlike cat_series_unused_category
        cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype))
        input2 = np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype))
        cat = Categorical(input2, categories=cat_array, ordered=ordered)
        tc2 = Series(cat)
        return tc2

    def test_drop_duplicates_categorical_non_bool2(self, cat_series):
        tc2 = cat_series
        expected = Series([False, False, False, False, True, True, False])

        result = tc2.duplicated()
        tm.assert_series_equal(result, expected)

        result = tc2.drop_duplicates()
        tm.assert_series_equal(result, tc2[~expected])

        sc = tc2.copy()
        return_value = sc.drop_duplicates(inplace=True)
        assert return_value is None
        tm.assert_series_equal(sc, tc2[~expected])

    def test_drop_duplicates_categorical_non_bool2_keeplast(self, cat_series):
        tc2 = cat_series
        expected = Series([False, True, True, False, False, False, False])

        result = tc2.duplicated(keep="last")
        tm.assert_series_equal(result, expected)

        result = tc2.drop_duplicates(keep="last")
        tm.assert_series_equal(result, tc2[~expected])

        sc = tc2.copy()
        return_value = sc.drop_duplicates(keep="last", inplace=True)
        assert return_value is None
        tm.assert_series_equal(sc, tc2[~expected])

    def test_drop_duplicates_categorical_non_bool2_keepfalse(self, cat_series):
        tc2 = cat_series
        expected = Series([False, True, True, False, True, True, False])

        result = tc2.duplicated(keep=False)
        tm.assert_series_equal(result, expected)

        result = tc2.drop_duplicates(keep=False)
        tm.assert_series_equal(result, tc2[~expected])

        sc = tc2.copy()
        return_value = sc.drop_duplicates(keep=False, inplace=True)
        assert return_value is None
        tm.assert_series_equal(sc, tc2[~expected])

    def test_drop_duplicates_categorical_bool(self, ordered):
        tc = Series(
            Categorical(
                [True, False, True, False], categories=[True, False], ordered=ordered
            )
        )

        # keep="first" (the default)
        expected = Series([False, False, True, True])
        tm.assert_series_equal(tc.duplicated(), expected)
        tm.assert_series_equal(tc.drop_duplicates(), tc[~expected])
        sc = tc.copy()
        return_value = sc.drop_duplicates(inplace=True)
        assert return_value is None
        tm.assert_series_equal(sc, tc[~expected])

        # keep="last"
        expected = Series([True, True, False, False])
        tm.assert_series_equal(tc.duplicated(keep="last"), expected)
        tm.assert_series_equal(tc.drop_duplicates(keep="last"), tc[~expected])
        sc = tc.copy()
        return_value = sc.drop_duplicates(keep="last", inplace=True)
        assert return_value is None
        tm.assert_series_equal(sc, tc[~expected])

        # keep=False drops every duplicated value entirely
        expected = Series([True, True, True, True])
        tm.assert_series_equal(tc.duplicated(keep=False), expected)
        tm.assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected])
        sc = tc.copy()
        return_value = sc.drop_duplicates(keep=False, inplace=True)
        assert return_value is None
        tm.assert_series_equal(sc, tc[~expected])

    def test_drop_duplicates_categorical_bool_na(self, nulls_fixture):
        # GH#44351
        ser = Series(
            Categorical(
                [True, False, True, False, nulls_fixture],
                categories=[True, False],
                ordered=True,
            )
        )
        result = ser.drop_duplicates()
        expected = Series(
            Categorical([True, False, np.nan], categories=[True, False], ordered=True),
            index=[0, 1, 4],
        )
        tm.assert_series_equal(result, expected)

    def test_drop_duplicates_ignore_index(self):
        # GH#48304
        ser = Series([1, 2, 2, 3])
        result = ser.drop_duplicates(ignore_index=True)
        expected = Series([1, 2, 3])
        tm.assert_series_equal(result, expected)

    def test_duplicated_arrow_dtype(self):
        pytest.importorskip("pyarrow")
        ser = Series([True, False, None, False], dtype="bool[pyarrow]")
        result = ser.drop_duplicates()
        expected = Series([True, False, None], dtype="bool[pyarrow]")
        tm.assert_series_equal(result, expected)

    def test_drop_duplicates_arrow_strings(self):
        # GH#54904
        pa = pytest.importorskip("pyarrow")
        ser = Series(["a", "a"], dtype=pd.ArrowDtype(pa.string()))
        result = ser.drop_duplicates()
        # fixed typo: was "expecetd"
        expected = Series(["a"], dtype=pd.ArrowDtype(pa.string()))
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,117 @@
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
IntervalIndex,
NaT,
Period,
Series,
Timestamp,
)
import pandas._testing as tm
class TestDropna:
    # Behavior of Series.dropna across dtypes, including inplace mode.

    def test_dropna_empty(self):
        # Empty Series: dropna is a no-op; inplace returns None per convention.
        ser = Series([], dtype=object)
        assert len(ser.dropna()) == 0
        return_value = ser.dropna(inplace=True)
        assert return_value is None
        assert len(ser) == 0

        # invalid axis
        msg = "No axis named 1 for object type Series"
        with pytest.raises(ValueError, match=msg):
            ser.dropna(axis=1)

    def test_dropna_preserve_name(self, datetime_series):
        # The Series name survives dropna, both out-of-place and inplace.
        datetime_series[:5] = np.nan
        result = datetime_series.dropna()
        assert result.name == datetime_series.name
        name = datetime_series.name
        ts = datetime_series.copy()
        return_value = ts.dropna(inplace=True)
        assert return_value is None
        assert ts.name == name

    def test_dropna_no_nan(self):
        # Without NaNs, dropna returns an equal but distinct object;
        # inplace leaves the data unchanged.
        for ser in [
            Series([1, 2, 3], name="x"),
            Series([False, True, False], name="x"),
        ]:
            result = ser.dropna()
            tm.assert_series_equal(result, ser)
            assert result is not ser
            s2 = ser.copy()
            return_value = s2.dropna(inplace=True)
            assert return_value is None
            tm.assert_series_equal(s2, ser)

    def test_dropna_intervals(self):
        # NaN rows are dropped even when the IntervalIndex itself contains NaN.
        ser = Series(
            [np.nan, 1, 2, 3],
            IntervalIndex.from_arrays([np.nan, 0, 1, 2], [np.nan, 1, 2, 3]),
        )
        result = ser.dropna()
        expected = ser.iloc[1:]
        tm.assert_series_equal(result, expected)

    def test_dropna_period_dtype(self):
        # GH#13737: NaT counts as missing for period dtype.
        ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
        result = ser.dropna()
        expected = Series([Period("2011-01", freq="M")])
        tm.assert_series_equal(result, expected)

    def test_datetime64_tz_dropna(self, unit):
        # `unit` is a conftest fixture -- presumably a datetime resolution
        # such as "s"/"ms"/"us"/"ns"; TODO confirm against conftest.
        # DatetimeLikeBlock
        ser = Series(
            [
                Timestamp("2011-01-01 10:00"),
                NaT,
                Timestamp("2011-01-03 10:00"),
                NaT,
            ],
            dtype=f"M8[{unit}]",
        )
        result = ser.dropna()
        expected = Series(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-03 10:00")],
            index=[0, 2],
            dtype=f"M8[{unit}]",
        )
        tm.assert_series_equal(result, expected)

        # DatetimeTZBlock: same, but timezone-aware
        idx = DatetimeIndex(
            ["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz="Asia/Tokyo"
        ).as_unit(unit)
        ser = Series(idx)
        assert ser.dtype == f"datetime64[{unit}, Asia/Tokyo]"
        result = ser.dropna()
        expected = Series(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-03 10:00", tz="Asia/Tokyo"),
            ],
            index=[0, 2],
            dtype=f"datetime64[{unit}, Asia/Tokyo]",
        )
        assert result.dtype == f"datetime64[{unit}, Asia/Tokyo]"
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("val", [1, 1.5])
    def test_dropna_ignore_index(self, val):
        # GH#31725: ignore_index=True resets the resulting index.
        ser = Series([1, 2, val], index=[3, 2, 1])
        result = ser.dropna(ignore_index=True)
        expected = Series([1, 2, val])
        tm.assert_series_equal(result, expected)

        ser.dropna(ignore_index=True, inplace=True)
        tm.assert_series_equal(ser, expected)

View File

@ -0,0 +1,7 @@
import numpy as np
class TestSeriesDtypes:
def test_dtype(self, datetime_series):
assert datetime_series.dtype == np.dtype("float64")
assert datetime_series.dtypes == np.dtype("float64")

View File

@ -0,0 +1,77 @@
import numpy as np
import pytest
from pandas import (
NA,
Categorical,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, False, True], name="name")),
        ("last", Series([True, True, False, False, False], name="name")),
        (False, Series([True, True, True, False, True], name="name")),
    ],
)
def test_duplicated_keep(keep, expected):
    """duplicated honors keep= and carries the Series name through."""
    ser = Series(["a", "b", "b", "c", "a"], name="name")
    tm.assert_series_equal(ser.duplicated(keep=keep), expected)
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, False, True])),
        ("last", Series([True, True, False, False, False])),
        (False, Series([True, True, True, False, True])),
    ],
)
def test_duplicated_nan_none(keep, expected):
    """In object dtype, NaN duplicates NaN while the lone None stays unique."""
    ser = Series([np.nan, 3, 3, None, np.nan], dtype=object)
    tm.assert_series_equal(ser.duplicated(keep=keep), expected)
def test_duplicated_categorical_bool_na(nulls_fixture):
# GH#44351
ser = Series(
Categorical(
[True, False, True, False, nulls_fixture],
categories=[True, False],
ordered=True,
)
)
result = ser.duplicated()
expected = Series([False, False, True, True, False])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "keep, vals",
    [
        ("last", [True, True, False]),
        ("first", [False, True, True]),
        (False, [True, True, True]),
    ],
)
def test_duplicated_mask(keep, vals):
    # GH#48150
    """pd.NA values compare equal to one another for masked (Int64) dtypes."""
    ser = Series([1, 2, NA, NA, NA], dtype="Int64")
    expected = Series([False, False] + vals)
    tm.assert_series_equal(ser.duplicated(keep=keep), expected)
def test_duplicated_mask_no_duplicated_na(keep):
# GH#48150
ser = Series([1, 2, NA], dtype="Int64")
result = ser.duplicated(keep=keep)
expected = Series([False, False, False])
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,145 @@
from contextlib import nullcontext
import copy
import numpy as np
import pytest
from pandas._libs.missing import is_matching_na
from pandas.compat.numpy import np_version_gte1p25
from pandas.core.dtypes.common import is_float
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.mark.parametrize(
    "arr, idx",
    [
        ([1, 2, 3, 4], [0, 2, 1, 3]),
        ([1, np.nan, 3, np.nan], [0, 2, 1, 3]),
        (
            [1, np.nan, 3, np.nan],
            MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c"), (3, "c")]),
        ),
    ],
)
def test_equals(arr, idx):
    """equals is True for a copy and False once one element is mutated."""
    left = Series(arr, index=idx)
    right = left.copy()
    assert left.equals(right)

    left[1] = 9
    assert not left.equals(right)
@pytest.mark.parametrize(
    "val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None]
)
def test_equals_list_array(val):
    # GH20676 Verify equals operator for list of Numpy arrays
    # Replacing one ndarray element with any other value breaks equality.
    arr = np.array([1, 2])
    s1 = Series([arr, arr])
    s2 = s1.copy()
    assert s1.equals(s2)
    s1[1] = val
    # For str replacements on numpy < 1.25 a FutureWarning is expected --
    # presumably from ndarray-vs-str elementwise comparison; confirm against
    # the numpy 1.25 release notes.
    cm = (
        tm.assert_produces_warning(FutureWarning, check_stacklevel=False)
        if isinstance(val, str) and not np_version_gte1p25
        else nullcontext()
    )
    with cm:
        assert not s1.equals(s2)
def test_equals_false_negative():
    # GH8437 Verify false negative behavior of equals function for dtype object
    """Object-dtype Series that are elementwise-equal must satisfy equals."""
    s1 = Series([False, np.nan])
    s2 = s1.copy()

    template = Series(index=range(2), dtype=object)
    s3 = template.copy()
    s4 = template.copy()
    s5 = template.copy()
    s6 = template.copy()
    s3[:-1] = False
    s4[:-1] = False
    s5[0] = False
    s6[0] = False

    assert s1.equals(s1)
    assert s1.equals(s2)
    assert s1.equals(s3)
    assert s1.equals(s4)
    assert s1.equals(s5)
    assert s5.equals(s6)
def test_equals_matching_nas():
    # matching but not identical NAs
    # Distinct NaT/NaN objects of the same flavor must still compare equal
    # through Series.equals, Index.equals and the backing array's equals.
    left = Series([np.datetime64("NaT")], dtype=object)
    right = Series([np.datetime64("NaT")], dtype=object)
    assert left.equals(right)
    # Index construction from object NaT warns -- presumably due to a
    # pending dtype-inference change; confirm against pandas deprecations.
    with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
        assert Index(left).equals(Index(right))
    assert left.array.equals(right.array)
    left = Series([np.timedelta64("NaT")], dtype=object)
    right = Series([np.timedelta64("NaT")], dtype=object)
    assert left.equals(right)
    with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
        assert Index(left).equals(Index(right))
    assert left.array.equals(right.array)
    left = Series([np.float64("NaN")], dtype=object)
    right = Series([np.float64("NaN")], dtype=object)
    assert left.equals(right)
    # float NaN takes the explicit-dtype path, so no warning context here
    assert Index(left, dtype=left.dtype).equals(Index(right, dtype=right.dtype))
    assert left.array.equals(right.array)
def test_equals_mismatched_nas(nulls_fixture, nulls_fixture2):
# GH#39650
left = nulls_fixture
right = nulls_fixture2
if hasattr(right, "copy"):
right = right.copy()
else:
right = copy.copy(right)
ser = Series([left], dtype=object)
ser2 = Series([right], dtype=object)
if is_matching_na(left, right):
assert ser.equals(ser2)
elif (left is None and is_float(right)) or (right is None and is_float(left)):
assert ser.equals(ser2)
else:
assert not ser.equals(ser2)
def test_equals_none_vs_nan():
    # GH#39650
    """None and np.nan are interchangeable for object-dtype equality checks."""
    with_none = Series([1, None], dtype=object)
    with_nan = Series([1, np.nan], dtype=object)

    assert with_none.equals(with_nan)
    assert Index(with_none, dtype=with_none.dtype).equals(
        Index(with_nan, dtype=with_nan.dtype)
    )
    assert with_none.array.equals(with_nan.array)
def test_equals_None_vs_float():
    # GH#44190
    """An all-None object Series never equals a float-valued one."""
    floats = Series([-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf], dtype=object)
    nones = Series([None] * len(floats))

    # these series were found to be equal due to a bug; check inequality
    # symmetrically for Series, DataFrame and Index wrappers
    for a, b in ((floats, nones), (nones, floats)):
        assert not a.equals(b)
        assert not a.to_frame().equals(b.to_frame())
        assert not Index(a, dtype="object").equals(Index(b, dtype="object"))

View File

@ -0,0 +1,175 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
def test_basic():
    """Lists/tuples explode element-wise; NaN and empty lists yield one NaN row."""
    ser = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"), name="foo")
    expected = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object, name="foo"
    )
    tm.assert_series_equal(ser.explode(), expected)
def test_mixed_type():
    """A mix of list, NaN, None, empty ndarray and Series explodes row-wise."""
    ser = pd.Series(
        [[0, 1, 2], np.nan, None, np.array([]), pd.Series(["a", "b"])], name="foo"
    )
    expected = pd.Series(
        [0, 1, 2, np.nan, None, np.nan, "a", "b"],
        index=[0, 0, 0, 1, 2, 3, 4, 4],
        dtype=object,
        name="foo",
    )
    tm.assert_series_equal(ser.explode(), expected)
def test_empty():
    """Exploding an empty Series yields an equal empty Series."""
    ser = pd.Series(dtype=object)
    tm.assert_series_equal(ser.explode(), ser.copy())
def test_nested_lists():
    """Only one nesting level is exploded; inner lists stay intact."""
    ser = pd.Series([[[1, 2, 3]], [1, 2], 1])
    expected = pd.Series([[1, 2, 3], 1, 2, 1], index=[0, 1, 1, 2])
    tm.assert_series_equal(ser.explode(), expected)
def test_multi_index():
    """MultiIndex labels are repeated for each exploded element."""
    mi = pd.MultiIndex.from_product([list("ab"), range(2)], names=["foo", "bar"])
    ser = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], name="foo", index=mi)

    expected_index = pd.MultiIndex.from_tuples(
        [("a", 0), ("a", 0), ("a", 0), ("a", 1), ("b", 0), ("b", 1), ("b", 1)],
        names=["foo", "bar"],
    )
    expected = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4],
        index=expected_index,
        dtype=object,
        name="foo",
    )
    tm.assert_series_equal(ser.explode(), expected)
def test_large():
    """Exploding an already-exploded (scalar) Series is a no-op."""
    exploded = pd.Series([range(256)]).explode()
    tm.assert_series_equal(exploded.explode(), exploded)
def test_invert_array():
    """Exploding per-row ExtensionArrays round-trips the original column."""
    df = pd.DataFrame({"a": pd.date_range("20190101", periods=3, tz="UTC")})

    as_arrays = df.apply(lambda row: row.array, axis=1)
    tm.assert_series_equal(as_arrays.explode(), df["a"].rename())
@pytest.mark.parametrize(
    "s", [pd.Series([1, 2, 3]), pd.Series(pd.date_range("2019", periods=3, tz="UTC"))]
)
def test_non_object_dtype(s):
    """Non-object dtypes hold no nested values, so explode is a no-op."""
    tm.assert_series_equal(s.explode(), s)
def test_typical_usecase():
    """split + explode turns a comma-separated column into one row per token."""
    df = pd.DataFrame(
        [{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}],
        columns=["var1", "var2"],
    )
    tokens = df.var1.str.split(",").explode()
    result = df[["var2"]].join(tokens)
    expected = pd.DataFrame(
        {"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")},
        columns=["var2", "var1"],
        index=[0, 0, 0, 1, 1, 1],
    )
    tm.assert_frame_equal(result, expected)
def test_nested_EA():
    # a nested EA array
    """Rows holding DatetimeIndex values explode into one tz-aware Series."""
    ser = pd.Series(
        [
            pd.date_range("20170101", periods=3, tz="UTC"),
            pd.date_range("20170104", periods=3, tz="UTC"),
        ]
    )
    expected = pd.Series(
        pd.date_range("20170101", periods=6, tz="UTC"), index=[0, 0, 0, 1, 1, 1]
    )
    tm.assert_series_equal(ser.explode(), expected)
def test_duplicate_index():
    # GH 28005
    """Duplicate index labels are preserved (and repeated) by explode."""
    ser = pd.Series([[1, 2], [3, 4]], index=[0, 0])
    expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object)
    tm.assert_series_equal(ser.explode(), expected)
def test_ignore_index():
    # GH 34932
    """ignore_index=True renumbers the exploded rows 0..n-1."""
    ser = pd.Series([[1, 2], [3, 4]])
    expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)
    tm.assert_series_equal(ser.explode(ignore_index=True), expected)
def test_explode_sets():
    # https://github.com/pandas-dev/pandas/issues/35614
    """Sets explode too; sort the (unordered) result before comparing."""
    ser = pd.Series([{"a", "b", "c"}], index=[1])
    result = ser.explode().sort_values()
    expected = pd.Series(["a", "b", "c"], index=[1, 1, 1])
    tm.assert_series_equal(result, expected)
def test_explode_scalars_can_ignore_index():
    # https://github.com/pandas-dev/pandas/issues/40487
    """ignore_index applies even when nothing needed exploding."""
    ser = pd.Series([1, 2, 3], index=["a", "b", "c"])
    tm.assert_series_equal(ser.explode(ignore_index=True), pd.Series([1, 2, 3]))
@pytest.mark.parametrize("ignore_index", [True, False])
def test_explode_pyarrow_list_type(ignore_index):
    # GH 53602
    """ArrowDtype list columns explode to the element type, nulls included."""
    pa = pytest.importorskip("pyarrow")
    data = [
        [None, None],
        [1],
        [],
        [2, 3],
        None,
    ]
    ser = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
    result = ser.explode(ignore_index=ignore_index)
    expected = pd.Series(
        data=[None, None, 1, None, 2, 3, None],
        index=None if ignore_index else [0, 0, 1, 2, 3, 3, 4],
        dtype=pd.ArrowDtype(pa.int64()),
    )
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("ignore_index", [True, False])
def test_explode_pyarrow_non_list_type(ignore_index):
    """Non-nested ArrowDtype data is returned unchanged by explode."""
    pa = pytest.importorskip("pyarrow")
    ser = pd.Series([1, 2, 3], dtype=pd.ArrowDtype(pa.int64()))
    result = ser.explode(ignore_index=ignore_index)
    expected = pd.Series([1, 2, 3], dtype="int64[pyarrow]", index=[0, 1, 2])
    tm.assert_series_equal(result, expected)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,38 @@
from pandas import (
Index,
Series,
date_range,
)
import pandas._testing as tm
class TestGetNumericData:
    # _get_numeric_data is a private helper; these tests pin its dtype
    # filtering and its shallow-copy semantics under (no-)copy-on-write.

    def test_get_numeric_data_preserve_dtype(
        self, using_copy_on_write, warn_copy_on_write
    ):
        # get the numeric data
        obj = Series([1, 2, 3])
        result = obj._get_numeric_data()
        tm.assert_series_equal(result, obj)

        # returned object is a shallow copy
        with tm.assert_cow_warning(warn_copy_on_write):
            result.iloc[0] = 0
        if using_copy_on_write:
            # CoW: writing through the copy does not touch the parent
            assert obj.iloc[0] == 1
        else:
            # legacy: the shallow copy shares its data with the parent
            assert obj.iloc[0] == 0

        # mixed object data has no numeric block -> empty result
        obj = Series([1, "2", 3.0])
        result = obj._get_numeric_data()
        expected = Series([], dtype=object, index=Index([], dtype=object))
        tm.assert_series_equal(result, expected)

        # booleans count as numeric
        obj = Series([True, False, True])
        result = obj._get_numeric_data()
        tm.assert_series_equal(result, obj)

        # datetimes are not numeric
        obj = Series(date_range("20130101", periods=3))
        result = obj._get_numeric_data()
        expected = Series([], dtype="M8[ns]", index=Index([], dtype=object))
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,8 @@
import pandas._testing as tm
def test_head_tail(string_series):
tm.assert_series_equal(string_series.head(), string_series[:5])
tm.assert_series_equal(string_series.head(0), string_series[0:0])
tm.assert_series_equal(string_series.tail(), string_series[-5:])
tm.assert_series_equal(string_series.tail(0), string_series[0:0])

View File

@ -0,0 +1,56 @@
import numpy as np
from pandas import (
Series,
interval_range,
)
import pandas._testing as tm
class TestInferObjects:
def test_copy(self, index_or_series):
# GH#50096
# case where we don't need to do inference because it is already non-object
obj = index_or_series(np.array([1, 2, 3], dtype="int64"))
result = obj.infer_objects(copy=False)
assert tm.shares_memory(result, obj)
# case where we try to do inference but can't do better than object
obj2 = index_or_series(np.array(["foo", 2], dtype=object))
result2 = obj2.infer_objects(copy=False)
assert tm.shares_memory(result2, obj2)
def test_infer_objects_series(self, index_or_series):
# GH#11221
actual = index_or_series(np.array([1, 2, 3], dtype="O")).infer_objects()
expected = index_or_series([1, 2, 3])
tm.assert_equal(actual, expected)
actual = index_or_series(np.array([1, 2, 3, None], dtype="O")).infer_objects()
expected = index_or_series([1.0, 2.0, 3.0, np.nan])
tm.assert_equal(actual, expected)
# only soft conversions, unconvertible pass thru unchanged
obj = index_or_series(np.array([1, 2, 3, None, "a"], dtype="O"))
actual = obj.infer_objects()
expected = index_or_series([1, 2, 3, None, "a"], dtype=object)
assert actual.dtype == "object"
tm.assert_equal(actual, expected)
def test_infer_objects_interval(self, index_or_series):
# GH#50090
ii = interval_range(1, 10)
obj = index_or_series(ii)
result = obj.astype(object).infer_objects()
tm.assert_equal(result, obj)
def test_infer_objects_bytes(self):
# GH#49650
ser = Series([b"a"], dtype="bytes")
expected = ser.copy()
result = ser.infer_objects()
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,181 @@
from io import StringIO
from string import ascii_uppercase
import textwrap
import numpy as np
import pytest
from pandas.compat import PYPY
from pandas import (
CategoricalIndex,
MultiIndex,
Series,
date_range,
)
def test_info_categorical_column_just_works():
    """Series.info on a categorical Series (and on a filtered view) must not raise."""
    n = 2500
    codes = np.random.default_rng(2).integers(0, 10, size=n, dtype=int)
    ser = Series(np.array(list("abcdefghij")).take(codes)).astype("category")
    ser.isna()
    ser.info(buf=StringIO())

    # a filtered subset keeps working as well
    subset = ser[ser == "d"]
    subset.info(buf=StringIO())
def test_info_categorical():
    # GH14298
    """info works when the index is a CategoricalIndex."""
    ser = Series(np.zeros(2), index=CategoricalIndex(["a", "b"]))
    ser.info(buf=StringIO())
@pytest.mark.parametrize("verbose", [True, False])
def test_info_series(lexsorted_two_level_string_multiindex, verbose):
    # Pins the exact text layout of Series.info with a MultiIndex,
    # with and without the verbose per-column section.
    # NOTE(review): the column spacing inside these literals may have been
    # collapsed by whitespace-lossy tooling -- verify against real output.
    index = lexsorted_two_level_string_multiindex
    ser = Series(range(len(index)), index=index, name="sth")
    buf = StringIO()
    ser.info(verbose=verbose, buf=buf)
    result = buf.getvalue()
    expected = textwrap.dedent(
        """\
<class 'pandas.core.series.Series'>
MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
"""
    )
    if verbose:
        # verbose mode adds the name/count/dtype table
        expected += textwrap.dedent(
            """\
Series name: sth
Non-Null Count Dtype
-------------- -----
10 non-null int64
"""
        )
    expected += textwrap.dedent(
        f"""\
dtypes: int64(1)
memory usage: {ser.memory_usage()}.0+ bytes
"""
    )
    assert result == expected
def test_info_memory():
    # Pins the full info text, including the exact memory-usage figure.
    # NOTE(review): the column spacing inside this literal may have been
    # collapsed by whitespace-lossy tooling -- verify against real output.
    s = Series([1, 2], dtype="i8")
    buf = StringIO()
    s.info(buf=buf)
    result = buf.getvalue()
    memory_bytes = float(s.memory_usage())
    expected = textwrap.dedent(
        f"""\
<class 'pandas.core.series.Series'>
RangeIndex: 2 entries, 0 to 1
Series name: None
Non-Null Count Dtype
-------------- -----
2 non-null int64
dtypes: int64(1)
memory usage: {memory_bytes} bytes
"""
    )
    assert result == expected
def test_info_wide():
    """max_cols is a DataFrame-only argument and must be rejected by Series.info."""
    ser = Series(np.random.default_rng(2).standard_normal(101))
    msg = "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
    with pytest.raises(ValueError, match=msg):
        ser.info(max_cols=1)
def test_info_shows_dtypes():
    # Every dtype's non-null summary line must appear in the info output.
    # NOTE(review): the spacing inside the f-string below may have been
    # collapsed by whitespace-lossy tooling -- verify against real output.
    dtypes = [
        "int64",
        "float64",
        "datetime64[ns]",
        "timedelta64[ns]",
        "complex128",
        "object",
        "bool",
    ]
    n = 10
    for dtype in dtypes:
        s = Series(np.random.default_rng(2).integers(2, size=n).astype(dtype))
        buf = StringIO()
        s.info(buf=buf)
        res = buf.getvalue()
        name = f"{n:d} non-null {dtype}"
        assert name in res
@pytest.mark.xfail(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
    """On CPython, deep=True accounts for object payloads, increasing the figure."""
    indexed = Series({"a": [1]}, index=["foo"])
    assert indexed.memory_usage(index=True, deep=True) > indexed.memory_usage(
        index=True
    )

    objects = Series({"a": ["a"]})
    assert objects.memory_usage(deep=True) > objects.memory_usage()
@pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
    """On PyPy, deep=True makes no difference to the reported memory."""
    indexed = Series({"a": [1]}, index=["foo"])
    assert indexed.memory_usage(index=True, deep=True) == indexed.memory_usage(
        index=True
    )

    objects = Series({"a": ["a"]})
    assert objects.memory_usage(deep=True) == objects.memory_usage()
@pytest.mark.parametrize(
    "series, plus",
    [
        (Series(1, index=[1, 2, 3]), False),
        (Series(1, index=list("ABC")), True),
        (Series(1, index=MultiIndex.from_product([range(3), range(3)])), False),
        (
            Series(1, index=MultiIndex.from_product([range(3), ["foo", "bar"]])),
            True,
        ),
    ],
)
def test_info_memory_usage_qualified(series, plus):
    """Object-backed indexes qualify the memory figure with a '+' lower bound."""
    buf = StringIO()
    series.info(buf=buf)
    assert ("+" in buf.getvalue()) is plus
def test_info_memory_usage_bug_on_multiindex():
    # GH 14308
    # memory usage introspection should not materialize .values
    N = 100
    M = len(ascii_uppercase)
    index = MultiIndex.from_product(
        [list(ascii_uppercase), date_range("20160101", periods=N)],
        names=["id", "date"],
    )
    s = Series(np.random.default_rng(2).standard_normal(N * M), index=index)
    unstacked = s.unstack("id")
    # once materialized, the underlying data sizes match
    assert s.values.nbytes == unstacked.values.nbytes
    # NOTE(review): with deep=True the MultiIndex-backed Series reports more
    # memory than the unstacked frame -- presumably MultiIndex accounting
    # dominates; confirm against the GH 14308 discussion.
    assert s.memory_usage(deep=True) > unstacked.memory_usage(deep=True).sum()
    # high upper bound
    diff = unstacked.memory_usage(deep=True).sum() - s.memory_usage(deep=True)
    assert diff < 2000

View File

@ -0,0 +1,868 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
Index,
MultiIndex,
Series,
date_range,
isna,
)
import pandas._testing as tm
@pytest.fixture(
    params=[
        "linear",
        "index",
        "values",
        "nearest",
        "slinear",
        "zero",
        "quadratic",
        "cubic",
        "barycentric",
        "krogh",
        "polynomial",
        "spline",
        "piecewise_polynomial",
        "from_derivatives",
        "pchip",
        "akima",
        "cubicspline",
    ]
)
def nontemporal_method(request):
    """Fixture that returns an (method name, required kwargs) pair.

    This fixture does not include method 'time' as a parameterization; that
    method requires a Series with a DatetimeIndex, and is generally tested
    separately from these non-temporal methods.
    """
    method = request.param
    # spline/polynomial are the only methods that require an order argument
    if method in ("spline", "polynomial"):
        return method, {"order": 1}
    return method, {}
@pytest.fixture(
    params=[
        "linear",
        "slinear",
        "zero",
        "quadratic",
        "cubic",
        "barycentric",
        "krogh",
        "polynomial",
        "spline",
        "piecewise_polynomial",
        "from_derivatives",
        "pchip",
        "akima",
        "cubicspline",
    ]
)
def interp_methods_ind(request):
    """Fixture that returns a (method name, required kwargs) pair to
    be tested for various Index types.

    This fixture does not include methods - 'time', 'index', 'nearest',
    'values' as a parameterization
    """
    method = request.param
    # spline/polynomial are the only methods that require an order argument
    if method in ("spline", "polynomial"):
        return method, {"order": 1}
    return method, {}
class TestSeriesInterpolateData:
@pytest.mark.xfail(reason="EA.fillna does not handle 'linear' method")
def test_interpolate_period_values(self):
    """Interpolation should fill the NaT gap after casting to period dtype."""
    orig = Series(date_range("2012-01-01", periods=5))
    ser = orig.copy()
    ser[2] = pd.NaT

    # period cast
    interpolated = ser.dt.to_period("D").interpolate()
    tm.assert_series_equal(interpolated, orig.dt.to_period("D"))
def test_interpolate(self, datetime_series):
ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index)
ts_copy = ts.copy()
ts_copy[5:10] = np.nan
linear_interp = ts_copy.interpolate(method="linear")
tm.assert_series_equal(linear_interp, ts)
ord_ts = Series(
[d.toordinal() for d in datetime_series.index], index=datetime_series.index
).astype(float)
ord_ts_copy = ord_ts.copy()
ord_ts_copy[5:10] = np.nan
time_interp = ord_ts_copy.interpolate(method="time")
tm.assert_series_equal(time_interp, ord_ts)
def test_interpolate_time_raises_for_non_timeseries(self):
    # When method='time' is used on a non-TimeSeries that contains a null
    # value, a ValueError should be raised.
    """method='time' demands a DatetimeIndex when missing values exist."""
    non_ts = Series([0, 1, 2, np.nan])
    msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex"
    with pytest.raises(ValueError, match=msg):
        non_ts.interpolate(method="time")
def test_interpolate_cubicspline(self):
    """Cubic-spline interpolation of a linear ramp stays linear."""
    pytest.importorskip("scipy")
    ser = Series([10, 11, 12, 13])
    expected = Series(
        [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
        index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
    )
    # interpolate at the union of the original and the refined index
    dense_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
        float
    )
    result = ser.reindex(dense_index).interpolate(method="cubicspline").loc[1:3]
    tm.assert_series_equal(result, expected)
def test_interpolate_pchip(self):
    """pchip interpolation on a reindexed Series must not raise (GH5977)."""
    pytest.importorskip("scipy")
    ser = Series(np.sort(np.random.default_rng(2).uniform(size=100)))

    # interpolate at the union of the original and the refined index
    dense_index = ser.index.union(
        Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])
    ).astype(float)
    interp_s = ser.reindex(dense_index).interpolate(method="pchip")
    # does not blow up, GH5977
    interp_s.loc[49:51]
def test_interpolate_akima(self):
    # Akima interpolation; the `der` keyword selects the derivative order
    # evaluated at the interpolation points.
    pytest.importorskip("scipy")
    ser = Series([10, 11, 12, 13])
    # interpolate at new_index where `der` is zero
    expected = Series(
        [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
        index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
    )
    new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
        float
    )
    interp_s = ser.reindex(new_index).interpolate(method="akima")
    tm.assert_series_equal(interp_s.loc[1:3], expected)
    # interpolate at new_index where `der` is a non-zero int
    expected = Series(
        [11.0, 1.0, 1.0, 1.0, 12.0, 1.0, 1.0, 1.0, 13.0],
        index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
    )
    new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
        float
    )
    interp_s = ser.reindex(new_index).interpolate(method="akima", der=1)
    tm.assert_series_equal(interp_s.loc[1:3], expected)
def test_interpolate_piecewise_polynomial(self):
    # SciPy piecewise_polynomial interpolation at intermediate points.
    pytest.importorskip("scipy")
    ser = Series([10, 11, 12, 13])
    expected = Series(
        [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
        index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
    )
    # interpolate at new_index
    new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
        float
    )
    interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial")
    tm.assert_series_equal(interp_s.loc[1:3], expected)
def test_interpolate_from_derivatives(self):
    # SciPy from_derivatives interpolation at intermediate points.
    pytest.importorskip("scipy")
    ser = Series([10, 11, 12, 13])
    expected = Series(
        [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
        index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
    )
    # interpolate at new_index
    new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
        float
    )
    interp_s = ser.reindex(new_index).interpolate(method="from_derivatives")
    tm.assert_series_equal(interp_s.loc[1:3], expected)
@pytest.mark.parametrize(
    "kwargs",
    [
        {},
        pytest.param(
            {"method": "polynomial", "order": 1}, marks=td.skip_if_no("scipy")
        ),
    ],
)
def test_interpolate_corners(self, kwargs):
    # Degenerate inputs: an all-NaN series and an empty series pass through
    # interpolate unchanged.
    s = Series([np.nan, np.nan])
    tm.assert_series_equal(s.interpolate(**kwargs), s)
    s = Series([], dtype=object).interpolate()
    tm.assert_series_equal(s.interpolate(**kwargs), s)
def test_interpolate_index_values(self):
    # method='index' interpolates against the (float) index values;
    # verified against np.interp on the non-NaN points.
    s = Series(np.nan, index=np.sort(np.random.default_rng(2).random(30)))
    s.loc[::3] = np.random.default_rng(2).standard_normal(10)
    vals = s.index.values.astype(float)
    result = s.interpolate(method="index")
    expected = s.copy()
    bad = isna(expected.values)
    good = ~bad
    expected = Series(
        np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad]
    )
    tm.assert_series_equal(result[bad], expected)
    # 'values' is synonymous with 'index' for the method kwarg
    other_result = s.interpolate(method="values")
    tm.assert_series_equal(other_result, result)
    tm.assert_series_equal(other_result[bad], expected)
def test_interpolate_non_ts(self):
    # method='time' requires a DatetimeIndex; a RangeIndex must raise.
    s = Series([1, 3, np.nan, np.nan, np.nan, 11])
    msg = (
        "time-weighted interpolation only works on Series or DataFrames "
        "with a DatetimeIndex"
    )
    with pytest.raises(ValueError, match=msg):
        s.interpolate(method="time")
@pytest.mark.parametrize(
    "kwargs",
    [
        {},
        pytest.param(
            {"method": "polynomial", "order": 1}, marks=td.skip_if_no("scipy")
        ),
    ],
)
def test_nan_interpolate(self, kwargs):
    # A single interior NaN is filled linearly (and by order-1 polynomial).
    s = Series([0, 1, np.nan, 3])
    result = s.interpolate(**kwargs)
    expected = Series([0.0, 1.0, 2.0, 3.0])
    tm.assert_series_equal(result, expected)
def test_nan_irregular_index(self):
s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9])
result = s.interpolate()
expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9])
tm.assert_series_equal(result, expected)
def test_nan_str_index(self):
s = Series([0, 1, 2, np.nan], index=list("abcd"))
result = s.interpolate()
expected = Series([0.0, 1.0, 2.0, 2.0], index=list("abcd"))
tm.assert_series_equal(result, expected)
def test_interp_quad(self):
    # Quadratic interpolation recovers x**2 exactly from three known points.
    pytest.importorskip("scipy")
    sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4])
    result = sq.interpolate(method="quadratic")
    expected = Series([1.0, 4.0, 9.0, 16.0], index=[1, 2, 3, 4])
    tm.assert_series_equal(result, expected)
def test_interp_scipy_basic(self):
    # Exercise the SciPy-backed methods (slinear/nearest/zero/quadratic/
    # cubic) and the deprecated `downcast='infer'` keyword for each.
    pytest.importorskip("scipy")
    s = Series([1, 3, np.nan, 12, np.nan, 25])
    # slinear
    expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0])
    result = s.interpolate(method="slinear")
    tm.assert_series_equal(result, expected)
    msg = "The 'downcast' keyword in Series.interpolate is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s.interpolate(method="slinear", downcast="infer")
    tm.assert_series_equal(result, expected)
    # nearest
    expected = Series([1, 3, 3, 12, 12, 25])
    result = s.interpolate(method="nearest")
    tm.assert_series_equal(result, expected.astype("float"))
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s.interpolate(method="nearest", downcast="infer")
    tm.assert_series_equal(result, expected)
    # zero
    expected = Series([1, 3, 3, 12, 12, 25])
    result = s.interpolate(method="zero")
    tm.assert_series_equal(result, expected.astype("float"))
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s.interpolate(method="zero", downcast="infer")
    tm.assert_series_equal(result, expected)
    # quadratic
    # GH #15662.
    expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0])
    result = s.interpolate(method="quadratic")
    tm.assert_series_equal(result, expected)
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s.interpolate(method="quadratic", downcast="infer")
    tm.assert_series_equal(result, expected)
    # cubic
    expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0])
    result = s.interpolate(method="cubic")
    tm.assert_series_equal(result, expected)
def test_interp_limit(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
result = s.interpolate(method="linear", limit=2)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("limit", [-1, 0])
def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, limit):
    # GH 9217: make sure limit is greater than zero.
    s = Series([1, 2, np.nan, 4])
    method, kwargs = nontemporal_method
    with pytest.raises(ValueError, match="Limit must be greater than 0"):
        s.interpolate(limit=limit, method=method, **kwargs)
def test_interpolate_invalid_float_limit(self, nontemporal_method):
    # GH 9217: make sure limit is an integer.
    s = Series([1, 2, np.nan, 4])
    method, kwargs = nontemporal_method
    limit = 2.0
    with pytest.raises(ValueError, match="Limit must be an integer"):
        s.interpolate(limit=limit, method=method, **kwargs)
@pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"])
def test_interp_invalid_method(self, invalid_method):
    # An unknown or missing method raises, and the method error takes
    # precedence over other invalid arguments.
    s = Series([1, 3, np.nan, 12, np.nan, 25])
    msg = f"method must be one of.* Got '{invalid_method}' instead"
    if invalid_method is None:
        msg = "'method' should be a string, not None"
    with pytest.raises(ValueError, match=msg):
        s.interpolate(method=invalid_method)
    # When an invalid method and invalid limit (such as -1) are
    # provided, the error message reflects the invalid method.
    with pytest.raises(ValueError, match=msg):
        s.interpolate(method=invalid_method, limit=-1)
def test_interp_invalid_method_and_value(self):
    # GH#36624
    # fill_value is rejected for method='pad' (itself deprecated here).
    ser = Series([1, 3, np.nan, 12, np.nan, 25])
    msg = "'fill_value' is not a valid keyword for Series.interpolate"
    msg2 = "Series.interpolate with method=pad"
    with pytest.raises(ValueError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=msg2):
            ser.interpolate(fill_value=3, method="pad")
def test_interp_limit_forward(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
# Provide 'forward' (the default) explicitly here.
expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
result = s.interpolate(method="linear", limit=2, limit_direction="forward")
tm.assert_series_equal(result, expected)
result = s.interpolate(method="linear", limit=2, limit_direction="FORWARD")
tm.assert_series_equal(result, expected)
def test_interp_unlimited(self):
# these test are for issue #16282 default Limit=None is unlimited
s = Series([np.nan, 1.0, 3.0, np.nan, np.nan, np.nan, 11.0, np.nan])
expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0])
result = s.interpolate(method="linear", limit_direction="both")
tm.assert_series_equal(result, expected)
expected = Series([np.nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0])
result = s.interpolate(method="linear", limit_direction="forward")
tm.assert_series_equal(result, expected)
expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, np.nan])
result = s.interpolate(method="linear", limit_direction="backward")
tm.assert_series_equal(result, expected)
def test_interp_limit_bad_direction(self):
    # An unrecognized limit_direction raises, with or without a limit.
    s = Series([1, 3, np.nan, np.nan, np.nan, 11])
    msg = (
        r"Invalid limit_direction: expecting one of \['forward', "
        r"'backward', 'both'\], got 'abc'"
    )
    with pytest.raises(ValueError, match=msg):
        s.interpolate(method="linear", limit=2, limit_direction="abc")
    # raises an error even if no limit is specified.
    with pytest.raises(ValueError, match=msg):
        s.interpolate(method="linear", limit_direction="abc")
# limit_area introduced GH #16284
def test_interp_limit_area(self):
    # These tests are for issue #9218 -- fill NaNs in both directions.
    # limit_area='inside' only fills NaNs surrounded by valid values;
    # 'outside' only fills NaNs beyond the first/last valid value.
    s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan])
    expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0, np.nan, np.nan])
    result = s.interpolate(method="linear", limit_area="inside")
    tm.assert_series_equal(result, expected)
    expected = Series(
        [np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan]
    )
    result = s.interpolate(method="linear", limit_area="inside", limit=1)
    tm.assert_series_equal(result, expected)
    expected = Series([np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan])
    result = s.interpolate(
        method="linear", limit_area="inside", limit_direction="both", limit=1
    )
    tm.assert_series_equal(result, expected)
    expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0])
    result = s.interpolate(method="linear", limit_area="outside")
    tm.assert_series_equal(result, expected)
    expected = Series(
        [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]
    )
    result = s.interpolate(method="linear", limit_area="outside", limit=1)
    tm.assert_series_equal(result, expected)
    expected = Series([np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan])
    result = s.interpolate(
        method="linear", limit_area="outside", limit_direction="both", limit=1
    )
    tm.assert_series_equal(result, expected)
    expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan])
    result = s.interpolate(
        method="linear", limit_area="outside", limit_direction="backward"
    )
    tm.assert_series_equal(result, expected)
    # raises an error even if limit type is wrong.
    msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc"
    with pytest.raises(ValueError, match=msg):
        s.interpolate(method="linear", limit_area="abc")
@pytest.mark.parametrize(
    "method, limit_direction, expected",
    [
        ("pad", "backward", "forward"),
        ("ffill", "backward", "forward"),
        ("backfill", "forward", "backward"),
        ("bfill", "forward", "backward"),
        ("pad", "both", "forward"),
        ("ffill", "both", "forward"),
        ("backfill", "both", "backward"),
        ("bfill", "both", "backward"),
    ],
)
def test_interp_limit_direction_raises(self, method, limit_direction, expected):
    # https://github.com/pandas-dev/pandas/pull/34746
    # fill methods only accept the limit_direction they imply.
    s = Series([1, 2, 3])
    msg = f"`limit_direction` must be '{expected}' for method `{method}`"
    msg2 = "Series.interpolate with method="
    with pytest.raises(ValueError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=msg2):
            s.interpolate(method=method, limit_direction=limit_direction)
@pytest.mark.parametrize(
    "data, expected_data, kwargs",
    (
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
            {"method": "pad", "limit_area": "inside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
            {"method": "pad", "limit_area": "inside", "limit": 1},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
            {"method": "pad", "limit_area": "outside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
            {"method": "pad", "limit_area": "outside", "limit": 1},
        ),
        (
            [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
            [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
            {"method": "pad", "limit_area": "outside", "limit": 1},
        ),
        (
            range(5),
            range(5),
            {"method": "pad", "limit_area": "outside", "limit": 1},
        ),
    ),
)
def test_interp_limit_area_with_pad(self, data, expected_data, kwargs):
    # GH26796
    # limit_area combined with the (deprecated) pad fill method.
    s = Series(data)
    expected = Series(expected_data)
    msg = "Series.interpolate with method=pad"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s.interpolate(**kwargs)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "data, expected_data, kwargs",
    (
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "inside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "inside", "limit": 1},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "outside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "outside", "limit": 1},
        ),
    ),
)
def test_interp_limit_area_with_backfill(self, data, expected_data, kwargs):
    # GH26796
    # limit_area combined with the (deprecated) bfill fill method.
    s = Series(data)
    expected = Series(expected_data)
    msg = "Series.interpolate with method=bfill"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s.interpolate(**kwargs)
    tm.assert_series_equal(result, expected)
def test_interp_limit_direction(self):
# These tests are for issue #9218 -- fill NaNs in both directions.
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
expected = Series([1.0, 3.0, np.nan, 7.0, 9.0, 11.0])
result = s.interpolate(method="linear", limit=2, limit_direction="backward")
tm.assert_series_equal(result, expected)
expected = Series([1.0, 3.0, 5.0, np.nan, 9.0, 11.0])
result = s.interpolate(method="linear", limit=1, limit_direction="both")
tm.assert_series_equal(result, expected)
# Check that this works on a longer series of nans.
s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan])
expected = Series([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0])
result = s.interpolate(method="linear", limit=2, limit_direction="both")
tm.assert_series_equal(result, expected)
expected = Series(
[1.0, 3.0, 4.0, np.nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]
)
result = s.interpolate(method="linear", limit=1, limit_direction="both")
tm.assert_series_equal(result, expected)
def test_interp_limit_to_ends(self):
# These test are for issue #10420 -- flow back to beginning.
s = Series([np.nan, np.nan, 5, 7, 9, np.nan])
expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, np.nan])
result = s.interpolate(method="linear", limit=2, limit_direction="backward")
tm.assert_series_equal(result, expected)
expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, 9.0])
result = s.interpolate(method="linear", limit=2, limit_direction="both")
tm.assert_series_equal(result, expected)
def test_interp_limit_before_ends(self):
# These test are for issue #11115 -- limit ends properly.
s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan])
expected = Series([np.nan, np.nan, 5.0, 7.0, 7.0, np.nan])
result = s.interpolate(method="linear", limit=1, limit_direction="forward")
tm.assert_series_equal(result, expected)
expected = Series([np.nan, 5.0, 5.0, 7.0, np.nan, np.nan])
result = s.interpolate(method="linear", limit=1, limit_direction="backward")
tm.assert_series_equal(result, expected)
expected = Series([np.nan, 5.0, 5.0, 7.0, 7.0, np.nan])
result = s.interpolate(method="linear", limit=1, limit_direction="both")
tm.assert_series_equal(result, expected)
def test_interp_all_good(self):
    # A series with no NaNs is returned unchanged by any method.
    pytest.importorskip("scipy")
    s = Series([1, 2, 3])
    result = s.interpolate(method="polynomial", order=1)
    tm.assert_series_equal(result, s)
    # non-scipy
    result = s.interpolate()
    tm.assert_series_equal(result, s)
@pytest.mark.parametrize(
    "check_scipy", [False, pytest.param(True, marks=td.skip_if_no("scipy"))]
)
def test_interp_multiIndex(self, check_scipy):
    # Only linear interpolation is supported on a MultiIndex.
    idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")])
    s = Series([1, 2, np.nan], index=idx)
    expected = s.copy()
    expected.loc[2] = 2
    result = s.interpolate()
    tm.assert_series_equal(result, expected)
    msg = "Only `method=linear` interpolation is supported on MultiIndexes"
    if check_scipy:
        with pytest.raises(ValueError, match=msg):
            s.interpolate(method="polynomial", order=1)
def test_interp_nonmono_raise(self):
    # krogh interpolation requires a monotonic index.
    pytest.importorskip("scipy")
    s = Series([1, np.nan, 3], index=[0, 2, 1])
    msg = "krogh interpolation requires that the index be monotonic"
    with pytest.raises(ValueError, match=msg):
        s.interpolate(method="krogh")
@pytest.mark.parametrize("method", ["nearest", "pad"])
def test_interp_datetime64(self, method, tz_naive_fixture):
    # Interpolation over a (possibly tz-aware) DatetimeIndex; method='pad'
    # is deprecated and must match ffill.
    pytest.importorskip("scipy")
    df = Series(
        [1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture)
    )
    warn = None if method == "nearest" else FutureWarning
    msg = "Series.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        result = df.interpolate(method=method)
    if warn is not None:
        # check the "use ffill instead" is equivalent
        alt = df.ffill()
        tm.assert_series_equal(result, alt)
    expected = Series(
        [1.0, 1.0, 3.0],
        index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture),
    )
    tm.assert_series_equal(result, expected)
def test_interp_pad_datetime64tz_values(self):
    # GH#27628 missing.interpolate_2d should handle datetimetz values
    dti = date_range("2015-04-05", periods=3, tz="US/Central")
    ser = Series(dti)
    ser[1] = pd.NaT
    msg = "Series.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = ser.interpolate(method="pad")
    # check the "use ffill instead" is equivalent
    alt = ser.ffill()
    tm.assert_series_equal(result, alt)
    expected = Series(dti)
    expected[1] = expected[0]
    tm.assert_series_equal(result, expected)
def test_interp_limit_no_nans(self):
    # GH 7173
    # A limit on a NaN-free series is a no-op.
    s = Series([1.0, 2.0, 3.0])
    result = s.interpolate(limit=1)
    expected = s
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("method", ["polynomial", "spline"])
def test_no_order(self, method):
    # see GH-10633, GH-24014
    # spline/polynomial interpolation requires an explicit order.
    pytest.importorskip("scipy")
    s = Series([0, 1, np.nan, 3])
    msg = "You must specify the order of the spline or polynomial"
    with pytest.raises(ValueError, match=msg):
        s.interpolate(method=method)
@pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, np.nan])
def test_interpolate_spline_invalid_order(self, order):
    # Non-positive or NaN spline orders are rejected.
    pytest.importorskip("scipy")
    s = Series([0, 1, np.nan, 3])
    msg = "order needs to be specified and greater than 0"
    with pytest.raises(ValueError, match=msg):
        s.interpolate(method="spline", order=order)
def test_spline(self):
    # Order-1 spline is linear interpolation.
    pytest.importorskip("scipy")
    s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
    result = s.interpolate(method="spline", order=1)
    expected = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
    tm.assert_series_equal(result, expected)
def test_spline_extrapolate(self):
    # `ext` controls extrapolation past the last valid value:
    # ext=3 holds the boundary value, ext=0 extrapolates.
    pytest.importorskip("scipy")
    s = Series([1, 2, 3, 4, np.nan, 6, np.nan])
    result3 = s.interpolate(method="spline", order=1, ext=3)
    expected3 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0])
    tm.assert_series_equal(result3, expected3)
    result1 = s.interpolate(method="spline", order=1, ext=0)
    expected1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
    tm.assert_series_equal(result1, expected1)
def test_spline_smooth(self):
    # The smoothing factor `s` is forwarded to scipy and changes the fit.
    pytest.importorskip("scipy")
    s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7])
    assert (
        s.interpolate(method="spline", order=3, s=0)[5]
        != s.interpolate(method="spline", order=3)[5]
    )
def test_spline_interpolation(self):
    # Explicit cast to float to avoid implicit cast when setting np.nan
    # NOTE(review): result1 and expected1 are the same call — this only
    # checks that spline interpolation is deterministic, not its values.
    pytest.importorskip("scipy")
    s = Series(np.arange(10) ** 2, dtype="float")
    s[np.random.default_rng(2).integers(0, 9, 3)] = np.nan
    result1 = s.interpolate(method="spline", order=1)
    expected1 = s.interpolate(method="spline", order=1)
    tm.assert_series_equal(result1, expected1)
def test_interp_timedelta64(self):
    # GH 6424
    # method='time' works on a TimedeltaIndex, weighting by the index.
    df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 3]))
    result = df.interpolate(method="time")
    expected = Series([1.0, 2.0, 3.0], index=pd.to_timedelta([1, 2, 3]))
    tm.assert_series_equal(result, expected)
    # test for non uniform spacing
    df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 4]))
    result = df.interpolate(method="time")
    expected = Series([1.0, 1.666667, 3.0], index=pd.to_timedelta([1, 2, 4]))
    tm.assert_series_equal(result, expected)
def test_series_interpolate_method_values(self):
    # GH#1646
    # method='values' matches the default on a daily DatetimeIndex.
    rng = date_range("1/1/2000", "1/20/2000", freq="D")
    ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
    ts[::2] = np.nan
    result = ts.interpolate(method="values")
    exp = ts.interpolate()
    tm.assert_series_equal(result, exp)
def test_series_interpolate_intraday(self):
    # #1698
    # Time-weighted interpolation gives the same relative fill for a
    # 1-day offset on a 12-day grid as a 1-hour offset on a 12-hour grid.
    index = date_range("1/1/2012", periods=4, freq="12D")
    ts = Series([0, 12, 24, 36], index)
    new_index = index.append(index + pd.DateOffset(days=1)).sort_values()
    exp = ts.reindex(new_index).interpolate(method="time")
    index = date_range("1/1/2012", periods=4, freq="12h")
    ts = Series([0, 12, 24, 36], index)
    new_index = index.append(index + pd.DateOffset(hours=1)).sort_values()
    result = ts.reindex(new_index).interpolate(method="time")
    tm.assert_numpy_array_equal(result.values, exp.values)
@pytest.mark.parametrize(
    "ind",
    [
        ["a", "b", "c", "d"],
        pd.period_range(start="2019-01-01", periods=4),
        pd.interval_range(start=0, end=4),
    ],
)
def test_interp_non_timedelta_index(self, interp_methods_ind, ind):
    # gh 21662
    # Non-numeric/non-datetime indexes only support method='linear'.
    df = pd.DataFrame([0, 1, np.nan, 3], index=ind)
    method, kwargs = interp_methods_ind
    if method == "pchip":
        pytest.importorskip("scipy")
    if method == "linear":
        result = df[0].interpolate(**kwargs)
        expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
        tm.assert_series_equal(result, expected)
    else:
        expected_error = (
            "Index column must be numeric or datetime type when "
            f"using {method} method other than linear. "
            "Try setting a numeric or datetime index column before "
            "interpolating."
        )
        with pytest.raises(ValueError, match=expected_error):
            df[0].interpolate(method=method, **kwargs)
def test_interpolate_timedelta_index(self, request, interp_methods_ind):
    """
    Tests for non numerical index types - object, period, timedelta
    Note that all methods except time, index, nearest and values
    are tested here.
    """
    # gh 21662
    pytest.importorskip("scipy")
    ind = pd.timedelta_range(start=1, periods=4)
    df = pd.DataFrame([0, 1, np.nan, 3], index=ind)
    method, kwargs = interp_methods_ind
    if method in {"cubic", "zero"}:
        # known unsupported combinations are expected to fail
        request.applymarker(
            pytest.mark.xfail(
                reason=f"{method} interpolation is not supported for TimedeltaIndex"
            )
        )
    result = df[0].interpolate(method=method, **kwargs)
    expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "ascending, expected_values",
    [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])],
)
def test_interpolate_unsorted_index(self, ascending, expected_values):
    # GH 21037
    # method='index' works after sorting in either direction.
    ts = Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1])
    result = ts.sort_index(ascending=ascending).interpolate(method="index")
    expected = Series(data=expected_values, index=expected_values, dtype=float)
    tm.assert_series_equal(result, expected)
def test_interpolate_asfreq_raises(self):
    # 'asfreq' is not a fill method; object dtype also warns deprecation.
    ser = Series(["a", None, "b"], dtype=object)
    msg2 = "Series.interpolate with object dtype"
    msg = "Invalid fill method"
    with pytest.raises(ValueError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=msg2):
            ser.interpolate(method="asfreq")
def test_interpolate_fill_value(self):
    # GH#54920
    # fill_value is used for values outside the interpolation range.
    pytest.importorskip("scipy")
    ser = Series([np.nan, 0, 1, np.nan, 3, np.nan])
    result = ser.interpolate(method="nearest", fill_value=0)
    expected = Series([np.nan, 0, 1, 1, 3, 0])
    tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,26 @@
import numpy as np
from pandas import (
Series,
date_range,
)
class TestIsMonotonic:
    """Tests for Series.is_monotonic_increasing / is_monotonic_decreasing."""

    def test_is_monotonic_numeric(self):
        # Random draws are (with overwhelming probability) not monotonic.
        ser = Series(np.random.default_rng(2).integers(0, 10, size=1000))
        assert not ser.is_monotonic_increasing
        # Strictly increasing / decreasing sequences.
        # NOTE: the duplicated `is_monotonic_increasing` assert (a leftover
        # from the removed `Series.is_monotonic` alias) was deduplicated.
        ser = Series(np.arange(1000))
        assert ser.is_monotonic_increasing is True
        ser = Series(np.arange(1000, 0, -1))
        assert ser.is_monotonic_decreasing is True

    def test_is_monotonic_dt64(self):
        # datetime64 values: increasing range, then its reversal.
        ser = Series(date_range("20130101", periods=10))
        assert ser.is_monotonic_increasing is True
        ser = Series(list(reversed(ser)))
        assert ser.is_monotonic_increasing is False
        assert ser.is_monotonic_decreasing is True

View File

@ -0,0 +1,40 @@
import numpy as np
import pytest
from pandas import Series
@pytest.mark.parametrize(
    "data, expected",
    [
        (np.random.default_rng(2).integers(0, 10, size=1000), False),
        (np.arange(1000), True),
        ([], True),
        ([np.nan], True),
        (["foo", "bar", np.nan], True),
        (["foo", "foo", np.nan], False),
        (["foo", "bar", np.nan, np.nan], False),
    ],
)
def test_is_unique(data, expected):
    # GH#11946 / GH#25180
    # NaN counts as a value: a single NaN is unique, repeated NaNs are not.
    ser = Series(data)
    assert ser.is_unique is expected
def test_is_unique_class_ne(capsys):
    # GH#20661
    # is_unique must not invoke __ne__ on the elements (which raises here),
    # and must not print anything to stderr while doing so.
    class Foo:
        def __init__(self, val) -> None:
            self._value = val

        def __ne__(self, other):
            raise Exception("NEQ not supported")

    with capsys.disabled():
        li = [Foo(i) for i in range(5)]
        ser = Series(li, index=list(range(5)))
        ser.is_unique
    captured = capsys.readouterr()
    assert len(captured.err) == 0

View File

@ -0,0 +1,252 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
date_range,
)
import pandas._testing as tm
from pandas.core import algorithms
from pandas.core.arrays import PeriodArray
class TestSeriesIsIn:
    """Tests for Series.isin across dtypes, resolutions, and edge cases."""

    def test_isin(self):
        s = Series(["A", "B", "C", "a", "B", "B", "A", "C"])
        result = s.isin(["A", "C"])
        expected = Series([True, False, True, False, False, False, True, True])
        tm.assert_series_equal(result, expected)
        # GH#16012
        # This specific issue has to have a series over 1e6 in len, but the
        # comparison array (in_list) must be large enough so that numpy doesn't
        # do a manual masking trick that will avoid this issue altogether
        s = Series(list("abcdefghijk" * 10**5))
        # If numpy doesn't do the manual comparison/mask, these
        # unorderable mixed types are what cause the exception in numpy
        in_list = [-1, "a", "b", "G", "Y", "Z", "E", "K", "E", "S", "I", "R", "R"] * 6
        assert s.isin(in_list).sum() == 200000

    def test_isin_with_string_scalar(self):
        # GH#4763
        # a bare string is not list-like and must be rejected.
        s = Series(["A", "B", "C", "a", "B", "B", "A", "C"])
        msg = (
            r"only list-like objects are allowed to be passed to isin\(\), "
            r"you passed a `str`"
        )
        with pytest.raises(TypeError, match=msg):
            s.isin("a")
        s = Series(["aaa", "b", "c"])
        with pytest.raises(TypeError, match=msg):
            s.isin("aaa")

    def test_isin_datetimelike_mismatched_reso(self):
        # isin matches datetimes across unit/resolution mismatches.
        expected = Series([True, True, False, False, False])
        ser = Series(date_range("jan-01-2013", "jan-05-2013"))
        # fails on dtype conversion in the first place
        day_values = np.asarray(ser[0:2].values).astype("datetime64[D]")
        result = ser.isin(day_values)
        tm.assert_series_equal(result, expected)
        dta = ser[:2]._values.astype("M8[s]")
        result = ser.isin(dta)
        tm.assert_series_equal(result, expected)

    def test_isin_datetimelike_mismatched_reso_list(self):
        # same as above but passing a python list of mismatched-reso values.
        expected = Series([True, True, False, False, False])
        ser = Series(date_range("jan-01-2013", "jan-05-2013"))
        dta = ser[:2]._values.astype("M8[s]")
        result = ser.isin(list(dta))
        tm.assert_series_equal(result, expected)

    def test_isin_with_i8(self):
        # GH#5021
        # datetime64/timedelta64 values compared via their i8 representation.
        expected = Series([True, True, False, False, False])
        expected2 = Series([False, True, False, False, False])
        # datetime64[ns]
        s = Series(date_range("jan-01-2013", "jan-05-2013"))
        result = s.isin(s[0:2])
        tm.assert_series_equal(result, expected)
        result = s.isin(s[0:2].values)
        tm.assert_series_equal(result, expected)
        result = s.isin([s[1]])
        tm.assert_series_equal(result, expected2)
        result = s.isin([np.datetime64(s[1])])
        tm.assert_series_equal(result, expected2)
        result = s.isin(set(s[0:2]))
        tm.assert_series_equal(result, expected)
        # timedelta64[ns]
        s = Series(pd.to_timedelta(range(5), unit="d"))
        result = s.isin(s[0:2])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
    def test_isin_empty(self, empty):
        # see GH#16991
        # an empty comparison collection yields all-False.
        s = Series(["a", "b"])
        expected = Series([False, False])
        result = s.isin(empty)
        tm.assert_series_equal(expected, result)

    def test_isin_read_only(self):
        # https://github.com/pandas-dev/pandas/issues/37174
        # a read-only numpy array must be accepted.
        arr = np.array([1, 2, 3])
        arr.setflags(write=False)
        s = Series([1, 2, 3])
        result = s.isin(arr)
        expected = Series([True, True, True])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [object, None])
    def test_isin_dt64_values_vs_ints(self, dtype):
        # GH#36621 dont cast integers to datetimes for isin
        dti = date_range("2013-01-01", "2013-01-05")
        ser = Series(dti)
        comps = np.asarray([1356998400000000000], dtype=dtype)
        res = dti.isin(comps)
        expected = np.array([False] * len(dti), dtype=bool)
        tm.assert_numpy_array_equal(res, expected)
        res = ser.isin(comps)
        tm.assert_series_equal(res, Series(expected))
        res = pd.core.algorithms.isin(ser, comps)
        tm.assert_numpy_array_equal(res, expected)

    def test_isin_tzawareness_mismatch(self):
        # naive vs tz-aware datetimes never match.
        dti = date_range("2013-01-01", "2013-01-05")
        ser = Series(dti)
        other = dti.tz_localize("UTC")
        res = dti.isin(other)
        expected = np.array([False] * len(dti), dtype=bool)
        tm.assert_numpy_array_equal(res, expected)
        res = ser.isin(other)
        tm.assert_series_equal(res, Series(expected))
        res = pd.core.algorithms.isin(ser, other)
        tm.assert_numpy_array_equal(res, expected)

    def test_isin_period_freq_mismatch(self):
        # periods with mismatched freq never match even with equal i8 values.
        dti = date_range("2013-01-01", "2013-01-05")
        pi = dti.to_period("M")
        ser = Series(pi)
        # We construct another PeriodIndex with the same i8 values
        # but different dtype
        dtype = dti.to_period("Y").dtype
        other = PeriodArray._simple_new(pi.asi8, dtype=dtype)
        res = pi.isin(other)
        expected = np.array([False] * len(pi), dtype=bool)
        tm.assert_numpy_array_equal(res, expected)
        res = ser.isin(other)
        tm.assert_series_equal(res, Series(expected))
        res = pd.core.algorithms.isin(ser, other)
        tm.assert_numpy_array_equal(res, expected)

    @pytest.mark.parametrize("values", [[-9.0, 0.0], [-9, 0]])
    def test_isin_float_in_int_series(self, values):
        # GH#19356 GH#21804
        # int/float values compare by numeric value, not dtype.
        ser = Series(values)
        result = ser.isin([-9, -0.5])
        expected = Series([True, False])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"])
    @pytest.mark.parametrize(
        "data,values,expected",
        [
            ([0, 1, 0], [1], [False, True, False]),
            ([0, 1, 0], [1, pd.NA], [False, True, False]),
            ([0, pd.NA, 0], [1, 0], [True, False, True]),
            ([0, 1, pd.NA], [1, pd.NA], [False, True, True]),
            ([0, 1, pd.NA], [1, np.nan], [False, True, False]),
            ([0, pd.NA, pd.NA], [np.nan, pd.NaT, None], [False, False, False]),
        ],
    )
    def test_isin_masked_types(self, dtype, data, values, expected):
        # GH#42405
        # pd.NA matches only pd.NA in masked dtypes (not np.nan/NaT/None).
        ser = Series(data, dtype=dtype)
        result = ser.isin(values)
        expected = Series(expected, dtype="boolean")
        tm.assert_series_equal(result, expected)
def test_isin_large_series_mixed_dtypes_and_nan(monkeypatch):
    # https://github.com/pandas-dev/pandas/issues/37094
    # combination of object dtype for the values
    # and > _MINIMUM_COMP_ARR_LEN elements
    # The threshold is patched down so the large-array code path is hit
    # with a small fixture.
    min_isin_comp = 5
    ser = Series([1, 2, np.nan] * min_isin_comp)
    with monkeypatch.context() as m:
        m.setattr(algorithms, "_MINIMUM_COMP_ARR_LEN", min_isin_comp)
        result = ser.isin({"foo", "bar"})
    expected = Series([False] * 3 * min_isin_comp)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "array,expected",
    [
        (
            [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j],
            Series([False, True, True, False, True, True, True], dtype=bool),
        )
    ],
)
def test_isin_complex_numbers(array, expected):
    # GH 17927: isin must work with complex values
    result = Series(array).isin([1j, 1 + 1j, 1 + 2j])
    tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
    "data,is_in",
    [([1, [2]], [1]), (["simple str", [{"values": 3}]], ["simple str"])],
)
def test_isin_filtering_with_mixed_object_types(data, is_in):
    # GH 20883: unhashable elements (lists/dicts) must not break isin
    ser = Series(data)
    result = ser.isin(is_in)
    expected = Series([True, False])

    tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("data", [[1, 2, 3], [1.0, 2.0, 3.0]])
@pytest.mark.parametrize("isin", [[1, 2], [1.0, 2.0]])
def test_isin_filtering_on_iterable(data, isin):
    # GH 50234: isin accepts a bare generator, not just a list/set
    ser = Series(data)
    result = ser.isin(i for i in isin)
    expected_result = Series([True, True, False])

    tm.assert_series_equal(result, expected_result)

View File

@ -0,0 +1,35 @@
"""
We also test Series.notna in this file.
"""
import numpy as np
from pandas import (
Period,
Series,
)
import pandas._testing as tm
class TestIsna:
    """Tests for Series.isna; Series.notna is tested here as its complement."""

    def test_isna_period_dtype(self):
        # GH#13737: Period("NaT") must be detected as missing
        ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
        expected = Series([False, True])

        result = ser.isna()
        tm.assert_series_equal(result, expected)

        result = ser.notna()
        tm.assert_series_equal(result, ~expected)

    def test_isna(self):
        # numeric: only np.nan counts as missing (not 0 or negatives)
        ser = Series([0, 5.4, 3, np.nan, -0.001])
        expected = Series([False, False, False, True, False])
        tm.assert_series_equal(ser.isna(), expected)
        tm.assert_series_equal(ser.notna(), ~expected)

        # strings: empty string is NOT missing
        ser = Series(["hi", "", np.nan])
        expected = Series([False, False, True])
        tm.assert_series_equal(ser.isna(), expected)
        tm.assert_series_equal(ser.notna(), ~expected)

View File

@ -0,0 +1,59 @@
"""
Series.item method, mainly testing that we get python scalars as opposed to
numpy scalars.
"""
import pytest
from pandas import (
Series,
Timedelta,
Timestamp,
date_range,
)
class TestItem:
    """Series.item must return python scalars, not numpy scalars."""

    def test_item(self):
        # We are testing that we get python scalars as opposed to numpy scalars
        ser = Series([1])
        result = ser.item()
        assert result == 1
        assert result == ser.iloc[0]
        assert isinstance(result, int)  # i.e. not np.int64

        ser = Series([0.5], index=[3])
        result = ser.item()
        assert isinstance(result, float)
        assert result == 0.5

        # more than one element -> ValueError
        ser = Series([1, 2])
        msg = "can only convert an array of size 1"
        with pytest.raises(ValueError, match=msg):
            ser.item()

        dti = date_range("2016-01-01", periods=2)
        with pytest.raises(ValueError, match=msg):
            dti.item()
        with pytest.raises(ValueError, match=msg):
            Series(dti).item()

        val = dti[:1].item()
        assert isinstance(val, Timestamp)
        val = Series(dti)[:1].item()
        assert isinstance(val, Timestamp)

        tdi = dti - dti
        with pytest.raises(ValueError, match=msg):
            tdi.item()
        with pytest.raises(ValueError, match=msg):
            Series(tdi).item()

        val = tdi[:1].item()
        assert isinstance(val, Timedelta)
        val = Series(tdi)[:1].item()
        assert isinstance(val, Timedelta)

        # Case where ser[0] would not work
        ser = Series(dti, index=[5, 6])
        val = ser.iloc[:1].item()
        assert val == dti[0]

View File

@ -0,0 +1,609 @@
from collections import (
Counter,
defaultdict,
)
from decimal import Decimal
import math
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
bdate_range,
date_range,
isna,
timedelta_range,
)
import pandas._testing as tm
def test_series_map_box_timedelta():
# GH#11349
ser = Series(timedelta_range("1 day 1 s", periods=5, freq="h"))
def f(x):
return x.total_seconds()
ser.map(f)
def test_map_callable(datetime_series):
with np.errstate(all="ignore"):
tm.assert_series_equal(datetime_series.map(np.sqrt), np.sqrt(datetime_series))
# map function element-wise
tm.assert_series_equal(datetime_series.map(math.exp), np.exp(datetime_series))
# empty series
s = Series(dtype=object, name="foo", index=Index([], name="bar"))
rs = s.map(lambda x: x)
tm.assert_series_equal(s, rs)
# check all metadata (GH 9322)
assert s is not rs
assert s.index is rs.index
assert s.dtype == rs.dtype
assert s.name == rs.name
# index but no data
s = Series(index=[1, 2, 3], dtype=np.float64)
rs = s.map(lambda x: x)
tm.assert_series_equal(s, rs)
def test_map_same_length_inference_bug():
s = Series([1, 2])
def f(x):
return (x, x + 1)
s = Series([1, 2, 3])
result = s.map(f)
expected = Series([(1, 2), (2, 3), (3, 4)])
tm.assert_series_equal(result, expected)
s = Series(["foo,bar"])
result = s.map(lambda x: x.split(","))
expected = Series([("foo", "bar")])
tm.assert_series_equal(result, expected)
def test_series_map_box_timestamps():
# GH#2689, GH#2627
ser = Series(date_range("1/1/2000", periods=3))
def func(x):
return (x.hour, x.day, x.month)
result = ser.map(func)
expected = Series([(0, 1, 1), (0, 2, 1), (0, 3, 1)])
tm.assert_series_equal(result, expected)
def test_map_series_stringdtype(any_string_dtype, using_infer_string):
    # map test on StringDType, GH#40823
    ser1 = Series(
        data=["cat", "dog", "rabbit"],
        index=["id1", "id2", "id3"],
        dtype=any_string_dtype,
    )
    ser2 = Series(["id3", "id2", "id1", "id7000"], dtype=any_string_dtype)
    result = ser2.map(ser1)

    # missing key maps to the dtype's native NA marker
    item = pd.NA
    if ser2.dtype == object:
        item = np.nan
    expected = Series(data=["rabbit", "dog", "cat", item], dtype=any_string_dtype)
    if using_infer_string and any_string_dtype == "object":
        expected = expected.astype("string[pyarrow_numpy]")

    tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
    "data, expected_dtype",
    [(["1-1", "1-1", np.nan], "category"), (["1-1", "1-2", np.nan], object)],
)
def test_map_categorical_with_nan_values(data, expected_dtype, using_infer_string):
    # GH 20714 bug fixed in: GH 24275
    def func(val):
        return val.split("-")[0]

    s = Series(data, dtype="category")
    result = s.map(func, na_action="ignore")
    if using_infer_string and expected_dtype == object:
        expected_dtype = "string[pyarrow_numpy]"
    expected = Series(["1", "1", np.nan], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)

def test_map_empty_integer_series():
    # GH52384: mapping an empty int Series keeps the int dtype
    s = Series([], dtype=int)
    result = s.map(lambda x: x)
    tm.assert_series_equal(result, s)

def test_map_empty_integer_series_with_datetime_index():
    # GH 21245
    s = Series([], index=date_range(start="2018-01-01", periods=0), dtype=int)
    result = s.map(lambda x: x)
    tm.assert_series_equal(result, s)

@pytest.mark.parametrize("func", [str, lambda x: str(x)])
def test_map_simple_str_callables_same_as_astype(
    string_series, func, using_infer_string
):
    # test that we are evaluating row-by-row first
    # before vectorized evaluation
    result = string_series.map(func)
    expected = string_series.astype(
        str if not using_infer_string else "string[pyarrow_numpy]"
    )
    tm.assert_series_equal(result, expected)

def test_list_raises(string_series):
    # a list is not a valid mapper
    with pytest.raises(TypeError, match="'list' object is not callable"):
        string_series.map([lambda x: x])
def test_map():
data = {
"A": [0.0, 1.0, 2.0, 3.0, 4.0],
"B": [0.0, 1.0, 0.0, 1.0, 0.0],
"C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
"D": bdate_range("1/1/2009", periods=5),
}
source = Series(data["B"], index=data["C"])
target = Series(data["C"][:4], index=data["D"][:4])
merged = target.map(source)
for k, v in merged.items():
assert v == source[target[k]]
# input could be a dict
merged = target.map(source.to_dict())
for k, v in merged.items():
assert v == source[target[k]]
def test_map_datetime(datetime_series):
# function
result = datetime_series.map(lambda x: x * 2)
tm.assert_series_equal(result, datetime_series * 2)
def test_map_category():
# GH 10324
a = Series([1, 2, 3, 4])
b = Series(["even", "odd", "even", "odd"], dtype="category")
c = Series(["even", "odd", "even", "odd"])
exp = Series(["odd", "even", "odd", np.nan], dtype="category")
tm.assert_series_equal(a.map(b), exp)
exp = Series(["odd", "even", "odd", np.nan])
tm.assert_series_equal(a.map(c), exp)
def test_map_category_numeric():
a = Series(["a", "b", "c", "d"])
b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"]))
c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))
exp = Series([np.nan, 1, 2, 3])
tm.assert_series_equal(a.map(b), exp)
exp = Series([np.nan, 1, 2, 3])
tm.assert_series_equal(a.map(c), exp)
def test_map_category_string():
a = Series(["a", "b", "c", "d"])
b = Series(
["B", "C", "D", "E"],
dtype="category",
index=pd.CategoricalIndex(["b", "c", "d", "e"]),
)
c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))
exp = Series(
pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"])
)
tm.assert_series_equal(a.map(b), exp)
exp = Series([np.nan, "B", "C", "D"])
tm.assert_series_equal(a.map(c), exp)
def test_map_empty(request, index):
if isinstance(index, MultiIndex):
request.applymarker(
pytest.mark.xfail(
reason="Initializing a Series from a MultiIndex is not supported"
)
)
s = Series(index)
result = s.map({})
expected = Series(np.nan, index=s.index)
tm.assert_series_equal(result, expected)
def test_map_compat():
# related GH 8024
s = Series([True, True, False], index=[1, 2, 3])
result = s.map({True: "foo", False: "bar"})
expected = Series(["foo", "foo", "bar"], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
def test_map_int():
left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
right = Series({1: 11, 2: 22, 3: 33})
assert left.dtype == np.float64
assert issubclass(right.dtype.type, np.integer)
merged = left.map(right)
assert merged.dtype == np.float64
assert isna(merged["d"])
assert not isna(merged["c"])
def test_map_type_inference():
s = Series(range(3))
s2 = s.map(lambda x: np.where(x == 0, 0, 1))
assert issubclass(s2.dtype.type, np.integer)
def test_map_decimal(string_series):
result = string_series.map(lambda x: Decimal(str(x)))
assert result.dtype == np.object_
assert isinstance(result.iloc[0], Decimal)
def test_map_na_exclusion():
s = Series([1.5, np.nan, 3, np.nan, 5])
result = s.map(lambda x: x * 2, na_action="ignore")
exp = s * 2
tm.assert_series_equal(result, exp)
def test_map_dict_with_tuple_keys():
"""
Due to new MultiIndex-ing behaviour in v0.14.0,
dicts with tuple keys passed to map were being
converted to a multi-index, preventing tuple values
from being mapped properly.
"""
# GH 18496
df = DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]})
label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"}
df["labels"] = df["a"].map(label_mappings)
df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index)
# All labels should be filled now
tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False)
def test_map_counter():
s = Series(["a", "b", "c"], index=[1, 2, 3])
counter = Counter()
counter["b"] = 5
counter["c"] += 1
result = s.map(counter)
expected = Series([0, 5, 1], index=[1, 2, 3])
tm.assert_series_equal(result, expected)
def test_map_defaultdict():
s = Series([1, 2, 3], index=["a", "b", "c"])
default_dict = defaultdict(lambda: "blank")
default_dict[1] = "stuff"
result = s.map(default_dict)
expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"])
tm.assert_series_equal(result, expected)
def test_map_dict_na_key():
# https://github.com/pandas-dev/pandas/issues/17648
# Checks that np.nan key is appropriately mapped
s = Series([1, 2, np.nan])
expected = Series(["a", "b", "c"])
result = s.map({1: "a", 2: "b", np.nan: "c"})
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_defaultdict_na_key(na_action):
    # GH 48813: NaN key is honored unless na_action="ignore"
    s = Series([1, 2, np.nan])
    default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"})
    result = s.map(default_map, na_action=na_action)
    expected = Series({0: "a", 1: "b", 2: "c" if na_action is None else np.nan})
    tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_defaultdict_missing_key(na_action):
    # GH 48813: missing key falls back to the defaultdict factory
    s = Series([1, 2, np.nan])
    default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", 3: "c"})
    result = s.map(default_map, na_action=na_action)
    expected = Series({0: "a", 1: "b", 2: "missing" if na_action is None else np.nan})
    tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_defaultdict_unmutated(na_action):
    # GH 48813: map must not insert keys into the defaultdict
    s = Series([1, 2, np.nan])
    default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"})
    expected_default_map = default_map.copy()
    s.map(default_map, na_action=na_action)
    assert default_map == expected_default_map

@pytest.mark.parametrize("arg_func", [dict, Series])
def test_map_dict_ignore_na(arg_func):
    # GH#47527
    mapping = arg_func({1: 10, np.nan: 42})
    ser = Series([1, np.nan, 2])
    result = ser.map(mapping, na_action="ignore")
    expected = Series([10, np.nan, np.nan])
    tm.assert_series_equal(result, expected)

def test_map_defaultdict_ignore_na():
    # GH#47527
    mapping = defaultdict(int, {1: 10, np.nan: 42})
    ser = Series([1, np.nan, 2])
    result = ser.map(mapping)
    expected = Series([10, 42, 0])
    tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
    "na_action, expected",
    [(None, Series([10.0, 42.0, np.nan])), ("ignore", Series([10, np.nan, np.nan]))],
)
def test_map_categorical_na_ignore(na_action, expected):
    # GH#47527
    values = pd.Categorical([1, np.nan, 2], categories=[10, 1, 2])
    ser = Series(values)
    result = ser.map({1: 10, np.nan: 42}, na_action=na_action)
    tm.assert_series_equal(result, expected)
def test_map_dict_subclass_with_missing():
"""
Test Series.map with a dictionary subclass that defines __missing__,
i.e. sets a default value (GH #15999).
"""
class DictWithMissing(dict):
def __missing__(self, key):
return "missing"
s = Series([1, 2, 3])
dictionary = DictWithMissing({3: "three"})
result = s.map(dictionary)
expected = Series(["missing", "missing", "three"])
tm.assert_series_equal(result, expected)
def test_map_dict_subclass_without_missing():
class DictWithoutMissing(dict):
pass
s = Series([1, 2, 3])
dictionary = DictWithoutMissing({3: "three"})
result = s.map(dictionary)
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)
def test_map_abc_mapping(non_dict_mapping_subclass):
# https://github.com/pandas-dev/pandas/issues/29733
# Check collections.abc.Mapping support as mapper for Series.map
s = Series([1, 2, 3])
not_a_dictionary = non_dict_mapping_subclass({3: "three"})
result = s.map(not_a_dictionary)
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)
def test_map_abc_mapping_with_missing(non_dict_mapping_subclass):
# https://github.com/pandas-dev/pandas/issues/29733
# Check collections.abc.Mapping support as mapper for Series.map
class NonDictMappingWithMissing(non_dict_mapping_subclass):
def __missing__(self, key):
return "missing"
s = Series([1, 2, 3])
not_a_dictionary = NonDictMappingWithMissing({3: "three"})
result = s.map(not_a_dictionary)
# __missing__ is a dict concept, not a Mapping concept,
# so it should not change the result!
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)
def test_map_box_dt64(unit):
vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
ser = Series(vals).dt.as_unit(unit)
assert ser.dtype == f"datetime64[{unit}]"
# boxed value must be Timestamp instance
res = ser.map(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
tm.assert_series_equal(res, exp)
def test_map_box_dt64tz(unit):
vals = [
pd.Timestamp("2011-01-01", tz="US/Eastern"),
pd.Timestamp("2011-01-02", tz="US/Eastern"),
]
ser = Series(vals).dt.as_unit(unit)
assert ser.dtype == f"datetime64[{unit}, US/Eastern]"
res = ser.map(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
tm.assert_series_equal(res, exp)
def test_map_box_td64(unit):
# timedelta
vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
ser = Series(vals).dt.as_unit(unit)
assert ser.dtype == f"timedelta64[{unit}]"
res = ser.map(lambda x: f"{type(x).__name__}_{x.days}")
exp = Series(["Timedelta_1", "Timedelta_2"])
tm.assert_series_equal(res, exp)
def test_map_box_period():
# period
vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
ser = Series(vals)
assert ser.dtype == "Period[M]"
res = ser.map(lambda x: f"{type(x).__name__}_{x.freqstr}")
exp = Series(["Period_M", "Period_M"])
tm.assert_series_equal(res, exp)
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_categorical(na_action, using_infer_string):
    values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
    s = Series(values, name="XX", index=list("abcdefg"))

    # element-wise callable keeps categorical dtype (and order)
    result = s.map(lambda x: x.lower(), na_action=na_action)
    exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
    exp = Series(exp_values, name="XX", index=list("abcdefg"))
    tm.assert_series_equal(result, exp)
    tm.assert_categorical_equal(result.values, exp_values)

    # constant-returning callable drops the categorical dtype
    result = s.map(lambda x: "A", na_action=na_action)
    exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
    tm.assert_series_equal(result, exp)
    assert result.dtype == object if not using_infer_string else "string"

@pytest.mark.parametrize(
    "na_action, expected",
    (
        [None, Series(["A", "B", "nan"], name="XX")],
        [
            "ignore",
            Series(
                ["A", "B", np.nan],
                name="XX",
                dtype=pd.CategoricalDtype(list("DCBA"), True),
            ),
        ],
    ),
)
def test_map_categorical_na_action(na_action, expected):
    dtype = pd.CategoricalDtype(list("DCBA"), ordered=True)
    values = pd.Categorical(list("AB") + [np.nan], dtype=dtype)
    s = Series(values, name="XX")
    result = s.map(str, na_action=na_action)
    tm.assert_series_equal(result, expected)

def test_map_datetimetz():
    values = date_range("2011-01-01", "2011-01-02", freq="h").tz_localize("Asia/Tokyo")
    s = Series(values, name="XX")

    # keep tz
    result = s.map(lambda x: x + pd.offsets.Day())
    exp_values = date_range("2011-01-02", "2011-01-03", freq="h").tz_localize(
        "Asia/Tokyo"
    )
    exp = Series(exp_values, name="XX")
    tm.assert_series_equal(result, exp)

    result = s.map(lambda x: x.hour)
    exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
    tm.assert_series_equal(result, exp)

    # not vectorized
    def f(x):
        if not isinstance(x, pd.Timestamp):
            raise ValueError
        return str(x.tz)

    result = s.map(f)
    exp = Series(["Asia/Tokyo"] * 25, name="XX")
    tm.assert_series_equal(result, exp)

@pytest.mark.parametrize(
    "vals,mapping,exp",
    [
        (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
        (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
        (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
    ],
)
def test_map_missing_mixed(vals, mapping, exp, using_infer_string):
    # GH20495
    s = Series(vals + [np.nan])
    result = s.map(mapping)
    exp = Series(exp)
    if using_infer_string and mapping == {np.nan: "not NaN"}:
        exp.iloc[-1] = np.nan
    tm.assert_series_equal(result, exp)
def test_map_scalar_on_date_time_index_aware_series():
    # GH 25959
    # Calling map on a localized time series should not cause an error
    series = Series(
        np.arange(10, dtype=np.float64),
        index=date_range("2020-01-01", periods=10, tz="UTC"),
        name="ts",
    )
    result = Series(series.index).map(lambda x: 1)
    tm.assert_series_equal(result, Series(np.ones(len(series)), dtype="int64"))

def test_map_float_to_string_precision():
    # GH 13228: full float repr precision must survive str conversion
    ser = Series(1 / 3)
    result = ser.map(lambda val: str(val)).to_dict()
    expected = {0: "0.3333333333333333"}
    assert result == expected

def test_map_to_timedelta():
    list_of_valid_strings = ["00:00:01", "00:00:02"]
    a = pd.to_timedelta(list_of_valid_strings)
    b = Series(list_of_valid_strings).map(pd.to_timedelta)
    tm.assert_series_equal(Series(a), b)

    list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT]
    a = pd.to_timedelta(list_of_strings)
    ser = Series(list_of_strings)
    b = ser.map(pd.to_timedelta)
    tm.assert_series_equal(Series(a), b)

def test_map_type():
    # GH 46719: `type` itself is a valid element-wise callable
    s = Series([3, "string", float], index=["a", "b", "c"])
    result = s.map(type)
    expected = Series([int, str, type], index=["a", "b", "c"])
    tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,82 @@
import operator
import numpy as np
import pytest
from pandas import (
DataFrame,
Series,
)
import pandas._testing as tm
class TestMatmul:
    """Tests for the @ operator (__matmul__/__rmatmul__) on Series."""

    def test_matmul(self):
        # matmul test is for GH#10259
        a = Series(
            np.random.default_rng(2).standard_normal(4), index=["p", "q", "r", "s"]
        )
        b = DataFrame(
            np.random.default_rng(2).standard_normal((3, 4)),
            index=["1", "2", "3"],
            columns=["p", "q", "r", "s"],
        ).T

        # Series @ DataFrame -> Series
        result = operator.matmul(a, b)
        expected = Series(np.dot(a.values, b.values), index=["1", "2", "3"])
        tm.assert_series_equal(result, expected)

        # DataFrame @ Series -> Series
        result = operator.matmul(b.T, a)
        expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"])
        tm.assert_series_equal(result, expected)

        # Series @ Series -> scalar
        result = operator.matmul(a, a)
        expected = np.dot(a.values, a.values)
        tm.assert_almost_equal(result, expected)

        # GH#21530
        # vector (1D np.array) @ Series (__rmatmul__)
        result = operator.matmul(a.values, a)
        expected = np.dot(a.values, a.values)
        tm.assert_almost_equal(result, expected)

        # GH#21530
        # vector (1D list) @ Series (__rmatmul__)
        result = operator.matmul(a.values.tolist(), a)
        expected = np.dot(a.values, a.values)
        tm.assert_almost_equal(result, expected)

        # GH#21530
        # matrix (2D np.array) @ Series (__rmatmul__)
        result = operator.matmul(b.T.values, a)
        expected = np.dot(b.T.values, a.values)
        tm.assert_almost_equal(result, expected)

        # GH#21530
        # matrix (2D nested lists) @ Series (__rmatmul__)
        result = operator.matmul(b.T.values.tolist(), a)
        expected = np.dot(b.T.values, a.values)
        tm.assert_almost_equal(result, expected)

        # mixed dtype DataFrame @ Series
        a["p"] = int(a.p)
        result = operator.matmul(b.T, a)
        expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"])
        tm.assert_series_equal(result, expected)

        # different dtypes DataFrame @ Series
        a = a.astype(int)
        result = operator.matmul(b.T, a)
        expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"])
        tm.assert_series_equal(result, expected)

        msg = r"Dot product shape mismatch, \(4,\) vs \(3,\)"
        # exception raised is of type Exception
        with pytest.raises(Exception, match=msg):
            a.dot(a.values[:3])
        msg = "matrices are not aligned"
        with pytest.raises(ValueError, match=msg):
            a.dot(b.T)

View File

@ -0,0 +1,248 @@
"""
Note: for naming purposes, most tests are title with as e.g. "test_nlargest_foo"
but are implicitly also testing nsmallest_foo.
"""
from itertools import product
import numpy as np
import pytest
import pandas as pd
from pandas import Series
import pandas._testing as tm
# Dtypes exercised by the nlargest/nsmallest tests; each name is also a
# column of the s_main_dtypes fixture.
main_dtypes = [
    "datetime",
    "datetimetz",
    "timedelta",
    "int8",
    "int16",
    "int32",
    "int64",
    "float32",
    "float64",
    "uint8",
    "uint16",
    "uint32",
    "uint64",
]
@pytest.fixture
def s_main_dtypes():
    """
    A DataFrame with many dtypes

    * datetime
    * datetimetz
    * timedelta
    * [u]int{8,16,32,64}
    * float{32,64}

    The columns are the name of the dtype.
    """
    df = pd.DataFrame(
        {
            "datetime": pd.to_datetime(["2003", "2002", "2001", "2002", "2005"]),
            "datetimetz": pd.to_datetime(
                ["2003", "2002", "2001", "2002", "2005"]
            ).tz_localize("US/Eastern"),
            "timedelta": pd.to_timedelta(["3d", "2d", "1d", "2d", "5d"]),
        }
    )
    # same ordering in every numeric column so the expected positions
    # are identical across dtypes
    for dtype in [
        "int8",
        "int16",
        "int32",
        "int64",
        "float32",
        "float64",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
    ]:
        df[dtype] = Series([3, 2, 1, 2, 5], dtype=dtype)
    return df

@pytest.fixture(params=main_dtypes)
def s_main_dtypes_split(request, s_main_dtypes):
    """Each series in s_main_dtypes."""
    return s_main_dtypes[request.param]
def assert_check_nselect_boundary(vals, dtype, method):
    # helper function for 'test_boundary_{dtype}' tests
    # vals must be given in ascending order of magnitude
    ser = Series(vals, dtype=dtype)
    result = getattr(ser, method)(3)
    expected_idxr = [0, 1, 2] if method == "nsmallest" else [3, 2, 1]
    expected = ser.loc[expected_idxr]
    tm.assert_series_equal(result, expected)
class TestSeriesNLargestNSmallest:
    """nlargest tests implicitly exercise nsmallest as well."""

    @pytest.mark.parametrize(
        "r",
        [
            Series([3.0, 2, 1, 2, "5"], dtype="object"),
            Series([3.0, 2, 1, 2, 5], dtype="object"),
            # not supported on some archs
            # Series([3., 2, 1, 2, 5], dtype='complex256'),
            Series([3.0, 2, 1, 2, 5], dtype="complex128"),
            Series(list("abcde")),
            Series(list("abcde"), dtype="category"),
        ],
    )
    def test_nlargest_error(self, r):
        # non-orderable-numeric dtypes must raise, whatever n is
        dt = r.dtype
        msg = f"Cannot use method 'n(largest|smallest)' with dtype {dt}"
        args = 2, len(r), 0, -1
        methods = r.nlargest, r.nsmallest
        for method, arg in product(methods, args):
            with pytest.raises(TypeError, match=msg):
                method(arg)

    def test_nsmallest_nlargest(self, s_main_dtypes_split):
        # float, int, datetime64 (use i8), timedelts64 (same),
        # object that are numbers, object that are strings
        ser = s_main_dtypes_split

        tm.assert_series_equal(ser.nsmallest(2), ser.iloc[[2, 1]])
        tm.assert_series_equal(ser.nsmallest(2, keep="last"), ser.iloc[[2, 3]])

        empty = ser.iloc[0:0]
        tm.assert_series_equal(ser.nsmallest(0), empty)
        tm.assert_series_equal(ser.nsmallest(-1), empty)
        tm.assert_series_equal(ser.nlargest(0), empty)
        tm.assert_series_equal(ser.nlargest(-1), empty)

        tm.assert_series_equal(ser.nsmallest(len(ser)), ser.sort_values())
        tm.assert_series_equal(ser.nsmallest(len(ser) + 1), ser.sort_values())
        tm.assert_series_equal(ser.nlargest(len(ser)), ser.iloc[[4, 0, 1, 3, 2]])
        tm.assert_series_equal(ser.nlargest(len(ser) + 1), ser.iloc[[4, 0, 1, 3, 2]])

    def test_nlargest_misc(self):
        ser = Series([3.0, np.nan, 1, 2, 5])
        result = ser.nlargest()
        expected = ser.iloc[[4, 0, 3, 2, 1]]
        tm.assert_series_equal(result, expected)
        result = ser.nsmallest()
        expected = ser.iloc[[2, 3, 0, 4, 1]]
        tm.assert_series_equal(result, expected)

        msg = 'keep must be either "first", "last"'
        with pytest.raises(ValueError, match=msg):
            ser.nsmallest(keep="invalid")
        with pytest.raises(ValueError, match=msg):
            ser.nlargest(keep="invalid")

        # GH#15297
        ser = Series([1] * 5, index=[1, 2, 3, 4, 5])
        expected_first = Series([1] * 3, index=[1, 2, 3])
        expected_last = Series([1] * 3, index=[5, 4, 3])

        result = ser.nsmallest(3)
        tm.assert_series_equal(result, expected_first)

        result = ser.nsmallest(3, keep="last")
        tm.assert_series_equal(result, expected_last)

        result = ser.nlargest(3)
        tm.assert_series_equal(result, expected_first)

        result = ser.nlargest(3, keep="last")
        tm.assert_series_equal(result, expected_last)

    @pytest.mark.parametrize("n", range(1, 5))
    def test_nlargest_n(self, n):
        # GH 13412
        ser = Series([1, 4, 3, 2], index=[0, 0, 1, 1])
        result = ser.nlargest(n)
        expected = ser.sort_values(ascending=False).head(n)
        tm.assert_series_equal(result, expected)

        result = ser.nsmallest(n)
        expected = ser.sort_values().head(n)
        tm.assert_series_equal(result, expected)

    def test_nlargest_boundary_integer(self, nselect_method, any_int_numpy_dtype):
        # GH#21426
        dtype_info = np.iinfo(any_int_numpy_dtype)
        min_val, max_val = dtype_info.min, dtype_info.max
        vals = [min_val, min_val + 1, max_val - 1, max_val]
        assert_check_nselect_boundary(vals, any_int_numpy_dtype, nselect_method)

    def test_nlargest_boundary_float(self, nselect_method, float_numpy_dtype):
        # GH#21426
        dtype_info = np.finfo(float_numpy_dtype)
        min_val, max_val = dtype_info.min, dtype_info.max
        min_2nd, max_2nd = np.nextafter([min_val, max_val], 0, dtype=float_numpy_dtype)
        vals = [min_val, min_2nd, max_2nd, max_val]
        assert_check_nselect_boundary(vals, float_numpy_dtype, nselect_method)

    @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_nlargest_boundary_datetimelike(self, nselect_method, dtype):
        # GH#21426
        # use int64 bounds and +1 to min_val since true minimum is NaT
        # (include min_val/NaT at end to maintain same expected_idxr)
        dtype_info = np.iinfo("int64")
        min_val, max_val = dtype_info.min, dtype_info.max
        vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val]
        assert_check_nselect_boundary(vals, dtype, nselect_method)

    def test_nlargest_duplicate_keep_all_ties(self):
        # see GH#16818
        ser = Series([10, 9, 8, 7, 7, 7, 7, 6])
        result = ser.nlargest(4, keep="all")
        expected = Series([10, 9, 8, 7, 7, 7, 7])
        tm.assert_series_equal(result, expected)

        result = ser.nsmallest(2, keep="all")
        expected = Series([6, 7, 7, 7, 7], index=[7, 3, 4, 5, 6])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "data,expected", [([True, False], [True]), ([True, False, True, True], [True])]
    )
    def test_nlargest_boolean(self, data, expected):
        # GH#26154 : ensure True > False
        ser = Series(data)
        result = ser.nlargest(1)
        expected = Series(expected)
        tm.assert_series_equal(result, expected)

    def test_nlargest_nullable(self, any_numeric_ea_dtype):
        # GH#42816
        dtype = any_numeric_ea_dtype
        if dtype.startswith("UInt"):
            # Can't cast from negative float to uint on some platforms
            arr = np.random.default_rng(2).integers(1, 10, 10)
        else:
            arr = np.random.default_rng(2).standard_normal(10)
        arr = arr.astype(dtype.lower(), copy=False)

        ser = Series(arr.copy(), dtype=dtype)
        ser[1] = pd.NA
        result = ser.nlargest(5)
        expected = (
            Series(np.delete(arr, 1), index=ser.index.delete(1))
            .nlargest(5)
            .astype(dtype)
        )
        tm.assert_series_equal(result, expected)

    def test_nsmallest_nan_when_keep_is_all(self):
        # GH#46589
        s = Series([1, 2, 3, 3, 3, None])
        result = s.nsmallest(3, keep="all")
        expected = Series([1.0, 2.0, 3.0, 3.0, 3.0])
        tm.assert_series_equal(result, expected)

        s = Series([1, 2, None, None, None])
        result = s.nsmallest(3, keep="all")
        expected = Series([1, 2, None, None, None])
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,24 @@
import numpy as np
from pandas import (
Categorical,
Series,
)
def test_nunique():
    # basics.rst doc example
    series = Series(np.random.default_rng(2).standard_normal(500))
    series[20:500] = np.nan
    series[10:20] = 5000
    # 10 distinct randoms + the single 5000 value; NaN is excluded
    result = series.nunique()
    assert result == 11

def test_nunique_categorical():
    # GH#18051: empty / all-NaN categoricals have zero unique values
    ser = Series(Categorical([]))
    assert ser.nunique() == 0

    ser = Series(Categorical([np.nan]))
    assert ser.nunique() == 0

View File

@ -0,0 +1,128 @@
import numpy as np
import pytest
from pandas import (
Series,
date_range,
)
import pandas._testing as tm
class TestSeriesPctChange:
    """Tests for Series.pct_change, including the fill_method deprecation."""

    def test_pct_change(self, datetime_series):
        msg = (
            "The 'fill_method' keyword being not None and the 'limit' keyword in "
            "Series.pct_change are deprecated"
        )

        rs = datetime_series.pct_change(fill_method=None)
        tm.assert_series_equal(rs, datetime_series / datetime_series.shift(1) - 1)

        rs = datetime_series.pct_change(2)
        filled = datetime_series.ffill()
        tm.assert_series_equal(rs, filled / filled.shift(2) - 1)

        with tm.assert_produces_warning(FutureWarning, match=msg):
            rs = datetime_series.pct_change(fill_method="bfill", limit=1)
        filled = datetime_series.bfill(limit=1)
        tm.assert_series_equal(rs, filled / filled.shift(1) - 1)

        rs = datetime_series.pct_change(freq="5D")
        filled = datetime_series.ffill()
        tm.assert_series_equal(
            rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled)
        )

    def test_pct_change_with_duplicate_axis(self):
        # GH#28664
        common_idx = date_range("2019-11-14", periods=5, freq="D")
        result = Series(range(5), common_idx).pct_change(freq="B")

        # the reason that the expected should be like this is documented at PR 28681
        expected = Series([np.nan, np.inf, np.nan, np.nan, 3.0], common_idx)

        tm.assert_series_equal(result, expected)

    def test_pct_change_shift_over_nas(self):
        s = Series([1.0, 1.5, np.nan, 2.5, 3.0])

        msg = "The default fill_method='pad' in Series.pct_change is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            chg = s.pct_change()

        expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2])
        tm.assert_series_equal(chg, expected)

    @pytest.mark.parametrize(
        "freq, periods, fill_method, limit",
        [
            ("5B", 5, None, None),
            ("3B", 3, None, None),
            ("3B", 3, "bfill", None),
            ("7B", 7, "pad", 1),
            ("7B", 7, "bfill", 3),
            ("14B", 14, None, None),
        ],
    )
    def test_pct_change_periods_freq(
        self, freq, periods, fill_method, limit, datetime_series
    ):
        msg = (
            "The 'fill_method' keyword being not None and the 'limit' keyword in "
            "Series.pct_change are deprecated"
        )

        # GH#7292: freq-based and periods-based calls must agree
        with tm.assert_produces_warning(FutureWarning, match=msg):
            rs_freq = datetime_series.pct_change(
                freq=freq, fill_method=fill_method, limit=limit
            )
        with tm.assert_produces_warning(FutureWarning, match=msg):
            rs_periods = datetime_series.pct_change(
                periods, fill_method=fill_method, limit=limit
            )
        tm.assert_series_equal(rs_freq, rs_periods)

        empty_ts = Series(index=datetime_series.index, dtype=object)
        with tm.assert_produces_warning(FutureWarning, match=msg):
            rs_freq = empty_ts.pct_change(
                freq=freq, fill_method=fill_method, limit=limit
            )
        with tm.assert_produces_warning(FutureWarning, match=msg):
            rs_periods = empty_ts.pct_change(
                periods, fill_method=fill_method, limit=limit
            )
        tm.assert_series_equal(rs_freq, rs_periods)
@pytest.mark.parametrize("fill_method", ["pad", "ffill", None])
def test_pct_change_with_duplicated_indices(fill_method):
    """pct_change aligns positionally on a duplicated index (GH30463)."""
    dup_index = ["a", "b"] * 3
    ser = Series([np.nan, 1, 2, 3, 9, 18], index=dup_index)
    expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=dup_index)

    # an explicit non-None fill_method is deprecated and warns
    msg = (
        "The 'fill_method' keyword being not None and the 'limit' keyword in "
        "Series.pct_change are deprecated"
    )
    warn = FutureWarning if fill_method is not None else None
    with tm.assert_produces_warning(warn, match=msg):
        result = ser.pct_change(fill_method=fill_method)

    tm.assert_series_equal(result, expected)
def test_pct_change_no_warning_na_beginning():
    """Leading NaNs alone must not trigger the fill_method warning (GH#54981)."""
    ser = Series([None, None, 1, 2, 3])
    expected = Series([np.nan, np.nan, np.nan, 1, 0.5])
    result = ser.pct_change()
    tm.assert_series_equal(result, expected)
def test_pct_change_empty():
    """pct_change(periods=0) of an empty Series is an empty Series (GH 57056)."""
    ser = Series([], dtype="float64")
    result = ser.pct_change(periods=0)
    # nothing to compute: the result equals the (empty) input
    tm.assert_series_equal(ser.copy(), result)

View File

@ -0,0 +1,13 @@
from pandas import Series
import pandas._testing as tm
def test_pop():
    """Series.pop removes the label in place and returns its value (GH#6600)."""
    ser = Series([0, 4, 0], index=["A", "B", "C"], name=4)

    popped = ser.pop("B")
    assert popped == 4

    # the popped label is gone; the name survives
    tm.assert_series_equal(ser, Series([0, 0], index=["A", "C"], name=4))

View File

@ -0,0 +1,247 @@
import numpy as np
import pytest
from pandas.core.dtypes.common import is_integer
import pandas as pd
from pandas import (
Index,
Series,
)
import pandas._testing as tm
from pandas.core.indexes.datetimes import Timestamp
class TestSeriesQuantile:
    """Tests for Series.quantile across dtypes, NaN handling and edge cases.

    ``datetime_series``, ``unit`` and ``any_int_ea_dtype`` are pytest
    fixtures, presumably from a shared conftest -- TODO confirm.
    ``datetime_series`` appears to be a float series on a business-day
    index starting 2000-01-03 (the exact Timestamp expectations below
    depend on that).
    """

    def test_quantile(self, datetime_series):
        """Scalar quantiles for float/object/datetime/timedelta data."""
        q = datetime_series.quantile(0.1)
        assert q == np.percentile(datetime_series.dropna(), 10)

        q = datetime_series.quantile(0.9)
        assert q == np.percentile(datetime_series.dropna(), 90)

        # object dtype
        q = Series(datetime_series, dtype=object).quantile(0.9)
        assert q == np.percentile(datetime_series.dropna(), 90)

        # datetime64[ns] dtype
        dts = datetime_series.index.to_series()
        q = dts.quantile(0.2)
        assert q == Timestamp("2000-01-10 19:12:00")

        # timedelta64[ns] dtype
        tds = dts.diff()
        q = tds.quantile(0.25)
        assert q == pd.to_timedelta("24:00:00")

        # GH7661
        result = Series([np.timedelta64("NaT")]).sum()
        assert result == pd.Timedelta(0)

        # out-of-range percentiles raise, scalar or list alike
        msg = "percentiles should all be in the interval \\[0, 1\\]"
        for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
            with pytest.raises(ValueError, match=msg):
                datetime_series.quantile(invalid)

        s = Series(np.random.default_rng(2).standard_normal(100))
        percentile_array = [-0.5, 0.25, 1.5]
        with pytest.raises(ValueError, match=msg):
            s.quantile(percentile_array)

    def test_quantile_multi(self, datetime_series, unit):
        """List-like q returns a Series indexed by q (including empty q)."""
        datetime_series.index = datetime_series.index.as_unit(unit)
        qs = [0.1, 0.9]
        result = datetime_series.quantile(qs)
        expected = Series(
            [
                np.percentile(datetime_series.dropna(), 10),
                np.percentile(datetime_series.dropna(), 90),
            ],
            index=qs,
            name=datetime_series.name,
        )
        tm.assert_series_equal(result, expected)

        # duplicate quantiles are allowed and both returned
        dts = datetime_series.index.to_series()
        dts.name = "xxx"
        result = dts.quantile((0.2, 0.2))
        expected = Series(
            [Timestamp("2000-01-10 19:12:00"), Timestamp("2000-01-10 19:12:00")],
            index=[0.2, 0.2],
            name="xxx",
            dtype=f"M8[{unit}]",
        )
        tm.assert_series_equal(result, expected)

        # empty q -> empty float result with a float index
        result = datetime_series.quantile([])
        expected = Series(
            [], name=datetime_series.name, index=Index([], dtype=float), dtype="float64"
        )
        tm.assert_series_equal(result, expected)

    def test_quantile_interpolation(self, datetime_series):
        """interpolation='linear' is the default (gh-10174)."""
        # see gh-10174

        # interpolation = linear (default case)
        q = datetime_series.quantile(0.1, interpolation="linear")
        assert q == np.percentile(datetime_series.dropna(), 10)
        q1 = datetime_series.quantile(0.1)
        assert q1 == np.percentile(datetime_series.dropna(), 10)

        # test with and without interpolation keyword
        assert q == q1

    def test_quantile_interpolation_dtype(self):
        """'lower'/'higher' interpolation preserves integer dtype (GH #10174)."""
        # GH #10174

        # interpolation = linear (default case)
        q = Series([1, 3, 4]).quantile(0.5, interpolation="lower")
        assert q == np.percentile(np.array([1, 3, 4]), 50)
        assert is_integer(q)

        q = Series([1, 3, 4]).quantile(0.5, interpolation="higher")
        assert q == np.percentile(np.array([1, 3, 4]), 50)
        assert is_integer(q)

    def test_quantile_nan(self):
        """NaNs are skipped; all-NaN/empty input yields NaN (GH 13098)."""
        # GH 13098
        ser = Series([1, 2, 3, 4, np.nan])
        result = ser.quantile(0.5)
        expected = 2.5
        assert result == expected

        # all nan/empty
        s1 = Series([], dtype=object)
        cases = [s1, Series([np.nan, np.nan])]

        for ser in cases:
            res = ser.quantile(0.5)
            assert np.isnan(res)

            res = ser.quantile([0.5])
            tm.assert_series_equal(res, Series([np.nan], index=[0.5]))

            res = ser.quantile([0.2, 0.3])
            tm.assert_series_equal(res, Series([np.nan, np.nan], index=[0.2, 0.3]))

    @pytest.mark.parametrize(
        "case",
        [
            [
                Timestamp("2011-01-01"),
                Timestamp("2011-01-02"),
                Timestamp("2011-01-03"),
            ],
            [
                Timestamp("2011-01-01", tz="US/Eastern"),
                Timestamp("2011-01-02", tz="US/Eastern"),
                Timestamp("2011-01-03", tz="US/Eastern"),
            ],
            [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
            # NaT
            [
                Timestamp("2011-01-01"),
                Timestamp("2011-01-02"),
                Timestamp("2011-01-03"),
                pd.NaT,
            ],
            [
                Timestamp("2011-01-01", tz="US/Eastern"),
                Timestamp("2011-01-02", tz="US/Eastern"),
                Timestamp("2011-01-03", tz="US/Eastern"),
                pd.NaT,
            ],
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Timedelta("3 days"),
                pd.NaT,
            ],
        ],
    )
    def test_quantile_box(self, case):
        """Datetime/timedelta (tz-aware or not, with NaT) return boxed scalars."""
        # the middle element is the exact median for each 3-valid-value case
        ser = Series(case, name="XXX")
        res = ser.quantile(0.5)
        assert res == case[1]

        res = ser.quantile([0.5])
        exp = Series([case[1]], index=[0.5], name="XXX")
        tm.assert_series_equal(res, exp)

    def test_datetime_timedelta_quantiles(self):
        """Empty datetime64/timedelta64 series quantile is missing (covers #9694)."""
        # covers #9694
        assert pd.isna(Series([], dtype="M8[ns]").quantile(0.5))
        assert pd.isna(Series([], dtype="m8[ns]").quantile(0.5))

    def test_quantile_nat(self):
        """All-NaT input returns NaT (identity-checked for the scalar case)."""
        res = Series([pd.NaT, pd.NaT]).quantile(0.5)
        assert res is pd.NaT

        res = Series([pd.NaT, pd.NaT]).quantile([0.5])
        tm.assert_series_equal(res, Series([pd.NaT], index=[0.5]))

    @pytest.mark.parametrize(
        "values, dtype",
        [([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")],
    )
    def test_quantile_sparse(self, values, dtype):
        """Sparse input quantile matches the dense computation, kept sparse."""
        ser = Series(values, dtype=dtype)
        result = ser.quantile([0.5])
        expected = Series(np.asarray(ser)).quantile([0.5]).astype("Sparse[float]")
        tm.assert_series_equal(result, expected)

    def test_quantile_empty_float64(self):
        """Empty float64 series: scalar NaN / list-like NaN result."""
        # floats
        ser = Series([], dtype="float64")

        res = ser.quantile(0.5)
        assert np.isnan(res)

        res = ser.quantile([0.5])
        exp = Series([np.nan], index=[0.5])
        tm.assert_series_equal(res, exp)

    def test_quantile_empty_int64(self):
        """Empty int64 series: scalar NaN / list-like NaN result."""
        # int
        ser = Series([], dtype="int64")

        res = ser.quantile(0.5)
        assert np.isnan(res)

        res = ser.quantile([0.5])
        exp = Series([np.nan], index=[0.5])
        tm.assert_series_equal(res, exp)

    def test_quantile_empty_dt64(self):
        """Empty datetime64 series: scalar NaT / list-like NaT keeping dtype."""
        # datetime
        ser = Series([], dtype="datetime64[ns]")

        res = ser.quantile(0.5)
        assert res is pd.NaT

        res = ser.quantile([0.5])
        exp = Series([pd.NaT], index=[0.5], dtype=ser.dtype)
        tm.assert_series_equal(res, exp)

    @pytest.mark.parametrize("dtype", [int, float, "Int64"])
    def test_quantile_dtypes(self, dtype):
        """Numeric dtypes interpolate alike; masked Int64 returns Float64."""
        result = Series([1, 2, 3], dtype=dtype).quantile(np.arange(0, 1, 0.25))
        expected = Series(np.arange(1, 3, 0.5), index=np.arange(0, 1, 0.25))
        if dtype == "Int64":
            expected = expected.astype("Float64")
        tm.assert_series_equal(result, expected)

    def test_quantile_all_na(self, any_int_ea_dtype):
        """All-NA masked-integer input returns NA without warning (GH#50681)."""
        # GH#50681
        ser = Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)
        with tm.assert_produces_warning(None):
            result = ser.quantile([0.1, 0.5])
        expected = Series([pd.NA, pd.NA], dtype=any_int_ea_dtype, index=[0.1, 0.5])
        tm.assert_series_equal(result, expected)

    def test_quantile_dtype_size(self, any_int_ea_dtype):
        """Masked-integer quantile keeps the original integer width (GH#50681)."""
        # GH#50681
        ser = Series([pd.NA, pd.NA, 1], dtype=any_int_ea_dtype)
        result = ser.quantile([0.1, 0.5])
        expected = Series([1, 1], dtype=any_int_ea_dtype, index=[0.1, 0.5])
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,519 @@
from itertools import chain
import operator
import numpy as np
import pytest
from pandas._libs.algos import (
Infinity,
NegInfinity,
)
import pandas.util._test_decorators as td
from pandas import (
NA,
NaT,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
from pandas.api.types import CategoricalDtype
@pytest.fixture
def ser():
    # Series with ties (1, 2 and 3 each appear twice) and two NaNs,
    # shared by the tie-method rank tests below.
    return Series([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3])
@pytest.fixture(
    params=[
        # (rank method, expected ranks for the ``ser`` fixture above);
        # NaNs stay NaN under the default na_option="keep"
        ["average", np.array([1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5])],
        ["min", np.array([1, 5, 7, 3, np.nan, 3, 1, 8, np.nan, 5])],
        ["max", np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6])],
        ["first", np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6])],
        ["dense", np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3])],
    ]
)
def results(request):
    # One (method, expected) pair per parametrized run.
    return request.param
@pytest.fixture(
    params=[
        "object",
        "float64",
        "int64",
        "Float64",
        "Int64",
        # pyarrow-backed dtypes are skipped when pyarrow is not installed
        pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
        pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
    ]
)
def dtype(request):
    # Input dtype under test: numpy, masked (EA) and pyarrow backends.
    return request.param
class TestSeriesRank:
    """Tests for Series.rank: tie methods, na_option, pct, categoricals.

    ``datetime_series`` is a pytest fixture, presumably a float series on
    a datetime index named "ts" from a shared conftest -- TODO confirm.
    """

    def test_rank(self, datetime_series):
        """Broad parity checks against scipy.stats.rankdata plus pct cases."""
        sp_stats = pytest.importorskip("scipy.stats")

        # NOTE(review): mutates the fixture in place; fine as long as the
        # fixture is function-scoped -- confirm in conftest.
        datetime_series[::2] = np.nan
        datetime_series[:10:3] = 4.0

        ranks = datetime_series.rank()
        oranks = datetime_series.astype("O").rank()

        tm.assert_series_equal(ranks, oranks)

        mask = np.isnan(datetime_series)
        filled = datetime_series.fillna(np.inf)

        # rankdata returns a ndarray
        exp = Series(sp_stats.rankdata(filled), index=filled.index, name="ts")
        exp[mask] = np.nan

        tm.assert_series_equal(ranks, exp)

        # integer input ranks like its float cast
        iseries = Series(np.arange(5).repeat(2))

        iranks = iseries.rank()
        exp = iseries.astype(float).rank()
        tm.assert_series_equal(iranks, exp)

        iseries = Series(np.arange(5)) + 1.0
        exp = iseries / 5.0
        iranks = iseries.rank(pct=True)

        tm.assert_series_equal(iranks, exp)

        # all-equal values: pct rank is the average (50.5/100)
        iseries = Series(np.repeat(1, 100))
        exp = Series(np.repeat(0.505, 100))
        iranks = iseries.rank(pct=True)
        tm.assert_series_equal(iranks, exp)

        # Explicit cast to float to avoid implicit cast when setting nan
        iseries = iseries.astype("float")
        iseries[1] = np.nan
        exp = Series(np.repeat(50.0 / 99.0, 100))
        exp[1] = np.nan
        iranks = iseries.rank(pct=True)
        tm.assert_series_equal(iranks, exp)

        iseries = Series(np.arange(5)) + 1.0
        iseries[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        tm.assert_series_equal(iranks, exp)

        # all-NaN input ranks to all-NaN
        iseries = Series(np.repeat(np.nan, 100))
        exp = iseries.copy()
        iranks = iseries.rank(pct=True)
        tm.assert_series_equal(iranks, exp)

        # Explicit cast to float to avoid implicit cast when setting nan
        iseries = Series(np.arange(5), dtype="float") + 1
        iseries[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        tm.assert_series_equal(iranks, exp)

        rng = date_range("1/1/1990", periods=5)

        # Explicit cast to float to avoid implicit cast when setting nan
        iseries = Series(np.arange(5), rng, dtype="float") + 1
        iseries.iloc[4] = np.nan
        exp = iseries / 4.0
        iranks = iseries.rank(pct=True)
        tm.assert_series_equal(iranks, exp)

        # tiny magnitudes must still be ordered correctly
        iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1])
        exp = Series([2, 1, 3, 5, 4, 6.0])
        iranks = iseries.rank()
        tm.assert_series_equal(iranks, exp)

        # GH 5968
        iseries = Series(["3 day", "1 day 10m", "-2 day", NaT], dtype="m8[ns]")
        exp = Series([3, 2, 1, np.nan])
        iranks = iseries.rank()
        tm.assert_series_equal(iranks, exp)

        # ranking a shuffled strictly-increasing array recovers the shuffle
        values = np.array(
            [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40],
            dtype="float64",
        )
        random_order = np.random.default_rng(2).permutation(len(values))
        iseries = Series(values[random_order])
        exp = Series(random_order + 1.0, dtype="float64")
        iranks = iseries.rank()
        tm.assert_series_equal(iranks, exp)

    def test_rank_categorical(self):
        """Ordered categoricals rank by category order; unordered like objects."""
        # GH issue #15420 rank incorrectly orders ordered categories

        # Test ascending/descending ranking for ordered categoricals
        exp = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        exp_desc = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
        ordered = Series(
            ["first", "second", "third", "fourth", "fifth", "sixth"]
        ).astype(
            CategoricalDtype(
                categories=["first", "second", "third", "fourth", "fifth", "sixth"],
                ordered=True,
            )
        )
        tm.assert_series_equal(ordered.rank(), exp)
        tm.assert_series_equal(ordered.rank(ascending=False), exp_desc)

        # Unordered categoricals should be ranked as objects
        unordered = Series(
            ["first", "second", "third", "fourth", "fifth", "sixth"]
        ).astype(
            CategoricalDtype(
                categories=["first", "second", "third", "fourth", "fifth", "sixth"],
                ordered=False,
            )
        )
        # lexicographic order of the strings, not the category order
        exp_unordered = Series([2.0, 4.0, 6.0, 3.0, 1.0, 5.0])
        res = unordered.rank()
        tm.assert_series_equal(res, exp_unordered)

        unordered1 = Series([1, 2, 3, 4, 5, 6]).astype(
            CategoricalDtype([1, 2, 3, 4, 5, 6], False)
        )
        exp_unordered1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        res1 = unordered1.rank()
        tm.assert_series_equal(res1, exp_unordered1)

        # Test na_option for rank data
        na_ser = Series(
            ["first", "second", "third", "fourth", "fifth", "sixth", np.nan]
        ).astype(
            CategoricalDtype(
                ["first", "second", "third", "fourth", "fifth", "sixth", "seventh"],
                True,
            )
        )
        exp_top = Series([2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0])
        exp_bot = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
        exp_keep = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan])

        tm.assert_series_equal(na_ser.rank(na_option="top"), exp_top)
        tm.assert_series_equal(na_ser.rank(na_option="bottom"), exp_bot)
        tm.assert_series_equal(na_ser.rank(na_option="keep"), exp_keep)

        # Test na_option for rank data with ascending False
        exp_top = Series([7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
        exp_bot = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 7.0])
        exp_keep = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, np.nan])

        tm.assert_series_equal(na_ser.rank(na_option="top", ascending=False), exp_top)
        tm.assert_series_equal(
            na_ser.rank(na_option="bottom", ascending=False), exp_bot
        )
        tm.assert_series_equal(na_ser.rank(na_option="keep", ascending=False), exp_keep)

        # Test invalid values for na_option
        msg = "na_option must be one of 'keep', 'top', or 'bottom'"

        with pytest.raises(ValueError, match=msg):
            na_ser.rank(na_option="bad", ascending=False)

        # invalid type
        with pytest.raises(ValueError, match=msg):
            na_ser.rank(na_option=True, ascending=False)

        # Test with pct=True
        na_ser = Series(["first", "second", "third", "fourth", np.nan]).astype(
            CategoricalDtype(["first", "second", "third", "fourth"], True)
        )
        exp_top = Series([0.4, 0.6, 0.8, 1.0, 0.2])
        exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.0])
        exp_keep = Series([0.25, 0.5, 0.75, 1.0, np.nan])

        tm.assert_series_equal(na_ser.rank(na_option="top", pct=True), exp_top)
        tm.assert_series_equal(na_ser.rank(na_option="bottom", pct=True), exp_bot)
        tm.assert_series_equal(na_ser.rank(na_option="keep", pct=True), exp_keep)

    def test_rank_signature(self):
        """method must be passed by keyword; positionally it is taken as axis."""
        s = Series([0, 1])
        s.rank(method="average")
        msg = "No axis named average for object type Series"
        with pytest.raises(ValueError, match=msg):
            s.rank("average")

    @pytest.mark.parametrize("dtype", [None, object])
    def test_rank_tie_methods(self, ser, results, dtype):
        """Each tie method matches its expected ranks for numeric and object."""
        # NOTE: this parametrized ``dtype`` shadows the module-level fixture
        method, exp = results
        ser = ser if dtype is None else ser.astype(dtype)
        result = ser.rank(method=method)
        tm.assert_series_equal(result, Series(exp))

    @pytest.mark.parametrize("ascending", [True, False])
    @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
    @pytest.mark.parametrize("na_option", ["top", "bottom", "keep"])
    @pytest.mark.parametrize(
        "dtype, na_value, pos_inf, neg_inf",
        [
            ("object", None, Infinity(), NegInfinity()),
            ("float64", np.nan, np.inf, -np.inf),
            ("Float64", NA, np.inf, -np.inf),
            pytest.param(
                "float64[pyarrow]",
                NA,
                np.inf,
                -np.inf,
                marks=td.skip_if_no("pyarrow"),
            ),
        ],
    )
    def test_rank_tie_methods_on_infs_nans(
        self, method, na_option, ascending, dtype, na_value, pos_inf, neg_inf
    ):
        """Ranks of [-inf*3, NA*3, +inf*3] under every method/na_option combo."""
        pytest.importorskip("scipy")
        if dtype == "float64[pyarrow]":
            if method == "average":
                exp_dtype = "float64[pyarrow]"
            else:
                # non-average pyarrow ranks come back as unsigned ints
                exp_dtype = "uint64[pyarrow]"
        else:
            exp_dtype = "float64"

        chunk = 3
        in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
        iseries = Series(in_arr, dtype=dtype)
        # per-method ranks of the three tied groups (low, mid, high)
        exp_ranks = {
            "average": ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
            "min": ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
            "max": ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
            "first": ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
            "dense": ([1, 1, 1], [2, 2, 2], [3, 3, 3]),
        }
        ranks = exp_ranks[method]
        if na_option == "top":
            order = [ranks[1], ranks[0], ranks[2]]
        elif na_option == "bottom":
            order = [ranks[0], ranks[2], ranks[1]]
        else:
            order = [ranks[0], [np.nan] * chunk, ranks[1]]
        expected = order if ascending else order[::-1]
        expected = list(chain.from_iterable(expected))
        result = iseries.rank(method=method, na_option=na_option, ascending=ascending)
        tm.assert_series_equal(result, Series(expected, dtype=exp_dtype))

    def test_rank_desc_mix_nans_infs(self):
        """Descending rank with NaN and +/-inf mixed (GH 19538)."""
        # GH 19538
        # check descending ranking when mix nans and infs
        iseries = Series([1, np.nan, np.inf, -np.inf, 25])
        result = iseries.rank(ascending=False)
        exp = Series([3, np.nan, 1, 4, 2], dtype="float64")
        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
    @pytest.mark.parametrize(
        "op, value",
        [
            [operator.add, 0],
            [operator.add, 1e6],
            [operator.mul, 1e-6],
        ],
    )
    def test_rank_methods_series(self, method, op, value):
        """Ranks are invariant under shift/scale and match scipy rankdata."""
        sp_stats = pytest.importorskip("scipy.stats")

        xs = np.random.default_rng(2).standard_normal(9)
        xs = np.concatenate([xs[i:] for i in range(0, 9, 2)])  # add duplicates
        np.random.default_rng(2).shuffle(xs)

        index = [chr(ord("a") + i) for i in range(len(xs))]
        vals = op(xs, value)
        ts = Series(vals, index=index)
        result = ts.rank(method=method)
        # scipy calls pandas' "first" method "ordinal"
        sprank = sp_stats.rankdata(vals, method if method != "first" else "ordinal")
        expected = Series(sprank, index=index).astype("float64")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "ser, exp",
        [
            ([1], [1]),
            ([2], [1]),
            ([0], [1]),
            ([2, 2], [1, 1]),
            ([1, 2, 3], [1, 2, 3]),
            ([4, 2, 1], [3, 2, 1]),
            ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]),
            ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5]),
        ],
    )
    def test_rank_dense_method(self, dtype, ser, exp):
        """method='dense' assigns consecutive ranks without gaps."""
        s = Series(ser).astype(dtype)
        result = s.rank(method="dense")
        expected = Series(exp).astype(result.dtype)
        tm.assert_series_equal(result, expected)

    def test_rank_descending(self, ser, results, dtype):
        """Descending rank equals ascending rank of (max - x)."""
        method, _ = results
        # masked/pyarrow int dtypes can't hold the fixture's NaNs -> drop them
        if "i" in dtype:
            s = ser.dropna()
        else:
            s = ser.astype(dtype)

        res = s.rank(ascending=False)
        expected = (s.max() - s).rank()
        tm.assert_series_equal(res, expected)

        expected = (s.max() - s).rank(method=method)
        res2 = s.rank(method=method, ascending=False)
        tm.assert_series_equal(res2, expected)

    def test_rank_int(self, ser, results):
        """int64 input produces the same ranks as the float fixture (sans NaN)."""
        method, exp = results
        s = ser.dropna().astype("i8")

        result = s.rank(method=method)
        expected = Series(exp).dropna()
        expected.index = result.index
        tm.assert_series_equal(result, expected)

    def test_rank_object_bug(self):
        """Object dtype all-NaN rank must not raise (GH 13445)."""
        # GH 13445

        # smoke tests
        Series([np.nan] * 32).astype(object).rank(ascending=True)
        Series([np.nan] * 32).astype(object).rank(ascending=False)

    def test_rank_modify_inplace(self):
        """rank() must not mutate the input series (GH 18521)."""
        # GH 18521
        # Check rank does not mutate series
        s = Series([Timestamp("2017-01-05 10:20:27.569000"), NaT])
        expected = s.copy()

        s.rank()
        result = s
        tm.assert_series_equal(result, expected)

    def test_rank_ea_small_values(self):
        """Tiny Float64 (masked) magnitudes rank correctly (GH#52471)."""
        # GH#52471
        ser = Series(
            [5.4954145e29, -9.791984e-21, 9.3715776e-26, NA, 1.8790257e-28],
            dtype="Float64",
        )
        result = ser.rank(method="min")
        expected = Series([4, 1, 3, np.nan, 2])
        tm.assert_series_equal(result, expected)
# GH15630, pct should be on 100% basis when method='dense'
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0, 1.0]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 2, 2.0 / 2, 2.0 / 2]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.0 / 3, 1.0 / 3, 3.0 / 3, 3.0 / 3, 2.0 / 3]),
        ([1, 1, 3, 3, 5, 5], [1.0 / 3, 1.0 / 3, 2.0 / 3, 2.0 / 3, 3.0 / 3, 3.0 / 3]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_dense_pct(dtype, ser, exp):
    """dense-method pct ranks are scaled by the number of distinct groups."""
    obj = Series(ser).astype(dtype)
    result = obj.rank(method="dense", pct=True)
    tm.assert_series_equal(result, Series(exp).astype(result.dtype))
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0 / 2, 1.0 / 2]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 2.0 / 3, 2.0 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.0 / 5, 1.0 / 5, 4.0 / 5, 4.0 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [1.0 / 6, 1.0 / 6, 3.0 / 6, 3.0 / 6, 5.0 / 6, 5.0 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_min_pct(dtype, ser, exp):
    """min-method pct ranks divide the lowest tied rank by the row count."""
    obj = Series(ser).astype(dtype)
    result = obj.rank(method="min", pct=True)
    tm.assert_series_equal(result, Series(exp).astype(result.dtype))
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0, 1.0]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 3.0 / 3, 3.0 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [2.0 / 5, 2.0 / 5, 5.0 / 5, 5.0 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [2.0 / 6, 2.0 / 6, 4.0 / 6, 4.0 / 6, 6.0 / 6, 6.0 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_max_pct(dtype, ser, exp):
    """max-method pct ranks divide the highest tied rank by the row count."""
    obj = Series(ser).astype(dtype)
    result = obj.rank(method="max", pct=True)
    tm.assert_series_equal(result, Series(exp).astype(result.dtype))
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.5 / 2, 1.5 / 2]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 2.5 / 3, 2.5 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_average_pct(dtype, ser, exp):
    """average-method pct ranks divide the mean tied rank by the row count."""
    obj = Series(ser).astype(dtype)
    result = obj.rank(method="average", pct=True)
    tm.assert_series_equal(result, Series(exp).astype(result.dtype))
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0 / 2, 2.0 / 2.0]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.0 / 5, 2.0 / 5, 4.0 / 5, 5.0 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6, 6.0 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_first_pct(dtype, ser, exp):
    """first-method pct ranks break ties by appearance order, then scale."""
    obj = Series(ser).astype(dtype)
    result = obj.rank(method="first", pct=True)
    tm.assert_series_equal(result, Series(exp).astype(result.dtype))
@pytest.mark.single_cpu
def test_pct_max_many_rows():
    """pct rank still tops out at exactly 1.0 past 2**24 rows (GH 18271)."""
    big = Series(np.arange(2**24 + 1))
    assert big.rank(pct=True).max() == 1

View File

@ -0,0 +1,448 @@
import numpy as np
import pytest
from pandas._config import using_pyarrow_string_dtype
import pandas.util._test_decorators as td
from pandas import (
NA,
Categorical,
Float64Dtype,
Index,
MultiIndex,
NaT,
Period,
PeriodIndex,
RangeIndex,
Series,
Timedelta,
Timestamp,
date_range,
isna,
)
import pandas._testing as tm
@pytest.mark.xfail(
    using_pyarrow_string_dtype(), reason="share memory doesn't work for arrow"
)
def test_reindex(datetime_series, string_series):
    """Reindexing onto identical, sub, and non-contiguous indexes."""
    # reindexing onto the identical index shares memory and identity
    same = string_series.reindex(string_series.index)
    assert np.may_share_memory(string_series.index, same.index)
    assert same.index.is_(string_series.index)
    assert same.index.identical(string_series.index)

    sub_idx = string_series.index[10:20]
    sub_ser = string_series.reindex(sub_idx)
    for label, value in sub_ser.items():
        assert value == string_series[label]

    dt_sub_idx = datetime_series.index[10:20]
    dt_sub = datetime_series.reindex(dt_sub_idx)
    for label, value in dt_sub.items():
        assert value == datetime_series[label]

    # labels absent from the source become NaN
    missing = datetime_series.reindex(sub_idx)
    assert np.isnan(missing).all()

    # This is extremely important for the Cython code to not screw up
    noncontig = datetime_series.reindex(datetime_series.index[::2])
    for label, value in noncontig.items():
        assert value == datetime_series[label]

    # reindex() with no target returns a copy, not the same object
    assert datetime_series.reindex() is not datetime_series
def test_reindex_nan():
    """Reindexing with NaN labels matches positional lookup of the NaN slot."""
    ts = Series([2, 3, 5, 7], index=[1, 4, np.nan, 8])

    targets = [np.nan, 1, np.nan, 8, 4, np.nan]
    positions = [2, 0, 2, 3, 1, 2]
    tm.assert_series_equal(ts.reindex(targets), ts.iloc[positions])

    ts.index = ts.index.astype("object")

    # reindex coerces index.dtype to float, loc/iloc doesn't
    tm.assert_series_equal(
        ts.reindex(targets), ts.iloc[positions], check_index_type=False
    )
def test_reindex_series_add_nat():
    """Extending a datetime64 Series fills with NaT and keeps the dtype."""
    rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s")
    extended = Series(rng).reindex(range(15))
    assert np.issubdtype(extended.dtype, np.dtype("M8[ns]"))

    # only the five appended positions are missing
    missing = extended.isna()
    assert missing[-5:].all()
    assert not missing[:-5].any()
def test_reindex_with_datetimes():
    """reindex and __getitem__ with datetime-label lists select the same rows."""
    rng = date_range("1/1/2000", periods=20)
    ts = Series(np.random.default_rng(2).standard_normal(20), index=rng)

    expected = ts[5:10]
    # slicing keeps the freq; label-based selection drops it
    expected.index = expected.index._with_freq(None)

    labels = list(ts.index[5:10])
    tm.assert_series_equal(ts.reindex(labels), expected)
    tm.assert_series_equal(ts[labels], expected)
def test_reindex_corner(datetime_series):
    """Corner cases: padding an empty series, list targets, bad fill method."""
    # padding an empty series must not raise
    empty = Series(index=[])
    empty.reindex(datetime_series.index, method="pad")  # it works
    reindexed = empty.reindex(datetime_series.index, method="pad")

    # a plain list target behaves like an Index target
    reindexed = datetime_series.reindex(list(datetime_series.index))
    datetime_series.index = datetime_series.index._with_freq(None)
    tm.assert_series_equal(datetime_series, reindexed)

    # an unknown fill method raises a descriptive ValueError
    sparse = datetime_series[::2]
    msg = (
        r"Invalid fill method\. Expecting pad \(ffill\), backfill "
        r"\(bfill\) or nearest\. Got foo"
    )
    with pytest.raises(ValueError, match=msg):
        sparse.reindex(datetime_series.index, method="foo")
def test_reindex_pad():
    """method="pad" and method="ffill" are synonyms and forward-fill gaps."""
    full = Series(np.arange(10), dtype="int64")
    sparse = full[::2]

    padded = sparse.reindex(full.index, method="pad")
    tm.assert_series_equal(padded, sparse.reindex(full.index, method="ffill"))

    # every odd slot takes the preceding even value
    tm.assert_series_equal(padded, Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8]))
def test_reindex_pad2():
    """reindex-then-ffill vs reindex(method="ffill") fill different values (GH4604).

    Filling after reindexing pads from the *reindexed* series (so "g" takes
    1 from "a"), while method="ffill" pads from the *original* ordering
    (so "g" takes 5 from "e").
    """
    # GH4604
    s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
    new_index = ["a", "g", "c", "f"]
    expected = Series([1, 1, 3, 3], index=new_index)

    # this changes dtype because the ffill happens after
    result = s.reindex(new_index).ffill()
    tm.assert_series_equal(result, expected.astype("float64"))

    # downcast="infer" restores int64; deprecated in this pandas version
    msg = "The 'downcast' keyword in ffill is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s.reindex(new_index).ffill(downcast="infer")
    tm.assert_series_equal(result, expected)

    expected = Series([1, 5, 3, 5], index=new_index)
    result = s.reindex(new_index, method="ffill")
    tm.assert_series_equal(result, expected)
def test_reindex_inference():
    """ffill after reindexing a bool series downcasts back to bool.

    The intermediate reindexed series is object (bool + NaN); the ffill
    downcast emits the deprecation warning asserted here.
    """
    # inference of new dtype
    s = Series([True, False, False, True], index=list("abcd"))
    new_index = "agc"
    msg = "Downcasting object dtype arrays on"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s.reindex(list(new_index)).ffill()
    expected = Series([True, True, False], index=list(new_index))
    tm.assert_series_equal(result, expected)
def test_reindex_downcasting():
    """shift-then-bfill on an all-False series downcasts back to bool (GH4618).

    shift(1) produces object dtype (bool + NaN); bfill restores the
    original values and the downcast emits the asserted deprecation.
    """
    # GH4618 shifted series downcasting
    s = Series(False, index=range(5))
    msg = "Downcasting object dtype arrays on"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s.shift(1).bfill()
    expected = Series(False, index=range(5))
    tm.assert_series_equal(result, expected)
def test_reindex_nearest():
    """method="nearest" with no tolerance, a scalar, and a per-label list."""
    ser = Series(np.arange(10, dtype="int64"))
    target = [0.1, 0.9, 1.5, 2.0]

    result = ser.reindex(target, method="nearest")
    tm.assert_series_equal(Series(np.around(target).astype("int64"), target), result)

    # scalar tolerance: 1.5 is farther than 0.2 from both 1 and 2
    result = ser.reindex(target, method="nearest", tolerance=0.2)
    tm.assert_series_equal(Series([0, 1, np.nan, 2], target), result)

    # list tolerance is applied element-wise
    result = ser.reindex(target, method="nearest", tolerance=[0.3, 0.01, 0.4, 3])
    tm.assert_series_equal(Series([0, np.nan, np.nan, 2], target), result)
def test_reindex_int(datetime_series):
ts = datetime_series[::2]
int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
# this should work fine
reindexed_int = int_ts.reindex(datetime_series.index)
# if NaNs introduced
assert reindexed_int.dtype == np.float64
# NO NaNs introduced
reindexed_int = int_ts.reindex(int_ts.index[::2])
assert reindexed_int.dtype == np.dtype(int)
def test_reindex_bool(datetime_series):
# A series other than float, int, string, or object
ts = datetime_series[::2]
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
# this should work fine
reindexed_bool = bool_ts.reindex(datetime_series.index)
# if NaNs introduced
assert reindexed_bool.dtype == np.object_
# NO NaNs introduced
reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
assert reindexed_bool.dtype == np.bool_
def test_reindex_bool_pad(datetime_series):
# fail
ts = datetime_series[5:]
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
filled_bool = bool_ts.reindex(datetime_series.index, method="pad")
assert isna(filled_bool[:5]).all()
def test_reindex_categorical():
    """Reindexing a categorical keeps categories; new labels become NaN."""
    index = date_range("20000101", periods=3)
    ser = Series(["a", "b", "c"], dtype="category")

    # reindexing to an invalid Categorical: no label overlap -> all NaN
    expected = Series(
        Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"])
    )
    expected.index = index
    tm.assert_series_equal(ser.reindex(index), expected)

    # partial reindexing keeps matched values and the full category set
    expected = Series(Categorical(values=["b", "c"], categories=["a", "b", "c"]))
    expected.index = [1, 2]
    tm.assert_series_equal(ser.reindex([1, 2]), expected)

    expected = Series(Categorical(values=["c", np.nan], categories=["a", "b", "c"]))
    expected.index = [2, 3]
    tm.assert_series_equal(ser.reindex([2, 3]), expected)
def test_reindex_astype_order_consistency():
    """reindex and astype commute: both orders give the same result (GH#17444)."""
    ser = Series([1, 2, 3], index=[2, 0, 1])
    new_index = [0, 1, 2]
    temp_dtype = "category"
    new_dtype = str

    via_reindex_first = ser.reindex(new_index).astype(temp_dtype).astype(new_dtype)
    via_astype_first = ser.astype(temp_dtype).reindex(new_index).astype(new_dtype)
    tm.assert_series_equal(via_reindex_first, via_astype_first)
def test_reindex_fill_value():
    """fill_value controls both the filled value and the resulting dtype."""
    # floats: default fill is NaN; an explicit fill keeps float dtype
    floats = Series([1.0, 2.0, 3.0])
    tm.assert_series_equal(
        floats.reindex([1, 2, 3]),
        Series([2.0, 3.0, np.nan], index=[1, 2, 3]),
    )
    tm.assert_series_equal(
        floats.reindex([1, 2, 3], fill_value=0),
        Series([2.0, 3.0, 0], index=[1, 2, 3]),
    )

    # ints: NaN fill upcasts to float64, an int fill must NOT upcast
    ints = Series([1, 2, 3])
    tm.assert_series_equal(
        ints.reindex([1, 2, 3]),
        Series([2.0, 3.0, np.nan], index=[1, 2, 3]),
    )
    filled_ints = ints.reindex([1, 2, 3], fill_value=0)
    assert issubclass(filled_ints.dtype.type, np.integer)
    tm.assert_series_equal(filled_ints, Series([2, 3, 0], index=[1, 2, 3]))

    # objects: dtype is preserved whatever the fill value
    objects = Series([1, 2, 3], dtype=object)
    tm.assert_series_equal(
        objects.reindex([1, 2, 3]),
        Series([2, 3, np.nan], index=[1, 2, 3], dtype=object),
    )
    tm.assert_series_equal(
        objects.reindex([1, 2, 3], fill_value="foo"),
        Series([2, 3, "foo"], index=[1, 2, 3], dtype=object),
    )

    # bools: NaN fill upcasts to object, a bool fill keeps bool dtype
    bools = Series([True, False, True])
    tm.assert_series_equal(
        bools.reindex([1, 2, 3]),
        Series([False, True, np.nan], index=[1, 2, 3], dtype=object),
    )
    tm.assert_series_equal(
        bools.reindex([1, 2, 3], fill_value=False),
        Series([False, True, False], index=[1, 2, 3]),
    )
@td.skip_array_manager_not_yet_implemented
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
@pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)])
def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager):
    """An incompatible fill_value upcasts a datetimelike Series to object.

    https://github.com/pandas-dev/pandas/issues/42921
    """
    if dtype == "timedelta64[ns]" and fill_value == Timedelta(0):
        # use the scalar that is not compatible with the dtype for this test
        fill_value = Timestamp(0)

    result = Series([NaT], dtype=dtype).reindex([0, 1], fill_value=fill_value)
    tm.assert_series_equal(
        result, Series([NaT, fill_value], index=[0, 1], dtype=object)
    )
def test_reindex_datetimeindexes_tz_naive_and_aware():
    # GH 8306: reindexing a tz-aware index against a tz-naive one must raise
    aware = date_range("20131101", tz="America/Chicago", periods=7)
    naive = date_range("20131103", periods=10, freq="h")
    ser = Series(range(7), index=aware)
    expected_msg = (
        r"Cannot compare dtypes datetime64\[ns, America/Chicago\] "
        r"and datetime64\[ns\]"
    )
    with pytest.raises(TypeError, match=expected_msg):
        ser.reindex(naive, method="ffill")
def test_reindex_empty_series_tz_dtype():
    # GH 20869: reindexing an empty tz-aware Series must keep the tz dtype
    reindexed = Series(dtype="datetime64[ns, UTC]").reindex([0, 1])
    tm.assert_equal(reindexed, Series([NaT] * 2, dtype="datetime64[ns, UTC]"))
@pytest.mark.parametrize(
    "p_values, o_values, values, expected_values",
    [
        (
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC"), "All"],
            [1.0, 1.0],
            [1.0, 1.0, np.nan],
        ),
        (
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [1.0, 1.0],
            [1.0, 1.0],
        ),
    ],
)
def test_reindex_periodindex_with_object(p_values, o_values, values, expected_values):
    # GH#28337: a PeriodIndex-backed Series can be reindexed with an object
    # Index; labels without a matching Period become NaN
    ser = Series(values, index=PeriodIndex(p_values))
    target = Index(o_values)
    result = ser.reindex(target)
    tm.assert_series_equal(result, Series(expected_values, index=target))
def test_reindex_too_many_args():
    # GH 40980: more than one positional argument is rejected
    ser = Series([1, 2])
    expected = r"reindex\(\) takes from 1 to 2 positional arguments but 3 were given"
    with pytest.raises(TypeError, match=expected):
        ser.reindex([2, 3], False)
def test_reindex_double_index():
    # GH 40980: passing index both positionally and by keyword is rejected
    ser = Series([1, 2])
    expected = r"reindex\(\) got multiple values for argument 'index'"
    with pytest.raises(TypeError, match=expected):
        ser.reindex([2, 3], index=[3, 4])
def test_reindex_no_posargs():
    # GH 40980: keyword-only usage keeps working
    reindexed = Series([1, 2]).reindex(index=[1, 0])
    tm.assert_series_equal(reindexed, Series([2, 1], index=[1, 0]))
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    # GH41170: level-reindex onto labels absent from the MultiIndex produces
    # an empty result with the requested labels in level 0
    ser = Series(
        range(len(values[0])), index=MultiIndex.from_arrays(values), dtype="object"
    )
    target = np.array(["b"])
    expected_index = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
    tm.assert_series_equal(
        ser.reindex(target, level=0),
        Series(index=expected_index, dtype="object"),
    )
def test_reindex_missing_category():
    # GH#18185: a fill_value outside the categories raises
    ser = Series([1, 2, 3, 1], dtype="category")
    expected = r"Cannot setitem on a Categorical with a new category \(-1\)"
    with pytest.raises(TypeError, match=expected):
        ser.reindex([1, 2, 3, 4, 5], fill_value=-1)
def test_reindexing_with_float64_NA_log():
    # GH 47055: np.log on a reindexed Float64 Series must not warn and the
    # introduced missing values must be NA-backed NaNs
    reindexed = Series([1.0, NA], dtype=Float64Dtype()).reindex(range(3))
    tm.assert_numpy_array_equal(
        reindexed.values._data, np.array([1, np.nan, np.nan])
    )
    with tm.assert_produces_warning(None):
        logged = np.log(reindexed)
    tm.assert_series_equal(
        logged, Series([0, np.nan, np.nan], dtype=Float64Dtype())
    )
@pytest.mark.parametrize("dtype", ["timedelta64", "datetime64"])
def test_reindex_expand_nonnano_nat(dtype):
    # GH 53497: expanding a non-nanosecond Series fills with a NaT of the
    # same unit instead of raising or converting to nanoseconds
    ser = Series(np.array([1], dtype=f"{dtype}[s]"))
    unit_nat = getattr(np, dtype)("nat", "s")
    expected = Series(np.array([1, unit_nat], dtype=f"{dtype}[s]"))
    tm.assert_series_equal(ser.reindex(RangeIndex(2)), expected)

View File

@ -0,0 +1,41 @@
from datetime import datetime
import numpy as np
from pandas import Series
import pandas._testing as tm
def test_reindex_like(datetime_series):
other = datetime_series[::2]
tm.assert_series_equal(
datetime_series.reindex(other.index), datetime_series.reindex_like(other)
)
# GH#7179
day1 = datetime(2013, 3, 5)
day2 = datetime(2013, 5, 5)
day3 = datetime(2014, 3, 5)
series1 = Series([5, None, None], [day1, day2, day3])
series2 = Series([None, None], [day1, day3])
result = series1.reindex_like(series2, method="pad")
expected = Series([5, np.nan], index=[day1, day3])
tm.assert_series_equal(result, expected)
def test_reindex_like_nearest():
    # reindex_like with method="nearest" mirrors reindex(..., method="nearest")
    ser = Series(np.arange(10, dtype="int64"))
    target = [0.1, 0.9, 1.5, 2.0]
    other = ser.reindex(target, method="nearest")
    expected = Series(np.around(target).astype("int64"), target)
    tm.assert_series_equal(expected, ser.reindex_like(other, method="nearest"))
    # tolerance may be given as a scalar ...
    tm.assert_series_equal(
        expected, ser.reindex_like(other, method="nearest", tolerance=1)
    )
    # ... or as a per-label list-like
    tm.assert_series_equal(
        expected, ser.reindex_like(other, method="nearest", tolerance=[1, 2, 3, 4])
    )

View File

@ -0,0 +1,184 @@
from datetime import datetime
import re
import numpy as np
import pytest
from pandas import (
Index,
MultiIndex,
Series,
array,
)
import pandas._testing as tm
class TestRename:
    # Tests for Series.rename: index relabeling via callables/dicts/Series,
    # scalar name assignment, inplace behavior, axis compatibility, and
    # MultiIndex-level renaming.

    def test_rename(self, datetime_series):
        # A callable renamer is applied to each index label; a dict built
        # from the same mapping must give an identical result.
        ts = datetime_series
        renamer = lambda x: x.strftime("%Y%m%d")
        renamed = ts.rename(renamer)
        assert renamed.index[0] == renamer(ts.index[0])
        # dict
        rename_dict = dict(zip(ts.index, renamed.index))
        renamed2 = ts.rename(rename_dict)
        tm.assert_series_equal(renamed, renamed2)

    def test_rename_partial_dict(self):
        # partial dict: labels missing from the mapping are kept unchanged
        ser = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
        renamed = ser.rename({"b": "foo", "d": "bar"})
        tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"]))

    def test_rename_retain_index_name(self):
        # index with name: renaming labels must not drop the index's name
        renamer = Series(
            np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64"
        )
        renamed = renamer.rename({})
        assert renamed.index.name == renamer.index.name

    def test_rename_by_series(self):
        # a Series works as a mapper; labels absent from it are left as-is
        ser = Series(range(5), name="foo")
        renamer = Series({1: 10, 2: 20})
        result = ser.rename(renamer)
        expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo")
        tm.assert_series_equal(result, expected)

    def test_rename_set_name(self, using_infer_string):
        # a scalar/hashable argument sets the Series name without touching
        # the index; the original Series is left unmodified
        ser = Series(range(4), index=list("abcd"))
        for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
            result = ser.rename(name)
            assert result.name == name
            if using_infer_string:
                tm.assert_extension_array_equal(result.index.values, ser.index.values)
            else:
                tm.assert_numpy_array_equal(result.index.values, ser.index.values)
        assert ser.name is None

    def test_rename_set_name_inplace(self, using_infer_string):
        # inplace name assignment mutates the Series but keeps the index
        ser = Series(range(3), index=list("abc"))
        for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
            ser.rename(name, inplace=True)
            assert ser.name == name
            exp = np.array(["a", "b", "c"], dtype=np.object_)
            if using_infer_string:
                exp = array(exp, dtype="string[pyarrow_numpy]")
                tm.assert_extension_array_equal(ser.index.values, exp)
            else:
                tm.assert_numpy_array_equal(ser.index.values, exp)

    def test_rename_axis_supported(self):
        # Supporting axis for compatibility, detailed in GH-18589
        ser = Series(range(5))
        ser.rename({}, axis=0)
        ser.rename({}, axis="index")
        # Series has only one axis, so anything else must be rejected
        with pytest.raises(ValueError, match="No axis named 5"):
            ser.rename({}, axis=5)

    def test_rename_inplace(self, datetime_series):
        # inplace label renaming via a callable
        renamer = lambda x: x.strftime("%Y%m%d")
        expected = renamer(datetime_series.index[0])
        datetime_series.rename(renamer, inplace=True)
        assert datetime_series.index[0] == expected

    def test_rename_with_custom_indexer(self):
        # GH 27814: a non-callable, non-dict object is treated as the new name
        class MyIndexer:
            pass

        ix = MyIndexer()
        ser = Series([1, 2, 3]).rename(ix)
        assert ser.name is ix

    def test_rename_with_custom_indexer_inplace(self):
        # GH 27814: same as above, but mutating in place
        class MyIndexer:
            pass

        ix = MyIndexer()
        ser = Series([1, 2, 3])
        ser.rename(ix, inplace=True)
        assert ser.name is ix

    def test_rename_callable(self):
        # GH 17407: a builtin callable behaves like the equivalent lambda
        ser = Series(range(1, 6), index=Index(range(2, 7), name="IntIndex"))
        result = ser.rename(str)
        expected = ser.rename(lambda i: str(i))
        tm.assert_series_equal(result, expected)
        assert result.name == expected.name

    def test_rename_none(self):
        # GH 40977: rename(None) clears the Series name
        ser = Series([1, 2], name="foo")
        result = ser.rename(None)
        expected = Series([1, 2])
        tm.assert_series_equal(result, expected)

    def test_rename_series_with_multiindex(self):
        # issue #43659: rename labels on a single MultiIndex level
        arrays = [
            ["bar", "baz", "baz", "foo", "qux"],
            ["one", "one", "two", "two", "one"],
        ]
        index = MultiIndex.from_arrays(arrays, names=["first", "second"])
        ser = Series(np.ones(5), index=index)
        result = ser.rename(index={"one": "yes"}, level="second", errors="raise")
        arrays_expected = [
            ["bar", "baz", "baz", "foo", "qux"],
            ["yes", "yes", "two", "two", "yes"],
        ]
        index_expected = MultiIndex.from_arrays(
            arrays_expected, names=["first", "second"]
        )
        series_expected = Series(np.ones(5), index=index_expected)
        tm.assert_series_equal(result, series_expected)

    def test_rename_series_with_multiindex_keeps_ea_dtypes(self):
        # GH21055: a level rename must not downcast extension-array level dtypes
        arrays = [
            Index([1, 2, 3], dtype="Int64").astype("category"),
            Index([1, 2, 3], dtype="Int64"),
        ]
        mi = MultiIndex.from_arrays(arrays, names=["A", "B"])
        ser = Series(1, index=mi)
        result = ser.rename({1: 4}, level=1)
        arrays_expected = [
            Index([1, 2, 3], dtype="Int64").astype("category"),
            Index([4, 2, 3], dtype="Int64"),
        ]
        mi_expected = MultiIndex.from_arrays(arrays_expected, names=["A", "B"])
        expected = Series(1, index=mi_expected)
        tm.assert_series_equal(result, expected)

    def test_rename_error_arg(self):
        # GH 46889: errors="raise" surfaces missing labels as a KeyError
        ser = Series(["foo", "bar"])
        match = re.escape("[2] not found in axis")
        with pytest.raises(KeyError, match=match):
            ser.rename({2: 9}, errors="raise")

    def test_rename_copy_false(self, using_copy_on_write, warn_copy_on_write):
        # GH 46889: copy=False returns a shallow copy that shares data with
        # the original (until Copy-on-Write decouples them on mutation)
        ser = Series(["foo", "bar"])
        ser_orig = ser.copy()
        shallow_copy = ser.rename({1: 9}, copy=False)
        with tm.assert_cow_warning(warn_copy_on_write):
            ser[0] = "foobar"
        if using_copy_on_write:
            assert ser_orig[0] == shallow_copy[0]
            assert ser_orig[1] == shallow_copy[9]
        else:
            assert ser[0] == shallow_copy[0]
            assert ser[1] == shallow_copy[9]

View File

@ -0,0 +1,47 @@
import pytest
from pandas import (
Index,
MultiIndex,
Series,
)
import pandas._testing as tm
class TestSeriesRenameAxis:
    # Tests for Series.rename_axis: dict/callable/list mappers, inplace
    # operation, and None handling.

    def test_rename_axis_mapper(self):
        # GH 19978
        mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"])
        ser = Series(list(range(len(mi))), index=mi)
        result = ser.rename_axis(index={"ll": "foo"})
        assert result.index.names == ["foo", "nn"]
        result = ser.rename_axis(index=str.upper, axis=0)
        assert result.index.names == ["LL", "NN"]
        result = ser.rename_axis(index=["foo", "goo"])
        assert result.index.names == ["foo", "goo"]
        # Series has no columns axis, so the keyword must be rejected
        with pytest.raises(TypeError, match="unexpected"):
            ser.rename_axis(columns="wrong")

    def test_rename_axis_inplace(self, datetime_series):
        # GH 15704: inplace=True mutates the object and returns None
        expected = datetime_series.rename_axis("foo")
        result = datetime_series
        no_return = result.rename_axis("foo", inplace=True)
        assert no_return is None
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("kwargs", [{"mapper": None}, {"index": None}, {}])
    def test_rename_axis_none(self, kwargs):
        # GH 25034: an explicit None clears the axis name, while calling
        # with no arguments is a no-op
        index = Index(list("abc"), name="foo")
        ser = Series([1, 2, 3], index=index)
        result = ser.rename_axis(**kwargs)
        expected_index = index.rename(None) if kwargs else index
        expected = Series([1, 2, 3], index=expected_index)
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,40 @@
import numpy as np
import pytest
from pandas import (
MultiIndex,
Series,
)
import pandas._testing as tm
class TestRepeat:
    # Tests for Series.repeat and its interop with np.repeat.

    def test_repeat(self):
        # scalar and per-element repeat counts; index labels repeat alongside
        ser = Series(np.random.default_rng(2).standard_normal(3), index=["a", "b", "c"])
        reps = ser.repeat(5)
        exp = Series(ser.values.repeat(5), index=ser.index.values.repeat(5))
        tm.assert_series_equal(reps, exp)
        to_rep = [2, 3, 4]
        reps = ser.repeat(to_rep)
        exp = Series(ser.values.repeat(to_rep), index=ser.index.values.repeat(to_rep))
        tm.assert_series_equal(reps, exp)

    def test_numpy_repeat(self):
        # np.repeat dispatches to Series.repeat and preserves the name
        ser = Series(np.arange(3), name="x")
        expected = Series(
            ser.values.repeat(2), name="x", index=ser.index.values.repeat(2)
        )
        tm.assert_series_equal(np.repeat(ser, 2), expected)
        # numpy's axis argument is not supported by the Series method
        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.repeat(ser, 2, axis=0)

    def test_repeat_with_multiindex(self):
        # GH#9361, fixed by GH#7891
        m_idx = MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)])
        data = ["a", "b", "c", "d"]
        m_df = Series(data, index=m_idx)
        assert m_df.repeat(3).shape == (3 * len(data),)

View File

@ -0,0 +1,813 @@
import re
import numpy as np
import pytest
from pandas._config import using_pyarrow_string_dtype
import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import IntervalArray
class TestSeriesReplace:
def test_replace_explicit_none(self):
    # GH#36984 if the user explicitly passes value=None, give it to them
    # (None is a genuine replacement value, not "no value given").
    ser = pd.Series([0, 0, ""], dtype=object)
    result = ser.replace("", None)
    expected = pd.Series([0, 0, None], dtype=object)
    tm.assert_series_equal(result, expected)
    # Cast column 2 to object to avoid implicit cast when setting entry to ""
    df = pd.DataFrame(np.zeros((3, 3))).astype({2: object})
    df.iloc[2, 2] = ""
    result = df.replace("", None)
    expected = pd.DataFrame(
        {
            0: np.zeros(3),
            1: np.zeros(3),
            2: np.array([0.0, 0.0, None], dtype=object),
        }
    )
    # the replacement really is None, not NaN
    assert expected.iloc[2, 2] is None
    tm.assert_frame_equal(result, expected)
    # GH#19998 same thing with object dtype
    ser = pd.Series([10, 20, 30, "a", "a", "b", "a"])
    result = ser.replace("a", None)
    expected = pd.Series([10, 20, 30, None, None, "b", None])
    assert expected.iloc[-1] is None
    tm.assert_series_equal(result, expected)
def test_replace_noop_doesnt_downcast(self):
    # GH#44498: a replace that matches nothing must keep object dtype,
    # regardless of the calling convention used
    ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object)
    for res in (ser.replace({np.nan: None}), ser.replace(np.nan, None)):
        tm.assert_series_equal(res, ser)
        assert res.dtype == object
def test_replace(self):
    # End-to-end exercise of scalar/list/dict replace on numeric and object
    # Series, including inplace variants and the deprecated downcasting.
    N = 50
    ser = pd.Series(np.random.default_rng(2).standard_normal(N))
    ser[0:4] = np.nan
    ser[6:10] = 0
    # replace list with a single value
    return_value = ser.replace([np.nan], -1, inplace=True)
    assert return_value is None
    # replacing NaN is equivalent to fillna
    exp = ser.fillna(-1)
    tm.assert_series_equal(ser, exp)
    rs = ser.replace(0.0, np.nan)
    ser[ser == 0.0] = np.nan
    tm.assert_series_equal(rs, ser)
    ser = pd.Series(
        np.fabs(np.random.default_rng(2).standard_normal(N)),
        pd.date_range("2020-01-01", periods=N),
        dtype=object,
    )
    ser[:5] = np.nan
    ser[6:10] = "foo"
    ser[20:30] = "bar"
    # replace list with a single value
    msg = "Downcasting behavior in `replace`"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.replace([np.nan, "foo", "bar"], -1)
    assert (rs[:5] == -1).all()
    assert (rs[6:10] == -1).all()
    assert (rs[20:30] == -1).all()
    # non-inplace calls leave the original untouched
    assert (pd.isna(ser[:5])).all()
    # replace with different values
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
    assert (rs[:5] == -1).all()
    assert (rs[6:10] == -2).all()
    assert (rs[20:30] == -3).all()
    assert (pd.isna(ser[:5])).all()
    # replace with different values with 2 lists
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
    tm.assert_series_equal(rs, rs2)
    # replace inplace
    with tm.assert_produces_warning(FutureWarning, match=msg):
        return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
    assert return_value is None
    assert (ser[:5] == -1).all()
    assert (ser[6:10] == -1).all()
    assert (ser[20:30] == -1).all()
def test_replace_nan_with_inf(self):
    # replacing NaN is equivalent to fillna; inf is a distinct value and is
    # replaced independently
    ser = pd.Series([np.nan, 0, np.inf])
    tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))

    ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT])
    tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
    expected = ser.copy()
    expected[4] = 0
    tm.assert_series_equal(ser.replace(np.inf, 0), expected)
def test_replace_listlike_value_listlike_target(self, datetime_series):
    # List-like to_replace/value pairs must match in length; values the
    # dtype cannot hold make the replace a no-op.
    ser = pd.Series(datetime_series.index)
    tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
    # malformed
    msg = r"Replacement lists must match in length\. Expecting 3 got 2"
    with pytest.raises(ValueError, match=msg):
        ser.replace([1, 2, 3], [np.nan, 0])
    # ser is dt64 so can't hold 1 or 2, so this replace is a no-op
    result = ser.replace([1, 2], [np.nan, 0])
    tm.assert_series_equal(result, ser)
    ser = pd.Series([0, 1, 2, 3, 4])
    result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
    tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))
def test_replace_gh5319(self):
    # API change from 0.12?
    # GH 5319
    # Calling replace without a replacement value pad-fills; that calling
    # convention is deprecated and must warn.
    ser = pd.Series([0, np.nan, 2, 3, 4])
    expected = ser.ffill()
    msg = (
        "Series.replace without 'value' and with non-dict-like "
        "'to_replace' is deprecated"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = ser.replace([np.nan])
    tm.assert_series_equal(result, expected)
    ser = pd.Series([0, np.nan, 2, 3, 4])
    expected = ser.ffill()
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = ser.replace(np.nan)
    tm.assert_series_equal(result, expected)
def test_replace_datetime64(self):
    # GH 5797: dict and scalar calling conventions both replace Timestamps
    ser = pd.Series(pd.date_range("20130101", periods=5))
    expected = ser.copy()
    expected.loc[2] = pd.Timestamp("20120101")
    tm.assert_series_equal(
        ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")}),
        expected,
    )
    tm.assert_series_equal(
        ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101")),
        expected,
    )
def test_replace_nat_with_tz(self):
    # GH 11792: Test with replacing NaT in a list with tz data
    aware = pd.Timestamp("2015/01/01", tz="UTC")
    ser = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")])
    result = ser.replace([np.nan, pd.NaT], pd.Timestamp.min)
    tm.assert_series_equal(pd.Series([pd.Timestamp.min, aware], dtype=object), result)
def test_replace_timedelta_td64(self):
    # timedelta64 values round-trip through the dict-based replace path
    ser = pd.Series(pd.timedelta_range(0, periods=5))
    # Using a single dict argument means we go through replace_list
    expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]])
    tm.assert_series_equal(ser.replace({ser[1]: ser[3]}), expected)
def test_replace_with_single_list(self):
    # A bare list without a value pad-fills (deprecated calling convention).
    ser = pd.Series([0, 1, 2, 3, 4])
    msg2 = (
        "Series.replace without 'value' and with non-dict-like "
        "'to_replace' is deprecated"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg2):
        result = ser.replace([1, 2, 3])
    tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))
    s = ser.copy()
    with tm.assert_produces_warning(FutureWarning, match=msg2):
        return_value = s.replace([1, 2, 3], inplace=True)
    assert return_value is None
    tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))
    # make sure things don't get corrupted when fillna call fails
    s = ser.copy()
    msg = (
        r"Invalid fill method\. Expecting pad \(ffill\) or backfill "
        r"\(bfill\)\. Got crash_cymbal"
    )
    msg3 = "The 'method' keyword in Series.replace is deprecated"
    with pytest.raises(ValueError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=msg3):
            return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
    assert return_value is None
    tm.assert_series_equal(s, ser)
def test_replace_mixed_types(self):
    # Replace on an int64 Series with values that may or may not force an
    # upcast; out-of-place and inplace results must agree.
    ser = pd.Series(np.arange(5), dtype="int64")

    def check_replace(to_rep, val, expected):
        # run both the copying and the inplace variant
        sc = ser.copy()
        result = ser.replace(to_rep, val)
        return_value = sc.replace(to_rep, val, inplace=True)
        assert return_value is None
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(expected, sc)

    # 3.0 can still be held in our int64 series, so we do not upcast GH#44940
    tr, v = [3], [3.0]
    check_replace(tr, v, ser)
    # Note this matches what we get with the scalars 3 and 3.0
    check_replace(tr[0], v[0], ser)
    # MUST upcast to float
    e = pd.Series([0, 1, 2, 3.5, 4])
    tr, v = [3], [3.5]
    check_replace(tr, v, e)
    # casts to object
    e = pd.Series([0, 1, 2, 3.5, "a"])
    tr, v = [3, 4], [3.5, "a"]
    check_replace(tr, v, e)
    # again casts to object
    e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")])
    tr, v = [3, 4], [3.5, pd.Timestamp("20130101")]
    check_replace(tr, v, e)
    # casts to object
    e = pd.Series([0, 1, 2, 3.5, True], dtype="object")
    tr, v = [3, 4], [3.5, True]
    check_replace(tr, v, e)
    # test an object with dates + floats + integers + strings
    dr = pd.Series(pd.date_range("1/1/2001", "1/10/2001", freq="D"))
    result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"])
    expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object)
    tm.assert_series_equal(result, expected)
def test_replace_bool_with_string_no_op(self):
    # replacing a value absent from the Series changes nothing
    ser = pd.Series([True, False, True])
    tm.assert_series_equal(ser, ser.replace("fun", "in-the-sun"))
def test_replace_bool_with_string(self):
    # replacing a bool with a string upcasts the Series to object
    ser = pd.Series([True, False, True])
    expected = pd.Series(["2u", False, "2u"])
    tm.assert_series_equal(expected, ser.replace(True, "2u"))
def test_replace_bool_with_bool(self):
    # bool-for-bool replacement keeps the bool dtype
    ser = pd.Series([True, False, True])
    expected = pd.Series([False] * len(ser))
    tm.assert_series_equal(expected, ser.replace(True, False))
def test_replace_with_dict_with_bool_keys(self):
    # bool keys in the mapping match bool values; unmatched keys are ignored
    ser = pd.Series([True, False, True])
    result = ser.replace({"asdf": "asdb", True: "yes"})
    tm.assert_series_equal(result, pd.Series(["yes", False, "yes"]))
def test_replace_Int_with_na(self, any_int_ea_dtype):
    # GH 38267: pd.NA replacement works on masked integer dtypes, both
    # out-of-place and inplace
    expected = pd.Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)
    result = pd.Series([0, None], dtype=any_int_ea_dtype).replace(0, pd.NA)
    tm.assert_series_equal(result, expected)
    result = pd.Series([0, 1], dtype=any_int_ea_dtype).replace(0, pd.NA)
    result.replace(1, pd.NA, inplace=True)
    tm.assert_series_equal(result, expected)
def test_replace2(self):
    # Object-dtype variant of the list/dict/inplace replace checks.
    N = 50
    ser = pd.Series(
        np.fabs(np.random.default_rng(2).standard_normal(N)),
        pd.date_range("2020-01-01", periods=N),
        dtype=object,
    )
    ser[:5] = np.nan
    ser[6:10] = "foo"
    ser[20:30] = "bar"
    # replace list with a single value
    msg = "Downcasting behavior in `replace`"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.replace([np.nan, "foo", "bar"], -1)
    assert (rs[:5] == -1).all()
    assert (rs[6:10] == -1).all()
    assert (rs[20:30] == -1).all()
    # non-inplace calls leave the original untouched
    assert (pd.isna(ser[:5])).all()
    # replace with different values
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
    assert (rs[:5] == -1).all()
    assert (rs[6:10] == -2).all()
    assert (rs[20:30] == -3).all()
    assert (pd.isna(ser[:5])).all()
    # replace with different values with 2 lists
    with tm.assert_produces_warning(FutureWarning, match=msg):
        rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
    tm.assert_series_equal(rs, rs2)
    # replace inplace
    with tm.assert_produces_warning(FutureWarning, match=msg):
        return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
    assert return_value is None
    assert (ser[:5] == -1).all()
    assert (ser[6:10] == -1).all()
    assert (ser[20:30] == -1).all()
@pytest.mark.parametrize("inplace", [True, False])
def test_replace_cascade(self, inplace):
    # GH #50778: values already replaced must not be replaced a second time
    ser = pd.Series([1, 2, 3])
    expected = pd.Series([2, 3, 4])
    out = ser.replace([1, 2, 3], [2, 3, 4], inplace=inplace)
    tm.assert_series_equal(ser if inplace else out, expected)
def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype):
    # GH 32621, GH#44940: dict replace keeps the nullable string dtype
    ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype)
    expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype)
    tm.assert_series_equal(expected, ser.replace({"one": "1", "two": "2"}))
def test_replace_with_empty_dictlike(self):
    # GH 15289: empty mappings (dict or Series) are no-ops
    ser = pd.Series(list("abcd"))
    tm.assert_series_equal(ser, ser.replace({}))
    tm.assert_series_equal(ser, ser.replace(pd.Series([])))
def test_replace_string_with_number(self):
    # GH 15743: the string "2" does not match the number 2
    ser = pd.Series([1, 2, 3])
    tm.assert_series_equal(pd.Series([1, 2, 3]), ser.replace("2", np.nan))
def test_replace_replacer_equals_replacement(self):
    # GH 20656
    # make sure all replacers are matching against original values,
    # so a swap mapping really swaps instead of cascading
    ser = pd.Series(["a", "b"])
    tm.assert_series_equal(pd.Series(["b", "a"]), ser.replace({"a": "b", "b": "a"}))
def test_replace_unicode_with_number(self):
    # GH 15743: a unicode string never matches a numeric value
    ser = pd.Series([1, 2, 3])
    tm.assert_series_equal(pd.Series([1, 2, 3]), ser.replace("2", np.nan))
def test_replace_mixed_types_with_string(self):
    # Testing mixed: numeric 2 and string "4" replaced in one call
    s = pd.Series([1, 2, 3, "4", 4, 5])
    msg = "Downcasting behavior in `replace`"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s.replace([2, "4"], np.nan)
    expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
    tm.assert_series_equal(expected, result)
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't fill 0 in string")
@pytest.mark.parametrize(
    "categorical, numeric",
    [
        (pd.Categorical(["A"], categories=["A", "B"]), [1]),
        (pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]),
    ],
)
def test_replace_categorical(self, categorical, numeric):
    # GH 24971, GH#23305
    # Dict-based replace on a categorical warns (deprecated) and yields a
    # categorical whose categories cover all mapped values.
    # NOTE(review): removed a dead assignment — `msg` was first set to
    # "Downcasting behavior in `replace`" and immediately overwritten.
    ser = pd.Series(categorical)
    msg = "with CategoricalDtype is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = ser.replace({"A": 1, "B": 2})
    expected = pd.Series(numeric).astype("category")
    if 2 not in expected.cat.categories:
        # i.e. categories should be [1, 2] even if there are no "B"s present
        # GH#44940
        expected = expected.cat.add_categories(2)
    tm.assert_series_equal(expected, result)
@pytest.mark.parametrize(
    "data, data_exp", [(["a", "b", "c"], ["b", "b", "c"]), (["a"], ["b"])]
)
def test_replace_categorical_inplace(self, data, data_exp):
    # GH 53358: inplace categorical replace mutates in place and warns
    # (deprecated behavior)
    result = pd.Series(data, dtype="category")
    msg = "with CategoricalDtype is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result.replace(to_replace="a", value="b", inplace=True)
    expected = pd.Series(data_exp, dtype="category")
    tm.assert_series_equal(result, expected)
def test_replace_categorical_single(self):
    # GH 26988: replacing a single category adds the new category and drops
    # the now-unused one; inplace and out-of-place must agree.
    dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
    s = pd.Series(dti)
    c = s.astype("category")
    expected = c.copy()
    expected = expected.cat.add_categories("foo")
    expected[2] = "foo"
    expected = expected.cat.remove_unused_categories()
    assert c[2] != "foo"
    msg = "with CategoricalDtype is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = c.replace(c[2], "foo")
    tm.assert_series_equal(expected, result)
    assert c[2] != "foo"  # ensure non-inplace call does not alter original
    msg = "with CategoricalDtype is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        return_value = c.replace(c[2], "foo", inplace=True)
    assert return_value is None
    tm.assert_series_equal(expected, c)
    first_value = c[0]
    msg = "with CategoricalDtype is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        return_value = c.replace(c[1], c[0], inplace=True)
    assert return_value is None
    assert c[0] == c[1] == first_value  # test replacing with existing value
def test_replace_with_no_overflowerror(self):
    # GH 25616
    # values beyond int64 range cast to object without raising OverflowError
    ser = pd.Series([0, 1, 2, 3, 4])
    replaced = ser.replace([3], ["100000000000000000000"])
    tm.assert_series_equal(
        replaced, pd.Series([0, 1, 2, "100000000000000000000", 4])
    )

    ser = pd.Series([0, "100000000000000000000", "100000000000000000001"])
    replaced = ser.replace(["100000000000000000000"], [1])
    tm.assert_series_equal(replaced, pd.Series([0, 1, "100000000000000000001"]))
@pytest.mark.parametrize(
    "ser, to_replace, exp",
    [
        ([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]),
        (["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]),
    ],
)
def test_replace_commutative(self, ser, to_replace, exp):
    # GH 16051
    # DataFrame.replace() overwrites when values are non-numeric
    tm.assert_series_equal(pd.Series(ser).replace(to_replace), pd.Series(exp))
@pytest.mark.parametrize(
    "ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])]
)
def test_replace_no_cast(self, ser, exp):
    # GH 9113
    # BUG: replace int64 dtype with bool coerces to int64
    tm.assert_series_equal(pd.Series(ser).replace(2, True), pd.Series(exp))
def test_replace_invalid_to_replace(self):
    # GH 18634
    # API: replace() should raise an exception if invalid argument is given
    series = pd.Series(["a", "b", "c "])
    msg = (
        r"Expecting 'to_replace' to be either a scalar, array-like, "
        r"dict or None, got invalid type.*"
    )
    msg2 = (
        "Series.replace without 'value' and with non-dict-like "
        "'to_replace' is deprecated"
    )
    # the deprecation warning fires before the TypeError is raised
    with pytest.raises(TypeError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=msg2):
            series.replace(lambda x: x.strip())
@pytest.mark.parametrize("frame", [False, True])
def test_replace_nonbool_regex(self, frame):
    # a non-bool regex argument alongside to_replace is invalid
    obj = pd.Series(["a", "b", "c "])
    if frame:
        obj = obj.to_frame()
    with pytest.raises(
        ValueError, match="'to_replace' must be 'None' if 'regex' is not a bool"
    ):
        obj.replace(to_replace=["a"], regex="foo")
@pytest.mark.parametrize("frame", [False, True])
def test_replace_empty_copy(self, frame):
    # inplace on an empty object returns None; non-inplace returns a new one
    empty = pd.Series([], dtype=np.float64)
    if frame:
        empty = empty.to_frame()
    assert empty.replace(4, 5, inplace=True) is None
    out = empty.replace(4, 5, inplace=False)
    tm.assert_equal(out, empty)
    assert out is not empty
def test_replace_only_one_dictlike_arg(self, fixed_now_ts):
    # GH#33340: a dict-like to_replace or a dict-like value is mutually
    # exclusive with a non-None counterpart
    ser = pd.Series([1, 2, "A", fixed_now_ts, True])
    to_replace = {0: 1, 2: "A"}
    value = "foo"
    msg = "Series.replace cannot use dict-like to_replace and non-None value"
    with pytest.raises(ValueError, match=msg):
        ser.replace(to_replace, value)
    to_replace = 1
    value = {0: "foo", 2: "bar"}
    msg = "Series.replace cannot use dict-value and non-None to_replace"
    with pytest.raises(ValueError, match=msg):
        ser.replace(to_replace, value)
def test_replace_extension_other(self, frame_or_series):
    # https://github.com/pandas-dev/pandas/issues/34530
    # replacing values of a foreign type is a no-op and must not change dtype
    obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64"))
    tm.assert_equal(obj, obj.replace("", ""))  # no exception
def _check_replace_with_method(self, ser: pd.Series):
    # Shared helper: exercise the deprecated method="pad" replace path on a
    # Series and its one-column DataFrame, non-inplace and inplace.
    df = ser.to_frame()
    msg1 = "The 'method' keyword in Series.replace is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg1):
        res = ser.replace(ser[1], method="pad")
    # pad fills the replaced position from the previous element
    expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype)
    tm.assert_series_equal(res, expected)
    msg2 = "The 'method' keyword in DataFrame.replace is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg2):
        res_df = df.replace(ser[1], method="pad")
    tm.assert_frame_equal(res_df, expected.to_frame())
    ser2 = ser.copy()
    with tm.assert_produces_warning(FutureWarning, match=msg1):
        res2 = ser2.replace(ser[1], method="pad", inplace=True)
    assert res2 is None
    tm.assert_series_equal(ser2, expected)
    with tm.assert_produces_warning(FutureWarning, match=msg2):
        res_df2 = df.replace(ser[1], method="pad", inplace=True)
    assert res_df2 is None
    tm.assert_frame_equal(df, expected.to_frame())
def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype):
    # masked EA dtypes go through the deprecated method= replace path
    arr = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype)
    ser = pd.Series(arr)
    self._check_replace_with_method(ser)
@pytest.mark.parametrize("as_categorical", [True, False])
def test_replace_interval_with_method(self, as_categorical):
    # in particular interval that can't hold NA
    idx = pd.IntervalIndex.from_breaks(range(4))
    ser = pd.Series(idx)
    if as_categorical:
        ser = ser.astype("category")
    self._check_replace_with_method(ser)
@pytest.mark.parametrize("as_period", [True, False])
@pytest.mark.parametrize("as_categorical", [True, False])
def test_replace_datetimelike_with_method(self, as_period, as_categorical):
    # tz-aware datetimes, optionally as period and/or categorical, through
    # the deprecated method= replace path
    idx = pd.date_range("2016-01-01", periods=5, tz="US/Pacific")
    if as_period:
        idx = idx.tz_localize(None).to_period("D")
    ser = pd.Series(idx)
    ser.iloc[-2] = pd.NaT
    if as_categorical:
        ser = ser.astype("category")
    self._check_replace_with_method(ser)
def test_replace_with_compiled_regex(self):
# https://github.com/pandas-dev/pandas/issues/35680
s = pd.Series(["a", "b", "c"])
regex = re.compile("^a$")
result = s.replace({regex: "z"}, regex=True)
expected = pd.Series(["z", "b", "c"])
tm.assert_series_equal(result, expected)
def test_pandas_replace_na(self):
# GH#43344
ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA], dtype="string")
regex_mapping = {
"AA": "CC",
"BB": "CC",
"EE": "CC",
"CC": "CC-REPL",
}
result = ser.replace(regex_mapping, regex=True)
exp = pd.Series(["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA], dtype="string")
tm.assert_series_equal(result, exp)
@pytest.mark.parametrize(
    "dtype, input_data, to_replace, expected_data",
    [
        ("bool", [True, False], {True: False}, [False, False]),
        ("int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
        ("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
        ("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
        ("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
        ("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]),
        (
            pd.IntervalDtype("int64"),
            IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]),
            {pd.Interval(1, 2): pd.Interval(10, 20)},
            IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]),
        ),
        (
            pd.IntervalDtype("float64"),
            IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]),
            {pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)},
            IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]),
        ),
        (
            pd.PeriodDtype("M"),
            [pd.Period("2020-05", freq="M")],
            {pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")},
            [pd.Period("2020-06", freq="M")],
        ),
    ],
)
def test_replace_dtype(self, dtype, input_data, to_replace, expected_data):
    # GH#33484: replace with a dict must preserve the original dtype for
    # numpy, nullable, string, interval and period dtypes alike.
    ser = pd.Series(input_data, dtype=dtype)
    result = ser.replace(to_replace)
    expected = pd.Series(expected_data, dtype=dtype)
    tm.assert_series_equal(result, expected)
def test_replace_string_dtype(self):
# GH#40732, GH#44940
ser = pd.Series(["one", "two", np.nan], dtype="string")
res = ser.replace({"one": "1", "two": "2"})
expected = pd.Series(["1", "2", np.nan], dtype="string")
tm.assert_series_equal(res, expected)
# GH#31644
ser2 = pd.Series(["A", np.nan], dtype="string")
res2 = ser2.replace("A", "B")
expected2 = pd.Series(["B", np.nan], dtype="string")
tm.assert_series_equal(res2, expected2)
ser3 = pd.Series(["A", "B"], dtype="string")
res3 = ser3.replace("A", pd.NA)
expected3 = pd.Series([pd.NA, "B"], dtype="string")
tm.assert_series_equal(res3, expected3)
def test_replace_string_dtype_list_to_replace(self):
# GH#41215, GH#44940
ser = pd.Series(["abc", "def"], dtype="string")
res = ser.replace(["abc", "any other string"], "xyz")
expected = pd.Series(["xyz", "def"], dtype="string")
tm.assert_series_equal(res, expected)
def test_replace_string_dtype_regex(self):
# GH#31644
ser = pd.Series(["A", "B"], dtype="string")
res = ser.replace(r".", "C", regex=True)
expected = pd.Series(["C", "C"], dtype="string")
tm.assert_series_equal(res, expected)
def test_replace_nullable_numeric(self):
    # GH#40732, GH#44940: replace keeps nullable numeric dtypes; for
    # nullable integers an incompatible (non-integral) float raises
    # rather than upcasting.
    floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype())
    assert floats.replace({1.0: 9}).dtype == floats.dtype
    assert floats.replace(1.0, 9).dtype == floats.dtype
    assert floats.replace({1.0: 9.0}).dtype == floats.dtype
    assert floats.replace(1.0, 9.0).dtype == floats.dtype

    res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0])
    assert res.dtype == floats.dtype

    ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype())
    assert ints.replace({1: 9}).dtype == ints.dtype
    assert ints.replace(1, 9).dtype == ints.dtype
    # whole floats can be stored losslessly, so the dtype is preserved
    assert ints.replace({1: 9.0}).dtype == ints.dtype
    assert ints.replace(1, 9.0).dtype == ints.dtype

    # nullable (for now) raises instead of casting
    with pytest.raises(TypeError, match="Invalid value"):
        ints.replace({1: 9.5})
    with pytest.raises(TypeError, match="Invalid value"):
        ints.replace(1, 9.5)
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't fill 1 in string")
@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_series(self, regex):
    # GH-48644: replacing "0" with integer 1 downcasts the object result
    # to int64 and emits the downcasting FutureWarning, with and without
    # regex matching.
    series = pd.Series(["0"])
    expected = pd.Series([1])
    msg = "Downcasting behavior in `replace`"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = series.replace(to_replace="0", value=1, regex=regex)
    tm.assert_series_equal(result, expected)
def test_replace_different_int_types(self, any_int_numpy_dtype):
# GH#45311
labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype)
maps = pd.Series([0, 2, 1], dtype=any_int_numpy_dtype)
map_dict = dict(zip(maps.values, maps.index))
result = labs.replace(map_dict)
expected = labs.replace({0: 0, 2: 1, 1: 2})
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("val", [2, np.nan, 2.0])
def test_replace_value_none_dtype_numeric(self, val):
    # GH#48231: replacing a numeric value with None must cast to object
    # so the None can actually be stored.
    ser = pd.Series([1, val])
    result = ser.replace(val, None)
    expected = pd.Series([1, None], dtype=object)
    tm.assert_series_equal(result, expected)
def test_replace_change_dtype_series(self, using_infer_string):
# GH#25797
df = pd.DataFrame.from_dict({"Test": ["0.5", True, "0.6"]})
warn = FutureWarning if using_infer_string else None
with tm.assert_produces_warning(warn, match="Downcasting"):
df["Test"] = df["Test"].replace([True], [np.nan])
expected = pd.DataFrame.from_dict({"Test": ["0.5", np.nan, "0.6"]})
tm.assert_frame_equal(df, expected)
df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
df["Test"] = df["Test"].replace([None], [np.nan])
tm.assert_frame_equal(df, expected)
df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
df["Test"] = df["Test"].fillna(np.nan)
tm.assert_frame_equal(df, expected)
@pytest.mark.parametrize("dtype", ["object", "Int64"])
def test_replace_na_in_obj_column(self, dtype):
    # GH#47480: values sitting next to pd.NA are replaceable and the NA
    # itself survives, both out-of-place and inplace.
    ser = pd.Series([0, 1, pd.NA], dtype=dtype)
    expected = pd.Series([0, 2, pd.NA], dtype=dtype)
    result = ser.replace(to_replace=1, value=2)
    tm.assert_series_equal(result, expected)

    ser.replace(to_replace=1, value=2, inplace=True)
    tm.assert_series_equal(ser, expected)
@pytest.mark.parametrize("val", [0, 0.5])
def test_replace_numeric_column_with_na(self, val):
    # GH#50758: replacing a value with pd.NA in a numeric column works
    # out-of-place and inplace.
    ser = pd.Series([val, 1])
    expected = pd.Series([val, pd.NA])
    result = ser.replace(to_replace=1, value=pd.NA)
    tm.assert_series_equal(result, expected)

    ser.replace(to_replace=1, value=pd.NA, inplace=True)
    tm.assert_series_equal(ser, expected)
def test_replace_ea_float_with_bool(self):
# GH#55398
ser = pd.Series([0.0], dtype="Float64")
expected = ser.copy()
result = ser.replace(False, 1.0)
tm.assert_series_equal(result, expected)
ser = pd.Series([False], dtype="boolean")
expected = ser.copy()
result = ser.replace(0.0, True)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,225 @@
from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
RangeIndex,
Series,
date_range,
option_context,
)
import pandas._testing as tm
class TestResetIndex:
    # Tests for Series.reset_index / DataFrame.reset_index behavior:
    # round-tripping, level selection, drop semantics and error cases.

    def test_reset_index_dti_round_trip(self):
        # reset_index followed by set_index on a DatetimeIndex should
        # round-trip the original frame (modulo index name).
        dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None)
        d1 = DataFrame({"v": np.random.default_rng(2).random(len(dti))}, index=dti)
        d2 = d1.reset_index()
        # the former index becomes a datetime64[ns] column
        assert d2.dtypes.iloc[0] == np.dtype("M8[ns]")
        d3 = d2.set_index("index")
        tm.assert_frame_equal(d1, d3, check_names=False)

        # GH#2329
        stamp = datetime(2012, 11, 22)
        df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"])
        df = df.set_index("Date")

        assert df.index[0] == stamp
        assert df.reset_index()["Date"].iloc[0] == stamp

    def test_reset_index(self):
        df = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=Index(list("ABCD"), dtype=object),
            index=Index([f"i-{i}" for i in range(30)], dtype=object),
        )[:5]
        ser = df.stack(future_stack=True)
        ser.index.names = ["hash", "category"]

        ser.name = "value"
        df = ser.reset_index()
        assert "value" in df

        # name= overrides the Series name for the values column
        df = ser.reset_index(name="value2")
        assert "value2" in df

        # check inplace
        s = ser.reset_index(drop=True)
        s2 = ser
        return_value = s2.reset_index(drop=True, inplace=True)
        assert return_value is None
        tm.assert_series_equal(s, s2)

        # level
        index = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        s = Series(np.random.default_rng(2).standard_normal(6), index=index)
        rs = s.reset_index(level=1)
        assert len(rs.columns) == 2

        # dropping two of three levels leaves a Series, not a DataFrame
        rs = s.reset_index(level=[0, 2], drop=True)
        tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
        assert isinstance(rs, Series)

    def test_reset_index_name(self):
        # the reset result always gets a fresh default (unnamed) index
        s = Series([1, 2, 3], index=Index(range(3), name="x"))
        assert s.reset_index().index.name is None
        assert s.reset_index(drop=True).index.name is None

    def test_reset_index_level(self):
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])

        # levels selectable by name or by position
        for levels in ["A", "B"], [0, 1]:
            # With MultiIndex
            s = df.set_index(["A", "B"])["C"]

            result = s.reset_index(level=levels[0])
            tm.assert_frame_equal(result, df.set_index("B"))

            result = s.reset_index(level=levels[:1])
            tm.assert_frame_equal(result, df.set_index("B"))

            result = s.reset_index(level=levels)
            tm.assert_frame_equal(result, df)

            result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
            tm.assert_frame_equal(result, df[["C"]])

            with pytest.raises(KeyError, match="Level E "):
                s.reset_index(level=["A", "E"])

            # With single-level Index
            s = df.set_index("A")["B"]

            result = s.reset_index(level=levels[0])
            tm.assert_frame_equal(result, df[["A", "B"]])

            result = s.reset_index(level=levels[:1])
            tm.assert_frame_equal(result, df[["A", "B"]])

            result = s.reset_index(level=levels[0], drop=True)
            tm.assert_series_equal(result, df["B"])

            with pytest.raises(IndexError, match="Too many levels"):
                s.reset_index(level=[0, 1, 2])

        # Check that .reset_index([],drop=True) doesn't fail
        result = Series(range(4)).reset_index([], drop=True)
        expected = Series(range(4))
        tm.assert_series_equal(result, expected)

    def test_reset_index_range(self):
        # GH 12071
        s = Series(range(2), name="A", dtype="int64")
        series_result = s.reset_index()
        # the fresh index must be a RangeIndex, not a materialized Index
        assert isinstance(series_result.index, RangeIndex)
        series_expected = DataFrame(
            [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2)
        )
        tm.assert_frame_equal(series_result, series_expected)

    def test_reset_index_drop_errors(self):
        # GH 20925

        # KeyError raised for series index when passed level name is missing
        s = Series(range(4))
        with pytest.raises(KeyError, match="does not match index name"):
            s.reset_index("wrong", drop=True)
        with pytest.raises(KeyError, match="does not match index name"):
            s.reset_index("wrong")

        # KeyError raised for series when level to be dropped is missing
        s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
        with pytest.raises(KeyError, match="not found"):
            s.reset_index("wrong", drop=True)

    def test_reset_index_with_drop(self):
        arrays = [
            ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
            ["one", "two", "one", "two", "one", "two", "one", "two"],
        ]
        tuples = zip(*arrays)
        index = MultiIndex.from_tuples(tuples)
        data = np.random.default_rng(2).standard_normal(8)
        ser = Series(data, index=index)
        ser.iloc[3] = np.nan

        # without drop, each index level becomes a column of the frame
        deleveled = ser.reset_index()
        assert isinstance(deleveled, DataFrame)
        assert len(deleveled.columns) == len(ser.index.levels) + 1
        assert deleveled.index.name == ser.index.name

        # with drop, the levels are discarded and a Series is returned
        deleveled = ser.reset_index(drop=True)
        assert isinstance(deleveled, Series)
        assert deleveled.index.name == ser.index.name

    def test_reset_index_inplace_and_drop_ignore_name(self):
        # GH#44575: name= is ignored when drop=True
        ser = Series(range(2), name="old")
        ser.reset_index(name="new", drop=True, inplace=True)
        expected = Series(range(2), name="old")
        tm.assert_series_equal(ser, expected)

    def test_reset_index_drop_infer_string(self):
        # GH#56160: future.infer_string must not change an object-dtype
        # Series on a drop-only reset_index
        pytest.importorskip("pyarrow")
        ser = Series(["a", "b", "c"], dtype=object)
        with option_context("future.infer_string", True):
            result = ser.reset_index(drop=True)
        tm.assert_series_equal(result, ser)
@pytest.mark.parametrize(
    "array, dtype",
    [
        (["a", "b"], object),
        (
            pd.period_range("12-1-2000", periods=2, freq="Q-DEC"),
            pd.PeriodDtype(freq="Q-DEC"),
        ),
    ],
)
def test_reset_index_dtypes_on_empty_series_with_multiindex(
    array, dtype, using_infer_string
):
    # GH 19602 - Preserve dtype on empty Series with MultiIndex
    idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
    # slice to empty before resetting to exercise the empty path
    result = Series(dtype=object, index=idx)[:0].reset_index().dtypes
    exp = "string" if using_infer_string else object
    expected = Series(
        {
            "level_0": np.int64,
            "level_1": np.float64,
            "level_2": exp if dtype == object else dtype,
            0: object,
        }
    )
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "names, expected_names",
    [
        (["A", "A"], ["A", "A"]),
        (["level_1", None], ["level_1", "level_1"]),
    ],
)
@pytest.mark.parametrize("allow_duplicates", [False, True])
def test_column_name_duplicates(names, expected_names, allow_duplicates):
    # GH#44755 reset_index with duplicate column labels
    s = Series([1], index=MultiIndex.from_arrays([[1], [1]], names=names))
    if allow_duplicates:
        # duplicates are permitted only when explicitly opted in
        result = s.reset_index(allow_duplicates=True)
        expected = DataFrame([[1, 1, 1]], columns=expected_names + [0])
        tm.assert_frame_equal(result, expected)
    else:
        with pytest.raises(ValueError, match="cannot insert"):
            s.reset_index()

View File

@ -0,0 +1,74 @@
import numpy as np
import pytest
import pandas as pd
from pandas import Series
import pandas._testing as tm
class TestSeriesRound:
    # Tests for Series.round and its interaction with numpy and the
    # builtin round().

    def test_round(self, datetime_series):
        datetime_series.index.name = "index_name"
        result = datetime_series.round(2)
        expected = Series(
            np.round(datetime_series.values, 2), index=datetime_series.index, name="ts"
        )
        tm.assert_series_equal(result, expected)
        # the Series name must survive rounding
        assert result.name == datetime_series.name

    def test_round_numpy(self, any_float_dtype):
        # See GH#12600
        ser = Series([1.53, 1.36, 0.06], dtype=any_float_dtype)
        out = np.round(ser, decimals=0)
        expected = Series([2.0, 1.0, 0.0], dtype=any_float_dtype)
        tm.assert_series_equal(out, expected)

        # np.round's out= kwarg is rejected for Series input
        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.round(ser, decimals=0, out=ser)

    def test_round_numpy_with_nan(self, any_float_dtype):
        # See GH#14197: rounding through NaN must not warn
        ser = Series([1.53, np.nan, 0.06], dtype=any_float_dtype)
        with tm.assert_produces_warning(None):
            result = ser.round()
        expected = Series([2.0, np.nan, 0.0], dtype=any_float_dtype)
        tm.assert_series_equal(result, expected)

    def test_round_builtin(self, any_float_dtype):
        # builtin round() delegates to Series.__round__
        ser = Series(
            [1.123, 2.123, 3.123],
            index=range(3),
            dtype=any_float_dtype,
        )
        result = round(ser)
        expected_rounded0 = Series(
            [1.0, 2.0, 3.0], index=range(3), dtype=any_float_dtype
        )
        tm.assert_series_equal(result, expected_rounded0)

        decimals = 2
        expected_rounded = Series(
            [1.12, 2.12, 3.12], index=range(3), dtype=any_float_dtype
        )
        result = round(ser, decimals)
        tm.assert_series_equal(result, expected_rounded)

    @pytest.mark.parametrize("method", ["round", "floor", "ceil"])
    @pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"])
    def test_round_nat(self, method, freq, unit):
        # GH14940, GH#56158: NaT passes through all dt rounding methods
        ser = Series([pd.NaT], dtype=f"M8[{unit}]")
        expected = Series(pd.NaT, dtype=f"M8[{unit}]")
        round_method = getattr(ser.dt, method)
        result = round_method(freq)
        tm.assert_series_equal(result, expected)

    def test_round_ea_boolean(self):
        # GH#55936: round on a boolean EA Series is a no-op copy
        ser = Series([True, False], dtype="boolean")
        expected = ser.copy()
        result = ser.round(2)
        tm.assert_series_equal(result, expected)
        # mutating the result must not write back into the original
        result.iloc[0] = False
        tm.assert_series_equal(ser, expected)

View File

@ -0,0 +1,77 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
from pandas.api.types import is_scalar
class TestSeriesSearchSorted:
def test_searchsorted(self):
ser = Series([1, 2, 3])
result = ser.searchsorted(1, side="left")
assert is_scalar(result)
assert result == 0
result = ser.searchsorted(1, side="right")
assert is_scalar(result)
assert result == 1
def test_searchsorted_numeric_dtypes_scalar(self):
ser = Series([1, 2, 90, 1000, 3e9])
res = ser.searchsorted(30)
assert is_scalar(res)
assert res == 2
res = ser.searchsorted([30])
exp = np.array([2], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)
def test_searchsorted_numeric_dtypes_vector(self):
ser = Series([1, 2, 90, 1000, 3e9])
res = ser.searchsorted([91, 2e6])
exp = np.array([3, 4], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)
def test_searchsorted_datetime64_scalar(self):
ser = Series(date_range("20120101", periods=10, freq="2D"))
val = Timestamp("20120102")
res = ser.searchsorted(val)
assert is_scalar(res)
assert res == 1
def test_searchsorted_datetime64_scalar_mixed_timezones(self):
# GH 30086
ser = Series(date_range("20120101", periods=10, freq="2D", tz="UTC"))
val = Timestamp("20120102", tz="America/New_York")
res = ser.searchsorted(val)
assert is_scalar(res)
assert res == 1
def test_searchsorted_datetime64_list(self):
ser = Series(date_range("20120101", periods=10, freq="2D"))
vals = [Timestamp("20120102"), Timestamp("20120104")]
res = ser.searchsorted(vals)
exp = np.array([1, 2], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)
def test_searchsorted_sorter(self):
# GH8490
ser = Series([3, 1, 2])
res = ser.searchsorted([0, 3], sorter=np.argsort(ser))
exp = np.array([0, 2], dtype=np.intp)
tm.assert_numpy_array_equal(res, exp)
def test_searchsorted_dataframe_fail(self):
# GH#49620
ser = Series([1, 2, 3, 4, 5])
vals = pd.DataFrame([[1, 2], [3, 4]])
msg = "Value must be 1-D array-like or scalar, DataFrame is not supported"
with pytest.raises(ValueError, match=msg):
ser.searchsorted(vals)

View File

@ -0,0 +1,21 @@
from datetime import datetime
from pandas import Series
class TestSetName:
    # Tests for Series._set_name and direct assignment to Series.name.

    def test_set_name(self):
        # _set_name returns a renamed copy and leaves the original untouched
        original = Series([1, 2, 3])
        renamed = original._set_name("foo")
        assert renamed is not original
        assert renamed.name == "foo"
        assert original.name is None

    def test_set_name_attribute(self):
        # assigning to .name accepts any hashable value, whether or not a
        # name was given at construction time
        unnamed = Series([1, 2, 3])
        named = Series([1, 2, 3], name="bar")
        for candidate in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]:
            unnamed.name = candidate
            assert unnamed.name == candidate
            named.name = candidate
            assert named.name == candidate

View File

@ -0,0 +1,22 @@
import pytest
from pandas import Series
@pytest.mark.parametrize(
    "data, index, expected",
    [
        ([1, 2, 3], None, 3),
        ({"a": 1, "b": 2, "c": 3}, None, 3),
        ([1, 2, 3], ["x", "y", "z"], 3),
        ([1, 2, 3, 4, 5], ["x", "y", "z", "w", "n"], 5),
        ([1, 2, 3], None, 3),
        ([1, 2, 3], ["x", "y", "z"], 3),
        ([1, 2, 3, 4], ["x", "y", "z", "w"], 4),
    ],
)
def test_series(data, index, expected):
    # GH#52897: Series.size reports the element count as a plain int for
    # list and dict inputs, with or without an explicit index.
    ser = Series(data, index=index)
    assert ser.size == expected
    assert isinstance(ser.size, int)

View File

@ -0,0 +1,337 @@
import numpy as np
import pytest
from pandas import (
DatetimeIndex,
IntervalIndex,
MultiIndex,
Series,
)
import pandas._testing as tm
@pytest.fixture(params=["quicksort", "mergesort", "heapsort", "stable"])
def sort_kind(request):
    # Parametrized fixture: every sort algorithm accepted by
    # sort_index(kind=...).
    return request.param
class TestSeriesSortIndex:
    # Tests for Series.sort_index: ordering, levels, inplace, kind,
    # na_position, ignore_index and argument validation.

    def test_sort_index_name(self, datetime_series):
        # the Series name must survive sorting
        result = datetime_series.sort_index(ascending=False)
        assert result.name == datetime_series.name

    def test_sort_index(self, datetime_series):
        datetime_series.index = datetime_series.index._with_freq(None)

        rindex = list(datetime_series.index)
        np.random.default_rng(2).shuffle(rindex)

        random_order = datetime_series.reindex(rindex)
        sorted_series = random_order.sort_index()
        tm.assert_series_equal(sorted_series, datetime_series)

        # descending
        sorted_series = random_order.sort_index(ascending=False)
        tm.assert_series_equal(
            sorted_series, datetime_series.reindex(datetime_series.index[::-1])
        )

        # compat on level
        sorted_series = random_order.sort_index(level=0)
        tm.assert_series_equal(sorted_series, datetime_series)

        # compat on axis
        sorted_series = random_order.sort_index(axis=0)
        tm.assert_series_equal(sorted_series, datetime_series)

        msg = "No axis named 1 for object type Series"
        with pytest.raises(ValueError, match=msg):
            random_order.sort_values(axis=1)

        sorted_series = random_order.sort_index(level=0, axis=0)
        tm.assert_series_equal(sorted_series, datetime_series)

        with pytest.raises(ValueError, match=msg):
            random_order.sort_index(level=0, axis=1)

    def test_sort_index_inplace(self, datetime_series):
        datetime_series.index = datetime_series.index._with_freq(None)

        # For GH#11402
        rindex = list(datetime_series.index)
        np.random.default_rng(2).shuffle(rindex)

        # descending
        random_order = datetime_series.reindex(rindex)
        result = random_order.sort_index(ascending=False, inplace=True)

        # inplace=True mutates the caller and returns None
        assert result is None
        expected = datetime_series.reindex(datetime_series.index[::-1])
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(random_order, expected)

        # ascending
        random_order = datetime_series.reindex(rindex)
        result = random_order.sort_index(ascending=True, inplace=True)

        assert result is None
        expected = datetime_series.copy()
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(random_order, expected)

    def test_sort_index_level(self):
        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
        s = Series([1, 2], mi)
        backwards = s.iloc[[1, 0]]

        res = s.sort_index(level="A")
        tm.assert_series_equal(backwards, res)

        res = s.sort_index(level=["A", "B"])
        tm.assert_series_equal(backwards, res)

        # with sort_remaining=False, the already-equal selected levels
        # leave the original order intact
        res = s.sort_index(level="A", sort_remaining=False)
        tm.assert_series_equal(s, res)

        res = s.sort_index(level=["A", "B"], sort_remaining=False)
        tm.assert_series_equal(s, res)

    @pytest.mark.parametrize("level", ["A", 0])  # GH#21052
    def test_sort_index_multiindex(self, level):
        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
        s = Series([1, 2], mi)
        backwards = s.iloc[[1, 0]]

        # implicit sort_remaining=True
        res = s.sort_index(level=level)
        tm.assert_series_equal(backwards, res)

        # GH#13496
        # sort has no effect without remaining lvls
        res = s.sort_index(level=level, sort_remaining=False)
        tm.assert_series_equal(s, res)

    def test_sort_index_kind(self, sort_kind):
        # GH#14444 & GH#13589: Add support for sort algo choosing
        series = Series(index=[3, 2, 1, 4, 3], dtype=object)
        expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object)

        index_sorted_series = series.sort_index(kind=sort_kind)
        tm.assert_series_equal(expected_series, index_sorted_series)

    def test_sort_index_na_position(self):
        series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object)

        expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object)
        index_sorted_series = series.sort_index(na_position="first")
        tm.assert_series_equal(expected_series_first, index_sorted_series)

        expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object)
        index_sorted_series = series.sort_index(na_position="last")
        tm.assert_series_equal(expected_series_last, index_sorted_series)

    def test_sort_index_intervals(self):
        s = Series(
            [np.nan, 1, 2, 3], IntervalIndex.from_arrays([0, 1, 2, 3], [1, 2, 3, 4])
        )

        result = s.sort_index()
        expected = s
        tm.assert_series_equal(result, expected)

        result = s.sort_index(ascending=False)
        expected = Series(
            [3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1])
        )
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("inplace", [True, False])
    @pytest.mark.parametrize(
        "original_list, sorted_list, ascending, ignore_index, output_index",
        [
            ([2, 3, 6, 1], [2, 3, 6, 1], True, True, [0, 1, 2, 3]),
            ([2, 3, 6, 1], [2, 3, 6, 1], True, False, [0, 1, 2, 3]),
            ([2, 3, 6, 1], [1, 6, 3, 2], False, True, [0, 1, 2, 3]),
            ([2, 3, 6, 1], [1, 6, 3, 2], False, False, [3, 2, 1, 0]),
        ],
    )
    def test_sort_index_ignore_index(
        self, inplace, original_list, sorted_list, ascending, ignore_index, output_index
    ):
        # GH 30114: ignore_index=True replaces the sorted index with a
        # default RangeIndex
        ser = Series(original_list)
        expected = Series(sorted_list, index=output_index)
        kwargs = {
            "ascending": ascending,
            "ignore_index": ignore_index,
            "inplace": inplace,
        }

        if inplace:
            result_ser = ser.copy()
            result_ser.sort_index(**kwargs)
        else:
            result_ser = ser.sort_index(**kwargs)

        tm.assert_series_equal(result_ser, expected)
        # the original must never be mutated here
        tm.assert_series_equal(ser, Series(original_list))

    def test_sort_index_ascending_list(self):
        # GH#16934

        # Set up a Series with a three level MultiIndex
        arrays = [
            ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
            ["one", "two", "one", "two", "one", "two", "one", "two"],
            [4, 3, 2, 1, 4, 3, 2, 1],
        ]
        tuples = zip(*arrays)
        mi = MultiIndex.from_tuples(tuples, names=["first", "second", "third"])
        ser = Series(range(8), index=mi)

        # Sort with boolean ascending
        result = ser.sort_index(level=["third", "first"], ascending=False)
        expected = ser.iloc[[4, 0, 5, 1, 6, 2, 7, 3]]
        tm.assert_series_equal(result, expected)

        # Sort with list of boolean ascending
        result = ser.sort_index(level=["third", "first"], ascending=[False, True])
        expected = ser.iloc[[0, 4, 1, 5, 2, 6, 3, 7]]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "ascending",
        [
            None,
            (True, None),
            (False, "True"),
        ],
    )
    def test_sort_index_ascending_bad_value_raises(self, ascending):
        # non-bool ascending values must be rejected with a clear message
        ser = Series(range(10), index=[0, 3, 2, 1, 4, 5, 7, 6, 8, 9])
        match = 'For argument "ascending" expected type bool'
        with pytest.raises(ValueError, match=match):
            ser.sort_index(ascending=ascending)
class TestSeriesSortIndexKey:
    # Tests for Series.sort_index with a key= callable applied to the
    # index (or to selected MultiIndex levels) before sorting.

    def test_sort_index_multiindex_key(self):
        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
        s = Series([1, 2], mi)
        backwards = s.iloc[[1, 0]]

        # negating the level reverses the order
        result = s.sort_index(level="C", key=lambda x: -x)
        tm.assert_series_equal(s, result)

        result = s.sort_index(level="C", key=lambda x: x)  # nothing happens
        tm.assert_series_equal(backwards, result)

    def test_sort_index_multiindex_key_multi_level(self):
        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
        s = Series([1, 2], mi)
        backwards = s.iloc[[1, 0]]

        # the key is applied to each selected level independently
        result = s.sort_index(level=["A", "C"], key=lambda x: -x)
        tm.assert_series_equal(s, result)

        result = s.sort_index(level=["A", "C"], key=lambda x: x)  # nothing happens
        tm.assert_series_equal(backwards, result)

    def test_sort_index_key(self):
        series = Series(np.arange(6, dtype="int64"), index=list("aaBBca"))

        # default: uppercase sorts before lowercase
        result = series.sort_index()
        expected = series.iloc[[2, 3, 0, 1, 5, 4]]
        tm.assert_series_equal(result, expected)

        # case-insensitive key changes the relative order
        result = series.sort_index(key=lambda x: x.str.lower())
        expected = series.iloc[[0, 1, 5, 2, 3, 4]]
        tm.assert_series_equal(result, expected)

        result = series.sort_index(key=lambda x: x.str.lower(), ascending=False)
        expected = series.iloc[[4, 2, 3, 0, 1, 5]]
        tm.assert_series_equal(result, expected)

    def test_sort_index_key_int(self):
        series = Series(np.arange(6, dtype="int64"), index=np.arange(6, dtype="int64"))

        result = series.sort_index()
        tm.assert_series_equal(result, series)

        # a strictly decreasing key is equivalent to ascending=False
        result = series.sort_index(key=lambda x: -x)
        expected = series.sort_index(ascending=False)
        tm.assert_series_equal(result, expected)

        # a monotone key leaves the order unchanged
        result = series.sort_index(key=lambda x: 2 * x)
        tm.assert_series_equal(result, series)

    def test_sort_index_kind_key(self, sort_kind, sort_by_key):
        # GH #14444 & #13589: Add support for sort algo choosing
        series = Series(index=[3, 2, 1, 4, 3], dtype=object)
        expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object)

        index_sorted_series = series.sort_index(kind=sort_kind, key=sort_by_key)
        tm.assert_series_equal(expected_series, index_sorted_series)

    def test_sort_index_kind_neg_key(self, sort_kind):
        # GH #14444 & #13589: Add support for sort algo choosing
        series = Series(index=[3, 2, 1, 4, 3], dtype=object)
        expected_series = Series(index=[4, 3, 3, 2, 1], dtype=object)

        index_sorted_series = series.sort_index(kind=sort_kind, key=lambda x: -x)
        tm.assert_series_equal(expected_series, index_sorted_series)

    def test_sort_index_na_position_key(self, sort_by_key):
        series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object)

        expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object)
        index_sorted_series = series.sort_index(na_position="first", key=sort_by_key)
        tm.assert_series_equal(expected_series_first, index_sorted_series)

        expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object)
        index_sorted_series = series.sort_index(na_position="last", key=sort_by_key)
        tm.assert_series_equal(expected_series_last, index_sorted_series)

    def test_changes_length_raises(self):
        # a key that changes the index length is invalid
        s = Series([1, 2, 3])
        with pytest.raises(ValueError, match="change the shape"):
            s.sort_index(key=lambda x: x[:1])

    def test_sort_values_key_type(self):
        # datetime accessors can be used as sort keys
        s = Series([1, 2, 3], DatetimeIndex(["2008-10-24", "2008-11-23", "2007-12-22"]))

        result = s.sort_index(key=lambda x: x.month)
        expected = s.iloc[[0, 1, 2]]
        tm.assert_series_equal(result, expected)

        result = s.sort_index(key=lambda x: x.day)
        expected = s.iloc[[2, 1, 0]]
        tm.assert_series_equal(result, expected)

        result = s.sort_index(key=lambda x: x.year)
        expected = s.iloc[[2, 0, 1]]
        tm.assert_series_equal(result, expected)

        result = s.sort_index(key=lambda x: x.month_name())
        expected = s.iloc[[2, 1, 0]]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "ascending",
        [
            [True, False],
            [False, True],
        ],
    )
    def test_sort_index_multi_already_monotonic(self, ascending):
        # GH 56049: mixed per-level ascending on an already-monotonic
        # MultiIndex must still re-sort correctly
        mi = MultiIndex.from_product([[1, 2], [3, 4]])
        ser = Series(range(len(mi)), index=mi)
        result = ser.sort_index(ascending=ascending)
        if ascending == [True, False]:
            expected = ser.take([1, 0, 3, 2])
        elif ascending == [False, True]:
            expected = ser.take([2, 3, 0, 1])
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,246 @@
import numpy as np
import pytest
from pandas import (
Categorical,
DataFrame,
Series,
)
import pandas._testing as tm
class TestSeriesSortValues:
def test_sort_values(self, datetime_series, using_copy_on_write):
    # Broad sort_values coverage: index reordering, NaN placement,
    # ascending as bool vs. one-element list, argument validation,
    # inplace=True, and sorting a Series that is a view of a DataFrame.

    # check indexes are reordered corresponding with the values
    ser = Series([3, 2, 4, 1], ["A", "B", "C", "D"])
    expected = Series([1, 2, 3, 4], ["D", "B", "A", "C"])
    result = ser.sort_values()
    tm.assert_series_equal(expected, result)

    ts = datetime_series.copy()
    ts[:5] = np.nan
    vals = ts.values

    # NaNs sort to the end by default
    result = ts.sort_values()
    assert np.isnan(result[-5:]).all()
    tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:]))

    # na_position
    result = ts.sort_values(na_position="first")
    assert np.isnan(result[:5]).all()
    tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:]))

    # something object-type
    ser = Series(["A", "B"], [1, 2])
    # no failure
    ser.sort_values()

    # ascending=False
    ordered = ts.sort_values(ascending=False)
    expected = np.sort(ts.dropna().values)[::-1]
    tm.assert_almost_equal(expected, ordered.dropna().values)
    ordered = ts.sort_values(ascending=False, na_position="first")
    tm.assert_almost_equal(expected, ordered.dropna().values)

    # ascending=[False] should behave the same as ascending=False
    ordered = ts.sort_values(ascending=[False])
    expected = ts.sort_values(ascending=False)
    tm.assert_series_equal(expected, ordered)
    ordered = ts.sort_values(ascending=[False], na_position="first")
    expected = ts.sort_values(ascending=False, na_position="first")
    tm.assert_series_equal(expected, ordered)

    # invalid ascending arguments are rejected with precise messages
    msg = 'For argument "ascending" expected type bool, received type NoneType.'
    with pytest.raises(ValueError, match=msg):
        ts.sort_values(ascending=None)
    msg = r"Length of ascending \(0\) must be 1 for Series"
    with pytest.raises(ValueError, match=msg):
        ts.sort_values(ascending=[])
    msg = r"Length of ascending \(3\) must be 1 for Series"
    with pytest.raises(ValueError, match=msg):
        ts.sort_values(ascending=[1, 2, 3])
    msg = r"Length of ascending \(2\) must be 1 for Series"
    with pytest.raises(ValueError, match=msg):
        ts.sort_values(ascending=[False, False])
    msg = 'For argument "ascending" expected type bool, received type str.'
    with pytest.raises(ValueError, match=msg):
        ts.sort_values(ascending="foobar")

    # inplace=True
    ts = datetime_series.copy()
    return_value = ts.sort_values(ascending=False, inplace=True)
    assert return_value is None
    tm.assert_series_equal(ts, datetime_series.sort_values(ascending=False))
    tm.assert_index_equal(
        ts.index, datetime_series.sort_values(ascending=False).index
    )

    # GH#5856/5853
    # Series.sort_values operating on a view
    df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
    s = df.iloc[:, 0]

    msg = (
        "This Series is a view of some other array, to sort in-place "
        "you must create a copy"
    )
    if using_copy_on_write:
        # under CoW, in-place sort of a view is allowed (it copies lazily)
        s.sort_values(inplace=True)
        tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
    else:
        with pytest.raises(ValueError, match=msg):
            s.sort_values(inplace=True)
def test_sort_values_categorical(self):
c = Categorical(["a", "b", "b", "a"], ordered=False)
cat = Series(c.copy())
# sort in the categories order
expected = Series(
Categorical(["a", "a", "b", "b"], ordered=False), index=[0, 3, 1, 2]
)
result = cat.sort_values()
tm.assert_series_equal(result, expected)
cat = Series(Categorical(["a", "c", "b", "d"], ordered=True))
res = cat.sort_values()
exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
tm.assert_numpy_array_equal(res.__array__(), exp)
cat = Series(
Categorical(
["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True
)
)
res = cat.sort_values()
exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
tm.assert_numpy_array_equal(res.__array__(), exp)
res = cat.sort_values(ascending=False)
exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
tm.assert_numpy_array_equal(res.__array__(), exp)
raw_cat1 = Categorical(
["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False
)
raw_cat2 = Categorical(
["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True
)
s = ["a", "b", "c", "d"]
df = DataFrame(
{"unsort": raw_cat1, "sort": raw_cat2, "string": s, "values": [1, 2, 3, 4]}
)
# Cats must be sorted in a dataframe
res = df.sort_values(by=["string"], ascending=False)
exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp)
assert res["sort"].dtype == "category"
res = df.sort_values(by=["sort"], ascending=False)
exp = df.sort_values(by=["string"], ascending=True)
tm.assert_series_equal(res["values"], exp["values"])
assert res["sort"].dtype == "category"
assert res["unsort"].dtype == "category"
# unordered cat, but we allow this
df.sort_values(by=["unsort"], ascending=False)
# multi-columns sort
# GH#7848
df = DataFrame(
{"id": [6, 5, 4, 3, 2, 1], "raw_grade": ["a", "b", "b", "a", "a", "e"]}
)
df["grade"] = Categorical(df["raw_grade"], ordered=True)
df["grade"] = df["grade"].cat.set_categories(["b", "e", "a"])
# sorts 'grade' according to the order of the categories
result = df.sort_values(by=["grade"])
expected = df.iloc[[1, 2, 5, 0, 3, 4]]
tm.assert_frame_equal(result, expected)
# multi
result = df.sort_values(by=["grade", "id"])
expected = df.iloc[[2, 1, 5, 4, 3, 0]]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize(
"original_list, sorted_list, ignore_index, output_index",
[
([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]),
([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]),
],
)
def test_sort_values_ignore_index(
self, inplace, original_list, sorted_list, ignore_index, output_index
):
# GH 30114
ser = Series(original_list)
expected = Series(sorted_list, index=output_index)
kwargs = {"ignore_index": ignore_index, "inplace": inplace}
if inplace:
result_ser = ser.copy()
result_ser.sort_values(ascending=False, **kwargs)
else:
result_ser = ser.sort_values(ascending=False, **kwargs)
tm.assert_series_equal(result_ser, expected)
tm.assert_series_equal(ser, Series(original_list))
def test_mergesort_descending_stability(self):
# GH 28697
s = Series([1, 2, 1, 3], ["first", "b", "second", "c"])
result = s.sort_values(ascending=False, kind="mergesort")
expected = Series([3, 2, 1, 1], ["c", "b", "first", "second"])
tm.assert_series_equal(result, expected)
def test_sort_values_validate_ascending_for_value_error(self):
# GH41634
ser = Series([23, 7, 21])
msg = 'For argument "ascending" expected type bool, received type str.'
with pytest.raises(ValueError, match=msg):
ser.sort_values(ascending="False")
@pytest.mark.parametrize("ascending", [False, 0, 1, True])
def test_sort_values_validate_ascending_functional(self, ascending):
# GH41634
ser = Series([23, 7, 21])
expected = np.sort(ser.values)
sorted_ser = ser.sort_values(ascending=ascending)
if not ascending:
expected = expected[::-1]
result = sorted_ser.values
tm.assert_numpy_array_equal(result, expected)
class TestSeriesSortingKey:
    """Tests for the ``key`` callable of :meth:`Series.sort_values`."""

    def test_sort_values_key(self):
        ser = Series(np.array(["Hello", "goodbye"]))

        # Without a key, uppercase "Hello" already sorts before "goodbye".
        tm.assert_series_equal(ser.sort_values(axis=0), ser)

        # A case-insensitive key reverses the order of these two values.
        result = ser.sort_values(axis=0, key=lambda x: x.str.lower())
        tm.assert_series_equal(result, ser[::-1])

    def test_sort_values_key_nan(self):
        ser = Series(np.array([0, 5, np.nan, 3, 2, np.nan]))
        expected = ser.iloc[[0, 4, 3, 1, 2, 5]]

        # NaNs always sort last, regardless of key or ascending.
        tm.assert_series_equal(ser.sort_values(axis=0), expected)
        tm.assert_series_equal(ser.sort_values(axis=0, key=lambda x: x + 5), expected)
        tm.assert_series_equal(
            ser.sort_values(axis=0, key=lambda x: -x, ascending=False), expected
        )

View File

@ -0,0 +1,182 @@
from datetime import datetime
from io import StringIO
import numpy as np
import pytest
import pandas as pd
from pandas import Series
import pandas._testing as tm
from pandas.io.common import get_handle
class TestSeriesToCSV:
    """Round-trip tests for ``Series.to_csv`` / ``pd.read_csv``."""

    def read_csv(self, path, **kwargs):
        """Read a one-column CSV back into a Series.

        With no explicit header, the name and index name are cleared to
        mirror what ``to_csv(header=False)`` round-trips.
        """
        params = {"index_col": 0, "header": None}
        params.update(**kwargs)

        header = params.get("header")
        out = pd.read_csv(path, **params).squeeze("columns")

        if header is None:
            out.name = out.index.name = None

        return out

    def test_from_csv(self, datetime_series, string_series):
        """to_csv/read_csv round-trips preserve values; names depend on header."""
        # freq doesn't round-trip
        datetime_series.index = datetime_series.index._with_freq(None)

        with tm.ensure_clean() as path:
            datetime_series.to_csv(path, header=False)
            ts = self.read_csv(path, parse_dates=True)
            tm.assert_series_equal(datetime_series, ts, check_names=False)

            # header=False drops the Series/index names on the way back in.
            assert ts.name is None
            assert ts.index.name is None

            # see gh-10483
            datetime_series.to_csv(path, header=True)
            ts_h = self.read_csv(path, header=0)
            assert ts_h.name == "ts"

            string_series.to_csv(path, header=False)
            series = self.read_csv(path)
            tm.assert_series_equal(string_series, series, check_names=False)

            assert series.name is None
            assert series.index.name is None

            string_series.to_csv(path, header=True)
            series_h = self.read_csv(path, header=0)
            assert series_h.name == "series"

            # Custom separator round-trip, with and without date parsing.
            with open(path, "w", encoding="utf-8") as outfile:
                outfile.write("1998-01-01|1.0\n1999-01-01|2.0")

            series = self.read_csv(path, sep="|", parse_dates=True)
            check_series = Series(
                {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
            )
            tm.assert_series_equal(check_series, series)

            series = self.read_csv(path, sep="|", parse_dates=False)
            check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
            tm.assert_series_equal(check_series, series)

    def test_to_csv(self, datetime_series):
        """to_csv writes one record per line and round-trips raw values."""
        with tm.ensure_clean() as path:
            datetime_series.to_csv(path, header=False)

            with open(path, newline=None, encoding="utf-8") as f:
                lines = f.readlines()
            # No blank line between records.
            assert lines[1] != "\n"

            datetime_series.to_csv(path, index=False, header=False)
            arr = np.loadtxt(path)
            tm.assert_almost_equal(arr, datetime_series.values)

    def test_to_csv_unicode_index(self):
        """Non-ASCII values and index labels round-trip via a text buffer."""
        buf = StringIO()
        s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"])

        s.to_csv(buf, encoding="UTF-8", header=False)
        buf.seek(0)

        s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
        tm.assert_series_equal(s, s2)

    def test_to_csv_float_format(self):
        """float_format controls the precision written to disk."""
        with tm.ensure_clean() as filename:
            ser = Series([0.123456, 0.234567, 0.567567])
            ser.to_csv(filename, float_format="%.2f", header=False)

            rs = self.read_csv(filename)
            xp = Series([0.12, 0.23, 0.57])
            tm.assert_series_equal(rs, xp)

    def test_to_csv_list_entries(self):
        """List-valued entries (from str.split) serialize without error."""
        s = Series(["jack and jill", "jesse and frank"])

        split = s.str.split(r"\s+and\s+")

        buf = StringIO()
        split.to_csv(buf, header=False)

    def test_to_csv_path_is_none(self):
        # GH 8215
        # Series.to_csv() was returning None, inconsistent with
        # DataFrame.to_csv() which returned string
        s = Series([1, 2, 3])
        csv_str = s.to_csv(path_or_buf=None, header=False)
        assert isinstance(csv_str, str)

    @pytest.mark.parametrize(
        "s,encoding",
        [
            (
                Series([0.123456, 0.234567, 0.567567], index=["A", "B", "C"], name="X"),
                None,
            ),
            # GH 21241, 21118
            (Series(["abc", "def", "ghi"], name="X"), "ascii"),
            (Series(["123", "你好", "世界"], name="中文"), "gb2312"),
            (
                Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"),  # noqa: RUF001
                "cp737",
            ),
        ],
    )
    def test_to_csv_compression(self, s, encoding, compression):
        """Compressed output round-trips via path and via open file handle."""
        with tm.ensure_clean() as filename:
            s.to_csv(filename, compression=compression, encoding=encoding, header=True)
            # test the round trip - to_csv -> read_csv
            result = pd.read_csv(
                filename,
                compression=compression,
                encoding=encoding,
                index_col=0,
            ).squeeze("columns")
            tm.assert_series_equal(s, result)

            # test the round trip using file handle - to_csv -> read_csv
            with get_handle(
                filename, "w", compression=compression, encoding=encoding
            ) as handles:
                s.to_csv(handles.handle, encoding=encoding, header=True)

            result = pd.read_csv(
                filename,
                compression=compression,
                encoding=encoding,
                index_col=0,
            ).squeeze("columns")
            tm.assert_series_equal(s, result)

            # explicitly ensure file was compressed
            with tm.decompress_file(filename, compression) as fh:
                text = fh.read().decode(encoding or "utf8")
                assert s.name in text

            with tm.decompress_file(filename, compression) as fh:
                tm.assert_series_equal(
                    s,
                    pd.read_csv(fh, index_col=0, encoding=encoding).squeeze("columns"),
                )

    def test_to_csv_interval_index(self, using_infer_string):
        # GH 28210
        s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3))

        with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path:
            s.to_csv(path, header=False)
            result = self.read_csv(path, index_col=0)

            # can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
            expected = s.copy()
            if using_infer_string:
                expected.index = expected.index.astype("string[pyarrow_numpy]")
            else:
                expected.index = expected.index.astype(str)
            tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,38 @@
import collections
import numpy as np
import pytest
from pandas import Series
import pandas._testing as tm
class TestSeriesToDict:
@pytest.mark.parametrize(
"mapping", (dict, collections.defaultdict(list), collections.OrderedDict)
)
def test_to_dict(self, mapping, datetime_series):
# GH#16122
result = Series(datetime_series.to_dict(into=mapping), name="ts")
expected = datetime_series.copy()
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(result, expected)
from_method = Series(datetime_series.to_dict(into=collections.Counter))
from_constructor = Series(collections.Counter(datetime_series.items()))
tm.assert_series_equal(from_method, from_constructor)
@pytest.mark.parametrize(
"input",
(
{"a": np.int64(64), "b": 10},
{"a": np.int64(64), "b": 10, "c": "ABC"},
{"a": np.uint64(64), "b": 10, "c": "ABC"},
),
)
def test_to_dict_return_types(self, input):
# GH25969
d = Series(input).to_dict()
assert isinstance(d["a"], int)
assert isinstance(d["b"], int)

View File

@ -0,0 +1,63 @@
import pytest
from pandas import (
DataFrame,
Index,
Series,
)
import pandas._testing as tm
class TestToFrame:
def test_to_frame_respects_name_none(self):
# GH#44212 if we explicitly pass name=None, then that should be respected,
# not changed to 0
# GH-45448 this is first deprecated & enforced in 2.0
ser = Series(range(3))
result = ser.to_frame(None)
exp_index = Index([None], dtype=object)
tm.assert_index_equal(result.columns, exp_index)
result = ser.rename("foo").to_frame(None)
exp_index = Index([None], dtype=object)
tm.assert_index_equal(result.columns, exp_index)
def test_to_frame(self, datetime_series):
datetime_series.name = None
rs = datetime_series.to_frame()
xp = DataFrame(datetime_series.values, index=datetime_series.index)
tm.assert_frame_equal(rs, xp)
datetime_series.name = "testname"
rs = datetime_series.to_frame()
xp = DataFrame(
{"testname": datetime_series.values}, index=datetime_series.index
)
tm.assert_frame_equal(rs, xp)
rs = datetime_series.to_frame(name="testdifferent")
xp = DataFrame(
{"testdifferent": datetime_series.values}, index=datetime_series.index
)
tm.assert_frame_equal(rs, xp)
@pytest.mark.filterwarnings(
"ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning"
)
def test_to_frame_expanddim(self):
# GH#9762
class SubclassedSeries(Series):
@property
def _constructor_expanddim(self):
return SubclassedFrame
class SubclassedFrame(DataFrame):
pass
ser = SubclassedSeries([1, 2, 3], name="X")
result = ser.to_frame()
assert isinstance(result, SubclassedFrame)
expected = SubclassedFrame({"X": [1, 2, 3]})
tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,49 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
NA,
Series,
Timedelta,
)
import pandas._testing as tm
@pytest.mark.parametrize("dtype", ["int64", "float64"])
def test_to_numpy_na_value(dtype):
    # GH#48951: ``na_value`` fills missing entries so the cast succeeds.
    ser = Series([1, 2, NA, 4])
    result = ser.to_numpy(dtype=dtype, na_value=0)
    tm.assert_numpy_array_equal(result, np.array([1, 2, 0, 4], dtype=dtype))
def test_to_numpy_cast_before_setting_na():
    # GH#50600: the dtype cast happens before na_value is applied, so a
    # Series without missing values is returned unchanged.
    result = Series([1]).to_numpy(dtype=np.float64, na_value=np.nan)
    tm.assert_numpy_array_equal(result, np.array([1.0]))
@td.skip_if_no("pyarrow")
def test_to_numpy_arrow_dtype_given():
    # GH#57121: masked pyarrow ints convert to float64 with NaN for NA.
    ser = Series([1, NA], dtype="int64[pyarrow]")
    tm.assert_numpy_array_equal(
        ser.to_numpy(dtype="float64"), np.array([1.0, np.nan])
    )
def test_astype_ea_int_to_td_ts():
# GH#57093
ser = Series([1, None], dtype="Int64")
result = ser.astype("m8[ns]")
expected = Series([1, Timedelta("nat")], dtype="m8[ns]")
tm.assert_series_equal(result, expected)
result = ser.astype("M8[ns]")
expected = Series([1, Timedelta("nat")], dtype="M8[ns]")
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,36 @@
import pytest
import pandas.util._test_decorators as td
from pandas import (
Interval,
Period,
Series,
Timedelta,
Timestamp,
)
@pytest.mark.parametrize(
    "values, dtype, expected_dtype",
    (
        ([1], "int64", int),
        ([1], "Int64", int),
        ([1.0], "float64", float),
        ([1.0], "Float64", float),
        (["abc"], "object", str),
        (["abc"], "string", str),
        ([Interval(1, 3)], "interval", Interval),
        ([Period("2000-01-01", "D")], "period[D]", Period),
        ([Timedelta(days=1)], "timedelta64[ns]", Timedelta),
        ([Timestamp("2000-01-01")], "datetime64[ns]", Timestamp),
        pytest.param([1], "int64[pyarrow]", int, marks=td.skip_if_no("pyarrow")),
        pytest.param([1.0], "float64[pyarrow]", float, marks=td.skip_if_no("pyarrow")),
        pytest.param(["abc"], "string[pyarrow]", str, marks=td.skip_if_no("pyarrow")),
    ),
)
def test_tolist_scalar_dtype(values, dtype, expected_dtype):
    # GH49890: tolist() must unbox elements to Python/pandas scalar types,
    # not leave them as numpy/arrow scalars.
    ser = Series(values, dtype=dtype)
    assert type(ser.tolist()[0]) == expected_dtype

View File

@ -0,0 +1,67 @@
from datetime import datetime
import pytest
import pandas as pd
from pandas import (
Series,
date_range,
)
import pandas._testing as tm
class TestTruncate:
    """Tests for :meth:`Series.truncate`."""

    def test_truncate_datetimeindex_tz(self):
        # GH 9243 / GH#36148: naive bounds against a tz-aware index raise
        # as of 2.0; tz-aware bounds work.
        idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific")
        ser = Series(range(len(idx)), index=idx)

        with pytest.raises(TypeError, match="Cannot compare tz-naive"):
            ser.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4))

        lower, upper = idx[1], idx[3]
        result = ser.truncate(lower.to_pydatetime(), upper.to_pydatetime())
        tm.assert_series_equal(result, Series([1, 2, 3], index=idx[1:4]))

    def test_truncate_periodindex(self):
        # GH 17717
        idx_dup = pd.PeriodIndex(
            [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
        )
        result = Series([1, 2, 3], index=idx_dup).truncate(after="2017-09-02")
        expected_idx = pd.PeriodIndex(
            [pd.Period("2017-09-02"), pd.Period("2017-09-02")]
        )
        tm.assert_series_equal(result, Series([1, 2], index=expected_idx))

        # An unsorted index must be sorted before truncating.
        idx_unsorted = pd.PeriodIndex(
            [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
        )
        ser = Series([1, 2, 3], index=idx_unsorted)
        result = ser.sort_index().truncate(after="2017-09-02")
        tm.assert_series_equal(
            result, Series([2], index=pd.PeriodIndex([pd.Period("2017-09-02")]))
        )

    def test_truncate_one_element_series(self):
        # GH 35544: bounds enclosing the single element are a no-op.
        ser = Series([0.1], index=pd.DatetimeIndex(["2020-08-04"]))
        result = ser.truncate(
            before=pd.Timestamp("2020-08-02"), after=pd.Timestamp("2020-08-04")
        )
        tm.assert_series_equal(result, ser)

    def test_truncate_index_only_one_unique_value(self):
        # GH 42365: bounds beyond a single-valued index keep everything.
        obj = Series(0, index=date_range("2021-06-30", "2021-06-30")).repeat(5)
        tm.assert_series_equal(obj.truncate("2021-06-28", "2021-07-01"), obj)

View File

@ -0,0 +1,123 @@
from datetime import timezone
import pytest
import pytz
from pandas._libs.tslibs import timezones
from pandas import (
DatetimeIndex,
NaT,
Series,
Timestamp,
date_range,
)
import pandas._testing as tm
class TestTZLocalize:
def test_series_tz_localize_ambiguous_bool(self):
# make sure that we are correctly accepting bool values as ambiguous
# GH#14402
ts = Timestamp("2015-11-01 01:00:03")
expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central")
expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central")
ser = Series([ts])
expected0 = Series([expected0])
expected1 = Series([expected1])
with tm.external_error_raised(pytz.AmbiguousTimeError):
ser.dt.tz_localize("US/Central")
result = ser.dt.tz_localize("US/Central", ambiguous=True)
tm.assert_series_equal(result, expected0)
result = ser.dt.tz_localize("US/Central", ambiguous=[True])
tm.assert_series_equal(result, expected0)
result = ser.dt.tz_localize("US/Central", ambiguous=False)
tm.assert_series_equal(result, expected1)
result = ser.dt.tz_localize("US/Central", ambiguous=[False])
tm.assert_series_equal(result, expected1)
def test_series_tz_localize_matching_index(self):
# Matching the index of the result with that of the original series
# GH 43080
dt_series = Series(
date_range(start="2021-01-01T02:00:00", periods=5, freq="1D"),
index=[2, 6, 7, 8, 11],
dtype="category",
)
result = dt_series.dt.tz_localize("Europe/Berlin")
expected = Series(
date_range(
start="2021-01-01T02:00:00", periods=5, freq="1D", tz="Europe/Berlin"
),
index=[2, 6, 7, 8, 11],
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"method, exp",
[
["shift_forward", "2015-03-29 03:00:00"],
["shift_backward", "2015-03-29 01:59:59.999999999"],
["NaT", NaT],
["raise", None],
["foo", "invalid"],
],
)
def test_tz_localize_nonexistent(self, warsaw, method, exp, unit):
# GH 8917
tz = warsaw
n = 60
dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min", unit=unit)
ser = Series(1, index=dti)
df = ser.to_frame()
if method == "raise":
with tm.external_error_raised(pytz.NonExistentTimeError):
dti.tz_localize(tz, nonexistent=method)
with tm.external_error_raised(pytz.NonExistentTimeError):
ser.tz_localize(tz, nonexistent=method)
with tm.external_error_raised(pytz.NonExistentTimeError):
df.tz_localize(tz, nonexistent=method)
elif exp == "invalid":
msg = (
"The nonexistent argument must be one of "
"'raise', 'NaT', 'shift_forward', 'shift_backward' "
"or a timedelta object"
)
with pytest.raises(ValueError, match=msg):
dti.tz_localize(tz, nonexistent=method)
with pytest.raises(ValueError, match=msg):
ser.tz_localize(tz, nonexistent=method)
with pytest.raises(ValueError, match=msg):
df.tz_localize(tz, nonexistent=method)
else:
result = ser.tz_localize(tz, nonexistent=method)
expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz).as_unit(unit))
tm.assert_series_equal(result, expected)
result = df.tz_localize(tz, nonexistent=method)
expected = expected.to_frame()
tm.assert_frame_equal(result, expected)
res_index = dti.tz_localize(tz, nonexistent=method)
tm.assert_index_equal(res_index, expected.index)
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_series_tz_localize_empty(self, tzstr):
# GH#2248
ser = Series(dtype=object)
ser2 = ser.tz_localize("utc")
assert ser2.index.tz == timezone.utc
ser2 = ser.tz_localize(tzstr)
timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr))

View File

@ -0,0 +1,76 @@
import numpy as np
from pandas import (
Categorical,
IntervalIndex,
Series,
date_range,
)
import pandas._testing as tm
class TestUnique:
    """Tests for :meth:`Series.unique`."""

    def test_unique_uint64(self):
        # Values above the int64 range must survive unique() unchanged.
        ser = Series([1, 2, 2**63, 2**63], dtype=np.uint64)
        tm.assert_numpy_array_equal(
            ser.unique(), np.array([1, 2, 2**63], dtype=np.uint64)
        )

    def test_unique_data_ownership(self):
        # it works! GH#1807 — sorting the result of unique() must not crash.
        Series(Series(["a", "c", "b"]).unique()).sort_values()

    def test_unique(self):
        # GH#714: repeated NaNs collapse into a single entry.
        ser = Series([1.2345] * 100)
        ser[::2] = np.nan
        assert len(ser.unique()) == 2

        # same with an explicit float32 dtype
        ser = Series([1.2345] * 100, dtype="f4")
        ser[::2] = np.nan
        assert len(ser.unique()) == 2

    def test_unique_nan_object_dtype(self):
        # GH#714: NaN handling in object arrays.
        ser = Series(["foo"] * 100, dtype="O")
        ser[::2] = np.nan
        assert len(ser.unique()) == 2

    def test_unique_none(self):
        # None is kept as a distinct value in object dtype.
        ser = Series([1, 2, 3, None, None, None], dtype=object)
        tm.assert_numpy_array_equal(
            ser.unique(), np.array([1, 2, 3, None], dtype=object)
        )

    def test_unique_categorical(self):
        # GH#18051: empty and all-NaN categoricals round-trip through unique().
        for cat in (Categorical([]), Categorical([np.nan])):
            result = Series(cat).unique()
            tm.assert_categorical_equal(result, cat)

    def test_tz_unique(self):
        # GH 46128: interval arrays keep (and distinguish) their tz dtype.
        naive = IntervalIndex.from_breaks(date_range("2016-01-01", periods=3))
        ser_naive = Series(naive)
        uniq_naive = ser_naive.unique()
        tm.assert_interval_array_equal(ser_naive.array, uniq_naive)

        aware = IntervalIndex.from_breaks(
            date_range("2016-01-01", periods=3, tz="US/Eastern")
        )
        ser_aware = Series(aware)
        uniq_aware = ser_aware.unique()
        tm.assert_interval_array_equal(ser_aware.array, uniq_aware)

        assert uniq_naive.dtype != uniq_aware.dtype

View File

@ -0,0 +1,169 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
date_range,
)
import pandas._testing as tm
def test_unstack_preserves_object():
    # Object dtype must survive unstacking along either level.
    mi = MultiIndex.from_product([["bar", "foo"], ["one", "two"]])
    ser = Series(np.arange(4.0), index=mi, dtype=object)

    for level in (-1, 0):
        unstacked = ser.unstack(level=level)
        assert (unstacked.dtypes == object).all()
def test_unstack():
    """Core Series.unstack behavior, including NaN index labels."""
    index = MultiIndex(
        levels=[["bar", "foo"], ["one", "three", "two"]],
        codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
    )
    ser = Series(np.arange(4.0), index=index)

    expected = DataFrame(
        [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
        index=["bar", "foo"],
        columns=["one", "three", "two"],
    )
    # Unstacking the innermost level ...
    tm.assert_frame_equal(ser.unstack(), expected)
    # ... and the outermost level, which yields the transpose.
    tm.assert_frame_equal(ser.unstack(level=0), expected.T)

    index = MultiIndex(
        levels=[["bar"], ["one", "two", "three"], [0, 1]],
        codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
    )
    ser = Series(np.random.default_rng(2).standard_normal(6), index=index)
    exp_index = MultiIndex(
        levels=[["one", "two", "three"], [0, 1]],
        codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
    )
    expected = DataFrame({"bar": ser.values}, index=exp_index).sort_index(level=0)
    tm.assert_frame_equal(ser.unstack(0).sort_index(), expected)

    # GH5873: a NaN in an index level becomes a NaN column label.
    idx = MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
    left = Series([1, 2], index=idx).unstack()
    right = DataFrame(
        [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
    )
    tm.assert_frame_equal(left, right)

    idx = MultiIndex.from_arrays(
        [
            ["cat", "cat", "cat", "dog", "dog"],
            ["a", "a", "b", "a", "b"],
            [1, 2, 1, 1, np.nan],
        ]
    )
    ts = Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
    right = DataFrame(
        [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
        columns=["cat", "dog"],
    )
    right.index = MultiIndex.from_tuples(
        [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
    )
    tm.assert_frame_equal(ts.unstack(level=0), right)
def test_unstack_tuplename_in_multiindex():
    # GH 19966: a tuple-valued level name can be used to select the
    # level to unstack.
    idx = MultiIndex.from_product(
        [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
    )
    result = Series(1, index=idx).unstack(("A", "a"))

    expected = DataFrame(
        [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
        columns=MultiIndex.from_tuples([("a",), ("b",), ("c",)], names=[("A", "a")]),
        index=Index([1, 2, 3], name=("B", "b")),
    )
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "unstack_idx, expected_values, expected_index, expected_columns",
    [
        (
            ("A", "a"),
            [[1, 1], [1, 1], [1, 1], [1, 1]],
            MultiIndex.from_tuples([(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]),
            MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]),
        ),
        (
            (("A", "a"), "B"),
            [[1, 1, 1, 1], [1, 1, 1, 1]],
            Index([3, 4], name="C"),
            MultiIndex.from_tuples(
                [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"]
            ),
        ),
    ],
)
def test_unstack_mixed_type_name_in_multiindex(
    unstack_idx, expected_values, expected_index, expected_columns
):
    # GH 19966: unstacking works when level names mix tuples and scalars.
    idx = MultiIndex.from_product(
        [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
    )
    result = Series(1, index=idx).unstack(unstack_idx)
    tm.assert_frame_equal(
        result,
        DataFrame(expected_values, columns=expected_columns, index=expected_index),
    )
def test_unstack_multi_index_categorical_values():
    """Unstacking a categorical Series preserves the dtype in every column."""
    # Build a (date, column-letter) MultiIndex by stacking a small frame.
    df = DataFrame(
        np.random.default_rng(2).standard_normal((10, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=10, freq="B"),
    )
    mi = df.stack(future_stack=True).index.rename(["major", "minor"])
    ser = Series(["foo"] * len(mi), index=mi, name="category", dtype="category")

    result = ser.unstack()

    # Each resulting column should be an independent categorical of "foo"s,
    # one entry per date in the outer level.
    dti = ser.index.levels[0]
    c = pd.Categorical(["foo"] * len(dti))
    expected = DataFrame(
        {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
        columns=Index(list("ABCD"), name="minor"),
        index=dti.rename("major"),
    )
    tm.assert_frame_equal(result, expected)
def test_unstack_mixed_level_names():
    # GH#48763: unstacking by name works even when level names mix
    # strings and integers.
    idx = MultiIndex.from_arrays(
        [["a", "a"], [1, 2], ["red", "blue"]], names=("x", 0, "y")
    )
    result = Series([1, 2], index=idx).unstack("x")

    expected = DataFrame(
        [[1], [2]],
        columns=Index(["a"], name="x"),
        index=MultiIndex.from_tuples([(1, "red"), (2, "blue")], names=[0, "y"]),
    )
    tm.assert_frame_equal(result, expected)

View File

@ -0,0 +1,139 @@
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import (
CategoricalDtype,
DataFrame,
NaT,
Series,
Timestamp,
)
import pandas._testing as tm
class TestUpdate:
    """Tests for the in-place :meth:`Series.update`."""

    def test_update(self, using_copy_on_write):
        s = Series([1.5, np.nan, 3.0, 4.0, np.nan])
        s2 = Series([np.nan, 3.5, np.nan, 5.0])
        s.update(s2)

        # Non-NaN values from s2 overwrite s; NaNs in s2 leave s untouched.
        expected = Series([1.5, 3.5, 3.0, 5.0, np.nan])
        tm.assert_series_equal(s, expected)

        # GH 3217
        df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
        df["c"] = np.nan
        # Cast to object to avoid upcast when setting "foo"
        df["c"] = df["c"].astype(object)
        df_orig = df.copy()

        if using_copy_on_write:
            # Under copy-on-write the chained update never reaches df.
            with tm.raises_chained_assignment_error():
                df["c"].update(Series(["foo"], index=[0]))
            expected = df_orig
        else:
            with tm.assert_produces_warning(FutureWarning, match="inplace method"):
                df["c"].update(Series(["foo"], index=[0]))
            expected = DataFrame(
                [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
            )
            expected["c"] = expected["c"].astype(object)
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "other, dtype, expected, warn",
        [
            # other is int
            ([61, 63], "int32", Series([10, 61, 12], dtype="int32"), None),
            ([61, 63], "int64", Series([10, 61, 12]), None),
            ([61, 63], float, Series([10.0, 61.0, 12.0]), None),
            ([61, 63], object, Series([10, 61, 12], dtype=object), None),
            # other is float, but can be cast to int
            ([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32"), None),
            ([61.0, 63.0], "int64", Series([10, 61, 12]), None),
            ([61.0, 63.0], float, Series([10.0, 61.0, 12.0]), None),
            ([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object), None),
            # others is float, cannot be cast to int
            ([61.1, 63.1], "int32", Series([10.0, 61.1, 12.0]), FutureWarning),
            ([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0]), FutureWarning),
            ([61.1, 63.1], float, Series([10.0, 61.1, 12.0]), None),
            ([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object), None),
            # other is object, cannot be cast
            ([(61,), (63,)], "int32", Series([10, (61,), 12]), FutureWarning),
            ([(61,), (63,)], "int64", Series([10, (61,), 12]), FutureWarning),
            ([(61,), (63,)], float, Series([10.0, (61,), 12.0]), FutureWarning),
            ([(61,), (63,)], object, Series([10, (61,), 12]), None),
        ],
    )
    def test_update_dtypes(self, other, dtype, expected, warn):
        # Incompatible update values upcast the caller and emit a warning.
        ser = Series([10, 11, 12], dtype=dtype)
        other = Series(other, index=[1, 3])
        with tm.assert_produces_warning(warn, match="item of incompatible dtype"):
            ser.update(other)

        tm.assert_series_equal(ser, expected)

    @pytest.mark.parametrize(
        "series, other, expected",
        [
            # update by key
            (
                Series({"a": 1, "b": 2, "c": 3, "d": 4}),
                {"b": 5, "c": np.nan},
                Series({"a": 1, "b": 5, "c": 3, "d": 4}),
            ),
            # update by position
            (Series([1, 2, 3, 4]), [np.nan, 5, 1], Series([1, 5, 1, 4])),
        ],
    )
    def test_update_from_non_series(self, series, other, expected):
        # GH 33215: dicts and lists are accepted, aligned by key or position.
        series.update(other)
        tm.assert_series_equal(series, expected)

    @pytest.mark.parametrize(
        "data, other, expected, dtype",
        [
            (["a", None], [None, "b"], ["a", "b"], "string[python]"),
            pytest.param(
                ["a", None],
                [None, "b"],
                ["a", "b"],
                "string[pyarrow]",
                marks=td.skip_if_no("pyarrow"),
            ),
            ([1, None], [None, 2], [1, 2], "Int64"),
            ([True, None], [None, False], [True, False], "boolean"),
            (
                ["a", None],
                [None, "b"],
                ["a", "b"],
                CategoricalDtype(categories=["a", "b"]),
            ),
            (
                [Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT],
                [NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")],
                [Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2,
                "datetime64[ns, Europe/London]",
            ),
        ],
    )
    def test_update_extension_array_series(self, data, other, expected, dtype):
        # Extension dtypes (masked, string, categorical, tz-aware) update
        # in place with NA treated like NaN.
        result = Series(data, dtype=dtype)
        other = Series(other, dtype=dtype)
        expected = Series(expected, dtype=dtype)

        result.update(other)
        tm.assert_series_equal(result, expected)

    def test_update_with_categorical_type(self):
        # GH 25744: the categories are preserved; only the codes change.
        dtype = CategoricalDtype(["a", "b", "c", "d"])
        s1 = Series(["a", "b", "c"], index=[1, 2, 3], dtype=dtype)
        s2 = Series(["b", "a"], index=[1, 2], dtype=dtype)
        s1.update(s2)
        result = s1
        expected = Series(["b", "a", "c"], index=[1, 2, 3], dtype=dtype)
        tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,271 @@
import numpy as np
import pytest
import pandas as pd
from pandas import (
Categorical,
CategoricalIndex,
Index,
Series,
)
import pandas._testing as tm
class TestSeriesValueCounts:
def test_value_counts_datetime(self, unit):
# most dtypes are tested in tests/base
values = [
pd.Timestamp("2011-01-01 09:00"),
pd.Timestamp("2011-01-01 10:00"),
pd.Timestamp("2011-01-01 11:00"),
pd.Timestamp("2011-01-01 09:00"),
pd.Timestamp("2011-01-01 09:00"),
pd.Timestamp("2011-01-01 11:00"),
]
exp_idx = pd.DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"],
name="xxx",
).as_unit(unit)
exp = Series([3, 2, 1], index=exp_idx, name="count")
ser = Series(values, name="xxx").dt.as_unit(unit)
tm.assert_series_equal(ser.value_counts(), exp)
# check DatetimeIndex outputs the same result
idx = pd.DatetimeIndex(values, name="xxx").as_unit(unit)
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_datetime_tz(self, unit):
values = [
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 10:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"),
]
exp_idx = pd.DatetimeIndex(
["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"],
tz="US/Eastern",
name="xxx",
).as_unit(unit)
exp = Series([3, 2, 1], index=exp_idx, name="count")
ser = Series(values, name="xxx").dt.as_unit(unit)
tm.assert_series_equal(ser.value_counts(), exp)
idx = pd.DatetimeIndex(values, name="xxx").as_unit(unit)
tm.assert_series_equal(idx.value_counts(), exp)
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_period(self):
values = [
pd.Period("2011-01", freq="M"),
pd.Period("2011-02", freq="M"),
pd.Period("2011-03", freq="M"),
pd.Period("2011-01", freq="M"),
pd.Period("2011-01", freq="M"),
pd.Period("2011-03", freq="M"),
]
exp_idx = pd.PeriodIndex(
["2011-01", "2011-03", "2011-02"], freq="M", name="xxx"
)
exp = Series([3, 2, 1], index=exp_idx, name="count")
ser = Series(values, name="xxx")
tm.assert_series_equal(ser.value_counts(), exp)
# check DatetimeIndex outputs the same result
idx = pd.PeriodIndex(values, name="xxx")
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_categorical_ordered(self):
# most dtypes are tested in tests/base
values = Categorical([1, 2, 3, 1, 1, 3], ordered=True)
exp_idx = CategoricalIndex(
[1, 3, 2], categories=[1, 2, 3], ordered=True, name="xxx"
)
exp = Series([3, 2, 1], index=exp_idx, name="count")
ser = Series(values, name="xxx")
tm.assert_series_equal(ser.value_counts(), exp)
# check CategoricalIndex outputs the same result
idx = CategoricalIndex(values, name="xxx")
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_categorical_not_ordered(self):
values = Categorical([1, 2, 3, 1, 1, 3], ordered=False)
exp_idx = CategoricalIndex(
[1, 3, 2], categories=[1, 2, 3], ordered=False, name="xxx"
)
exp = Series([3, 2, 1], index=exp_idx, name="count")
ser = Series(values, name="xxx")
tm.assert_series_equal(ser.value_counts(), exp)
# check CategoricalIndex outputs the same result
idx = CategoricalIndex(values, name="xxx")
tm.assert_series_equal(idx.value_counts(), exp)
# normalize
exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
tm.assert_series_equal(ser.value_counts(normalize=True), exp)
tm.assert_series_equal(idx.value_counts(normalize=True), exp)
def test_value_counts_categorical(self):
# GH#12835
cats = Categorical(list("abcccb"), categories=list("cabd"))
ser = Series(cats, name="xxx")
res = ser.value_counts(sort=False)
exp_index = CategoricalIndex(
list("cabd"), categories=cats.categories, name="xxx"
)
exp = Series([3, 1, 2, 0], name="count", index=exp_index)
tm.assert_series_equal(res, exp)
res = ser.value_counts(sort=True)
exp_index = CategoricalIndex(
list("cbad"), categories=cats.categories, name="xxx"
)
exp = Series([3, 2, 1, 0], name="count", index=exp_index)
tm.assert_series_equal(res, exp)
# check object dtype handles the Series.name as the same
# (tested in tests/base)
ser = Series(["a", "b", "c", "c", "c", "b"], name="xxx")
res = ser.value_counts()
exp = Series([3, 2, 1], name="count", index=Index(["c", "b", "a"], name="xxx"))
tm.assert_series_equal(res, exp)
def test_value_counts_categorical_with_nan(self):
# see GH#9443
# sanity check
ser = Series(["a", "b", "a"], dtype="category")
exp = Series([2, 1], index=CategoricalIndex(["a", "b"]), name="count")
res = ser.value_counts(dropna=True)
tm.assert_series_equal(res, exp)
res = ser.value_counts(dropna=True)
tm.assert_series_equal(res, exp)
# same Series via two different constructions --> same behaviour
series = [
Series(["a", "b", None, "a", None, None], dtype="category"),
Series(
Categorical(["a", "b", None, "a", None, None], categories=["a", "b"])
),
]
for ser in series:
# None is a NaN value, so we exclude its count here
exp = Series([2, 1], index=CategoricalIndex(["a", "b"]), name="count")
res = ser.value_counts(dropna=True)
tm.assert_series_equal(res, exp)
# we don't exclude the count of None and sort by counts
exp = Series(
[3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"]), name="count"
)
res = ser.value_counts(dropna=False)
tm.assert_series_equal(res, exp)
# When we aren't sorting by counts, and np.nan isn't a
# category, it should be last.
exp = Series(
[2, 1, 3], index=CategoricalIndex(["a", "b", np.nan]), name="count"
)
res = ser.value_counts(dropna=False, sort=False)
tm.assert_series_equal(res, exp)
@pytest.mark.parametrize(
"ser, dropna, exp",
[
(
Series([False, True, True, pd.NA]),
False,
Series([2, 1, 1], index=[True, False, pd.NA], name="count"),
),
(
Series([False, True, True, pd.NA]),
True,
Series([2, 1], index=Index([True, False], dtype=object), name="count"),
),
(
Series(range(3), index=[True, False, np.nan]).index,
False,
Series([1, 1, 1], index=[True, False, np.nan], name="count"),
),
],
)
def test_value_counts_bool_with_nan(self, ser, dropna, exp):
# GH32146
out = ser.value_counts(dropna=dropna)
tm.assert_series_equal(out, exp)
@pytest.mark.parametrize(
"input_array,expected",
[
(
[1 + 1j, 1 + 1j, 1, 3j, 3j, 3j],
Series(
[3, 2, 1],
index=Index([3j, 1 + 1j, 1], dtype=np.complex128),
name="count",
),
),
(
np.array([1 + 1j, 1 + 1j, 1, 3j, 3j, 3j], dtype=np.complex64),
Series(
[3, 2, 1],
index=Index([3j, 1 + 1j, 1], dtype=np.complex64),
name="count",
),
),
],
)
def test_value_counts_complex_numbers(self, input_array, expected):
# GH 17927
result = Series(input_array).value_counts()
tm.assert_series_equal(result, expected)
def test_value_counts_masked(self):
# GH#54984
dtype = "Int64"
ser = Series([1, 2, None, 2, None, 3], dtype=dtype)
result = ser.value_counts(dropna=False)
expected = Series(
[2, 2, 1, 1],
index=Index([2, None, 1, 3], dtype=dtype),
dtype=dtype,
name="count",
)
tm.assert_series_equal(result, expected)
result = ser.value_counts(dropna=True)
expected = Series(
[2, 1, 1], index=Index([2, 1, 3], dtype=dtype), dtype=dtype, name="count"
)
tm.assert_series_equal(result, expected)

View File

@ -0,0 +1,29 @@
import numpy as np
import pytest
from pandas import (
IntervalIndex,
Series,
period_range,
)
import pandas._testing as tm
class TestValues:
@pytest.mark.parametrize(
"data",
[
period_range("2000", periods=4),
IntervalIndex.from_breaks([1, 2, 3, 4]),
],
)
def test_values_object_extension_dtypes(self, data):
# https://github.com/pandas-dev/pandas/issues/23995
result = Series(data).values
expected = np.array(data.astype(object))
tm.assert_numpy_array_equal(result, expected)
def test_values(self, datetime_series):
tm.assert_almost_equal(
datetime_series.values, list(datetime_series), check_dtype=False
)

View File

@ -0,0 +1,61 @@
import numpy as np
import pytest
from pandas import (
Index,
Series,
array,
date_range,
)
import pandas._testing as tm
pytestmark = pytest.mark.filterwarnings(
"ignore:Series.view is deprecated and will be removed in a future version.:FutureWarning" # noqa: E501
)
class TestView:
    """Tests for the (deprecated) ``Series.view`` casting between i8 and
    datetime-like dtypes."""

    def test_view_i8_to_datetimelike(self):
        # int64 data viewed as tz-aware datetime64
        dti = date_range("2000", periods=4, tz="US/Central")
        i8_ser = Series(dti.asi8)
        viewed = i8_ser.view(dti.dtype)
        tm.assert_datetime_array_equal(viewed._values, dti._data._with_freq(None))

        # int64 data viewed as period
        pi = dti.tz_localize(None).to_period("D")
        i8_ser = Series(pi.asi8)
        viewed = i8_ser.view(pi.dtype)
        tm.assert_period_array_equal(viewed._values, pi._data)

    def test_view_tz(self):
        # GH#24024: viewing a tz-aware series as i8 yields the UTC epoch nanos
        ser = Series(date_range("2000", periods=4, tz="US/Central"))
        expected = Series(
            [
                946706400000000000,
                946792800000000000,
                946879200000000000,
                946965600000000000,
            ]
        )
        tm.assert_series_equal(ser.view("i8"), expected)

    @pytest.mark.parametrize(
        "first", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"]
    )
    @pytest.mark.parametrize(
        "second", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"]
    )
    @pytest.mark.parametrize("box", [Series, Index, array])
    def test_view_between_datetimelike(self, first, second, box):
        # any datetime-like dtype can be viewed as any other while the
        # underlying i8 values stay untouched
        dti = date_range("2016-01-01", periods=3)
        orig = box(dti)

        obj = orig.view(first)
        assert obj.dtype == first
        tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8)

        res = obj.view(second)
        assert res.dtype == second
        tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8)

View File

@ -0,0 +1,300 @@
import inspect
import pydoc
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
Series,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
class TestSeriesMisc:
def test_tab_completion(self):
# GH 9910
s = Series(list("abcd"))
# Series of str values should have .str but not .dt/.cat in __dir__
assert "str" in dir(s)
assert "dt" not in dir(s)
assert "cat" not in dir(s)
def test_tab_completion_dt(self):
# similarly for .dt
s = Series(date_range("1/1/2015", periods=5))
assert "dt" in dir(s)
assert "str" not in dir(s)
assert "cat" not in dir(s)
def test_tab_completion_cat(self):
# Similarly for .cat, but with the twist that str and dt should be
# there if the categories are of that type first cat and str.
s = Series(list("abbcd"), dtype="category")
assert "cat" in dir(s)
assert "str" in dir(s) # as it is a string categorical
assert "dt" not in dir(s)
def test_tab_completion_cat_str(self):
# similar to cat and str
s = Series(date_range("1/1/2015", periods=5)).astype("category")
assert "cat" in dir(s)
assert "str" not in dir(s)
assert "dt" in dir(s) # as it is a datetime categorical
def test_tab_completion_with_categorical(self):
# test the tab completion display
ok_for_cat = [
"categories",
"codes",
"ordered",
"set_categories",
"add_categories",
"remove_categories",
"rename_categories",
"reorder_categories",
"remove_unused_categories",
"as_ordered",
"as_unordered",
]
s = Series(list("aabbcde")).astype("category")
results = sorted({r for r in s.cat.__dir__() if not r.startswith("_")})
tm.assert_almost_equal(results, sorted(set(ok_for_cat)))
@pytest.mark.parametrize(
"index",
[
Index(list("ab") * 5, dtype="category"),
Index([str(i) for i in range(10)]),
Index(["foo", "bar", "baz"] * 2),
date_range("2020-01-01", periods=10),
period_range("2020-01-01", periods=10, freq="D"),
timedelta_range("1 day", periods=10),
Index(np.arange(10), dtype=np.uint64),
Index(np.arange(10), dtype=np.int64),
Index(np.arange(10), dtype=np.float64),
Index([True, False]),
Index([f"a{i}" for i in range(101)]),
pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
],
)
def test_index_tab_completion(self, index):
# dir contains string-like values of the Index.
s = Series(index=index, dtype=object)
dir_s = dir(s)
for i, x in enumerate(s.index.unique(level=0)):
if i < 100:
assert not isinstance(x, str) or not x.isidentifier() or x in dir_s
else:
assert x not in dir_s
@pytest.mark.parametrize("ser", [Series(dtype=object), Series([1])])
def test_not_hashable(self, ser):
msg = "unhashable type: 'Series'"
with pytest.raises(TypeError, match=msg):
hash(ser)
def test_contains(self, datetime_series):
tm.assert_contains_all(datetime_series.index, datetime_series)
def test_axis_alias(self):
s = Series([1, 2, np.nan])
tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))
assert s.dropna().sum("rows") == 3
assert s._get_axis_number("rows") == 0
assert s._get_axis_name("rows") == "index"
def test_class_axis(self):
# https://github.com/pandas-dev/pandas/issues/18147
# no exception and no empty docstring
assert pydoc.getdoc(Series.index)
def test_ndarray_compat(self):
# test numpy compat with Series as sub-class of NDFrame
tsdf = DataFrame(
np.random.default_rng(2).standard_normal((1000, 3)),
columns=["A", "B", "C"],
index=date_range("1/1/2000", periods=1000),
)
def f(x):
return x[x.idxmax()]
result = tsdf.apply(f)
expected = tsdf.max()
tm.assert_series_equal(result, expected)
def test_ndarray_compat_like_func(self):
# using an ndarray like function
s = Series(np.random.default_rng(2).standard_normal(10))
result = Series(np.ones_like(s))
expected = Series(1, index=range(10), dtype="float64")
tm.assert_series_equal(result, expected)
def test_ndarray_compat_ravel(self):
# ravel
s = Series(np.random.default_rng(2).standard_normal(10))
with tm.assert_produces_warning(FutureWarning, match="ravel is deprecated"):
result = s.ravel(order="F")
tm.assert_almost_equal(result, s.values.ravel(order="F"))
def test_empty_method(self):
s_empty = Series(dtype=object)
assert s_empty.empty
@pytest.mark.parametrize("dtype", ["int64", object])
def test_empty_method_full_series(self, dtype):
full_series = Series(index=[1], dtype=dtype)
assert not full_series.empty
@pytest.mark.parametrize("dtype", [None, "Int64"])
def test_integer_series_size(self, dtype):
# GH 25580
s = Series(range(9), dtype=dtype)
assert s.size == 9
def test_attrs(self):
s = Series([0, 1], name="abc")
assert s.attrs == {}
s.attrs["version"] = 1
result = s + 1
assert result.attrs == {"version": 1}
def test_inspect_getmembers(self):
# GH38782
pytest.importorskip("jinja2")
ser = Series(dtype=object)
msg = "Series._data is deprecated"
with tm.assert_produces_warning(
DeprecationWarning, match=msg, check_stacklevel=False
):
inspect.getmembers(ser)
def test_unknown_attribute(self):
# GH#9680
tdi = timedelta_range(start=0, periods=10, freq="1s")
ser = Series(np.random.default_rng(2).normal(size=10), index=tdi)
assert "foo" not in ser.__dict__
msg = "'Series' object has no attribute 'foo'"
with pytest.raises(AttributeError, match=msg):
ser.foo
@pytest.mark.parametrize("op", ["year", "day", "second", "weekday"])
def test_datetime_series_no_datelike_attrs(self, op, datetime_series):
# GH#7206
msg = f"'Series' object has no attribute '{op}'"
with pytest.raises(AttributeError, match=msg):
getattr(datetime_series, op)
def test_series_datetimelike_attribute_access(self):
# attribute access should still work!
ser = Series({"year": 2000, "month": 1, "day": 10})
assert ser.year == 2000
assert ser.month == 1
assert ser.day == 10
def test_series_datetimelike_attribute_access_invalid(self):
ser = Series({"year": 2000, "month": 1, "day": 10})
msg = "'Series' object has no attribute 'weekday'"
with pytest.raises(AttributeError, match=msg):
ser.weekday
@pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
@pytest.mark.parametrize(
"kernel, has_numeric_only",
[
("skew", True),
("var", True),
("all", False),
("prod", True),
("any", False),
("idxmin", False),
("quantile", False),
("idxmax", False),
("min", True),
("sem", True),
("mean", True),
("nunique", False),
("max", True),
("sum", True),
("count", False),
("median", True),
("std", True),
("backfill", False),
("rank", True),
("pct_change", False),
("cummax", False),
("shift", False),
("diff", False),
("cumsum", False),
("cummin", False),
("cumprod", False),
("fillna", False),
("ffill", False),
("pad", False),
("bfill", False),
("sample", False),
("tail", False),
("take", False),
("head", False),
("cov", False),
("corr", False),
],
)
@pytest.mark.parametrize("dtype", [bool, int, float, object])
def test_numeric_only(self, kernel, has_numeric_only, dtype):
# GH#47500
ser = Series([0, 1, 1], dtype=dtype)
if kernel == "corrwith":
args = (ser,)
elif kernel == "corr":
args = (ser,)
elif kernel == "cov":
args = (ser,)
elif kernel == "nth":
args = (0,)
elif kernel == "fillna":
args = (True,)
elif kernel == "fillna":
args = ("ffill",)
elif kernel == "take":
args = ([0],)
elif kernel == "quantile":
args = (0.5,)
else:
args = ()
method = getattr(ser, kernel)
if not has_numeric_only:
msg = (
"(got an unexpected keyword argument 'numeric_only'"
"|too many arguments passed in)"
)
with pytest.raises(TypeError, match=msg):
method(*args, numeric_only=True)
elif dtype is object:
msg = f"Series.{kernel} does not allow numeric_only=True with non-numeric"
with pytest.raises(TypeError, match=msg):
method(*args, numeric_only=True)
else:
result = method(*args, numeric_only=True)
expected = method(*args, numeric_only=False)
if isinstance(expected, Series):
# transformer
tm.assert_series_equal(result, expected)
else:
# reducer
assert result == expected
@pytest.mark.parametrize("converter", [int, float, complex])
def test_float_int_deprecated(converter):
    # GH 51101: converting a length-1 Series to a scalar warns
    expected = converter(1)
    with tm.assert_produces_warning(FutureWarning):
        actual = converter(Series([1]))
    assert actual == expected

View File

@ -0,0 +1,984 @@
from datetime import (
date,
timedelta,
timezone,
)
from decimal import Decimal
import operator
import numpy as np
import pytest
from pandas._libs import lib
from pandas._libs.tslibs import IncompatibleFrequency
import pandas as pd
from pandas import (
Categorical,
DatetimeTZDtype,
Index,
Series,
Timedelta,
bdate_range,
date_range,
isna,
)
import pandas._testing as tm
from pandas.core import ops
from pandas.core.computation import expressions as expr
from pandas.core.computation.check import NUMEXPR_INSTALLED
@pytest.fixture(autouse=True, params=[0, 1000000], ids=["numexpr", "python"])
def switch_numexpr_min_elements(request, monkeypatch):
    """Run every test twice: once with numexpr eligible for all sizes and
    once with the threshold so high that the pure-python path is used."""
    with monkeypatch.context() as ctx:
        ctx.setattr(expr, "_MIN_ELEMENTS", request.param)
        yield
def _permute(obj):
return obj.take(np.random.default_rng(2).permutation(len(obj)))
class TestSeriesFlexArithmetic:
    # Checks that the named flex methods (Series.add, .sub, ...) agree with
    # the corresponding operators, including reversed variants and fill_value.

    @pytest.mark.parametrize(
        "ts",
        [
            (lambda x: x, lambda x: x * 2, False),
            (lambda x: x, lambda x: x[::2], False),
            (lambda x: x, lambda x: 5, True),
            (
                lambda x: Series(range(10), dtype=np.float64),
                lambda x: Series(range(10), dtype=np.float64),
                True,
            ),
        ],
    )
    @pytest.mark.parametrize(
        "opname", ["add", "sub", "mul", "floordiv", "truediv", "pow"]
    )
    def test_flex_method_equivalence(self, opname, ts):
        # check that Series.{opname} behaves like Series.__{opname}__,
        tser = Series(
            np.arange(20, dtype=np.float64),
            index=date_range("2020-01-01", periods=20),
            name="ts",
        )

        # ts is (left transform, right transform, also check reversed op?)
        series = ts[0](tser)
        other = ts[1](tser)
        check_reverse = ts[2]

        op = getattr(Series, opname)
        alt = getattr(operator, opname)

        result = op(series, other)
        expected = alt(series, other)
        tm.assert_almost_equal(result, expected)
        if check_reverse:
            rop = getattr(Series, "r" + opname)
            result = rop(series, other)
            expected = alt(other, series)
            tm.assert_almost_equal(result, expected)

    def test_flex_method_subclass_metadata_preservation(self, all_arithmetic_operators):
        # GH 13208
        class MySeries(Series):
            _metadata = ["x"]

            @property
            def _constructor(self):
                return MySeries

        opname = all_arithmetic_operators
        op = getattr(Series, opname)
        m = MySeries([1, 2, 3], name="test")
        m.x = 42
        result = op(m, 1)
        # the flex op must round-trip the subclass and its _metadata
        assert result.x == 42

    def test_flex_add_scalar_fill_value(self):
        # GH12723
        ser = Series([0, 1, np.nan, 3, 4, 5])

        exp = ser.fillna(0).add(2)
        res = ser.add(2, fill_value=0)
        tm.assert_series_equal(res, exp)

    # (flex method, equivalent operator, fill value) triples; built at
    # class-creation time so it can feed the parametrize decorator below.
    pairings = [(Series.div, operator.truediv, 1), (Series.rdiv, ops.rtruediv, 1)]
    for op in ["add", "sub", "mul", "pow", "truediv", "floordiv"]:
        fv = 0
        lop = getattr(Series, op)
        lequiv = getattr(operator, op)
        rop = getattr(Series, "r" + op)
        # bind op at definition time...
        requiv = lambda x, y, op=op: getattr(operator, op)(y, x)
        pairings.append((lop, lequiv, fv))
        pairings.append((rop, requiv, fv))

    @pytest.mark.parametrize("op, equiv_op, fv", pairings)
    def test_operators_combine(self, op, equiv_op, fv):
        # _check_fill recomputes the op elementwise, applying fill_value
        # semantics by hand, and compares with the flex method result.
        def _check_fill(meth, op, a, b, fill_value=0):
            exp_index = a.index.union(b.index)
            a = a.reindex(exp_index)
            b = b.reindex(exp_index)
            amask = isna(a)
            bmask = isna(b)

            exp_values = []
            for i in range(len(exp_index)):
                with np.errstate(all="ignore"):
                    if amask[i]:
                        if bmask[i]:
                            # both sides missing -> result is NaN
                            exp_values.append(np.nan)
                            continue
                        exp_values.append(op(fill_value, b[i]))
                    elif bmask[i]:
                        if amask[i]:
                            exp_values.append(np.nan)
                            continue
                        exp_values.append(op(a[i], fill_value))
                    else:
                        exp_values.append(op(a[i], b[i]))

            result = meth(a, b, fill_value=fill_value)
            expected = Series(exp_values, exp_index)
            tm.assert_series_equal(result, expected)

        a = Series([np.nan, 1.0, 2.0, 3.0, np.nan], index=np.arange(5))
        b = Series([np.nan, 1, np.nan, 3, np.nan, 4.0], index=np.arange(6))

        result = op(a, b)
        exp = equiv_op(a, b)
        tm.assert_series_equal(result, exp)
        _check_fill(op, equiv_op, a, b, fill_value=fv)
        # should accept axis=0 or axis='rows'
        op(a, b, axis=0)
class TestSeriesArithmetic:
    # Some of these may end up in tests/arithmetic, but are not yet sorted

    def test_add_series_with_period_index(self):
        rng = pd.period_range("1/1/2000", "1/1/2010", freq="Y")
        ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)

        result = ts + ts[::2]
        expected = ts + ts
        expected.iloc[1::2] = np.nan
        tm.assert_series_equal(result, expected)

        # alignment is by label, so a permuted operand gives the same result
        result = ts + _permute(ts[::2])
        tm.assert_series_equal(result, expected)

        msg = "Input has different freq=D from Period\\(freq=Y-DEC\\)"
        with pytest.raises(IncompatibleFrequency, match=msg):
            ts + ts.asfreq("D", how="end")

    @pytest.mark.parametrize(
        "target_add,input_value,expected_value",
        [
            ("!", ["hello", "world"], ["hello!", "world!"]),
            ("m", ["hello", "world"], ["hellom", "worldm"]),
        ],
    )
    def test_string_addition(self, target_add, input_value, expected_value):
        # GH28658 - ensure adding 'm' does not raise an error
        a = Series(input_value)

        result = a + target_add
        expected = Series(expected_value)
        tm.assert_series_equal(result, expected)

    def test_divmod(self):
        # GH#25557
        a = Series([1, 1, 1, np.nan], index=["a", "b", "c", "d"])
        b = Series([2, np.nan, 1, np.nan], index=["a", "b", "d", "e"])

        result = a.divmod(b)
        expected = divmod(a, b)
        tm.assert_series_equal(result[0], expected[0])
        tm.assert_series_equal(result[1], expected[1])

        result = a.rdivmod(b)
        expected = divmod(b, a)
        tm.assert_series_equal(result[0], expected[0])
        tm.assert_series_equal(result[1], expected[1])

    @pytest.mark.parametrize("index", [None, range(9)])
    def test_series_integer_mod(self, index):
        # GH#24396
        s1 = Series(range(1, 10))
        s2 = Series("foo", index=index)

        msg = "not all arguments converted during string formatting|mod not"

        with pytest.raises((TypeError, NotImplementedError), match=msg):
            s2 % s1

    def test_add_with_duplicate_index(self):
        # GH14227
        s1 = Series([1, 2], index=[1, 1])
        s2 = Series([10, 10], index=[1, 2])
        result = s1 + s2
        expected = Series([11, 12, np.nan], index=[1, 1, 2])
        tm.assert_series_equal(result, expected)

    def test_add_na_handling(self):
        ser = Series(
            [Decimal("1.3"), Decimal("2.3")], index=[date(2012, 1, 1), date(2012, 1, 2)]
        )

        result = ser + ser.shift(1)
        result2 = ser.shift(1) + ser
        # the shifted-in NaN propagates regardless of operand order
        assert isna(result.iloc[0])
        assert isna(result2.iloc[0])

    def test_add_corner_cases(self, datetime_series):
        empty = Series([], index=Index([]), dtype=np.float64)

        result = datetime_series + empty
        assert np.isnan(result).all()

        result = empty + empty.copy()
        assert len(result) == 0

    def test_add_float_plus_int(self, datetime_series):
        # float + int
        int_ts = datetime_series.astype(int)[:-5]
        added = datetime_series + int_ts
        expected = Series(
            datetime_series.values[:-5] + int_ts.values,
            index=datetime_series.index[:-5],
            name="ts",
        )
        tm.assert_series_equal(added[:-5], expected)

    def test_mul_empty_int_corner_case(self):
        s1 = Series([], [], dtype=np.int32)
        s2 = Series({"x": 0.0})
        tm.assert_series_equal(s1 * s2, Series([np.nan], index=["x"]))

    def test_sub_datetimelike_align(self):
        # GH#7500
        # datetimelike ops need to align
        dt = Series(date_range("2012-1-1", periods=3, freq="D"))
        dt.iloc[2] = np.nan
        dt2 = dt[::-1]

        expected = Series([timedelta(0), timedelta(0), pd.NaT])
        # name is reset
        result = dt2 - dt
        tm.assert_series_equal(result, expected)

        expected = Series(expected, name=0)
        result = (dt2.to_frame() - dt.to_frame())[0]
        tm.assert_series_equal(result, expected)

    def test_alignment_doesnt_change_tz(self):
        # GH#33671
        dti = date_range("2016-01-01", periods=10, tz="CET")
        dti_utc = dti.tz_convert("UTC")
        ser = Series(10, index=dti)
        ser_utc = Series(10, index=dti_utc)

        # we don't care about the result, just that original indexes are unchanged
        ser * ser_utc

        assert ser.index is dti
        assert ser_utc.index is dti_utc

    def test_alignment_categorical(self):
        # GH13365
        cat = Categorical(["3z53", "3z53", "LoJG", "LoJG", "LoJG", "N503"])
        ser1 = Series(2, index=cat)
        ser2 = Series(2, index=cat[:-1])
        result = ser1 * ser2

        # duplicate labels align pairwise (cartesian per label)
        exp_index = ["3z53"] * 4 + ["LoJG"] * 9 + ["N503"]
        exp_index = pd.CategoricalIndex(exp_index, categories=cat.categories)
        exp_values = [4.0] * 13 + [np.nan]
        expected = Series(exp_values, exp_index)

        tm.assert_series_equal(result, expected)

    def test_arithmetic_with_duplicate_index(self):
        # GH#8363
        # integer ops with a non-unique index
        index = [2, 2, 3, 3, 4]
        ser = Series(np.arange(1, 6, dtype="int64"), index=index)
        other = Series(np.arange(5, dtype="int64"), index=index)
        result = ser - other
        expected = Series(1, index=[2, 2, 3, 3, 4])
        tm.assert_series_equal(result, expected)

        # GH#8363
        # datetime ops with a non-unique index
        ser = Series(date_range("20130101 09:00:00", periods=5), index=index)
        other = Series(date_range("20130101", periods=5), index=index)
        result = ser - other
        expected = Series(Timedelta("9 hours"), index=[2, 2, 3, 3, 4])
        tm.assert_series_equal(result, expected)

    def test_masked_and_non_masked_propagate_na(self):
        # GH#45810
        ser1 = Series([0, np.nan], dtype="float")
        ser2 = Series([0, 1], dtype="Int64")
        result = ser1 * ser2
        expected = Series([0, pd.NA], dtype="Float64")
        tm.assert_series_equal(result, expected)

    def test_mask_div_propagate_na_for_non_na_dtype(self):
        # GH#42630
        ser1 = Series([15, pd.NA, 5, 4], dtype="Int64")
        ser2 = Series([15, 5, np.nan, 4])
        result = ser1 / ser2
        expected = Series([1.0, pd.NA, pd.NA, 1.0], dtype="Float64")
        tm.assert_series_equal(result, expected)

        result = ser2 / ser1
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("val, dtype", [(3, "Int64"), (3.5, "Float64")])
    def test_add_list_to_masked_array(self, val, dtype):
        # GH#22962
        ser = Series([1, None, 3], dtype="Int64")
        result = ser + [1, None, val]
        expected = Series([2, None, 3 + val], dtype=dtype)
        tm.assert_series_equal(result, expected)

        result = [1, None, val] + ser
        tm.assert_series_equal(result, expected)

    def test_add_list_to_masked_array_boolean(self, request):
        # GH#22962
        # numexpr warns about unhandled boolean ops only on the numexpr run
        warning = (
            UserWarning
            if request.node.callspec.id == "numexpr" and NUMEXPR_INSTALLED
            else None
        )
        ser = Series([True, None, False], dtype="boolean")
        with tm.assert_produces_warning(warning):
            result = ser + [True, None, True]
        expected = Series([True, None, True], dtype="boolean")
        tm.assert_series_equal(result, expected)

        with tm.assert_produces_warning(warning):
            result = [True, None, True] + ser
        tm.assert_series_equal(result, expected)
# ------------------------------------------------------------------
# Comparisons
class TestSeriesFlexComparison:
@pytest.mark.parametrize("axis", [0, None, "index"])
def test_comparison_flex_basic(self, axis, comparison_op):
left = Series(np.random.default_rng(2).standard_normal(10))
right = Series(np.random.default_rng(2).standard_normal(10))
result = getattr(left, comparison_op.__name__)(right, axis=axis)
expected = comparison_op(left, right)
tm.assert_series_equal(result, expected)
def test_comparison_bad_axis(self, comparison_op):
left = Series(np.random.default_rng(2).standard_normal(10))
right = Series(np.random.default_rng(2).standard_normal(10))
msg = "No axis named 1 for object type"
with pytest.raises(ValueError, match=msg):
getattr(left, comparison_op.__name__)(right, axis=1)
@pytest.mark.parametrize(
"values, op",
[
([False, False, True, False], "eq"),
([True, True, False, True], "ne"),
([False, False, True, False], "le"),
([False, False, False, False], "lt"),
([False, True, True, False], "ge"),
([False, True, False, False], "gt"),
],
)
def test_comparison_flex_alignment(self, values, op):
left = Series([1, 3, 2], index=list("abc"))
right = Series([2, 2, 2], index=list("bcd"))
result = getattr(left, op)(right)
expected = Series(values, index=list("abcd"))
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"values, op, fill_value",
[
([False, False, True, True], "eq", 2),
([True, True, False, False], "ne", 2),
([False, False, True, True], "le", 0),
([False, False, False, True], "lt", 0),
([True, True, True, False], "ge", 0),
([True, True, False, False], "gt", 0),
],
)
def test_comparison_flex_alignment_fill(self, values, op, fill_value):
left = Series([1, 3, 2], index=list("abc"))
right = Series([2, 2, 2], index=list("bcd"))
result = getattr(left, op)(right, fill_value=fill_value)
expected = Series(values, index=list("abcd"))
tm.assert_series_equal(result, expected)
class TestSeriesComparison:
def test_comparison_different_length(self):
a = Series(["a", "b", "c"])
b = Series(["b", "a"])
msg = "only compare identically-labeled Series"
with pytest.raises(ValueError, match=msg):
a < b
a = Series([1, 2])
b = Series([2, 3, 4])
with pytest.raises(ValueError, match=msg):
a == b
@pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
def test_ser_flex_cmp_return_dtypes(self, opname):
# GH#15115
ser = Series([1, 3, 2], index=range(3))
const = 2
result = getattr(ser, opname)(const).dtypes
expected = np.dtype("bool")
assert result == expected
@pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
def test_ser_flex_cmp_return_dtypes_empty(self, opname):
# GH#15115 empty Series case
ser = Series([1, 3, 2], index=range(3))
empty = ser.iloc[:0]
const = 2
result = getattr(empty, opname)(const).dtypes
expected = np.dtype("bool")
assert result == expected
    @pytest.mark.parametrize(
        "names", [(None, None, None), ("foo", "bar", None), ("baz", "baz", "baz")]
    )
    def test_ser_cmp_result_names(self, names, comparison_op):
        """Comparison result keeps a name only when both operands share it;
        names = (index name, series name, expected result name)."""
        # datetime64 dtype
        op = comparison_op
        dti = date_range("1949-06-07 03:00:00", freq="h", periods=5, name=names[0])
        ser = Series(dti).rename(names[1])
        result = op(ser, dti)
        assert result.name == names[2]

        # datetime64tz dtype
        dti = dti.tz_localize("US/Central")
        dti = pd.DatetimeIndex(dti, freq="infer")  # freq not preserved by tz_localize
        ser = Series(dti).rename(names[1])
        result = op(ser, dti)
        assert result.name == names[2]

        # timedelta64 dtype
        tdi = dti - dti.shift(1)
        ser = Series(tdi).rename(names[1])
        result = op(ser, tdi)
        assert result.name == names[2]

        # interval dtype
        if op in [operator.eq, operator.ne]:
            # interval dtype comparisons not yet implemented
            ii = pd.interval_range(start=0, periods=5, name=names[0])
            ser = Series(ii).rename(names[1])
            result = op(ser, ii)
            assert result.name == names[2]

        # categorical
        if op in [operator.eq, operator.ne]:
            # categorical dtype comparisons raise for inequalities
            cidx = tdi.astype("category")
            ser = Series(cidx).rename(names[1])
            result = op(ser, cidx)
            assert result.name == names[2]
    def test_comparisons(self, using_infer_string):
        """str Series vs bool Series: elementwise False under object dtype;
        raises under the pyarrow-backed string dtype."""
        s = Series(["a", "b", "c"])
        s2 = Series([False, True, False])

        # it works!
        exp = Series([False, False, False])
        if using_infer_string:
            import pyarrow as pa

            msg = "has no kernel"
            # TODO(3.0) GH56008
            with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
                s == s2
            with tm.assert_produces_warning(
                DeprecationWarning, match="comparison", check_stacklevel=False
            ):
                with pytest.raises(pa.lib.ArrowNotImplementedError, match=msg):
                    s2 == s
        else:
            tm.assert_series_equal(s == s2, exp)
            tm.assert_series_equal(s2 == s, exp)
# -----------------------------------------------------------------
# Categorical Dtype Comparisons
def test_categorical_comparisons(self):
# GH#8938
# allow equality comparisons
a = Series(list("abc"), dtype="category")
b = Series(list("abc"), dtype="object")
c = Series(["a", "b", "cc"], dtype="object")
d = Series(list("acb"), dtype="object")
e = Categorical(list("abc"))
f = Categorical(list("acb"))
# vs scalar
assert not (a == "a").all()
assert ((a != "a") == ~(a == "a")).all()
assert not ("a" == a).all()
assert (a == "a")[0]
assert ("a" == a)[0]
assert not ("a" != a)[0]
# vs list-like
assert (a == a).all()
assert not (a != a).all()
assert (a == list(a)).all()
assert (a == b).all()
assert (b == a).all()
assert ((~(a == b)) == (a != b)).all()
assert ((~(b == a)) == (b != a)).all()
assert not (a == c).all()
assert not (c == a).all()
assert not (a == d).all()
assert not (d == a).all()
# vs a cat-like
assert (a == e).all()
assert (e == a).all()
assert not (a == f).all()
assert not (f == a).all()
assert (~(a == e) == (a != e)).all()
assert (~(e == a) == (e != a)).all()
assert (~(a == f) == (a != f)).all()
assert (~(f == a) == (f != a)).all()
# non-equality is not comparable
msg = "can only compare equality or not"
with pytest.raises(TypeError, match=msg):
a < b
with pytest.raises(TypeError, match=msg):
b < a
with pytest.raises(TypeError, match=msg):
a > b
with pytest.raises(TypeError, match=msg):
b > a
    def test_unequal_categorical_comparison_raises_type_error(self):
        # unequal comparison should raise for unordered cats
        cat = Series(Categorical(list("abc")))
        msg = "can only compare equality or not"
        with pytest.raises(TypeError, match=msg):
            cat > "b"

        cat = Series(Categorical(list("abc"), ordered=False))
        with pytest.raises(TypeError, match=msg):
            cat > "b"

        # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
        # and following comparisons with scalars not in categories should raise
        # for unequal comps, but not for equal/not equal
        cat = Series(Categorical(list("abc"), ordered=True))

        msg = "Invalid comparison between dtype=category and str"
        with pytest.raises(TypeError, match=msg):
            cat < "d"
        with pytest.raises(TypeError, match=msg):
            cat > "d"
        with pytest.raises(TypeError, match=msg):
            "d" < cat
        with pytest.raises(TypeError, match=msg):
            "d" > cat

        # equality against an unseen scalar is fine: all-False / all-True
        tm.assert_series_equal(cat == "d", Series([False, False, False]))
        tm.assert_series_equal(cat != "d", Series([True, True, True]))

    # -----------------------------------------------------------------
def test_comparison_tuples(self):
# GH#11339
# comparisons vs tuple
s = Series([(1, 1), (1, 2)])
result = s == (1, 2)
expected = Series([False, True])
tm.assert_series_equal(result, expected)
result = s != (1, 2)
expected = Series([True, False])
tm.assert_series_equal(result, expected)
result = s == (0, 0)
expected = Series([False, False])
tm.assert_series_equal(result, expected)
result = s != (0, 0)
expected = Series([True, True])
tm.assert_series_equal(result, expected)
s = Series([(1, 1), (1, 1)])
result = s == (1, 1)
expected = Series([True, True])
tm.assert_series_equal(result, expected)
result = s != (1, 1)
expected = Series([False, False])
tm.assert_series_equal(result, expected)
def test_comparison_frozenset(self):
ser = Series([frozenset([1]), frozenset([1, 2])])
result = ser == frozenset([1])
expected = Series([True, False])
tm.assert_series_equal(result, expected)
    def test_comparison_operators_with_nas(self, comparison_op):
        # Comparisons on an object series containing NaNs behave like the
        # comparison on the non-NA subset, with NA positions filled in.
        ser = Series(bdate_range("1/1/2000", periods=10), dtype=object)
        ser[::2] = np.nan

        # test that comparisons work
        val = ser[5]

        result = comparison_op(ser, val)
        expected = comparison_op(ser.dropna(), val).reindex(ser.index)

        msg = "Downcasting object dtype arrays"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            if comparison_op is operator.ne:
                # NaN != anything is True
                expected = expected.fillna(True).astype(bool)
            else:
                # all other comparisons against NaN are False
                expected = expected.fillna(False).astype(bool)

        tm.assert_series_equal(result, expected)
def test_ne(self):
ts = Series([3, 4, 5, 6, 7], [3, 4, 5, 6, 7], dtype=float)
expected = np.array([True, True, False, True, True])
tm.assert_numpy_array_equal(ts.index != 5, expected)
tm.assert_numpy_array_equal(~(ts.index == 5), expected)
    @pytest.mark.parametrize(
        "left, right",
        [
            (
                Series([1, 2, 3], index=list("ABC"), name="x"),
                Series([2, 2, 2], index=list("ABD"), name="x"),
            ),
            (
                Series([1, 2, 3], index=list("ABC"), name="x"),
                Series([2, 2, 2, 2], index=list("ABCD"), name="x"),
            ),
        ],
    )
    def test_comp_ops_df_compat(self, left, right, frame_or_series):
        # GH 1134
        # GH 50083 to clarify that index and columns must be identically labeled
        # Comparing differently-labeled objects raises ValueError for both
        # Series and DataFrame; the frame message also mentions columns.
        if frame_or_series is not Series:
            msg = (
                rf"Can only compare identically-labeled \(both index and columns\) "
                f"{frame_or_series.__name__} objects"
            )
            left = left.to_frame()
            right = right.to_frame()
        else:
            msg = (
                f"Can only compare identically-labeled {frame_or_series.__name__} "
                f"objects"
            )

        with pytest.raises(ValueError, match=msg):
            left == right
        with pytest.raises(ValueError, match=msg):
            right == left

        with pytest.raises(ValueError, match=msg):
            left != right
        with pytest.raises(ValueError, match=msg):
            right != left

        with pytest.raises(ValueError, match=msg):
            left < right
        with pytest.raises(ValueError, match=msg):
            right < left
def test_compare_series_interval_keyword(self):
# GH#25338
ser = Series(["IntervalA", "IntervalB", "IntervalC"])
result = ser == "IntervalA"
expected = Series([True, False, False])
tm.assert_series_equal(result, expected)
# ------------------------------------------------------------------
# Unsorted
# These arithmetic tests were previously in other files, eventually
# should be parametrized and put into tests.arithmetic


class TestTimeSeriesArithmetic:
    def test_series_add_tz_mismatch_converts_to_utc(self):
        # Adding series with different timezones aligns both indexes to UTC.
        rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")

        perm = np.random.default_rng(2).permutation(100)[:90]
        ser1 = Series(
            np.random.default_rng(2).standard_normal(90),
            index=rng.take(perm).tz_convert("US/Eastern"),
        )

        perm = np.random.default_rng(2).permutation(100)[:90]
        ser2 = Series(
            np.random.default_rng(2).standard_normal(90),
            index=rng.take(perm).tz_convert("Europe/Berlin"),
        )

        result = ser1 + ser2

        uts1 = ser1.tz_convert("utc")
        uts2 = ser2.tz_convert("utc")
        expected = uts1 + uts2

        # sort since input indexes are not equal
        expected = expected.sort_index()

        assert result.index.tz is timezone.utc
        tm.assert_series_equal(result, expected)

    def test_series_add_aware_naive_raises(self):
        # Mixing tz-naive and tz-aware indexes in arithmetic is an error.
        rng = date_range("1/1/2011", periods=10, freq="h")
        ser = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)

        ser_utc = ser.tz_localize("utc")

        msg = "Cannot join tz-naive with tz-aware DatetimeIndex"
        with pytest.raises(Exception, match=msg):
            ser + ser_utc

        with pytest.raises(Exception, match=msg):
            ser_utc + ser

    # TODO: belongs in tests/arithmetic?
    def test_datetime_understood(self, unit):
        # Ensures it doesn't fail to create the right series
        # reported in issue#16726
        series = Series(date_range("2012-01-01", periods=3, unit=unit))
        offset = pd.offsets.DateOffset(days=6)
        result = series - offset
        exp_dti = pd.to_datetime(["2011-12-26", "2011-12-27", "2011-12-28"]).as_unit(
            unit
        )
        expected = Series(exp_dti)
        tm.assert_series_equal(result, expected)

    def test_align_date_objects_with_datetimeindex(self):
        # A DatetimeIndex aligns with object-dtype datetime.date labels.
        rng = date_range("1/1/2000", periods=20)
        ts = Series(np.random.default_rng(2).standard_normal(20), index=rng)

        ts_slice = ts[5:]
        ts2 = ts_slice.copy()
        ts2.index = [x.date() for x in ts2.index]

        result = ts + ts2
        result2 = ts2 + ts

        # the result keeps the DatetimeIndex but drops its freq
        expected = ts + ts[5:]
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(result, expected)
        tm.assert_series_equal(result2, expected)
class TestNamePreservation:
    @pytest.mark.parametrize("box", [list, tuple, np.array, Index, Series, pd.array])
    @pytest.mark.parametrize("flex", [True, False])
    def test_series_ops_name_retention(self, flex, box, names, all_binary_operators):
        # GH#33930 consistent name-retention
        op = all_binary_operators

        left = Series(range(10), name=names[0])
        right = Series(range(10), name=names[1])

        name = op.__name__.strip("_")
        is_logical = name in ["and", "rand", "xor", "rxor", "or", "ror"]

        msg = (
            r"Logical ops \(and, or, xor\) between Pandas objects and "
            "dtype-less sequences"
        )
        warn = None
        if box in [list, tuple] and is_logical:
            # GH#37374 logical ops vs dtype-less sequences are deprecated
            warn = FutureWarning

        right = box(right)
        if flex:
            if is_logical:
                # Series doesn't have these as flex methods
                return
            result = getattr(left, name)(right)
        else:
            # GH#37374 logical ops behaving as set ops deprecated
            with tm.assert_produces_warning(warn, match=msg):
                result = op(left, right)

        assert isinstance(result, Series)
        if box in [Index, Series]:
            # both operands carry names -> combined name per the fixture
            assert result.name is names[2] or result.name == names[2]
        else:
            # name-less right operand -> left name is kept
            assert result.name is names[0] or result.name == names[0]

    def test_binop_maybe_preserve_name(self, datetime_series):
        # names match, preserve
        result = datetime_series * datetime_series
        assert result.name == datetime_series.name
        result = datetime_series.mul(datetime_series)
        assert result.name == datetime_series.name

        result = datetime_series * datetime_series[:-2]
        assert result.name == datetime_series.name

        # names don't match, don't preserve
        cp = datetime_series.copy()
        cp.name = "something else"
        result = datetime_series + cp
        assert result.name is None
        result = datetime_series.add(cp)
        assert result.name is None

        # repeat the checks for every flex arithmetic method + reflections
        ops = ["add", "sub", "mul", "div", "truediv", "floordiv", "mod", "pow"]
        ops = ops + ["r" + op for op in ops]
        for op in ops:
            # names match, preserve
            ser = datetime_series.copy()
            result = getattr(ser, op)(ser)
            assert result.name == datetime_series.name

            # names don't match, don't preserve
            cp = datetime_series.copy()
            cp.name = "changed"
            result = getattr(ser, op)(cp)
            assert result.name is None

    def test_scalarop_preserve_name(self, datetime_series):
        # scalar ops always keep the series name
        result = datetime_series * 2
        assert result.name == datetime_series.name
class TestInplaceOperations:
    @pytest.mark.parametrize(
        "dtype1, dtype2, dtype_expected, dtype_mul",
        (
            ("Int64", "Int64", "Int64", "Int64"),
            ("float", "float", "float", "float"),
            ("Int64", "float", "Float64", "Float64"),
            ("Int64", "Float64", "Float64", "Float64"),
        ),
    )
    def test_series_inplace_ops(self, dtype1, dtype2, dtype_expected, dtype_mul):
        # GH 37910
        # In-place ops between mixed dtypes upcast (e.g. Int64 + float -> Float64).
        ser1 = Series([1], dtype=dtype1)
        ser2 = Series([2], dtype=dtype2)

        ser1 += ser2
        expected = Series([3], dtype=dtype_expected)
        tm.assert_series_equal(ser1, expected)

        ser1 -= ser2
        expected = Series([1], dtype=dtype_expected)
        tm.assert_series_equal(ser1, expected)

        ser1 *= ser2
        expected = Series([2], dtype=dtype_mul)
        tm.assert_series_equal(ser1, expected)
def test_none_comparison(request, series_with_simple_index):
    # Comparisons against None: ==/!= are always all-False/all-True, while
    # ordering comparisons depend on the dtype (datetimes raise).
    series = series_with_simple_index

    if len(series) < 1:
        request.applymarker(
            pytest.mark.xfail(reason="Test doesn't make sense on empty data")
        )

    # bug brought up by #1079
    # changed from TypeError in 0.17.0
    series.iloc[0] = np.nan

    # noinspection PyComparisonWithNone
    result = series == None  # noqa: E711
    assert not result.iat[0]
    assert not result.iat[1]

    # noinspection PyComparisonWithNone
    result = series != None  # noqa: E711
    assert result.iat[0]
    assert result.iat[1]

    result = None == series  # noqa: E711
    assert not result.iat[0]
    assert not result.iat[1]

    result = None != series  # noqa: E711
    assert result.iat[0]
    assert result.iat[1]

    if lib.is_np_dtype(series.dtype, "M") or isinstance(series.dtype, DatetimeTZDtype):
        # Following DatetimeIndex (and Timestamp) convention,
        # inequality comparisons with Series[datetime64] raise
        msg = "Invalid comparison"
        with pytest.raises(TypeError, match=msg):
            None > series
        with pytest.raises(TypeError, match=msg):
            series > None
    else:
        # other dtypes: ordering comparisons against None are all-False
        result = None > series
        assert not result.iat[0]
        assert not result.iat[1]

        result = series < None
        assert not result.iat[0]
        assert not result.iat[1]
def test_series_varied_multiindex_alignment():
# GH 20414
s1 = Series(
range(8),
index=pd.MultiIndex.from_product(
[list("ab"), list("xy"), [1, 2]], names=["ab", "xy", "num"]
),
)
s2 = Series(
[1000 * i for i in range(1, 5)],
index=pd.MultiIndex.from_product([list("xy"), [1, 2]], names=["xy", "num"]),
)
result = s1.loc[pd.IndexSlice[["a"], :, :]] + s2
expected = Series(
[1000, 2001, 3002, 4003],
index=pd.MultiIndex.from_tuples(
[("a", "x", 1), ("a", "x", 2), ("a", "y", 1), ("a", "y", 2)],
names=["ab", "xy", "num"],
),
)
tm.assert_series_equal(result, expected)
def test_rmod_consistent_large_series():
    # GH 29602
    # rmod with a negative scalar must give the same value for every element,
    # even on a series large enough to take the blocked evaluation path.
    n = 10001
    result = Series([2] * n).rmod(-1)
    expected = Series([1] * n)  # -1 % 2 == 1 under Python semantics
    tm.assert_series_equal(result, expected)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,157 @@
"""
Tests for Series cumulative operations.
See also
--------
tests.frame.test_cumulative
"""
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
# Map each Series cumulative-method name to the NumPy function that produces
# the reference result the tests below compare against.
methods = {
    "cumsum": np.cumsum,
    "cumprod": np.cumprod,
    "cummin": np.minimum.accumulate,
    "cummax": np.maximum.accumulate,
}
class TestSeriesCumulativeOps:
    @pytest.mark.parametrize("func", [np.cumsum, np.cumprod])
    def test_datetime_series(self, datetime_series, func):
        # Series result matches the NumPy function applied to the raw values.
        tm.assert_numpy_array_equal(
            func(datetime_series).values,
            func(np.array(datetime_series)),
            check_dtype=True,
        )

        # with missing values
        ts = datetime_series.copy()
        ts[::2] = np.nan

        # NaNs are skipped; compare only the positions that held real values
        result = func(ts)[1::2]
        expected = func(np.array(ts.dropna()))

        tm.assert_numpy_array_equal(result.values, expected, check_dtype=False)

    @pytest.mark.parametrize("method", ["cummin", "cummax"])
    def test_cummin_cummax(self, datetime_series, method):
        ufunc = methods[method]

        result = getattr(datetime_series, method)().values
        expected = ufunc(np.array(datetime_series))

        tm.assert_numpy_array_equal(result, expected)
        ts = datetime_series.copy()
        ts[::2] = np.nan
        result = getattr(ts, method)()[1::2]
        expected = ufunc(ts.dropna())

        # dropna() loses the freq, so align the result index before comparing
        result.index = result.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "ts",
        [
            pd.Timedelta(0),
            pd.Timestamp("1999-12-31"),
            pd.Timestamp("1999-12-31").tz_localize("US/Pacific"),
        ],
    )
    @pytest.mark.parametrize(
        "method, skipna, exp_tdi",
        [
            ["cummax", True, ["NaT", "2 days", "NaT", "2 days", "NaT", "3 days"]],
            ["cummin", True, ["NaT", "2 days", "NaT", "1 days", "NaT", "1 days"]],
            [
                "cummax",
                False,
                ["NaT", "NaT", "NaT", "NaT", "NaT", "NaT"],
            ],
            [
                "cummin",
                False,
                ["NaT", "NaT", "NaT", "NaT", "NaT", "NaT"],
            ],
        ],
    )
    def test_cummin_cummax_datetimelike(self, ts, method, skipna, exp_tdi):
        # with ts==pd.Timedelta(0), we are testing td64; with naive Timestamp
        # we are testing datetime64[ns]; with Timestamp[US/Pacific]
        # we are testing dt64tz
        tdi = pd.to_timedelta(["NaT", "2 days", "NaT", "1 days", "NaT", "3 days"])
        ser = pd.Series(tdi + ts)

        exp_tdi = pd.to_timedelta(exp_tdi)
        expected = pd.Series(exp_tdi + ts)
        result = getattr(ser, method)(skipna=skipna)
        tm.assert_series_equal(expected, result)

    @pytest.mark.parametrize(
        "func, exp",
        [
            ("cummin", pd.Period("2012-1-1", freq="D")),
            ("cummax", pd.Period("2012-1-2", freq="D")),
        ],
    )
    def test_cummin_cummax_period(self, func, exp):
        # GH#28385
        ser = pd.Series(
            [pd.Period("2012-1-1", freq="D"), pd.NaT, pd.Period("2012-1-2", freq="D")]
        )
        # skipna=False: NaT poisons every position from the first NaT onward
        result = getattr(ser, func)(skipna=False)
        expected = pd.Series([pd.Period("2012-1-1", freq="D"), pd.NaT, pd.NaT])
        tm.assert_series_equal(result, expected)

        result = getattr(ser, func)(skipna=True)
        expected = pd.Series([pd.Period("2012-1-1", freq="D"), pd.NaT, exp])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "arg",
        [
            [False, False, False, True, True, False, False],
            [False, False, False, False, False, False, False],
        ],
    )
    @pytest.mark.parametrize(
        "func", [lambda x: x, lambda x: ~x], ids=["identity", "inverse"]
    )
    @pytest.mark.parametrize("method", methods.keys())
    def test_cummethods_bool(self, arg, func, method):
        # GH#6270
        # checking Series method vs the ufunc applied to the values
        ser = func(pd.Series(arg))
        ufunc = methods[method]

        exp_vals = ufunc(ser.values)
        expected = pd.Series(exp_vals)

        result = getattr(ser, method)()

        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "method, expected",
        [
            ["cumsum", pd.Series([0, 1, np.nan, 1], dtype=object)],
            ["cumprod", pd.Series([False, 0, np.nan, 0])],
            ["cummin", pd.Series([False, False, np.nan, False])],
            ["cummax", pd.Series([False, True, np.nan, True])],
        ],
    )
    def test_cummethods_bool_in_object_dtype(self, method, expected):
        # object-dtype bools with a NaN: the NaN propagates from its position on
        ser = pd.Series([False, True, np.nan, False])
        result = getattr(ser, method)()
        tm.assert_series_equal(result, expected)

    def test_cumprod_timedelta(self):
        # GH#48111
        # product of Timedeltas is not a defined operation
        ser = pd.Series([pd.Timedelta(days=1), pd.Timedelta(days=3)])
        with pytest.raises(TypeError, match="cumprod not supported for Timedelta"):
            ser.cumprod()

View File

@ -0,0 +1,577 @@
from datetime import (
datetime,
timedelta,
)
import numpy as np
import pytest
from pandas._config import using_pyarrow_string_dtype
import pandas as pd
from pandas import (
Categorical,
DataFrame,
Index,
Series,
date_range,
option_context,
period_range,
timedelta_range,
)
import pandas._testing as tm
class TestSeriesRepr:
    # NOTE(review): several expected strings below encode exact console
    # alignment; runs of spaces inside the literals may have been collapsed
    # when this file was copied — verify them against actual pandas output.

    def test_multilevel_name_print_0(self):
        # GH#55415 None does not get printed, but 0 does
        # (matching DataFrame and flat index behavior)
        mi = pd.MultiIndex.from_product([range(2, 3), range(3, 4)], names=[0, None])
        ser = Series(1.5, index=mi)
        res = repr(ser)
        expected = "0 \n2 3 1.5\ndtype: float64"
        assert res == expected

    def test_multilevel_name_print(self, lexsorted_two_level_string_multiindex):
        index = lexsorted_two_level_string_multiindex
        ser = Series(range(len(index)), index=index, name="sth")
        expected = [
            "first second",
            "foo one 0",
            " two 1",
            " three 2",
            "bar one 3",
            " two 4",
            "baz two 5",
            " three 6",
            "qux one 7",
            " two 8",
            " three 9",
            "Name: sth, dtype: int64",
        ]
        expected = "\n".join(expected)
        assert repr(ser) == expected

    def test_small_name_printing(self):
        # Test small Series.
        s = Series([0, 1, 2])

        s.name = "test"
        assert "Name: test" in repr(s)

        s.name = None
        assert "Name:" not in repr(s)

    def test_big_name_printing(self):
        # Test big Series (diff code path).
        s = Series(range(1000))

        s.name = "test"
        assert "Name: test" in repr(s)

        s.name = None
        assert "Name:" not in repr(s)

    def test_empty_name_printing(self):
        s = Series(index=date_range("20010101", "20020101"), name="test", dtype=object)
        assert "Name: test" in repr(s)

    @pytest.mark.parametrize("args", [(), (0, -1)])
    def test_float_range(self, args):
        # repr of a numeric-indexed Series must not raise
        str(
            Series(
                np.random.default_rng(2).standard_normal(1000),
                index=np.arange(1000, *args),
            )
        )

    def test_empty_object(self):
        # empty
        str(Series(dtype=object))

    def test_string(self, string_series):
        str(string_series)
        str(string_series.astype(int))

        # with NaNs
        string_series[5:7] = np.nan
        str(string_series)

    def test_object(self, object_series):
        str(object_series)

    def test_datetime(self, datetime_series):
        str(datetime_series)
        # with Nones
        ots = datetime_series.astype("O")
        ots[::2] = None
        repr(ots)

    @pytest.mark.parametrize(
        "name",
        [
            "",
            1,
            1.2,
            "foo",
            "\u03B1\u03B2\u03B3",
            "loooooooooooooooooooooooooooooooooooooooooooooooooooong",
            ("foo", "bar", "baz"),
            (1, 2),
            ("foo", 1, 2.3),
            ("\u03B1", "\u03B2", "\u03B3"),
            ("\u03B1", "bar"),
        ],
    )
    def test_various_names(self, name, string_series):
        # various names
        string_series.name = name
        repr(string_series)

    def test_tuple_name(self):
        biggie = Series(
            np.random.default_rng(2).standard_normal(1000),
            index=np.arange(1000),
            name=("foo", "bar", "baz"),
        )
        repr(biggie)

    @pytest.mark.parametrize("arg", [100, 1001])
    def test_tidy_repr_name_0(self, arg):
        # tidy repr
        ser = Series(np.random.default_rng(2).standard_normal(arg), name=0)
        rep_str = repr(ser)
        assert "Name: 0" in rep_str

    @pytest.mark.xfail(
        using_pyarrow_string_dtype(), reason="TODO: investigate why this is failing"
    )
    def test_newline(self):
        # control characters in values, name and index are escaped in the repr
        ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])
        assert "\t" not in repr(ser)
        assert "\r" not in repr(ser)
        assert "a\n" not in repr(ser)

    @pytest.mark.parametrize(
        "name, expected",
        [
            ["foo", "Series([], Name: foo, dtype: int64)"],
            [None, "Series([], dtype: int64)"],
        ],
    )
    def test_empty_int64(self, name, expected):
        # with empty series (#4651)
        s = Series([], dtype=np.int64, name=name)
        assert repr(s) == expected

    def test_repr_bool_fails(self, capsys):
        s = Series(
            [
                DataFrame(np.random.default_rng(2).standard_normal((2, 2)))
                for i in range(5)
            ]
        )

        # It works (with no Cython exception barf)!
        repr(s)

        captured = capsys.readouterr()
        assert captured.err == ""

    def test_repr_name_iterable_indexable(self):
        s = Series([1, 2, 3], name=np.int64(3))

        # it works!
        repr(s)

        s.name = ("\u05d0",) * 2
        repr(s)

    def test_repr_max_rows(self):
        # GH 6863
        with option_context("display.max_rows", None):
            str(Series(range(1001)))  # should not raise exception

    def test_unicode_string_with_unicode(self):
        df = Series(["\u05d0"], name="\u05d1")
        str(df)

        ser = Series(["\u03c3"] * 10)
        repr(ser)

        ser2 = Series(["\u05d0"] * 1000)
        ser2.name = "title1"
        repr(ser2)

    def test_str_to_bytes_raises(self):
        # GH 26447
        df = Series(["abc"], name="abc")
        msg = "^'str' object cannot be interpreted as an integer$"
        with pytest.raises(TypeError, match=msg):
            bytes(df)

    def test_timeseries_repr_object_dtype(self):
        index = Index(
            [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], dtype=object
        )
        ts = Series(np.random.default_rng(2).standard_normal(len(index)), index)
        repr(ts)

        ts = Series(
            np.arange(20, dtype=np.float64), index=date_range("2020-01-01", periods=20)
        )
        # the repr of a freq-carrying index ends with its Freq: line
        assert repr(ts).splitlines()[-1].startswith("Freq:")

        ts2 = ts.iloc[np.random.default_rng(2).integers(0, len(ts) - 1, 400)]
        repr(ts2).splitlines()[-1]

    def test_latex_repr(self):
        pytest.importorskip("jinja2")  # uses Styler implementation
        result = r"""\begin{tabular}{ll}
\toprule
& 0 \\
\midrule
0 & $\alpha$ \\
1 & b \\
2 & c \\
\bottomrule
\end{tabular}
"""
        with option_context(
            "styler.format.escape", None, "styler.render.repr", "latex"
        ):
            s = Series([r"$\alpha$", "b", "c"])
            assert result == s._repr_latex_()

        # outside the option context the latex repr is disabled
        assert s._repr_latex_() is None

    def test_index_repr_in_frame_with_nan(self):
        # see gh-25061
        i = Index([1, np.nan])
        s = Series([1, 2], index=i)
        exp = """1.0 1\nNaN 2\ndtype: int64"""

        assert repr(s) == exp

    def test_format_pre_1900_dates(self):
        rng = date_range("1/1/1850", "1/1/1950", freq="YE-DEC")
        msg = "DatetimeIndex.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            rng.format()
        ts = Series(1, index=rng)
        repr(ts)

    def test_series_repr_nat(self):
        series = Series([0, 1000, 2000, pd.NaT._value], dtype="M8[ns]")

        result = repr(series)
        expected = (
            "0 1970-01-01 00:00:00.000000\n"
            "1 1970-01-01 00:00:00.000001\n"
            "2 1970-01-01 00:00:00.000002\n"
            "3 NaT\n"
            "dtype: datetime64[ns]"
        )
        assert result == expected

    def test_float_repr(self):
        # GH#35603
        # check float format when cast to object
        ser = Series([1.0]).astype(object)
        expected = "0 1.0\ndtype: object"
        assert repr(ser) == expected

    def test_different_null_objects(self):
        # GH#45263
        # each distinct null label (None, NaN, NaT, True) keeps its own repr
        ser = Series([1, 2, 3, 4], [True, None, np.nan, pd.NaT])
        result = repr(ser)
        expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64"
        assert result == expected
class TestCategoricalRepr:
    # NOTE(review): the expected repr literals below depend on exact console
    # alignment; internal runs of spaces may have been collapsed when this
    # file was copied — verify against actual pandas repr output.

    def test_categorical_repr_unicode(self):
        # see gh-21002
        class County:
            name = "San Sebastián"
            state = "PR"

            def __repr__(self) -> str:
                return self.name + ", " + self.state

        cat = Categorical([County() for _ in range(61)])
        idx = Index(cat)
        ser = idx.to_series()

        # repr/str of non-ASCII object categories must not raise
        repr(ser)
        str(ser)

    def test_categorical_repr(self, using_infer_string):
        a = Series(Categorical([1, 2, 3, 4]))
        exp = (
            "0 1\n1 2\n2 3\n3 4\n"
            "dtype: category\nCategories (4, int64): [1, 2, 3, 4]"
        )
        assert exp == a.__str__()

        a = Series(Categorical(["a", "b"] * 25))
        if using_infer_string:
            exp = (
                "0 a\n1 b\n"
                " ..\n"
                "48 a\n49 b\n"
                "Length: 50, dtype: category\nCategories (2, string): [a, b]"
            )
        else:
            exp = (
                "0 a\n1 b\n"
                " ..\n"
                "48 a\n49 b\n"
                "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
            )
        with option_context("display.max_rows", 5):
            assert exp == repr(a)

        levs = list("abcdefghijklmnopqrstuvwxyz")
        a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
        if using_infer_string:
            exp = (
                "0 a\n1 b\n"
                "dtype: category\n"
                "Categories (26, string): [a < b < c < d ... w < x < y < z]"
            )
        else:
            exp = (
                "0 a\n1 b\n"
                "dtype: category\n"
                "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
                "'w' < 'x' < 'y' < 'z']"
            )
        assert exp == a.__str__()

    def test_categorical_series_repr(self):
        s = Series(Categorical([1, 2, 3]))
        exp = """0 1
1 2
2 3
dtype: category
Categories (3, int64): [1, 2, 3]"""

        assert repr(s) == exp

        s = Series(Categorical(np.arange(10)))
        exp = f"""0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: category
Categories (10, {np.dtype(int)}): [0, 1, 2, 3, ..., 6, 7, 8, 9]"""

        assert repr(s) == exp

    def test_categorical_series_repr_ordered(self):
        s = Series(Categorical([1, 2, 3], ordered=True))
        exp = """0 1
1 2
2 3
dtype: category
Categories (3, int64): [1 < 2 < 3]"""

        assert repr(s) == exp

        s = Series(Categorical(np.arange(10), ordered=True))
        exp = f"""0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: category
Categories (10, {np.dtype(int)}): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]"""

        assert repr(s) == exp

    def test_categorical_series_repr_datetime(self):
        idx = date_range("2011-01-01 09:00", freq="h", periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00:00
1 2011-01-01 10:00:00
2 2011-01-01 11:00:00
3 2011-01-01 12:00:00
4 2011-01-01 13:00:00
dtype: category
Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,
2011-01-01 12:00:00, 2011-01-01 13:00:00]"""  # noqa: E501

        assert repr(s) == exp

        idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00:00-05:00
1 2011-01-01 10:00:00-05:00
2 2011-01-01 11:00:00-05:00
3 2011-01-01 12:00:00-05:00
4 2011-01-01 13:00:00-05:00
dtype: category
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,
2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,
2011-01-01 13:00:00-05:00]"""  # noqa: E501

        assert repr(s) == exp

    def test_categorical_series_repr_datetime_ordered(self):
        idx = date_range("2011-01-01 09:00", freq="h", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00:00
1 2011-01-01 10:00:00
2 2011-01-01 11:00:00
3 2011-01-01 12:00:00
4 2011-01-01 13:00:00
dtype: category
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
2011-01-01 12:00:00 < 2011-01-01 13:00:00]"""  # noqa: E501

        assert repr(s) == exp

        idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00:00-05:00
1 2011-01-01 10:00:00-05:00
2 2011-01-01 11:00:00-05:00
3 2011-01-01 12:00:00-05:00
4 2011-01-01 13:00:00-05:00
dtype: category
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
2011-01-01 13:00:00-05:00]"""  # noqa: E501

        assert repr(s) == exp

    def test_categorical_series_repr_period(self):
        idx = period_range("2011-01-01 09:00", freq="h", periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01-01 09:00
1 2011-01-01 10:00
2 2011-01-01 11:00
3 2011-01-01 12:00
4 2011-01-01 13:00
dtype: category
Categories (5, period[h]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2011-01-01 13:00]"""  # noqa: E501

        assert repr(s) == exp

        idx = period_range("2011-01", freq="M", periods=5)
        s = Series(Categorical(idx))
        exp = """0 2011-01
1 2011-02
2 2011-03
3 2011-04
4 2011-05
dtype: category
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""

        assert repr(s) == exp

    def test_categorical_series_repr_period_ordered(self):
        idx = period_range("2011-01-01 09:00", freq="h", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01-01 09:00
1 2011-01-01 10:00
2 2011-01-01 11:00
3 2011-01-01 12:00
4 2011-01-01 13:00
dtype: category
Categories (5, period[h]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2011-01-01 13:00]"""  # noqa: E501

        assert repr(s) == exp

        idx = period_range("2011-01", freq="M", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 2011-01
1 2011-02
2 2011-03
3 2011-04
4 2011-05
dtype: category
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""

        assert repr(s) == exp

    def test_categorical_series_repr_timedelta(self):
        idx = timedelta_range("1 days", periods=5)
        s = Series(Categorical(idx))
        exp = """0 1 days
1 2 days
2 3 days
3 4 days
4 5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""

        assert repr(s) == exp

        idx = timedelta_range("1 hours", periods=10)
        s = Series(Categorical(idx))
        exp = """0 0 days 01:00:00
1 1 days 01:00:00
2 2 days 01:00:00
3 3 days 01:00:00
4 4 days 01:00:00
5 5 days 01:00:00
6 6 days 01:00:00
7 7 days 01:00:00
8 8 days 01:00:00
9 9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00,
8 days 01:00:00, 9 days 01:00:00]"""  # noqa: E501

        assert repr(s) == exp

    def test_categorical_series_repr_timedelta_ordered(self):
        idx = timedelta_range("1 days", periods=5)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 1 days
1 2 days
2 3 days
3 4 days
4 5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""

        assert repr(s) == exp

        idx = timedelta_range("1 hours", periods=10)
        s = Series(Categorical(idx, ordered=True))
        exp = """0 0 days 01:00:00
1 1 days 01:00:00
2 2 days 01:00:00
3 3 days 01:00:00
4 4 days 01:00:00
5 5 days 01:00:00
6 6 days 01:00:00
7 7 days 01:00:00
8 8 days 01:00:00
9 9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 <
8 days 01:00:00 < 9 days 01:00:00]"""  # noqa: E501

        assert repr(s) == exp

View File

@ -0,0 +1,35 @@
class TestIteration:
def test_keys(self, datetime_series):
assert datetime_series.keys() is datetime_series.index
def test_iter_datetimes(self, datetime_series):
for i, val in enumerate(datetime_series):
# pylint: disable-next=unnecessary-list-index-lookup
assert val == datetime_series.iloc[i]
def test_iter_strings(self, string_series):
for i, val in enumerate(string_series):
# pylint: disable-next=unnecessary-list-index-lookup
assert val == string_series.iloc[i]
def test_iteritems_datetimes(self, datetime_series):
for idx, val in datetime_series.items():
assert val == datetime_series[idx]
def test_iteritems_strings(self, string_series):
for idx, val in string_series.items():
assert val == string_series[idx]
# assert is lazy (generators don't define reverse, lists do)
assert not hasattr(string_series.items(), "reverse")
def test_items_datetimes(self, datetime_series):
for idx, val in datetime_series.items():
assert val == datetime_series[idx]
def test_items_strings(self, string_series):
for idx, val in string_series.items():
assert val == string_series[idx]
# assert is lazy (generators don't define reverse, lists do)
assert not hasattr(string_series.items(), "reverse")

View File

@ -0,0 +1,548 @@
from datetime import datetime
import operator
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
Series,
bdate_range,
)
import pandas._testing as tm
from pandas.core import ops
class TestSeriesLogicalOps:
    @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
    @pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor])
    def test_bool_operators_with_nas(self, bool_op):
        # boolean &, |, ^ should work with object arrays and propagate NAs
        ser = Series(bdate_range("1/1/2000", periods=10), dtype=object)
        ser[::2] = np.nan

        mask = ser.isna()
        filled = ser.fillna(ser[0])

        result = bool_op(ser < ser[9], ser > ser[3])

        expected = bool_op(filled < filled[9], filled > filled[3])
        # positions that were NA come out False regardless of the operator
        expected[mask] = False

        tm.assert_series_equal(result, expected)
def test_logical_operators_bool_dtype_with_empty(self):
# GH#9016: support bitwise op for integer types
index = list("bca")
s_tft = Series([True, False, True], index=index)
s_fff = Series([False, False, False], index=index)
s_empty = Series([], dtype=object)
res = s_tft & s_empty
expected = s_fff.sort_index()
tm.assert_series_equal(res, expected)
res = s_tft | s_empty
expected = s_tft.sort_index()
tm.assert_series_equal(res, expected)
def test_logical_operators_int_dtype_with_int_dtype(self):
# GH#9016: support bitwise op for integer types
s_0123 = Series(range(4), dtype="int64")
s_3333 = Series([3] * 4)
s_4444 = Series([4] * 4)
res = s_0123 & s_3333
expected = Series(range(4), dtype="int64")
tm.assert_series_equal(res, expected)
res = s_0123 | s_4444
expected = Series(range(4, 8), dtype="int64")
tm.assert_series_equal(res, expected)
s_1111 = Series([1] * 4, dtype="int8")
res = s_0123 & s_1111
expected = Series([0, 1, 0, 1], dtype="int64")
tm.assert_series_equal(res, expected)
res = s_0123.astype(np.int16) | s_1111.astype(np.int32)
expected = Series([1, 1, 3, 3], dtype="int32")
tm.assert_series_equal(res, expected)
def test_logical_operators_int_dtype_with_int_scalar(self):
# GH#9016: support bitwise op for integer types
s_0123 = Series(range(4), dtype="int64")
res = s_0123 & 0
expected = Series([0] * 4)
tm.assert_series_equal(res, expected)
res = s_0123 & 1
expected = Series([0, 1, 0, 1])
tm.assert_series_equal(res, expected)
def test_logical_operators_int_dtype_with_float(self):
    """int-dtype Series & float operands must raise TypeError (GH#9016)."""
    # GH#9016: support bitwise op for integer types
    s_0123 = Series(range(4), dtype="int64")

    warn_msg = (
        r"Logical ops \(and, or, xor\) between Pandas objects and "
        "dtype-less sequences"
    )

    # float scalars are rejected outright
    msg = "Cannot perform.+with a dtyped.+array and scalar of type"
    with pytest.raises(TypeError, match=msg):
        s_0123 & np.nan
    with pytest.raises(TypeError, match=msg):
        s_0123 & 3.14

    # float sequences are also rejected; the bare list additionally triggers
    # the deprecation warning about dtype-less sequences
    msg = "unsupported operand type.+for &:"
    with pytest.raises(TypeError, match=msg):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            s_0123 & [0.1, 4, 3.14, 2]
    with pytest.raises(TypeError, match=msg):
        s_0123 & np.array([0.1, 4, 3.14, 2])
    with pytest.raises(TypeError, match=msg):
        s_0123 & Series([0.1, 4, -3.14, 2])
def test_logical_operators_int_dtype_with_str(self):
    """int-dtype Series & string operands must raise TypeError."""
    s_1111 = Series([1] * 4, dtype="int8")

    warn_msg = (
        r"Logical ops \(and, or, xor\) between Pandas objects and "
        "dtype-less sequences"
    )

    # a scalar string is rejected directly
    msg = "Cannot perform 'and_' with a dtyped.+array and scalar of type"
    with pytest.raises(TypeError, match=msg):
        s_1111 & "a"
    # a bare list of strings is a dtype-less sequence, so a FutureWarning
    # is emitted before the TypeError
    with pytest.raises(TypeError, match="unsupported operand.+for &"):
        with tm.assert_produces_warning(FutureWarning, match=warn_msg):
            s_1111 & ["a", "b", "c", "d"]
def test_logical_operators_int_dtype_with_bool(self):
    """int-dtype Series with bool scalar / bool sequence operands (GH#9016)."""
    # GH#9016: support bitwise op for integer types
    s_0123 = Series(range(4), dtype="int64")

    expected = Series([False] * 4)

    # & False masks everything
    result = s_0123 & False
    tm.assert_series_equal(result, expected)

    warn_msg = (
        r"Logical ops \(and, or, xor\) between Pandas objects and "
        "dtype-less sequences"
    )
    # bare list / tuple of bools: same result, but the dtype-less sequence
    # is deprecated and warns
    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
        result = s_0123 & [False]
    tm.assert_series_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning, match=warn_msg):
        result = s_0123 & (False,)
    tm.assert_series_equal(result, expected)

    # x ^ False reflects the truthiness of each element
    result = s_0123 ^ False
    expected = Series([False, True, True, True])
    tm.assert_series_equal(result, expected)
def test_logical_operators_int_dtype_with_object(self, using_infer_string):
    """int-dtype Series & object-dtype Series (GH#9016)."""
    # GH#9016: support bitwise op for integer types
    s_0123 = Series(range(4), dtype="int64")

    # object Series of bools/NaN: NaN behaves as False for &
    result = s_0123 & Series([False, np.nan, False, False])
    expected = Series([False] * 4)
    tm.assert_series_equal(result, expected)

    s_abNd = Series(["a", "b", np.nan, "d"])
    if using_infer_string:
        # with string-dtype inference the op is dispatched to pyarrow,
        # which has no kernel for int & string
        import pyarrow as pa

        with pytest.raises(pa.lib.ArrowNotImplementedError, match="has no kernel"):
            s_0123 & s_abNd
    else:
        with pytest.raises(TypeError, match="unsupported.* 'int' and 'str'"):
            s_0123 & s_abNd
def test_logical_operators_bool_dtype_with_int(self):
index = list("bca")
s_tft = Series([True, False, True], index=index)
s_fff = Series([False, False, False], index=index)
res = s_tft & 0
expected = s_fff
tm.assert_series_equal(res, expected)
res = s_tft & 1
expected = s_tft
tm.assert_series_equal(res, expected)
def test_logical_ops_bool_dtype_with_ndarray(self):
    """bool Series vs list/ndarray/Index/Series containing bool/None/NaN.

    The bare-list operand is a deprecated dtype-less sequence and warns;
    ndarray/Index/Series operands operate directly.  The expected values
    below show that missing right-hand entries end up False in the result.
    """
    # make sure we operate on ndarray the same as Series
    left = Series([True, True, True, False, True])
    right = [True, False, None, True, np.nan]

    msg = (
        r"Logical ops \(and, or, xor\) between Pandas objects and "
        "dtype-less sequences"
    )

    expected = Series([True, False, False, False, False])
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = left & right
    tm.assert_series_equal(result, expected)
    result = left & np.array(right)
    tm.assert_series_equal(result, expected)
    result = left & Index(right)
    tm.assert_series_equal(result, expected)
    result = left & Series(right)
    tm.assert_series_equal(result, expected)

    expected = Series([True, True, True, True, True])
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = left | right
    tm.assert_series_equal(result, expected)
    result = left | np.array(right)
    tm.assert_series_equal(result, expected)
    result = left | Index(right)
    tm.assert_series_equal(result, expected)
    result = left | Series(right)
    tm.assert_series_equal(result, expected)

    expected = Series([False, True, True, True, True])
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = left ^ right
    tm.assert_series_equal(result, expected)
    result = left ^ np.array(right)
    tm.assert_series_equal(result, expected)
    result = left ^ Index(right)
    tm.assert_series_equal(result, expected)
    result = left ^ Series(right)
    tm.assert_series_equal(result, expected)
def test_logical_operators_int_dtype_with_bool_dtype_and_reindex(self):
    """Logical ops between misaligned bool and int Series (GH#9016).

    Reindexing to the union of labels currently casts to object and warns
    (GH 52538 deprecation); labels missing on either side become False.
    """
    # NOTE: the original had `s_tft = Series(...)` duplicated on two
    # consecutive lines; the dead duplicate assignment has been removed.
    index = list("bca")
    s_tft = Series([True, False, True], index=index)
    s_tff = Series([True, False, False], index=index)
    s_0123 = Series(range(4), dtype="int64")

    # s_0123 will be all false now because of reindexing like s_tft
    expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"])
    with tm.assert_produces_warning(FutureWarning):
        result = s_tft & s_0123
    tm.assert_series_equal(result, expected)

    # GH 52538: Deprecate casting to object type when reindex is needed;
    # matches DataFrame behavior
    expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"])
    with tm.assert_produces_warning(FutureWarning):
        result = s_0123 & s_tft
    tm.assert_series_equal(result, expected)

    s_a0b1c0 = Series([1], list("b"))

    with tm.assert_produces_warning(FutureWarning):
        res = s_tft & s_a0b1c0
    expected = s_tff.reindex(list("abc"))
    tm.assert_series_equal(res, expected)

    with tm.assert_produces_warning(FutureWarning):
        res = s_tft | s_a0b1c0
    expected = s_tft.reindex(list("abc"))
    tm.assert_series_equal(res, expected)
def test_scalar_na_logical_ops_corners(self):
    """Corner cases: & with a datetime scalar raises; NaN rows become False."""
    s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])

    # a datetime scalar is not a valid operand for a dtyped int array
    msg = "Cannot perform.+with a dtyped.+array and scalar of type"
    with pytest.raises(TypeError, match=msg):
        s & datetime(2005, 1, 1)

    s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
    s[::2] = np.nan

    # rows that are NaN on the left come out False; everything else True
    expected = Series(True, index=s.index)
    expected[::2] = False

    msg = (
        r"Logical ops \(and, or, xor\) between Pandas objects and "
        "dtype-less sequences"
    )
    # & against a plain list (dtype-less sequence) is deprecated and warns
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = s & list(s)
    tm.assert_series_equal(result, expected)
def test_scalar_na_logical_ops_corners_aligns(self):
s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
s[::2] = np.nan
d = DataFrame({"A": s})
expected = DataFrame(False, index=range(9), columns=["A"] + list(range(9)))
result = s & d
tm.assert_frame_equal(result, expected)
result = d & s
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("op", [operator.and_, operator.or_, operator.xor])
def test_logical_ops_with_index(self, op):
# GH#22092, GH#19792
ser = Series([True, True, False, False])
idx1 = Index([True, False, True, False])
idx2 = Index([1, 0, 1, 0])
expected = Series([op(ser[n], idx1[n]) for n in range(len(ser))])
result = op(ser, idx1)
tm.assert_series_equal(result, expected)
expected = Series([op(ser[n], idx2[n]) for n in range(len(ser))], dtype=bool)
result = op(ser, idx2)
tm.assert_series_equal(result, expected)
def test_reversed_xor_with_index_returns_series(self):
# GH#22092, GH#19792 pre-2.0 these were aliased to setops
ser = Series([True, True, False, False])
idx1 = Index([True, False, True, False], dtype=bool)
idx2 = Index([1, 0, 1, 0])
expected = Series([False, True, True, False])
result = idx1 ^ ser
tm.assert_series_equal(result, expected)
result = idx2 ^ ser
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"op",
[
ops.rand_,
ops.ror_,
],
)
def test_reversed_logical_op_with_index_returns_series(self, op):
# GH#22092, GH#19792
ser = Series([True, True, False, False])
idx1 = Index([True, False, True, False])
idx2 = Index([1, 0, 1, 0])
expected = Series(op(idx1.values, ser.values))
result = op(ser, idx1)
tm.assert_series_equal(result, expected)
expected = op(ser, Series(idx2))
result = op(ser, idx2)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"op, expected",
[
(ops.rand_, Series([False, False])),
(ops.ror_, Series([True, True])),
(ops.rxor, Series([True, True])),
],
)
def test_reverse_ops_with_index(self, op, expected):
# https://github.com/pandas-dev/pandas/pull/23628
# multi-set Index ops are buggy, so let's avoid duplicates...
# GH#49503
ser = Series([True, False])
idx = Index([False, True])
result = op(ser, idx)
tm.assert_series_equal(result, expected)
def test_logical_ops_label_based(self, using_infer_string):
    """Logical ops align on labels; non-overlapping labels fill as False."""
    # GH#4947
    # logical ops should be label based
    a = Series([True, False, True], list("bca"))
    b = Series([False, True, False], list("abc"))

    expected = Series([False, True, False], list("abc"))
    result = a & b
    tm.assert_series_equal(result, expected)

    expected = Series([True, True, False], list("abc"))
    result = a | b
    tm.assert_series_equal(result, expected)

    expected = Series([True, False, False], list("abc"))
    result = a ^ b
    tm.assert_series_equal(result, expected)

    # rhs is bigger: result takes the union index, extra labels become False
    a = Series([True, False, True], list("bca"))
    b = Series([False, True, False, True], list("abcd"))

    expected = Series([False, True, False, False], list("abcd"))
    result = a & b
    tm.assert_series_equal(result, expected)

    expected = Series([True, True, False, False], list("abcd"))
    result = a | b
    tm.assert_series_equal(result, expected)

    # filling
    # vs empty
    empty = Series([], dtype=object)

    result = a & empty.copy()
    expected = Series([False, False, False], list("abc"))
    tm.assert_series_equal(result, expected)

    result = a | empty.copy()
    expected = Series([True, True, False], list("abc"))
    tm.assert_series_equal(result, expected)

    # vs non-matching: mixing bool with int labels warns (deprecated cast)
    with tm.assert_produces_warning(FutureWarning):
        result = a & Series([1], ["z"])
    expected = Series([False, False, False, False], list("abcz"))
    tm.assert_series_equal(result, expected)

    with tm.assert_produces_warning(FutureWarning):
        result = a | Series([1], ["z"])
    expected = Series([True, True, False, False], list("abcz"))
    tm.assert_series_equal(result, expected)

    # identity
    # we would like s[s|e] == s to hold for any e, whether empty or not
    with tm.assert_produces_warning(FutureWarning):
        for e in [
            empty.copy(),
            Series([1], ["z"]),
            Series(np.nan, b.index),
            Series(np.nan, a.index),
        ]:
            result = a[a | e]
            tm.assert_series_equal(result, a[a])

    for e in [Series(["z"])]:
        # with string inference the op goes through pyarrow, which raises
        warn = FutureWarning if using_infer_string else None
        if using_infer_string:
            import pyarrow as pa

            with tm.assert_produces_warning(warn, match="Operation between non"):
                with pytest.raises(
                    pa.lib.ArrowNotImplementedError, match="has no kernel"
                ):
                    result = a[a | e]
        else:
            result = a[a | e]
        tm.assert_series_equal(result, a[a])

    # vs scalars
    index = list("bca")
    t = Series([True, False, True])

    # truthy scalar | x is always True
    for v in [True, 1, 2]:
        result = Series([True, False, True], index=index) | v
        expected = Series([True, True, True], index=index)
        tm.assert_series_equal(result, expected)

    # NaN / string scalars are invalid operands
    msg = "Cannot perform.+with a dtyped.+array and scalar of type"
    for v in [np.nan, "foo"]:
        with pytest.raises(TypeError, match=msg):
            t | v

    # falsy scalar | x is an identity
    for v in [False, 0]:
        result = Series([True, False, True], index=index) | v
        expected = Series([True, False, True], index=index)
        tm.assert_series_equal(result, expected)

    # truthy scalar & x is an identity
    for v in [True, 1]:
        result = Series([True, False, True], index=index) & v
        expected = Series([True, False, True], index=index)
        tm.assert_series_equal(result, expected)

    # falsy scalar & x masks everything
    for v in [False, 0]:
        result = Series([True, False, True], index=index) & v
        expected = Series([False, False, False], index=index)
        tm.assert_series_equal(result, expected)

    msg = "Cannot perform.+with a dtyped.+array and scalar of type"
    for v in [np.nan]:
        with pytest.raises(TypeError, match=msg):
            t & v
def test_logical_ops_df_compat(self):
# GH#1134
s1 = Series([True, False, True], index=list("ABC"), name="x")
s2 = Series([True, True, False], index=list("ABD"), name="x")
exp = Series([True, False, False, False], index=list("ABCD"), name="x")
tm.assert_series_equal(s1 & s2, exp)
tm.assert_series_equal(s2 & s1, exp)
# True | np.nan => True
exp_or1 = Series([True, True, True, False], index=list("ABCD"), name="x")
tm.assert_series_equal(s1 | s2, exp_or1)
# np.nan | True => np.nan, filled with False
exp_or = Series([True, True, False, False], index=list("ABCD"), name="x")
tm.assert_series_equal(s2 | s1, exp_or)
# DataFrame doesn't fill nan with False
tm.assert_frame_equal(s1.to_frame() & s2.to_frame(), exp.to_frame())
tm.assert_frame_equal(s2.to_frame() & s1.to_frame(), exp.to_frame())
exp = DataFrame({"x": [True, True, np.nan, np.nan]}, index=list("ABCD"))
tm.assert_frame_equal(s1.to_frame() | s2.to_frame(), exp_or1.to_frame())
tm.assert_frame_equal(s2.to_frame() | s1.to_frame(), exp_or.to_frame())
# different length
s3 = Series([True, False, True], index=list("ABC"), name="x")
s4 = Series([True, True, True, True], index=list("ABCD"), name="x")
exp = Series([True, False, True, False], index=list("ABCD"), name="x")
tm.assert_series_equal(s3 & s4, exp)
tm.assert_series_equal(s4 & s3, exp)
# np.nan | True => np.nan, filled with False
exp_or1 = Series([True, True, True, False], index=list("ABCD"), name="x")
tm.assert_series_equal(s3 | s4, exp_or1)
# True | np.nan => True
exp_or = Series([True, True, True, True], index=list("ABCD"), name="x")
tm.assert_series_equal(s4 | s3, exp_or)
tm.assert_frame_equal(s3.to_frame() & s4.to_frame(), exp.to_frame())
tm.assert_frame_equal(s4.to_frame() & s3.to_frame(), exp.to_frame())
tm.assert_frame_equal(s3.to_frame() | s4.to_frame(), exp_or1.to_frame())
tm.assert_frame_equal(s4.to_frame() | s3.to_frame(), exp_or.to_frame())
@pytest.mark.xfail(reason="Will pass once #52839 deprecation is enforced")
def test_int_dtype_different_index_not_bool(self):
    """Misaligned int Series should XOR to int values, not bools (GH 52500)."""
    lhs = Series([1, 2, 3], index=[10, 11, 23], name="a")
    rhs = Series([10, 20, 30], index=[11, 10, 23], name="a")
    # after alignment: 1^20, 2^10, 3^30
    expected = Series([21, 8, 29], index=[10, 11, 23], name="a")

    tm.assert_series_equal(np.bitwise_xor(lhs, rhs), expected)
    tm.assert_series_equal(lhs ^ rhs, expected)
def test_pyarrow_numpy_string_invalid(self):
    """bool Series vs pyarrow-numpy strings: ==/!= are defined, ordering raises."""
    # GH#56008
    pytest.importorskip("pyarrow")
    bools = Series([False, True])
    strs = Series(["a", "b"], dtype="string[pyarrow_numpy]")

    # equality comparisons never match across these dtypes
    tm.assert_series_equal(bools == strs, Series(False, index=bools.index))
    tm.assert_series_equal(bools != strs, Series(True, index=bools.index))

    # ordered comparison is rejected outright
    with pytest.raises(TypeError, match="Invalid comparison"):
        bools > strs

View File

@ -0,0 +1,105 @@
from datetime import timedelta
import numpy as np
import pytest
from pandas._libs import iNaT
import pandas as pd
from pandas import (
Categorical,
Index,
NaT,
Series,
isna,
)
import pandas._testing as tm
class TestSeriesMissingData:
    """Missing-data behavior of Series: categorical NaN codes, inf-as-na,
    timedelta NaN handling, and dropna."""

    def test_categorical_nan_handling(self):
        # NaNs are represented as -1 in labels
        s = Series(Categorical(["a", "b", np.nan, "a"]))
        tm.assert_index_equal(s.cat.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(
            s.values.codes, np.array([0, 1, -1, 0], dtype=np.int8)
        )

    def test_isna_for_inf(self):
        # with the (deprecated) use_inf_as_na option, inf counts as missing
        # for both isna and dropna
        s = Series(["a", np.inf, np.nan, pd.NA, 1.0])
        msg = "use_inf_as_na option is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            with pd.option_context("mode.use_inf_as_na", True):
                r = s.isna()
                dr = s.dropna()
        e = Series([False, True, True, True, False])
        de = Series(["a", 1.0], index=[0, 4])
        tm.assert_series_equal(r, e)
        tm.assert_series_equal(dr, de)

    def test_timedelta64_nan(self):
        td = Series([timedelta(days=i) for i in range(10)])

        # nan ops on timedeltas
        td1 = td.copy()
        td1[0] = np.nan
        assert isna(td1[0])
        assert td1[0]._value == iNaT
        td1[0] = td[0]
        assert not isna(td1[0])

        # GH#16674 iNaT is treated as an integer when given by the user,
        # which upcasts the Series to object dtype (and warns)
        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
            td1[1] = iNaT
        assert not isna(td1[1])
        assert td1.dtype == np.object_
        assert td1[1] == iNaT
        td1[1] = td[1]
        assert not isna(td1[1])

        # NaT, by contrast, is recognized as missing
        td1[2] = NaT
        assert isna(td1[2])
        assert td1[2]._value == iNaT
        td1[2] = td[2]
        assert not isna(td1[2])

        # boolean setting
        # GH#2899 boolean setting
        td3 = np.timedelta64(timedelta(days=3))
        td7 = np.timedelta64(timedelta(days=7))
        td[(td > td3) & (td < td7)] = np.nan
        assert isna(td).sum() == 3

    @pytest.mark.xfail(
        reason="Chained inequality raises when trying to define 'selector'"
    )
    def test_logical_range_select(self, datetime_series):
        # NumPy limitation =(
        # https://github.com/pandas-dev/pandas/commit/9030dc021f07c76809848925cb34828f6c8484f3
        selector = -0.5 <= datetime_series <= 0.5
        expected = (datetime_series >= -0.5) & (datetime_series <= 0.5)
        tm.assert_series_equal(selector, expected)

    def test_valid(self, datetime_series):
        # dropna removes exactly the NaN rows and agrees with notna masking
        ts = datetime_series.copy()
        ts.index = ts.index._with_freq(None)
        ts[::2] = np.nan

        result = ts.dropna()
        assert len(result) == ts.count()
        tm.assert_series_equal(result, ts[1::2])
        tm.assert_series_equal(result, ts[pd.notna(ts)])
def test_hasnans_uncached_for_series():
    """Series.hasnans must not be cached, unlike Index.hasnans (GH#19700)."""
    # float64 dtype so the NaN assignment below does not trigger an upcast
    index = Index([0, 1], dtype="float64")
    assert index.hasnans is False
    # Index memoizes the computed value ...
    assert "hasnans" in index._cache

    series = index.to_series()
    assert series.hasnans is False
    # ... but the derived Series carries no such cache at all,
    assert not hasattr(series, "_cache")
    # so mutating it is immediately reflected
    series.iloc[-1] = np.nan
    assert series.hasnans is True

View File

@ -0,0 +1,46 @@
"""
Tests for np.foo applied to Series, not necessarily ufuncs.
"""
import numpy as np
import pytest
import pandas.util._test_decorators as td
from pandas import Series
import pandas._testing as tm
class TestPtp:
    def test_ptp(self):
        """np.ptp on a Series matches np.ptp on the underlying ndarray (GH#21614)."""
        data = np.random.default_rng(2).standard_normal(1000)
        assert np.ptp(Series(data)) == np.ptp(data)
def test_numpy_unique(datetime_series):
    # smoke test: np.unique should accept a Series without raising
    # it works!
    np.unique(datetime_series)
@pytest.mark.parametrize("index", [["a", "b", "c", "d", "e"], None])
def test_numpy_argwhere(index):
    """np.argwhere on a boolean Series yields positional hits (GH#35331)."""
    ser = Series(range(5), index=index, dtype=np.int64)
    # positions 3 and 4 hold the values greater than 2
    hits = np.argwhere(ser > 2).astype(np.int64)
    tm.assert_numpy_array_equal(hits, np.array([[3], [4]], dtype=np.int64))
@td.skip_if_no("pyarrow")
def test_log_arrow_backed_missing_value():
    """np.log on a masked Arrow-backed float Series matches numpy-backed (GH#56285)."""
    arrow_ser = Series([1, 2, None], dtype="float64[pyarrow]")
    numpy_ser = Series([1, 2, None], dtype="float64")
    tm.assert_series_equal(np.log(arrow_ser), np.log(numpy_ser))

Some files were not shown because too many files have changed in this diff Show More