Updated script that can be controlled by a Node.js web app
This commit is contained in:
@ -0,0 +1,9 @@
|
||||
# ruff: noqa: TCH004
from typing import TYPE_CHECKING

# The ``style`` submodule is imported only for static type checkers here;
# at runtime the name is presumably supplied lazily elsewhere (e.g. a module
# ``__getattr__``) — NOTE(review): confirm against the package's loader.
if TYPE_CHECKING:
    # import modules that have public classes/functions
    from pandas.io.formats import style

# and mark only those modules as public
__all__ = ["style"]
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
157
lib/python3.13/site-packages/pandas/io/formats/_color_data.py
Normal file
157
lib/python3.13/site-packages/pandas/io/formats/_color_data.py
Normal file
@ -0,0 +1,157 @@
|
||||
# GH37967: Enable the use of CSS named colors, as defined in
|
||||
# matplotlib.colors.CSS4_COLORS, when exporting to Excel.
|
||||
# This data has been copied here, instead of being imported from matplotlib,
|
||||
# not to have ``to_excel`` methods require matplotlib.
|
||||
# source: matplotlib._color_data (3.3.3)
|
||||
from __future__ import annotations
|
||||
|
||||
# Mapping of CSS named color -> RRGGBB hex string (no leading "#"),
# as used when translating styles for Excel output.
CSS4_COLORS = {
    "aliceblue": "F0F8FF",
    "antiquewhite": "FAEBD7",
    "aqua": "00FFFF",
    "aquamarine": "7FFFD4",
    "azure": "F0FFFF",
    "beige": "F5F5DC",
    "bisque": "FFE4C4",
    "black": "000000",
    "blanchedalmond": "FFEBCD",
    "blue": "0000FF",
    "blueviolet": "8A2BE2",
    "brown": "A52A2A",
    "burlywood": "DEB887",
    "cadetblue": "5F9EA0",
    "chartreuse": "7FFF00",
    "chocolate": "D2691E",
    "coral": "FF7F50",
    "cornflowerblue": "6495ED",
    "cornsilk": "FFF8DC",
    "crimson": "DC143C",
    "cyan": "00FFFF",
    "darkblue": "00008B",
    "darkcyan": "008B8B",
    "darkgoldenrod": "B8860B",
    "darkgray": "A9A9A9",
    "darkgreen": "006400",
    "darkgrey": "A9A9A9",
    "darkkhaki": "BDB76B",
    "darkmagenta": "8B008B",
    "darkolivegreen": "556B2F",
    "darkorange": "FF8C00",
    "darkorchid": "9932CC",
    "darkred": "8B0000",
    "darksalmon": "E9967A",
    "darkseagreen": "8FBC8F",
    "darkslateblue": "483D8B",
    "darkslategray": "2F4F4F",
    "darkslategrey": "2F4F4F",
    "darkturquoise": "00CED1",
    "darkviolet": "9400D3",
    "deeppink": "FF1493",
    "deepskyblue": "00BFFF",
    "dimgray": "696969",
    "dimgrey": "696969",
    "dodgerblue": "1E90FF",
    "firebrick": "B22222",
    "floralwhite": "FFFAF0",
    "forestgreen": "228B22",
    "fuchsia": "FF00FF",
    "gainsboro": "DCDCDC",
    "ghostwhite": "F8F8FF",
    "gold": "FFD700",
    "goldenrod": "DAA520",
    "gray": "808080",
    "green": "008000",
    "greenyellow": "ADFF2F",
    "grey": "808080",
    "honeydew": "F0FFF0",
    "hotpink": "FF69B4",
    "indianred": "CD5C5C",
    "indigo": "4B0082",
    "ivory": "FFFFF0",
    "khaki": "F0E68C",
    "lavender": "E6E6FA",
    "lavenderblush": "FFF0F5",
    "lawngreen": "7CFC00",
    "lemonchiffon": "FFFACD",
    "lightblue": "ADD8E6",
    "lightcoral": "F08080",
    "lightcyan": "E0FFFF",
    "lightgoldenrodyellow": "FAFAD2",
    "lightgray": "D3D3D3",
    "lightgreen": "90EE90",
    "lightgrey": "D3D3D3",
    "lightpink": "FFB6C1",
    "lightsalmon": "FFA07A",
    "lightseagreen": "20B2AA",
    "lightskyblue": "87CEFA",
    "lightslategray": "778899",
    "lightslategrey": "778899",
    "lightsteelblue": "B0C4DE",
    "lightyellow": "FFFFE0",
    "lime": "00FF00",
    "limegreen": "32CD32",
    "linen": "FAF0E6",
    "magenta": "FF00FF",
    "maroon": "800000",
    "mediumaquamarine": "66CDAA",
    "mediumblue": "0000CD",
    "mediumorchid": "BA55D3",
    "mediumpurple": "9370DB",
    "mediumseagreen": "3CB371",
    "mediumslateblue": "7B68EE",
    "mediumspringgreen": "00FA9A",
    "mediumturquoise": "48D1CC",
    "mediumvioletred": "C71585",
    "midnightblue": "191970",
    "mintcream": "F5FFFA",
    "mistyrose": "FFE4E1",
    "moccasin": "FFE4B5",
    "navajowhite": "FFDEAD",
    "navy": "000080",
    "oldlace": "FDF5E6",
    "olive": "808000",
    "olivedrab": "6B8E23",
    "orange": "FFA500",
    "orangered": "FF4500",
    "orchid": "DA70D6",
    "palegoldenrod": "EEE8AA",
    "palegreen": "98FB98",
    "paleturquoise": "AFEEEE",
    "palevioletred": "DB7093",
    "papayawhip": "FFEFD5",
    "peachpuff": "FFDAB9",
    "peru": "CD853F",
    "pink": "FFC0CB",
    "plum": "DDA0DD",
    "powderblue": "B0E0E6",
    "purple": "800080",
    "rebeccapurple": "663399",
    "red": "FF0000",
    "rosybrown": "BC8F8F",
    "royalblue": "4169E1",
    "saddlebrown": "8B4513",
    "salmon": "FA8072",
    "sandybrown": "F4A460",
    "seagreen": "2E8B57",
    "seashell": "FFF5EE",
    "sienna": "A0522D",
    "silver": "C0C0C0",
    "skyblue": "87CEEB",
    "slateblue": "6A5ACD",
    "slategray": "708090",
    "slategrey": "708090",
    "snow": "FFFAFA",
    "springgreen": "00FF7F",
    "steelblue": "4682B4",
    "tan": "D2B48C",
    "teal": "008080",
    "thistle": "D8BFD8",
    "tomato": "FF6347",
    "turquoise": "40E0D0",
    "violet": "EE82EE",
    "wheat": "F5DEB3",
    "white": "FFFFFF",
    "whitesmoke": "F5F5F5",
    "yellow": "FFFF00",
    "yellowgreen": "9ACD32",
}
|
94
lib/python3.13/site-packages/pandas/io/formats/console.py
Normal file
94
lib/python3.13/site-packages/pandas/io/formats/console.py
Normal file
@ -0,0 +1,94 @@
|
||||
"""
|
||||
Internal module for console introspection
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from shutil import get_terminal_size
|
||||
|
||||
|
||||
def get_console_size() -> tuple[int | None, int | None]:
    """
    Return console size as tuple = (width, height).

    Returns (None,None) in non-interactive session.
    """
    from pandas import get_option

    # User-configured values take precedence over anything we detect.
    configured_width = get_option("display.width")
    configured_height = get_option("display.max_rows")

    # Environment cases:
    #   * interactive shell terminal     -> terminal size is detectable
    #   * ipython frontend (ipnb/qt)     -> terminal size is NOT detectable,
    #     fall back to the option defaults from config_init
    #   * non-interactive script         -> terminal size should be ignored
    # Width/height options default to real values, but 'None' signals
    # auto-detection, which only makes sense in an interactive shell terminal.
    if not in_interactive_session():
        detected_width, detected_height = None, None
    elif in_ipython_frontend():
        # sane defaults for an interactive non-shell terminal;
        # match the defaults for width/height in config_init
        from pandas._config.config import get_default_val

        detected_width = get_default_val("display.width")
        detected_height = get_default_val("display.max_rows")
    else:
        # pure terminal
        detected_width, detected_height = get_terminal_size()

    # If the user set width/height to None (auto-detection) and we're in a
    # non-interactive script, this returns (None, None); the caller must cope.
    return (
        configured_width or detected_width,
        configured_height or detected_height,
    )
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Detect our environment
|
||||
|
||||
|
||||
def in_interactive_session() -> bool:
    """
    Check if we're running in an interactive shell.

    Returns
    -------
    bool
        True if running under python/ipython interactive shell.
    """
    from pandas import get_option

    def _interactive_main() -> bool:
        # A __main__ module without __file__ means no script is being run;
        # the sim_interactive option can force interactive behavior.
        try:
            import __main__ as main_module
        except ModuleNotFoundError:
            return get_option("mode.sim_interactive")
        return not hasattr(main_module, "__file__") or get_option(
            "mode.sim_interactive"
        )

    # __IPYTHON__ is injected into builtins by IPython; it is undefined
    # in a plain interpreter, hence the NameError fallback.
    try:
        ipython_active = __IPYTHON__  # type: ignore[name-defined]
    except NameError:
        ipython_active = False
    return bool(ipython_active or _interactive_main())
|
||||
|
||||
|
||||
def in_ipython_frontend() -> bool:
    """
    Check if we're inside an IPython zmq frontend.

    Returns
    -------
    bool
    """
    # get_ipython is only defined when running under IPython; in a plain
    # interpreter it raises NameError and we report False.
    try:
        shell = get_ipython()  # type: ignore[name-defined]
    except NameError:
        return False
    return "zmq" in str(type(shell)).lower()
|
421
lib/python3.13/site-packages/pandas/io/formats/css.py
Normal file
421
lib/python3.13/site-packages/pandas/io/formats/css.py
Normal file
@ -0,0 +1,421 @@
|
||||
"""
|
||||
Utilities for interpreting CSS from Stylers for formatting non-HTML outputs.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Callable,
|
||||
)
|
||||
import warnings
|
||||
|
||||
from pandas.errors import CSSWarning
|
||||
from pandas.util._exceptions import find_stack_level
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import (
|
||||
Generator,
|
||||
Iterable,
|
||||
Iterator,
|
||||
)
|
||||
|
||||
|
||||
def _side_expander(prop_fmt: str) -> Callable:
|
||||
"""
|
||||
Wrapper to expand shorthand property into top, right, bottom, left properties
|
||||
|
||||
Parameters
|
||||
----------
|
||||
side : str
|
||||
The border side to expand into properties
|
||||
|
||||
Returns
|
||||
-------
|
||||
function: Return to call when a 'border(-{side}): {value}' string is encountered
|
||||
"""
|
||||
|
||||
def expand(self, prop, value: str) -> Generator[tuple[str, str], None, None]:
|
||||
"""
|
||||
Expand shorthand property into side-specific property (top, right, bottom, left)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
prop (str): CSS property name
|
||||
value (str): String token for property
|
||||
|
||||
Yields
|
||||
------
|
||||
Tuple (str, str): Expanded property, value
|
||||
"""
|
||||
tokens = value.split()
|
||||
try:
|
||||
mapping = self.SIDE_SHORTHANDS[len(tokens)]
|
||||
except KeyError:
|
||||
warnings.warn(
|
||||
f'Could not expand "{prop}: {value}"',
|
||||
CSSWarning,
|
||||
stacklevel=find_stack_level(),
|
||||
)
|
||||
return
|
||||
for key, idx in zip(self.SIDES, mapping):
|
||||
yield prop_fmt.format(key), tokens[idx]
|
||||
|
||||
return expand
|
||||
|
||||
|
||||
def _border_expander(side: str = "") -> Callable:
|
||||
"""
|
||||
Wrapper to expand 'border' property into border color, style, and width properties
|
||||
|
||||
Parameters
|
||||
----------
|
||||
side : str
|
||||
The border side to expand into properties
|
||||
|
||||
Returns
|
||||
-------
|
||||
function: Return to call when a 'border(-{side}): {value}' string is encountered
|
||||
"""
|
||||
if side != "":
|
||||
side = f"-{side}"
|
||||
|
||||
def expand(self, prop, value: str) -> Generator[tuple[str, str], None, None]:
|
||||
"""
|
||||
Expand border into color, style, and width tuples
|
||||
|
||||
Parameters
|
||||
----------
|
||||
prop : str
|
||||
CSS property name passed to styler
|
||||
value : str
|
||||
Value passed to styler for property
|
||||
|
||||
Yields
|
||||
------
|
||||
Tuple (str, str): Expanded property, value
|
||||
"""
|
||||
tokens = value.split()
|
||||
if len(tokens) == 0 or len(tokens) > 3:
|
||||
warnings.warn(
|
||||
f'Too many tokens provided to "{prop}" (expected 1-3)',
|
||||
CSSWarning,
|
||||
stacklevel=find_stack_level(),
|
||||
)
|
||||
|
||||
# TODO: Can we use current color as initial value to comply with CSS standards?
|
||||
border_declarations = {
|
||||
f"border{side}-color": "black",
|
||||
f"border{side}-style": "none",
|
||||
f"border{side}-width": "medium",
|
||||
}
|
||||
for token in tokens:
|
||||
if token.lower() in self.BORDER_STYLES:
|
||||
border_declarations[f"border{side}-style"] = token
|
||||
elif any(ratio in token.lower() for ratio in self.BORDER_WIDTH_RATIOS):
|
||||
border_declarations[f"border{side}-width"] = token
|
||||
else:
|
||||
border_declarations[f"border{side}-color"] = token
|
||||
# TODO: Warn user if item entered more than once (e.g. "border: red green")
|
||||
|
||||
# Per CSS, "border" will reset previous "border-*" definitions
|
||||
yield from self.atomize(border_declarations.items())
|
||||
|
||||
return expand
|
||||
|
||||
|
||||
class CSSResolver:
    """
    A callable for parsing and resolving CSS to atomic properties.
    """

    # Maps a unit to (target unit, multiplier); conversion proceeds step by
    # step in size_to_pt until the unit reaches "pt".
    UNIT_RATIOS = {
        "pt": ("pt", 1),
        "em": ("em", 1),
        "rem": ("pt", 12),
        "ex": ("em", 0.5),
        # 'ch':
        "px": ("pt", 0.75),
        "pc": ("pt", 12),
        "in": ("pt", 72),
        "cm": ("in", 1 / 2.54),
        "mm": ("in", 1 / 25.4),
        "q": ("mm", 0.25),
        # "!!default" is the internal fallback unit used by size_to_pt's
        # error path ("1!!default").
        "!!default": ("em", 0),
    }

    # Font sizes additionally accept percentages and the CSS keyword scale.
    FONT_SIZE_RATIOS = UNIT_RATIOS.copy()
    FONT_SIZE_RATIOS.update(
        {
            "%": ("em", 0.01),
            "xx-small": ("rem", 0.5),
            "x-small": ("rem", 0.625),
            "small": ("rem", 0.8),
            "medium": ("rem", 1),
            "large": ("rem", 1.125),
            "x-large": ("rem", 1.5),
            "xx-large": ("rem", 2),
            "smaller": ("em", 1 / 1.2),
            "larger": ("em", 1.2),
            "!!default": ("em", 1),
        }
    )

    MARGIN_RATIOS = UNIT_RATIOS.copy()
    MARGIN_RATIOS.update({"none": ("pt", 0)})

    BORDER_WIDTH_RATIOS = UNIT_RATIOS.copy()
    BORDER_WIDTH_RATIOS.update(
        {
            "none": ("pt", 0),
            "thick": ("px", 4),
            "medium": ("px", 2),
            "thin": ("px", 1),
            # Default: medium only if solid
        }
    )

    # Recognized border style keywords (includes Excel-specific styles such
    # as "hair" and the dash-dot variants, not just CSS ones).
    BORDER_STYLES = [
        "none",
        "hidden",
        "dotted",
        "dashed",
        "solid",
        "double",
        "groove",
        "ridge",
        "inset",
        "outset",
        "mediumdashdot",
        "dashdotdot",
        "hair",
        "mediumdashdotdot",
        "dashdot",
        "slantdashdot",
        "mediumdashed",
    ]

    # Token-count -> index of the token to use for (top, right, bottom, left).
    SIDE_SHORTHANDS = {
        1: [0, 0, 0, 0],
        2: [0, 1, 0, 1],
        3: [0, 1, 2, 1],
        4: [0, 1, 2, 3],
    }

    SIDES = ("top", "right", "bottom", "left")

    # Shorthand property -> expander callable(self, prop, value).
    CSS_EXPANSIONS = {
        **{
            (f"border-{prop}" if prop else "border"): _border_expander(prop)
            for prop in ["", "top", "right", "bottom", "left"]
        },
        **{
            f"border-{prop}": _side_expander(f"border-{{:s}}-{prop}")
            for prop in ["color", "style", "width"]
        },
        "margin": _side_expander("margin-{:s}"),
        "padding": _side_expander("padding-{:s}"),
    }

    def __call__(
        self,
        declarations: str | Iterable[tuple[str, str]],
        inherited: dict[str, str] | None = None,
    ) -> dict[str, str]:
        """
        The given declarations to atomic properties.

        Parameters
        ----------
        declarations_str : str | Iterable[tuple[str, str]]
            A CSS string or set of CSS declaration tuples
            e.g. "font-weight: bold; background: blue" or
            {("font-weight", "bold"), ("background", "blue")}
        inherited : dict, optional
            Atomic properties indicating the inherited style context in which
            declarations_str is to be resolved. ``inherited`` should already
            be resolved, i.e. valid output of this method.

        Returns
        -------
        dict
            Atomic CSS 2.2 properties.

        Examples
        --------
        >>> resolve = CSSResolver()
        >>> inherited = {'font-family': 'serif', 'font-weight': 'bold'}
        >>> out = resolve('''
        ...     border-color: BLUE RED;
        ...     font-size: 1em;
        ...     font-size: 2em;
        ...     font-weight: normal;
        ...     font-weight: inherit;
        ...     ''', inherited)
        >>> sorted(out.items())  # doctest: +NORMALIZE_WHITESPACE
        [('border-bottom-color', 'blue'),
         ('border-left-color', 'red'),
         ('border-right-color', 'red'),
         ('border-top-color', 'blue'),
         ('font-family', 'serif'),
         ('font-size', '24pt'),
         ('font-weight', 'bold')]
        """
        if isinstance(declarations, str):
            declarations = self.parse(declarations)
        props = dict(self.atomize(declarations))
        if inherited is None:
            inherited = {}

        # Resolution order matters: inherited/initial first, then font-size
        # (other units may be relative to it), then the remaining units.
        props = self._update_initial(props, inherited)
        props = self._update_font_size(props, inherited)
        return self._update_other_units(props)

    def _update_initial(
        self,
        props: dict[str, str],
        inherited: dict[str, str],
    ) -> dict[str, str]:
        """Fill in inherited values and resolve 'inherit'/'initial' keywords."""
        # 1. resolve inherited, initial
        for prop, val in inherited.items():
            if prop not in props:
                props[prop] = val

        new_props = props.copy()
        for prop, val in props.items():
            if val == "inherit":
                val = inherited.get(prop, "initial")

            if val in ("initial", None):
                # we do not define a complete initial stylesheet
                del new_props[prop]
            else:
                new_props[prop] = val
        return new_props

    def _update_font_size(
        self,
        props: dict[str, str],
        inherited: dict[str, str],
    ) -> dict[str, str]:
        """Convert a relative font-size to pt using the inherited font size."""
        # 2. resolve relative font size
        if props.get("font-size"):
            props["font-size"] = self.size_to_pt(
                props["font-size"],
                self._get_font_size(inherited),
                conversions=self.FONT_SIZE_RATIOS,
            )
        return props

    def _get_font_size(self, props: dict[str, str]) -> float | None:
        """Return the numeric pt font size from ``props``, if present."""
        if props.get("font-size"):
            font_size_string = props["font-size"]
            return self._get_float_font_size_from_pt(font_size_string)
        return None

    def _get_float_font_size_from_pt(self, font_size_string: str) -> float:
        # Expects an already-resolved size like "12pt".
        assert font_size_string.endswith("pt")
        return float(font_size_string.rstrip("pt"))

    def _update_other_units(self, props: dict[str, str]) -> dict[str, str]:
        """Convert border widths, margins and paddings to pt."""
        font_size = self._get_font_size(props)
        # 3. TODO: resolve other font-relative units
        for side in self.SIDES:
            prop = f"border-{side}-width"
            if prop in props:
                props[prop] = self.size_to_pt(
                    props[prop],
                    em_pt=font_size,
                    conversions=self.BORDER_WIDTH_RATIOS,
                )

            for prop in [f"margin-{side}", f"padding-{side}"]:
                if prop in props:
                    # TODO: support %
                    props[prop] = self.size_to_pt(
                        props[prop],
                        em_pt=font_size,
                        conversions=self.MARGIN_RATIOS,
                    )
        return props

    def size_to_pt(self, in_val, em_pt=None, conversions=UNIT_RATIOS) -> str:
        """
        Convert a CSS size string (e.g. "2em", "medium") to a "...pt" string.

        ``em_pt`` is the current font size in pt, used to resolve "em" units;
        when it is None, "em" falls back to "rem".
        """

        def _error():
            # On any unparseable input, warn and resolve the internal
            # "!!default" sentinel instead of raising.
            warnings.warn(
                f"Unhandled size: {repr(in_val)}",
                CSSWarning,
                stacklevel=find_stack_level(),
            )
            return self.size_to_pt("1!!default", conversions=conversions)

        match = re.match(r"^(\S*?)([a-zA-Z%!].*)", in_val)
        if match is None:
            return _error()

        val, unit = match.groups()
        if val == "":
            # hack for 'large' etc.
            val = 1
        else:
            try:
                val = float(val)
            except ValueError:
                return _error()

        # Repeatedly apply unit conversions until the unit is "pt".
        while unit != "pt":
            if unit == "em":
                if em_pt is None:
                    unit = "rem"
                else:
                    val *= em_pt
                    unit = "pt"
                continue

            try:
                unit, mul = conversions[unit]
            except KeyError:
                return _error()
            val *= mul

        val = round(val, 5)
        # Prefer an integer representation when exact (e.g. "12pt" not "12.0pt").
        if int(val) == val:
            size_fmt = f"{int(val):d}pt"
        else:
            size_fmt = f"{val:f}pt"
        return size_fmt

    def atomize(self, declarations: Iterable) -> Generator[tuple[str, str], None, None]:
        """Lower-case and expand shorthand declarations into atomic ones."""
        for prop, value in declarations:
            prop = prop.lower()
            value = value.lower()
            if prop in self.CSS_EXPANSIONS:
                expand = self.CSS_EXPANSIONS[prop]
                yield from expand(self, prop, value)
            else:
                yield prop, value

    def parse(self, declarations_str: str) -> Iterator[tuple[str, str]]:
        """
        Generates (prop, value) pairs from declarations.

        In a future version may generate parsed tokens from tinycss/tinycss2

        Parameters
        ----------
        declarations_str : str
        """
        for decl in declarations_str.split(";"):
            if not decl.strip():
                continue
            prop, sep, val = decl.partition(":")
            prop = prop.strip().lower()
            # TODO: don't lowercase case sensitive parts of values (strings)
            val = val.strip().lower()
            if sep:
                yield prop, val
            else:
                # No colon found: not a declaration; warn and skip.
                warnings.warn(
                    f"Ill-formatted attribute: expected a colon in {repr(decl)}",
                    CSSWarning,
                    stacklevel=find_stack_level(),
                )
|
330
lib/python3.13/site-packages/pandas/io/formats/csvs.py
Normal file
330
lib/python3.13/site-packages/pandas/io/formats/csvs.py
Normal file
@ -0,0 +1,330 @@
|
||||
"""
|
||||
Module for formatting output data into CSV files.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import (
|
||||
Hashable,
|
||||
Iterable,
|
||||
Iterator,
|
||||
Sequence,
|
||||
)
|
||||
import csv as csvlib
|
||||
import os
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
cast,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs import writers as libwriters
|
||||
from pandas._typing import SequenceNotStr
|
||||
from pandas.util._decorators import cache_readonly
|
||||
|
||||
from pandas.core.dtypes.generic import (
|
||||
ABCDatetimeIndex,
|
||||
ABCIndex,
|
||||
ABCMultiIndex,
|
||||
ABCPeriodIndex,
|
||||
)
|
||||
from pandas.core.dtypes.missing import notna
|
||||
|
||||
from pandas.core.indexes.api import Index
|
||||
|
||||
from pandas.io.common import get_handle
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import (
|
||||
CompressionOptions,
|
||||
FilePath,
|
||||
FloatFormatType,
|
||||
IndexLabel,
|
||||
StorageOptions,
|
||||
WriteBuffer,
|
||||
npt,
|
||||
)
|
||||
|
||||
from pandas.io.formats.format import DataFrameFormatter
|
||||
|
||||
|
||||
# Default number of cells (rows x columns) to write per chunk in
# CSVFormatter._save_body when no explicit chunksize is given.
_DEFAULT_CHUNKSIZE_CELLS = 100_000
|
||||
|
||||
|
||||
class CSVFormatter:
    """
    Render a DataFrameFormatter's frame to CSV via ``csv.writer``.

    Values are pre-formatted with ``_get_values_for_csv`` and written in
    chunks by ``pandas._libs.writers.write_csv_rows``.
    """

    # String-converted column labels, produced by _initialize_columns.
    cols: npt.NDArray[np.object_]

    def __init__(
        self,
        formatter: DataFrameFormatter,
        path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes] = "",
        sep: str = ",",
        cols: Sequence[Hashable] | None = None,
        index_label: IndexLabel | None = None,
        mode: str = "w",
        encoding: str | None = None,
        errors: str = "strict",
        compression: CompressionOptions = "infer",
        quoting: int | None = None,
        lineterminator: str | None = "\n",
        chunksize: int | None = None,
        quotechar: str | None = '"',
        date_format: str | None = None,
        doublequote: bool = True,
        escapechar: str | None = None,
        storage_options: StorageOptions | None = None,
    ) -> None:
        self.fmt = formatter

        self.obj = self.fmt.frame

        self.filepath_or_buffer = path_or_buf
        self.encoding = encoding
        self.compression: CompressionOptions = compression
        self.mode = mode
        self.storage_options = storage_options

        self.sep = sep
        self.index_label = self._initialize_index_label(index_label)
        self.errors = errors
        # Falsy quoting (None or 0 == QUOTE_MINIMAL) resolves to QUOTE_MINIMAL.
        self.quoting = quoting or csvlib.QUOTE_MINIMAL
        self.quotechar = self._initialize_quotechar(quotechar)
        self.doublequote = doublequote
        self.escapechar = escapechar
        self.lineterminator = lineterminator or os.linesep
        self.date_format = date_format
        self.cols = self._initialize_columns(cols)
        self.chunksize = self._initialize_chunksize(chunksize)

    @property
    def na_rep(self) -> str:
        # Delegated to the DataFrameFormatter.
        return self.fmt.na_rep

    @property
    def float_format(self) -> FloatFormatType | None:
        # Delegated to the DataFrameFormatter.
        return self.fmt.float_format

    @property
    def decimal(self) -> str:
        # Delegated to the DataFrameFormatter.
        return self.fmt.decimal

    @property
    def header(self) -> bool | SequenceNotStr[str]:
        # Delegated to the DataFrameFormatter.
        return self.fmt.header

    @property
    def index(self) -> bool:
        # Delegated to the DataFrameFormatter.
        return self.fmt.index

    def _initialize_index_label(self, index_label: IndexLabel | None) -> IndexLabel:
        """Normalize ``index_label``; derive labels from the frame when None."""
        if index_label is not False:
            if index_label is None:
                return self._get_index_label_from_obj()
            elif not isinstance(index_label, (list, tuple, np.ndarray, ABCIndex)):
                # given a string for a DF with Index
                return [index_label]
        return index_label

    def _get_index_label_from_obj(self) -> Sequence[Hashable]:
        """Return index labels taken from the frame's index names."""
        if isinstance(self.obj.index, ABCMultiIndex):
            return self._get_index_label_multiindex()
        else:
            return self._get_index_label_flat()

    def _get_index_label_multiindex(self) -> Sequence[Hashable]:
        # Unnamed levels become empty strings.
        return [name or "" for name in self.obj.index.names]

    def _get_index_label_flat(self) -> Sequence[Hashable]:
        index_label = self.obj.index.name
        return [""] if index_label is None else [index_label]

    def _initialize_quotechar(self, quotechar: str | None) -> str | None:
        """Return ``quotechar`` unless quoting is disabled entirely."""
        if self.quoting != csvlib.QUOTE_NONE:
            # prevents crash in _csv
            return quotechar
        return None

    @property
    def has_mi_columns(self) -> bool:
        return bool(isinstance(self.obj.columns, ABCMultiIndex))

    def _initialize_columns(
        self, cols: Iterable[Hashable] | None
    ) -> npt.NDArray[np.object_]:
        """Validate/select ``cols`` and return string-converted column labels."""
        # validate mi options
        if self.has_mi_columns:
            if cols is not None:
                msg = "cannot specify cols with a MultiIndex on the columns"
                raise TypeError(msg)

        if cols is not None:
            if isinstance(cols, ABCIndex):
                cols = cols._get_values_for_csv(**self._number_format)
            else:
                cols = list(cols)
            # Restrict the frame to the requested columns.
            self.obj = self.obj.loc[:, cols]

        # update columns to include possible multiplicity of dupes
        # and make sure cols is just a list of labels
        new_cols = self.obj.columns
        return new_cols._get_values_for_csv(**self._number_format)

    def _initialize_chunksize(self, chunksize: int | None) -> int:
        """Default chunk size keeps roughly _DEFAULT_CHUNKSIZE_CELLS cells per chunk."""
        if chunksize is None:
            return (_DEFAULT_CHUNKSIZE_CELLS // (len(self.cols) or 1)) or 1
        return int(chunksize)

    @property
    def _number_format(self) -> dict[str, Any]:
        """Dictionary used for storing number formatting settings."""
        return {
            "na_rep": self.na_rep,
            "float_format": self.float_format,
            "date_format": self.date_format,
            "quoting": self.quoting,
            "decimal": self.decimal,
        }

    @cache_readonly
    def data_index(self) -> Index:
        """Index to write: datetimes formatted via date_format; MI trimmed."""
        data_index = self.obj.index
        if (
            isinstance(data_index, (ABCDatetimeIndex, ABCPeriodIndex))
            and self.date_format is not None
        ):
            data_index = Index(
                [x.strftime(self.date_format) if notna(x) else "" for x in data_index]
            )
        elif isinstance(data_index, ABCMultiIndex):
            data_index = data_index.remove_unused_levels()
        return data_index

    @property
    def nlevels(self) -> int:
        # Number of index columns written; 0 when index writing is disabled.
        if self.index:
            return getattr(self.data_index, "nlevels", 1)
        else:
            return 0

    @property
    def _has_aliases(self) -> bool:
        # True when ``header`` supplies replacement column names.
        return isinstance(self.header, (tuple, list, np.ndarray, ABCIndex))

    @property
    def _need_to_save_header(self) -> bool:
        return bool(self._has_aliases or self.header)

    @property
    def write_cols(self) -> SequenceNotStr[Hashable]:
        """Column labels actually written: header aliases or the converted cols."""
        if self._has_aliases:
            assert not isinstance(self.header, bool)
            if len(self.header) != len(self.cols):
                raise ValueError(
                    f"Writing {len(self.cols)} cols but got {len(self.header)} aliases"
                )
            return self.header
        else:
            # self.cols is an ndarray derived from Index._get_values_for_csv,
            # so its entries are strings, i.e. hashable
            return cast(SequenceNotStr[Hashable], self.cols)

    @property
    def encoded_labels(self) -> list[Hashable]:
        """Header row: index labels (if any) followed by column labels."""
        encoded_labels: list[Hashable] = []

        if self.index and self.index_label:
            assert isinstance(self.index_label, Sequence)
            encoded_labels = list(self.index_label)

        if not self.has_mi_columns or self._has_aliases:
            encoded_labels += list(self.write_cols)

        return encoded_labels

    def save(self) -> None:
        """
        Create the writer & save.
        """
        # apply compression and byte/text conversion
        with get_handle(
            self.filepath_or_buffer,
            self.mode,
            encoding=self.encoding,
            errors=self.errors,
            compression=self.compression,
            storage_options=self.storage_options,
        ) as handles:
            # Note: self.encoding is irrelevant here
            self.writer = csvlib.writer(
                handles.handle,
                lineterminator=self.lineterminator,
                delimiter=self.sep,
                quoting=self.quoting,
                doublequote=self.doublequote,
                escapechar=self.escapechar,
                quotechar=self.quotechar,
            )

            self._save()

    def _save(self) -> None:
        """Write header (when requested) then the body."""
        if self._need_to_save_header:
            self._save_header()
        self._save_body()

    def _save_header(self) -> None:
        # A MultiIndex header without aliases takes one row per level.
        if not self.has_mi_columns or self._has_aliases:
            self.writer.writerow(self.encoded_labels)
        else:
            for row in self._generate_multiindex_header_rows():
                self.writer.writerow(row)

    def _generate_multiindex_header_rows(self) -> Iterator[list[Hashable]]:
        """Yield one header row per column level, plus an index-label row."""
        columns = self.obj.columns
        for i in range(columns.nlevels):
            # we need at least 1 index column to write our col names
            col_line = []
            if self.index:
                # name is the first column
                col_line.append(columns.names[i])

                if isinstance(self.index_label, list) and len(self.index_label) > 1:
                    col_line.extend([""] * (len(self.index_label) - 1))

            col_line.extend(columns._get_level_values(i))
            yield col_line

        # Write out the index line if it's not empty.
        # Otherwise, we will print out an extraneous
        # blank line between the mi and the data rows.
        if self.encoded_labels and set(self.encoded_labels) != {""}:
            yield self.encoded_labels + [""] * len(columns)

    def _save_body(self) -> None:
        """Write all data rows in chunks of ``self.chunksize`` rows."""
        nrows = len(self.data_index)
        chunks = (nrows // self.chunksize) + 1
        for i in range(chunks):
            start_i = i * self.chunksize
            end_i = min(start_i + self.chunksize, nrows)
            if start_i >= end_i:
                break
            self._save_chunk(start_i, end_i)

    def _save_chunk(self, start_i: int, end_i: int) -> None:
        """Format rows [start_i, end_i) and hand them to the C row writer."""
        # create the data for a chunk
        slicer = slice(start_i, end_i)
        df = self.obj.iloc[slicer]

        res = df._get_values_for_csv(**self._number_format)
        data = list(res._iter_column_arrays())

        ix = self.data_index[slicer]._get_values_for_csv(**self._number_format)
        libwriters.write_csv_rows(
            data,
            ix,
            self.nlevels,
            self.cols,
            self.writer,
        )
|
962
lib/python3.13/site-packages/pandas/io/formats/excel.py
Normal file
962
lib/python3.13/site-packages/pandas/io/formats/excel.py
Normal file
@ -0,0 +1,962 @@
|
||||
"""
|
||||
Utilities for conversion to writer-agnostic Excel representation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import (
|
||||
Hashable,
|
||||
Iterable,
|
||||
Mapping,
|
||||
Sequence,
|
||||
)
|
||||
import functools
|
||||
import itertools
|
||||
import re
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
cast,
|
||||
)
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas._libs.lib import is_list_like
|
||||
from pandas.util._decorators import doc
|
||||
from pandas.util._exceptions import find_stack_level
|
||||
|
||||
from pandas.core.dtypes import missing
|
||||
from pandas.core.dtypes.common import (
|
||||
is_float,
|
||||
is_scalar,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
PeriodIndex,
|
||||
)
|
||||
import pandas.core.common as com
|
||||
from pandas.core.shared_docs import _shared_docs
|
||||
|
||||
from pandas.io.formats._color_data import CSS4_COLORS
|
||||
from pandas.io.formats.css import (
|
||||
CSSResolver,
|
||||
CSSWarning,
|
||||
)
|
||||
from pandas.io.formats.format import get_level_lengths
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import (
|
||||
FilePath,
|
||||
IndexLabel,
|
||||
StorageOptions,
|
||||
WriteExcelBuffer,
|
||||
)
|
||||
|
||||
from pandas import ExcelWriter
|
||||
|
||||
|
||||
class ExcelCell:
    """A single cell destined for an Excel sheet: position, value, style,
    and an optional merge range (``mergestart``/``mergeend``)."""

    __fields__ = ("row", "col", "val", "style", "mergestart", "mergeend")
    # __slots__ keeps per-cell memory small; a sheet can hold millions of cells
    __slots__ = __fields__

    def __init__(
        self,
        row: int,
        col: int,
        val,
        style=None,
        mergestart: int | None = None,
        mergeend: int | None = None,
    ) -> None:
        self.row = row
        self.col = col
        self.val = val
        self.style = style
        self.mergestart = mergestart
        self.mergeend = mergeend
||||
class CssExcelCell(ExcelCell):
    """An :class:`ExcelCell` whose style is derived from Styler CSS context.

    When CSS declarations exist for ``(css_row, css_col)`` they are
    converted via ``css_converter`` and replace the ``style`` passed in.
    """

    def __init__(
        self,
        row: int,
        col: int,
        val,
        style: dict | None,
        css_styles: dict[tuple[int, int], list[tuple[str, Any]]] | None,
        css_row: int,
        css_col: int,
        css_converter: Callable | None,
        **kwargs,
    ) -> None:
        if css_styles and css_converter:
            # Use dict to get only one (case-insensitive) declaration per property
            declaration_dict = {
                prop.lower(): val for prop, val in css_styles[css_row, css_col]
            }
            # Convert to frozenset for order-invariant caching
            unique_declarations = frozenset(declaration_dict.items())
            style = css_converter(unique_declarations)

        super().__init__(row=row, col=col, val=val, style=style, **kwargs)
||||
class CSSToExcelConverter:
|
||||
"""
|
||||
A callable for converting CSS declarations to ExcelWriter styles
|
||||
|
||||
Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow),
|
||||
focusing on font styling, backgrounds, borders and alignment.
|
||||
|
||||
Operates by first computing CSS styles in a fairly generic
|
||||
way (see :meth:`compute_css`) then determining Excel style
|
||||
properties from CSS properties (see :meth:`build_xlstyle`).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
inherited : str, optional
|
||||
CSS declarations understood to be the containing scope for the
|
||||
CSS processed by :meth:`__call__`.
|
||||
"""
|
||||
|
||||
NAMED_COLORS = CSS4_COLORS
|
||||
|
||||
VERTICAL_MAP = {
|
||||
"top": "top",
|
||||
"text-top": "top",
|
||||
"middle": "center",
|
||||
"baseline": "bottom",
|
||||
"bottom": "bottom",
|
||||
"text-bottom": "bottom",
|
||||
# OpenXML also has 'justify', 'distributed'
|
||||
}
|
||||
|
||||
BOLD_MAP = {
|
||||
"bold": True,
|
||||
"bolder": True,
|
||||
"600": True,
|
||||
"700": True,
|
||||
"800": True,
|
||||
"900": True,
|
||||
"normal": False,
|
||||
"lighter": False,
|
||||
"100": False,
|
||||
"200": False,
|
||||
"300": False,
|
||||
"400": False,
|
||||
"500": False,
|
||||
}
|
||||
|
||||
ITALIC_MAP = {
|
||||
"normal": False,
|
||||
"italic": True,
|
||||
"oblique": True,
|
||||
}
|
||||
|
||||
FAMILY_MAP = {
|
||||
"serif": 1, # roman
|
||||
"sans-serif": 2, # swiss
|
||||
"cursive": 4, # script
|
||||
"fantasy": 5, # decorative
|
||||
}
|
||||
|
||||
BORDER_STYLE_MAP = {
|
||||
style.lower(): style
|
||||
for style in [
|
||||
"dashed",
|
||||
"mediumDashDot",
|
||||
"dashDotDot",
|
||||
"hair",
|
||||
"dotted",
|
||||
"mediumDashDotDot",
|
||||
"double",
|
||||
"dashDot",
|
||||
"slantDashDot",
|
||||
"mediumDashed",
|
||||
]
|
||||
}
|
||||
|
||||
# NB: Most of the methods here could be classmethods, as only __init__
|
||||
# and __call__ make use of instance attributes. We leave them as
|
||||
# instancemethods so that users can easily experiment with extensions
|
||||
# without monkey-patching.
|
||||
inherited: dict[str, str] | None
|
||||
|
||||
def __init__(self, inherited: str | None = None) -> None:
    # Resolve the inherited (containing-scope) CSS once up front, if given.
    if inherited is not None:
        self.inherited = self.compute_css(inherited)
    else:
        self.inherited = None
    # We should avoid cache on the __call__ method.
    # Otherwise once the method __call__ has been called
    # garbage collection no longer deletes the instance.
    self._call_cached = functools.cache(self._call_uncached)

# Shared resolver: computes effective CSS properties from declarations
# plus an optional inherited context.
compute_css = CSSResolver()
|
||||
def __call__(
    self, declarations: str | frozenset[tuple[str, str]]
) -> dict[str, dict[str, str]]:
    """
    Convert CSS declarations to ExcelWriter style.

    Parameters
    ----------
    declarations : str | frozenset[tuple[str, str]]
        CSS string or set of CSS declaration tuples.
        e.g. "font-weight: bold; background: blue" or
        {("font-weight", "bold"), ("background", "blue")}

    Returns
    -------
    xlstyle : dict
        A style as interpreted by ExcelWriter when found in
        ExcelCell.style.
    """
    # Delegate through the per-instance cache (built in __init__) so
    # repeated declaration sets are converted only once.
    return self._call_cached(declarations)

def _call_uncached(
    self, declarations: str | frozenset[tuple[str, str]]
) -> dict[str, dict[str, str]]:
    # Uncached implementation of __call__: resolve effective CSS
    # properties, then translate them into an ExcelWriter style dict.
    properties = self.compute_css(declarations, self.inherited)
    return self.build_xlstyle(properties)
|
||||
def build_xlstyle(self, props: Mapping[str, str]) -> dict[str, dict[str, str]]:
    """Translate computed CSS properties into an ExcelWriter style dict,
    pruning ``None`` values and sub-dicts left empty by pruning."""
    out = {
        "alignment": self.build_alignment(props),
        "border": self.build_border(props),
        "fill": self.build_fill(props),
        "font": self.build_font(props),
        "number_format": self.build_number_format(props),
    }

    # TODO: handle cell width and height: needs support in pandas.io.excel

    def remove_none(d: dict[str, str | None]) -> None:
        """Remove key where value is None, through nested dicts"""
        for k, v in list(d.items()):
            if v is None:
                del d[k]
            elif isinstance(v, dict):
                remove_none(v)
                # drop sub-dicts emptied by the recursive pruning
                if not v:
                    del d[k]

    remove_none(out)
    return out
|
||||
def build_alignment(self, props: Mapping[str, str]) -> dict[str, bool | str | None]:
|
||||
# TODO: text-indent, padding-left -> alignment.indent
|
||||
return {
|
||||
"horizontal": props.get("text-align"),
|
||||
"vertical": self._get_vertical_alignment(props),
|
||||
"wrap_text": self._get_is_wrap_text(props),
|
||||
}
|
||||
|
||||
def _get_vertical_alignment(self, props: Mapping[str, str]) -> str | None:
|
||||
vertical_align = props.get("vertical-align")
|
||||
if vertical_align:
|
||||
return self.VERTICAL_MAP.get(vertical_align)
|
||||
return None
|
||||
|
||||
def _get_is_wrap_text(self, props: Mapping[str, str]) -> bool | None:
|
||||
if props.get("white-space") is None:
|
||||
return None
|
||||
return bool(props["white-space"] not in ("nowrap", "pre", "pre-line"))
|
||||
|
||||
def build_border(
|
||||
self, props: Mapping[str, str]
|
||||
) -> dict[str, dict[str, str | None]]:
|
||||
return {
|
||||
side: {
|
||||
"style": self._border_style(
|
||||
props.get(f"border-{side}-style"),
|
||||
props.get(f"border-{side}-width"),
|
||||
self.color_to_excel(props.get(f"border-{side}-color")),
|
||||
),
|
||||
"color": self.color_to_excel(props.get(f"border-{side}-color")),
|
||||
}
|
||||
for side in ["top", "right", "bottom", "left"]
|
||||
}
|
||||
|
||||
def _border_style(self, style: str | None, width: str | None, color: str | None):
    """Combine CSS border style/width/color into one OpenXML border style.

    NOTE: the checks below form an order-dependent cascade — each early
    return assumes the previous conditions failed. Do not reorder.
    """
    # convert styles and widths to openxml, one of:
    # 'dashDot'
    # 'dashDotDot'
    # 'dashed'
    # 'dotted'
    # 'double'
    # 'hair'
    # 'medium'
    # 'mediumDashDot'
    # 'mediumDashDotDot'
    # 'mediumDashed'
    # 'slantDashDot'
    # 'thick'
    # 'thin'
    if width is None and style is None and color is None:
        # Return None will remove "border" from style dictionary
        return None

    if width is None and style is None:
        # Return "none" will keep "border" in style dictionary
        return "none"

    if style in ("none", "hidden"):
        return "none"

    width_name = self._get_width_name(width)
    if width_name is None:
        # hairline/zero width renders as no border
        return "none"

    if style in (None, "groove", "ridge", "inset", "outset", "solid"):
        # not handled
        return width_name

    if style == "double":
        return "double"
    if style == "dotted":
        if width_name in ("hair", "thin"):
            return "dotted"
        return "mediumDashDotDot"
    if style == "dashed":
        if width_name in ("hair", "thin"):
            return "dashed"
        return "mediumDashed"
    elif style in self.BORDER_STYLE_MAP:
        # Excel-specific styles
        return self.BORDER_STYLE_MAP[style]
    else:
        warnings.warn(
            f"Unhandled border style format: {repr(style)}",
            CSSWarning,
            stacklevel=find_stack_level(),
        )
        return "none"
|
||||
def _get_width_name(self, width_input: str | None) -> str | None:
|
||||
width = self._width_to_float(width_input)
|
||||
if width < 1e-5:
|
||||
return None
|
||||
elif width < 1.3:
|
||||
return "thin"
|
||||
elif width < 2.8:
|
||||
return "medium"
|
||||
return "thick"
|
||||
|
||||
def _width_to_float(self, width: str | None) -> float:
|
||||
if width is None:
|
||||
width = "2pt"
|
||||
return self._pt_to_float(width)
|
||||
|
||||
def _pt_to_float(self, pt_string: str) -> float:
|
||||
assert pt_string.endswith("pt")
|
||||
return float(pt_string.rstrip("pt"))
|
||||
|
||||
def build_fill(self, props: Mapping[str, str]):
|
||||
# TODO: perhaps allow for special properties
|
||||
# -excel-pattern-bgcolor and -excel-pattern-type
|
||||
fill_color = props.get("background-color")
|
||||
if fill_color not in (None, "transparent", "none"):
|
||||
return {"fgColor": self.color_to_excel(fill_color), "patternType": "solid"}
|
||||
|
||||
def build_number_format(self, props: Mapping[str, str]) -> dict[str, str | None]:
|
||||
fc = props.get("number-format")
|
||||
fc = fc.replace("§", ";") if isinstance(fc, str) else fc
|
||||
return {"format_code": fc}
|
||||
|
||||
def build_font(
    self, props: Mapping[str, str]
) -> dict[str, bool | float | str | None]:
    """Assemble the Excel font style dict from CSS font/text properties;
    unset properties map to None and are pruned later by build_xlstyle."""
    font_names = self._get_font_names(props)
    decoration = self._get_decoration(props)
    return {
        # first listed family wins; Excel takes a single font name
        "name": font_names[0] if font_names else None,
        "family": self._select_font_family(font_names),
        "size": self._get_font_size(props),
        "bold": self._get_is_bold(props),
        "italic": self._get_is_italic(props),
        "underline": ("single" if "underline" in decoration else None),
        "strike": ("line-through" in decoration) or None,
        "color": self.color_to_excel(props.get("color")),
        # shadow if nonzero digit before shadow color
        "shadow": self._get_shadow(props),
    }
|
||||
def _get_is_bold(self, props: Mapping[str, str]) -> bool | None:
|
||||
weight = props.get("font-weight")
|
||||
if weight:
|
||||
return self.BOLD_MAP.get(weight)
|
||||
return None
|
||||
|
||||
def _get_is_italic(self, props: Mapping[str, str]) -> bool | None:
|
||||
font_style = props.get("font-style")
|
||||
if font_style:
|
||||
return self.ITALIC_MAP.get(font_style)
|
||||
return None
|
||||
|
||||
def _get_decoration(self, props: Mapping[str, str]) -> Sequence[str]:
|
||||
decoration = props.get("text-decoration")
|
||||
if decoration is not None:
|
||||
return decoration.split()
|
||||
else:
|
||||
return ()
|
||||
|
||||
def _get_underline(self, decoration: Sequence[str]) -> str | None:
|
||||
if "underline" in decoration:
|
||||
return "single"
|
||||
return None
|
||||
|
||||
def _get_shadow(self, props: Mapping[str, str]) -> bool | None:
|
||||
if "text-shadow" in props:
|
||||
return bool(re.search("^[^#(]*[1-9]", props["text-shadow"]))
|
||||
return None
|
||||
|
||||
def _get_font_names(self, props: Mapping[str, str]) -> Sequence[str]:
    """Split the CSS font-family list into individual names, honoring
    single/double quoting (with escaped quotes) and stripping bare names."""
    # each alternative matches one family: double-quoted, single-quoted,
    # or an unquoted run up to the next comma
    font_names_tmp = re.findall(
        r"""(?x)
        (
        "(?:[^"]|\\")+"
        |
        '(?:[^']|\\')+'
        |
        [^'",]+
        )(?=,|\s*$)
    """,
        props.get("font-family", ""),
    )

    font_names = []
    for name in font_names_tmp:
        if name[:1] == '"':
            # unwrap double quotes and unescape embedded ones
            name = name[1:-1].replace('\\"', '"')
        elif name[:1] == "'":
            # unwrap single quotes and unescape embedded ones
            name = name[1:-1].replace("\\'", "'")
        else:
            name = name.strip()
        if name:
            font_names.append(name)
    return font_names
|
||||
def _get_font_size(self, props: Mapping[str, str]) -> float | None:
|
||||
size = props.get("font-size")
|
||||
if size is None:
|
||||
return size
|
||||
return self._pt_to_float(size)
|
||||
|
||||
def _select_font_family(self, font_names: Sequence[str]) -> int | None:
|
||||
family = None
|
||||
for name in font_names:
|
||||
family = self.FAMILY_MAP.get(name)
|
||||
if family:
|
||||
break
|
||||
|
||||
return family
|
||||
|
||||
def color_to_excel(self, val: str | None) -> str | None:
|
||||
if val is None:
|
||||
return None
|
||||
|
||||
if self._is_hex_color(val):
|
||||
return self._convert_hex_to_excel(val)
|
||||
|
||||
try:
|
||||
return self.NAMED_COLORS[val]
|
||||
except KeyError:
|
||||
warnings.warn(
|
||||
f"Unhandled color format: {repr(val)}",
|
||||
CSSWarning,
|
||||
stacklevel=find_stack_level(),
|
||||
)
|
||||
return None
|
||||
|
||||
def _is_hex_color(self, color_string: str) -> bool:
|
||||
return bool(color_string.startswith("#"))
|
||||
|
||||
def _convert_hex_to_excel(self, color_string: str) -> str:
|
||||
code = color_string.lstrip("#")
|
||||
if self._is_shorthand_color(color_string):
|
||||
return (code[0] * 2 + code[1] * 2 + code[2] * 2).upper()
|
||||
else:
|
||||
return code.upper()
|
||||
|
||||
def _is_shorthand_color(self, color_string: str) -> bool:
|
||||
"""Check if color code is shorthand.
|
||||
|
||||
#FFF is a shorthand as opposed to full #FFFFFF.
|
||||
"""
|
||||
code = color_string.lstrip("#")
|
||||
if len(code) == 3:
|
||||
return True
|
||||
elif len(code) == 6:
|
||||
return False
|
||||
else:
|
||||
raise ValueError(f"Unexpected color {color_string}")
|
||||
|
||||
|
||||
class ExcelFormatter:
|
||||
"""
|
||||
Class for formatting a DataFrame to a list of ExcelCells,
|
||||
|
||||
Parameters
|
||||
----------
|
||||
df : DataFrame or Styler
|
||||
na_rep: na representation
|
||||
float_format : str, default None
|
||||
Format string for floating point numbers
|
||||
cols : sequence, optional
|
||||
Columns to write
|
||||
header : bool or sequence of str, default True
|
||||
Write out column names. If a list of string is given it is
|
||||
assumed to be aliases for the column names
|
||||
index : bool, default True
|
||||
output row names (index)
|
||||
index_label : str or sequence, default None
|
||||
Column label for index column(s) if desired. If None is given, and
|
||||
`header` and `index` are True, then the index names are used. A
|
||||
sequence should be given if the DataFrame uses MultiIndex.
|
||||
merge_cells : bool, default False
|
||||
Format MultiIndex and Hierarchical Rows as merged cells.
|
||||
inf_rep : str, default `'inf'`
|
||||
representation for np.inf values (which aren't representable in Excel)
|
||||
A `'-'` sign will be added in front of -inf.
|
||||
style_converter : callable, optional
|
||||
This translates Styler styles (CSS) into ExcelWriter styles.
|
||||
Defaults to ``CSSToExcelConverter()``.
|
||||
It should have signature css_declarations string -> excel style.
|
||||
This is only called for body cells.
|
||||
"""
|
||||
|
||||
max_rows = 2**20
|
||||
max_cols = 2**14
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
df,
|
||||
na_rep: str = "",
|
||||
float_format: str | None = None,
|
||||
cols: Sequence[Hashable] | None = None,
|
||||
header: Sequence[Hashable] | bool = True,
|
||||
index: bool = True,
|
||||
index_label: IndexLabel | None = None,
|
||||
merge_cells: bool = False,
|
||||
inf_rep: str = "inf",
|
||||
style_converter: Callable | None = None,
|
||||
) -> None:
|
||||
self.rowcounter = 0
|
||||
self.na_rep = na_rep
|
||||
if not isinstance(df, DataFrame):
|
||||
self.styler = df
|
||||
self.styler._compute() # calculate applied styles
|
||||
df = df.data
|
||||
if style_converter is None:
|
||||
style_converter = CSSToExcelConverter()
|
||||
self.style_converter: Callable | None = style_converter
|
||||
else:
|
||||
self.styler = None
|
||||
self.style_converter = None
|
||||
self.df = df
|
||||
if cols is not None:
|
||||
# all missing, raise
|
||||
if not len(Index(cols).intersection(df.columns)):
|
||||
raise KeyError("passes columns are not ALL present dataframe")
|
||||
|
||||
if len(Index(cols).intersection(df.columns)) != len(set(cols)):
|
||||
# Deprecated in GH#17295, enforced in 1.0.0
|
||||
raise KeyError("Not all names specified in 'columns' are found")
|
||||
|
||||
self.df = df.reindex(columns=cols)
|
||||
|
||||
self.columns = self.df.columns
|
||||
self.float_format = float_format
|
||||
self.index = index
|
||||
self.index_label = index_label
|
||||
self.header = header
|
||||
self.merge_cells = merge_cells
|
||||
self.inf_rep = inf_rep
|
||||
|
||||
@property
|
||||
def header_style(self) -> dict[str, dict[str, str | bool]]:
|
||||
return {
|
||||
"font": {"bold": True},
|
||||
"borders": {
|
||||
"top": "thin",
|
||||
"right": "thin",
|
||||
"bottom": "thin",
|
||||
"left": "thin",
|
||||
},
|
||||
"alignment": {"horizontal": "center", "vertical": "top"},
|
||||
}
|
||||
|
||||
def _format_value(self, val):
    """Coerce one cell value to an Excel-safe scalar: replace NA and
    +/-inf with their configured representations, apply float_format,
    and reject tz-aware datetimes (unsupported by Excel)."""
    if is_scalar(val) and missing.isna(val):
        val = self.na_rep
    elif is_float(val):
        # inf replacement must be checked before float_format is applied
        if missing.isposinf_scalar(val):
            val = self.inf_rep
        elif missing.isneginf_scalar(val):
            val = f"-{self.inf_rep}"
        elif self.float_format is not None:
            val = float(self.float_format % val)
    if getattr(val, "tzinfo", None) is not None:
        raise ValueError(
            "Excel does not support datetimes with "
            "timezones. Please ensure that datetimes "
            "are timezone unaware before writing to Excel."
        )
    return val
|
||||
def _format_header_mi(self) -> Iterable[ExcelCell]:
    """Yield header cells for MultiIndex columns, either as merged cells
    (one row per level, spans merged) or in legacy dot-joined form."""
    if self.columns.nlevels > 1:
        if not self.index:
            raise NotImplementedError(
                "Writing to Excel with MultiIndex columns and no "
                "index ('index'=False) is not yet implemented."
            )

    if not (self._has_aliases or self.header):
        return

    columns = self.columns
    level_strs = columns._format_multi(
        sparsify=self.merge_cells, include_names=False
    )
    level_lengths = get_level_lengths(level_strs)
    coloffset = 0
    lnum = 0

    # shift right past the index columns when rows are a MultiIndex
    if self.index and isinstance(self.df.index, MultiIndex):
        coloffset = len(self.df.index[0]) - 1

    if self.merge_cells:
        # Format multi-index as a merged cells.
        # level names occupy the last index column, one per level row
        for lnum, name in enumerate(columns.names):
            yield ExcelCell(
                row=lnum,
                col=coloffset,
                val=name,
                style=self.header_style,
            )

        for lnum, (spans, levels, level_codes) in enumerate(
            zip(level_lengths, columns.levels, columns.codes)
        ):
            values = levels.take(level_codes)
            for i, span_val in spans.items():
                # only spans > 1 get a merge range
                mergestart, mergeend = None, None
                if span_val > 1:
                    mergestart, mergeend = lnum, coloffset + i + span_val
                yield CssExcelCell(
                    row=lnum,
                    col=coloffset + i + 1,
                    val=values[i],
                    style=self.header_style,
                    css_styles=getattr(self.styler, "ctx_columns", None),
                    css_row=lnum,
                    css_col=i,
                    css_converter=self.style_converter,
                    mergestart=mergestart,
                    mergeend=mergeend,
                )
    else:
        # Format in legacy format with dots to indicate levels.
        for i, values in enumerate(zip(*level_strs)):
            v = ".".join(map(pprint_thing, values))
            yield CssExcelCell(
                row=lnum,
                col=coloffset + i + 1,
                val=v,
                style=self.header_style,
                css_styles=getattr(self.styler, "ctx_columns", None),
                css_row=lnum,
                css_col=i,
                css_converter=self.style_converter,
            )

    # remember the last header row so the body starts below it
    self.rowcounter = lnum
|
||||
def _format_header_regular(self) -> Iterable[ExcelCell]:
    """Yield one styled header cell per column (or per alias) on the
    current row, offset past any index columns."""
    if self._has_aliases or self.header:
        coloffset = 0

        if self.index:
            coloffset = 1
            if isinstance(self.df.index, MultiIndex):
                coloffset = len(self.df.index.names)

        colnames = self.columns
        if self._has_aliases:
            self.header = cast(Sequence, self.header)
            # aliases must cover every column exactly
            if len(self.header) != len(self.columns):
                raise ValueError(
                    f"Writing {len(self.columns)} cols "
                    f"but got {len(self.header)} aliases"
                )
            colnames = self.header

        for colindex, colname in enumerate(colnames):
            yield CssExcelCell(
                row=self.rowcounter,
                col=colindex + coloffset,
                val=colname,
                style=self.header_style,
                css_styles=getattr(self.styler, "ctx_columns", None),
                css_row=0,
                css_col=colindex,
                css_converter=self.style_converter,
            )
|
||||
def _format_header(self) -> Iterable[ExcelCell]:
|
||||
gen: Iterable[ExcelCell]
|
||||
|
||||
if isinstance(self.columns, MultiIndex):
|
||||
gen = self._format_header_mi()
|
||||
else:
|
||||
gen = self._format_header_regular()
|
||||
|
||||
gen2: Iterable[ExcelCell] = ()
|
||||
|
||||
if self.df.index.names:
|
||||
row = [x if x is not None else "" for x in self.df.index.names] + [
|
||||
""
|
||||
] * len(self.columns)
|
||||
if functools.reduce(lambda x, y: x and y, (x != "" for x in row)):
|
||||
gen2 = (
|
||||
ExcelCell(self.rowcounter, colindex, val, self.header_style)
|
||||
for colindex, val in enumerate(row)
|
||||
)
|
||||
self.rowcounter += 1
|
||||
return itertools.chain(gen, gen2)
|
||||
|
||||
def _format_body(self) -> Iterable[ExcelCell]:
|
||||
if isinstance(self.df.index, MultiIndex):
|
||||
return self._format_hierarchical_rows()
|
||||
else:
|
||||
return self._format_regular_rows()
|
||||
|
||||
def _format_regular_rows(self) -> Iterable[ExcelCell]:
    """Yield index cells (label + values) for a flat index, then delegate
    the data cells to ``_generate_body``."""
    if self._has_aliases or self.header:
        self.rowcounter += 1

    # output index and index_label?
    if self.index:
        # check aliases
        # if list only take first as this is not a MultiIndex
        if self.index_label and isinstance(
            self.index_label, (list, tuple, np.ndarray, Index)
        ):
            index_label = self.index_label[0]
        # if string good to go
        elif self.index_label and isinstance(self.index_label, str):
            index_label = self.index_label
        else:
            index_label = self.df.index.names[0]

        # MultiIndex columns occupy an extra header row
        if isinstance(self.columns, MultiIndex):
            self.rowcounter += 1

        if index_label and self.header is not False:
            yield ExcelCell(self.rowcounter - 1, 0, index_label, self.header_style)

        # write index_values
        index_values = self.df.index
        if isinstance(self.df.index, PeriodIndex):
            # Excel cannot hold Periods; convert to timestamps
            index_values = self.df.index.to_timestamp()

        for idx, idxval in enumerate(index_values):
            yield CssExcelCell(
                row=self.rowcounter + idx,
                col=0,
                val=idxval,
                style=self.header_style,
                css_styles=getattr(self.styler, "ctx_index", None),
                css_row=idx,
                css_col=0,
                css_converter=self.style_converter,
            )
        coloffset = 1
    else:
        coloffset = 0

    yield from self._generate_body(coloffset)
|
||||
def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
    """Yield index cells for a MultiIndex — level names, then the level
    values (merged spans or one cell per value) — and then the body."""
    if self._has_aliases or self.header:
        self.rowcounter += 1

    # gcolidx tracks the first data column (== number of index columns)
    gcolidx = 0

    if self.index:
        index_labels = self.df.index.names
        # check for aliases
        if self.index_label and isinstance(
            self.index_label, (list, tuple, np.ndarray, Index)
        ):
            index_labels = self.index_label

        # MultiIndex columns require an extra row
        # with index names (blank if None) for
        # unambiguous round-trip, unless not merging,
        # in which case the names all go on one row Issue #11328
        if isinstance(self.columns, MultiIndex) and self.merge_cells:
            self.rowcounter += 1

        # if index labels are not empty go ahead and dump
        if com.any_not_none(*index_labels) and self.header is not False:
            for cidx, name in enumerate(index_labels):
                yield ExcelCell(self.rowcounter - 1, cidx, name, self.header_style)

        if self.merge_cells:
            # Format hierarchical rows as merged cells.
            level_strs = self.df.index._format_multi(
                sparsify=True, include_names=False
            )
            level_lengths = get_level_lengths(level_strs)

            for spans, levels, level_codes in zip(
                level_lengths, self.df.index.levels, self.df.index.codes
            ):
                values = levels.take(
                    level_codes,
                    allow_fill=levels._can_hold_na,
                    fill_value=levels._na_value,
                )

                for i, span_val in spans.items():
                    # only spans > 1 get a vertical merge range
                    mergestart, mergeend = None, None
                    if span_val > 1:
                        mergestart = self.rowcounter + i + span_val - 1
                        mergeend = gcolidx
                    yield CssExcelCell(
                        row=self.rowcounter + i,
                        col=gcolidx,
                        val=values[i],
                        style=self.header_style,
                        css_styles=getattr(self.styler, "ctx_index", None),
                        css_row=i,
                        css_col=gcolidx,
                        css_converter=self.style_converter,
                        mergestart=mergestart,
                        mergeend=mergeend,
                    )
                gcolidx += 1

        else:
            # Format hierarchical rows with non-merged values.
            for indexcolvals in zip(*self.df.index):
                for idx, indexcolval in enumerate(indexcolvals):
                    yield CssExcelCell(
                        row=self.rowcounter + idx,
                        col=gcolidx,
                        val=indexcolval,
                        style=self.header_style,
                        css_styles=getattr(self.styler, "ctx_index", None),
                        css_row=idx,
                        css_col=gcolidx,
                        css_converter=self.style_converter,
                    )
                gcolidx += 1

    yield from self._generate_body(gcolidx)
|
||||
@property
def _has_aliases(self) -> bool:
    """Whether the aliases for column names are present."""
    # a list-like header means "use these as column aliases"
    return is_list_like(self.header)
|
||||
def _generate_body(self, coloffset: int) -> Iterable[ExcelCell]:
|
||||
# Write the body of the frame data series by series.
|
||||
for colidx in range(len(self.columns)):
|
||||
series = self.df.iloc[:, colidx]
|
||||
for i, val in enumerate(series):
|
||||
yield CssExcelCell(
|
||||
row=self.rowcounter + i,
|
||||
col=colidx + coloffset,
|
||||
val=val,
|
||||
style=None,
|
||||
css_styles=getattr(self.styler, "ctx", None),
|
||||
css_row=i,
|
||||
css_col=colidx,
|
||||
css_converter=self.style_converter,
|
||||
)
|
||||
|
||||
def get_formatted_cells(self) -> Iterable[ExcelCell]:
|
||||
for cell in itertools.chain(self._format_header(), self._format_body()):
|
||||
cell.val = self._format_value(cell.val)
|
||||
yield cell
|
||||
|
||||
@doc(storage_options=_shared_docs["storage_options"])
def write(
    self,
    writer: FilePath | WriteExcelBuffer | ExcelWriter,
    sheet_name: str = "Sheet1",
    startrow: int = 0,
    startcol: int = 0,
    freeze_panes: tuple[int, int] | None = None,
    engine: str | None = None,
    storage_options: StorageOptions | None = None,
    engine_kwargs: dict | None = None,
) -> None:
    """
    Write the formatted cells to ``writer``, opening (and closing) an
    ExcelWriter when a path or buffer is passed instead of a writer.

    writer : path-like, file-like, or ExcelWriter object
        File path or existing ExcelWriter
    sheet_name : str, default 'Sheet1'
        Name of sheet which will contain DataFrame
    startrow :
        upper left cell row to dump data frame
    startcol :
        upper left cell column to dump data frame
    freeze_panes : tuple of integer (length 2), default None
        Specifies the one-based bottommost row and rightmost column that
        is to be frozen
    engine : string, default None
        write engine to use if writer is a path - you can also set this
        via the options ``io.excel.xlsx.writer``,
        or ``io.excel.xlsm.writer``.

    {storage_options}

    engine_kwargs: dict, optional
        Arbitrary keyword arguments passed to excel engine.
    """
    from pandas.io.excel import ExcelWriter

    # refuse frames that exceed Excel's hard sheet-size limits
    num_rows, num_cols = self.df.shape
    if num_rows > self.max_rows or num_cols > self.max_cols:
        raise ValueError(
            f"This sheet is too large! Your sheet size is: {num_rows}, {num_cols} "
            f"Max sheet size is: {self.max_rows}, {self.max_cols}"
        )

    if engine_kwargs is None:
        engine_kwargs = {}

    formatted_cells = self.get_formatted_cells()
    if isinstance(writer, ExcelWriter):
        # caller owns the writer; do not close it here
        need_save = False
    else:
        # a path/buffer was passed: open a writer and close it below
        writer = ExcelWriter(
            writer,
            engine=engine,
            storage_options=storage_options,
            engine_kwargs=engine_kwargs,
        )
        need_save = True

    try:
        writer._write_cells(
            formatted_cells,
            sheet_name,
            startrow=startrow,
            startcol=startcol,
            freeze_panes=freeze_panes,
        )
    finally:
        # make sure to close opened file handles
        if need_save:
            writer.close()
|
2058
lib/python3.13/site-packages/pandas/io/formats/format.py
Normal file
2058
lib/python3.13/site-packages/pandas/io/formats/format.py
Normal file
File diff suppressed because it is too large
Load Diff
646
lib/python3.13/site-packages/pandas/io/formats/html.py
Normal file
646
lib/python3.13/site-packages/pandas/io/formats/html.py
Normal file
@ -0,0 +1,646 @@
|
||||
"""
|
||||
Module for formatting output data in HTML.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from textwrap import dedent
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Final,
|
||||
cast,
|
||||
)
|
||||
|
||||
from pandas._config import get_option
|
||||
|
||||
from pandas._libs import lib
|
||||
|
||||
from pandas import (
|
||||
MultiIndex,
|
||||
option_context,
|
||||
)
|
||||
|
||||
from pandas.io.common import is_url
|
||||
from pandas.io.formats.format import (
|
||||
DataFrameFormatter,
|
||||
get_level_lengths,
|
||||
)
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import (
|
||||
Hashable,
|
||||
Iterable,
|
||||
Mapping,
|
||||
)
|
||||
|
||||
|
||||
class HTMLFormatter:
|
||||
"""
|
||||
Internal class for formatting output data in html.
|
||||
This class is intended for shared functionality between
|
||||
DataFrame.to_html() and DataFrame._repr_html_().
|
||||
Any logic in common with other output formatting methods
|
||||
should ideally be inherited from classes in format.py
|
||||
and this class responsible for only producing html markup.
|
||||
"""
|
||||
|
||||
indent_delta: Final = 2
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
formatter: DataFrameFormatter,
|
||||
classes: str | list[str] | tuple[str, ...] | None = None,
|
||||
border: int | bool | None = None,
|
||||
table_id: str | None = None,
|
||||
render_links: bool = False,
|
||||
) -> None:
|
||||
self.fmt = formatter
|
||||
self.classes = classes
|
||||
|
||||
self.frame = self.fmt.frame
|
||||
self.columns = self.fmt.tr_frame.columns
|
||||
self.elements: list[str] = []
|
||||
self.bold_rows = self.fmt.bold_rows
|
||||
self.escape = self.fmt.escape
|
||||
self.show_dimensions = self.fmt.show_dimensions
|
||||
if border is None or border is True:
|
||||
border = cast(int, get_option("display.html.border"))
|
||||
elif not border:
|
||||
border = None
|
||||
|
||||
self.border = border
|
||||
self.table_id = table_id
|
||||
self.render_links = render_links
|
||||
|
||||
self.col_space = {}
|
||||
is_multi_index = isinstance(self.columns, MultiIndex)
|
||||
for column, value in self.fmt.col_space.items():
|
||||
col_space_value = f"{value}px" if isinstance(value, int) else value
|
||||
self.col_space[column] = col_space_value
|
||||
# GH 53885: Handling case where column is index
|
||||
# Flatten the data in the multi index and add in the map
|
||||
if is_multi_index and isinstance(column, tuple):
|
||||
for column_index in column:
|
||||
self.col_space[str(column_index)] = col_space_value
|
||||
|
||||
def to_string(self) -> str:
|
||||
lines = self.render()
|
||||
if any(isinstance(x, str) for x in lines):
|
||||
lines = [str(x) for x in lines]
|
||||
return "\n".join(lines)
|
||||
|
||||
def render(self) -> list[str]:
|
||||
self._write_table()
|
||||
|
||||
if self.should_show_dimensions:
|
||||
by = chr(215) # × # noqa: RUF003
|
||||
self.write(
|
||||
f"<p>{len(self.frame)} rows {by} {len(self.frame.columns)} columns</p>"
|
||||
)
|
||||
|
||||
return self.elements
|
||||
|
||||
@property
|
||||
def should_show_dimensions(self) -> bool:
|
||||
return self.fmt.should_show_dimensions
|
||||
|
||||
@property
|
||||
def show_row_idx_names(self) -> bool:
|
||||
return self.fmt.show_row_idx_names
|
||||
|
||||
@property
|
||||
def show_col_idx_names(self) -> bool:
|
||||
return self.fmt.show_col_idx_names
|
||||
|
||||
@property
|
||||
def row_levels(self) -> int:
|
||||
if self.fmt.index:
|
||||
# showing (row) index
|
||||
return self.frame.index.nlevels
|
||||
elif self.show_col_idx_names:
|
||||
# see gh-22579
|
||||
# Column misalignment also occurs for
|
||||
# a standard index when the columns index is named.
|
||||
# If the row index is not displayed a column of
|
||||
# blank cells need to be included before the DataFrame values.
|
||||
return 1
|
||||
# not showing (row) index
|
||||
return 0
|
||||
|
||||
def _get_columns_formatted_values(self) -> Iterable:
|
||||
return self.columns
|
||||
|
||||
@property
|
||||
def is_truncated(self) -> bool:
|
||||
return self.fmt.is_truncated
|
||||
|
||||
@property
|
||||
def ncols(self) -> int:
|
||||
return len(self.fmt.tr_frame.columns)
|
||||
|
||||
def write(self, s: Any, indent: int = 0) -> None:
|
||||
rs = pprint_thing(s)
|
||||
self.elements.append(" " * indent + rs)
|
||||
|
||||
def write_th(
|
||||
self, s: Any, header: bool = False, indent: int = 0, tags: str | None = None
|
||||
) -> None:
|
||||
"""
|
||||
Method for writing a formatted <th> cell.
|
||||
|
||||
If col_space is set on the formatter then that is used for
|
||||
the value of min-width.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
s : object
|
||||
The data to be written inside the cell.
|
||||
header : bool, default False
|
||||
Set to True if the <th> is for use inside <thead>. This will
|
||||
cause min-width to be set if there is one.
|
||||
indent : int, default 0
|
||||
The indentation level of the cell.
|
||||
tags : str, default None
|
||||
Tags to include in the cell.
|
||||
|
||||
Returns
|
||||
-------
|
||||
A written <th> cell.
|
||||
"""
|
||||
col_space = self.col_space.get(s, None)
|
||||
|
||||
if header and col_space is not None:
|
||||
tags = tags or ""
|
||||
tags += f'style="min-width: {col_space};"'
|
||||
|
||||
self._write_cell(s, kind="th", indent=indent, tags=tags)
|
||||
|
||||
def write_td(self, s: Any, indent: int = 0, tags: str | None = None) -> None:
|
||||
self._write_cell(s, kind="td", indent=indent, tags=tags)
|
||||
|
||||
def _write_cell(
|
||||
self, s: Any, kind: str = "td", indent: int = 0, tags: str | None = None
|
||||
) -> None:
|
||||
if tags is not None:
|
||||
start_tag = f"<{kind} {tags}>"
|
||||
else:
|
||||
start_tag = f"<{kind}>"
|
||||
|
||||
if self.escape:
|
||||
# escape & first to prevent double escaping of &
|
||||
esc = {"&": r"&", "<": r"<", ">": r">"}
|
||||
else:
|
||||
esc = {}
|
||||
|
||||
rs = pprint_thing(s, escape_chars=esc).strip()
|
||||
|
||||
if self.render_links and is_url(rs):
|
||||
rs_unescaped = pprint_thing(s, escape_chars={}).strip()
|
||||
start_tag += f'<a href="{rs_unescaped}" target="_blank">'
|
||||
end_a = "</a>"
|
||||
else:
|
||||
end_a = ""
|
||||
|
||||
self.write(f"{start_tag}{rs}{end_a}</{kind}>", indent)
|
||||
|
||||
def write_tr(
|
||||
self,
|
||||
line: Iterable,
|
||||
indent: int = 0,
|
||||
indent_delta: int = 0,
|
||||
header: bool = False,
|
||||
align: str | None = None,
|
||||
tags: dict[int, str] | None = None,
|
||||
nindex_levels: int = 0,
|
||||
) -> None:
|
||||
if tags is None:
|
||||
tags = {}
|
||||
|
||||
if align is None:
|
||||
self.write("<tr>", indent)
|
||||
else:
|
||||
self.write(f'<tr style="text-align: {align};">', indent)
|
||||
indent += indent_delta
|
||||
|
||||
for i, s in enumerate(line):
|
||||
val_tag = tags.get(i, None)
|
||||
if header or (self.bold_rows and i < nindex_levels):
|
||||
self.write_th(s, indent=indent, header=header, tags=val_tag)
|
||||
else:
|
||||
self.write_td(s, indent, tags=val_tag)
|
||||
|
||||
indent -= indent_delta
|
||||
self.write("</tr>", indent)
|
||||
|
||||
def _write_table(self, indent: int = 0) -> None:
|
||||
_classes = ["dataframe"] # Default class.
|
||||
use_mathjax = get_option("display.html.use_mathjax")
|
||||
if not use_mathjax:
|
||||
_classes.append("tex2jax_ignore")
|
||||
if self.classes is not None:
|
||||
if isinstance(self.classes, str):
|
||||
self.classes = self.classes.split()
|
||||
if not isinstance(self.classes, (list, tuple)):
|
||||
raise TypeError(
|
||||
"classes must be a string, list, "
|
||||
f"or tuple, not {type(self.classes)}"
|
||||
)
|
||||
_classes.extend(self.classes)
|
||||
|
||||
if self.table_id is None:
|
||||
id_section = ""
|
||||
else:
|
||||
id_section = f' id="{self.table_id}"'
|
||||
|
||||
if self.border is None:
|
||||
border_attr = ""
|
||||
else:
|
||||
border_attr = f' border="{self.border}"'
|
||||
|
||||
self.write(
|
||||
f'<table{border_attr} class="{" ".join(_classes)}"{id_section}>',
|
||||
indent,
|
||||
)
|
||||
|
||||
if self.fmt.header or self.show_row_idx_names:
|
||||
self._write_header(indent + self.indent_delta)
|
||||
|
||||
self._write_body(indent + self.indent_delta)
|
||||
|
||||
self.write("</table>", indent)
|
||||
|
||||
def _write_col_header(self, indent: int) -> None:
|
||||
row: list[Hashable]
|
||||
is_truncated_horizontally = self.fmt.is_truncated_horizontally
|
||||
if isinstance(self.columns, MultiIndex):
|
||||
template = 'colspan="{span:d}" halign="left"'
|
||||
|
||||
sentinel: lib.NoDefault | bool
|
||||
if self.fmt.sparsify:
|
||||
# GH3547
|
||||
sentinel = lib.no_default
|
||||
else:
|
||||
sentinel = False
|
||||
levels = self.columns._format_multi(sparsify=sentinel, include_names=False)
|
||||
level_lengths = get_level_lengths(levels, sentinel)
|
||||
inner_lvl = len(level_lengths) - 1
|
||||
for lnum, (records, values) in enumerate(zip(level_lengths, levels)):
|
||||
if is_truncated_horizontally:
|
||||
# modify the header lines
|
||||
ins_col = self.fmt.tr_col_num
|
||||
if self.fmt.sparsify:
|
||||
recs_new = {}
|
||||
# Increment tags after ... col.
|
||||
for tag, span in list(records.items()):
|
||||
if tag >= ins_col:
|
||||
recs_new[tag + 1] = span
|
||||
elif tag + span > ins_col:
|
||||
recs_new[tag] = span + 1
|
||||
if lnum == inner_lvl:
|
||||
values = (
|
||||
values[:ins_col] + ("...",) + values[ins_col:]
|
||||
)
|
||||
else:
|
||||
# sparse col headers do not receive a ...
|
||||
values = (
|
||||
values[:ins_col]
|
||||
+ (values[ins_col - 1],)
|
||||
+ values[ins_col:]
|
||||
)
|
||||
else:
|
||||
recs_new[tag] = span
|
||||
# if ins_col lies between tags, all col headers
|
||||
# get ...
|
||||
if tag + span == ins_col:
|
||||
recs_new[ins_col] = 1
|
||||
values = values[:ins_col] + ("...",) + values[ins_col:]
|
||||
records = recs_new
|
||||
inner_lvl = len(level_lengths) - 1
|
||||
if lnum == inner_lvl:
|
||||
records[ins_col] = 1
|
||||
else:
|
||||
recs_new = {}
|
||||
for tag, span in list(records.items()):
|
||||
if tag >= ins_col:
|
||||
recs_new[tag + 1] = span
|
||||
else:
|
||||
recs_new[tag] = span
|
||||
recs_new[ins_col] = 1
|
||||
records = recs_new
|
||||
values = values[:ins_col] + ["..."] + values[ins_col:]
|
||||
|
||||
# see gh-22579
|
||||
# Column Offset Bug with to_html(index=False) with
|
||||
# MultiIndex Columns and Index.
|
||||
# Initially fill row with blank cells before column names.
|
||||
# TODO: Refactor to remove code duplication with code
|
||||
# block below for standard columns index.
|
||||
row = [""] * (self.row_levels - 1)
|
||||
if self.fmt.index or self.show_col_idx_names:
|
||||
# see gh-22747
|
||||
# If to_html(index_names=False) do not show columns
|
||||
# index names.
|
||||
# TODO: Refactor to use _get_column_name_list from
|
||||
# DataFrameFormatter class and create a
|
||||
# _get_formatted_column_labels function for code
|
||||
# parity with DataFrameFormatter class.
|
||||
if self.fmt.show_index_names:
|
||||
name = self.columns.names[lnum]
|
||||
row.append(pprint_thing(name or ""))
|
||||
else:
|
||||
row.append("")
|
||||
|
||||
tags = {}
|
||||
j = len(row)
|
||||
for i, v in enumerate(values):
|
||||
if i in records:
|
||||
if records[i] > 1:
|
||||
tags[j] = template.format(span=records[i])
|
||||
else:
|
||||
continue
|
||||
j += 1
|
||||
row.append(v)
|
||||
self.write_tr(row, indent, self.indent_delta, tags=tags, header=True)
|
||||
else:
|
||||
# see gh-22579
|
||||
# Column misalignment also occurs for
|
||||
# a standard index when the columns index is named.
|
||||
# Initially fill row with blank cells before column names.
|
||||
# TODO: Refactor to remove code duplication with code block
|
||||
# above for columns MultiIndex.
|
||||
row = [""] * (self.row_levels - 1)
|
||||
if self.fmt.index or self.show_col_idx_names:
|
||||
# see gh-22747
|
||||
# If to_html(index_names=False) do not show columns
|
||||
# index names.
|
||||
# TODO: Refactor to use _get_column_name_list from
|
||||
# DataFrameFormatter class.
|
||||
if self.fmt.show_index_names:
|
||||
row.append(self.columns.name or "")
|
||||
else:
|
||||
row.append("")
|
||||
row.extend(self._get_columns_formatted_values())
|
||||
align = self.fmt.justify
|
||||
|
||||
if is_truncated_horizontally:
|
||||
ins_col = self.row_levels + self.fmt.tr_col_num
|
||||
row.insert(ins_col, "...")
|
||||
|
||||
self.write_tr(row, indent, self.indent_delta, header=True, align=align)
|
||||
|
||||
def _write_row_header(self, indent: int) -> None:
|
||||
is_truncated_horizontally = self.fmt.is_truncated_horizontally
|
||||
row = [x if x is not None else "" for x in self.frame.index.names] + [""] * (
|
||||
self.ncols + (1 if is_truncated_horizontally else 0)
|
||||
)
|
||||
self.write_tr(row, indent, self.indent_delta, header=True)
|
||||
|
||||
def _write_header(self, indent: int) -> None:
|
||||
self.write("<thead>", indent)
|
||||
|
||||
if self.fmt.header:
|
||||
self._write_col_header(indent + self.indent_delta)
|
||||
|
||||
if self.show_row_idx_names:
|
||||
self._write_row_header(indent + self.indent_delta)
|
||||
|
||||
self.write("</thead>", indent)
|
||||
|
||||
def _get_formatted_values(self) -> dict[int, list[str]]:
|
||||
with option_context("display.max_colwidth", None):
|
||||
fmt_values = {i: self.fmt.format_col(i) for i in range(self.ncols)}
|
||||
return fmt_values
|
||||
|
||||
def _write_body(self, indent: int) -> None:
|
||||
self.write("<tbody>", indent)
|
||||
fmt_values = self._get_formatted_values()
|
||||
|
||||
# write values
|
||||
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
|
||||
self._write_hierarchical_rows(fmt_values, indent + self.indent_delta)
|
||||
else:
|
||||
self._write_regular_rows(fmt_values, indent + self.indent_delta)
|
||||
|
||||
self.write("</tbody>", indent)
|
||||
|
||||
def _write_regular_rows(
|
||||
self, fmt_values: Mapping[int, list[str]], indent: int
|
||||
) -> None:
|
||||
is_truncated_horizontally = self.fmt.is_truncated_horizontally
|
||||
is_truncated_vertically = self.fmt.is_truncated_vertically
|
||||
|
||||
nrows = len(self.fmt.tr_frame)
|
||||
|
||||
if self.fmt.index:
|
||||
fmt = self.fmt._get_formatter("__index__")
|
||||
if fmt is not None:
|
||||
index_values = self.fmt.tr_frame.index.map(fmt)
|
||||
else:
|
||||
# only reached with non-Multi index
|
||||
index_values = self.fmt.tr_frame.index._format_flat(include_name=False)
|
||||
|
||||
row: list[str] = []
|
||||
for i in range(nrows):
|
||||
if is_truncated_vertically and i == (self.fmt.tr_row_num):
|
||||
str_sep_row = ["..."] * len(row)
|
||||
self.write_tr(
|
||||
str_sep_row,
|
||||
indent,
|
||||
self.indent_delta,
|
||||
tags=None,
|
||||
nindex_levels=self.row_levels,
|
||||
)
|
||||
|
||||
row = []
|
||||
if self.fmt.index:
|
||||
row.append(index_values[i])
|
||||
# see gh-22579
|
||||
# Column misalignment also occurs for
|
||||
# a standard index when the columns index is named.
|
||||
# Add blank cell before data cells.
|
||||
elif self.show_col_idx_names:
|
||||
row.append("")
|
||||
row.extend(fmt_values[j][i] for j in range(self.ncols))
|
||||
|
||||
if is_truncated_horizontally:
|
||||
dot_col_ix = self.fmt.tr_col_num + self.row_levels
|
||||
row.insert(dot_col_ix, "...")
|
||||
self.write_tr(
|
||||
row, indent, self.indent_delta, tags=None, nindex_levels=self.row_levels
|
||||
)
|
||||
|
||||
def _write_hierarchical_rows(
|
||||
self, fmt_values: Mapping[int, list[str]], indent: int
|
||||
) -> None:
|
||||
template = 'rowspan="{span}" valign="top"'
|
||||
|
||||
is_truncated_horizontally = self.fmt.is_truncated_horizontally
|
||||
is_truncated_vertically = self.fmt.is_truncated_vertically
|
||||
frame = self.fmt.tr_frame
|
||||
nrows = len(frame)
|
||||
|
||||
assert isinstance(frame.index, MultiIndex)
|
||||
idx_values = frame.index._format_multi(sparsify=False, include_names=False)
|
||||
idx_values = list(zip(*idx_values))
|
||||
|
||||
if self.fmt.sparsify:
|
||||
# GH3547
|
||||
sentinel = lib.no_default
|
||||
levels = frame.index._format_multi(sparsify=sentinel, include_names=False)
|
||||
|
||||
level_lengths = get_level_lengths(levels, sentinel)
|
||||
inner_lvl = len(level_lengths) - 1
|
||||
if is_truncated_vertically:
|
||||
# Insert ... row and adjust idx_values and
|
||||
# level_lengths to take this into account.
|
||||
ins_row = self.fmt.tr_row_num
|
||||
inserted = False
|
||||
for lnum, records in enumerate(level_lengths):
|
||||
rec_new = {}
|
||||
for tag, span in list(records.items()):
|
||||
if tag >= ins_row:
|
||||
rec_new[tag + 1] = span
|
||||
elif tag + span > ins_row:
|
||||
rec_new[tag] = span + 1
|
||||
|
||||
# GH 14882 - Make sure insertion done once
|
||||
if not inserted:
|
||||
dot_row = list(idx_values[ins_row - 1])
|
||||
dot_row[-1] = "..."
|
||||
idx_values.insert(ins_row, tuple(dot_row))
|
||||
inserted = True
|
||||
else:
|
||||
dot_row = list(idx_values[ins_row])
|
||||
dot_row[inner_lvl - lnum] = "..."
|
||||
idx_values[ins_row] = tuple(dot_row)
|
||||
else:
|
||||
rec_new[tag] = span
|
||||
# If ins_row lies between tags, all cols idx cols
|
||||
# receive ...
|
||||
if tag + span == ins_row:
|
||||
rec_new[ins_row] = 1
|
||||
if lnum == 0:
|
||||
idx_values.insert(
|
||||
ins_row, tuple(["..."] * len(level_lengths))
|
||||
)
|
||||
|
||||
# GH 14882 - Place ... in correct level
|
||||
elif inserted:
|
||||
dot_row = list(idx_values[ins_row])
|
||||
dot_row[inner_lvl - lnum] = "..."
|
||||
idx_values[ins_row] = tuple(dot_row)
|
||||
level_lengths[lnum] = rec_new
|
||||
|
||||
level_lengths[inner_lvl][ins_row] = 1
|
||||
for ix_col in fmt_values:
|
||||
fmt_values[ix_col].insert(ins_row, "...")
|
||||
nrows += 1
|
||||
|
||||
for i in range(nrows):
|
||||
row = []
|
||||
tags = {}
|
||||
|
||||
sparse_offset = 0
|
||||
j = 0
|
||||
for records, v in zip(level_lengths, idx_values[i]):
|
||||
if i in records:
|
||||
if records[i] > 1:
|
||||
tags[j] = template.format(span=records[i])
|
||||
else:
|
||||
sparse_offset += 1
|
||||
continue
|
||||
|
||||
j += 1
|
||||
row.append(v)
|
||||
|
||||
row.extend(fmt_values[j][i] for j in range(self.ncols))
|
||||
if is_truncated_horizontally:
|
||||
row.insert(
|
||||
self.row_levels - sparse_offset + self.fmt.tr_col_num, "..."
|
||||
)
|
||||
self.write_tr(
|
||||
row,
|
||||
indent,
|
||||
self.indent_delta,
|
||||
tags=tags,
|
||||
nindex_levels=len(levels) - sparse_offset,
|
||||
)
|
||||
else:
|
||||
row = []
|
||||
for i in range(len(frame)):
|
||||
if is_truncated_vertically and i == (self.fmt.tr_row_num):
|
||||
str_sep_row = ["..."] * len(row)
|
||||
self.write_tr(
|
||||
str_sep_row,
|
||||
indent,
|
||||
self.indent_delta,
|
||||
tags=None,
|
||||
nindex_levels=self.row_levels,
|
||||
)
|
||||
|
||||
idx_values = list(
|
||||
zip(*frame.index._format_multi(sparsify=False, include_names=False))
|
||||
)
|
||||
row = []
|
||||
row.extend(idx_values[i])
|
||||
row.extend(fmt_values[j][i] for j in range(self.ncols))
|
||||
if is_truncated_horizontally:
|
||||
row.insert(self.row_levels + self.fmt.tr_col_num, "...")
|
||||
self.write_tr(
|
||||
row,
|
||||
indent,
|
||||
self.indent_delta,
|
||||
tags=None,
|
||||
nindex_levels=frame.index.nlevels,
|
||||
)
|
||||
|
||||
|
||||
class NotebookFormatter(HTMLFormatter):
|
||||
"""
|
||||
Internal class for formatting output data in html for display in Jupyter
|
||||
Notebooks. This class is intended for functionality specific to
|
||||
DataFrame._repr_html_() and DataFrame.to_html(notebook=True)
|
||||
"""
|
||||
|
||||
def _get_formatted_values(self) -> dict[int, list[str]]:
|
||||
return {i: self.fmt.format_col(i) for i in range(self.ncols)}
|
||||
|
||||
def _get_columns_formatted_values(self) -> list[str]:
|
||||
# only reached with non-Multi Index
|
||||
return self.columns._format_flat(include_name=False)
|
||||
|
||||
def write_style(self) -> None:
|
||||
# We use the "scoped" attribute here so that the desired
|
||||
# style properties for the data frame are not then applied
|
||||
# throughout the entire notebook.
|
||||
template_first = """\
|
||||
<style scoped>"""
|
||||
template_last = """\
|
||||
</style>"""
|
||||
template_select = """\
|
||||
.dataframe %s {
|
||||
%s: %s;
|
||||
}"""
|
||||
element_props = [
|
||||
("tbody tr th:only-of-type", "vertical-align", "middle"),
|
||||
("tbody tr th", "vertical-align", "top"),
|
||||
]
|
||||
if isinstance(self.columns, MultiIndex):
|
||||
element_props.append(("thead tr th", "text-align", "left"))
|
||||
if self.show_row_idx_names:
|
||||
element_props.append(
|
||||
("thead tr:last-of-type th", "text-align", "right")
|
||||
)
|
||||
else:
|
||||
element_props.append(("thead th", "text-align", "right"))
|
||||
template_mid = "\n\n".join(template_select % t for t in element_props)
|
||||
template = dedent(f"{template_first}\n{template_mid}\n{template_last}")
|
||||
self.write(template)
|
||||
|
||||
def render(self) -> list[str]:
|
||||
self.write("<div>")
|
||||
self.write_style()
|
||||
super().render()
|
||||
self.write("</div>")
|
||||
return self.elements
|
1101
lib/python3.13/site-packages/pandas/io/formats/info.py
Normal file
1101
lib/python3.13/site-packages/pandas/io/formats/info.py
Normal file
File diff suppressed because it is too large
Load Diff
572
lib/python3.13/site-packages/pandas/io/formats/printing.py
Normal file
572
lib/python3.13/site-packages/pandas/io/formats/printing.py
Normal file
@ -0,0 +1,572 @@
|
||||
"""
|
||||
Printing tools.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import (
|
||||
Iterable,
|
||||
Mapping,
|
||||
Sequence,
|
||||
)
|
||||
import sys
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
from unicodedata import east_asian_width
|
||||
|
||||
from pandas._config import get_option
|
||||
|
||||
from pandas.core.dtypes.inference import is_sequence
|
||||
|
||||
from pandas.io.formats.console import get_console_size
|
||||
|
||||
EscapeChars = Union[Mapping[str, str], Iterable[str]]
|
||||
_KT = TypeVar("_KT")
|
||||
_VT = TypeVar("_VT")
|
||||
|
||||
|
||||
def adjoin(space: int, *lists: list[str], **kwargs) -> str:
|
||||
"""
|
||||
Glues together two sets of strings using the amount of space requested.
|
||||
The idea is to prettify.
|
||||
|
||||
----------
|
||||
space : int
|
||||
number of spaces for padding
|
||||
lists : str
|
||||
list of str which being joined
|
||||
strlen : callable
|
||||
function used to calculate the length of each str. Needed for unicode
|
||||
handling.
|
||||
justfunc : callable
|
||||
function used to justify str. Needed for unicode handling.
|
||||
"""
|
||||
strlen = kwargs.pop("strlen", len)
|
||||
justfunc = kwargs.pop("justfunc", _adj_justify)
|
||||
|
||||
newLists = []
|
||||
lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
|
||||
# not the last one
|
||||
lengths.append(max(map(len, lists[-1])))
|
||||
maxLen = max(map(len, lists))
|
||||
for i, lst in enumerate(lists):
|
||||
nl = justfunc(lst, lengths[i], mode="left")
|
||||
nl = ([" " * lengths[i]] * (maxLen - len(lst))) + nl
|
||||
newLists.append(nl)
|
||||
toJoin = zip(*newLists)
|
||||
return "\n".join("".join(lines) for lines in toJoin)
|
||||
|
||||
|
||||
def _adj_justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]:
|
||||
"""
|
||||
Perform ljust, center, rjust against string or list-like
|
||||
"""
|
||||
if mode == "left":
|
||||
return [x.ljust(max_len) for x in texts]
|
||||
elif mode == "center":
|
||||
return [x.center(max_len) for x in texts]
|
||||
else:
|
||||
return [x.rjust(max_len) for x in texts]
|
||||
|
||||
|
||||
# Unicode consolidation
|
||||
# ---------------------
|
||||
#
|
||||
# pprinting utility functions for generating Unicode text or
|
||||
# bytes(3.x)/str(2.x) representations of objects.
|
||||
# Try to use these as much as possible rather than rolling your own.
|
||||
#
|
||||
# When to use
|
||||
# -----------
|
||||
#
|
||||
# 1) If you're writing code internal to pandas (no I/O directly involved),
|
||||
# use pprint_thing().
|
||||
#
|
||||
# It will always return unicode text which can handled by other
|
||||
# parts of the package without breakage.
|
||||
#
|
||||
# 2) if you need to write something out to file, use
|
||||
# pprint_thing_encoded(encoding).
|
||||
#
|
||||
# If no encoding is specified, it defaults to utf-8. Since encoding pure
|
||||
# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
|
||||
# working with straight ascii.
|
||||
|
||||
|
||||
def _pprint_seq(
|
||||
seq: Sequence, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
|
||||
) -> str:
|
||||
"""
|
||||
internal. pprinter for iterables. you should probably use pprint_thing()
|
||||
rather than calling this directly.
|
||||
|
||||
bounds length of printed sequence, depending on options
|
||||
"""
|
||||
if isinstance(seq, set):
|
||||
fmt = "{{{body}}}"
|
||||
else:
|
||||
fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})"
|
||||
|
||||
if max_seq_items is False:
|
||||
nitems = len(seq)
|
||||
else:
|
||||
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
|
||||
|
||||
s = iter(seq)
|
||||
# handle sets, no slicing
|
||||
r = [
|
||||
pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
|
||||
for i in range(min(nitems, len(seq)))
|
||||
]
|
||||
body = ", ".join(r)
|
||||
|
||||
if nitems < len(seq):
|
||||
body += ", ..."
|
||||
elif isinstance(seq, tuple) and len(seq) == 1:
|
||||
body += ","
|
||||
|
||||
return fmt.format(body=body)
|
||||
|
||||
|
||||
def _pprint_dict(
|
||||
seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
|
||||
) -> str:
|
||||
"""
|
||||
internal. pprinter for iterables. you should probably use pprint_thing()
|
||||
rather than calling this directly.
|
||||
"""
|
||||
fmt = "{{{things}}}"
|
||||
pairs = []
|
||||
|
||||
pfmt = "{key}: {val}"
|
||||
|
||||
if max_seq_items is False:
|
||||
nitems = len(seq)
|
||||
else:
|
||||
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
|
||||
|
||||
for k, v in list(seq.items())[:nitems]:
|
||||
pairs.append(
|
||||
pfmt.format(
|
||||
key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
|
||||
val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
|
||||
)
|
||||
)
|
||||
|
||||
if nitems < len(seq):
|
||||
return fmt.format(things=", ".join(pairs) + ", ...")
|
||||
else:
|
||||
return fmt.format(things=", ".join(pairs))
|
||||
|
||||
|
||||
def pprint_thing(
|
||||
thing: Any,
|
||||
_nest_lvl: int = 0,
|
||||
escape_chars: EscapeChars | None = None,
|
||||
default_escapes: bool = False,
|
||||
quote_strings: bool = False,
|
||||
max_seq_items: int | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
This function is the sanctioned way of converting objects
|
||||
to a string representation and properly handles nested sequences.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
thing : anything to be formatted
|
||||
_nest_lvl : internal use only. pprint_thing() is mutually-recursive
|
||||
with pprint_sequence, this argument is used to keep track of the
|
||||
current nesting level, and limit it.
|
||||
escape_chars : list or dict, optional
|
||||
Characters to escape. If a dict is passed the values are the
|
||||
replacements
|
||||
default_escapes : bool, default False
|
||||
Whether the input escape characters replaces or adds to the defaults
|
||||
max_seq_items : int or None, default None
|
||||
Pass through to other pretty printers to limit sequence printing
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
"""
|
||||
|
||||
def as_escaped_string(
|
||||
thing: Any, escape_chars: EscapeChars | None = escape_chars
|
||||
) -> str:
|
||||
translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
|
||||
if isinstance(escape_chars, dict):
|
||||
if default_escapes:
|
||||
translate.update(escape_chars)
|
||||
else:
|
||||
translate = escape_chars
|
||||
escape_chars = list(escape_chars.keys())
|
||||
else:
|
||||
escape_chars = escape_chars or ()
|
||||
|
||||
result = str(thing)
|
||||
for c in escape_chars:
|
||||
result = result.replace(c, translate[c])
|
||||
return result
|
||||
|
||||
if hasattr(thing, "__next__"):
|
||||
return str(thing)
|
||||
elif isinstance(thing, dict) and _nest_lvl < get_option(
|
||||
"display.pprint_nest_depth"
|
||||
):
|
||||
result = _pprint_dict(
|
||||
thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
|
||||
)
|
||||
elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
|
||||
result = _pprint_seq(
|
||||
thing,
|
||||
_nest_lvl,
|
||||
escape_chars=escape_chars,
|
||||
quote_strings=quote_strings,
|
||||
max_seq_items=max_seq_items,
|
||||
)
|
||||
elif isinstance(thing, str) and quote_strings:
|
||||
result = f"'{as_escaped_string(thing)}'"
|
||||
else:
|
||||
result = as_escaped_string(thing)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def pprint_thing_encoded(
|
||||
object, encoding: str = "utf-8", errors: str = "replace"
|
||||
) -> bytes:
|
||||
value = pprint_thing(object) # get unicode representation of object
|
||||
return value.encode(encoding, errors)
|
||||
|
||||
|
||||
def enable_data_resource_formatter(enable: bool) -> None:
|
||||
if "IPython" not in sys.modules:
|
||||
# definitely not in IPython
|
||||
return
|
||||
from IPython import get_ipython
|
||||
|
||||
ip = get_ipython()
|
||||
if ip is None:
|
||||
# still not in IPython
|
||||
return
|
||||
|
||||
formatters = ip.display_formatter.formatters
|
||||
mimetype = "application/vnd.dataresource+json"
|
||||
|
||||
if enable:
|
||||
if mimetype not in formatters:
|
||||
# define tableschema formatter
|
||||
from IPython.core.formatters import BaseFormatter
|
||||
from traitlets import ObjectName
|
||||
|
||||
class TableSchemaFormatter(BaseFormatter):
|
||||
print_method = ObjectName("_repr_data_resource_")
|
||||
_return_type = (dict,)
|
||||
|
||||
# register it:
|
||||
formatters[mimetype] = TableSchemaFormatter()
|
||||
# enable it if it's been disabled:
|
||||
formatters[mimetype].enabled = True
|
||||
# unregister tableschema mime-type
|
||||
elif mimetype in formatters:
|
||||
formatters[mimetype].enabled = False
|
||||
|
||||
|
||||
def default_pprint(thing: Any, max_seq_items: int | None = None) -> str:
|
||||
return pprint_thing(
|
||||
thing,
|
||||
escape_chars=("\t", "\r", "\n"),
|
||||
quote_strings=True,
|
||||
max_seq_items=max_seq_items,
|
||||
)
|
||||
|
||||
|
||||
def format_object_summary(
|
||||
obj,
|
||||
formatter: Callable,
|
||||
is_justify: bool = True,
|
||||
name: str | None = None,
|
||||
indent_for_name: bool = True,
|
||||
line_break_each_value: bool = False,
|
||||
) -> str:
|
||||
"""
|
||||
Return the formatted obj as a unicode string
|
||||
|
||||
Parameters
|
||||
----------
|
||||
obj : object
|
||||
must be iterable and support __getitem__
|
||||
formatter : callable
|
||||
string formatter for an element
|
||||
is_justify : bool
|
||||
should justify the display
|
||||
name : name, optional
|
||||
defaults to the class name of the obj
|
||||
indent_for_name : bool, default True
|
||||
Whether subsequent lines should be indented to
|
||||
align with the name.
|
||||
line_break_each_value : bool, default False
|
||||
If True, inserts a line break for each value of ``obj``.
|
||||
If False, only break lines when the a line of values gets wider
|
||||
than the display width.
|
||||
|
||||
Returns
|
||||
-------
|
||||
summary string
|
||||
"""
|
||||
display_width, _ = get_console_size()
|
||||
if display_width is None:
|
||||
display_width = get_option("display.width") or 80
|
||||
if name is None:
|
||||
name = type(obj).__name__
|
||||
|
||||
if indent_for_name:
|
||||
name_len = len(name)
|
||||
space1 = f'\n{(" " * (name_len + 1))}'
|
||||
space2 = f'\n{(" " * (name_len + 2))}'
|
||||
else:
|
||||
space1 = "\n"
|
||||
space2 = "\n " # space for the opening '['
|
||||
|
||||
n = len(obj)
|
||||
if line_break_each_value:
|
||||
# If we want to vertically align on each value of obj, we need to
|
||||
# separate values by a line break and indent the values
|
||||
sep = ",\n " + " " * len(name)
|
||||
else:
|
||||
sep = ","
|
||||
max_seq_items = get_option("display.max_seq_items") or n
|
||||
|
||||
# are we a truncated display
|
||||
is_truncated = n > max_seq_items
|
||||
|
||||
# adj can optionally handle unicode eastern asian width
|
||||
adj = get_adjustment()
|
||||
|
||||
def _extend_line(
|
||||
s: str, line: str, value: str, display_width: int, next_line_prefix: str
|
||||
) -> tuple[str, str]:
|
||||
if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
|
||||
s += line.rstrip()
|
||||
line = next_line_prefix
|
||||
line += value
|
||||
return s, line
|
||||
|
||||
def best_len(values: list[str]) -> int:
|
||||
if values:
|
||||
return max(adj.len(x) for x in values)
|
||||
else:
|
||||
return 0
|
||||
|
||||
close = ", "
|
||||
|
||||
if n == 0:
|
||||
summary = f"[]{close}"
|
||||
elif n == 1 and not line_break_each_value:
|
||||
first = formatter(obj[0])
|
||||
summary = f"[{first}]{close}"
|
||||
elif n == 2 and not line_break_each_value:
|
||||
first = formatter(obj[0])
|
||||
last = formatter(obj[-1])
|
||||
summary = f"[{first}, {last}]{close}"
|
||||
else:
|
||||
if max_seq_items == 1:
|
||||
# If max_seq_items=1 show only last element
|
||||
head = []
|
||||
tail = [formatter(x) for x in obj[-1:]]
|
||||
elif n > max_seq_items:
|
||||
n = min(max_seq_items // 2, 10)
|
||||
head = [formatter(x) for x in obj[:n]]
|
||||
tail = [formatter(x) for x in obj[-n:]]
|
||||
else:
|
||||
head = []
|
||||
tail = [formatter(x) for x in obj]
|
||||
|
||||
# adjust all values to max length if needed
|
||||
if is_justify:
|
||||
if line_break_each_value:
|
||||
# Justify each string in the values of head and tail, so the
|
||||
# strings will right align when head and tail are stacked
|
||||
# vertically.
|
||||
head, tail = _justify(head, tail)
|
||||
elif is_truncated or not (
|
||||
len(", ".join(head)) < display_width
|
||||
and len(", ".join(tail)) < display_width
|
||||
):
|
||||
# Each string in head and tail should align with each other
|
||||
max_length = max(best_len(head), best_len(tail))
|
||||
head = [x.rjust(max_length) for x in head]
|
||||
tail = [x.rjust(max_length) for x in tail]
|
||||
# If we are not truncated and we are only a single
|
||||
# line, then don't justify
|
||||
|
||||
if line_break_each_value:
|
||||
# Now head and tail are of type List[Tuple[str]]. Below we
|
||||
# convert them into List[str], so there will be one string per
|
||||
# value. Also truncate items horizontally if wider than
|
||||
# max_space
|
||||
max_space = display_width - len(space2)
|
||||
value = tail[0]
|
||||
max_items = 1
|
||||
for num_items in reversed(range(1, len(value) + 1)):
|
||||
pprinted_seq = _pprint_seq(value, max_seq_items=num_items)
|
||||
if len(pprinted_seq) < max_space:
|
||||
max_items = num_items
|
||||
break
|
||||
head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
|
||||
tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]
|
||||
|
||||
summary = ""
|
||||
line = space2
|
||||
|
||||
for head_value in head:
|
||||
word = head_value + sep + " "
|
||||
summary, line = _extend_line(summary, line, word, display_width, space2)
|
||||
|
||||
if is_truncated:
|
||||
# remove trailing space of last line
|
||||
summary += line.rstrip() + space2 + "..."
|
||||
line = space2
|
||||
|
||||
for tail_item in tail[:-1]:
|
||||
word = tail_item + sep + " "
|
||||
summary, line = _extend_line(summary, line, word, display_width, space2)
|
||||
|
||||
# last value: no sep added + 1 space of width used for trailing ','
|
||||
summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
|
||||
summary += line
|
||||
|
||||
# right now close is either '' or ', '
|
||||
# Now we want to include the ']', but not the maybe space.
|
||||
close = "]" + close.rstrip(" ")
|
||||
summary += close
|
||||
|
||||
if len(summary) > (display_width) or line_break_each_value:
|
||||
summary += space1
|
||||
else: # one row
|
||||
summary += " "
|
||||
|
||||
# remove initial space
|
||||
summary = "[" + summary[len(space2) :]
|
||||
|
||||
return summary
|
||||
|
||||
|
||||
def _justify(
|
||||
head: list[Sequence[str]], tail: list[Sequence[str]]
|
||||
) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]:
|
||||
"""
|
||||
Justify items in head and tail, so they are right-aligned when stacked.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
head : list-like of list-likes of strings
|
||||
tail : list-like of list-likes of strings
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple of list of tuples of strings
|
||||
Same as head and tail, but items are right aligned when stacked
|
||||
vertically.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> _justify([['a', 'b']], [['abc', 'abcd']])
|
||||
([(' a', ' b')], [('abc', 'abcd')])
|
||||
"""
|
||||
combined = head + tail
|
||||
|
||||
# For each position for the sequences in ``combined``,
|
||||
# find the length of the largest string.
|
||||
max_length = [0] * len(combined[0])
|
||||
for inner_seq in combined:
|
||||
length = [len(item) for item in inner_seq]
|
||||
max_length = [max(x, y) for x, y in zip(max_length, length)]
|
||||
|
||||
# justify each item in each list-like in head and tail using max_length
|
||||
head_tuples = [
|
||||
tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head
|
||||
]
|
||||
tail_tuples = [
|
||||
tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail
|
||||
]
|
||||
return head_tuples, tail_tuples
|
||||
|
||||
|
||||
class PrettyDict(dict[_KT, _VT]):
    """Dict extension to support abbreviated __repr__"""

    def __repr__(self) -> str:
        # Delegate to the module-level pretty-printer so long dicts are
        # abbreviated/escaped consistently with other pandas reprs.
        return pprint_thing(self)
|
||||
|
||||
|
||||
class _TextAdjustment:
|
||||
def __init__(self) -> None:
|
||||
self.encoding = get_option("display.encoding")
|
||||
|
||||
def len(self, text: str) -> int:
|
||||
return len(text)
|
||||
|
||||
def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]:
|
||||
"""
|
||||
Perform ljust, center, rjust against string or list-like
|
||||
"""
|
||||
if mode == "left":
|
||||
return [x.ljust(max_len) for x in texts]
|
||||
elif mode == "center":
|
||||
return [x.center(max_len) for x in texts]
|
||||
else:
|
||||
return [x.rjust(max_len) for x in texts]
|
||||
|
||||
def adjoin(self, space: int, *lists, **kwargs) -> str:
|
||||
return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs)
|
||||
|
||||
|
||||
class _EastAsianTextAdjustment(_TextAdjustment):
    """
    Text adjustment that accounts for double-width East Asian characters.

    Width of each character follows the Unicode East Asian Width
    property (https://unicode.org/reports/tr11/); the width assigned to
    "Ambiguous" characters is controlled by the
    ``display.unicode.ambiguous_as_wide`` option.
    """

    def __init__(self) -> None:
        super().__init__()
        # Ambiguous-width characters render as wide in some terminals.
        self.ambiguous_width = (
            2 if get_option("display.unicode.ambiguous_as_wide") else 1
        )

        # East Asian Width category -> number of display columns.
        self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}

    def len(self, text: str) -> int:
        """
        Calculate display width considering unicode East Asian Width
        """
        if not isinstance(text, str):
            return len(text)

        eaw_map = self._EAW_MAP
        ambiguous = self.ambiguous_width
        return sum(eaw_map.get(east_asian_width(ch), ambiguous) for ch in text)

    def justify(
        self, texts: Iterable[str], max_len: int, mode: str = "right"
    ) -> list[str]:
        # str.ljust/center/rjust count characters, not columns, so widen
        # the pad target by the character-count / display-width gap.
        def _target(t: str) -> int:
            return max_len - self.len(t) + len(t)

        if mode == "left":
            return [t.ljust(_target(t)) for t in texts]
        if mode == "center":
            return [t.center(_target(t)) for t in texts]
        return [t.rjust(_target(t)) for t in texts]
|
||||
|
||||
|
||||
def get_adjustment() -> _TextAdjustment:
    """
    Return the text-adjustment engine matching current display options.

    Uses the East Asian Width aware implementation when the
    ``display.unicode.east_asian_width`` option is enabled, otherwise
    the plain one-char-per-column implementation.
    """
    if get_option("display.unicode.east_asian_width"):
        return _EastAsianTextAdjustment()
    return _TextAdjustment()
|
206
lib/python3.13/site-packages/pandas/io/formats/string.py
Normal file
206
lib/python3.13/site-packages/pandas/io/formats/string.py
Normal file
@ -0,0 +1,206 @@
|
||||
"""
|
||||
Module for formatting output data in console (to string).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from shutil import get_terminal_size
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas.io.formats.printing import pprint_thing
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterable
|
||||
|
||||
from pandas.io.formats.format import DataFrameFormatter
|
||||
|
||||
|
||||
class StringFormatter:
    """Formatter for string representation of a dataframe."""

    def __init__(self, fmt: DataFrameFormatter, line_width: int | None = None) -> None:
        self.fmt = fmt
        # text-adjustment engine (handles East Asian width if configured)
        self.adj = fmt.adj
        self.frame = fmt.frame
        # None -> never wrap; otherwise the maximum line width in columns
        self.line_width = line_width

    def to_string(self) -> str:
        """Return the full string representation, with optional dimensions."""
        text = self._get_string_representation()
        if self.fmt.should_show_dimensions:
            text = f"{text}{self.fmt.dimensions_info}"
        return text

    def _get_strcols(self) -> list[list[str]]:
        """Return formatted columns, with '...' separators if truncated."""
        strcols = self.fmt.get_strcols()
        if self.fmt.is_truncated:
            strcols = self._insert_dot_separators(strcols)
        return strcols

    def _get_string_representation(self) -> str:
        """Assemble the frame repr, choosing the wrapping strategy."""
        if self.fmt.frame.empty:
            return self._empty_info_line

        strcols = self._get_strcols()

        if self.line_width is None:
            # no need to wrap around just print the whole frame
            return self.adj.adjoin(1, *strcols)

        if self._need_to_wrap_around:
            return self._join_multiline(strcols)

        return self._fit_strcols_to_terminal_width(strcols)

    @property
    def _empty_info_line(self) -> str:
        # Short summary shown instead of a table when the frame is empty.
        return (
            f"Empty {type(self.frame).__name__}\n"
            f"Columns: {pprint_thing(self.frame.columns)}\n"
            f"Index: {pprint_thing(self.frame.index)}"
        )

    @property
    def _need_to_wrap_around(self) -> bool:
        # max_cols is None or positive -> use the multiline wrap path.
        return bool(self.fmt.max_cols is None or self.fmt.max_cols > 0)

    def _insert_dot_separators(self, strcols: list[list[str]]) -> list[list[str]]:
        """Insert '...' column and/or row markers where the frame is truncated."""
        str_index = self.fmt._get_formatted_index(self.fmt.tr_frame)
        index_length = len(str_index)

        if self.fmt.is_truncated_horizontally:
            strcols = self._insert_dot_separator_horizontal(strcols, index_length)

        if self.fmt.is_truncated_vertically:
            strcols = self._insert_dot_separator_vertical(strcols, index_length)

        return strcols

    @property
    def _adjusted_tr_col_num(self) -> int:
        # Shift the truncation column right by one when an index column
        # is prepended to the output.
        return self.fmt.tr_col_num + 1 if self.fmt.index else self.fmt.tr_col_num

    def _insert_dot_separator_horizontal(
        self, strcols: list[list[str]], index_length: int
    ) -> list[list[str]]:
        """Insert a full '...' column at the horizontal truncation point."""
        strcols.insert(self._adjusted_tr_col_num, [" ..."] * index_length)
        return strcols

    def _insert_dot_separator_vertical(
        self, strcols: list[list[str]], index_length: int
    ) -> list[list[str]]:
        """Insert a row of dots into every column at the vertical truncation point."""
        n_header_rows = index_length - len(self.fmt.tr_frame)
        row_num = self.fmt.tr_row_num
        for ix, col in enumerate(strcols):
            # width of the reference cell in this column
            cwidth = self.adj.len(col[row_num])

            if self.fmt.is_truncated_horizontally:
                is_dot_col = ix == self._adjusted_tr_col_num
            else:
                is_dot_col = False

            # narrow columns get a two-dot marker; the dot column itself
            # always gets three dots
            if cwidth > 3 or is_dot_col:
                dots = "..."
            else:
                dots = ".."

            if ix == 0 and self.fmt.index:
                # index column is left-aligned
                dot_mode = "left"
            elif is_dot_col:
                cwidth = 4
                dot_mode = "right"
            else:
                dot_mode = "right"

            dot_str = self.adj.justify([dots], cwidth, mode=dot_mode)[0]
            col.insert(row_num + n_header_rows, dot_str)
        return strcols

    def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str:
        """Render columns as several stacked tables that each fit line_width."""
        lwidth = self.line_width
        adjoin_width = 1
        strcols = list(strcols_input)

        if self.fmt.index:
            # the index column repeats in every stacked table, so reserve
            # its width up front
            idx = strcols.pop(0)
            lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width

        col_widths = [
            np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0
            for col in strcols
        ]

        assert lwidth is not None
        # partition columns into bins that each fit within lwidth
        col_bins = _binify(col_widths, lwidth)
        nbins = len(col_bins)

        str_lst = []
        start = 0
        for i, end in enumerate(col_bins):
            row = strcols[start:end]
            if self.fmt.index:
                row.insert(0, idx)
            if nbins > 1:
                # continuation marker (backslash) on all but the last bin
                nrows = len(row[-1])
                if end <= len(strcols) and i < nbins - 1:
                    row.append([" \\"] + [" "] * (nrows - 1))
                else:
                    row.append([" "] * nrows)
            str_lst.append(self.adj.adjoin(adjoin_width, *row))
            start = end
        return "\n\n".join(str_lst)

    def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str:
        """Drop middle columns until the repr fits the terminal, then re-render."""
        from pandas import Series

        lines = self.adj.adjoin(1, *strcols).split("\n")
        max_len = Series(lines).str.len().max()
        # plus truncate dot col
        width, _ = get_terminal_size()
        dif = max_len - width
        # '+ 1' to avoid too wide repr (GH PR #17023)
        adj_dif = dif + 1
        col_lens = Series([Series(ele).str.len().max() for ele in strcols])
        n_cols = len(col_lens)
        counter = 0
        # repeatedly drop the middle column until the estimated width fits
        while adj_dif > 0 and n_cols > 1:
            counter += 1
            mid = round(n_cols / 2)
            mid_ix = col_lens.index[mid]
            col_len = col_lens[mid_ix]
            # adjoin adds one
            adj_dif -= col_len + 1
            col_lens = col_lens.drop(mid_ix)
            n_cols = len(col_lens)

        # subtract index column
        max_cols_fitted = n_cols - self.fmt.index
        # GH-21180. Ensure that we print at least two.
        max_cols_fitted = max(max_cols_fitted, 2)
        self.fmt.max_cols_fitted = max_cols_fitted

        # Call again _truncate to cut frame appropriately
        # and then generate string representation
        self.fmt.truncate()
        strcols = self._get_strcols()
        return self.adj.adjoin(1, *strcols)
|
||||
|
||||
|
||||
def _binify(cols: list[int], line_width: int) -> list[int]:
|
||||
adjoin_width = 1
|
||||
bins = []
|
||||
curr_width = 0
|
||||
i_last_column = len(cols) - 1
|
||||
for i, w in enumerate(cols):
|
||||
w_adjoined = w + adjoin_width
|
||||
curr_width += w_adjoined
|
||||
if i_last_column == i:
|
||||
wrap = curr_width + 1 > line_width and i > 0
|
||||
else:
|
||||
wrap = curr_width + 2 > line_width and i > 0
|
||||
if wrap:
|
||||
bins.append(i)
|
||||
curr_width = w_adjoined
|
||||
|
||||
bins.append(len(cols))
|
||||
return bins
|
4136
lib/python3.13/site-packages/pandas/io/formats/style.py
Normal file
4136
lib/python3.13/site-packages/pandas/io/formats/style.py
Normal file
File diff suppressed because it is too large
Load Diff
2497
lib/python3.13/site-packages/pandas/io/formats/style_render.py
Normal file
2497
lib/python3.13/site-packages/pandas/io/formats/style_render.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,16 @@
|
||||
{# Update the html_style/table_structure.html documentation too #}
|
||||
{% if doctype_html %}
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="{{encoding}}">
|
||||
{% if not exclude_styles %}{% include html_style_tpl %}{% endif %}
|
||||
</head>
|
||||
<body>
|
||||
{% include html_table_tpl %}
|
||||
</body>
|
||||
</html>
|
||||
{% elif not doctype_html %}
|
||||
{% if not exclude_styles %}{% include html_style_tpl %}{% endif %}
|
||||
{% include html_table_tpl %}
|
||||
{% endif %}
|
@ -0,0 +1,26 @@
|
||||
{%- block before_style -%}{%- endblock before_style -%}
|
||||
{% block style %}
|
||||
<style type="text/css">
|
||||
{% block table_styles %}
|
||||
{% for s in table_styles %}
|
||||
#T_{{uuid}} {{s.selector}} {
|
||||
{% for p,val in s.props %}
|
||||
{{p}}: {{val}};
|
||||
{% endfor %}
|
||||
}
|
||||
{% endfor %}
|
||||
{% endblock table_styles %}
|
||||
{% block before_cellstyle %}{% endblock before_cellstyle %}
|
||||
{% block cellstyle %}
|
||||
{% for cs in [cellstyle, cellstyle_index, cellstyle_columns] %}
|
||||
{% for s in cs %}
|
||||
{% for selector in s.selectors %}{% if not loop.first %}, {% endif %}#T_{{uuid}}_{{selector}}{% endfor %} {
|
||||
{% for p,val in s.props %}
|
||||
{{p}}: {{val}};
|
||||
{% endfor %}
|
||||
}
|
||||
{% endfor %}
|
||||
{% endfor %}
|
||||
{% endblock cellstyle %}
|
||||
</style>
|
||||
{% endblock style %}
|
@ -0,0 +1,63 @@
|
||||
{% block before_table %}{% endblock before_table %}
|
||||
{% block table %}
|
||||
{% if exclude_styles %}
|
||||
<table>
|
||||
{% else %}
|
||||
<table id="T_{{uuid}}"{% if table_attributes %} {{table_attributes}}{% endif %}>
|
||||
{% endif %}
|
||||
{% block caption %}
|
||||
{% if caption and caption is string %}
|
||||
<caption>{{caption}}</caption>
|
||||
{% elif caption and caption is sequence %}
|
||||
<caption>{{caption[0]}}</caption>
|
||||
{% endif %}
|
||||
{% endblock caption %}
|
||||
{% block thead %}
|
||||
<thead>
|
||||
{% block before_head_rows %}{% endblock %}
|
||||
{% for r in head %}
|
||||
{% block head_tr scoped %}
|
||||
<tr>
|
||||
{% if exclude_styles %}
|
||||
{% for c in r %}
|
||||
{% if c.is_visible != False %}
|
||||
<{{c.type}} {{c.attributes}}>{{c.display_value}}</{{c.type}}>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% else %}
|
||||
{% for c in r %}
|
||||
{% if c.is_visible != False %}
|
||||
<{{c.type}} {%- if c.id is defined %} id="T_{{uuid}}_{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes}}>{{c.display_value}}</{{c.type}}>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
</tr>
|
||||
{% endblock head_tr %}
|
||||
{% endfor %}
|
||||
{% block after_head_rows %}{% endblock %}
|
||||
</thead>
|
||||
{% endblock thead %}
|
||||
{% block tbody %}
|
||||
<tbody>
|
||||
{% block before_rows %}{% endblock before_rows %}
|
||||
{% for r in body %}
|
||||
{% block tr scoped %}
|
||||
<tr>
|
||||
{% if exclude_styles %}
|
||||
{% for c in r %}{% if c.is_visible != False %}
|
||||
<{{c.type}} {{c.attributes}}>{{c.display_value}}</{{c.type}}>
|
||||
{% endif %}{% endfor %}
|
||||
{% else %}
|
||||
{% for c in r %}{% if c.is_visible != False %}
|
||||
<{{c.type}} {%- if c.id is defined %} id="T_{{uuid}}_{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes}}>{{c.display_value}}</{{c.type}}>
|
||||
{% endif %}{% endfor %}
|
||||
{% endif %}
|
||||
</tr>
|
||||
{% endblock tr %}
|
||||
{% endfor %}
|
||||
{% block after_rows %}{% endblock after_rows %}
|
||||
</tbody>
|
||||
{% endblock tbody %}
|
||||
</table>
|
||||
{% endblock table %}
|
||||
{% block after_table %}{% endblock after_table %}
|
@ -0,0 +1,5 @@
|
||||
{% if environment == "longtable" %}
|
||||
{% include "latex_longtable.tpl" %}
|
||||
{% else %}
|
||||
{% include "latex_table.tpl" %}
|
||||
{% endif %}
|
@ -0,0 +1,82 @@
|
||||
\begin{longtable}
|
||||
{%- set position = parse_table(table_styles, 'position') %}
|
||||
{%- if position is not none %}
|
||||
[{{position}}]
|
||||
{%- endif %}
|
||||
{%- set column_format = parse_table(table_styles, 'column_format') %}
|
||||
{% raw %}{{% endraw %}{{column_format}}{% raw %}}{% endraw %}
|
||||
|
||||
{% for style in table_styles %}
|
||||
{% if style['selector'] not in ['position', 'position_float', 'caption', 'toprule', 'midrule', 'bottomrule', 'column_format', 'label'] %}
|
||||
\{{style['selector']}}{{parse_table(table_styles, style['selector'])}}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% if caption and caption is string %}
|
||||
\caption{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %}
|
||||
{%- set label = parse_table(table_styles, 'label') %}
|
||||
{%- if label is not none %}
|
||||
\label{{label}}
|
||||
{%- endif %} \\
|
||||
{% elif caption and caption is sequence %}
|
||||
\caption[{{caption[1]}}]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %}
|
||||
{%- set label = parse_table(table_styles, 'label') %}
|
||||
{%- if label is not none %}
|
||||
\label{{label}}
|
||||
{%- endif %} \\
|
||||
{% else %}
|
||||
{%- set label = parse_table(table_styles, 'label') %}
|
||||
{%- if label is not none %}
|
||||
\label{{label}} \\
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% set toprule = parse_table(table_styles, 'toprule') %}
|
||||
{% if toprule is not none %}
|
||||
\{{toprule}}
|
||||
{% endif %}
|
||||
{% for row in head %}
|
||||
{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx)}}{% endfor %} \\
|
||||
{% endfor %}
|
||||
{% set midrule = parse_table(table_styles, 'midrule') %}
|
||||
{% if midrule is not none %}
|
||||
\{{midrule}}
|
||||
{% endif %}
|
||||
\endfirsthead
|
||||
{% if caption and caption is string %}
|
||||
\caption[]{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %} \\
|
||||
{% elif caption and caption is sequence %}
|
||||
\caption[]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %} \\
|
||||
{% endif %}
|
||||
{% if toprule is not none %}
|
||||
\{{toprule}}
|
||||
{% endif %}
|
||||
{% for row in head %}
|
||||
{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx)}}{% endfor %} \\
|
||||
{% endfor %}
|
||||
{% if midrule is not none %}
|
||||
\{{midrule}}
|
||||
{% endif %}
|
||||
\endhead
|
||||
{% if midrule is not none %}
|
||||
\{{midrule}}
|
||||
{% endif %}
|
||||
\multicolumn{% raw %}{{% endraw %}{{body[0]|length}}{% raw %}}{% endraw %}{r}{Continued on next page} \\
|
||||
{% if midrule is not none %}
|
||||
\{{midrule}}
|
||||
{% endif %}
|
||||
\endfoot
|
||||
{% set bottomrule = parse_table(table_styles, 'bottomrule') %}
|
||||
{% if bottomrule is not none %}
|
||||
\{{bottomrule}}
|
||||
{% endif %}
|
||||
\endlastfoot
|
||||
{% for row in body %}
|
||||
{% for c in row %}{% if not loop.first %} & {% endif %}
|
||||
{%- if c.type == 'th' %}{{parse_header(c, multirow_align, multicol_align)}}{% else %}{{parse_cell(c.cellstyle, c.display_value, convert_css)}}{% endif %}
|
||||
{%- endfor %} \\
|
||||
{% if clines and clines[loop.index] | length > 0 %}
|
||||
{%- for cline in clines[loop.index] %}{% if not loop.first %} {% endif %}{{ cline }}{% endfor %}
|
||||
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
\end{longtable}
|
||||
{% raw %}{% endraw %}
|
@ -0,0 +1,57 @@
|
||||
{% if environment or parse_wrap(table_styles, caption) %}
|
||||
\begin{% raw %}{{% endraw %}{{environment if environment else "table"}}{% raw %}}{% endraw %}
|
||||
{%- set position = parse_table(table_styles, 'position') %}
|
||||
{%- if position is not none %}
|
||||
[{{position}}]
|
||||
{%- endif %}
|
||||
|
||||
{% set position_float = parse_table(table_styles, 'position_float') %}
|
||||
{% if position_float is not none%}
|
||||
\{{position_float}}
|
||||
{% endif %}
|
||||
{% if caption and caption is string %}
|
||||
\caption{% raw %}{{% endraw %}{{caption}}{% raw %}}{% endraw %}
|
||||
|
||||
{% elif caption and caption is sequence %}
|
||||
\caption[{{caption[1]}}]{% raw %}{{% endraw %}{{caption[0]}}{% raw %}}{% endraw %}
|
||||
|
||||
{% endif %}
|
||||
{% for style in table_styles %}
|
||||
{% if style['selector'] not in ['position', 'position_float', 'caption', 'toprule', 'midrule', 'bottomrule', 'column_format'] %}
|
||||
\{{style['selector']}}{{parse_table(table_styles, style['selector'])}}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
\begin{tabular}
|
||||
{%- set column_format = parse_table(table_styles, 'column_format') %}
|
||||
{% raw %}{{% endraw %}{{column_format}}{% raw %}}{% endraw %}
|
||||
|
||||
{% set toprule = parse_table(table_styles, 'toprule') %}
|
||||
{% if toprule is not none %}
|
||||
\{{toprule}}
|
||||
{% endif %}
|
||||
{% for row in head %}
|
||||
{% for c in row %}{%- if not loop.first %} & {% endif %}{{parse_header(c, multirow_align, multicol_align, siunitx, convert_css)}}{% endfor %} \\
|
||||
{% endfor %}
|
||||
{% set midrule = parse_table(table_styles, 'midrule') %}
|
||||
{% if midrule is not none %}
|
||||
\{{midrule}}
|
||||
{% endif %}
|
||||
{% for row in body %}
|
||||
{% for c in row %}{% if not loop.first %} & {% endif %}
|
||||
{%- if c.type == 'th' %}{{parse_header(c, multirow_align, multicol_align, False, convert_css)}}{% else %}{{parse_cell(c.cellstyle, c.display_value, convert_css)}}{% endif %}
|
||||
{%- endfor %} \\
|
||||
{% if clines and clines[loop.index] | length > 0 %}
|
||||
{%- for cline in clines[loop.index] %}{% if not loop.first %} {% endif %}{{ cline }}{% endfor %}
|
||||
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% set bottomrule = parse_table(table_styles, 'bottomrule') %}
|
||||
{% if bottomrule is not none %}
|
||||
\{{bottomrule}}
|
||||
{% endif %}
|
||||
\end{tabular}
|
||||
{% if environment or parse_wrap(table_styles, caption) %}
|
||||
\end{% raw %}{{% endraw %}{{environment if environment else "table"}}{% raw %}}{% endraw %}
|
||||
|
||||
{% endif %}
|
@ -0,0 +1,12 @@
|
||||
{% for r in head %}
|
||||
{% for c in r %}{% if c["is_visible"] %}
|
||||
{{ c["display_value"] }}{% if not loop.last %}{{ delimiter }}{% endif %}
|
||||
{% endif %}{% endfor %}
|
||||
|
||||
{% endfor %}
|
||||
{% for r in body %}
|
||||
{% for c in r %}{% if c["is_visible"] %}
|
||||
{{ c["display_value"] }}{% if not loop.last %}{{ delimiter }}{% endif %}
|
||||
{% endif %}{% endfor %}
|
||||
|
||||
{% endfor %}
|
560
lib/python3.13/site-packages/pandas/io/formats/xml.py
Normal file
560
lib/python3.13/site-packages/pandas/io/formats/xml.py
Normal file
@ -0,0 +1,560 @@
|
||||
"""
|
||||
:mod:`pandas.io.formats.xml` is a module for formatting data in XML.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import codecs
|
||||
import io
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
final,
|
||||
)
|
||||
import warnings
|
||||
|
||||
from pandas.errors import AbstractMethodError
|
||||
from pandas.util._decorators import (
|
||||
cache_readonly,
|
||||
doc,
|
||||
)
|
||||
|
||||
from pandas.core.dtypes.common import is_list_like
|
||||
from pandas.core.dtypes.missing import isna
|
||||
|
||||
from pandas.core.shared_docs import _shared_docs
|
||||
|
||||
from pandas.io.common import get_handle
|
||||
from pandas.io.xml import (
|
||||
get_data_from_filepath,
|
||||
preprocess_data,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pandas._typing import (
|
||||
CompressionOptions,
|
||||
FilePath,
|
||||
ReadBuffer,
|
||||
StorageOptions,
|
||||
WriteBuffer,
|
||||
)
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
|
||||
@doc(
|
||||
storage_options=_shared_docs["storage_options"],
|
||||
compression_options=_shared_docs["compression_options"] % "path_or_buffer",
|
||||
)
|
||||
class _BaseXMLFormatter:
|
||||
"""
|
||||
Subclass for formatting data in XML.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path_or_buffer : str or file-like
|
||||
This can be either a string of raw XML, a valid URL,
|
||||
file or file-like object.
|
||||
|
||||
index : bool
|
||||
Whether to include index in xml document.
|
||||
|
||||
row_name : str
|
||||
Name for root of xml document. Default is 'data'.
|
||||
|
||||
root_name : str
|
||||
Name for row elements of xml document. Default is 'row'.
|
||||
|
||||
na_rep : str
|
||||
Missing data representation.
|
||||
|
||||
attrs_cols : list
|
||||
List of columns to write as attributes in row element.
|
||||
|
||||
elem_cols : list
|
||||
List of columns to write as children in row element.
|
||||
|
||||
namespaces : dict
|
||||
The namespaces to define in XML document as dicts with key
|
||||
being namespace and value the URI.
|
||||
|
||||
prefix : str
|
||||
The prefix for each element in XML document including root.
|
||||
|
||||
encoding : str
|
||||
Encoding of xml object or document.
|
||||
|
||||
xml_declaration : bool
|
||||
Whether to include xml declaration at top line item in xml.
|
||||
|
||||
pretty_print : bool
|
||||
Whether to write xml document with line breaks and indentation.
|
||||
|
||||
stylesheet : str or file-like
|
||||
A URL, file, file-like object, or a raw string containing XSLT.
|
||||
|
||||
{compression_options}
|
||||
|
||||
.. versionchanged:: 1.4.0 Zstandard support.
|
||||
|
||||
{storage_options}
|
||||
|
||||
See also
|
||||
--------
|
||||
pandas.io.formats.xml.EtreeXMLFormatter
|
||||
pandas.io.formats.xml.LxmlXMLFormatter
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
frame: DataFrame,
|
||||
path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
|
||||
index: bool = True,
|
||||
root_name: str | None = "data",
|
||||
row_name: str | None = "row",
|
||||
na_rep: str | None = None,
|
||||
attr_cols: list[str] | None = None,
|
||||
elem_cols: list[str] | None = None,
|
||||
namespaces: dict[str | None, str] | None = None,
|
||||
prefix: str | None = None,
|
||||
encoding: str = "utf-8",
|
||||
xml_declaration: bool | None = True,
|
||||
pretty_print: bool | None = True,
|
||||
stylesheet: FilePath | ReadBuffer[str] | ReadBuffer[bytes] | None = None,
|
||||
compression: CompressionOptions = "infer",
|
||||
storage_options: StorageOptions | None = None,
|
||||
) -> None:
|
||||
self.frame = frame
|
||||
self.path_or_buffer = path_or_buffer
|
||||
self.index = index
|
||||
self.root_name = root_name
|
||||
self.row_name = row_name
|
||||
self.na_rep = na_rep
|
||||
self.attr_cols = attr_cols
|
||||
self.elem_cols = elem_cols
|
||||
self.namespaces = namespaces
|
||||
self.prefix = prefix
|
||||
self.encoding = encoding
|
||||
self.xml_declaration = xml_declaration
|
||||
self.pretty_print = pretty_print
|
||||
self.stylesheet = stylesheet
|
||||
self.compression: CompressionOptions = compression
|
||||
self.storage_options = storage_options
|
||||
|
||||
self.orig_cols = self.frame.columns.tolist()
|
||||
self.frame_dicts = self._process_dataframe()
|
||||
|
||||
self._validate_columns()
|
||||
self._validate_encoding()
|
||||
self.prefix_uri = self._get_prefix_uri()
|
||||
self._handle_indexes()
|
||||
|
||||
def _build_tree(self) -> bytes:
|
||||
"""
|
||||
Build tree from data.
|
||||
|
||||
This method initializes the root and builds attributes and elements
|
||||
with optional namespaces.
|
||||
"""
|
||||
raise AbstractMethodError(self)
|
||||
|
||||
@final
|
||||
def _validate_columns(self) -> None:
|
||||
"""
|
||||
Validate elems_cols and attrs_cols.
|
||||
|
||||
This method will check if columns is list-like.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
* If value is not a list and less then length of nodes.
|
||||
"""
|
||||
if self.attr_cols and not is_list_like(self.attr_cols):
|
||||
raise TypeError(
|
||||
f"{type(self.attr_cols).__name__} is not a valid type for attr_cols"
|
||||
)
|
||||
|
||||
if self.elem_cols and not is_list_like(self.elem_cols):
|
||||
raise TypeError(
|
||||
f"{type(self.elem_cols).__name__} is not a valid type for elem_cols"
|
||||
)
|
||||
|
||||
@final
|
||||
def _validate_encoding(self) -> None:
|
||||
"""
|
||||
Validate encoding.
|
||||
|
||||
This method will check if encoding is among listed under codecs.
|
||||
|
||||
Raises
|
||||
------
|
||||
LookupError
|
||||
* If encoding is not available in codecs.
|
||||
"""
|
||||
|
||||
codecs.lookup(self.encoding)
|
||||
|
||||
@final
|
||||
def _process_dataframe(self) -> dict[int | str, dict[str, Any]]:
|
||||
"""
|
||||
Adjust Data Frame to fit xml output.
|
||||
|
||||
This method will adjust underlying data frame for xml output,
|
||||
including optionally replacing missing values and including indexes.
|
||||
"""
|
||||
|
||||
df = self.frame
|
||||
|
||||
if self.index:
|
||||
df = df.reset_index()
|
||||
|
||||
if self.na_rep is not None:
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings(
|
||||
"ignore",
|
||||
"Downcasting object dtype arrays",
|
||||
category=FutureWarning,
|
||||
)
|
||||
df = df.fillna(self.na_rep)
|
||||
|
||||
return df.to_dict(orient="index")
|
||||
|
||||
@final
|
||||
def _handle_indexes(self) -> None:
|
||||
"""
|
||||
Handle indexes.
|
||||
|
||||
This method will add indexes into attr_cols or elem_cols.
|
||||
"""
|
||||
|
||||
if not self.index:
|
||||
return
|
||||
|
||||
first_key = next(iter(self.frame_dicts))
|
||||
indexes: list[str] = [
|
||||
x for x in self.frame_dicts[first_key].keys() if x not in self.orig_cols
|
||||
]
|
||||
|
||||
if self.attr_cols:
|
||||
self.attr_cols = indexes + self.attr_cols
|
||||
|
||||
if self.elem_cols:
|
||||
self.elem_cols = indexes + self.elem_cols
|
||||
|
||||
def _get_prefix_uri(self) -> str:
    """
    Get uri of namespace prefix.

    This method retrieves corresponding URI to prefix in namespaces.
    Concrete formatters must override it.

    Raises
    ------
    KeyError
        * If prefix is not included in namespace dict.
    """
    raise AbstractMethodError(self)
||||
@final
|
||||
def _other_namespaces(self) -> dict:
|
||||
"""
|
||||
Define other namespaces.
|
||||
|
||||
This method will build dictionary of namespaces attributes
|
||||
for root element, conditionally with optional namespaces and
|
||||
prefix.
|
||||
"""
|
||||
|
||||
nmsp_dict: dict[str, str] = {}
|
||||
if self.namespaces:
|
||||
nmsp_dict = {
|
||||
f"xmlns{p if p=='' else f':{p}'}": n
|
||||
for p, n in self.namespaces.items()
|
||||
if n != self.prefix_uri[1:-1]
|
||||
}
|
||||
|
||||
return nmsp_dict
|
||||
|
||||
@final
|
||||
def _build_attribs(self, d: dict[str, Any], elem_row: Any) -> Any:
|
||||
"""
|
||||
Create attributes of row.
|
||||
|
||||
This method adds attributes using attr_cols to row element and
|
||||
works with tuples for multindex or hierarchical columns.
|
||||
"""
|
||||
|
||||
if not self.attr_cols:
|
||||
return elem_row
|
||||
|
||||
for col in self.attr_cols:
|
||||
attr_name = self._get_flat_col_name(col)
|
||||
try:
|
||||
if not isna(d[col]):
|
||||
elem_row.attrib[attr_name] = str(d[col])
|
||||
except KeyError:
|
||||
raise KeyError(f"no valid column, {col}")
|
||||
return elem_row
|
||||
|
||||
@final
|
||||
def _get_flat_col_name(self, col: str | tuple) -> str:
|
||||
flat_col = col
|
||||
if isinstance(col, tuple):
|
||||
flat_col = (
|
||||
"".join([str(c) for c in col]).strip()
|
||||
if "" in col
|
||||
else "_".join([str(c) for c in col]).strip()
|
||||
)
|
||||
return f"{self.prefix_uri}{flat_col}"
|
||||
|
||||
@cache_readonly
def _sub_element_cls(self):
    """Subclass hook: return the element factory used to build child nodes."""
    raise AbstractMethodError(self)
||||
@final
|
||||
def _build_elems(self, d: dict[str, Any], elem_row: Any) -> None:
|
||||
"""
|
||||
Create child elements of row.
|
||||
|
||||
This method adds child elements using elem_cols to row element and
|
||||
works with tuples for multindex or hierarchical columns.
|
||||
"""
|
||||
sub_element_cls = self._sub_element_cls
|
||||
|
||||
if not self.elem_cols:
|
||||
return
|
||||
|
||||
for col in self.elem_cols:
|
||||
elem_name = self._get_flat_col_name(col)
|
||||
try:
|
||||
val = None if isna(d[col]) or d[col] == "" else str(d[col])
|
||||
sub_element_cls(elem_row, elem_name).text = val
|
||||
except KeyError:
|
||||
raise KeyError(f"no valid column, {col}")
|
||||
|
||||
@final
def write_output(self) -> str | None:
    """
    Serialize the XML tree and either write it out or return it.

    When ``path_or_buffer`` is set, the raw bytes are written there
    (honoring compression and storage options) and ``None`` is
    returned; otherwise the document is returned as a decoded,
    right-stripped string.
    """
    xml_doc = self._build_tree()

    if self.path_or_buffer is None:
        return xml_doc.decode(self.encoding).rstrip()

    with get_handle(
        self.path_or_buffer,
        "wb",
        compression=self.compression,
        storage_options=self.storage_options,
        is_text=False,
    ) as handles:
        handles.handle.write(xml_doc)
    return None
||||
|
||||
class EtreeXMLFormatter(_BaseXMLFormatter):
    """
    Class for formatting data in xml using Python standard library
    modules: `xml.etree.ElementTree` and `xml.dom.minidom`.
    """

    def _build_tree(self) -> bytes:
        """
        Build tree from data.

        This method initializes the root element, then builds row
        attributes and child elements with optional namespaces.
        """
        from xml.etree.ElementTree import (
            Element,
            SubElement,
            tostring,
        )

        self.root = Element(
            f"{self.prefix_uri}{self.root_name}", attrib=self._other_namespaces()
        )

        for d in self.frame_dicts.values():
            elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")

            if not self.attr_cols and not self.elem_cols:
                # With no explicit columns, serialize every column as an element.
                self.elem_cols = list(d.keys())
                self._build_elems(d, elem_row)

            else:
                elem_row = self._build_attribs(d, elem_row)
                self._build_elems(d, elem_row)

        self.out_xml = tostring(
            self.root,
            method="xml",
            encoding=self.encoding,
            xml_declaration=self.xml_declaration,
        )

        if self.pretty_print:
            self.out_xml = self._prettify_tree()

        if self.stylesheet is not None:
            raise ValueError(
                "To use stylesheet, you need lxml installed and selected as parser."
            )

        return self.out_xml

    def _get_prefix_uri(self) -> str:
        """
        Get uri of namespace prefix.

        This method retrieves corresponding URI to prefix in namespaces,
        registering all namespaces with etree along the way.

        Raises
        ------
        KeyError
            * If prefix is not included in namespace dict.
        """
        from xml.etree.ElementTree import register_namespace

        uri = ""
        if self.namespaces:
            for p, n in self.namespaces.items():
                if isinstance(p, str) and isinstance(n, str):
                    register_namespace(p, n)
            if self.prefix:
                try:
                    uri = f"{{{self.namespaces[self.prefix]}}}"
                except KeyError as err:
                    # Chain the original lookup failure for easier debugging
                    # (was previously raised unchained).
                    raise KeyError(
                        f"{self.prefix} is not included in namespaces"
                    ) from err
            elif "" in self.namespaces:
                uri = f'{{{self.namespaces[""]}}}'
            else:
                uri = ""

        return uri

    @cache_readonly
    def _sub_element_cls(self):
        # Element factory used by _build_elems for child nodes.
        from xml.etree.ElementTree import SubElement

        return SubElement

    def _prettify_tree(self) -> bytes:
        """
        Output tree for pretty print format.

        This method will pretty print xml with line breaks and indentation.
        """
        from xml.dom.minidom import parseString

        dom = parseString(self.out_xml)

        return dom.toprettyxml(indent="  ", encoding=self.encoding)
||||
|
||||
class LxmlXMLFormatter(_BaseXMLFormatter):
    """
    Class for formatting data in xml using the third-party ``lxml.etree``
    module, with optional XSLT stylesheet support.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # lxml forbids '' as a namespace prefix; normalize it to None.
        self._convert_empty_str_key()

    def _build_tree(self) -> bytes:
        """
        Build tree from data.

        This method initializes the root element, then builds row
        attributes and child elements with optional namespaces.
        """
        from lxml.etree import (
            Element,
            SubElement,
            tostring,
        )

        self.root = Element(f"{self.prefix_uri}{self.root_name}", nsmap=self.namespaces)

        for d in self.frame_dicts.values():
            elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")

            if not self.attr_cols and not self.elem_cols:
                # With no explicit columns, serialize every column as an element.
                self.elem_cols = list(d.keys())
                self._build_elems(d, elem_row)

            else:
                elem_row = self._build_attribs(d, elem_row)
                self._build_elems(d, elem_row)

        self.out_xml = tostring(
            self.root,
            pretty_print=self.pretty_print,
            method="xml",
            encoding=self.encoding,
            xml_declaration=self.xml_declaration,
        )

        if self.stylesheet is not None:
            self.out_xml = self._transform_doc()

        return self.out_xml

    def _convert_empty_str_key(self) -> None:
        """
        Replace zero-length string in `namespaces`.

        This method will replace '' with None to align to `lxml`
        requirement that empty string prefixes are not allowed.
        """
        if self.namespaces and "" in self.namespaces.keys():
            self.namespaces[None] = self.namespaces.pop("", "default")

    def _get_prefix_uri(self) -> str:
        """
        Get uri of namespace prefix.

        This method retrieves corresponding URI to prefix in namespaces.

        Raises
        ------
        KeyError
            * If prefix is not included in namespace dict.
        """
        uri = ""
        if self.namespaces:
            if self.prefix:
                try:
                    uri = f"{{{self.namespaces[self.prefix]}}}"
                except KeyError as err:
                    # Chain the original lookup failure for easier debugging
                    # (was previously raised unchained).
                    raise KeyError(
                        f"{self.prefix} is not included in namespaces"
                    ) from err
            elif "" in self.namespaces:
                uri = f'{{{self.namespaces[""]}}}'
            else:
                uri = ""

        return uri

    @cache_readonly
    def _sub_element_cls(self):
        # Element factory used by _build_elems for child nodes.
        from lxml.etree import SubElement

        return SubElement

    def _transform_doc(self) -> bytes:
        """
        Parse stylesheet from file or buffer and run it.

        This method will parse stylesheet object into tree for parsing
        conditionally by its specific object type, then transforms
        original tree with XSLT script.
        """
        from lxml.etree import (
            XSLT,
            XMLParser,
            fromstring,
            parse,
        )

        style_doc = self.stylesheet
        assert style_doc is not None  # is ensured by caller

        handle_data = get_data_from_filepath(
            filepath_or_buffer=style_doc,
            encoding=self.encoding,
            compression=self.compression,
            storage_options=self.storage_options,
        )

        with preprocess_data(handle_data) as xml_data:
            curr_parser = XMLParser(encoding=self.encoding)

            if isinstance(xml_data, io.StringIO):
                # lxml requires bytes when an explicit encoding is declared.
                xsl_doc = fromstring(
                    xml_data.getvalue().encode(self.encoding), parser=curr_parser
                )
            else:
                xsl_doc = parse(xml_data, parser=curr_parser)

        transformer = XSLT(xsl_doc)
        new_doc = transformer(self.root)

        return bytes(new_doc)
|
Reference in New Issue
Block a user