1from __future__ import annotations
2
3from collections import defaultdict
4from collections.abc import Sequence
5from functools import partial
6import re
7from typing import (
8 TYPE_CHECKING,
9 Any,
10 Callable,
11 DefaultDict,
12 Optional,
13 TypedDict,
14 Union,
15)
16from uuid import uuid4
17
18import numpy as np
19
20from pandas._config import get_option
21
22from pandas._libs import lib
23from pandas.compat._optional import import_optional_dependency
24
25from pandas.core.dtypes.common import (
26 is_complex,
27 is_float,
28 is_integer,
29)
30from pandas.core.dtypes.generic import ABCSeries
31
32from pandas import (
33 DataFrame,
34 Index,
35 IndexSlice,
36 MultiIndex,
37 Series,
38 isna,
39)
40from pandas.api.types import is_list_like
41import pandas.core.common as com
42
43if TYPE_CHECKING:
44 from pandas._typing import (
45 Axis,
46 Level,
47 )
48jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.")
49from markupsafe import escape as escape_html # markupsafe is jinja2 dependency
50
# A single formatter: either a string or a callable.
BaseFormatter = Union[str, Callable]
# Formatter argument of ``format``: one formatter, or a dict whose values are
# formatters (or None).
ExtFormatter = Union[BaseFormatter, dict[Any, Optional[BaseFormatter]]]
# One CSS declaration as a 2-tuple whose first item is a string and whose second
# item is a string or a float.
CSSPair = tuple[str, Union[str, float]]
# Several CSS declarations.
CSSList = list[CSSPair]
# CSS given either as one string or as a list of pairs.
CSSProperties = Union[str, CSSList]
56
57
class CSSDict(TypedDict):
    """
    Typed dict pairing a ``selector`` string with its ``props`` CSS properties.
    """

    selector: str
    props: CSSProperties
61
62
# A list of selector/props dicts; the type of the ``table_styles`` argument.
CSSStyles = list[CSSDict]
# The type of the ``subset`` argument of ``format``.
Subset = Union[slice, Sequence, Index]
65
66
class StylerRenderer:
    """
    Base class to process rendering a Styler with a specified jinja2 template.
    """

    # The loader, environment and templates below are class attributes: they are
    # created once when the class body executes and are shared by all instances.
    loader = jinja2.PackageLoader("pandas", "io/formats/templates")
    env = jinja2.Environment(loader=loader, trim_blocks=True)
    # Templates used by ``_render_html`` (the table and style templates are
    # passed to the main HTML template as render variables).
    template_html = env.get_template("html.tpl")
    template_html_table = env.get_template("html_table.tpl")
    template_html_style = env.get_template("html_style.tpl")
    # Template used by ``_render_latex``.
    template_latex = env.get_template("latex.tpl")
    # Template used by ``_render_string``.
    template_string = env.get_template("string.tpl")
79
80 def __init__(
81 self,
82 data: DataFrame | Series,
83 uuid: str | None = None,
84 uuid_len: int = 5,
85 table_styles: CSSStyles | None = None,
86 table_attributes: str | None = None,
87 caption: str | tuple | list | None = None,
88 cell_ids: bool = True,
89 precision: int | None = None,
90 ) -> None:
91 # validate ordered args
92 if isinstance(data, Series):
93 data = data.to_frame()
94 if not isinstance(data, DataFrame):
95 raise TypeError("``data`` must be a Series or DataFrame")
96 self.data: DataFrame = data
97 self.index: Index = data.index
98 self.columns: Index = data.columns
99 if not isinstance(uuid_len, int) or uuid_len < 0:
100 raise TypeError("``uuid_len`` must be an integer in range [0, 32].")
101 self.uuid = uuid or uuid4().hex[: min(32, uuid_len)]
102 self.uuid_len = len(self.uuid)
103 self.table_styles = table_styles
104 self.table_attributes = table_attributes
105 self.caption = caption
106 self.cell_ids = cell_ids
107 self.css = {
108 "row_heading": "row_heading",
109 "col_heading": "col_heading",
110 "index_name": "index_name",
111 "col": "col",
112 "row": "row",
113 "col_trim": "col_trim",
114 "row_trim": "row_trim",
115 "level": "level",
116 "data": "data",
117 "blank": "blank",
118 "foot": "foot",
119 }
120 self.concatenated: list[StylerRenderer] = []
121 # add rendering variables
122 self.hide_index_names: bool = False
123 self.hide_column_names: bool = False
124 self.hide_index_: list = [False] * self.index.nlevels
125 self.hide_columns_: list = [False] * self.columns.nlevels
126 self.hidden_rows: Sequence[int] = [] # sequence for specific hidden rows/cols
127 self.hidden_columns: Sequence[int] = []
128 self.ctx: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
129 self.ctx_index: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
130 self.ctx_columns: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
131 self.cell_context: DefaultDict[tuple[int, int], str] = defaultdict(str)
132 self._todo: list[tuple[Callable, tuple, dict]] = []
133 self.tooltips: Tooltips | None = None
134 precision = (
135 get_option("styler.format.precision") if precision is None else precision
136 )
137 self._display_funcs: DefaultDict[ # maps (row, col) -> format func
138 tuple[int, int], Callable[[Any], str]
139 ] = defaultdict(lambda: partial(_default_formatter, precision=precision))
140 self._display_funcs_index: DefaultDict[ # maps (row, level) -> format func
141 tuple[int, int], Callable[[Any], str]
142 ] = defaultdict(lambda: partial(_default_formatter, precision=precision))
143 self._display_funcs_columns: DefaultDict[ # maps (level, col) -> format func
144 tuple[int, int], Callable[[Any], str]
145 ] = defaultdict(lambda: partial(_default_formatter, precision=precision))
146
147 def _render(
148 self,
149 sparse_index: bool,
150 sparse_columns: bool,
151 max_rows: int | None = None,
152 max_cols: int | None = None,
153 blank: str = "",
154 ):
155 """
156 Computes and applies styles and then generates the general render dicts.
157
158 Also extends the `ctx` and `ctx_index` attributes with those of concatenated
159 stylers for use within `_translate_latex`
160 """
161 self._compute()
162 dxs = []
163 ctx_len = len(self.index)
164 for i, concatenated in enumerate(self.concatenated):
165 concatenated.hide_index_ = self.hide_index_
166 concatenated.hidden_columns = self.hidden_columns
167 foot = f"{self.css['foot']}{i}"
168 concatenated.css = {
169 **self.css,
170 "data": f"{foot}_data",
171 "row_heading": f"{foot}_row_heading",
172 "row": f"{foot}_row",
173 "foot": f"{foot}_foot",
174 }
175 dx = concatenated._render(
176 sparse_index, sparse_columns, max_rows, max_cols, blank
177 )
178 dxs.append(dx)
179
180 for (r, c), v in concatenated.ctx.items():
181 self.ctx[(r + ctx_len, c)] = v
182 for (r, c), v in concatenated.ctx_index.items():
183 self.ctx_index[(r + ctx_len, c)] = v
184
185 ctx_len += len(concatenated.index)
186
187 d = self._translate(
188 sparse_index, sparse_columns, max_rows, max_cols, blank, dxs
189 )
190 return d
191
192 def _render_html(
193 self,
194 sparse_index: bool,
195 sparse_columns: bool,
196 max_rows: int | None = None,
197 max_cols: int | None = None,
198 **kwargs,
199 ) -> str:
200 """
201 Renders the ``Styler`` including all applied styles to HTML.
202 Generates a dict with necessary kwargs passed to jinja2 template.
203 """
204 d = self._render(sparse_index, sparse_columns, max_rows, max_cols, " ")
205 d.update(kwargs)
206 return self.template_html.render(
207 **d,
208 html_table_tpl=self.template_html_table,
209 html_style_tpl=self.template_html_style,
210 )
211
212 def _render_latex(
213 self, sparse_index: bool, sparse_columns: bool, clines: str | None, **kwargs
214 ) -> str:
215 """
216 Render a Styler in latex format
217 """
218 d = self._render(sparse_index, sparse_columns, None, None)
219 self._translate_latex(d, clines=clines)
220 self.template_latex.globals["parse_wrap"] = _parse_latex_table_wrapping
221 self.template_latex.globals["parse_table"] = _parse_latex_table_styles
222 self.template_latex.globals["parse_cell"] = _parse_latex_cell_styles
223 self.template_latex.globals["parse_header"] = _parse_latex_header_span
224 d.update(kwargs)
225 return self.template_latex.render(**d)
226
227 def _render_string(
228 self,
229 sparse_index: bool,
230 sparse_columns: bool,
231 max_rows: int | None = None,
232 max_cols: int | None = None,
233 **kwargs,
234 ) -> str:
235 """
236 Render a Styler in string format
237 """
238 d = self._render(sparse_index, sparse_columns, max_rows, max_cols)
239 d.update(kwargs)
240 return self.template_string.render(**d)
241
242 def _compute(self):
243 """
244 Execute the style functions built up in `self._todo`.
245
246 Relies on the conventions that all style functions go through
247 .apply or .map. The append styles to apply as tuples of
248
249 (application method, *args, **kwargs)
250 """
251 self.ctx.clear()
252 self.ctx_index.clear()
253 self.ctx_columns.clear()
254 r = self
255 for func, args, kwargs in self._todo:
256 r = func(self)(*args, **kwargs)
257 return r
258
259 def _translate(
260 self,
261 sparse_index: bool,
262 sparse_cols: bool,
263 max_rows: int | None = None,
264 max_cols: int | None = None,
265 blank: str = " ",
266 dxs: list[dict] | None = None,
267 ):
268 """
269 Process Styler data and settings into a dict for template rendering.
270
271 Convert data and settings from ``Styler`` attributes such as ``self.data``,
272 ``self.tooltips`` including applying any methods in ``self._todo``.
273
274 Parameters
275 ----------
276 sparse_index : bool
277 Whether to sparsify the index or print all hierarchical index elements.
278 Upstream defaults are typically to `pandas.options.styler.sparse.index`.
279 sparse_cols : bool
280 Whether to sparsify the columns or print all hierarchical column elements.
281 Upstream defaults are typically to `pandas.options.styler.sparse.columns`.
282 max_rows, max_cols : int, optional
283 Specific max rows and cols. max_elements always take precedence in render.
284 blank : str
285 Entry to top-left blank cells.
286 dxs : list[dict]
287 The render dicts of the concatenated Stylers.
288
289 Returns
290 -------
291 d : dict
292 The following structure: {uuid, table_styles, caption, head, body,
293 cellstyle, table_attributes}
294 """
295 if dxs is None:
296 dxs = []
297 self.css["blank_value"] = blank
298
299 # construct render dict
300 d = {
301 "uuid": self.uuid,
302 "table_styles": format_table_styles(self.table_styles or []),
303 "caption": self.caption,
304 }
305
306 max_elements = get_option("styler.render.max_elements")
307 max_rows = max_rows if max_rows else get_option("styler.render.max_rows")
308 max_cols = max_cols if max_cols else get_option("styler.render.max_columns")
309 max_rows, max_cols = _get_trimming_maximums(
310 len(self.data.index),
311 len(self.data.columns),
312 max_elements,
313 max_rows,
314 max_cols,
315 )
316
317 self.cellstyle_map_columns: DefaultDict[
318 tuple[CSSPair, ...], list[str]
319 ] = defaultdict(list)
320 head = self._translate_header(sparse_cols, max_cols)
321 d.update({"head": head})
322
323 # for sparsifying a MultiIndex and for use with latex clines
324 idx_lengths = _get_level_lengths(
325 self.index, sparse_index, max_rows, self.hidden_rows
326 )
327 d.update({"index_lengths": idx_lengths})
328
329 self.cellstyle_map: DefaultDict[tuple[CSSPair, ...], list[str]] = defaultdict(
330 list
331 )
332 self.cellstyle_map_index: DefaultDict[
333 tuple[CSSPair, ...], list[str]
334 ] = defaultdict(list)
335 body: list = self._translate_body(idx_lengths, max_rows, max_cols)
336 d.update({"body": body})
337
338 ctx_maps = {
339 "cellstyle": "cellstyle_map",
340 "cellstyle_index": "cellstyle_map_index",
341 "cellstyle_columns": "cellstyle_map_columns",
342 } # add the cell_ids styles map to the render dictionary in right format
343 for k, attr in ctx_maps.items():
344 map = [
345 {"props": list(props), "selectors": selectors}
346 for props, selectors in getattr(self, attr).items()
347 ]
348 d.update({k: map})
349
350 for dx in dxs: # self.concatenated is not empty
351 d["body"].extend(dx["body"]) # type: ignore[union-attr]
352 d["cellstyle"].extend(dx["cellstyle"]) # type: ignore[union-attr]
353 d["cellstyle_index"].extend( # type: ignore[union-attr]
354 dx["cellstyle_index"]
355 )
356
357 table_attr = self.table_attributes
358 if not get_option("styler.html.mathjax"):
359 table_attr = table_attr or ""
360 if 'class="' in table_attr:
361 table_attr = table_attr.replace('class="', 'class="tex2jax_ignore ')
362 else:
363 table_attr += ' class="tex2jax_ignore"'
364 d.update({"table_attributes": table_attr})
365
366 if self.tooltips:
367 d = self.tooltips._translate(self, d)
368
369 return d
370
371 def _translate_header(self, sparsify_cols: bool, max_cols: int):
372 """
373 Build each <tr> within table <head> as a list
374
375 Using the structure:
376 +----------------------------+---------------+---------------------------+
377 | index_blanks ... | column_name_0 | column_headers (level_0) |
378 1) | .. | .. | .. |
379 | index_blanks ... | column_name_n | column_headers (level_n) |
380 +----------------------------+---------------+---------------------------+
381 2) | index_names (level_0 to level_n) ... | column_blanks ... |
382 +----------------------------+---------------+---------------------------+
383
384 Parameters
385 ----------
386 sparsify_cols : bool
387 Whether column_headers section will add colspan attributes (>1) to elements.
388 max_cols : int
389 Maximum number of columns to render. If exceeded will contain `...` filler.
390
391 Returns
392 -------
393 head : list
394 The associated HTML elements needed for template rendering.
395 """
396 # for sparsifying a MultiIndex
397 col_lengths = _get_level_lengths(
398 self.columns, sparsify_cols, max_cols, self.hidden_columns
399 )
400
401 clabels = self.data.columns.tolist()
402 if self.data.columns.nlevels == 1:
403 clabels = [[x] for x in clabels]
404 clabels = list(zip(*clabels))
405
406 head = []
407 # 1) column headers
408 for r, hide in enumerate(self.hide_columns_):
409 if hide or not clabels:
410 continue
411
412 header_row = self._generate_col_header_row(
413 (r, clabels), max_cols, col_lengths
414 )
415 head.append(header_row)
416
417 # 2) index names
418 if (
419 self.data.index.names
420 and com.any_not_none(*self.data.index.names)
421 and not all(self.hide_index_)
422 and not self.hide_index_names
423 ):
424 index_names_row = self._generate_index_names_row(
425 clabels, max_cols, col_lengths
426 )
427 head.append(index_names_row)
428
429 return head
430
431 def _generate_col_header_row(
432 self, iter: Sequence, max_cols: int, col_lengths: dict
433 ):
434 """
435 Generate the row containing column headers:
436
437 +----------------------------+---------------+---------------------------+
438 | index_blanks ... | column_name_i | column_headers (level_i) |
439 +----------------------------+---------------+---------------------------+
440
441 Parameters
442 ----------
443 iter : tuple
444 Looping variables from outer scope
445 max_cols : int
446 Permissible number of columns
447 col_lengths :
448 c
449
450 Returns
451 -------
452 list of elements
453 """
454
455 r, clabels = iter
456
457 # number of index blanks is governed by number of hidden index levels
458 index_blanks = [
459 _element("th", self.css["blank"], self.css["blank_value"], True)
460 ] * (self.index.nlevels - sum(self.hide_index_) - 1)
461
462 name = self.data.columns.names[r]
463 column_name = [
464 _element(
465 "th",
466 (
467 f"{self.css['blank']} {self.css['level']}{r}"
468 if name is None
469 else f"{self.css['index_name']} {self.css['level']}{r}"
470 ),
471 name
472 if (name is not None and not self.hide_column_names)
473 else self.css["blank_value"],
474 not all(self.hide_index_),
475 )
476 ]
477
478 column_headers: list = []
479 visible_col_count: int = 0
480 for c, value in enumerate(clabels[r]):
481 header_element_visible = _is_visible(c, r, col_lengths)
482 if header_element_visible:
483 visible_col_count += col_lengths.get((r, c), 0)
484 if self._check_trim(
485 visible_col_count,
486 max_cols,
487 column_headers,
488 "th",
489 f"{self.css['col_heading']} {self.css['level']}{r} "
490 f"{self.css['col_trim']}",
491 ):
492 break
493
494 header_element = _element(
495 "th",
496 (
497 f"{self.css['col_heading']} {self.css['level']}{r} "
498 f"{self.css['col']}{c}"
499 ),
500 value,
501 header_element_visible,
502 display_value=self._display_funcs_columns[(r, c)](value),
503 attributes=(
504 f'colspan="{col_lengths.get((r, c), 0)}"'
505 if col_lengths.get((r, c), 0) > 1
506 else ""
507 ),
508 )
509
510 if self.cell_ids:
511 header_element["id"] = f"{self.css['level']}{r}_{self.css['col']}{c}"
512 if (
513 header_element_visible
514 and (r, c) in self.ctx_columns
515 and self.ctx_columns[r, c]
516 ):
517 header_element["id"] = f"{self.css['level']}{r}_{self.css['col']}{c}"
518 self.cellstyle_map_columns[tuple(self.ctx_columns[r, c])].append(
519 f"{self.css['level']}{r}_{self.css['col']}{c}"
520 )
521
522 column_headers.append(header_element)
523
524 return index_blanks + column_name + column_headers
525
526 def _generate_index_names_row(
527 self, iter: Sequence, max_cols: int, col_lengths: dict
528 ):
529 """
530 Generate the row containing index names
531
532 +----------------------------+---------------+---------------------------+
533 | index_names (level_0 to level_n) ... | column_blanks ... |
534 +----------------------------+---------------+---------------------------+
535
536 Parameters
537 ----------
538 iter : tuple
539 Looping variables from outer scope
540 max_cols : int
541 Permissible number of columns
542
543 Returns
544 -------
545 list of elements
546 """
547
548 clabels = iter
549
550 index_names = [
551 _element(
552 "th",
553 f"{self.css['index_name']} {self.css['level']}{c}",
554 self.css["blank_value"] if name is None else name,
555 not self.hide_index_[c],
556 )
557 for c, name in enumerate(self.data.index.names)
558 ]
559
560 column_blanks: list = []
561 visible_col_count: int = 0
562 if clabels:
563 last_level = self.columns.nlevels - 1 # use last level since never sparsed
564 for c, value in enumerate(clabels[last_level]):
565 header_element_visible = _is_visible(c, last_level, col_lengths)
566 if header_element_visible:
567 visible_col_count += 1
568 if self._check_trim(
569 visible_col_count,
570 max_cols,
571 column_blanks,
572 "th",
573 f"{self.css['blank']} {self.css['col']}{c} {self.css['col_trim']}",
574 self.css["blank_value"],
575 ):
576 break
577
578 column_blanks.append(
579 _element(
580 "th",
581 f"{self.css['blank']} {self.css['col']}{c}",
582 self.css["blank_value"],
583 c not in self.hidden_columns,
584 )
585 )
586
587 return index_names + column_blanks
588
589 def _translate_body(self, idx_lengths: dict, max_rows: int, max_cols: int):
590 """
591 Build each <tr> within table <body> as a list
592
593 Use the following structure:
594 +--------------------------------------------+---------------------------+
595 | index_header_0 ... index_header_n | data_by_column ... |
596 +--------------------------------------------+---------------------------+
597
598 Also add elements to the cellstyle_map for more efficient grouped elements in
599 <style></style> block
600
601 Parameters
602 ----------
603 sparsify_index : bool
604 Whether index_headers section will add rowspan attributes (>1) to elements.
605
606 Returns
607 -------
608 body : list
609 The associated HTML elements needed for template rendering.
610 """
611 rlabels = self.data.index.tolist()
612 if not isinstance(self.data.index, MultiIndex):
613 rlabels = [[x] for x in rlabels]
614
615 body: list = []
616 visible_row_count: int = 0
617 for r, row_tup in [
618 z for z in enumerate(self.data.itertuples()) if z[0] not in self.hidden_rows
619 ]:
620 visible_row_count += 1
621 if self._check_trim(
622 visible_row_count,
623 max_rows,
624 body,
625 "row",
626 ):
627 break
628
629 body_row = self._generate_body_row(
630 (r, row_tup, rlabels), max_cols, idx_lengths
631 )
632 body.append(body_row)
633 return body
634
635 def _check_trim(
636 self,
637 count: int,
638 max: int,
639 obj: list,
640 element: str,
641 css: str | None = None,
642 value: str = "...",
643 ) -> bool:
644 """
645 Indicates whether to break render loops and append a trimming indicator
646
647 Parameters
648 ----------
649 count : int
650 The loop count of previous visible items.
651 max : int
652 The allowable rendered items in the loop.
653 obj : list
654 The current render collection of the rendered items.
655 element : str
656 The type of element to append in the case a trimming indicator is needed.
657 css : str, optional
658 The css to add to the trimming indicator element.
659 value : str, optional
660 The value of the elements display if necessary.
661
662 Returns
663 -------
664 result : bool
665 Whether a trimming element was required and appended.
666 """
667 if count > max:
668 if element == "row":
669 obj.append(self._generate_trimmed_row(max))
670 else:
671 obj.append(_element(element, css, value, True, attributes=""))
672 return True
673 return False
674
675 def _generate_trimmed_row(self, max_cols: int) -> list:
676 """
677 When a render has too many rows we generate a trimming row containing "..."
678
679 Parameters
680 ----------
681 max_cols : int
682 Number of permissible columns
683
684 Returns
685 -------
686 list of elements
687 """
688 index_headers = [
689 _element(
690 "th",
691 (
692 f"{self.css['row_heading']} {self.css['level']}{c} "
693 f"{self.css['row_trim']}"
694 ),
695 "...",
696 not self.hide_index_[c],
697 attributes="",
698 )
699 for c in range(self.data.index.nlevels)
700 ]
701
702 data: list = []
703 visible_col_count: int = 0
704 for c, _ in enumerate(self.columns):
705 data_element_visible = c not in self.hidden_columns
706 if data_element_visible:
707 visible_col_count += 1
708 if self._check_trim(
709 visible_col_count,
710 max_cols,
711 data,
712 "td",
713 f"{self.css['data']} {self.css['row_trim']} {self.css['col_trim']}",
714 ):
715 break
716
717 data.append(
718 _element(
719 "td",
720 f"{self.css['data']} {self.css['col']}{c} {self.css['row_trim']}",
721 "...",
722 data_element_visible,
723 attributes="",
724 )
725 )
726
727 return index_headers + data
728
729 def _generate_body_row(
730 self,
731 iter: tuple,
732 max_cols: int,
733 idx_lengths: dict,
734 ):
735 """
736 Generate a regular row for the body section of appropriate format.
737
738 +--------------------------------------------+---------------------------+
739 | index_header_0 ... index_header_n | data_by_column ... |
740 +--------------------------------------------+---------------------------+
741
742 Parameters
743 ----------
744 iter : tuple
745 Iterable from outer scope: row number, row data tuple, row index labels.
746 max_cols : int
747 Number of permissible columns.
748 idx_lengths : dict
749 A map of the sparsification structure of the index
750
751 Returns
752 -------
753 list of elements
754 """
755 r, row_tup, rlabels = iter
756
757 index_headers = []
758 for c, value in enumerate(rlabels[r]):
759 header_element_visible = (
760 _is_visible(r, c, idx_lengths) and not self.hide_index_[c]
761 )
762 header_element = _element(
763 "th",
764 (
765 f"{self.css['row_heading']} {self.css['level']}{c} "
766 f"{self.css['row']}{r}"
767 ),
768 value,
769 header_element_visible,
770 display_value=self._display_funcs_index[(r, c)](value),
771 attributes=(
772 f'rowspan="{idx_lengths.get((c, r), 0)}"'
773 if idx_lengths.get((c, r), 0) > 1
774 else ""
775 ),
776 )
777
778 if self.cell_ids:
779 header_element[
780 "id"
781 ] = f"{self.css['level']}{c}_{self.css['row']}{r}" # id is given
782 if (
783 header_element_visible
784 and (r, c) in self.ctx_index
785 and self.ctx_index[r, c]
786 ):
787 # always add id if a style is specified
788 header_element["id"] = f"{self.css['level']}{c}_{self.css['row']}{r}"
789 self.cellstyle_map_index[tuple(self.ctx_index[r, c])].append(
790 f"{self.css['level']}{c}_{self.css['row']}{r}"
791 )
792
793 index_headers.append(header_element)
794
795 data: list = []
796 visible_col_count: int = 0
797 for c, value in enumerate(row_tup[1:]):
798 data_element_visible = (
799 c not in self.hidden_columns and r not in self.hidden_rows
800 )
801 if data_element_visible:
802 visible_col_count += 1
803 if self._check_trim(
804 visible_col_count,
805 max_cols,
806 data,
807 "td",
808 f"{self.css['data']} {self.css['row']}{r} {self.css['col_trim']}",
809 ):
810 break
811
812 # add custom classes from cell context
813 cls = ""
814 if (r, c) in self.cell_context:
815 cls = " " + self.cell_context[r, c]
816
817 data_element = _element(
818 "td",
819 (
820 f"{self.css['data']} {self.css['row']}{r} "
821 f"{self.css['col']}{c}{cls}"
822 ),
823 value,
824 data_element_visible,
825 attributes="",
826 display_value=self._display_funcs[(r, c)](value),
827 )
828
829 if self.cell_ids:
830 data_element["id"] = f"{self.css['row']}{r}_{self.css['col']}{c}"
831 if data_element_visible and (r, c) in self.ctx and self.ctx[r, c]:
832 # always add id if needed due to specified style
833 data_element["id"] = f"{self.css['row']}{r}_{self.css['col']}{c}"
834 self.cellstyle_map[tuple(self.ctx[r, c])].append(
835 f"{self.css['row']}{r}_{self.css['col']}{c}"
836 )
837
838 data.append(data_element)
839
840 return index_headers + data
841
    def _translate_latex(self, d: dict, clines: str | None) -> None:
        r"""
        Post-process the default render dict for the LaTeX template format.

        Processing items included are:
          - Remove hidden columns from the non-headers part of the body.
          - Place cellstyles directly in td cells rather than use cellstyle_map.
          - Remove hidden indexes or reinsert missing th elements if part of multiindex
            or multirow sparsification (so that \multirow and \multicol work correctly).

        Parameters
        ----------
        d : dict
            The render dict to modify in place; its "head" and "body" entries
            are replaced and a "clines" entry is added when ``clines`` is given.
        clines : str or None
            One of None, "all;data", "all;index", "skip-last;data" or
            "skip-last;index".

        Raises
        ------
        ValueError
            If ``clines`` is not one of the accepted values. The check runs after
            "head" and "body" have already been rewritten.
        """
        index_levels = self.index.nlevels
        visible_index_level_n = index_levels - sum(self.hide_index_)
        # keep only visible header cells and attach their styles; ``c`` counts
        # every cell of the row, including the ones that are filtered out
        d["head"] = [
            [
                {**col, "cellstyle": self.ctx_columns[r, c - visible_index_level_n]}
                for c, col in enumerate(row)
                if col["is_visible"]
            ]
            for r, row in enumerate(d["head"])
        ]

        def _concatenated_visible_rows(obj, n, row_indices):
            """
            Extract all visible row indices recursively from concatenated stylers.
            """
            # ``n`` is the running row offset; the updated offset is returned
            row_indices.extend(
                [r + n for r in range(len(obj.index)) if r not in obj.hidden_rows]
            )
            n += len(obj.index)
            for concatenated in obj.concatenated:
                n = _concatenated_visible_rows(concatenated, n, row_indices)
            return n

        def concatenated_visible_rows(obj):
            row_indices: list[int] = []
            _concatenated_visible_rows(obj, 0, row_indices)
            # TODO try to consolidate the concat visible rows
            # methods to a single function / recursion for simplicity
            return row_indices

        body = []
        for r, row in zip(concatenated_visible_rows(self), d["body"]):
            # note: cannot enumerate d["body"] because rows were dropped if hidden
            # during _translate_body so must zip to acquire the true r-index associated
            # with the ctx obj which contains the cell styles.
            if all(self.hide_index_):
                row_body_headers = []
            else:
                # sparsified (not visible) headers are kept with an empty display
                # value; headers of hidden index levels are dropped
                row_body_headers = [
                    {
                        **col,
                        "display_value": col["display_value"]
                        if col["is_visible"]
                        else "",
                        "cellstyle": self.ctx_index[r, c],
                    }
                    for c, col in enumerate(row[:index_levels])
                    if (col["type"] == "th" and not self.hide_index_[c])
                ]

            row_body_cells = [
                {**col, "cellstyle": self.ctx[r, c]}
                for c, col in enumerate(row[index_levels:])
                if (col["is_visible"] and col["type"] == "td")
            ]

            body.append(row_body_headers + row_body_cells)
        d["body"] = body

        # clines are determined from info on index_lengths and hidden_rows and input
        # to a dict defining which row clines should be added in the template.
        if clines not in [
            None,
            "all;data",
            "all;index",
            "skip-last;data",
            "skip-last;index",
        ]:
            raise ValueError(
                f"`clines` value of {clines} is invalid. Should either be None or one "
                f"of 'all;data', 'all;index', 'skip-last;data', 'skip-last;index'."
            )
        if clines is not None:
            # ``row_body_cells`` is the value left over from the last body row; it
            # is only read when the body is non-empty, so it is always bound here
            data_len = len(row_body_cells) if "data" in clines and d["body"] else 0

            d["clines"] = defaultdict(list)
            visible_row_indexes: list[int] = [
                r for r in range(len(self.data.index)) if r not in self.hidden_rows
            ]
            visible_index_levels: list[int] = [
                i for i in range(index_levels) if not self.hide_index_[i]
            ]
            for rn, r in enumerate(visible_row_indexes):
                for lvln, lvl in enumerate(visible_index_levels):
                    if lvl == index_levels - 1 and "skip-last" in clines:
                        continue
                    idx_len = d["index_lengths"].get((lvl, r), None)
                    if idx_len is not None:  # i.e. not a sparsified entry
                        # keyed by ``rn + idx_len``; the line spans from this
                        # visible index level to the last rendered column
                        d["clines"][rn + idx_len].append(
                            f"\\cline{{{lvln+1}-{len(visible_index_levels)+data_len}}}"
                        )
943
944 def format(
945 self,
946 formatter: ExtFormatter | None = None,
947 subset: Subset | None = None,
948 na_rep: str | None = None,
949 precision: int | None = None,
950 decimal: str = ".",
951 thousands: str | None = None,
952 escape: str | None = None,
953 hyperlinks: str | None = None,
954 ) -> StylerRenderer:
955 r"""
956 Format the text display value of cells.
957
958 Parameters
959 ----------
960 formatter : str, callable, dict or None
961 Object to define how values are displayed. See notes.
962 subset : label, array-like, IndexSlice, optional
963 A valid 2d input to `DataFrame.loc[<subset>]`, or, in the case of a 1d input
964 or single key, to `DataFrame.loc[:, <subset>]` where the columns are
965 prioritised, to limit ``data`` to *before* applying the function.
966 na_rep : str, optional
967 Representation for missing values.
968 If ``na_rep`` is None, no special formatting is applied.
969 precision : int, optional
970 Floating point precision to use for display purposes, if not determined by
971 the specified ``formatter``.
972
973 .. versionadded:: 1.3.0
974
975 decimal : str, default "."
976 Character used as decimal separator for floats, complex and integers.
977
978 .. versionadded:: 1.3.0
979
980 thousands : str, optional, default None
981 Character used as thousands separator for floats, complex and integers.
982
983 .. versionadded:: 1.3.0
984
985 escape : str, optional
986 Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
987 in cell display string with HTML-safe sequences.
988 Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
989 ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
990 LaTeX-safe sequences.
991 Use 'latex-math' to replace the characters the same way as in 'latex' mode,
992 except for math substrings, which either are surrounded
993 by two characters ``$`` or start with the character ``\(`` and
994 end with ``\)``. Escaping is done before ``formatter``.
995
996 .. versionadded:: 1.3.0
997
998 hyperlinks : {"html", "latex"}, optional
999 Convert string patterns containing https://, http://, ftp:// or www. to
1000 HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
1001 commands if "latex".
1002
1003 .. versionadded:: 1.4.0
1004
1005 Returns
1006 -------
1007 Styler
1008
1009 See Also
1010 --------
1011 Styler.format_index: Format the text display value of index labels.
1012
1013 Notes
1014 -----
1015 This method assigns a formatting function, ``formatter``, to each cell in the
1016 DataFrame. If ``formatter`` is ``None``, then the default formatter is used.
1017 If a callable then that function should take a data value as input and return
1018 a displayable representation, such as a string. If ``formatter`` is
1019 given as a string this is assumed to be a valid Python format specification
1020 and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given,
1021 keys should correspond to column names, and values should be string or
1022 callable, as above.
1023
1024 The default formatter currently expresses floats and complex numbers with the
1025 pandas display precision unless using the ``precision`` argument here. The
1026 default formatter does not adjust the representation of missing values unless
1027 the ``na_rep`` argument is used.
1028
1029 The ``subset`` argument defines which region to apply the formatting function
1030 to. If the ``formatter`` argument is given in dict form but does not include
1031 all columns within the subset then these columns will have the default formatter
1032 applied. Any columns in the formatter dict excluded from the subset will
1033 be ignored.
1034
1035 When using a ``formatter`` string the dtypes must be compatible, otherwise a
1036 `ValueError` will be raised.
1037
1038 When instantiating a Styler, default formatting can be applied be setting the
1039 ``pandas.options``:
1040
1041 - ``styler.format.formatter``: default None.
1042 - ``styler.format.na_rep``: default None.
1043 - ``styler.format.precision``: default 6.
1044 - ``styler.format.decimal``: default ".".
1045 - ``styler.format.thousands``: default None.
1046 - ``styler.format.escape``: default None.
1047
1048 .. warning::
1049 `Styler.format` is ignored when using the output format `Styler.to_excel`,
1050 since Excel and Python have inherrently different formatting structures.
1051 However, it is possible to use the `number-format` pseudo CSS attribute
1052 to force Excel permissible formatting. See examples.
1053
1054 Examples
1055 --------
1056 Using ``na_rep`` and ``precision`` with the default ``formatter``
1057
1058 >>> df = pd.DataFrame([[np.nan, 1.0, 'A'], [2.0, np.nan, 3.0]])
1059 >>> df.style.format(na_rep='MISS', precision=3) # doctest: +SKIP
1060 0 1 2
1061 0 MISS 1.000 A
1062 1 2.000 MISS 3.000
1063
1064 Using a ``formatter`` specification on consistent column dtypes
1065
1066 >>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1]) # doctest: +SKIP
1067 0 1 2
1068 0 MISS 1.00 A
1069 1 2.00 MISS 3.000000
1070
1071 Using the default ``formatter`` for unspecified columns
1072
1073 >>> df.style.format({0: '{:.2f}', 1: '£ {:.1f}'}, na_rep='MISS', precision=1)
1074 ... # doctest: +SKIP
1075 0 1 2
1076 0 MISS £ 1.0 A
1077 1 2.00 MISS 3.0
1078
1079 Multiple ``na_rep`` or ``precision`` specifications under the default
1080 ``formatter``.
1081
1082 >>> (df.style.format(na_rep='MISS', precision=1, subset=[0])
1083 ... .format(na_rep='PASS', precision=2, subset=[1, 2])) # doctest: +SKIP
1084 0 1 2
1085 0 MISS 1.00 A
1086 1 2.0 PASS 3.00
1087
1088 Using a callable ``formatter`` function.
1089
1090 >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT'
1091 >>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS')
1092 ... # doctest: +SKIP
1093 0 1 2
1094 0 MISS 1.0000 STRING
1095 1 2.0 MISS FLOAT
1096
1097 Using a ``formatter`` with HTML ``escape`` and ``na_rep``.
1098
1099 >>> df = pd.DataFrame([['<div></div>', '"A&B"', None]])
1100 >>> s = df.style.format(
1101 ... '<a href="a.com/{0}">{0}</a>', escape="html", na_rep="NA"
1102 ... )
1103 >>> s.to_html() # doctest: +SKIP
1104 ...
1105 <td .. ><a href="a.com/<div></div>"><div></div></a></td>
1106 <td .. ><a href="a.com/"A&B"">"A&B"</a></td>
1107 <td .. >NA</td>
1108 ...
1109
1110 Using a ``formatter`` with ``escape`` in 'latex' mode.
1111
1112 >>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]])
1113 >>> df.style.format("\\textbf{{{}}}", escape="latex").to_latex()
1114 ... # doctest: +SKIP
1115 \begin{tabular}{ll}
1116 & 0 \\
1117 0 & \textbf{123} \\
1118 1 & \textbf{\textasciitilde \space \textasciicircum } \\
1119 2 & \textbf{\$\%\#} \\
1120 \end{tabular}
1121
1122 Applying ``escape`` in 'latex-math' mode. In the example below
1123 we enter math mode using the character ``$``.
1124
1125 >>> df = pd.DataFrame([[r"$\sum_{i=1}^{10} a_i$ a~b $\alpha \
1126 ... = \frac{\beta}{\zeta^2}$"], ["%#^ $ \$x^2 $"]])
1127 >>> df.style.format(escape="latex-math").to_latex()
1128 ... # doctest: +SKIP
1129 \begin{tabular}{ll}
1130 & 0 \\
1131 0 & $\sum_{i=1}^{10} a_i$ a\textasciitilde b $\alpha = \frac{\beta}{\zeta^2}$ \\
1132 1 & \%\#\textasciicircum \space $ \$x^2 $ \\
1133 \end{tabular}
1134
1135 We can use the character ``\(`` to enter math mode and the character ``\)``
1136 to close math mode.
1137
1138 >>> df = pd.DataFrame([[r"\(\sum_{i=1}^{10} a_i\) a~b \(\alpha \
1139 ... = \frac{\beta}{\zeta^2}\)"], ["%#^ \( \$x^2 \)"]])
1140 >>> df.style.format(escape="latex-math").to_latex()
1141 ... # doctest: +SKIP
1142 \begin{tabular}{ll}
1143 & 0 \\
1144 0 & \(\sum_{i=1}^{10} a_i\) a\textasciitilde b \(\alpha
1145 = \frac{\beta}{\zeta^2}\) \\
1146 1 & \%\#\textasciicircum \space \( \$x^2 \) \\
1147 \end{tabular}
1148
1149 If we have in one DataFrame cell a combination of both shorthands
1150 for math formulas, the shorthand with the sign ``$`` will be applied.
1151
1152 >>> df = pd.DataFrame([[r"\( x^2 \) $x^2$"], \
1153 ... [r"$\frac{\beta}{\zeta}$ \(\frac{\beta}{\zeta}\)"]])
1154 >>> df.style.format(escape="latex-math").to_latex()
1155 ... # doctest: +SKIP
1156 \begin{tabular}{ll}
1157 & 0 \\
1158 0 & \textbackslash ( x\textasciicircum 2 \textbackslash ) $x^2$ \\
1159 1 & $\frac{\beta}{\zeta}$ \textbackslash (\textbackslash
1160 frac\{\textbackslash beta\}\{\textbackslash zeta\}\textbackslash ) \\
1161 \end{tabular}
1162
1163 Pandas defines a `number-format` pseudo CSS attribute instead of the `.format`
1164 method to create `to_excel` permissible formatting. Note that semi-colons are
1165 CSS protected characters but used as separators in Excel's format string.
1166 Replace semi-colons with the section separator character (ASCII-245) when
1167 defining the formatting here.
1168
1169 >>> df = pd.DataFrame({"A": [1, 0, -1]})
1170 >>> pseudo_css = "number-format: 0§[Red](0)§-§@;"
1171 >>> filename = "formatted_file.xlsx"
1172 >>> df.style.map(lambda v: pseudo_css).to_excel(filename) # doctest: +SKIP
1173
1174 .. figure:: ../../_static/style/format_excel_css.png
1175 """
1176 if all(
1177 (
1178 formatter is None,
1179 subset is None,
1180 precision is None,
1181 decimal == ".",
1182 thousands is None,
1183 na_rep is None,
1184 escape is None,
1185 hyperlinks is None,
1186 )
1187 ):
1188 self._display_funcs.clear()
1189 return self # clear the formatter / revert to default and avoid looping
1190
1191 subset = slice(None) if subset is None else subset
1192 subset = non_reducing_slice(subset)
1193 data = self.data.loc[subset]
1194
1195 if not isinstance(formatter, dict):
1196 formatter = {col: formatter for col in data.columns}
1197
1198 cis = self.columns.get_indexer_for(data.columns)
1199 ris = self.index.get_indexer_for(data.index)
1200 for ci in cis:
1201 format_func = _maybe_wrap_formatter(
1202 formatter.get(self.columns[ci]),
1203 na_rep=na_rep,
1204 precision=precision,
1205 decimal=decimal,
1206 thousands=thousands,
1207 escape=escape,
1208 hyperlinks=hyperlinks,
1209 )
1210 for ri in ris:
1211 self._display_funcs[(ri, ci)] = format_func
1212
1213 return self
1214
1215 def format_index(
1216 self,
1217 formatter: ExtFormatter | None = None,
1218 axis: Axis = 0,
1219 level: Level | list[Level] | None = None,
1220 na_rep: str | None = None,
1221 precision: int | None = None,
1222 decimal: str = ".",
1223 thousands: str | None = None,
1224 escape: str | None = None,
1225 hyperlinks: str | None = None,
1226 ) -> StylerRenderer:
1227 r"""
1228 Format the text display value of index labels or column headers.
1229
1230 .. versionadded:: 1.4.0
1231
1232 Parameters
1233 ----------
1234 formatter : str, callable, dict or None
1235 Object to define how values are displayed. See notes.
1236 axis : {0, "index", 1, "columns"}
1237 Whether to apply the formatter to the index or column headers.
1238 level : int, str, list
1239 The level(s) over which to apply the generic formatter.
1240 na_rep : str, optional
1241 Representation for missing values.
1242 If ``na_rep`` is None, no special formatting is applied.
1243 precision : int, optional
1244 Floating point precision to use for display purposes, if not determined by
1245 the specified ``formatter``.
1246 decimal : str, default "."
1247 Character used as decimal separator for floats, complex and integers.
1248 thousands : str, optional, default None
1249 Character used as thousands separator for floats, complex and integers.
1250 escape : str, optional
1251 Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
1252 in cell display string with HTML-safe sequences.
1253 Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
1254 ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
1255 LaTeX-safe sequences.
1256 Escaping is done before ``formatter``.
1257 hyperlinks : {"html", "latex"}, optional
1258 Convert string patterns containing https://, http://, ftp:// or www. to
1259 HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
1260 commands if "latex".
1261
1262 Returns
1263 -------
1264 Styler
1265
1266 See Also
1267 --------
1268 Styler.format: Format the text display value of data cells.
1269
1270 Notes
1271 -----
1272 This method assigns a formatting function, ``formatter``, to each level label
1273 in the DataFrame's index or column headers. If ``formatter`` is ``None``,
1274 then the default formatter is used.
1275 If a callable then that function should take a label value as input and return
1276 a displayable representation, such as a string. If ``formatter`` is
1277 given as a string this is assumed to be a valid Python format specification
1278 and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given,
1279 keys should correspond to MultiIndex level numbers or names, and values should
1280 be string or callable, as above.
1281
1282 The default formatter currently expresses floats and complex numbers with the
1283 pandas display precision unless using the ``precision`` argument here. The
1284 default formatter does not adjust the representation of missing values unless
1285 the ``na_rep`` argument is used.
1286
1287 The ``level`` argument defines which levels of a MultiIndex to apply the
1288 method to. If the ``formatter`` argument is given in dict form but does
1289 not include all levels within the level argument then these unspecified levels
1290 will have the default formatter applied. Any levels in the formatter dict
1291 specifically excluded from the level argument will be ignored.
1292
1293 When using a ``formatter`` string the dtypes must be compatible, otherwise a
1294 `ValueError` will be raised.
1295
1296 .. warning::
1297 `Styler.format_index` is ignored when using the output format
1298 `Styler.to_excel`, since Excel and Python have inherrently different
1299 formatting structures.
1300 However, it is possible to use the `number-format` pseudo CSS attribute
1301 to force Excel permissible formatting. See documentation for `Styler.format`.
1302
1303 Examples
1304 --------
1305 Using ``na_rep`` and ``precision`` with the default ``formatter``
1306
1307 >>> df = pd.DataFrame([[1, 2, 3]], columns=[2.0, np.nan, 4.0])
1308 >>> df.style.format_index(axis=1, na_rep='MISS', precision=3) # doctest: +SKIP
1309 2.000 MISS 4.000
1310 0 1 2 3
1311
1312 Using a ``formatter`` specification on consistent dtypes in a level
1313
1314 >>> df.style.format_index('{:.2f}', axis=1, na_rep='MISS') # doctest: +SKIP
1315 2.00 MISS 4.00
1316 0 1 2 3
1317
1318 Using the default ``formatter`` for unspecified levels
1319
1320 >>> df = pd.DataFrame([[1, 2, 3]],
1321 ... columns=pd.MultiIndex.from_arrays([["a", "a", "b"],[2, np.nan, 4]]))
1322 >>> df.style.format_index({0: lambda v: v.upper()}, axis=1, precision=1)
1323 ... # doctest: +SKIP
1324 A B
1325 2.0 nan 4.0
1326 0 1 2 3
1327
1328 Using a callable ``formatter`` function.
1329
1330 >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT'
1331 >>> df.style.format_index(func, axis=1, na_rep='MISS')
1332 ... # doctest: +SKIP
1333 STRING STRING
1334 FLOAT MISS FLOAT
1335 0 1 2 3
1336
1337 Using a ``formatter`` with HTML ``escape`` and ``na_rep``.
1338
1339 >>> df = pd.DataFrame([[1, 2, 3]], columns=['"A"', 'A&B', None])
1340 >>> s = df.style.format_index('$ {0}', axis=1, escape="html", na_rep="NA")
1341 ... # doctest: +SKIP
1342 <th .. >$ "A"</th>
1343 <th .. >$ A&B</th>
1344 <th .. >NA</td>
1345 ...
1346
1347 Using a ``formatter`` with LaTeX ``escape``.
1348
1349 >>> df = pd.DataFrame([[1, 2, 3]], columns=["123", "~", "$%#"])
1350 >>> df.style.format_index("\\textbf{{{}}}", escape="latex", axis=1).to_latex()
1351 ... # doctest: +SKIP
1352 \begin{tabular}{lrrr}
1353 {} & {\textbf{123}} & {\textbf{\textasciitilde }} & {\textbf{\$\%\#}} \\
1354 0 & 1 & 2 & 3 \\
1355 \end{tabular}
1356 """
1357 axis = self.data._get_axis_number(axis)
1358 if axis == 0:
1359 display_funcs_, obj = self._display_funcs_index, self.index
1360 else:
1361 display_funcs_, obj = self._display_funcs_columns, self.columns
1362 levels_ = refactor_levels(level, obj)
1363
1364 if all(
1365 (
1366 formatter is None,
1367 level is None,
1368 precision is None,
1369 decimal == ".",
1370 thousands is None,
1371 na_rep is None,
1372 escape is None,
1373 hyperlinks is None,
1374 )
1375 ):
1376 display_funcs_.clear()
1377 return self # clear the formatter / revert to default and avoid looping
1378
1379 if not isinstance(formatter, dict):
1380 formatter = {level: formatter for level in levels_}
1381 else:
1382 formatter = {
1383 obj._get_level_number(level): formatter_
1384 for level, formatter_ in formatter.items()
1385 }
1386
1387 for lvl in levels_:
1388 format_func = _maybe_wrap_formatter(
1389 formatter.get(lvl),
1390 na_rep=na_rep,
1391 precision=precision,
1392 decimal=decimal,
1393 thousands=thousands,
1394 escape=escape,
1395 hyperlinks=hyperlinks,
1396 )
1397
1398 for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]:
1399 display_funcs_[idx] = format_func
1400
1401 return self
1402
1403 def relabel_index(
1404 self,
1405 labels: Sequence | Index,
1406 axis: Axis = 0,
1407 level: Level | list[Level] | None = None,
1408 ) -> StylerRenderer:
1409 r"""
1410 Relabel the index, or column header, keys to display a set of specified values.
1411
1412 .. versionadded:: 1.5.0
1413
1414 Parameters
1415 ----------
1416 labels : list-like or Index
1417 New labels to display. Must have same length as the underlying values not
1418 hidden.
1419 axis : {"index", 0, "columns", 1}
1420 Apply to the index or columns.
1421 level : int, str, list, optional
1422 The level(s) over which to apply the new labels. If `None` will apply
1423 to all levels of an Index or MultiIndex which are not hidden.
1424
1425 Returns
1426 -------
1427 Styler
1428
1429 See Also
1430 --------
1431 Styler.format_index: Format the text display value of index or column headers.
1432 Styler.hide: Hide the index, column headers, or specified data from display.
1433
1434 Notes
1435 -----
1436 As part of Styler, this method allows the display of an index to be
1437 completely user-specified without affecting the underlying DataFrame data,
1438 index, or column headers. This means that the flexibility of indexing is
1439 maintained whilst the final display is customisable.
1440
1441 Since Styler is designed to be progressively constructed with method chaining,
1442 this method is adapted to react to the **currently specified hidden elements**.
1443 This is useful because it means one does not have to specify all the new
1444 labels if the majority of an index, or column headers, have already been hidden.
1445 The following produce equivalent display (note the length of ``labels`` in
1446 each case).
1447
1448 .. code-block:: python
1449
1450 # relabel first, then hide
1451 df = pd.DataFrame({"col": ["a", "b", "c"]})
1452 df.style.relabel_index(["A", "B", "C"]).hide([0,1])
1453 # hide first, then relabel
1454 df = pd.DataFrame({"col": ["a", "b", "c"]})
1455 df.style.hide([0,1]).relabel_index(["C"])
1456
1457 This method should be used, rather than :meth:`Styler.format_index`, in one of
1458 the following cases (see examples):
1459
1460 - A specified set of labels are required which are not a function of the
1461 underlying index keys.
1462 - The function of the underlying index keys requires a counter variable,
1463 such as those available upon enumeration.
1464
1465 Examples
1466 --------
1467 Basic use
1468
1469 >>> df = pd.DataFrame({"col": ["a", "b", "c"]})
1470 >>> df.style.relabel_index(["A", "B", "C"]) # doctest: +SKIP
1471 col
1472 A a
1473 B b
1474 C c
1475
1476 Chaining with pre-hidden elements
1477
1478 >>> df.style.hide([0,1]).relabel_index(["C"]) # doctest: +SKIP
1479 col
1480 C c
1481
1482 Using a MultiIndex
1483
1484 >>> midx = pd.MultiIndex.from_product([[0, 1], [0, 1], [0, 1]])
1485 >>> df = pd.DataFrame({"col": list(range(8))}, index=midx)
1486 >>> styler = df.style # doctest: +SKIP
1487 col
1488 0 0 0 0
1489 1 1
1490 1 0 2
1491 1 3
1492 1 0 0 4
1493 1 5
1494 1 0 6
1495 1 7
1496 >>> styler.hide((midx.get_level_values(0)==0)|(midx.get_level_values(1)==0))
1497 ... # doctest: +SKIP
1498 >>> styler.hide(level=[0,1]) # doctest: +SKIP
1499 >>> styler.relabel_index(["binary6", "binary7"]) # doctest: +SKIP
1500 col
1501 binary6 6
1502 binary7 7
1503
1504 We can also achieve the above by indexing first and then re-labeling
1505
1506 >>> styler = df.loc[[(1,1,0), (1,1,1)]].style
1507 >>> styler.hide(level=[0,1]).relabel_index(["binary6", "binary7"])
1508 ... # doctest: +SKIP
1509 col
1510 binary6 6
1511 binary7 7
1512
1513 Defining a formatting function which uses an enumeration counter. Also note
1514 that the value of the index key is passed in the case of string labels so it
1515 can also be inserted into the label, using curly brackets (or double curly
1516 brackets if the string if pre-formatted),
1517
1518 >>> df = pd.DataFrame({"samples": np.random.rand(10)})
1519 >>> styler = df.loc[np.random.randint(0,10,3)].style
1520 >>> styler.relabel_index([f"sample{i+1} ({{}})" for i in range(3)])
1521 ... # doctest: +SKIP
1522 samples
1523 sample1 (5) 0.315811
1524 sample2 (0) 0.495941
1525 sample3 (2) 0.067946
1526 """
1527 axis = self.data._get_axis_number(axis)
1528 if axis == 0:
1529 display_funcs_, obj = self._display_funcs_index, self.index
1530 hidden_labels, hidden_lvls = self.hidden_rows, self.hide_index_
1531 else:
1532 display_funcs_, obj = self._display_funcs_columns, self.columns
1533 hidden_labels, hidden_lvls = self.hidden_columns, self.hide_columns_
1534 visible_len = len(obj) - len(set(hidden_labels))
1535 if len(labels) != visible_len:
1536 raise ValueError(
1537 "``labels`` must be of length equal to the number of "
1538 f"visible labels along ``axis`` ({visible_len})."
1539 )
1540
1541 if level is None:
1542 level = [i for i in range(obj.nlevels) if not hidden_lvls[i]]
1543 levels_ = refactor_levels(level, obj)
1544
1545 def alias_(x, value):
1546 if isinstance(value, str):
1547 return value.format(x)
1548 return value
1549
1550 for ai, i in enumerate([i for i in range(len(obj)) if i not in hidden_labels]):
1551 if len(levels_) == 1:
1552 idx = (i, levels_[0]) if axis == 0 else (levels_[0], i)
1553 display_funcs_[idx] = partial(alias_, value=labels[ai])
1554 else:
1555 for aj, lvl in enumerate(levels_):
1556 idx = (i, lvl) if axis == 0 else (lvl, i)
1557 display_funcs_[idx] = partial(alias_, value=labels[ai][aj])
1558
1559 return self
1560
1561
1562def _element(
1563 html_element: str,
1564 html_class: str | None,
1565 value: Any,
1566 is_visible: bool,
1567 **kwargs,
1568) -> dict:
1569 """
1570 Template to return container with information for a <td></td> or <th></th> element.
1571 """
1572 if "display_value" not in kwargs:
1573 kwargs["display_value"] = value
1574 return {
1575 "type": html_element,
1576 "value": value,
1577 "class": html_class,
1578 "is_visible": is_visible,
1579 **kwargs,
1580 }
1581
1582
1583def _get_trimming_maximums(
1584 rn,
1585 cn,
1586 max_elements,
1587 max_rows=None,
1588 max_cols=None,
1589 scaling_factor: float = 0.8,
1590) -> tuple[int, int]:
1591 """
1592 Recursively reduce the number of rows and columns to satisfy max elements.
1593
1594 Parameters
1595 ----------
1596 rn, cn : int
1597 The number of input rows / columns
1598 max_elements : int
1599 The number of allowable elements
1600 max_rows, max_cols : int, optional
1601 Directly specify an initial maximum rows or columns before compression.
1602 scaling_factor : float
1603 Factor at which to reduce the number of rows / columns to fit.
1604
1605 Returns
1606 -------
1607 rn, cn : tuple
1608 New rn and cn values that satisfy the max_elements constraint
1609 """
1610
1611 def scale_down(rn, cn):
1612 if cn >= rn:
1613 return rn, int(cn * scaling_factor)
1614 else:
1615 return int(rn * scaling_factor), cn
1616
1617 if max_rows:
1618 rn = max_rows if rn > max_rows else rn
1619 if max_cols:
1620 cn = max_cols if cn > max_cols else cn
1621
1622 while rn * cn > max_elements:
1623 rn, cn = scale_down(rn, cn)
1624
1625 return rn, cn
1626
1627
def _get_level_lengths(
    index: Index,
    sparsify: bool,
    max_index: int,
    hidden_elements: Sequence[int] | None = None,
) -> dict[tuple[int, int], int]:
    """
    Given an index, find the level length for each element.

    For a flat (non-MultiIndex) index every visible position gets a span of 1
    under level 0; ``sparsify`` and ``max_index`` are not consulted in that case.
    For a MultiIndex each level is scanned in order and consecutive sparsified
    entries are accumulated into the span of the label that opened the section.

    Parameters
    ----------
    index : Index
        Index or columns to determine lengths of each element
    sparsify : bool
        Whether to hide or show each distinct element in a MultiIndex
    max_index : int
        The maximum number of elements to analyse along the index due to trimming
    hidden_elements : sequence of int
        Index positions of elements hidden from display in the index affecting
        length

    Returns
    -------
    Dict :
        Result is a dictionary of (level, initial_position): span
        Only entries whose span is at least 1 are returned.
    """
    # NOTE(review): ``_format_multi`` presumably yields, per level, the labels with
    # repeated (sparsified) entries replaced by ``lib.no_default`` -- confirm.
    if isinstance(index, MultiIndex):
        levels = index._format_multi(sparsify=lib.no_default, include_names=False)
    else:
        levels = index._format_flat(include_name=False)

    if hidden_elements is None:
        hidden_elements = []

    lengths = {}
    if not isinstance(index, MultiIndex):
        # flat index: one entry of span 1 per visible position, keyed under level 0
        for i, value in enumerate(levels):
            if i not in hidden_elements:
                lengths[(0, i)] = 1
        return lengths

    for i, lvl in enumerate(levels):
        visible_row_count = 0  # used to break loop due to display trimming
        for j, row in enumerate(lvl):
            if visible_row_count > max_index:
                break
            if not sparsify:
                # then lengths will always equal 1 since no aggregation.
                if j not in hidden_elements:
                    lengths[(i, j)] = 1
                    visible_row_count += 1
            elif (row is not lib.no_default) and (j not in hidden_elements):
                # this element has not been sparsified so must be the start of section
                last_label = j
                lengths[(i, last_label)] = 1
                visible_row_count += 1
            elif row is not lib.no_default:
                # even if the above is hidden, keep track of it in case length > 1 and
                # later elements are visible
                last_label = j
                lengths[(i, last_label)] = 0
            elif j not in hidden_elements:
                # then element must be part of sparsified section and is visible
                visible_row_count += 1
                if visible_row_count > max_index:
                    break  # do not add a length since the render trim limit reached
                if lengths[(i, last_label)] == 0:
                    # if previous iteration was first-of-section but hidden then offset
                    last_label = j
                    lengths[(i, last_label)] = 1
                else:
                    # else add to previous iteration
                    lengths[(i, last_label)] += 1

    # drop the zero-span placeholders recorded for hidden section starts
    non_zero_lengths = {
        element: length for element, length in lengths.items() if length >= 1
    }

    return non_zero_lengths
1707
1708
1709def _is_visible(idx_row, idx_col, lengths) -> bool:
1710 """
1711 Index -> {(idx_row, idx_col): bool}).
1712 """
1713 return (idx_col, idx_row) in lengths
1714
1715
def format_table_styles(styles: CSSStyles) -> CSSStyles:
    """
    Split entries with comma-separated CSS selectors into one entry per selector.

    Whitespace surrounding each individual selector is removed so that, for
    example, ``'td, th'`` produces the selectors ``'td'`` and ``'th'``:

    [{'selector': 'td, th', 'props': 'a:v;'}]
        ---> [{'selector': 'td', 'props': 'a:v;'},
              {'selector': 'th', 'props': 'a:v;'}]

    Parameters
    ----------
    styles : list of dict
        Dicts with a ``"selector"`` string and a ``"props"`` value.

    Returns
    -------
    list of dict
        One dict per individual selector, in the original order. The ``"props"``
        object of the source entry is shared (not copied) between the entries
        generated from it. An empty selector is kept as an empty selector.
    """
    return [
        # strip so that "td, th" does not yield " th" with a leading space
        {"selector": selector.strip(), "props": css_dict["props"]}
        for css_dict in styles
        for selector in css_dict["selector"].split(",")
    ]
1728
1729
1730def _default_formatter(x: Any, precision: int, thousands: bool = False) -> Any:
1731 """
1732 Format the display of a value
1733
1734 Parameters
1735 ----------
1736 x : Any
1737 Input variable to be formatted
1738 precision : Int
1739 Floating point precision used if ``x`` is float or complex.
1740 thousands : bool, default False
1741 Whether to group digits with thousands separated with ",".
1742
1743 Returns
1744 -------
1745 value : Any
1746 Matches input type, or string if input is float or complex or int with sep.
1747 """
1748 if is_float(x) or is_complex(x):
1749 return f"{x:,.{precision}f}" if thousands else f"{x:.{precision}f}"
1750 elif is_integer(x):
1751 return f"{x:,}" if thousands else str(x)
1752 return x
1753
1754
1755def _wrap_decimal_thousands(
1756 formatter: Callable, decimal: str, thousands: str | None
1757) -> Callable:
1758 """
1759 Takes a string formatting function and wraps logic to deal with thousands and
1760 decimal parameters, in the case that they are non-standard and that the input
1761 is a (float, complex, int).
1762 """
1763
1764 def wrapper(x):
1765 if is_float(x) or is_integer(x) or is_complex(x):
1766 if decimal != "." and thousands is not None and thousands != ",":
1767 return (
1768 formatter(x)
1769 .replace(",", "§_§-") # rare string to avoid "," <-> "." clash.
1770 .replace(".", decimal)
1771 .replace("§_§-", thousands)
1772 )
1773 elif decimal != "." and (thousands is None or thousands == ","):
1774 return formatter(x).replace(".", decimal)
1775 elif decimal == "." and thousands is not None and thousands != ",":
1776 return formatter(x).replace(",", thousands)
1777 return formatter(x)
1778
1779 return wrapper
1780
1781
1782def _str_escape(x, escape):
1783 """if escaping: only use on str, else return input"""
1784 if isinstance(x, str):
1785 if escape == "html":
1786 return escape_html(x)
1787 elif escape == "latex":
1788 return _escape_latex(x)
1789 elif escape == "latex-math":
1790 return _escape_latex_math(x)
1791 else:
1792 raise ValueError(
1793 f"`escape` only permitted in {{'html', 'latex', 'latex-math'}}, \
1794got {escape}"
1795 )
1796 return x
1797
1798
1799def _render_href(x, format):
1800 """uses regex to detect a common URL pattern and converts to href tag in format."""
1801 if isinstance(x, str):
1802 if format == "html":
1803 href = '<a href="{0}" target="_blank">{0}</a>'
1804 elif format == "latex":
1805 href = r"\href{{{0}}}{{{0}}}"
1806 else:
1807 raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'")
1808 pat = r"((http|ftp)s?:\/\/|www.)[\w/\-?=%.:@]+\.[\w/\-&?=%.,':;~!@#$*()\[\]]+"
1809 return re.sub(pat, lambda m: href.format(m.group(0)), x)
1810 return x
1811
1812
1813def _maybe_wrap_formatter(
1814 formatter: BaseFormatter | None = None,
1815 na_rep: str | None = None,
1816 precision: int | None = None,
1817 decimal: str = ".",
1818 thousands: str | None = None,
1819 escape: str | None = None,
1820 hyperlinks: str | None = None,
1821) -> Callable:
1822 """
1823 Allows formatters to be expressed as str, callable or None, where None returns
1824 a default formatting function. wraps with na_rep, and precision where they are
1825 available.
1826 """
1827 # Get initial func from input string, input callable, or from default factory
1828 if isinstance(formatter, str):
1829 func_0 = lambda x: formatter.format(x)
1830 elif callable(formatter):
1831 func_0 = formatter
1832 elif formatter is None:
1833 precision = (
1834 get_option("styler.format.precision") if precision is None else precision
1835 )
1836 func_0 = partial(
1837 _default_formatter, precision=precision, thousands=(thousands is not None)
1838 )
1839 else:
1840 raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}")
1841
1842 # Replace chars if escaping
1843 if escape is not None:
1844 func_1 = lambda x: func_0(_str_escape(x, escape=escape))
1845 else:
1846 func_1 = func_0
1847
1848 # Replace decimals and thousands if non-standard inputs detected
1849 if decimal != "." or (thousands is not None and thousands != ","):
1850 func_2 = _wrap_decimal_thousands(func_1, decimal=decimal, thousands=thousands)
1851 else:
1852 func_2 = func_1
1853
1854 # Render links
1855 if hyperlinks is not None:
1856 func_3 = lambda x: func_2(_render_href(x, format=hyperlinks))
1857 else:
1858 func_3 = func_2
1859
1860 # Replace missing values if na_rep
1861 if na_rep is None:
1862 return func_3
1863 else:
1864 return lambda x: na_rep if (isna(x) is True) else func_3(x)
1865
1866
def non_reducing_slice(slice_: Subset):
    """
    Rewrite a slice so that it cannot reduce to a Series or a scalar.

    Every user-supplied ``subset`` should be passed through this function so
    that the resulting ``.loc`` selection is always a DataFrame.

    Returns
    -------
    tuple
        Indexer in which every scalar component has been wrapped in a list.
    """
    # a bare collection or label is treated as a column selection, like DataFrame
    # ['A', 'B'] -> IndexSlice[:, ['A', 'B']]
    if isinstance(slice_, (ABCSeries, np.ndarray, Index, list, str)):
        slice_ = IndexSlice[:, slice_]

    def keeps_dimension(part) -> bool:
        """
        Returns
        -------
        bool
            True if ``part`` is a slice or list-like, or is a tuple (i.e. a
            MultiIndex key) holding at least one slice or list-like.
        """
        # GH#39421: a tuple is inspected element-wise for a sub-slice
        members = part if isinstance(part, tuple) else (part,)
        return any(isinstance(m, slice) or is_list_like(m) for m in members)

    if isinstance(slice_, slice):
        # slice(a, b, c)
        return (slice_,)
    if not is_list_like(slice_):
        # a 1-d scalar key, like df.loc[1]
        return ([slice_],)
    return tuple(p if keeps_dimension(p) else [p] for p in slice_)
1908
1909
def maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList:
    """
    Convert css-string to sequence of tuples format if needed.

    'color:red; border:1px solid black;' -> [('color', 'red'),
                                             ('border', '1px solid black')]

    Each declaration is split on its first colon only, so values that contain
    colons themselves (for example ``url(http://...)``) are kept intact.

    Parameters
    ----------
    style : str or list of tuples
        CSS declarations separated by ";" or an already converted list.

    Returns
    -------
    list of tuples
        ``(attribute, value)`` pairs with surrounding whitespace removed. Input
        that is not a string is returned unchanged.

    Raises
    ------
    ValueError
        If a non-blank declaration in the string does not contain a colon.
    """
    if not isinstance(style, str):
        return style

    pairs = []
    for declaration in style.split(";"):
        if declaration.strip() == "":
            # skip the empty pieces left by trailing or repeated semicolons
            continue
        attr, colon, value = declaration.partition(":")
        if not colon:
            raise ValueError(
                "Styles supplied as string must follow CSS rule formats, "
                f"for example 'attr: val;'. '{style}' was given."
            )
        pairs.append((attr.strip(), value.strip()))
    return pairs
1930
1931
def refactor_levels(
    level: Level | list[Level] | None,
    obj: Index,
) -> list[int]:
    """
    Normalise a ``level`` argument into a list of level numbers.

    Parameters
    ----------
    level : int, str, list
        Original ``level`` arg supplied by the caller.
    obj:
        Either ``self.index`` or ``self.columns``

    Returns
    -------
    list : every level of ``obj`` when ``level`` is None; otherwise the given
        level(s), with integers kept as they are and other entries resolved
        through ``obj._get_level_number``.

    Raises
    ------
    ValueError
        If ``level`` is neither None, an int, a str nor a list.
    """
    if level is None:
        return list(range(obj.nlevels))
    if isinstance(level, int):
        return [level]
    if isinstance(level, str):
        return [obj._get_level_number(level)]
    if isinstance(level, list):
        return [
            lev if isinstance(lev, int) else obj._get_level_number(lev)
            for lev in level
        ]
    raise ValueError("`level` must be of type `int`, `str` or list of such")
1964
1965
class Tooltips:
    """
    An extension to ``Styler`` that allows for and manipulates tooltips on hover
    of ``<td>`` cells in the HTML result.

    Parameters
    ----------
    css_props: list-like, optional
        List of (attr, value) tuples defining properties of the CSS class.
        When omitted, a fresh list holding the defaults shown in the Notes is
        created for this instance.
    css_name: str, default "pd-t"
        Name of the CSS class that controls visualisation of tooltips.
    tooltips: DataFrame, optional
        DataFrame of strings aligned with underlying Styler data for tooltip
        display. When omitted, a new empty DataFrame is used.

    Notes
    -----
    The default properties for the tooltip CSS class are:

    - visibility: hidden
    - position: absolute
    - z-index: 1
    - background-color: black
    - color: white
    - transform: translate(-20px, -20px)

    Hidden visibility is a key prerequisite to the hover functionality, and should
    always be included in any manual properties specification.
    """

    def __init__(
        self,
        css_props: CSSProperties | None = None,
        css_name: str = "pd-t",
        tooltips: DataFrame | None = None,
    ) -> None:
        # Defaults are built per instance: a list / DataFrame placed directly in
        # the signature would be one shared object for every ``Tooltips``.
        if css_props is None:
            css_props = [
                ("visibility", "hidden"),
                ("position", "absolute"),
                ("z-index", 1),
                ("background-color", "black"),
                ("color", "white"),
                ("transform", "translate(-20px, -20px)"),
            ]
        if tooltips is None:
            tooltips = DataFrame()

        self.class_name = css_name
        self.class_properties = css_props
        self.tt_data = tooltips
        self.table_styles: CSSStyles = []

    @property
    def _class_styles(self):
        """
        Combine the ``Tooltips`` CSS class name and CSS properties to the format
        required to extend the underlying ``Styler`` `table_styles` to allow
        tooltips to render in HTML.

        Returns
        -------
        styles : List
            A single-element list holding the ``{"selector", "props"}`` entry
            for the tooltip class.
        """
        return [
            {
                "selector": f".{self.class_name}",
                "props": maybe_convert_css_to_tuples(self.class_properties),
            }
        ]

    def _pseudo_css(self, uuid: str, name: str, row: int, col: int, text: str):
        """
        For every table data-cell that has a valid tooltip (not None, NaN or
        empty string) must create two pseudo CSS entries for the specific
        <td> element id which are added to overall table styles:
        an on hover visibility change and a content change
        dependent upon the user's chosen display string.

        For example:
            [{"selector": "#T__row1_col1:hover .pd-t",
             "props": [("visibility", "visible")]},
            {"selector": "#T__row1_col1 .pd-t::after",
             "props": [("content", '"Some Valid Text String"')]}]

        Parameters
        ----------
        uuid: str
            The uuid of the Styler instance
        name: str
            The css-name of the class used for styling tooltips
        row : int
            The row index of the specified tooltip string data
        col : int
            The col index of the specified tooltip string data
        text : str
            The textual content of the tooltip to be displayed in HTML.
            It is placed between double quotes as-is (no escaping is applied).

        Returns
        -------
        pseudo_css : List
        """
        selector_id = f"#T_{uuid}_row{row}_col{col}"
        return [
            {
                "selector": f"{selector_id}:hover .{name}",
                "props": [("visibility", "visible")],
            },
            {
                "selector": f"{selector_id} .{name}::after",
                "props": [("content", f'"{text}"')],
            },
        ]

    def _translate(self, styler: StylerRenderer, d: dict):
        """
        Mutate the render dictionary to allow for tooltips:

        - Add ``<span>`` HTML element to each data cells ``display_value``. Ignores
          headers.
        - Add table level CSS styles to control pseudo classes.

        As a side effect ``self.tt_data`` is replaced by a copy reindexed like
        ``styler.data`` and ``self.table_styles`` is rebuilt.

        Parameters
        ----------
        styler : StylerRenderer
            The renderer whose ``data`` is used for reindexing, whose ``uuid``
            forms the CSS ids, and whose ``hidden_rows`` / ``hidden_columns``
            exclude cells from receiving tooltips.
        d : dict
            The dictionary prior to final render; modified in place.

        Returns
        -------
        render_dict : Dict
            The same ``d`` object that was passed in.
        """
        self.tt_data = self.tt_data.reindex_like(styler.data)
        if self.tt_data.empty:
            return d

        name = self.class_name
        mask = (self.tt_data.isna()) | (self.tt_data.eq(""))  # empty string = no ttip
        self.table_styles = [
            style
            for i in range(len(self.tt_data.index))
            for j in range(len(self.tt_data.columns))
            if not (
                mask.iloc[i, j]
                or i in styler.hidden_rows
                or j in styler.hidden_columns
            )
            for style in self._pseudo_css(
                styler.uuid, name, i, j, str(self.tt_data.iloc[i, j])
            )
        ]

        if self.table_styles:
            # add span class to every cell only if at least 1 non-empty tooltip
            span = f'<span class="{name}"></span>'
            for row in d["body"]:
                for item in row:
                    if item["type"] == "td":
                        item["display_value"] = str(item["display_value"]) + span
            d["table_styles"].extend(self._class_styles)
            d["table_styles"].extend(self.table_styles)

        return d
2130
2131
2132def _parse_latex_table_wrapping(table_styles: CSSStyles, caption: str | None) -> bool:
2133 """
2134 Indicate whether LaTeX {tabular} should be wrapped with a {table} environment.
2135
2136 Parses the `table_styles` and detects any selectors which must be included outside
2137 of {tabular}, i.e. indicating that wrapping must occur, and therefore return True,
2138 or if a caption exists and requires similar.
2139 """
2140 IGNORED_WRAPPERS = ["toprule", "midrule", "bottomrule", "column_format"]
2141 # ignored selectors are included with {tabular} so do not need wrapping
2142 return (
2143 table_styles is not None
2144 and any(d["selector"] not in IGNORED_WRAPPERS for d in table_styles)
2145 ) or caption is not None
2146
2147
2148def _parse_latex_table_styles(table_styles: CSSStyles, selector: str) -> str | None:
2149 """
2150 Return the first 'props' 'value' from ``tables_styles`` identified by ``selector``.
2151
2152 Examples
2153 --------
2154 >>> table_styles = [{'selector': 'foo', 'props': [('attr','value')]},
2155 ... {'selector': 'bar', 'props': [('attr', 'overwritten')]},
2156 ... {'selector': 'bar', 'props': [('a1', 'baz'), ('a2', 'ignore')]}]
2157 >>> _parse_latex_table_styles(table_styles, selector='bar')
2158 'baz'
2159
2160 Notes
2161 -----
2162 The replacement of "§" with ":" is to avoid the CSS problem where ":" has structural
2163 significance and cannot be used in LaTeX labels, but is often required by them.
2164 """
2165 for style in table_styles[::-1]: # in reverse for most recently applied style
2166 if style["selector"] == selector:
2167 return str(style["props"][0][1]).replace("§", ":")
2168 return None
2169
2170
2171def _parse_latex_cell_styles(
2172 latex_styles: CSSList, display_value: str, convert_css: bool = False
2173) -> str:
2174 r"""
2175 Mutate the ``display_value`` string including LaTeX commands from ``latex_styles``.
2176
2177 This method builds a recursive latex chain of commands based on the
2178 CSSList input, nested around ``display_value``.
2179
2180 If a CSS style is given as ('<command>', '<options>') this is translated to
2181 '\<command><options>{display_value}', and this value is treated as the
2182 display value for the next iteration.
2183
2184 The most recent style forms the inner component, for example for styles:
2185 `[('c1', 'o1'), ('c2', 'o2')]` this returns: `\c1o1{\c2o2{display_value}}`
2186
2187 Sometimes latex commands have to be wrapped with curly braces in different ways:
2188 We create some parsing flags to identify the different behaviours:
2189
2190 - `--rwrap` : `\<command><options>{<display_value>}`
2191 - `--wrap` : `{\<command><options> <display_value>}`
2192 - `--nowrap` : `\<command><options> <display_value>`
2193 - `--lwrap` : `{\<command><options>} <display_value>`
2194 - `--dwrap` : `{\<command><options>}{<display_value>}`
2195
2196 For example for styles:
2197 `[('c1', 'o1--wrap'), ('c2', 'o2')]` this returns: `{\c1o1 \c2o2{display_value}}
2198 """
2199 if convert_css:
2200 latex_styles = _parse_latex_css_conversion(latex_styles)
2201 for command, options in latex_styles[::-1]: # in reverse for most recent style
2202 formatter = {
2203 "--wrap": f"{{\\{command}--to_parse {display_value}}}",
2204 "--nowrap": f"\\{command}--to_parse {display_value}",
2205 "--lwrap": f"{{\\{command}--to_parse}} {display_value}",
2206 "--rwrap": f"\\{command}--to_parse{{{display_value}}}",
2207 "--dwrap": f"{{\\{command}--to_parse}}{{{display_value}}}",
2208 }
2209 display_value = f"\\{command}{options} {display_value}"
2210 for arg in ["--nowrap", "--wrap", "--lwrap", "--rwrap", "--dwrap"]:
2211 if arg in str(options):
2212 display_value = formatter[arg].replace(
2213 "--to_parse", _parse_latex_options_strip(value=options, arg=arg)
2214 )
2215 break # only ever one purposeful entry
2216 return display_value
2217
2218
def _parse_latex_header_span(
    cell: dict[str, Any],
    multirow_align: str,
    multicol_align: str,
    wrap: bool = False,
    convert_css: bool = False,
) -> str:
    r"""
    Rewrite a header cell's `display_value` when it spans several rows or columns.

    'rowspan' and 'colspan' do not occur simultaneously. When one of them is found
    in the cell's 'attributes', the styled `display_value` is turned into a LaTeX
    `multirow` or `multicol` command respectively, carrying the detected span.
    The "naive-l" / "naive-r" column alignments and the "naive" row alignment
    avoid those commands and emit the plain value (padded with empty cells for
    columns) instead.

    ``wrap`` is used to enclose the `display_value` in braces which is needed for
    column headers using an siunitx package.

    Requires the package {multirow}, whereas multicol support is usually built in
    to the {tabular} environment.

    Examples
    --------
    >>> cell = {'cellstyle': '', 'display_value':'text', 'attributes': 'colspan="3"'}
    >>> _parse_latex_header_span(cell, 't', 'c')
    '\\multicolumn{3}{c}{text}'
    """

    def _span_size(attributes: str, marker: str) -> int:
        # text following the marker, cut at the next double quote
        remainder = attributes[attributes.find(marker) + len(marker) :]
        return int(remainder[: remainder.find('"')])

    styled = _parse_latex_cell_styles(
        cell["cellstyle"], cell["display_value"], convert_css
    )

    if "attributes" in cell:
        attributes = cell["attributes"]
        if 'colspan="' in attributes:
            n_cols = _span_size(attributes, 'colspan="')
            if multicol_align == "naive-l":
                text = f"{{{styled}}}" if wrap else f"{styled}"
                filler = " & {}" if wrap else " &"
                return text + filler * (n_cols - 1)
            if multicol_align == "naive-r":
                text = f"{{{styled}}}" if wrap else f"{styled}"
                filler = "{} & " if wrap else "& "
                return filler * (n_cols - 1) + text
            return f"\\multicolumn{{{n_cols}}}{{{multicol_align}}}{{{styled}}}"
        if 'rowspan="' in attributes:
            if multirow_align == "naive":
                return styled
            n_rows = _span_size(attributes, 'rowspan="')
            return f"\\multirow[{multirow_align}]{{{n_rows}}}{{*}}{{{styled}}}"

    return f"{{{styled}}}" if wrap else styled
2272
2273
2274def _parse_latex_options_strip(value: str | float, arg: str) -> str:
2275 """
2276 Strip a css_value which may have latex wrapping arguments, css comment identifiers,
2277 and whitespaces, to a valid string for latex options parsing.
2278
2279 For example: 'red /* --wrap */ ' --> 'red'
2280 """
2281 return str(value).replace(arg, "").replace("/*", "").replace("*/", "").strip()
2282
2283
2284def _parse_latex_css_conversion(styles: CSSList) -> CSSList:
2285 """
2286 Convert CSS (attribute,value) pairs to equivalent LaTeX (command,options) pairs.
2287
2288 Ignore conversion if tagged with `--latex` option, skipped if no conversion found.
2289 """
2290
2291 def font_weight(value, arg):
2292 if value in ("bold", "bolder"):
2293 return "bfseries", f"{arg}"
2294 return None
2295
2296 def font_style(value, arg):
2297 if value == "italic":
2298 return "itshape", f"{arg}"
2299 if value == "oblique":
2300 return "slshape", f"{arg}"
2301 return None
2302
2303 def color(value, user_arg, command, comm_arg):
2304 """
2305 CSS colors have 5 formats to process:
2306
2307 - 6 digit hex code: "#ff23ee" --> [HTML]{FF23EE}
2308 - 3 digit hex code: "#f0e" --> [HTML]{FF00EE}
2309 - rgba: rgba(128, 255, 0, 0.5) --> [rgb]{0.502, 1.000, 0.000}
2310 - rgb: rgb(128, 255, 0,) --> [rbg]{0.502, 1.000, 0.000}
2311 - string: red --> {red}
2312
2313 Additionally rgb or rgba can be expressed in % which is also parsed.
2314 """
2315 arg = user_arg if user_arg != "" else comm_arg
2316
2317 if value[0] == "#" and len(value) == 7: # color is hex code
2318 return command, f"[HTML]{{{value[1:].upper()}}}{arg}"
2319 if value[0] == "#" and len(value) == 4: # color is short hex code
2320 val = f"{value[1].upper()*2}{value[2].upper()*2}{value[3].upper()*2}"
2321 return command, f"[HTML]{{{val}}}{arg}"
2322 elif value[:3] == "rgb": # color is rgb or rgba
2323 r = re.findall("(?<=\\()[0-9\\s%]+(?=,)", value)[0].strip()
2324 r = float(r[:-1]) / 100 if "%" in r else int(r) / 255
2325 g = re.findall("(?<=,)[0-9\\s%]+(?=,)", value)[0].strip()
2326 g = float(g[:-1]) / 100 if "%" in g else int(g) / 255
2327 if value[3] == "a": # color is rgba
2328 b = re.findall("(?<=,)[0-9\\s%]+(?=,)", value)[1].strip()
2329 else: # color is rgb
2330 b = re.findall("(?<=,)[0-9\\s%]+(?=\\))", value)[0].strip()
2331 b = float(b[:-1]) / 100 if "%" in b else int(b) / 255
2332 return command, f"[rgb]{{{r:.3f}, {g:.3f}, {b:.3f}}}{arg}"
2333 else:
2334 return command, f"{{{value}}}{arg}" # color is likely string-named
2335
2336 CONVERTED_ATTRIBUTES: dict[str, Callable] = {
2337 "font-weight": font_weight,
2338 "background-color": partial(color, command="cellcolor", comm_arg="--lwrap"),
2339 "color": partial(color, command="color", comm_arg=""),
2340 "font-style": font_style,
2341 }
2342
2343 latex_styles: CSSList = []
2344 for attribute, value in styles:
2345 if isinstance(value, str) and "--latex" in value:
2346 # return the style without conversion but drop '--latex'
2347 latex_styles.append((attribute, value.replace("--latex", "")))
2348 if attribute in CONVERTED_ATTRIBUTES:
2349 arg = ""
2350 for x in ["--wrap", "--nowrap", "--lwrap", "--dwrap", "--rwrap"]:
2351 if x in str(value):
2352 arg, value = x, _parse_latex_options_strip(value, x)
2353 break
2354 latex_style = CONVERTED_ATTRIBUTES[attribute](value, arg)
2355 if latex_style is not None:
2356 latex_styles.extend([latex_style])
2357 return latex_styles
2358
2359
2360def _escape_latex(s: str) -> str:
2361 r"""
2362 Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``,
2363 ``~``, ``^``, and ``\`` in the string with LaTeX-safe sequences.
2364
2365 Use this if you need to display text that might contain such characters in LaTeX.
2366
2367 Parameters
2368 ----------
2369 s : str
2370 Input to be escaped
2371
2372 Return
2373 ------
2374 str :
2375 Escaped string
2376 """
2377 return (
2378 s.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash
2379 .replace("ab2§=§8yz ", "ab2§=§8yz\\space ") # since \backslash gobbles spaces
2380 .replace("&", "\\&")
2381 .replace("%", "\\%")
2382 .replace("$", "\\$")
2383 .replace("#", "\\#")
2384 .replace("_", "\\_")
2385 .replace("{", "\\{")
2386 .replace("}", "\\}")
2387 .replace("~ ", "~\\space ") # since \textasciitilde gobbles spaces
2388 .replace("~", "\\textasciitilde ")
2389 .replace("^ ", "^\\space ") # since \textasciicircum gobbles spaces
2390 .replace("^", "\\textasciicircum ")
2391 .replace("ab2§=§8yz", "\\textbackslash ")
2392 )
2393
2394
def _math_mode_with_dollar(s: str) -> str:
    r"""
    Escape a string for LaTeX while leaving ``$...$`` math substrings untouched.

    The substrings in LaTeX math mode, which start with
    the character ``$`` and end with ``$``, are preserved
    without escaping. Otherwise regular LaTeX escaping applies.

    Parameters
    ----------
    s : str
        Input to be escaped

    Return
    ------
    str :
        Escaped string
    """
    # hide already-escaped dollars so they cannot delimit a math span
    protected = s.replace(r"\$", r"rt8§=§7wz")

    pieces = []
    cursor = 0
    for match in re.finditer(r"\$.*?\$", protected):
        start, stop = match.span()
        pieces.append(_escape_latex(protected[cursor:start]))  # text before the span
        pieces.append(match.group())  # math span, kept verbatim
        cursor = stop
    pieces.append(_escape_latex(protected[cursor:]))  # trailing text

    return "".join(pieces).replace(r"rt8§=§7wz", r"\$")
2426
2427
2428def _math_mode_with_parentheses(s: str) -> str:
2429 r"""
2430 All characters in LaTeX math mode are preserved.
2431
2432 The substrings in LaTeX math mode, which start with
2433 the character ``\(`` and end with ``\)``, are preserved
2434 without escaping. Otherwise regular LaTeX escaping applies.
2435
2436 Parameters
2437 ----------
2438 s : str
2439 Input to be escaped
2440
2441 Return
2442 ------
2443 str :
2444 Escaped string
2445 """
2446 s = s.replace(r"\(", r"LEFT§=§6yzLEFT").replace(r"\)", r"RIGHTab5§=§RIGHT")
2447 res = []
2448 for item in re.split(r"LEFT§=§6yz|ab5§=§RIGHT", s):
2449 if item.startswith("LEFT") and item.endswith("RIGHT"):
2450 res.append(item.replace("LEFT", r"\(").replace("RIGHT", r"\)"))
2451 elif "LEFT" in item and "RIGHT" in item:
2452 res.append(
2453 _escape_latex(item).replace("LEFT", r"\(").replace("RIGHT", r"\)")
2454 )
2455 else:
2456 res.append(
2457 _escape_latex(item)
2458 .replace("LEFT", r"\textbackslash (")
2459 .replace("RIGHT", r"\textbackslash )")
2460 )
2461 return "".join(res)
2462
2463
def _escape_latex_math(s: str) -> str:
    r"""
    All characters in LaTeX math mode are preserved.

    The substrings in LaTeX math mode, which either are surrounded
    by two characters ``$`` or start with the character ``\(`` and end with ``\)``,
    are preserved without escaping. Otherwise regular LaTeX escaping applies.

    Only one of the two notations is honoured per string: if a ``$...$``
    substring is present the dollar notation is used, otherwise the
    ``\(...\)`` notation is used when such a substring is present.

    Parameters
    ----------
    s : str
        Input to be escaped

    Return
    ------
    str :
        Escaped string
    """
    # Detection runs on a copy in which escaped dollars are hidden, so that
    # ``\$`` can neither open nor close a ``$...$`` span. The untouched input is
    # what gets handed on to the escaping helpers.
    probe = s.replace(r"\$", r"rt8§=§7wz")

    if re.search(r"\$.*?\$", probe):
        return _math_mode_with_dollar(s)
    # Look for a real ``\(`` ... ``\)`` pair; plain parentheses elsewhere in the
    # text must not prevent a later math substring from being recognised.
    # DOTALL mirrors the helper, which pairs delimiters regardless of newlines.
    if re.search(r"\\\(.*?\\\)", probe, flags=re.DOTALL):
        return _math_mode_with_parentheses(s)
    return _escape_latex(s)