1"""Pretty-print tabular data."""
2
3import warnings
4from collections import namedtuple
5from collections.abc import Iterable, Sized
6from html import escape as htmlescape
7from itertools import chain, zip_longest as izip_longest
8from functools import reduce, partial
9import io
10import re
11import math
12import textwrap
13import dataclasses
14import sys
15
16try:
17 import wcwidth # optional wide-character (CJK) support
18except ImportError:
19 wcwidth = None
20
21
22def _is_file(f):
23 return isinstance(f, io.IOBase)
24
25
26__all__ = ["tabulate", "tabulate_formats", "simple_separated_format"]
27try:
28 from .version import version as __version__ # noqa: F401
29except ImportError:
30 pass # running __init__.py as a script, AppVeyor pytests
31
32
# Minimum extra space in headers.
MIN_PADDING = 2

# Whether or not to preserve leading/trailing whitespace in data.
PRESERVE_WHITESPACE = False

# TextWrapper breaks words longer than 'width'.
_BREAK_LONG_WORDS = True
# TextWrapper breaks hyphenated words.
_BREAK_ON_HYPHENS = True


_DEFAULT_FLOATFMT = "g"
_DEFAULT_INTFMT = ""
_DEFAULT_MISSINGVAL = ""
# The default alignment will be overwritten by "left", "center" or "decimal"
# depending on the formatter.
_DEFAULT_ALIGN = "default"


# If True, enable wide-character (CJK) support. Defaults to True only when
# the optional `wcwidth` package was imported successfully.
WIDE_CHARS_MODE = wcwidth is not None

# Constant that can be used as part of passed rows to generate a separating line.
# It is purposely an unprintable character, very unlikely to be used in a table.
SEPARATING_LINE = "\001"
59
# Horizontal rule: opening text, fill character, column separator, closing text.
Line = namedtuple("Line", "begin hline sep end")

# Row of cells: opening text, column separator, closing text.
DataRow = namedtuple("DataRow", "begin sep end")
64
65
# Layout of a rendered table:
#
#     --- lineabove ---------
#         headerrow
#     --- linebelowheader ---
#         datarow
#     --- linebetweenrows ---
#     ... (more datarows) ...
#     --- linebetweenrows ---
#         last datarow
#     --- linebelow ---------
#
# Each line* field of a TableFormat is one of:
#
#   - None, if the element is not used,
#   - a Line tuple,
#   - a function: [col_widths], [col_alignments] -> string.
#
# Each *row field of a TableFormat is one of:
#
#   - None, if the element is not used,
#   - a DataRow tuple,
#   - a function: [cell_values], [col_widths], [col_alignments] -> string.
#
# padding (an integer) is the amount of white space around data values.
#
# with_header_hide is one of:
#
#   - None, to display all table elements unconditionally,
#   - a list of elements not to be displayed if the table has column headers.
#
TableFormat = namedtuple(
    "TableFormat",
    "lineabove linebelowheader linebetweenrows linebelow "
    "headerrow datarow padding with_header_hide",
)
110
111
112def _is_separating_line_value(value):
113 return type(value) is str and value.strip() == SEPARATING_LINE
114
115
116def _is_separating_line(row):
117 row_type = type(row)
118 is_sl = (row_type == list or row_type == str) and (
119 (len(row) >= 1 and _is_separating_line_value(row[0]))
120 or (len(row) >= 2 and _is_separating_line_value(row[1]))
121 )
122
123 return is_sl
124
125
126def _pipe_segment_with_colons(align, colwidth):
127 """Return a segment of a horizontal line with optional colons which
128 indicate column's alignment (as in `pipe` output format)."""
129 w = colwidth
130 if align in ["right", "decimal"]:
131 return ("-" * (w - 1)) + ":"
132 elif align == "center":
133 return ":" + ("-" * (w - 2)) + ":"
134 elif align == "left":
135 return ":" + ("-" * (w - 1))
136 else:
137 return "-" * w
138
139
140def _pipe_line_with_colons(colwidths, colaligns):
141 """Return a horizontal line with optional colons to indicate column's
142 alignment (as in `pipe` output format)."""
143 if not colaligns: # e.g. printing an empty data frame (github issue #15)
144 colaligns = [""] * len(colwidths)
145 segments = [_pipe_segment_with_colons(a, w) for a, w in zip(colaligns, colwidths)]
146 return "|" + "|".join(segments) + "|"
147
148
149def _grid_segment_with_colons(colwidth, align):
150 """Return a segment of a horizontal line with optional colons which indicate
151 column's alignment in a grid table."""
152 width = colwidth
153 if align == "right":
154 return ("=" * (width - 1)) + ":"
155 elif align == "center":
156 return ":" + ("=" * (width - 2)) + ":"
157 elif align == "left":
158 return ":" + ("=" * (width - 1))
159 else:
160 return "=" * width
161
162
163def _grid_line_with_colons(colwidths, colaligns):
164 """Return a horizontal line with optional colons to indicate column's alignment
165 in a grid table."""
166 if not colaligns:
167 colaligns = [""] * len(colwidths)
168 segments = [_grid_segment_with_colons(w, a) for a, w in zip(colaligns, colwidths)]
169 return "+" + "+".join(segments) + "+"
170
171
172def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns):
173 alignment = {
174 "left": "",
175 "right": 'style="text-align: right;"| ',
176 "center": 'style="text-align: center;"| ',
177 "decimal": 'style="text-align: right;"| ',
178 }
179 # hard-coded padding _around_ align attribute and value together
180 # rather than padding parameter which affects only the value
181 values_with_attrs = [
182 " " + alignment.get(a, "") + c + " " for c, a in zip(cell_values, colaligns)
183 ]
184 colsep = separator * 2
185 return (separator + colsep.join(values_with_attrs)).rstrip()
186
187
188def _textile_row_with_attrs(cell_values, colwidths, colaligns):
189 cell_values[0] += " "
190 alignment = {"left": "<.", "right": ">.", "center": "=.", "decimal": ">."}
191 values = (alignment.get(a, "") + v for a, v in zip(colaligns, cell_values))
192 return "|" + "|".join(values) + "|"
193
194
195def _html_begin_table_without_header(colwidths_ignore, colaligns_ignore):
196 # this table header will be suppressed if there is a header row
197 return "<table>\n<tbody>"
198
199
200def _html_row_with_attrs(celltag, unsafe, cell_values, colwidths, colaligns):
201 alignment = {
202 "left": "",
203 "right": ' style="text-align: right;"',
204 "center": ' style="text-align: center;"',
205 "decimal": ' style="text-align: right;"',
206 }
207 if unsafe:
208 values_with_attrs = [
209 "<{0}{1}>{2}</{0}>".format(celltag, alignment.get(a, ""), c)
210 for c, a in zip(cell_values, colaligns)
211 ]
212 else:
213 values_with_attrs = [
214 "<{0}{1}>{2}</{0}>".format(celltag, alignment.get(a, ""), htmlescape(c))
215 for c, a in zip(cell_values, colaligns)
216 ]
217 rowhtml = "<tr>{}</tr>".format("".join(values_with_attrs).rstrip())
218 if celltag == "th": # it's a header row, create a new table header
219 rowhtml = f"<table>\n<thead>\n{rowhtml}\n</thead>\n<tbody>"
220 return rowhtml
221
222
223def _moin_row_with_attrs(celltag, cell_values, colwidths, colaligns, header=""):
224 alignment = {
225 "left": "",
226 "right": '<style="text-align: right;">',
227 "center": '<style="text-align: center;">',
228 "decimal": '<style="text-align: right;">',
229 }
230 values_with_attrs = [
231 "{}{} {} ".format(celltag, alignment.get(a, ""), header + c + header)
232 for c, a in zip(cell_values, colaligns)
233 ]
234 return "".join(values_with_attrs) + "||"
235
236
237def _latex_line_begin_tabular(colwidths, colaligns, booktabs=False, longtable=False):
238 alignment = {"left": "l", "right": "r", "center": "c", "decimal": "r"}
239 tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns])
240 return "\n".join(
241 [
242 ("\\begin{tabular}{" if not longtable else "\\begin{longtable}{")
243 + tabular_columns_fmt
244 + "}",
245 "\\toprule" if booktabs else "\\hline",
246 ]
247 )
248
249
250def _asciidoc_row(is_header, *args):
251 """handle header and data rows for asciidoc format"""
252
253 def make_header_line(is_header, colwidths, colaligns):
254 # generate the column specifiers
255
256 alignment = {"left": "<", "right": ">", "center": "^", "decimal": ">"}
257 # use the column widths generated by tabulate for the asciidoc column width specifiers
258 asciidoc_alignments = zip(
259 colwidths, [alignment[colalign] for colalign in colaligns]
260 )
261 asciidoc_column_specifiers = [
262 f"{width:d}{align}" for width, align in asciidoc_alignments
263 ]
264 header_list = ['cols="' + (",".join(asciidoc_column_specifiers)) + '"']
265
266 # generate the list of options (currently only "header")
267 options_list = []
268
269 if is_header:
270 options_list.append("header")
271
272 if options_list:
273 header_list += ['options="' + ",".join(options_list) + '"']
274
275 # generate the list of entries in the table header field
276
277 return "[{}]\n|====".format(",".join(header_list))
278
279 if len(args) == 2:
280 # two arguments are passed if called in the context of aboveline
281 # print the table header with column widths and optional header tag
282 return make_header_line(False, *args)
283
284 elif len(args) == 3:
285 # three arguments are passed if called in the context of dataline or headerline
286 # print the table line and make the aboveline if it is a header
287
288 cell_values, colwidths, colaligns = args
289 data_line = "|" + "|".join(cell_values)
290
291 if is_header:
292 return make_header_line(True, colwidths, colaligns) + "\n" + data_line
293 else:
294 return data_line
295
296 else:
297 raise ValueError(
298 " _asciidoc_row() requires two (colwidths, colaligns) "
299 + "or three (cell_values, colwidths, colaligns) arguments) "
300 )
301
302
# Single characters with a special meaning in LaTeX, mapped to their escaped form.
LATEX_ESCAPE_RULES = {
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "^": r"\^{}",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "\\": r"\textbackslash{}",
    "<": r"\ensuremath{<}",
    ">": r"\ensuremath{>}",
}
317
318
def _latex_row(cell_values, colwidths, colaligns, escrules=LATEX_ESCAPE_RULES):
    """Render one LaTeX table row.

    Every character of every cell is looked up in *escrules* and replaced by
    its escaped form when present. Cells are then joined with ``&`` and the
    row is terminated with ``\\\\``. *colwidths* and *colaligns* are not used.
    """
    escaped_values = [
        "".join(escrules.get(char, char) for char in cell) for cell in cell_values
    ]
    return _build_simple_row(escaped_values, DataRow("", "&", "\\\\"))
326
327
328def _rst_escape_first_column(rows, headers):
329 def escape_empty(val):
330 if isinstance(val, (str, bytes)) and not val.strip():
331 return ".."
332 else:
333 return val
334
335 new_headers = list(headers)
336 new_rows = []
337 if headers:
338 new_headers[0] = escape_empty(headers[0])
339 for row in rows:
340 new_row = list(row)
341 if new_row:
342 new_row[0] = escape_empty(row[0])
343 new_rows.append(new_row)
344 return new_rows, new_headers
345
346
# Registry of the built-in table formats, keyed by the name accepted as
# `tablefmt`. Each value is a TableFormat whose line/row fields are a
# Line/DataRow tuple, a rendering function, or None when unused.
_table_formats = {
    # --- plain-text and ASCII-art formats ---
    "simple": TableFormat(
        lineabove=Line("", "-", " ", ""),
        linebelowheader=Line("", "-", " ", ""),
        linebetweenrows=None,
        linebelow=Line("", "-", " ", ""),
        headerrow=DataRow("", " ", ""),
        datarow=DataRow("", " ", ""),
        padding=0,
        with_header_hide=["lineabove", "linebelow"],
    ),
    "plain": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("", " ", ""),
        datarow=DataRow("", " ", ""),
        padding=0,
        with_header_hide=None,
    ),
    # --- grid formats: a line is drawn between every pair of rows ---
    "grid": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=Line("+", "=", "+", "+"),
        linebetweenrows=Line("+", "-", "+", "+"),
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "simple_grid": TableFormat(
        lineabove=Line("┌", "─", "┬", "┐"),
        linebelowheader=Line("├", "─", "┼", "┤"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("└", "─", "┴", "┘"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "rounded_grid": TableFormat(
        lineabove=Line("╭", "─", "┬", "╮"),
        linebelowheader=Line("├", "─", "┼", "┤"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("╰", "─", "┴", "╯"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "heavy_grid": TableFormat(
        lineabove=Line("┏", "━", "┳", "┓"),
        linebelowheader=Line("┣", "━", "╋", "┫"),
        linebetweenrows=Line("┣", "━", "╋", "┫"),
        linebelow=Line("┗", "━", "┻", "┛"),
        headerrow=DataRow("┃", "┃", "┃"),
        datarow=DataRow("┃", "┃", "┃"),
        padding=1,
        with_header_hide=None,
    ),
    "mixed_grid": TableFormat(
        lineabove=Line("┍", "━", "┯", "┑"),
        linebelowheader=Line("┝", "━", "┿", "┥"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("┕", "━", "┷", "┙"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "double_grid": TableFormat(
        lineabove=Line("╔", "═", "╦", "╗"),
        linebelowheader=Line("╠", "═", "╬", "╣"),
        linebetweenrows=Line("╠", "═", "╬", "╣"),
        linebelow=Line("╚", "═", "╩", "╝"),
        headerrow=DataRow("║", "║", "║"),
        datarow=DataRow("║", "║", "║"),
        padding=1,
        with_header_hide=None,
    ),
    "fancy_grid": TableFormat(
        lineabove=Line("╒", "═", "╤", "╕"),
        linebelowheader=Line("╞", "═", "╪", "╡"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("╘", "═", "╧", "╛"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "colon_grid": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        # the header separator carries the column alignment as colons
        linebelowheader=_grid_line_with_colons,
        linebetweenrows=Line("+", "-", "+", "+"),
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    # --- outline formats: same borders as the grids, no line between rows ---
    "outline": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=Line("+", "=", "+", "+"),
        linebetweenrows=None,
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "simple_outline": TableFormat(
        lineabove=Line("┌", "─", "┬", "┐"),
        linebelowheader=Line("├", "─", "┼", "┤"),
        linebetweenrows=None,
        linebelow=Line("└", "─", "┴", "┘"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "rounded_outline": TableFormat(
        lineabove=Line("╭", "─", "┬", "╮"),
        linebelowheader=Line("├", "─", "┼", "┤"),
        linebetweenrows=None,
        linebelow=Line("╰", "─", "┴", "╯"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "heavy_outline": TableFormat(
        lineabove=Line("┏", "━", "┳", "┓"),
        linebelowheader=Line("┣", "━", "╋", "┫"),
        linebetweenrows=None,
        linebelow=Line("┗", "━", "┻", "┛"),
        headerrow=DataRow("┃", "┃", "┃"),
        datarow=DataRow("┃", "┃", "┃"),
        padding=1,
        with_header_hide=None,
    ),
    "mixed_outline": TableFormat(
        lineabove=Line("┍", "━", "┯", "┑"),
        linebelowheader=Line("┝", "━", "┿", "┥"),
        linebetweenrows=None,
        linebelow=Line("┕", "━", "┷", "┙"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "double_outline": TableFormat(
        lineabove=Line("╔", "═", "╦", "╗"),
        linebelowheader=Line("╠", "═", "╬", "╣"),
        linebetweenrows=None,
        linebelow=Line("╚", "═", "╩", "╝"),
        headerrow=DataRow("║", "║", "║"),
        datarow=DataRow("║", "║", "║"),
        padding=1,
        with_header_hide=None,
    ),
    "fancy_outline": TableFormat(
        lineabove=Line("╒", "═", "╤", "╕"),
        linebelowheader=Line("╞", "═", "╪", "╡"),
        linebetweenrows=None,
        linebelow=Line("╘", "═", "╧", "╛"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    # --- markdown-like and tool-specific text formats ---
    "github": TableFormat(
        lineabove=Line("|", "-", "|", "|"),
        linebelowheader=Line("|", "-", "|", "|"),
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=["lineabove"],
    ),
    "pipe": TableFormat(
        lineabove=_pipe_line_with_colons,
        linebelowheader=_pipe_line_with_colons,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=["lineabove"],
    ),
    "orgtbl": TableFormat(
        lineabove=None,
        linebelowheader=Line("|", "-", "+", "|"),
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "jira": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("||", "||", "||"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "presto": TableFormat(
        lineabove=None,
        linebelowheader=Line("", "-", "+", ""),
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("", "|", ""),
        datarow=DataRow("", "|", ""),
        padding=1,
        with_header_hide=None,
    ),
    "pretty": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=Line("+", "-", "+", "+"),
        linebetweenrows=None,
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "psql": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=Line("|", "-", "+", "|"),
        linebetweenrows=None,
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "rst": TableFormat(
        lineabove=Line("", "=", " ", ""),
        linebelowheader=Line("", "=", " ", ""),
        linebetweenrows=None,
        linebelow=Line("", "=", " ", ""),
        headerrow=DataRow("", " ", ""),
        datarow=DataRow("", " ", ""),
        padding=0,
        with_header_hide=None,
    ),
    # --- wiki markup formats ---
    "mediawiki": TableFormat(
        lineabove=Line(
            '{| class="wikitable" style="text-align: left;"',
            "",
            "",
            "\n|+ <!-- caption -->\n|-",
        ),
        linebelowheader=Line("|-", "", "", ""),
        linebetweenrows=Line("|-", "", "", ""),
        linebelow=Line("|}", "", "", ""),
        headerrow=partial(_mediawiki_row_with_attrs, "!"),
        datarow=partial(_mediawiki_row_with_attrs, "|"),
        padding=0,
        with_header_hide=None,
    ),
    "moinmoin": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=partial(_moin_row_with_attrs, "||", header="'''"),
        datarow=partial(_moin_row_with_attrs, "||"),
        padding=1,
        with_header_hide=None,
    ),
    "youtrack": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|| ", " || ", " || "),
        datarow=DataRow("| ", " | ", " |"),
        padding=1,
        with_header_hide=None,
    ),
    # --- HTML formats: "html" escapes cell text, "unsafehtml" does not ---
    "html": TableFormat(
        lineabove=_html_begin_table_without_header,
        # the "th" row renderer itself emits </thead> and the opening <tbody>
        linebelowheader="",
        linebetweenrows=None,
        linebelow=Line("</tbody>\n</table>", "", "", ""),
        headerrow=partial(_html_row_with_attrs, "th", False),
        datarow=partial(_html_row_with_attrs, "td", False),
        padding=0,
        with_header_hide=["lineabove"],
    ),
    "unsafehtml": TableFormat(
        lineabove=_html_begin_table_without_header,
        # the "th" row renderer itself emits </thead> and the opening <tbody>
        linebelowheader="",
        linebetweenrows=None,
        linebelow=Line("</tbody>\n</table>", "", "", ""),
        headerrow=partial(_html_row_with_attrs, "th", True),
        datarow=partial(_html_row_with_attrs, "td", True),
        padding=0,
        with_header_hide=["lineabove"],
    ),
    # --- LaTeX formats: "latex_raw" passes an empty escape table ---
    "latex": TableFormat(
        lineabove=_latex_line_begin_tabular,
        linebelowheader=Line("\\hline", "", "", ""),
        linebetweenrows=None,
        linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
        headerrow=_latex_row,
        datarow=_latex_row,
        padding=1,
        with_header_hide=None,
    ),
    "latex_raw": TableFormat(
        lineabove=_latex_line_begin_tabular,
        linebelowheader=Line("\\hline", "", "", ""),
        linebetweenrows=None,
        linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
        headerrow=partial(_latex_row, escrules={}),
        datarow=partial(_latex_row, escrules={}),
        padding=1,
        with_header_hide=None,
    ),
    "latex_booktabs": TableFormat(
        lineabove=partial(_latex_line_begin_tabular, booktabs=True),
        linebelowheader=Line("\\midrule", "", "", ""),
        linebetweenrows=None,
        linebelow=Line("\\bottomrule\n\\end{tabular}", "", "", ""),
        headerrow=_latex_row,
        datarow=_latex_row,
        padding=1,
        with_header_hide=None,
    ),
    "latex_longtable": TableFormat(
        lineabove=partial(_latex_line_begin_tabular, longtable=True),
        linebelowheader=Line("\\hline\n\\endhead", "", "", ""),
        linebetweenrows=None,
        linebelow=Line("\\hline\n\\end{longtable}", "", "", ""),
        headerrow=_latex_row,
        datarow=_latex_row,
        padding=1,
        with_header_hide=None,
    ),
    # --- other markup and delimiter-separated formats ---
    "tsv": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("", "\t", ""),
        datarow=DataRow("", "\t", ""),
        padding=0,
        with_header_hide=None,
    ),
    "textile": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|_. ", "|_.", "|"),
        datarow=_textile_row_with_attrs,
        padding=1,
        with_header_hide=None,
    ),
    "asciidoc": TableFormat(
        # _asciidoc_row tells line calls from row calls by its argument count
        lineabove=partial(_asciidoc_row, False),
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=Line("|====", "", "", ""),
        headerrow=partial(_asciidoc_row, True),
        datarow=partial(_asciidoc_row, False),
        padding=1,
        with_header_hide=["lineabove"],
    ),
}
724
725
# Alphabetically sorted names of all built-in table formats.
tabulate_formats = sorted(_table_formats)
727
# The table formats for which multiline cells will be folded into subsequent
# table rows. The key is the original format specified at the API. The value is
# the format that will be used to represent the original format. Every format
# listed here currently maps to itself.
multiline_formats = {
    name: name
    for name in (
        "plain",
        "simple",
        "grid",
        "simple_grid",
        "rounded_grid",
        "heavy_grid",
        "mixed_grid",
        "double_grid",
        "fancy_grid",
        "colon_grid",
        "pipe",
        "orgtbl",
        "jira",
        "presto",
        "pretty",
        "psql",
        "rst",
        "github",
        "outline",
        "simple_outline",
        "rounded_outline",
        "heavy_outline",
        "mixed_outline",
        "double_outline",
        "fancy_outline",
    )
}
758
759# TODO: Add multiline support for the remaining table formats:
760# - mediawiki: Replace \n with <br>
761# - moinmoin: TBD
762# - youtrack: TBD
763# - html: Replace \n with <br>
764# - latex*: Use "makecell" package: In header, replace X\nY with
765# \thead{X\\Y} and in data row, replace X\nY with \makecell{X\\Y}
766# - tsv: TBD
767# - textile: Replace \n with <br/> (must be well-formed XML)
768
# Patterns that find a line break in str / bytes cell content. Alternatives
# are tried left to right, so a "\r\n" pair is already matched by the leading
# "\r" alternative; the explicit "\r\n" branch never gets to match.
_multiline_codes = re.compile(r"\r|\n|\r\n")
_multiline_codes_bytes = re.compile(b"\r|\n|\r\n")
771
# Handle ANSI escape sequences for both control sequence introducer (CSI) and
# operating system command (OSC). Both of these begin with 0x1b (or octal 033),
# which will be shown below as ESC.
#
# CSI ANSI escape codes have the following format, defined in section 5.4 of ECMA-48:
#
# CSI: ESC followed by the '[' character (0x5b)
# Parameter Bytes: 0..n bytes in the range 0x30-0x3f
# Intermediate Bytes: 0..n bytes in the range 0x20-0x2f
# Final Byte: a single byte in the range 0x40-0x7e
#
# Also include the terminal hyperlink sequences as described here:
# https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda
#
# OSC 8 ; params ; uri ST display_text OSC 8 ;; ST
#
# Example: \x1b]8;;https://example.com\x1b\x5ctext to show\x1b]8;;\x1b\x5c
#
# Where:
# OSC: ESC followed by the ']' character (0x5d)
# params: 0..n optional key value pairs separated by ':' (e.g. foo=bar:baz=qux:abc=123)
# URI: the actual URI with protocol scheme (e.g. https://, file://, ftp://)
# ST: ESC followed by the '\' character (0x5c)
_esc = r"\x1b"
_csi = rf"{_esc}\["
_osc = rf"{_esc}\]"
_st = rf"{_esc}\\"

# Verbose-mode pattern: whitespace and "#" comments inside it are ignored by
# the regex engine. Submatch 4 (the hyperlink text) is the group that
# _strip_ansi substitutes for each match.
_ansi_escape_pat = rf"""
    (
        # terminal colors, etc
        {_csi}        # CSI
        [\x30-\x3f]*  # parameter bytes
        [\x20-\x2f]*  # intermediate bytes
        [\x40-\x7e]   # final byte
    |
        # terminal hyperlinks
        {_osc}8;      # OSC opening
        (\w+=\w+:?)*  # key=value params list (submatch 2)
        ;             # delimiter
        ([^{_esc}]+)  # URI - anything but ESC (submatch 3)
        {_st}         # ST
        ([^{_esc}]+)  # link text - anything but ESC (submatch 4)
        {_osc}8;;{_st}  # "closing" OSC sequence
    )
"""
_ansi_codes = re.compile(_ansi_escape_pat, re.VERBOSE)
# Same pattern compiled for bytes input.
_ansi_codes_bytes = re.compile(_ansi_escape_pat.encode("utf8"), re.VERBOSE)
_ansi_color_reset_code = "\033[0m"
821
# Matches a number written with comma thousands separators, e.g. "1,000.1234":
# an optionally signed group of 1-3 digits followed by any number of ",ddd"
# groups, then an optional fractional part. The conditional (?(1)...|...)
# allows a bare trailing "." only when an integer part was matched; without
# an integer part at least one digit must follow the ".".
_float_with_thousands_separators = re.compile(
    r"^(([+-]?[0-9]{1,3})(?:,([0-9]{3}))*)?(?(1)\.[0-9]*|\.[0-9]+)?$"
)
825
826
def simple_separated_format(separator):
    r"""Build a minimal TableFormat whose columns are joined by *separator*.

    The format draws no horizontal lines and adds no padding; header and data
    rows share the same layout.

    >>> tsv = simple_separated_format("\t")
    >>> tabulate([["foo", 1], ["spam", 23]], tablefmt=tsv) == 'foo \t 1\nspam\t23'
    True

    """
    return TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow(begin="", sep=separator, end=""),
        datarow=DataRow(begin="", sep=separator, end=""),
        padding=0,
        with_header_hide=None,
    )
845
846
847def _isnumber_with_thousands_separator(string):
848 """
849 >>> _isnumber_with_thousands_separator(".")
850 False
851 >>> _isnumber_with_thousands_separator("1")
852 True
853 >>> _isnumber_with_thousands_separator("1.")
854 True
855 >>> _isnumber_with_thousands_separator(".1")
856 True
857 >>> _isnumber_with_thousands_separator("1000")
858 False
859 >>> _isnumber_with_thousands_separator("1,000")
860 True
861 >>> _isnumber_with_thousands_separator("1,0000")
862 False
863 >>> _isnumber_with_thousands_separator("1,000.1234")
864 True
865 >>> _isnumber_with_thousands_separator(b"1,000.1234")
866 True
867 >>> _isnumber_with_thousands_separator("+1,000.1234")
868 True
869 >>> _isnumber_with_thousands_separator("-1,000.1234")
870 True
871 """
872 try:
873 string = string.decode()
874 except (UnicodeDecodeError, AttributeError):
875 pass
876
877 return bool(re.match(_float_with_thousands_separators, string))
878
879
880def _isconvertible(conv, string):
881 try:
882 conv(string)
883 return True
884 except (ValueError, TypeError):
885 return False
886
887
888def _isnumber(string):
889 """Detects if something *could* be considered a numeric value, vs. just a string.
890
891 This promotes types convertible to both int and float to be considered
892 a float. Note that, iff *all* values appear to be some form of numeric
893 value such as eg. "1e2", they would be considered numbers!
894
895 The exception is things that appear to be numbers but overflow to
896 +/-inf, eg. "1e23456"; we'll have to exclude them explicitly.
897
898 >>> _isnumber(123)
899 True
900 >>> _isnumber(123.45)
901 True
902 >>> _isnumber("123.45")
903 True
904 >>> _isnumber("123")
905 True
906 >>> _isnumber("spam")
907 False
908 >>> _isnumber("123e45")
909 True
910 >>> _isnumber("123e45678") # evaluates equal to 'inf', but ... isn't
911 False
912 >>> _isnumber("inf")
913 True
914 >>> from fractions import Fraction
915 >>> _isnumber(Fraction(1,3))
916 True
917
918 """
919 return (
920 # fast path
921 type(string) in (float, int)
922 # covers 'NaN', +/- 'inf', and eg. '1e2', as well as any type
923 # convertible to int/float.
924 or (
925 _isconvertible(float, string)
926 and (
927 # some other type convertible to float
928 not isinstance(string, (str, bytes))
929 # or, a numeric string eg. "1e1...", "NaN", ..., but isn't
930 # just an over/underflow
931 or (
932 not (math.isinf(float(string)) or math.isnan(float(string)))
933 or string.lower() in ["inf", "-inf", "nan"]
934 )
935 )
936 )
937 )
938
939
940def _isint(string, inttype=int):
941 """
942 >>> _isint("123")
943 True
944 >>> _isint("123.45")
945 False
946 """
947 return (
948 type(string) is inttype
949 or (
950 (hasattr(string, "is_integer") or hasattr(string, "__array__"))
951 and str(type(string)).startswith("<class 'numpy.int")
952 ) # numpy.int64 and similar
953 or (
954 isinstance(string, (bytes, str)) and _isconvertible(inttype, string)
955 ) # integer as string
956 )
957
958
959def _isbool(string):
960 """
961 >>> _isbool(True)
962 True
963 >>> _isbool("False")
964 True
965 >>> _isbool(1)
966 False
967 """
968 return type(string) is bool or (
969 isinstance(string, (bytes, str)) and string in ("True", "False")
970 )
971
972
973def _type(string, has_invisible=True, numparse=True):
974 """The least generic type (type(None), int, float, str, unicode).
975
976 Treats empty string as missing for the purposes of type deduction, so as to not influence
977 the type of an otherwise complete column; does *not* result in missingval replacement!
978
979 >>> _type(None) is type(None)
980 True
981 >>> _type("") is type(None)
982 True
983 >>> _type("foo") is type("")
984 True
985 >>> _type("1") is type(1)
986 True
987 >>> _type('\x1b[31m42\x1b[0m') is type(42)
988 True
989 >>> _type('\x1b[31m42\x1b[0m') is type(42)
990 True
991
992 """
993
994 if has_invisible and isinstance(string, (str, bytes)):
995 string = _strip_ansi(string)
996
997 if string is None or (isinstance(string, (bytes, str)) and not string):
998 return type(None)
999 elif hasattr(string, "isoformat"): # datetime.datetime, date, and time
1000 return str
1001 elif _isbool(string):
1002 return bool
1003 elif numparse and (
1004 _isint(string)
1005 or (
1006 isinstance(string, str)
1007 and _isnumber_with_thousands_separator(string)
1008 and "." not in string
1009 )
1010 ):
1011 return int
1012 elif numparse and (
1013 _isnumber(string)
1014 or (isinstance(string, str) and _isnumber_with_thousands_separator(string))
1015 ):
1016 return float
1017 elif isinstance(string, bytes):
1018 return bytes
1019 else:
1020 return str
1021
1022
def _afterpoint(string):
    """Count the symbols after the decimal point; -1 if there is no decimal point.

    For numbers written without a "." but with an exponent, the symbols after
    the "e" are counted instead. Non-numbers and integers give -1.

    >>> _afterpoint("123.45")
    2
    >>> _afterpoint("1001")
    -1
    >>> _afterpoint("eggs")
    -1
    >>> _afterpoint("123e45")
    2
    >>> _afterpoint("123,456.78")
    2

    """
    if not (_isnumber(string) or _isnumber_with_thousands_separator(string)):
        return -1  # not a number
    if _isint(string):
        return -1

    pos = string.rfind(".")
    if pos < 0:
        # No decimal point: fall back to the exponent marker.
        pos = string.lower().rfind("e")
    if pos < 0:
        return -1  # no point
    return len(string) - pos - 1
1050
1051
1052def _padleft(width, s):
1053 """Flush right.
1054
1055 >>> _padleft(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430'
1056 True
1057
1058 """
1059 fmt = "{0:>%ds}" % width
1060 return fmt.format(s)
1061
1062
1063def _padright(width, s):
1064 """Flush left.
1065
1066 >>> _padright(6, '\u044f\u0439\u0446\u0430') == '\u044f\u0439\u0446\u0430 '
1067 True
1068
1069 """
1070 fmt = "{0:<%ds}" % width
1071 return fmt.format(s)
1072
1073
1074def _padboth(width, s):
1075 """Center string.
1076
1077 >>> _padboth(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430 '
1078 True
1079
1080 """
1081 fmt = "{0:^%ds}" % width
1082 return fmt.format(s)
1083
1084
1085def _padnone(ignore_width, s):
1086 return s
1087
1088
def _strip_ansi(s):
    r"""Strip ANSI escape sequences (CSI color codes etc. and OSC hyperlinks) from *s*.

    CSI sequences are dropped entirely, while an OSC hyperlink is replaced by
    its link text. Showing the URI instead might be desirable, but is not
    supported. Anything that is not a str is handled with the bytes pattern.

    >>> repr(_strip_ansi('\x1B]8;;https://example.com\x1B\\This is a link\x1B]8;;\x1B\\'))
    "'This is a link'"

    >>> repr(_strip_ansi('\x1b[31mred\x1b[0m text'))
    "'red text'"

    """
    pattern = _ansi_codes if isinstance(s, str) else _ansi_codes_bytes
    # Group 4 is the hyperlink text; it is empty for CSI matches.
    return pattern.sub(r"\4", s)
1107
1108
def _visible_width(s):
    """Width of ``s`` as printed, with ANSI escape sequences left out.

    Strings and bytestrings are stripped of ANSI codes first; any other value
    is measured through its ``str()`` form.  When the optional ``wcwidth``
    module is available and WIDE_CHARS_MODE is on, ``wcwidth.wcswidth`` does
    the measuring, otherwise ``len`` does.

    >>> _visible_width('\x1b[31mhello\x1b[0m'), _visible_width("world")
    (5, 5)

    """
    # optional wide-character support
    use_wcwidth = wcwidth is not None and WIDE_CHARS_MODE
    measure = wcwidth.wcswidth if use_wcwidth else len
    text = _strip_ansi(s) if isinstance(s, (str, bytes)) else str(s)
    return measure(text)
1125
1126
def _is_multiline(s):
    """Tell whether ``s`` contains a line break.

    ``str`` values are searched with the text pattern, everything else is
    assumed to be a bytestring and searched with the bytes pattern.
    """
    codes = _multiline_codes if isinstance(s, str) else _multiline_codes_bytes
    return re.search(codes, s) is not None
1132
1133
1134def _multiline_width(multiline_s, line_width_fn=len):
1135 """Visible width of a potentially multiline content."""
1136 return max(map(line_width_fn, re.split("[\r\n]", multiline_s)))
1137
1138
1139def _choose_width_fn(has_invisible, enable_widechars, is_multiline):
1140 """Return a function to calculate visible cell width."""
1141 if has_invisible:
1142 line_width_fn = _visible_width
1143 elif enable_widechars: # optional wide-character support if available
1144 line_width_fn = wcwidth.wcswidth
1145 else:
1146 line_width_fn = len
1147 if is_multiline:
1148 width_fn = lambda s: _multiline_width(s, line_width_fn) # noqa
1149 else:
1150 width_fn = line_width_fn
1151 return width_fn
1152
1153
def _align_column_choose_padfn(strings, alignment, has_invisible, preserve_whitespace):
    """Pick the padding function for a column and pre-process its cells.

    Returns ``(strings, padfn)``:

    * "right" / "center" / any other truthy value (treated as left): cells are
      stripped unless ``preserve_whitespace`` is set.
    * "decimal": cells are right-padded with spaces so that their decimal
      points line up, then flushed right.
    * falsy alignment: cells are returned untouched with a no-op pad function.
    """
    if alignment == "decimal":
        if has_invisible:
            # count decimals on the visible text only
            after_point = [_afterpoint(_strip_ansi(cell)) for cell in strings]
        else:
            after_point = [_afterpoint(cell) for cell in strings]
        widest = max(after_point)
        aligned = [
            cell + (widest - count) * " " for cell, count in zip(strings, after_point)
        ]
        return aligned, _padleft

    if alignment == "right":
        padfn = _padleft
    elif alignment == "center":
        padfn = _padboth
    elif not alignment:
        return strings, _padnone
    else:
        padfn = _padright

    if not preserve_whitespace:
        strings = [cell.strip() for cell in strings]
    return strings, padfn
1178
1179
1180def _align_column_choose_width_fn(has_invisible, enable_widechars, is_multiline):
1181 if has_invisible:
1182 line_width_fn = _visible_width
1183 elif enable_widechars: # optional wide-character support if available
1184 line_width_fn = wcwidth.wcswidth
1185 else:
1186 line_width_fn = len
1187 if is_multiline:
1188 width_fn = lambda s: _align_column_multiline_width(s, line_width_fn) # noqa
1189 else:
1190 width_fn = line_width_fn
1191 return width_fn
1192
1193
1194def _align_column_multiline_width(multiline_s, line_width_fn=len):
1195 """Visible width of a potentially multiline content."""
1196 return list(map(line_width_fn, re.split("[\r\n]", multiline_s)))
1197
1198
1199def _flat_list(nested_list):
1200 ret = []
1201 for item in nested_list:
1202 if isinstance(item, list):
1203 ret.extend(item)
1204 else:
1205 ret.append(item)
1206 return ret
1207
1208
def _align_column(
    strings,
    alignment,
    minwidth=0,
    has_invisible=True,
    enable_widechars=False,
    is_multiline=False,
    preserve_whitespace=False,
):
    """[string] -> [padded_string]

    Pad every cell of one column to a common visible width: the widest
    cell/line, or ``minwidth`` if that is larger.  When invisible codes or
    wide characters may be present, the width handed to the padding function
    is corrected by the difference between ``len()`` and the visible width,
    because the padding function itself only counts code points.
    """
    strings, padfn = _align_column_choose_padfn(
        strings, alignment, has_invisible, preserve_whitespace
    )
    width_fn = _align_column_choose_width_fn(
        has_invisible, enable_widechars, is_multiline
    )

    s_widths = [width_fn(cell) for cell in strings]
    maxwidth = max(max(_flat_list(s_widths)), minwidth)
    # len() equals the visible width only without wide chars / invisible codes
    plain_widths = not (enable_widechars or has_invisible)

    # TODO: refactor column alignment in single-line and multiline modes
    if not is_multiline:  # single-line cell values
        if plain_widths:
            return [padfn(maxwidth, cell) for cell in strings]
        # wcswidth and _visible_width don't count invisible characters;
        # padfn doesn't need to apply another correction
        return [
            padfn(maxwidth - (visible - len(cell)), cell)
            for cell, visible in zip(strings, s_widths)
        ]

    if plain_widths:
        return [
            "\n".join([padfn(maxwidth, line) for line in cell.splitlines()])
            for cell in strings
        ]

    # enable wide-character width corrections, line by line
    padded_strings = []
    for cell, line_widths in zip(strings, s_widths):
        raw_lines = re.split("[\r\n]", cell)
        targets = [
            maxwidth - (visible - len(raw))
            for visible, raw in zip(line_widths, raw_lines)
        ]
        padded_lines = [
            padfn(target, line)
            for line, target in zip((cell.splitlines() or cell), targets)
        ]
        padded_strings.append("\n".join(padded_lines))
    return padded_strings
1259
1260
1261def _more_generic(type1, type2):
1262 types = {
1263 type(None): 0,
1264 bool: 1,
1265 int: 2,
1266 float: 3,
1267 bytes: 4,
1268 str: 5,
1269 }
1270 invtypes = {
1271 5: str,
1272 4: bytes,
1273 3: float,
1274 2: int,
1275 1: bool,
1276 0: type(None),
1277 }
1278 moregeneric = max(types.get(type1, 5), types.get(type2, 5))
1279 return invtypes[moregeneric]
1280
1281
1282def _column_type(strings, has_invisible=True, numparse=True):
1283 """The least generic type all column values are convertible to.
1284
1285 >>> _column_type([True, False]) is bool
1286 True
1287 >>> _column_type(["1", "2"]) is int
1288 True
1289 >>> _column_type(["1", "2.3"]) is float
1290 True
1291 >>> _column_type(["1", "2.3", "four"]) is str
1292 True
1293 >>> _column_type(["four", '\u043f\u044f\u0442\u044c']) is str
1294 True
1295 >>> _column_type([None, "brux"]) is str
1296 True
1297 >>> _column_type([1, 2, None]) is int
1298 True
1299 >>> import datetime as dt
1300 >>> _column_type([dt.datetime(1991,2,19), dt.time(17,35)]) is str
1301 True
1302
1303 """
1304 types = [_type(s, has_invisible, numparse) for s in strings]
1305 return reduce(_more_generic, types, bool)
1306
1307
1308def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
1309 """Format a value according to its deduced type. Empty values are deemed valid for any type.
1310
1311 Unicode is supported:
1312
1313 >>> hrow = ['\u0431\u0443\u043a\u0432\u0430', '\u0446\u0438\u0444\u0440\u0430'] ; \
1314 tbl = [['\u0430\u0437', 2], ['\u0431\u0443\u043a\u0438', 4]] ; \
1315 good_result = '\\u0431\\u0443\\u043a\\u0432\\u0430 \\u0446\\u0438\\u0444\\u0440\\u0430\\n------- -------\\n\\u0430\\u0437 2\\n\\u0431\\u0443\\u043a\\u0438 4' ; \
1316 tabulate(tbl, headers=hrow) == good_result
1317 True
1318
1319 """ # noqa
1320 if val is None:
1321 return missingval
1322 if isinstance(val, (bytes, str)) and not val:
1323 return ""
1324
1325 if valtype is str:
1326 return f"{val}"
1327 elif valtype is int:
1328 if isinstance(val, str):
1329 val_striped = val.encode("unicode_escape").decode("utf-8")
1330 colored = re.search(
1331 r"(\\[xX]+[0-9a-fA-F]+\[\d+[mM]+)([0-9.]+)(\\.*)$", val_striped
1332 )
1333 if colored:
1334 total_groups = len(colored.groups())
1335 if total_groups == 3:
1336 digits = colored.group(2)
1337 if digits.isdigit():
1338 val_new = (
1339 colored.group(1)
1340 + format(int(digits), intfmt)
1341 + colored.group(3)
1342 )
1343 val = val_new.encode("utf-8").decode("unicode_escape")
1344 intfmt = ""
1345 return format(val, intfmt)
1346 elif valtype is bytes:
1347 try:
1348 return str(val, "ascii")
1349 except (TypeError, UnicodeDecodeError):
1350 return str(val)
1351 elif valtype is float:
1352 is_a_colored_number = has_invisible and isinstance(val, (str, bytes))
1353 if is_a_colored_number:
1354 raw_val = _strip_ansi(val)
1355 formatted_val = format(float(raw_val), floatfmt)
1356 return val.replace(raw_val, formatted_val)
1357 else:
1358 if isinstance(val, str) and "," in val:
1359 val = val.replace(",", "") # handle thousands-separators
1360 return format(float(val), floatfmt)
1361 else:
1362 return f"{val}"
1363
1364
1365def _align_header(
1366 header, alignment, width, visible_width, is_multiline=False, width_fn=None
1367):
1368 "Pad string header to width chars given known visible_width of the header."
1369 if is_multiline:
1370 header_lines = re.split(_multiline_codes, header)
1371 padded_lines = [
1372 _align_header(h, alignment, width, width_fn(h)) for h in header_lines
1373 ]
1374 return "\n".join(padded_lines)
1375 # else: not multiline
1376 ninvisible = len(header) - visible_width
1377 width += ninvisible
1378 if alignment == "left":
1379 return _padright(width, header)
1380 elif alignment == "center":
1381 return _padboth(width, header)
1382 elif not alignment:
1383 return f"{header}"
1384 else:
1385 return _padleft(width, header)
1386
1387
1388def _remove_separating_lines(rows):
1389 if isinstance(rows, list):
1390 separating_lines = []
1391 sans_rows = []
1392 for index, row in enumerate(rows):
1393 if _is_separating_line(row):
1394 separating_lines.append(index)
1395 else:
1396 sans_rows.append(row)
1397 return sans_rows, separating_lines
1398 else:
1399 return rows, None
1400
1401
1402def _reinsert_separating_lines(rows, separating_lines):
1403 if separating_lines:
1404 for index in separating_lines:
1405 rows.insert(index, SEPARATING_LINE)
1406
1407
1408def _prepend_row_index(rows, index):
1409 """Add a left-most index column."""
1410 if index is None or index is False:
1411 return rows
1412 if isinstance(index, Sized) and len(index) != len(rows):
1413 raise ValueError(
1414 "index must be as long as the number of data rows: "
1415 + f"len(index)={len(index)} len(rows)={len(rows)}"
1416 )
1417 sans_rows, separating_lines = _remove_separating_lines(rows)
1418 new_rows = []
1419 index_iter = iter(index)
1420 for row in sans_rows:
1421 index_v = next(index_iter)
1422 new_rows.append([index_v] + list(row))
1423 rows = new_rows
1424 _reinsert_separating_lines(rows, separating_lines)
1425 return rows
1426
1427
1428def _bool(val):
1429 "A wrapper around standard bool() which doesn't throw on NumPy arrays"
1430 try:
1431 return bool(val)
1432 except ValueError: # val is likely to be a numpy array with many elements
1433 return False
1434
1435
def _normalize_tabular_data(tabular_data, headers, showindex="default"):
    """Transform a supported data type to a list of lists, and a list of headers,
    with headers padding.

    Supported tabular data types:

    * list-of-lists or another iterable of iterables

    * list of named tuples (usually used with headers="keys")

    * list of dicts (usually used with headers="keys")

    * list of OrderedDicts (usually used with headers="keys")

    * list of dataclasses (usually used with headers="keys")

    * 2D NumPy arrays

    * NumPy record arrays (usually used with headers="keys")

    * dict of iterables (usually used with headers="keys")

    * pandas.DataFrame (usually used with headers="keys")

    The first row can be used as headers if headers="firstrow",
    column indices can be used as headers if headers="keys".

    If showindex="default", show row indices of the pandas.DataFrame.
    If showindex="always", show row indices for all types of data.
    If showindex="never", don't show row indices for all types of data.
    If showindex is an iterable, show its values as row indices.

    Returns a tuple ``(rows, headers, headers_pad)``: ``rows`` is a list whose
    items are lists of cell values (separating lines are passed through
    unchanged), ``headers`` is a list of strings, and ``headers_pad`` is the
    number of empty headers that were prepended because the first row has
    more cells than there were headers.

    Raises TypeError when the data (or the values of a dict) cannot be
    iterated, and ValueError when an object with ``keys``/``values`` is neither
    dict-like nor DataFrame-like, or when a list of dicts is combined with
    headers that are neither a dict nor a keyword.

    """

    # Truth-testing some array-like headers raises ValueError; turn those into
    # a plain list so the comparisons and truth tests below behave.
    try:
        bool(headers)
    except ValueError:  # numpy.ndarray, pandas.core.index.Index, ...
        headers = list(headers)

    err_msg = (
        "\n\nTo build a table python-tabulate requires two-dimensional data "
        "like a list of lists or similar."
        "\nDid you forget a pair of extra [] or ',' in ()?"
    )
    # Row labels; only filled in by the DataFrame-like branch below.
    index = None
    if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"):
        # dict-like and pandas.DataFrame?
        if hasattr(tabular_data.values, "__call__"):
            # likely a conventional dict
            keys = tabular_data.keys()
            try:
                # zip_longest fills columns shorter than the longest with None
                rows = list(
                    izip_longest(*tabular_data.values())
                )  # columns have to be transposed
            except TypeError:  # not iterable
                raise TypeError(err_msg)

        elif hasattr(tabular_data, "index"):
            # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0)
            keys = list(tabular_data)
            if (
                showindex in ["default", "always", True]
                and tabular_data.index.name is not None
            ):
                # the index will be shown: put its name(s) in front of the
                # column names so that it gets a header as well
                if isinstance(tabular_data.index.name, list):
                    keys[:0] = tabular_data.index.name
                else:
                    keys[:0] = [tabular_data.index.name]
            vals = tabular_data.values  # values matrix doesn't need to be transposed
            # for DataFrames add an index per default
            index = list(tabular_data.index)
            rows = [list(row) for row in vals]
        else:
            raise ValueError("tabular data doesn't appear to be a dict or a DataFrame")

        if headers == "keys":
            headers = list(map(str, keys))  # headers should be strings

    else:  # it's a usual iterable of iterables, or a NumPy array, or an iterable of dataclasses
        try:
            rows = list(tabular_data)
        except TypeError:  # not iterable
            raise TypeError(err_msg)

        # The branches below are tried in order; the first match decides how
        # the headers (and, for dicts and dataclasses, the rows) are derived.
        if headers == "keys" and not rows:
            # an empty table (issue #81)
            headers = []
        elif (
            headers == "keys"
            and hasattr(tabular_data, "dtype")
            and getattr(tabular_data.dtype, "names")
        ):
            # numpy record array
            headers = tabular_data.dtype.names
        elif (
            headers == "keys"
            and len(rows) > 0
            and isinstance(rows[0], tuple)
            and hasattr(rows[0], "_fields")
        ):
            # namedtuple
            headers = list(map(str, rows[0]._fields))
        elif len(rows) > 0 and hasattr(rows[0], "keys") and hasattr(rows[0], "values"):
            # dict-like object
            uniq_keys = set()  # implements hashed lookup
            keys = []  # storage for set
            if headers == "firstrow":
                # the first dict supplies the header text and is not a data row
                firstdict = rows[0] if len(rows) > 0 else {}
                keys.extend(firstdict.keys())
                uniq_keys.update(keys)
                rows = rows[1:]
            for row in rows:
                for k in row.keys():
                    # Save unique items in input order
                    if k not in uniq_keys:
                        keys.append(k)
                        uniq_keys.add(k)
            if headers == "keys":
                headers = keys
            elif isinstance(headers, dict):
                # a dict of headers for a list of dicts
                headers = [headers.get(k, k) for k in keys]
                headers = list(map(str, headers))
            elif headers == "firstrow":
                if len(rows) > 0:
                    # keys missing from the first dict are shown under their own name
                    headers = [firstdict.get(k, k) for k in keys]
                    headers = list(map(str, headers))
                else:
                    headers = []
            elif headers:
                raise ValueError(
                    "headers for a list of dicts is not a dict or a keyword"
                )
            # one cell per collected key; keys absent from a row become None
            rows = [[row.get(k) for k in keys] for row in rows]

        elif (
            headers == "keys"
            and hasattr(tabular_data, "description")
            and hasattr(tabular_data, "fetchone")
            and hasattr(tabular_data, "rowcount")
        ):
            # Python Database API cursor object (PEP 0249)
            # print tabulate(cursor, headers='keys')
            headers = [column[0] for column in tabular_data.description]

        elif (
            dataclasses is not None
            and len(rows) > 0
            and dataclasses.is_dataclass(rows[0])
        ):
            # Python's dataclass
            # field names are read from the first row only
            field_names = [field.name for field in dataclasses.fields(rows[0])]
            if headers == "keys":
                headers = field_names
            rows = [
                [getattr(row, f) for f in field_names]
                if not _is_separating_line(row)
                else row
                for row in rows
            ]

        elif headers == "keys" and len(rows) > 0:
            # keys are column indices
            headers = list(map(str, range(len(rows[0]))))

    # take headers from the first row if necessary
    if headers == "firstrow" and len(rows) > 0:
        if index is not None:
            # the first row label goes with the header row, not with the data
            headers = [index[0]] + list(rows[0])
            index = index[1:]
        else:
            headers = rows[0]
        headers = list(map(str, headers))  # headers should be strings
        rows = rows[1:]
    elif headers == "firstrow":
        headers = []

    headers = list(map(str, headers))
    #    rows = list(map(list, rows))
    # separating lines are kept as they are; every other row becomes a list
    rows = list(map(lambda r: r if _is_separating_line(r) else list(r), rows))

    # add or remove an index column
    # str/bytes are Sized and Iterable too, so keyword values of showindex
    # have to be told apart from a custom index explicitly
    showindex_is_a_str = type(showindex) in [str, bytes]
    if showindex == "default" and index is not None:
        rows = _prepend_row_index(rows, index)
    elif isinstance(showindex, Sized) and not showindex_is_a_str:
        rows = _prepend_row_index(rows, list(showindex))
    elif isinstance(showindex, Iterable) and not showindex_is_a_str:
        rows = _prepend_row_index(rows, showindex)
    elif showindex == "always" or (_bool(showindex) and not showindex_is_a_str):
        if index is None:
            index = list(range(len(rows)))
        rows = _prepend_row_index(rows, index)
    elif showindex == "never" or (not _bool(showindex) and not showindex_is_a_str):
        pass

    # pad with empty headers for initial columns if necessary
    headers_pad = 0
    if headers and len(rows) > 0:
        headers_pad = max(0, len(rows[0]) - len(headers))
        headers = [""] * headers_pad + headers

    return rows, headers, headers_pad
1639
1640
def _wrap_text_to_colwidths(
    list_of_lists,
    colwidths,
    numparses=True,
    break_long_words=_BREAK_LONG_WORDS,
    break_on_hyphens=_BREAK_ON_HYPHENS,
):
    """Wrap the text of every cell to the maximum width of its column.

    ``colwidths`` gives one width per column; ``None`` leaves that column
    alone.  Cells recognised as numbers are left untouched when number parsing
    is enabled for their column (``numparses`` is expanded to one flag per
    column, based on the length of the first row).  Any other cell is
    converted with ``str()``, each non-blank line is wrapped separately, and
    the pieces are joined with newlines.  A new list of rows is returned.
    """
    num_cols = len(list_of_lists[0]) if len(list_of_lists) else 0
    numparses = _expand_iterable(numparses, num_cols, True)

    def wrap_cell(cell, width):
        wrapper = _CustomTextWrap(
            width=width,
            break_long_words=break_long_words,
            break_on_hyphens=break_on_hyphens,
        )
        pieces = []
        for line in str(cell).splitlines():
            if line.strip() != "":
                pieces.append("\n".join(wrapper.wrap(line)))
        return "\n".join(pieces)

    result = []
    for row in list_of_lists:
        new_row = []
        for cell, width, numparse in zip(row, colwidths, numparses):
            untouched = (_isnumber(cell) and numparse) or width is None
            new_row.append(cell if untouched else wrap_cell(cell, width))
        result.append(new_row)
    return result
1671
1672
1673def _to_str(s, encoding="utf8", errors="ignore"):
1674 """
1675 A type safe wrapper for converting a bytestring to str. This is essentially just
1676 a wrapper around .decode() intended for use with things like map(), but with some
1677 specific behavior:
1678
1679 1. if the given parameter is not a bytestring, it is returned unmodified
1680 2. decode() is called for the given parameter and assumes utf8 encoding, but the
1681 default error behavior is changed from 'strict' to 'ignore'
1682
1683 >>> repr(_to_str(b'foo'))
1684 "'foo'"
1685
1686 >>> repr(_to_str('foo'))
1687 "'foo'"
1688
1689 >>> repr(_to_str(42))
1690 "'42'"
1691
1692 """
1693 if isinstance(s, bytes):
1694 return s.decode(encoding=encoding, errors=errors)
1695 return str(s)
1696
1697
1698def tabulate(
1699 tabular_data,
1700 headers=(),
1701 tablefmt="simple",
1702 floatfmt=_DEFAULT_FLOATFMT,
1703 intfmt=_DEFAULT_INTFMT,
1704 numalign=_DEFAULT_ALIGN,
1705 stralign=_DEFAULT_ALIGN,
1706 missingval=_DEFAULT_MISSINGVAL,
1707 showindex="default",
1708 disable_numparse=False,
1709 colglobalalign=None,
1710 colalign=None,
1711 preserve_whitespace=False,
1712 maxcolwidths=None,
1713 headersglobalalign=None,
1714 headersalign=None,
1715 rowalign=None,
1716 maxheadercolwidths=None,
1717 break_long_words=_BREAK_LONG_WORDS,
1718 break_on_hyphens=_BREAK_ON_HYPHENS,
1719):
1720 """Format a fixed width table for pretty printing.
1721
1722 >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]]))
1723 --- ---------
1724 1 2.34
1725 -56 8.999
1726 2 10001
1727 --- ---------
1728
1729 The first required argument (`tabular_data`) can be a
1730 list-of-lists (or another iterable of iterables), a list of named
1731 tuples, a dictionary of iterables, an iterable of dictionaries,
1732 an iterable of dataclasses, a two-dimensional NumPy array,
1733 NumPy record array, or a Pandas' dataframe.
1734
1735
1736 Table headers
1737 -------------
1738
1739 To print nice column headers, supply the second argument (`headers`):
1740
1741 - `headers` can be an explicit list of column headers
1742 - if `headers="firstrow"`, then the first row of data is used
1743 - if `headers="keys"`, then dictionary keys or column indices are used
1744
1745 Otherwise a headerless table is produced.
1746
1747 If the number of headers is less than the number of columns, they
1748 are supposed to be names of the last columns. This is consistent
1749 with the plain-text format of R and Pandas' dataframes.
1750
1751 >>> print(tabulate([["sex","age"],["Alice","F",24],["Bob","M",19]],
1752 ... headers="firstrow"))
1753 sex age
1754 ----- ----- -----
1755 Alice F 24
1756 Bob M 19
1757
1758 By default, pandas.DataFrame data have an additional column called
1759 row index. To add a similar column to all other types of data,
1760 use `showindex="always"` or `showindex=True`. To suppress row indices
    for all types of data, pass `showindex="never"` or `showindex=False`.
1762 To add a custom row index column, pass `showindex=some_iterable`.
1763
1764 >>> print(tabulate([["F",24],["M",19]], showindex="always"))
1765 - - --
1766 0 F 24
1767 1 M 19
1768 - - --
1769
1770
1771 Column and Headers alignment
1772 ----------------------------
1773
1774 `tabulate` tries to detect column types automatically, and aligns
1775 the values properly. By default it aligns decimal points of the
1776 numbers (or flushes integer numbers to the right), and flushes
1777 everything else to the left. Possible column alignments
1778 (`numalign`, `stralign`) are: "right", "center", "left", "decimal"
1779 (only for `numalign`), and None (to disable alignment).
1780
1781 `colglobalalign` allows for global alignment of columns, before any
1782 specific override from `colalign`. Possible values are: None
1783 (defaults according to coltype), "right", "center", "decimal",
1784 "left".
1785 `colalign` allows for column-wise override starting from left-most
1786 column. Possible values are: "global" (no override), "right",
1787 "center", "decimal", "left".
1788 `headersglobalalign` allows for global headers alignment, before any
1789 specific override from `headersalign`. Possible values are: None
1790 (follow columns alignment), "right", "center", "left".
1791 `headersalign` allows for header-wise override starting from left-most
1792 given header. Possible values are: "global" (no override), "same"
1793 (follow column alignment), "right", "center", "left".
1794
1795 Note on intended behaviour: If there is no `tabular_data`, any column
1796 alignment argument is ignored. Hence, in this case, header
1797 alignment cannot be inferred from column alignment.
1798
1799 Table formats
1800 -------------
1801
1802 `intfmt` is a format specification used for columns which
1803 contain numeric data without a decimal point. This can also be
1804 a list or tuple of format strings, one per column.
1805
1806 `floatfmt` is a format specification used for columns which
1807 contain numeric data with a decimal point. This can also be
1808 a list or tuple of format strings, one per column.
1809
1810 `None` values are replaced with a `missingval` string (like
1811 `floatfmt`, this can also be a list of values for different
1812 columns):
1813
1814 >>> print(tabulate([["spam", 1, None],
1815 ... ["eggs", 42, 3.14],
1816 ... ["other", None, 2.7]], missingval="?"))
1817 ----- -- ----
1818 spam 1 ?
1819 eggs 42 3.14
1820 other ? 2.7
1821 ----- -- ----
1822
1823 Various plain-text table formats (`tablefmt`) are supported:
1824 'plain', 'simple', 'grid', 'pipe', 'orgtbl', 'rst', 'mediawiki',
1825 'latex', 'latex_raw', 'latex_booktabs', 'latex_longtable' and tsv.
    Variable `tabulate_formats` contains the list of currently supported formats.
1827
1828 "plain" format doesn't use any pseudographics to draw tables,
1829 it separates columns with a double space:
1830
1831 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1832 ... ["strings", "numbers"], "plain"))
1833 strings numbers
1834 spam 41.9999
1835 eggs 451
1836
1837 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="plain"))
1838 spam 41.9999
1839 eggs 451
1840
1841 "simple" format is like Pandoc simple_tables:
1842
1843 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1844 ... ["strings", "numbers"], "simple"))
1845 strings numbers
1846 --------- ---------
1847 spam 41.9999
1848 eggs 451
1849
1850 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="simple"))
1851 ---- --------
1852 spam 41.9999
1853 eggs 451
1854 ---- --------
1855
1856 "grid" is similar to tables produced by Emacs table.el package or
1857 Pandoc grid_tables:
1858
1859 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1860 ... ["strings", "numbers"], "grid"))
1861 +-----------+-----------+
1862 | strings | numbers |
1863 +===========+===========+
1864 | spam | 41.9999 |
1865 +-----------+-----------+
1866 | eggs | 451 |
1867 +-----------+-----------+
1868
1869 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="grid"))
1870 +------+----------+
1871 | spam | 41.9999 |
1872 +------+----------+
1873 | eggs | 451 |
1874 +------+----------+
1875
1876 "simple_grid" draws a grid using single-line box-drawing
1877 characters:
1878
1879 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1880 ... ["strings", "numbers"], "simple_grid"))
1881 ┌───────────┬───────────┐
1882 │ strings │ numbers │
1883 ├───────────┼───────────┤
1884 │ spam │ 41.9999 │
1885 ├───────────┼───────────┤
1886 │ eggs │ 451 │
1887 └───────────┴───────────┘
1888
1889 "rounded_grid" draws a grid using single-line box-drawing
1890 characters with rounded corners:
1891
1892 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1893 ... ["strings", "numbers"], "rounded_grid"))
1894 ╭───────────┬───────────╮
1895 │ strings │ numbers │
1896 ├───────────┼───────────┤
1897 │ spam │ 41.9999 │
1898 ├───────────┼───────────┤
1899 │ eggs │ 451 │
1900 ╰───────────┴───────────╯
1901
1902 "heavy_grid" draws a grid using bold (thick) single-line box-drawing
1903 characters:
1904
1905 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1906 ... ["strings", "numbers"], "heavy_grid"))
1907 ┏━━━━━━━━━━━┳━━━━━━━━━━━┓
1908 ┃ strings ┃ numbers ┃
1909 ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
1910 ┃ spam ┃ 41.9999 ┃
1911 ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
1912 ┃ eggs ┃ 451 ┃
1913 ┗━━━━━━━━━━━┻━━━━━━━━━━━┛
1914
1915 "mixed_grid" draws a grid using a mix of light (thin) and heavy (thick) lines
1916 box-drawing characters:
1917
1918 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1919 ... ["strings", "numbers"], "mixed_grid"))
1920 ┍━━━━━━━━━━━┯━━━━━━━━━━━┑
1921 │ strings │ numbers │
1922 ┝━━━━━━━━━━━┿━━━━━━━━━━━┥
1923 │ spam │ 41.9999 │
1924 ├───────────┼───────────┤
1925 │ eggs │ 451 │
1926 ┕━━━━━━━━━━━┷━━━━━━━━━━━┙
1927
1928 "double_grid" draws a grid using double-line box-drawing
1929 characters:
1930
1931 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1932 ... ["strings", "numbers"], "double_grid"))
1933 ╔═══════════╦═══════════╗
1934 ║ strings ║ numbers ║
1935 ╠═══════════╬═══════════╣
1936 ║ spam ║ 41.9999 ║
1937 ╠═══════════╬═══════════╣
1938 ║ eggs ║ 451 ║
1939 ╚═══════════╩═══════════╝
1940
1941 "fancy_grid" draws a grid using a mix of single and
1942 double-line box-drawing characters:
1943
1944 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1945 ... ["strings", "numbers"], "fancy_grid"))
1946 ╒═══════════╤═══════════╕
1947 │ strings │ numbers │
1948 ╞═══════════╪═══════════╡
1949 │ spam │ 41.9999 │
1950 ├───────────┼───────────┤
1951 │ eggs │ 451 │
1952 ╘═══════════╧═══════════╛
1953
1954 "colon_grid" is similar to "grid" but uses colons only to define
1955 columnwise content alignment, without whitespace padding,
1956 similar to the alignment specification of Pandoc `grid_tables`:
1957
1958 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1959 ... ["strings", "numbers"], "colon_grid"))
1960 +-----------+-----------+
1961 | strings | numbers |
1962 +:==========+:==========+
1963 | spam | 41.9999 |
1964 +-----------+-----------+
1965 | eggs | 451 |
1966 +-----------+-----------+
1967
1968 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1969 ... ["strings", "numbers"], "colon_grid",
1970 ... colalign=["right", "left"]))
1971 +-----------+-----------+
1972 | strings | numbers |
1973 +==========:+:==========+
1974 | spam | 41.9999 |
1975 +-----------+-----------+
1976 | eggs | 451 |
1977 +-----------+-----------+
1978
1979 "outline" is the same as the "grid" format but doesn't draw lines between rows:
1980
1981 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1982 ... ["strings", "numbers"], "outline"))
1983 +-----------+-----------+
1984 | strings | numbers |
1985 +===========+===========+
1986 | spam | 41.9999 |
1987 | eggs | 451 |
1988 +-----------+-----------+
1989
1990 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="outline"))
1991 +------+----------+
1992 | spam | 41.9999 |
1993 | eggs | 451 |
1994 +------+----------+
1995
1996 "simple_outline" is the same as the "simple_grid" format but doesn't draw lines between rows:
1997
1998 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1999 ... ["strings", "numbers"], "simple_outline"))
2000 ┌───────────┬───────────┐
2001 │ strings │ numbers │
2002 ├───────────┼───────────┤
2003 │ spam │ 41.9999 │
2004 │ eggs │ 451 │
2005 └───────────┴───────────┘
2006
2007 "rounded_outline" is the same as the "rounded_grid" format but doesn't draw lines between rows:
2008
2009 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2010 ... ["strings", "numbers"], "rounded_outline"))
2011 ╭───────────┬───────────╮
2012 │ strings │ numbers │
2013 ├───────────┼───────────┤
2014 │ spam │ 41.9999 │
2015 │ eggs │ 451 │
2016 ╰───────────┴───────────╯
2017
2018 "heavy_outline" is the same as the "heavy_grid" format but doesn't draw lines between rows:
2019
2020 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2021 ... ["strings", "numbers"], "heavy_outline"))
2022 ┏━━━━━━━━━━━┳━━━━━━━━━━━┓
2023 ┃ strings ┃ numbers ┃
2024 ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
2025 ┃ spam ┃ 41.9999 ┃
2026 ┃ eggs ┃ 451 ┃
2027 ┗━━━━━━━━━━━┻━━━━━━━━━━━┛
2028
2029 "mixed_outline" is the same as the "mixed_grid" format but doesn't draw lines between rows:
2030
2031 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2032 ... ["strings", "numbers"], "mixed_outline"))
2033 ┍━━━━━━━━━━━┯━━━━━━━━━━━┑
2034 │ strings │ numbers │
2035 ┝━━━━━━━━━━━┿━━━━━━━━━━━┥
2036 │ spam │ 41.9999 │
2037 │ eggs │ 451 │
2038 ┕━━━━━━━━━━━┷━━━━━━━━━━━┙
2039
2040 "double_outline" is the same as the "double_grid" format but doesn't draw lines between rows:
2041
2042 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2043 ... ["strings", "numbers"], "double_outline"))
2044 ╔═══════════╦═══════════╗
2045 ║ strings ║ numbers ║
2046 ╠═══════════╬═══════════╣
2047 ║ spam ║ 41.9999 ║
2048 ║ eggs ║ 451 ║
2049 ╚═══════════╩═══════════╝
2050
2051 "fancy_outline" is the same as the "fancy_grid" format but doesn't draw lines between rows:
2052
2053 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2054 ... ["strings", "numbers"], "fancy_outline"))
2055 ╒═══════════╤═══════════╕
2056 │ strings │ numbers │
2057 ╞═══════════╪═══════════╡
2058 │ spam │ 41.9999 │
2059 │ eggs │ 451 │
2060 ╘═══════════╧═══════════╛
2061
2062 "pipe" is like tables in PHP Markdown Extra extension or Pandoc
2063 pipe_tables:
2064
2065 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2066 ... ["strings", "numbers"], "pipe"))
2067 | strings | numbers |
2068 |:----------|----------:|
2069 | spam | 41.9999 |
2070 | eggs | 451 |
2071
    "presto" is like tables produced by the Presto CLI:
2073
2074 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2075 ... ["strings", "numbers"], "presto"))
2076 strings | numbers
2077 -----------+-----------
2078 spam | 41.9999
2079 eggs | 451
2080
2081 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="pipe"))
2082 |:-----|---------:|
2083 | spam | 41.9999 |
2084 | eggs | 451 |
2085
2086 "orgtbl" is like tables in Emacs org-mode and orgtbl-mode. They
2087 are slightly different from "pipe" format by not using colons to
2088 define column alignment, and using a "+" sign to indicate line
2089 intersections:
2090
2091 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2092 ... ["strings", "numbers"], "orgtbl"))
2093 | strings | numbers |
2094 |-----------+-----------|
2095 | spam | 41.9999 |
2096 | eggs | 451 |
2097
2098
2099 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="orgtbl"))
2100 | spam | 41.9999 |
2101 | eggs | 451 |
2102
2103 "rst" is like a simple table format from reStructuredText; please
2104 note that reStructuredText accepts also "grid" tables:
2105
2106 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2107 ... ["strings", "numbers"], "rst"))
2108 ========= =========
2109 strings numbers
2110 ========= =========
2111 spam 41.9999
2112 eggs 451
2113 ========= =========
2114
2115 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="rst"))
2116 ==== ========
2117 spam 41.9999
2118 eggs 451
2119 ==== ========
2120
2121 "mediawiki" produces a table markup used in Wikipedia and on other
2122 MediaWiki-based sites:
2123
2124 >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
2125 ... headers="firstrow", tablefmt="mediawiki"))
2126 {| class="wikitable" style="text-align: left;"
2127 |+ <!-- caption -->
2128 |-
2129 ! strings !! style="text-align: right;"| numbers
2130 |-
2131 | spam || style="text-align: right;"| 41.9999
2132 |-
2133 | eggs || style="text-align: right;"| 451
2134 |}
2135
    "html" produces HTML markup as an html.escape'd str
    with a ._repr_html_ method so that Jupyter Lab and Notebook display the HTML
    and a .str property so that the raw HTML remains accessible.
    The "unsafehtml" table format can be used if an unescaped HTML format is required:
2140
2141 >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
2142 ... headers="firstrow", tablefmt="html"))
2143 <table>
2144 <thead>
2145 <tr><th>strings </th><th style="text-align: right;"> numbers</th></tr>
2146 </thead>
2147 <tbody>
2148 <tr><td>spam </td><td style="text-align: right;"> 41.9999</td></tr>
2149 <tr><td>eggs </td><td style="text-align: right;"> 451 </td></tr>
2150 </tbody>
2151 </table>
2152
2153 "latex" produces a tabular environment of LaTeX document markup:
2154
2155 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex"))
2156 \\begin{tabular}{lr}
2157 \\hline
2158 spam & 41.9999 \\\\
2159 eggs & 451 \\\\
2160 \\hline
2161 \\end{tabular}
2162
    "latex_raw" is similar to "latex", but doesn't escape special characters,
    such as backslash and underscore, so LaTeX commands may be embedded into
    cells' values:
2166
2167 >>> print(tabulate([["spam$_9$", 41.9999], ["\\\\emph{eggs}", "451.0"]], tablefmt="latex_raw"))
2168 \\begin{tabular}{lr}
2169 \\hline
2170 spam$_9$ & 41.9999 \\\\
2171 \\emph{eggs} & 451 \\\\
2172 \\hline
2173 \\end{tabular}
2174
2175 "latex_booktabs" produces a tabular environment of LaTeX document markup
2176 using the booktabs.sty package:
2177
2178 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_booktabs"))
2179 \\begin{tabular}{lr}
2180 \\toprule
2181 spam & 41.9999 \\\\
2182 eggs & 451 \\\\
2183 \\bottomrule
2184 \\end{tabular}
2185
2186 "latex_longtable" produces a tabular environment that can stretch along
2187 multiple pages, using the longtable package for LaTeX.
2188
2189 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_longtable"))
2190 \\begin{longtable}{lr}
2191 \\hline
2192 spam & 41.9999 \\\\
2193 eggs & 451 \\\\
2194 \\hline
2195 \\end{longtable}
2196
2197
2198 Number parsing
2199 --------------
2200 By default, anything which can be parsed as a number is a number.
2201 This ensures numbers represented as strings are aligned properly.
2202 This can lead to weird results for particular strings such as
2203 specific git SHAs e.g. "42992e1" will be parsed into the number
2204 429920 and aligned as such.
2205
    To completely disable number parsing (and alignment), use
    `disable_numparse=True`. For more fine grained control, a list of column
    indices can be passed to disable number parsing only on those columns
    e.g. `disable_numparse=[0, 2]` would disable number parsing only on the
    first and third columns.
2211
2212 Column Widths and Auto Line Wrapping
2213 ------------------------------------
    Tabulate will, by default, set the width of each column to the length of the
    longest element in that column. However, in situations where fields are expected
    to reasonably be too long to look good as a single line, tabulate can help automate
    word wrapping long fields for you. Use the parameter `maxcolwidths` to provide a
    list of maximal column widths:
2219
2220 >>> print(tabulate( \
2221 [('1', 'John Smith', \
2222 'This is a rather long description that might look better if it is wrapped a bit')], \
2223 headers=("Issue Id", "Author", "Description"), \
2224 maxcolwidths=[None, None, 30], \
2225 tablefmt="grid" \
2226 ))
2227 +------------+------------+-------------------------------+
2228 | Issue Id | Author | Description |
2229 +============+============+===============================+
2230 | 1 | John Smith | This is a rather long |
2231 | | | description that might look |
2232 | | | better if it is wrapped a bit |
2233 +------------+------------+-------------------------------+
2234
    Header column width can be specified in a similar way using `maxheadercolwidths`.
2236
2237 """
2238
2239 if tabular_data is None:
2240 tabular_data = []
2241
2242 list_of_lists, headers, headers_pad = _normalize_tabular_data(
2243 tabular_data, headers, showindex=showindex
2244 )
2245 list_of_lists, separating_lines = _remove_separating_lines(list_of_lists)
2246
2247 if maxcolwidths is not None:
2248 if type(maxcolwidths) is tuple: # Check if tuple, convert to list if so
2249 maxcolwidths = list(maxcolwidths)
2250 if len(list_of_lists):
2251 num_cols = len(list_of_lists[0])
2252 else:
2253 num_cols = 0
2254 if isinstance(maxcolwidths, int): # Expand scalar for all columns
2255 maxcolwidths = _expand_iterable(maxcolwidths, num_cols, maxcolwidths)
2256 else: # Ignore col width for any 'trailing' columns
2257 maxcolwidths = _expand_iterable(maxcolwidths, num_cols, None)
2258
2259 numparses = _expand_numparse(disable_numparse, num_cols)
2260 list_of_lists = _wrap_text_to_colwidths(
2261 list_of_lists, maxcolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens
2262 )
2263
2264 if maxheadercolwidths is not None:
2265 num_cols = len(list_of_lists[0])
2266 if isinstance(maxheadercolwidths, int): # Expand scalar for all columns
2267 maxheadercolwidths = _expand_iterable(
2268 maxheadercolwidths, num_cols, maxheadercolwidths
2269 )
2270 else: # Ignore col width for any 'trailing' columns
2271 maxheadercolwidths = _expand_iterable(maxheadercolwidths, num_cols, None)
2272
2273 numparses = _expand_numparse(disable_numparse, num_cols)
2274 headers = _wrap_text_to_colwidths(
2275 [headers], maxheadercolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens
2276 )[0]
2277
2278 # empty values in the first column of RST tables should be escaped (issue #82)
2279 # "" should be escaped as "\\ " or ".."
2280 if tablefmt == "rst":
2281 list_of_lists, headers = _rst_escape_first_column(list_of_lists, headers)
2282
2283 # PrettyTable formatting does not use any extra padding.
2284 # Numbers are not parsed and are treated the same as strings for alignment.
2285 # Check if pretty is the format being used and override the defaults so it
2286 # does not impact other formats.
2287 min_padding = MIN_PADDING
2288 if tablefmt == "pretty":
2289 min_padding = 0
2290 disable_numparse = True
2291 numalign = "center" if numalign == _DEFAULT_ALIGN else numalign
2292 stralign = "center" if stralign == _DEFAULT_ALIGN else stralign
2293 else:
2294 numalign = "decimal" if numalign == _DEFAULT_ALIGN else numalign
2295 stralign = "left" if stralign == _DEFAULT_ALIGN else stralign
2296
2297 # 'colon_grid' uses colons in the line beneath the header to represent a column's
2298 # alignment instead of literally aligning the text differently. Hence,
2299 # left alignment of the data in the text output is enforced.
2300 if tablefmt == "colon_grid":
2301 colglobalalign = "left"
2302 headersglobalalign = "left"
2303
2304 # optimization: look for ANSI control codes once,
2305 # enable smart width functions only if a control code is found
2306 #
2307 # convert the headers and rows into a single, tab-delimited string ensuring
2308 # that any bytestrings are decoded safely (i.e. errors ignored)
2309 plain_text = "\t".join(
2310 chain(
2311 # headers
2312 map(_to_str, headers),
2313 # rows: chain the rows together into a single iterable after mapping
            # the bytestring conversion to each cell value
2315 chain.from_iterable(map(_to_str, row) for row in list_of_lists),
2316 )
2317 )
2318
2319 has_invisible = _ansi_codes.search(plain_text) is not None
2320
2321 enable_widechars = wcwidth is not None and WIDE_CHARS_MODE
2322 if (
2323 not isinstance(tablefmt, TableFormat)
2324 and tablefmt in multiline_formats
2325 and _is_multiline(plain_text)
2326 ):
2327 tablefmt = multiline_formats.get(tablefmt, tablefmt)
2328 is_multiline = True
2329 else:
2330 is_multiline = False
2331 width_fn = _choose_width_fn(has_invisible, enable_widechars, is_multiline)
2332
2333 # format rows and columns, convert numeric values to strings
2334 cols = list(izip_longest(*list_of_lists))
2335 numparses = _expand_numparse(disable_numparse, len(cols))
2336 coltypes = [_column_type(col, numparse=np) for col, np in zip(cols, numparses)]
2337 if isinstance(floatfmt, str): # old version
2338 float_formats = len(cols) * [
2339 floatfmt
2340 ] # just duplicate the string to use in each column
2341 else: # if floatfmt is list, tuple etc we have one per column
2342 float_formats = list(floatfmt)
2343 if len(float_formats) < len(cols):
2344 float_formats.extend((len(cols) - len(float_formats)) * [_DEFAULT_FLOATFMT])
2345 if isinstance(intfmt, str): # old version
2346 int_formats = len(cols) * [
2347 intfmt
2348 ] # just duplicate the string to use in each column
2349 else: # if intfmt is list, tuple etc we have one per column
2350 int_formats = list(intfmt)
2351 if len(int_formats) < len(cols):
2352 int_formats.extend((len(cols) - len(int_formats)) * [_DEFAULT_INTFMT])
2353 if isinstance(missingval, str):
2354 missing_vals = len(cols) * [missingval]
2355 else:
2356 missing_vals = list(missingval)
2357 if len(missing_vals) < len(cols):
2358 missing_vals.extend((len(cols) - len(missing_vals)) * [_DEFAULT_MISSINGVAL])
2359 cols = [
2360 [_format(v, ct, fl_fmt, int_fmt, miss_v, has_invisible) for v in c]
2361 for c, ct, fl_fmt, int_fmt, miss_v in zip(
2362 cols, coltypes, float_formats, int_formats, missing_vals
2363 )
2364 ]
2365
2366 # align columns
2367 # first set global alignment
2368 if colglobalalign is not None: # if global alignment provided
2369 aligns = [colglobalalign] * len(cols)
2370 else: # default
2371 aligns = [numalign if ct in [int, float] else stralign for ct in coltypes]
2372 # then specific alignments
2373 if colalign is not None:
2374 assert isinstance(colalign, Iterable)
2375 if isinstance(colalign, str):
2376 warnings.warn(
2377 f"As a string, `colalign` is interpreted as {[c for c in colalign]}. "
2378 f'Did you mean `colglobalalign = "{colalign}"` or `colalign = ("{colalign}",)`?',
2379 stacklevel=2,
2380 )
2381 for idx, align in enumerate(colalign):
2382 if not idx < len(aligns):
2383 break
2384 elif align != "global":
2385 aligns[idx] = align
2386 minwidths = (
2387 [width_fn(h) + min_padding for h in headers] if headers else [0] * len(cols)
2388 )
2389 aligns_copy = aligns.copy()
2390 # Reset alignments in copy of alignments list to "left" for 'colon_grid' format,
2391 # which enforces left alignment in the text output of the data.
2392 if tablefmt == "colon_grid":
2393 aligns_copy = ["left"] * len(cols)
2394 cols = [
2395 _align_column(
2396 c,
2397 a,
2398 minw,
2399 has_invisible,
2400 enable_widechars,
2401 is_multiline,
2402 preserve_whitespace,
2403 )
2404 for c, a, minw in zip(cols, aligns_copy, minwidths)
2405 ]
2406
2407 aligns_headers = None
2408 if headers:
2409 # align headers and add headers
2410 t_cols = cols or [[""]] * len(headers)
2411 # first set global alignment
2412 if headersglobalalign is not None: # if global alignment provided
2413 aligns_headers = [headersglobalalign] * len(t_cols)
2414 else: # default
2415 aligns_headers = aligns or [stralign] * len(headers)
2416 # then specific header alignments
2417 if headersalign is not None:
2418 assert isinstance(headersalign, Iterable)
2419 if isinstance(headersalign, str):
2420 warnings.warn(
2421 f"As a string, `headersalign` is interpreted as {[c for c in headersalign]}. "
2422 f'Did you mean `headersglobalalign = "{headersalign}"` '
2423 f'or `headersalign = ("{headersalign}",)`?',
2424 stacklevel=2,
2425 )
2426 for idx, align in enumerate(headersalign):
2427 hidx = headers_pad + idx
2428 if not hidx < len(aligns_headers):
2429 break
2430 elif align == "same" and hidx < len(aligns): # same as column align
2431 aligns_headers[hidx] = aligns[hidx]
2432 elif align != "global":
2433 aligns_headers[hidx] = align
2434 minwidths = [
2435 max(minw, max(width_fn(cl) for cl in c))
2436 for minw, c in zip(minwidths, t_cols)
2437 ]
2438 headers = [
2439 _align_header(h, a, minw, width_fn(h), is_multiline, width_fn)
2440 for h, a, minw in zip(headers, aligns_headers, minwidths)
2441 ]
2442 rows = list(zip(*cols))
2443 else:
2444 minwidths = [max(width_fn(cl) for cl in c) for c in cols]
2445 rows = list(zip(*cols))
2446
2447 if not isinstance(tablefmt, TableFormat):
2448 tablefmt = _table_formats.get(tablefmt, _table_formats["simple"])
2449
2450 ra_default = rowalign if isinstance(rowalign, str) else None
2451 rowaligns = _expand_iterable(rowalign, len(rows), ra_default)
2452 _reinsert_separating_lines(rows, separating_lines)
2453
2454 return _format_table(
2455 tablefmt,
2456 headers,
2457 aligns_headers,
2458 rows,
2459 minwidths,
2460 aligns,
2461 is_multiline,
2462 rowaligns=rowaligns,
2463 )
2464
2465
2466def _expand_numparse(disable_numparse, column_count):
2467 """
2468 Return a list of bools of length `column_count` which indicates whether
2469 number parsing should be used on each column.
2470 If `disable_numparse` is a list of indices, each of those indices are False,
2471 and everything else is True.
2472 If `disable_numparse` is a bool, then the returned list is all the same.
2473 """
2474 if isinstance(disable_numparse, Iterable):
2475 numparses = [True] * column_count
2476 for index in disable_numparse:
2477 numparses[index] = False
2478 return numparses
2479 else:
2480 return [not disable_numparse] * column_count
2481
2482
2483def _expand_iterable(original, num_desired, default):
2484 """
2485 Expands the `original` argument to return a return a list of
2486 length `num_desired`. If `original` is shorter than `num_desired`, it will
2487 be padded with the value in `default`.
2488 If `original` is not a list to begin with (i.e. scalar value) a list of
2489 length `num_desired` completely populated with `default will be returned
2490 """
2491 if isinstance(original, Iterable) and not isinstance(original, str):
2492 return original + [default] * (num_desired - len(original))
2493 else:
2494 return [default] * num_desired
2495
2496
2497def _pad_row(cells, padding):
2498 if cells:
2499 if cells == SEPARATING_LINE:
2500 return SEPARATING_LINE
2501 pad = " " * padding
2502 padded_cells = [pad + cell + pad for cell in cells]
2503 return padded_cells
2504 else:
2505 return cells
2506
2507
2508def _build_simple_row(padded_cells, rowfmt):
2509 "Format row according to DataRow format without padding."
2510 begin, sep, end = rowfmt
2511 return (begin + sep.join(padded_cells) + end).rstrip()
2512
2513
2514def _build_row(padded_cells, colwidths, colaligns, rowfmt):
2515 "Return a string which represents a row of data cells."
2516 if not rowfmt:
2517 return None
2518 if hasattr(rowfmt, "__call__"):
2519 return rowfmt(padded_cells, colwidths, colaligns)
2520 else:
2521 return _build_simple_row(padded_cells, rowfmt)
2522
2523
def _append_basic_row(lines, padded_cells, colwidths, colaligns, rowfmt, rowalign=None):
    """Render one single-line row with `_build_row` and append it to `lines`.

    `rowalign` is accepted only so that the signature is interchangeable
    with `_append_multiline_row`; it is not used here. Returns `lines`.
    """
    rendered = _build_row(padded_cells, colwidths, colaligns, rowfmt)
    lines.append(rendered)
    return lines
2528
2529
2530def _align_cell_veritically(text_lines, num_lines, column_width, row_alignment):
2531 delta_lines = num_lines - len(text_lines)
2532 blank = [" " * column_width]
2533 if row_alignment == "bottom":
2534 return blank * delta_lines + text_lines
2535 elif row_alignment == "center":
2536 top_delta = delta_lines // 2
2537 bottom_delta = delta_lines - top_delta
2538 return top_delta * blank + text_lines + bottom_delta * blank
2539 else:
2540 return text_lines + blank * delta_lines
2541
2542
def _append_multiline_row(
    lines, padded_multiline_cells, padded_widths, colaligns, rowfmt, pad, rowalign=None
):
    """Append one table row whose cells may contain several text lines.

    Each cell is split with `splitlines()`, every cell is brought to the
    height of the tallest one by `_align_cell_veritically` (according to
    `rowalign`), and then one physical row per text line is padded with
    `_pad_row` and appended through `_append_basic_row`. Returns `lines`.
    """
    # Widths without the horizontal padding on both sides.
    inner_widths = [width - 2 * pad for width in padded_widths]
    split_cells = [cell.splitlines() for cell in padded_multiline_cells]
    # Number of physical lines this row occupies.
    height = max(map(len, split_cells))

    # Vertically pad the cells that have fewer lines than the tallest one.
    aligned_cells = [
        _align_cell_veritically(chunk, height, width, rowalign)
        for chunk, width in zip(split_cells, inner_widths)
    ]

    for line_no in range(height):
        physical_row = [chunk[line_no] for chunk in aligned_cells]
        _append_basic_row(
            lines, _pad_row(physical_row, pad), inner_widths, colaligns, rowfmt
        )
    return lines
2563
2564
2565def _build_line(colwidths, colaligns, linefmt):
2566 "Return a string which represents a horizontal line."
2567 if not linefmt:
2568 return None
2569 if hasattr(linefmt, "__call__"):
2570 return linefmt(colwidths, colaligns)
2571 else:
2572 begin, fill, sep, end = linefmt
2573 cells = [fill * w for w in colwidths]
2574 return _build_simple_row(cells, (begin, sep, end))
2575
2576
def _append_line(lines, colwidths, colaligns, linefmt):
    """Render a horizontal line with `_build_line`, append it to `lines` and return `lines`."""
    rule = _build_line(colwidths, colaligns, linefmt)
    lines.append(rule)
    return lines
2580
2581
class JupyterHTMLStr(str):
    """A `str` subclass exposing a `_repr_html_` method, so that Jupyter
    displays the HTML table instead of its source text."""

    @property
    def str(self):
        """Give access to the raw string through a `.str` property."""
        return self

    def _repr_html_(self):
        """Return the string itself as the HTML representation."""
        return self
2593
2594
def _format_table(
    fmt, headers, headersaligns, rows, colwidths, colaligns, is_multiline, rowaligns
):
    """Produce a plain-text representation of the table.

    The output is assembled in this order: the line above, the header row
    and the line below it, the data rows (with lines between them when the
    format defines one), and the line below. Parts named in
    `fmt.with_header_hide` are omitted when headers are present.

    Returns the lines joined with newlines, wrapped in `JupyterHTMLStr` when
    `fmt.lineabove` is the HTML table opener, or "" when there are neither
    headers nor rows.
    """
    lines = []
    if headers and fmt.with_header_hide:
        hidden = fmt.with_header_hide
    else:
        hidden = []
    pad = fmt.padding
    headerrow = fmt.headerrow
    padded_widths = [width + 2 * pad for width in colwidths]

    if is_multiline:

        def pad_row(row, _):
            # Multiline rows are padded later, in _append_multiline_row.
            return row

        append_row = partial(_append_multiline_row, pad=pad)
    else:
        pad_row, append_row = _pad_row, _append_basic_row

    def shown(part):
        # A line is drawn when the format defines it and it is not hidden.
        return bool(getattr(fmt, part)) and part not in hidden

    def emit_rule(linefmt):
        _append_line(lines, padded_widths, colaligns, linefmt)

    padded_headers = pad_row(headers, pad)

    if shown("lineabove"):
        emit_rule(fmt.lineabove)

    if padded_headers:
        append_row(lines, padded_headers, padded_widths, headersaligns, headerrow)
        if shown("linebelowheader"):
            emit_rule(fmt.linebelowheader)

    if rows and shown("linebetweenrows"):
        # All rows but the last are each followed by a line.
        for row, ralign in zip(rows[:-1], rowaligns):
            if row != SEPARATING_LINE:
                append_row(
                    lines,
                    pad_row(row, pad),
                    padded_widths,
                    colaligns,
                    fmt.datarow,
                    rowalign=ralign,
                )
            emit_rule(fmt.linebetweenrows)
        # The last row has no line below it.
        append_row(
            lines,
            pad_row(rows[-1], pad),
            padded_widths,
            colaligns,
            fmt.datarow,
            rowalign=rowaligns[-1],
        )
    else:
        # Line drawn in place of a separating-line row: the first one the
        # format defines, falling back to an empty line.
        separating_line = (
            fmt.linebetweenrows
            or fmt.linebelowheader
            or fmt.linebelow
            or fmt.lineabove
            or Line("", "", "", "")
        )
        for row in rows:
            if _is_separating_line(row):
                emit_rule(separating_line)
            else:
                append_row(
                    lines, pad_row(row, pad), padded_widths, colaligns, fmt.datarow
                )

    if shown("linebelow"):
        emit_rule(fmt.linebelow)

    if not (headers or rows):
        # a completely empty table
        return ""
    output = "\n".join(lines)
    if fmt.lineabove == _html_begin_table_without_header:
        return JupyterHTMLStr(output)
    return output
2673
2674
2675class _CustomTextWrap(textwrap.TextWrapper):
2676 """A custom implementation of CPython's textwrap.TextWrapper. This supports
2677 both wide characters (Korea, Japanese, Chinese) - including mixed string.
2678 For the most part, the `_handle_long_word` and `_wrap_chunks` functions were
2679 copy pasted out of the CPython baseline, and updated with our custom length
2680 and line appending logic.
2681 """
2682
2683 def __init__(self, *args, **kwargs):
2684 self._active_codes = []
2685 self.max_lines = None # For python2 compatibility
2686 textwrap.TextWrapper.__init__(self, *args, **kwargs)
2687
2688 @staticmethod
2689 def _len(item):
2690 """Custom len that gets console column width for wide
2691 and non-wide characters as well as ignores color codes"""
2692 stripped = _strip_ansi(item)
2693 if wcwidth:
2694 return wcwidth.wcswidth(stripped)
2695 else:
2696 return len(stripped)
2697
2698 def _update_lines(self, lines, new_line):
2699 """Adds a new line to the list of lines the text is being wrapped into
2700 This function will also track any ANSI color codes in this string as well
2701 as add any colors from previous lines order to preserve the same formatting
2702 as a single unwrapped string.
2703 """
2704 code_matches = [x for x in _ansi_codes.finditer(new_line)]
2705 color_codes = [
2706 code.string[code.span()[0] : code.span()[1]] for code in code_matches
2707 ]
2708
2709 # Add color codes from earlier in the unwrapped line, and then track any new ones we add.
2710 new_line = "".join(self._active_codes) + new_line
2711
2712 for code in color_codes:
2713 if code != _ansi_color_reset_code:
2714 self._active_codes.append(code)
2715 else: # A single reset code resets everything
2716 self._active_codes = []
2717
2718 # Always ensure each line is color terminated if any colors are
2719 # still active, otherwise colors will bleed into other cells on the console
2720 if len(self._active_codes) > 0:
2721 new_line = new_line + _ansi_color_reset_code
2722
2723 lines.append(new_line)
2724
2725 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2726 """_handle_long_word(chunks : [string],
2727 cur_line : [string],
2728 cur_len : int, width : int)
2729 Handle a chunk of text (most likely a word, not whitespace) that
2730 is too long to fit in any line.
2731 """
2732 # Figure out when indent is larger than the specified width, and make
2733 # sure at least one character is stripped off on every pass
2734 if width < 1:
2735 space_left = 1
2736 else:
2737 space_left = width - cur_len
2738
2739 # If we're allowed to break long words, then do so: put as much
2740 # of the next chunk onto the current line as will fit.
2741 if self.break_long_words:
2742 # Tabulate Custom: Build the string up piece-by-piece in order to
2743 # take each charcter's width into account
2744 chunk = reversed_chunks[-1]
2745 i = 1
2746 # Only count printable characters, so strip_ansi first, index later.
2747 while len(_strip_ansi(chunk)[:i]) <= space_left:
2748 i = i + 1
2749 # Consider escape codes when breaking words up
2750 total_escape_len = 0
2751 last_group = 0
2752 if _ansi_codes.search(chunk) is not None:
2753 for group, _, _, _ in _ansi_codes.findall(chunk):
2754 escape_len = len(group)
2755 if (
2756 group
2757 in chunk[last_group : i + total_escape_len + escape_len - 1]
2758 ):
2759 total_escape_len += escape_len
2760 found = _ansi_codes.search(chunk[last_group:])
2761 last_group += found.end()
2762 cur_line.append(chunk[: i + total_escape_len - 1])
2763 reversed_chunks[-1] = chunk[i + total_escape_len - 1 :]
2764
2765 # Otherwise, we have to preserve the long word intact. Only add
2766 # it to the current line if there's nothing already there --
2767 # that minimizes how much we violate the width constraint.
2768 elif not cur_line:
2769 cur_line.append(reversed_chunks.pop())
2770
2771 # If we're not allowed to break long words, and there's already
2772 # text on the current line, do nothing. Next time through the
2773 # main loop of _wrap_chunks(), we'll wind up here again, but
2774 # cur_len will be zero, so the next line will be entirely
2775 # devoted to the long word that we can't handle right now.
2776
2777 def _wrap_chunks(self, chunks):
2778 """_wrap_chunks(chunks : [string]) -> [string]
2779 Wrap a sequence of text chunks and return a list of lines of
2780 length 'self.width' or less. (If 'break_long_words' is false,
2781 some lines may be longer than this.) Chunks correspond roughly
2782 to words and the whitespace between them: each chunk is
2783 indivisible (modulo 'break_long_words'), but a line break can
2784 come between any two chunks. Chunks should not have internal
2785 whitespace; ie. a chunk is either all whitespace or a "word".
2786 Whitespace chunks will be removed from the beginning and end of
2787 lines, but apart from that whitespace is preserved.
2788 """
2789 lines = []
2790 if self.width <= 0:
2791 raise ValueError("invalid width %r (must be > 0)" % self.width)
2792 if self.max_lines is not None:
2793 if self.max_lines > 1:
2794 indent = self.subsequent_indent
2795 else:
2796 indent = self.initial_indent
2797 if self._len(indent) + self._len(self.placeholder.lstrip()) > self.width:
2798 raise ValueError("placeholder too large for max width")
2799
2800 # Arrange in reverse order so items can be efficiently popped
2801 # from a stack of chucks.
2802 chunks.reverse()
2803
2804 while chunks:
2805
2806 # Start the list of chunks that will make up the current line.
2807 # cur_len is just the length of all the chunks in cur_line.
2808 cur_line = []
2809 cur_len = 0
2810
2811 # Figure out which static string will prefix this line.
2812 if lines:
2813 indent = self.subsequent_indent
2814 else:
2815 indent = self.initial_indent
2816
2817 # Maximum width for this line.
2818 width = self.width - self._len(indent)
2819
2820 # First chunk on line is whitespace -- drop it, unless this
2821 # is the very beginning of the text (ie. no lines started yet).
2822 if self.drop_whitespace and chunks[-1].strip() == "" and lines:
2823 del chunks[-1]
2824
2825 while chunks:
2826 chunk_len = self._len(chunks[-1])
2827
2828 # Can at least squeeze this chunk onto the current line.
2829 if cur_len + chunk_len <= width:
2830 cur_line.append(chunks.pop())
2831 cur_len += chunk_len
2832
2833 # Nope, this line is full.
2834 else:
2835 break
2836
2837 # The current line is full, and the next chunk is too big to
2838 # fit on *any* line (not just this one).
2839 if chunks and self._len(chunks[-1]) > width:
2840 self._handle_long_word(chunks, cur_line, cur_len, width)
2841 cur_len = sum(map(self._len, cur_line))
2842
2843 # If the last chunk on this line is all whitespace, drop it.
2844 if self.drop_whitespace and cur_line and cur_line[-1].strip() == "":
2845 cur_len -= self._len(cur_line[-1])
2846 del cur_line[-1]
2847
2848 if cur_line:
2849 if (
2850 self.max_lines is None
2851 or len(lines) + 1 < self.max_lines
2852 or (
2853 not chunks
2854 or self.drop_whitespace
2855 and len(chunks) == 1
2856 and not chunks[0].strip()
2857 )
2858 and cur_len <= width
2859 ):
2860 # Convert current line back to a string and store it in
2861 # list of all lines (return value).
2862 self._update_lines(lines, indent + "".join(cur_line))
2863 else:
2864 while cur_line:
2865 if (
2866 cur_line[-1].strip()
2867 and cur_len + self._len(self.placeholder) <= width
2868 ):
2869 cur_line.append(self.placeholder)
2870 self._update_lines(lines, indent + "".join(cur_line))
2871 break
2872 cur_len -= self._len(cur_line[-1])
2873 del cur_line[-1]
2874 else:
2875 if lines:
2876 prev_line = lines[-1].rstrip()
2877 if (
2878 self._len(prev_line) + self._len(self.placeholder)
2879 <= self.width
2880 ):
2881 lines[-1] = prev_line + self.placeholder
2882 break
2883 self._update_lines(lines, indent + self.placeholder.lstrip())
2884 break
2885
2886 return lines
2887
2888
def _main():
    """\
    Usage: tabulate [options] [FILE ...]

    Pretty-print tabular data.
    See also https://github.com/astanin/python-tabulate

    FILE                      a filename of the file with tabular data;
                              if "-" or missing, read data from stdin.

    Options:

    -h, --help                  show this message
    -1, --header                use the first row of data as a table header
    -o FILE, --output FILE      print table to FILE (default: stdout)
    -s REGEXP, --sep REGEXP     use a custom column separator (default: whitespace)
    -F FPFMT, --float FPFMT     floating point number format (default: g)
    -I INTFMT, --int INTFMT     integer point number format (default: "")
    -C ALIGN, --colalign ALIGN  column alignments, whitespace-separated,
                                e.g. "right left" (default: none)
    -f FMT, --format FMT        set output table format; supported formats:
                                plain, simple, grid, fancy_grid, pipe, orgtbl,
                                rst, mediawiki, html, latex, latex_raw,
                                latex_booktabs, latex_longtable, tsv
                                (default: simple)
    """
    # NOTE: the docstring above doubles as the usage text printed to the user.
    import contextlib
    import getopt

    usage = textwrap.dedent(_main.__doc__)
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            # "C:" makes the short form of --colalign reachable; the option
            # loop below already handled "-C" but getopt used to reject it.
            "h1o:s:F:I:C:f:",
            [
                "help",
                "header",
                "output=",
                "sep=",
                "float=",
                "int=",
                "colalign=",
                "format=",
            ],
        )
    except getopt.GetoptError as e:
        print(e)
        print(usage)
        sys.exit(2)
    headers = []
    floatfmt = _DEFAULT_FLOATFMT
    intfmt = _DEFAULT_INTFMT
    colalign = None
    tablefmt = "simple"
    sep = r"\s+"
    outfile = "-"
    for opt, value in opts:
        if opt in ["-1", "--header"]:
            headers = "firstrow"
        elif opt in ["-o", "--output"]:
            outfile = value
        elif opt in ["-F", "--float"]:
            floatfmt = value
        elif opt in ["-I", "--int"]:
            intfmt = value
        elif opt in ["-C", "--colalign"]:
            colalign = value.split()
        elif opt in ["-f", "--format"]:
            if value not in tabulate_formats:
                print("%s is not a supported table format" % value)
                print(usage)
                sys.exit(3)
            tablefmt = value
        elif opt in ["-s", "--sep"]:
            sep = value
        elif opt in ["-h", "--help"]:
            print(usage)
            sys.exit(0)
    files = [sys.stdin] if not args else args
    # Only a file opened here is closed on exit; sys.stdout is wrapped in a
    # no-op context so the interpreter's standard output stays open.
    if outfile == "-":
        out_context = contextlib.nullcontext(sys.stdout)
    else:
        out_context = open(outfile, "w")
    with out_context as out:
        for f in files:
            if f == "-":
                f = sys.stdin
            # Already open file objects (stdin) are used as they are and left
            # open; anything else is treated as a path, opened and closed.
            if _is_file(f):
                source = contextlib.nullcontext(f)
            else:
                source = open(f)
            with source as fobj:
                _pprint_file(
                    fobj,
                    headers=headers,
                    tablefmt=tablefmt,
                    sep=sep,
                    floatfmt=floatfmt,
                    intfmt=intfmt,
                    file=out,
                    colalign=colalign,
                )
2992
2993
def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, intfmt, file, colalign):
    """Read a table from `fobject`, format it with `tabulate` and print it to `file`.

    Every line that is not blank becomes one row: trailing whitespace is
    removed and the line is split into cells with the regular expression
    `sep`. The remaining arguments are passed on to `tabulate`.
    """
    table = []
    for raw_line in fobject.readlines():
        if raw_line.strip():
            table.append(re.split(sep, raw_line.rstrip()))
    rendered = tabulate(
        table,
        headers,
        tablefmt,
        floatfmt=floatfmt,
        intfmt=intfmt,
        colalign=colalign,
    )
    print(rendered, file=file)
3008
3009
# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    _main()