1"""Pretty-print tabular data."""
2
3import warnings
4from collections import namedtuple
5from collections.abc import Iterable, Sized
6from html import escape as htmlescape
7from itertools import chain, zip_longest as izip_longest
8from functools import reduce, partial
9import io
10import re
11import math
12import textwrap
13import dataclasses
14import sys
15
16try:
17 import wcwidth # optional wide-character (CJK) support
18except ImportError:
19 wcwidth = None
20
21
22def _is_file(f):
23 return isinstance(f, io.IOBase)
24
25
26__all__ = ["tabulate", "tabulate_formats", "simple_separated_format"]
27try:
28 from .version import version as __version__ # noqa: F401
29except ImportError:
30 pass # running __init__.py as a script, AppVeyor pytests
31
32
# minimum extra space in headers
MIN_PADDING = 2

# Module-level defaults for the float format spec, the int format spec and the
# text shown for missing values.
# NOTE(review): the code consuming these defaults is outside this chunk.
_DEFAULT_FLOATFMT = "g"
_DEFAULT_INTFMT = ""
_DEFAULT_MISSINGVAL = ""
# default align will be overwritten by "left", "center" or "decimal"
# depending on the formatter
_DEFAULT_ALIGN = "default"


# if True, enable wide-character (CJK) support
# (initialised to True only when the optional ``wcwidth`` import succeeded)
WIDE_CHARS_MODE = wcwidth is not None

# Constant that can be used as part of passed rows to generate a separating line
# It is purposely an unprintable character, very unlikely to be used in a table
# (_is_separating_line_value compares stripped str values against it)
SEPARATING_LINE = "\001"
50
# Four-field record used by the table formats for horizontal lines.
Line = namedtuple("Line", "begin hline sep end")


# Three-field record used by the table formats for header and data rows.
DataRow = namedtuple("DataRow", "begin sep end")
55
56
57# A table structure is supposed to be:
58#
59# --- lineabove ---------
60# headerrow
61# --- linebelowheader ---
62# datarow
63# --- linebetweenrows ---
64# ... (more datarows) ...
65# --- linebetweenrows ---
66# last datarow
67# --- linebelow ---------
68#
69# TableFormat's line* elements can be
70#
71# - either None, if the element is not used,
72# - or a Line tuple,
73# - or a function: [col_widths], [col_alignments] -> string.
74#
75# TableFormat's *row elements can be
76#
77# - either None, if the element is not used,
78# - or a DataRow tuple,
79# - or a function: [cell_values], [col_widths], [col_alignments] -> string.
80#
81# padding (an integer) is the amount of white space around data values.
82#
83# with_header_hide:
84#
85# - either None, to display all table elements unconditionally,
86# - or a list of elements not to be displayed if the table has column headers.
87#
# Record describing one output format: four optional horizontal lines, the
# header/data row renderers, the cell padding and the list of elements to
# hide when the table has headers.
TableFormat = namedtuple(
    "TableFormat",
    "lineabove linebelowheader linebetweenrows linebelow "
    "headerrow datarow padding with_header_hide",
)
101
102
def _is_separating_line_value(value):
    """Return True when *value* is exactly a ``str`` that equals
    ``SEPARATING_LINE`` once surrounding whitespace is stripped."""
    if type(value) is not str:
        return False
    return value.strip() == SEPARATING_LINE
105
106
107def _is_separating_line(row):
108 row_type = type(row)
109 is_sl = (row_type == list or row_type == str) and (
110 (len(row) >= 1 and _is_separating_line_value(row[0]))
111 or (len(row) >= 2 and _is_separating_line_value(row[1]))
112 )
113
114 return is_sl
115
116
117def _pipe_segment_with_colons(align, colwidth):
118 """Return a segment of a horizontal line with optional colons which
119 indicate column's alignment (as in `pipe` output format)."""
120 w = colwidth
121 if align in ["right", "decimal"]:
122 return ("-" * (w - 1)) + ":"
123 elif align == "center":
124 return ":" + ("-" * (w - 2)) + ":"
125 elif align == "left":
126 return ":" + ("-" * (w - 1))
127 else:
128 return "-" * w
129
130
131def _pipe_line_with_colons(colwidths, colaligns):
132 """Return a horizontal line with optional colons to indicate column's
133 alignment (as in `pipe` output format)."""
134 if not colaligns: # e.g. printing an empty data frame (github issue #15)
135 colaligns = [""] * len(colwidths)
136 segments = [_pipe_segment_with_colons(a, w) for a, w in zip(colaligns, colwidths)]
137 return "|" + "|".join(segments) + "|"
138
139
140def _grid_segment_with_colons(colwidth, align):
141 """Return a segment of a horizontal line with optional colons which indicate
142 column's alignment in a grid table."""
143 width = colwidth
144 if align == "right":
145 return ("=" * (width - 1)) + ":"
146 elif align == "center":
147 return ":" + ("=" * (width - 2)) + ":"
148 elif align == "left":
149 return ":" + ("=" * (width - 1))
150 else:
151 return "=" * width
152
153
154def _grid_line_with_colons(colwidths, colaligns):
155 """Return a horizontal line with optional colons to indicate column's alignment
156 in a grid table."""
157 if not colaligns:
158 colaligns = [""] * len(colwidths)
159 segments = [_grid_segment_with_colons(w, a) for a, w in zip(colaligns, colwidths)]
160 return "+" + "+".join(segments) + "+"
161
162
163def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns):
164 alignment = {
165 "left": "",
166 "right": 'style="text-align: right;"| ',
167 "center": 'style="text-align: center;"| ',
168 "decimal": 'style="text-align: right;"| ',
169 }
170 # hard-coded padding _around_ align attribute and value together
171 # rather than padding parameter which affects only the value
172 values_with_attrs = [
173 " " + alignment.get(a, "") + c + " " for c, a in zip(cell_values, colaligns)
174 ]
175 colsep = separator * 2
176 return (separator + colsep.join(values_with_attrs)).rstrip()
177
178
179def _textile_row_with_attrs(cell_values, colwidths, colaligns):
180 cell_values[0] += " "
181 alignment = {"left": "<.", "right": ">.", "center": "=.", "decimal": ">."}
182 values = (alignment.get(a, "") + v for a, v in zip(colaligns, cell_values))
183 return "|" + "|".join(values) + "|"
184
185
186def _html_begin_table_without_header(colwidths_ignore, colaligns_ignore):
187 # this table header will be suppressed if there is a header row
188 return "<table>\n<tbody>"
189
190
191def _html_row_with_attrs(celltag, unsafe, cell_values, colwidths, colaligns):
192 alignment = {
193 "left": "",
194 "right": ' style="text-align: right;"',
195 "center": ' style="text-align: center;"',
196 "decimal": ' style="text-align: right;"',
197 }
198 if unsafe:
199 values_with_attrs = [
200 "<{0}{1}>{2}</{0}>".format(celltag, alignment.get(a, ""), c)
201 for c, a in zip(cell_values, colaligns)
202 ]
203 else:
204 values_with_attrs = [
205 "<{0}{1}>{2}</{0}>".format(celltag, alignment.get(a, ""), htmlescape(c))
206 for c, a in zip(cell_values, colaligns)
207 ]
208 rowhtml = "<tr>{}</tr>".format("".join(values_with_attrs).rstrip())
209 if celltag == "th": # it's a header row, create a new table header
210 rowhtml = f"<table>\n<thead>\n{rowhtml}\n</thead>\n<tbody>"
211 return rowhtml
212
213
214def _moin_row_with_attrs(celltag, cell_values, colwidths, colaligns, header=""):
215 alignment = {
216 "left": "",
217 "right": '<style="text-align: right;">',
218 "center": '<style="text-align: center;">',
219 "decimal": '<style="text-align: right;">',
220 }
221 values_with_attrs = [
222 "{}{} {} ".format(celltag, alignment.get(a, ""), header + c + header)
223 for c, a in zip(cell_values, colaligns)
224 ]
225 return "".join(values_with_attrs) + "||"
226
227
228def _latex_line_begin_tabular(colwidths, colaligns, booktabs=False, longtable=False):
229 alignment = {"left": "l", "right": "r", "center": "c", "decimal": "r"}
230 tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns])
231 return "\n".join(
232 [
233 ("\\begin{tabular}{" if not longtable else "\\begin{longtable}{")
234 + tabular_columns_fmt
235 + "}",
236 "\\toprule" if booktabs else "\\hline",
237 ]
238 )
239
240
241def _asciidoc_row(is_header, *args):
242 """handle header and data rows for asciidoc format"""
243
244 def make_header_line(is_header, colwidths, colaligns):
245 # generate the column specifiers
246
247 alignment = {"left": "<", "right": ">", "center": "^", "decimal": ">"}
248 # use the column widths generated by tabulate for the asciidoc column width specifiers
249 asciidoc_alignments = zip(
250 colwidths, [alignment[colalign] for colalign in colaligns]
251 )
252 asciidoc_column_specifiers = [
253 f"{width:d}{align}" for width, align in asciidoc_alignments
254 ]
255 header_list = ['cols="' + (",".join(asciidoc_column_specifiers)) + '"']
256
257 # generate the list of options (currently only "header")
258 options_list = []
259
260 if is_header:
261 options_list.append("header")
262
263 if options_list:
264 header_list += ['options="' + ",".join(options_list) + '"']
265
266 # generate the list of entries in the table header field
267
268 return "[{}]\n|====".format(",".join(header_list))
269
270 if len(args) == 2:
271 # two arguments are passed if called in the context of aboveline
272 # print the table header with column widths and optional header tag
273 return make_header_line(False, *args)
274
275 elif len(args) == 3:
276 # three arguments are passed if called in the context of dataline or headerline
277 # print the table line and make the aboveline if it is a header
278
279 cell_values, colwidths, colaligns = args
280 data_line = "|" + "|".join(cell_values)
281
282 if is_header:
283 return make_header_line(True, colwidths, colaligns) + "\n" + data_line
284 else:
285 return data_line
286
287 else:
288 raise ValueError(
289 " _asciidoc_row() requires two (colwidths, colaligns) "
290 + "or three (cell_values, colwidths, colaligns) arguments) "
291 )
292
293
# Single-character replacements applied to cell text by the escaping LaTeX
# row renderer: each key is a character, each value the LaTeX text emitted
# in its place.
LATEX_ESCAPE_RULES = {
    "&": r"\&",
    "%": r"\%",
    "$": r"\$",
    "#": r"\#",
    "_": r"\_",
    "^": r"\^{}",
    "{": r"\{",
    "}": r"\}",
    "~": r"\textasciitilde{}",
    "\\": r"\textbackslash{}",
    "<": r"\ensuremath{<}",
    ">": r"\ensuremath{>}",
}
308
309
def _latex_row(cell_values, colwidths, colaligns, escrules=LATEX_ESCAPE_RULES):
    """Render one LaTeX table row.

    Every character of every cell is replaced according to *escrules*
    (characters without a rule are kept), then the cells are joined by
    ``_build_simple_row`` with ``&`` as separator and ``\\\\`` as row end.
    *colwidths* and *colaligns* are unused.
    """
    escaped_values = [
        "".join(escrules.get(char, char) for char in cell) for cell in cell_values
    ]
    return _build_simple_row(escaped_values, DataRow("", "&", "\\\\"))
317
318
319def _rst_escape_first_column(rows, headers):
320 def escape_empty(val):
321 if isinstance(val, (str, bytes)) and not val.strip():
322 return ".."
323 else:
324 return val
325
326 new_headers = list(headers)
327 new_rows = []
328 if headers:
329 new_headers[0] = escape_empty(headers[0])
330 for row in rows:
331 new_row = list(row)
332 if new_row:
333 new_row[0] = escape_empty(row[0])
334 new_rows.append(new_row)
335 return new_rows, new_headers
336
337
# Registry of the built-in output formats, keyed by the format name.
# Every value is a TableFormat whose line*/​*row members are either None,
# a Line/DataRow tuple, or a callable (see the TableFormat description).
_table_formats = {
    # --- plain-text formats without vertical borders ---
    "simple": TableFormat(
        lineabove=Line("", "-", " ", ""),
        linebelowheader=Line("", "-", " ", ""),
        linebetweenrows=None,
        linebelow=Line("", "-", " ", ""),
        headerrow=DataRow("", " ", ""),
        datarow=DataRow("", " ", ""),
        padding=0,
        with_header_hide=["lineabove", "linebelow"],
    ),
    "plain": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("", " ", ""),
        datarow=DataRow("", " ", ""),
        padding=0,
        with_header_hide=None,
    ),
    # --- grid formats: a rule between every pair of rows ---
    "grid": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=Line("+", "=", "+", "+"),
        linebetweenrows=Line("+", "-", "+", "+"),
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "simple_grid": TableFormat(
        lineabove=Line("┌", "─", "┬", "┐"),
        linebelowheader=Line("├", "─", "┼", "┤"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("└", "─", "┴", "┘"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "rounded_grid": TableFormat(
        lineabove=Line("╭", "─", "┬", "╮"),
        linebelowheader=Line("├", "─", "┼", "┤"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("╰", "─", "┴", "╯"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "heavy_grid": TableFormat(
        lineabove=Line("┏", "━", "┳", "┓"),
        linebelowheader=Line("┣", "━", "╋", "┫"),
        linebetweenrows=Line("┣", "━", "╋", "┫"),
        linebelow=Line("┗", "━", "┻", "┛"),
        headerrow=DataRow("┃", "┃", "┃"),
        datarow=DataRow("┃", "┃", "┃"),
        padding=1,
        with_header_hide=None,
    ),
    "mixed_grid": TableFormat(
        lineabove=Line("┍", "━", "┯", "┑"),
        linebelowheader=Line("┝", "━", "┿", "┥"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("┕", "━", "┷", "┙"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "double_grid": TableFormat(
        lineabove=Line("╔", "═", "╦", "╗"),
        linebelowheader=Line("╠", "═", "╬", "╣"),
        linebetweenrows=Line("╠", "═", "╬", "╣"),
        linebelow=Line("╚", "═", "╩", "╝"),
        headerrow=DataRow("║", "║", "║"),
        datarow=DataRow("║", "║", "║"),
        padding=1,
        with_header_hide=None,
    ),
    "fancy_grid": TableFormat(
        lineabove=Line("╒", "═", "╤", "╕"),
        linebelowheader=Line("╞", "═", "╪", "╡"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("╘", "═", "╧", "╛"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    # Like "grid", but the rule below the header is built by a function that
    # marks column alignment with colons.
    "colon_grid": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=_grid_line_with_colons,
        linebetweenrows=Line("+", "-", "+", "+"),
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    # --- outline formats: same borders as the grids, no rule between rows ---
    "outline": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=Line("+", "=", "+", "+"),
        linebetweenrows=None,
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "simple_outline": TableFormat(
        lineabove=Line("┌", "─", "┬", "┐"),
        linebelowheader=Line("├", "─", "┼", "┤"),
        linebetweenrows=None,
        linebelow=Line("└", "─", "┴", "┘"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "rounded_outline": TableFormat(
        lineabove=Line("╭", "─", "┬", "╮"),
        linebelowheader=Line("├", "─", "┼", "┤"),
        linebetweenrows=None,
        linebelow=Line("╰", "─", "┴", "╯"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "heavy_outline": TableFormat(
        lineabove=Line("┏", "━", "┳", "┓"),
        linebelowheader=Line("┣", "━", "╋", "┫"),
        linebetweenrows=None,
        linebelow=Line("┗", "━", "┻", "┛"),
        headerrow=DataRow("┃", "┃", "┃"),
        datarow=DataRow("┃", "┃", "┃"),
        padding=1,
        with_header_hide=None,
    ),
    "mixed_outline": TableFormat(
        lineabove=Line("┍", "━", "┯", "┑"),
        linebelowheader=Line("┝", "━", "┿", "┥"),
        linebetweenrows=None,
        linebelow=Line("┕", "━", "┷", "┙"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "double_outline": TableFormat(
        lineabove=Line("╔", "═", "╦", "╗"),
        linebelowheader=Line("╠", "═", "╬", "╣"),
        linebetweenrows=None,
        linebelow=Line("╚", "═", "╩", "╝"),
        headerrow=DataRow("║", "║", "║"),
        datarow=DataRow("║", "║", "║"),
        padding=1,
        with_header_hide=None,
    ),
    "fancy_outline": TableFormat(
        lineabove=Line("╒", "═", "╤", "╕"),
        linebelowheader=Line("╞", "═", "╪", "╡"),
        linebetweenrows=None,
        linebelow=Line("╘", "═", "╧", "╛"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    # --- markup-oriented text formats ---
    "github": TableFormat(
        lineabove=Line("|", "-", "|", "|"),
        linebelowheader=Line("|", "-", "|", "|"),
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=["lineabove"],
    ),
    "pipe": TableFormat(
        lineabove=_pipe_line_with_colons,
        linebelowheader=_pipe_line_with_colons,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=["lineabove"],
    ),
    "orgtbl": TableFormat(
        lineabove=None,
        linebelowheader=Line("|", "-", "+", "|"),
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "jira": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("||", "||", "||"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "presto": TableFormat(
        lineabove=None,
        linebelowheader=Line("", "-", "+", ""),
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("", "|", ""),
        datarow=DataRow("", "|", ""),
        padding=1,
        with_header_hide=None,
    ),
    "pretty": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=Line("+", "-", "+", "+"),
        linebetweenrows=None,
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "psql": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=Line("|", "-", "+", "|"),
        linebetweenrows=None,
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "rst": TableFormat(
        lineabove=Line("", "=", " ", ""),
        linebelowheader=Line("", "=", " ", ""),
        linebetweenrows=None,
        linebelow=Line("", "=", " ", ""),
        headerrow=DataRow("", " ", ""),
        datarow=DataRow("", " ", ""),
        padding=0,
        with_header_hide=None,
    ),
    # --- wiki formats: rows are rendered by functions bound with partial() ---
    "mediawiki": TableFormat(
        lineabove=Line(
            '{| class="wikitable" style="text-align: left;"',
            "",
            "",
            "\n|+ <!-- caption -->\n|-",
        ),
        linebelowheader=Line("|-", "", "", ""),
        linebetweenrows=Line("|-", "", "", ""),
        linebelow=Line("|}", "", "", ""),
        headerrow=partial(_mediawiki_row_with_attrs, "!"),
        datarow=partial(_mediawiki_row_with_attrs, "|"),
        padding=0,
        with_header_hide=None,
    ),
    "moinmoin": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=partial(_moin_row_with_attrs, "||", header="'''"),
        datarow=partial(_moin_row_with_attrs, "||"),
        padding=1,
        with_header_hide=None,
    ),
    "youtrack": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|| ", " || ", " || "),
        datarow=DataRow("| ", " | ", " |"),
        padding=1,
        with_header_hide=None,
    ),
    # --- HTML: lineabove is hidden when headers exist because the "th" row
    # renderer emits the <table>/<thead> opening itself.  The two variants
    # differ only in the `unsafe` flag bound into the row renderer. ---
    "html": TableFormat(
        lineabove=_html_begin_table_without_header,
        linebelowheader="",
        linebetweenrows=None,
        linebelow=Line("</tbody>\n</table>", "", "", ""),
        headerrow=partial(_html_row_with_attrs, "th", False),
        datarow=partial(_html_row_with_attrs, "td", False),
        padding=0,
        with_header_hide=["lineabove"],
    ),
    "unsafehtml": TableFormat(
        lineabove=_html_begin_table_without_header,
        linebelowheader="",
        linebetweenrows=None,
        linebelow=Line("</tbody>\n</table>", "", "", ""),
        headerrow=partial(_html_row_with_attrs, "th", True),
        datarow=partial(_html_row_with_attrs, "td", True),
        padding=0,
        with_header_hide=["lineabove"],
    ),
    # --- LaTeX: "latex_raw" passes an empty escape table, so no characters
    # are replaced in its rows ---
    "latex": TableFormat(
        lineabove=_latex_line_begin_tabular,
        linebelowheader=Line("\\hline", "", "", ""),
        linebetweenrows=None,
        linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
        headerrow=_latex_row,
        datarow=_latex_row,
        padding=1,
        with_header_hide=None,
    ),
    "latex_raw": TableFormat(
        lineabove=_latex_line_begin_tabular,
        linebelowheader=Line("\\hline", "", "", ""),
        linebetweenrows=None,
        linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
        headerrow=partial(_latex_row, escrules={}),
        datarow=partial(_latex_row, escrules={}),
        padding=1,
        with_header_hide=None,
    ),
    "latex_booktabs": TableFormat(
        lineabove=partial(_latex_line_begin_tabular, booktabs=True),
        linebelowheader=Line("\\midrule", "", "", ""),
        linebetweenrows=None,
        linebelow=Line("\\bottomrule\n\\end{tabular}", "", "", ""),
        headerrow=_latex_row,
        datarow=_latex_row,
        padding=1,
        with_header_hide=None,
    ),
    "latex_longtable": TableFormat(
        lineabove=partial(_latex_line_begin_tabular, longtable=True),
        linebelowheader=Line("\\hline\n\\endhead", "", "", ""),
        linebetweenrows=None,
        linebelow=Line("\\hline\n\\end{longtable}", "", "", ""),
        headerrow=_latex_row,
        datarow=_latex_row,
        padding=1,
        with_header_hide=None,
    ),
    # --- remaining formats ---
    "tsv": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("", "\t", ""),
        datarow=DataRow("", "\t", ""),
        padding=0,
        with_header_hide=None,
    ),
    "textile": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|_. ", "|_.", "|"),
        datarow=_textile_row_with_attrs,
        padding=1,
        with_header_hide=None,
    ),
    # asciidoc: the same function serves as opening line, header row and data
    # row; it tells the cases apart by argument count and the bound is_header.
    "asciidoc": TableFormat(
        lineabove=partial(_asciidoc_row, False),
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=Line("|====", "", "", ""),
        headerrow=partial(_asciidoc_row, True),
        datarow=partial(_asciidoc_row, False),
        padding=1,
        with_header_hide=["lineabove"],
    ),
}
715
716
# Alphabetically sorted names of all built-in table formats.
tabulate_formats = sorted(_table_formats)
718
719# The table formats for which multiline cells will be folded into subsequent
720# table rows. The key is the original format specified at the API. The value is
721# the format that will be used to represent the original format.
# Every multiline-capable format currently represents itself, so the mapping
# is built from the list of names.
multiline_formats = {
    name: name
    for name in (
        "plain",
        "simple",
        "grid",
        "simple_grid",
        "rounded_grid",
        "heavy_grid",
        "mixed_grid",
        "double_grid",
        "fancy_grid",
        "colon_grid",
        "pipe",
        "orgtbl",
        "jira",
        "presto",
        "pretty",
        "psql",
        "rst",
        "outline",
        "simple_outline",
        "rounded_outline",
        "heavy_outline",
        "mixed_outline",
        "double_outline",
        "fancy_outline",
    )
}
748
749# TODO: Add multiline support for the remaining table formats:
750# - mediawiki: Replace \n with <br>
751# - moinmoin: TBD
752# - youtrack: TBD
753# - html: Replace \n with <br>
754# - latex*: Use "makecell" package: In header, replace X\nY with
755# \thead{X\\Y} and in data row, replace X\nY with \makecell{X\\Y}
756# - tsv: TBD
757# - textile: Replace \n with <br/> (must be well-formed XML)
758
759_multiline_codes = re.compile(r"\r|\n|\r\n")
760_multiline_codes_bytes = re.compile(b"\r|\n|\r\n")
761
762# Handle ANSI escape sequences for both control sequence introducer (CSI) and
763# operating system command (OSC). Both of these begin with 0x1b (or octal 033),
764# which will be shown below as ESC.
765#
766# CSI ANSI escape codes have the following format, defined in section 5.4 of ECMA-48:
767#
768# CSI: ESC followed by the '[' character (0x5b)
769# Parameter Bytes: 0..n bytes in the range 0x30-0x3f
770# Intermediate Bytes: 0..n bytes in the range 0x20-0x2f
771# Final Byte: a single byte in the range 0x40-0x7e
772#
773# Also include the terminal hyperlink sequences as described here:
774# https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda
775#
776# OSC 8 ; params ; uri ST display_text OSC 8 ;; ST
777#
778# Example: \x1b]8;;https://example.com\x5ctext to show\x1b]8;;\x5c
779#
780# Where:
781# OSC: ESC followed by the ']' character (0x5d)
782# params: 0..n optional key value pairs separated by ':' (e.g. foo=bar:baz=qux:abc=123)
783# URI: the actual URI with protocol scheme (e.g. https://, file://, ftp://)
784# ST: ESC followed by the '\' character (0x5c)
# Regex fragments (kept as raw text) that are interpolated into the verbose
# pattern below: ESC, the CSI and OSC introducers, and the ST terminator.
_esc = r"\x1b"
_csi = rf"{_esc}\["
_osc = rf"{_esc}\]"
_st = rf"{_esc}\\"

# Verbose-mode pattern with two alternatives: a CSI sequence, or a complete
# OSC 8 hyperlink.  Submatch 4 (the link text) is what _strip_ansi puts back
# in place of a matched hyperlink.
_ansi_escape_pat = rf"""
 (
 # terminal colors, etc
 {_csi} # CSI
 [\x30-\x3f]* # parameter bytes
 [\x20-\x2f]* # intermediate bytes
 [\x40-\x7e] # final byte
 |
 # terminal hyperlinks
 {_osc}8; # OSC opening
 (\w+=\w+:?)* # key=value params list (submatch 2)
 ; # delimiter
 ([^{_esc}]+) # URI - anything but ESC (submatch 3)
 {_st} # ST
 ([^{_esc}]+) # link text - anything but ESC (submatch 4)
 {_osc}8;;{_st} # "closing" OSC sequence
 )
"""
# The same pattern compiled once for str input and once for bytes input.
_ansi_codes = re.compile(_ansi_escape_pat, re.VERBOSE)
_ansi_codes_bytes = re.compile(_ansi_escape_pat.encode("utf8"), re.VERBOSE)
# CSI sequence "ESC [ 0 m".
_ansi_color_reset_code = "\033[0m"
811
# Matches a number written with comma thousands separators:
#   - group 1 (optional): an optional sign, 1-3 digits, then any number of
#     ",ddd" groups;
#   - an optional fractional part: when group 1 matched, "." followed by zero
#     or more digits; otherwise "." followed by at least one digit.
# Because every alternative is optional, the empty string matches as well.
_float_with_thousands_separators = re.compile(
    r"^(([+-]?[0-9]{1,3})(?:,([0-9]{3}))*)?(?(1)\.[0-9]*|\.[0-9]+)?$"
)
815
816
def simple_separated_format(separator):
    """Build a TableFormat without any horizontal lines whose header and data
    cells are delimited by *separator*, with zero padding.

    >>> tsv = simple_separated_format("\\t") ; \
        tabulate([["foo", 1], ["spam", 23]], tablefmt=tsv) == 'foo \\t 1\\nspam\\t23'
    True

    """
    return TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow(begin="", sep=separator, end=""),
        datarow=DataRow(begin="", sep=separator, end=""),
        padding=0,
        with_header_hide=None,
    )
835
836
def _isnumber_with_thousands_separator(string):
    """Tell whether *string* matches the comma-separated number pattern.

    Values that have a ``decode`` method (e.g. bytes) are decoded first; if
    decoding is impossible the value is matched as it is.

    >>> _isnumber_with_thousands_separator(".")
    False
    >>> _isnumber_with_thousands_separator("1")
    True
    >>> _isnumber_with_thousands_separator("1.")
    True
    >>> _isnumber_with_thousands_separator(".1")
    True
    >>> _isnumber_with_thousands_separator("1000")
    False
    >>> _isnumber_with_thousands_separator("1,000")
    True
    >>> _isnumber_with_thousands_separator("1,0000")
    False
    >>> _isnumber_with_thousands_separator("1,000.1234")
    True
    >>> _isnumber_with_thousands_separator(b"1,000.1234")
    True
    >>> _isnumber_with_thousands_separator("+1,000.1234")
    True
    >>> _isnumber_with_thousands_separator("-1,000.1234")
    True
    """
    try:
        text = string.decode()
    except (UnicodeDecodeError, AttributeError):
        text = string

    return _float_with_thousands_separators.match(text) is not None
868
869
870def _isconvertible(conv, string):
871 try:
872 conv(string)
873 return True
874 except (ValueError, TypeError):
875 return False
876
877
878def _isnumber(string):
879 """Detects if something *could* be considered a numeric value, vs. just a string.
880
881 This promotes types convertible to both int and float to be considered
882 a float. Note that, iff *all* values appear to be some form of numeric
883 value such as eg. "1e2", they would be considered numbers!
884
885 The exception is things that appear to be numbers but overflow to
886 +/-inf, eg. "1e23456"; we'll have to exclude them explicitly.
887
888 >>> _isnumber(123)
889 True
890 >>> _isnumber(123.45)
891 True
892 >>> _isnumber("123.45")
893 True
894 >>> _isnumber("123")
895 True
896 >>> _isnumber("spam")
897 False
898 >>> _isnumber("123e45")
899 True
900 >>> _isnumber("123e45678") # evaluates equal to 'inf', but ... isn't
901 False
902 >>> _isnumber("inf")
903 True
904 >>> from fractions import Fraction
905 >>> _isnumber(Fraction(1,3))
906 True
907
908 """
909 return (
910 # fast path
911 type(string) in (float, int)
912 # covers 'NaN', +/- 'inf', and eg. '1e2', as well as any type
913 # convertible to int/float.
914 or (
915 _isconvertible(float, string)
916 and (
917 # some other type convertible to float
918 not isinstance(string, (str, bytes))
919 # or, a numeric string eg. "1e1...", "NaN", ..., but isn't
920 # just an over/underflow
921 or (
922 not (math.isinf(float(string)) or math.isnan(float(string)))
923 or string.lower() in ["inf", "-inf", "nan"]
924 )
925 )
926 )
927 )
928
929
930def _isint(string, inttype=int):
931 """
932 >>> _isint("123")
933 True
934 >>> _isint("123.45")
935 False
936 """
937 return (
938 type(string) is inttype
939 or (
940 (hasattr(string, "is_integer") or hasattr(string, "__array__"))
941 and str(type(string)).startswith("<class 'numpy.int")
942 ) # numpy.int64 and similar
943 or (
944 isinstance(string, (bytes, str)) and _isconvertible(inttype, string)
945 ) # integer as string
946 )
947
948
949def _isbool(string):
950 """
951 >>> _isbool(True)
952 True
953 >>> _isbool("False")
954 True
955 >>> _isbool(1)
956 False
957 """
958 return type(string) is bool or (
959 isinstance(string, (bytes, str)) and string in ("True", "False")
960 )
961
962
963def _type(string, has_invisible=True, numparse=True):
964 """The least generic type (type(None), int, float, str, unicode).
965
966 Treats empty string as missing for the purposes of type deduction, so as to not influence
967 the type of an otherwise complete column; does *not* result in missingval replacement!
968
969 >>> _type(None) is type(None)
970 True
971 >>> _type("") is type(None)
972 True
973 >>> _type("foo") is type("")
974 True
975 >>> _type("1") is type(1)
976 True
977 >>> _type('\x1b[31m42\x1b[0m') is type(42)
978 True
979 >>> _type('\x1b[31m42\x1b[0m') is type(42)
980 True
981
982 """
983
984 if has_invisible and isinstance(string, (str, bytes)):
985 string = _strip_ansi(string)
986
987 if string is None or (isinstance(string, (bytes, str)) and not string):
988 return type(None)
989 elif hasattr(string, "isoformat"): # datetime.datetime, date, and time
990 return str
991 elif _isbool(string):
992 return bool
993 elif numparse and (
994 _isint(string)
995 or (
996 isinstance(string, str)
997 and _isnumber_with_thousands_separator(string)
998 and "." not in string
999 )
1000 ):
1001 return int
1002 elif numparse and (
1003 _isnumber(string)
1004 or (isinstance(string, str) and _isnumber_with_thousands_separator(string))
1005 ):
1006 return float
1007 elif isinstance(string, bytes):
1008 return bytes
1009 else:
1010 return str
1011
1012
def _afterpoint(string):
    """Count the characters after the decimal point (or, when there is no
    point, after the exponent marker ``e``); -1 if neither is present or the
    value is not a non-integer number.

    >>> _afterpoint("123.45")
    2
    >>> _afterpoint("1001")
    -1
    >>> _afterpoint("eggs")
    -1
    >>> _afterpoint("123e45")
    2
    >>> _afterpoint("123,456.78")
    2

    """
    if not (_isnumber(string) or _isnumber_with_thousands_separator(string)):
        return -1  # not a number
    if _isint(string):
        return -1
    pos = string.rfind(".")
    if pos < 0:
        # no decimal point: fall back to the exponent marker
        pos = string.lower().rfind("e")
    if pos < 0:
        return -1  # no point
    return len(string) - pos - 1
1040
1041
1042def _padleft(width, s):
1043 """Flush right.
1044
1045 >>> _padleft(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430'
1046 True
1047
1048 """
1049 fmt = "{0:>%ds}" % width
1050 return fmt.format(s)
1051
1052
1053def _padright(width, s):
1054 """Flush left.
1055
1056 >>> _padright(6, '\u044f\u0439\u0446\u0430') == '\u044f\u0439\u0446\u0430 '
1057 True
1058
1059 """
1060 fmt = "{0:<%ds}" % width
1061 return fmt.format(s)
1062
1063
1064def _padboth(width, s):
1065 """Center string.
1066
1067 >>> _padboth(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430 '
1068 True
1069
1070 """
1071 fmt = "{0:^%ds}" % width
1072 return fmt.format(s)
1073
1074
def _padnone(ignore_width, s):
    """Return *s* unchanged; the width argument is ignored.

    Has the same ``(width, s)`` signature as ``_padleft``, ``_padright`` and
    ``_padboth``.
    """
    return s
1077
1078
def _strip_ansi(s):
    r"""Remove ANSI escape sequences, both CSI (color codes, etc) and OSC hyperlinks.

    CSI sequences are dropped, while an OSC hyperlink is replaced by its link
    text (submatch 4 of the pattern). Showing the URI instead is not
    supported. Values that are not ``str`` are handled with the bytes
    pattern.

    >>> repr(_strip_ansi('\x1B]8;;https://example.com\x1B\\This is a link\x1B]8;;\x1B\\'))
    "'This is a link'"

    >>> repr(_strip_ansi('\x1b[31mred\x1b[0m text'))
    "'red text'"

    """
    # anything that is not a str is treated as a bytestring
    pattern = _ansi_codes if isinstance(s, str) else _ansi_codes_bytes
    return pattern.sub(r"\4", s)
1097
1098
def _visible_width(s):
    """Width of `s` as printed, with ANSI escape sequences left out of the count.

    >>> _visible_width('\x1b[31mhello\x1b[0m'), _visible_width("world")
    (5, 5)

    """
    # optional wide-character support
    use_wcwidth = wcwidth is not None and WIDE_CHARS_MODE
    measure = wcwidth.wcswidth if use_wcwidth else len
    # non-string values are measured through their str() form
    text = _strip_ansi(s) if isinstance(s, (str, bytes)) else str(s)
    return measure(text)
1115
1116
def _is_multiline(s):
    """Tell whether `s` (str or bytestring) contains a line-break code."""
    # anything that is not a str is treated as a bytestring
    pattern = _multiline_codes if isinstance(s, str) else _multiline_codes_bytes
    return re.search(pattern, s) is not None
1122
1123
1124def _multiline_width(multiline_s, line_width_fn=len):
1125 """Visible width of a potentially multiline content."""
1126 return max(map(line_width_fn, re.split("[\r\n]", multiline_s)))
1127
1128
1129def _choose_width_fn(has_invisible, enable_widechars, is_multiline):
1130 """Return a function to calculate visible cell width."""
1131 if has_invisible:
1132 line_width_fn = _visible_width
1133 elif enable_widechars: # optional wide-character support if available
1134 line_width_fn = wcwidth.wcswidth
1135 else:
1136 line_width_fn = len
1137 if is_multiline:
1138 width_fn = lambda s: _multiline_width(s, line_width_fn) # noqa
1139 else:
1140 width_fn = line_width_fn
1141 return width_fn
1142
1143
def _align_column_choose_padfn(strings, alignment, has_invisible, preserve_whitespace):
    """Prepare a column's strings for `alignment` and select the padding function.

    Returns a `(strings, padfn)` pair. For "decimal" alignment the strings are
    extended with trailing spaces so that decimal points line up once the
    values are flushed right. For a falsy alignment the strings are returned
    untouched together with the no-op padder. In every other case surrounding
    whitespace is stripped unless `preserve_whitespace` is set.
    """
    if alignment == "decimal":
        # measure the digits after the point on the text without ANSI codes
        decimals = [
            _afterpoint(_strip_ansi(s) if has_invisible else s) for s in strings
        ]
        most = max(decimals)
        aligned = [s + (most - count) * " " for s, count in zip(strings, decimals)]
        return aligned, _padleft

    if alignment == "right":
        padfn = _padleft
    elif alignment == "center":
        padfn = _padboth
    elif not alignment:
        return strings, _padnone
    else:
        padfn = _padright

    if not preserve_whitespace:
        strings = [s.strip() for s in strings]
    return strings, padfn
1168
1169
1170def _align_column_choose_width_fn(has_invisible, enable_widechars, is_multiline):
1171 if has_invisible:
1172 line_width_fn = _visible_width
1173 elif enable_widechars: # optional wide-character support if available
1174 line_width_fn = wcwidth.wcswidth
1175 else:
1176 line_width_fn = len
1177 if is_multiline:
1178 width_fn = lambda s: _align_column_multiline_width(s, line_width_fn) # noqa
1179 else:
1180 width_fn = line_width_fn
1181 return width_fn
1182
1183
1184def _align_column_multiline_width(multiline_s, line_width_fn=len):
1185 """Visible width of a potentially multiline content."""
1186 return list(map(line_width_fn, re.split("[\r\n]", multiline_s)))
1187
1188
1189def _flat_list(nested_list):
1190 ret = []
1191 for item in nested_list:
1192 if isinstance(item, list):
1193 ret.extend(item)
1194 else:
1195 ret.append(item)
1196 return ret
1197
1198
def _align_column(
    strings,
    alignment,
    minwidth=0,
    has_invisible=True,
    enable_widechars=False,
    is_multiline=False,
    preserve_whitespace=False,
):
    """[string] -> [padded_string]

    Pad every cell of one column to a common visible width, which is the
    widest cell (or line of a cell, in multiline mode) but never less than
    `minwidth`.
    """
    strings, padfn = _align_column_choose_padfn(
        strings, alignment, has_invisible, preserve_whitespace
    )
    width_fn = _align_column_choose_width_fn(
        has_invisible, enable_widechars, is_multiline
    )

    s_widths = [width_fn(cell) for cell in strings]
    maxwidth = max(max(_flat_list(s_widths)), minwidth)
    needs_correction = enable_widechars or has_invisible

    # TODO: refactor column alignment in single-line and multiline modes
    if not needs_correction:
        # len() is the visible width, so padding to maxwidth is enough
        if is_multiline:
            return [
                "\n".join(padfn(maxwidth, line) for line in cell.splitlines())
                for cell in strings
            ]
        return [padfn(maxwidth, cell) for cell in strings]

    # enable wide-character width corrections:
    # wcswidth and _visible_width don't count invisible characters, so the
    # target handed to padfn is widened by (len - visible width) and padfn
    # doesn't need to apply another correction
    if not is_multiline:
        return [
            padfn(maxwidth - (visible - len(cell)), cell)
            for cell, visible in zip(strings, s_widths)
        ]

    padded_strings = []
    for cell, line_widths in zip(strings, s_widths):
        raw_lens = [len(line) for line in re.split("[\r\n]", cell)]
        targets = [
            maxwidth - (visible - raw) for visible, raw in zip(line_widths, raw_lens)
        ]
        lines = cell.splitlines() or cell
        padded_strings.append(
            "\n".join(padfn(target, line) for line, target in zip(lines, targets))
        )
    return padded_strings
1249
1250
1251def _more_generic(type1, type2):
1252 types = {
1253 type(None): 0,
1254 bool: 1,
1255 int: 2,
1256 float: 3,
1257 bytes: 4,
1258 str: 5,
1259 }
1260 invtypes = {
1261 5: str,
1262 4: bytes,
1263 3: float,
1264 2: int,
1265 1: bool,
1266 0: type(None),
1267 }
1268 moregeneric = max(types.get(type1, 5), types.get(type2, 5))
1269 return invtypes[moregeneric]
1270
1271
def _column_type(strings, has_invisible=True, numparse=True):
    """Deduce the least generic type that every value of a column converts to.

    >>> _column_type([True, False]) is bool
    True
    >>> _column_type(["1", "2"]) is int
    True
    >>> _column_type(["1", "2.3"]) is float
    True
    >>> _column_type(["1", "2.3", "four"]) is str
    True
    >>> _column_type(["four", '\u043f\u044f\u0442\u044c']) is str
    True
    >>> _column_type([None, "brux"]) is str
    True
    >>> _column_type([1, 2, None]) is int
    True
    >>> import datetime as dt
    >>> _column_type([dt.datetime(1991,2,19), dt.time(17,35)]) is str
    True

    """
    cell_types = [_type(value, has_invisible, numparse) for value in strings]
    # fold the per-cell types together, starting from bool
    column_type = bool
    for cell_type in cell_types:
        column_type = _more_generic(column_type, cell_type)
    return column_type
1296
1297
1298def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
1299 """Format a value according to its deduced type. Empty values are deemed valid for any type.
1300
1301 Unicode is supported:
1302
1303 >>> hrow = ['\u0431\u0443\u043a\u0432\u0430', '\u0446\u0438\u0444\u0440\u0430'] ; \
1304 tbl = [['\u0430\u0437', 2], ['\u0431\u0443\u043a\u0438', 4]] ; \
1305 good_result = '\\u0431\\u0443\\u043a\\u0432\\u0430 \\u0446\\u0438\\u0444\\u0440\\u0430\\n------- -------\\n\\u0430\\u0437 2\\n\\u0431\\u0443\\u043a\\u0438 4' ; \
1306 tabulate(tbl, headers=hrow) == good_result
1307 True
1308
1309 """ # noqa
1310 if val is None:
1311 return missingval
1312 if isinstance(val, (bytes, str)) and not val:
1313 return ""
1314
1315 if valtype is str:
1316 return f"{val}"
1317 elif valtype is int:
1318 if isinstance(val, str):
1319 val_striped = val.encode("unicode_escape").decode("utf-8")
1320 colored = re.search(
1321 r"(\\[xX]+[0-9a-fA-F]+\[\d+[mM]+)([0-9.]+)(\\.*)$", val_striped
1322 )
1323 if colored:
1324 total_groups = len(colored.groups())
1325 if total_groups == 3:
1326 digits = colored.group(2)
1327 if digits.isdigit():
1328 val_new = (
1329 colored.group(1)
1330 + format(int(digits), intfmt)
1331 + colored.group(3)
1332 )
1333 val = val_new.encode("utf-8").decode("unicode_escape")
1334 intfmt = ""
1335 return format(val, intfmt)
1336 elif valtype is bytes:
1337 try:
1338 return str(val, "ascii")
1339 except (TypeError, UnicodeDecodeError):
1340 return str(val)
1341 elif valtype is float:
1342 is_a_colored_number = has_invisible and isinstance(val, (str, bytes))
1343 if is_a_colored_number:
1344 raw_val = _strip_ansi(val)
1345 formatted_val = format(float(raw_val), floatfmt)
1346 return val.replace(raw_val, formatted_val)
1347 else:
1348 if isinstance(val, str) and "," in val:
1349 val = val.replace(",", "") # handle thousands-separators
1350 return format(float(val), floatfmt)
1351 else:
1352 return f"{val}"
1353
1354
1355def _align_header(
1356 header, alignment, width, visible_width, is_multiline=False, width_fn=None
1357):
1358 "Pad string header to width chars given known visible_width of the header."
1359 if is_multiline:
1360 header_lines = re.split(_multiline_codes, header)
1361 padded_lines = [
1362 _align_header(h, alignment, width, width_fn(h)) for h in header_lines
1363 ]
1364 return "\n".join(padded_lines)
1365 # else: not multiline
1366 ninvisible = len(header) - visible_width
1367 width += ninvisible
1368 if alignment == "left":
1369 return _padright(width, header)
1370 elif alignment == "center":
1371 return _padboth(width, header)
1372 elif not alignment:
1373 return f"{header}"
1374 else:
1375 return _padleft(width, header)
1376
1377
1378def _remove_separating_lines(rows):
1379 if isinstance(rows, list):
1380 separating_lines = []
1381 sans_rows = []
1382 for index, row in enumerate(rows):
1383 if _is_separating_line(row):
1384 separating_lines.append(index)
1385 else:
1386 sans_rows.append(row)
1387 return sans_rows, separating_lines
1388 else:
1389 return rows, None
1390
1391
1392def _reinsert_separating_lines(rows, separating_lines):
1393 if separating_lines:
1394 for index in separating_lines:
1395 rows.insert(index, SEPARATING_LINE)
1396
1397
1398def _prepend_row_index(rows, index):
1399 """Add a left-most index column."""
1400 if index is None or index is False:
1401 return rows
1402 if isinstance(index, Sized) and len(index) != len(rows):
1403 raise ValueError(
1404 "index must be as long as the number of data rows: "
1405 + f"len(index)={len(index)} len(rows)={len(rows)}"
1406 )
1407 sans_rows, separating_lines = _remove_separating_lines(rows)
1408 new_rows = []
1409 index_iter = iter(index)
1410 for row in sans_rows:
1411 index_v = next(index_iter)
1412 new_rows.append([index_v] + list(row))
1413 rows = new_rows
1414 _reinsert_separating_lines(rows, separating_lines)
1415 return rows
1416
1417
1418def _bool(val):
1419 "A wrapper around standard bool() which doesn't throw on NumPy arrays"
1420 try:
1421 return bool(val)
1422 except ValueError: # val is likely to be a numpy array with many elements
1423 return False
1424
1425
def _normalize_tabular_data(tabular_data, headers, showindex="default"):
    """Transform a supported data type to a list of lists, and a list of headers,
    with headers padding.

    Supported tabular data types:

    * list-of-lists or another iterable of iterables

    * list of named tuples (usually used with headers="keys")

    * list of dicts (usually used with headers="keys")

    * list of OrderedDicts (usually used with headers="keys")

    * list of dataclasses (usually used with headers="keys")

    * 2D NumPy arrays

    * NumPy record arrays (usually used with headers="keys")

    * dict of iterables (usually used with headers="keys")

    * pandas.DataFrame (usually used with headers="keys")

    The first row can be used as headers if headers="firstrow",
    column indices can be used as headers if headers="keys".

    If showindex="default", show row indices of the pandas.DataFrame.
    If showindex="always", show row indices for all types of data.
    If showindex="never", don't show row indices for all types of data.
    If showindex is an iterable, show its values as row indices.

    Returns a tuple `(rows, headers, headers_pad)`: `rows` is a list in which
    every row is a list (separating lines are kept as they are), `headers` is
    a list of strings, and `headers_pad` is the number of empty headers that
    were prepended because the first row has more columns than there were
    headers.

    Raises TypeError if the data (or, for a dict, its values) is not iterable,
    and ValueError for an object that has keys/values but looks like neither a
    dict nor a DataFrame, or when headers for a list of dicts is neither a
    dict nor a keyword.

    """

    # Some containers refuse truth-testing; turn those into a plain list so
    # the comparisons and truth tests on `headers` below work.
    try:
        bool(headers)
    except ValueError:  # numpy.ndarray, pandas.core.index.Index, ...
        headers = list(headers)

    err_msg = (
        "\n\nTo build a table python-tabulate requires two-dimensional data "
        "like a list of lists or similar."
        "\nDid you forget a pair of extra [] or ',' in ()?"
    )
    # stays None unless the data brings its own row index (DataFrame branch)
    index = None
    if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"):
        # dict-like and pandas.DataFrame?
        if hasattr(tabular_data.values, "__call__"):
            # likely a conventional dict
            keys = tabular_data.keys()
            try:
                rows = list(
                    izip_longest(*tabular_data.values())
                )  # columns have to be transposed
            except TypeError:  # not iterable
                raise TypeError(err_msg)

        elif hasattr(tabular_data, "index"):
            # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0)
            keys = list(tabular_data)
            if (
                showindex in ["default", "always", True]
                and tabular_data.index.name is not None
            ):
                # the index will be shown as a column: its name(s) become
                # the left-most key(s)
                if isinstance(tabular_data.index.name, list):
                    keys[:0] = tabular_data.index.name
                else:
                    keys[:0] = [tabular_data.index.name]
            vals = tabular_data.values  # values matrix doesn't need to be transposed
            # for DataFrames add an index per default
            index = list(tabular_data.index)
            rows = [list(row) for row in vals]
        else:
            raise ValueError("tabular data doesn't appear to be a dict or a DataFrame")

        if headers == "keys":
            headers = list(map(str, keys))  # headers should be strings

    else:  # it's a usual iterable of iterables, or a NumPy array, or an iterable of dataclasses
        try:
            rows = list(tabular_data)
        except TypeError:  # not iterable
            raise TypeError(err_msg)

        if headers == "keys" and not rows:
            # an empty table (issue #81)
            headers = []
        elif (
            headers == "keys"
            and hasattr(tabular_data, "dtype")
            and getattr(tabular_data.dtype, "names")
        ):
            # numpy record array
            headers = tabular_data.dtype.names
        elif (
            headers == "keys"
            and len(rows) > 0
            and isinstance(rows[0], tuple)
            and hasattr(rows[0], "_fields")
        ):
            # namedtuple
            headers = list(map(str, rows[0]._fields))
        elif len(rows) > 0 and hasattr(rows[0], "keys") and hasattr(rows[0], "values"):
            # dict-like object
            uniq_keys = set()  # implements hashed lookup
            keys = []  # storage for set
            if headers == "firstrow":
                # the first dict supplies the header texts and is dropped
                # from the data rows
                firstdict = rows[0] if len(rows) > 0 else {}
                keys.extend(firstdict.keys())
                uniq_keys.update(keys)
                rows = rows[1:]
            for row in rows:
                for k in row.keys():
                    # Save unique items in input order
                    if k not in uniq_keys:
                        keys.append(k)
                        uniq_keys.add(k)
            if headers == "keys":
                headers = keys
            elif isinstance(headers, dict):
                # a dict of headers for a list of dicts
                headers = [headers.get(k, k) for k in keys]
                headers = list(map(str, headers))
            elif headers == "firstrow":
                # `rows` no longer contains the first dict at this point
                if len(rows) > 0:
                    headers = [firstdict.get(k, k) for k in keys]
                    headers = list(map(str, headers))
                else:
                    headers = []
            elif headers:
                raise ValueError(
                    "headers for a list of dicts is not a dict or a keyword"
                )
            # one column per collected key; keys missing from a row give None
            rows = [[row.get(k) for k in keys] for row in rows]

        elif (
            headers == "keys"
            and hasattr(tabular_data, "description")
            and hasattr(tabular_data, "fetchone")
            and hasattr(tabular_data, "rowcount")
        ):
            # Python Database API cursor object (PEP 0249)
            # print tabulate(cursor, headers='keys')
            headers = [column[0] for column in tabular_data.description]

        elif (
            dataclasses is not None
            and len(rows) > 0
            and dataclasses.is_dataclass(rows[0])
        ):
            # Python's dataclass
            # the fields of the first row define the columns for all rows
            field_names = [field.name for field in dataclasses.fields(rows[0])]
            if headers == "keys":
                headers = field_names
            rows = [[getattr(row, f) for f in field_names] for row in rows]

        elif headers == "keys" and len(rows) > 0:
            # keys are column indices
            headers = list(map(str, range(len(rows[0]))))

    # take headers from the first row if necessary
    if headers == "firstrow" and len(rows) > 0:
        if index is not None:
            # the first index value heads the index column and is consumed
            # together with the first row
            headers = [index[0]] + list(rows[0])
            index = index[1:]
        else:
            headers = rows[0]
        headers = list(map(str, headers))  # headers should be strings
        rows = rows[1:]
    elif headers == "firstrow":
        headers = []

    headers = list(map(str, headers))
    #    rows = list(map(list, rows))
    # separating lines are kept as they are; every other row becomes a list
    rows = list(map(lambda r: r if _is_separating_line(r) else list(r), rows))

    # add or remove an index column
    showindex_is_a_str = type(showindex) in [str, bytes]
    if showindex == "default" and index is not None:
        rows = _prepend_row_index(rows, index)
    elif isinstance(showindex, Sized) and not showindex_is_a_str:
        rows = _prepend_row_index(rows, list(showindex))
    elif isinstance(showindex, Iterable) and not showindex_is_a_str:
        rows = _prepend_row_index(rows, showindex)
    elif showindex == "always" or (_bool(showindex) and not showindex_is_a_str):
        if index is None:
            index = list(range(len(rows)))
        rows = _prepend_row_index(rows, index)
    elif showindex == "never" or (not _bool(showindex) and not showindex_is_a_str):
        # nothing to add: rows are left without an index column
        pass

    # pad with empty headers for initial columns if necessary
    headers_pad = 0
    if headers and len(rows) > 0:
        headers_pad = max(0, len(rows[0]) - len(headers))
        headers = [""] * headers_pad + headers

    return rows, headers, headers_pad
1624
1625
def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True):
    """Wrap the text of every cell to the maximum width of its column.

    `list_of_lists` holds the table rows and `colwidths` one maximum width per
    column, where None means "do not wrap this column". `numparses` is a
    single flag or one flag per column saying whether numbers are parsed
    there.

    Returns a new list of rows. Cells that are numbers in a number-parsing
    column, cells of unwrapped columns and None cells are passed through
    unchanged. Every other cell is converted to text, bytes being decoded,
    and wrapped line by line, with blank lines dropped.
    """
    num_cols = len(list_of_lists[0]) if len(list_of_lists) else 0
    numparses = _expand_iterable(numparses, num_cols, True)

    result = []
    for row in list_of_lists:
        new_row = []
        for cell, width, numparse in zip(row, colwidths, numparses):
            # Leave alone what must not be wrapped: parsed numbers, columns
            # without a width limit, and None (so that the missing-value
            # placeholder can still be applied when the cell is formatted).
            if (_isnumber(cell) and numparse) or width is None or cell is None:
                new_row.append(cell)
                continue

            wrapper = _CustomTextWrap(width=width)
            # Wrapping needs text. Converting with the type deduced for the
            # cell is not safe here: float() rejects "1,234", and a deduced
            # type other than str would not give splittable text. Doesn't
            # honour custom floatfmt/intfmt.
            casted_cell = _to_str(cell)
            wrapped = [
                "\n".join(wrapper.wrap(line))
                for line in casted_cell.splitlines()
                if line.strip() != ""
            ]
            new_row.append("\n".join(wrapped))
        result.append(new_row)

    return result
1662
1663
1664def _to_str(s, encoding="utf8", errors="ignore"):
1665 """
1666 A type safe wrapper for converting a bytestring to str. This is essentially just
1667 a wrapper around .decode() intended for use with things like map(), but with some
1668 specific behavior:
1669
1670 1. if the given parameter is not a bytestring, it is returned unmodified
1671 2. decode() is called for the given parameter and assumes utf8 encoding, but the
1672 default error behavior is changed from 'strict' to 'ignore'
1673
1674 >>> repr(_to_str(b'foo'))
1675 "'foo'"
1676
1677 >>> repr(_to_str('foo'))
1678 "'foo'"
1679
1680 >>> repr(_to_str(42))
1681 "'42'"
1682
1683 """
1684 if isinstance(s, bytes):
1685 return s.decode(encoding=encoding, errors=errors)
1686 return str(s)
1687
1688
1689def tabulate(
1690 tabular_data,
1691 headers=(),
1692 tablefmt="simple",
1693 floatfmt=_DEFAULT_FLOATFMT,
1694 intfmt=_DEFAULT_INTFMT,
1695 numalign=_DEFAULT_ALIGN,
1696 stralign=_DEFAULT_ALIGN,
1697 missingval=_DEFAULT_MISSINGVAL,
1698 showindex="default",
1699 disable_numparse=False,
1700 colglobalalign=None,
1701 colalign=None,
1702 preserve_whitespace=False,
1703 maxcolwidths=None,
1704 headersglobalalign=None,
1705 headersalign=None,
1706 rowalign=None,
1707 maxheadercolwidths=None,
1708):
1709 """Format a fixed width table for pretty printing.
1710
1711 >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]]))
1712 --- ---------
1713 1 2.34
1714 -56 8.999
1715 2 10001
1716 --- ---------
1717
1718 The first required argument (`tabular_data`) can be a
1719 list-of-lists (or another iterable of iterables), a list of named
1720 tuples, a dictionary of iterables, an iterable of dictionaries,
1721 an iterable of dataclasses, a two-dimensional NumPy array,
1722 NumPy record array, or a Pandas' dataframe.
1723
1724
1725 Table headers
1726 -------------
1727
1728 To print nice column headers, supply the second argument (`headers`):
1729
1730 - `headers` can be an explicit list of column headers
1731 - if `headers="firstrow"`, then the first row of data is used
1732 - if `headers="keys"`, then dictionary keys or column indices are used
1733
1734 Otherwise a headerless table is produced.
1735
1736 If the number of headers is less than the number of columns, they
1737 are supposed to be names of the last columns. This is consistent
1738 with the plain-text format of R and Pandas' dataframes.
1739
1740 >>> print(tabulate([["sex","age"],["Alice","F",24],["Bob","M",19]],
1741 ... headers="firstrow"))
1742 sex age
1743 ----- ----- -----
1744 Alice F 24
1745 Bob M 19
1746
1747 By default, pandas.DataFrame data have an additional column called
1748 row index. To add a similar column to all other types of data,
1749 use `showindex="always"` or `showindex=True`. To suppress row indices
1750 for all types of data, pass `showindex="never" or `showindex=False`.
1751 To add a custom row index column, pass `showindex=some_iterable`.
1752
1753 >>> print(tabulate([["F",24],["M",19]], showindex="always"))
1754 - - --
1755 0 F 24
1756 1 M 19
1757 - - --
1758
1759
1760 Column and Headers alignment
1761 ----------------------------
1762
1763 `tabulate` tries to detect column types automatically, and aligns
1764 the values properly. By default it aligns decimal points of the
1765 numbers (or flushes integer numbers to the right), and flushes
1766 everything else to the left. Possible column alignments
1767 (`numalign`, `stralign`) are: "right", "center", "left", "decimal"
1768 (only for `numalign`), and None (to disable alignment).
1769
1770 `colglobalalign` allows for global alignment of columns, before any
1771 specific override from `colalign`. Possible values are: None
1772 (defaults according to coltype), "right", "center", "decimal",
1773 "left".
1774 `colalign` allows for column-wise override starting from left-most
1775 column. Possible values are: "global" (no override), "right",
1776 "center", "decimal", "left".
1777 `headersglobalalign` allows for global headers alignment, before any
1778 specific override from `headersalign`. Possible values are: None
1779 (follow columns alignment), "right", "center", "left".
1780 `headersalign` allows for header-wise override starting from left-most
1781 given header. Possible values are: "global" (no override), "same"
1782 (follow column alignment), "right", "center", "left".
1783
1784 Note on intended behaviour: If there is no `tabular_data`, any column
1785 alignment argument is ignored. Hence, in this case, header
1786 alignment cannot be inferred from column alignment.
1787
1788 Table formats
1789 -------------
1790
1791 `intfmt` is a format specification used for columns which
1792 contain numeric data without a decimal point. This can also be
1793 a list or tuple of format strings, one per column.
1794
1795 `floatfmt` is a format specification used for columns which
1796 contain numeric data with a decimal point. This can also be
1797 a list or tuple of format strings, one per column.
1798
1799 `None` values are replaced with a `missingval` string (like
1800 `floatfmt`, this can also be a list of values for different
1801 columns):
1802
1803 >>> print(tabulate([["spam", 1, None],
1804 ... ["eggs", 42, 3.14],
1805 ... ["other", None, 2.7]], missingval="?"))
1806 ----- -- ----
1807 spam 1 ?
1808 eggs 42 3.14
1809 other ? 2.7
1810 ----- -- ----
1811
1812 Various plain-text table formats (`tablefmt`) are supported:
1813 'plain', 'simple', 'grid', 'pipe', 'orgtbl', 'rst', 'mediawiki',
1814 'latex', 'latex_raw', 'latex_booktabs', 'latex_longtable' and tsv.
1815 Variable `tabulate_formats`contains the list of currently supported formats.
1816
1817 "plain" format doesn't use any pseudographics to draw tables,
1818 it separates columns with a double space:
1819
1820 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1821 ... ["strings", "numbers"], "plain"))
1822 strings numbers
1823 spam 41.9999
1824 eggs 451
1825
1826 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="plain"))
1827 spam 41.9999
1828 eggs 451
1829
1830 "simple" format is like Pandoc simple_tables:
1831
1832 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1833 ... ["strings", "numbers"], "simple"))
1834 strings numbers
1835 --------- ---------
1836 spam 41.9999
1837 eggs 451
1838
1839 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="simple"))
1840 ---- --------
1841 spam 41.9999
1842 eggs 451
1843 ---- --------
1844
1845 "grid" is similar to tables produced by Emacs table.el package or
1846 Pandoc grid_tables:
1847
1848 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1849 ... ["strings", "numbers"], "grid"))
1850 +-----------+-----------+
1851 | strings | numbers |
1852 +===========+===========+
1853 | spam | 41.9999 |
1854 +-----------+-----------+
1855 | eggs | 451 |
1856 +-----------+-----------+
1857
1858 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="grid"))
1859 +------+----------+
1860 | spam | 41.9999 |
1861 +------+----------+
1862 | eggs | 451 |
1863 +------+----------+
1864
1865 "simple_grid" draws a grid using single-line box-drawing
1866 characters:
1867
1868 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1869 ... ["strings", "numbers"], "simple_grid"))
1870 ┌───────────┬───────────┐
1871 │ strings │ numbers │
1872 ├───────────┼───────────┤
1873 │ spam │ 41.9999 │
1874 ├───────────┼───────────┤
1875 │ eggs │ 451 │
1876 └───────────┴───────────┘
1877
1878 "rounded_grid" draws a grid using single-line box-drawing
1879 characters with rounded corners:
1880
1881 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1882 ... ["strings", "numbers"], "rounded_grid"))
1883 ╭───────────┬───────────╮
1884 │ strings │ numbers │
1885 ├───────────┼───────────┤
1886 │ spam │ 41.9999 │
1887 ├───────────┼───────────┤
1888 │ eggs │ 451 │
1889 ╰───────────┴───────────╯
1890
1891 "heavy_grid" draws a grid using bold (thick) single-line box-drawing
1892 characters:
1893
1894 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1895 ... ["strings", "numbers"], "heavy_grid"))
1896 ┏━━━━━━━━━━━┳━━━━━━━━━━━┓
1897 ┃ strings ┃ numbers ┃
1898 ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
1899 ┃ spam ┃ 41.9999 ┃
1900 ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
1901 ┃ eggs ┃ 451 ┃
1902 ┗━━━━━━━━━━━┻━━━━━━━━━━━┛
1903
1904 "mixed_grid" draws a grid using a mix of light (thin) and heavy (thick) lines
1905 box-drawing characters:
1906
1907 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1908 ... ["strings", "numbers"], "mixed_grid"))
1909 ┍━━━━━━━━━━━┯━━━━━━━━━━━┑
1910 │ strings │ numbers │
1911 ┝━━━━━━━━━━━┿━━━━━━━━━━━┥
1912 │ spam │ 41.9999 │
1913 ├───────────┼───────────┤
1914 │ eggs │ 451 │
1915 ┕━━━━━━━━━━━┷━━━━━━━━━━━┙
1916
1917 "double_grid" draws a grid using double-line box-drawing
1918 characters:
1919
1920 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1921 ... ["strings", "numbers"], "double_grid"))
1922 ╔═══════════╦═══════════╗
1923 ║ strings ║ numbers ║
1924 ╠═══════════╬═══════════╣
1925 ║ spam ║ 41.9999 ║
1926 ╠═══════════╬═══════════╣
1927 ║ eggs ║ 451 ║
1928 ╚═══════════╩═══════════╝
1929
1930 "fancy_grid" draws a grid using a mix of single and
1931 double-line box-drawing characters:
1932
1933 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1934 ... ["strings", "numbers"], "fancy_grid"))
1935 ╒═══════════╤═══════════╕
1936 │ strings │ numbers │
1937 ╞═══════════╪═══════════╡
1938 │ spam │ 41.9999 │
1939 ├───────────┼───────────┤
1940 │ eggs │ 451 │
1941 ╘═══════════╧═══════════╛
1942
1943 "colon_grid" is similar to "grid" but uses colons only to define
1944 columnwise content alignment, without whitespace padding,
1945 similar to the alignment specification of Pandoc `grid_tables`:
1946
1947 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1948 ... ["strings", "numbers"], "colon_grid"))
1949 +-----------+-----------+
1950 | strings | numbers |
1951 +:==========+:==========+
1952 | spam | 41.9999 |
1953 +-----------+-----------+
1954 | eggs | 451 |
1955 +-----------+-----------+
1956
1957 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1958 ... ["strings", "numbers"], "colon_grid",
1959 ... colalign=["right", "left"]))
1960 +-----------+-----------+
1961 | strings | numbers |
1962 +==========:+:==========+
1963 | spam | 41.9999 |
1964 +-----------+-----------+
1965 | eggs | 451 |
1966 +-----------+-----------+
1967
1968 "outline" is the same as the "grid" format but doesn't draw lines between rows:
1969
1970 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1971 ... ["strings", "numbers"], "outline"))
1972 +-----------+-----------+
1973 | strings | numbers |
1974 +===========+===========+
1975 | spam | 41.9999 |
1976 | eggs | 451 |
1977 +-----------+-----------+
1978
1979 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="outline"))
1980 +------+----------+
1981 | spam | 41.9999 |
1982 | eggs | 451 |
1983 +------+----------+
1984
1985 "simple_outline" is the same as the "simple_grid" format but doesn't draw lines between rows:
1986
1987 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1988 ... ["strings", "numbers"], "simple_outline"))
1989 ┌───────────┬───────────┐
1990 │ strings │ numbers │
1991 ├───────────┼───────────┤
1992 │ spam │ 41.9999 │
1993 │ eggs │ 451 │
1994 └───────────┴───────────┘
1995
1996 "rounded_outline" is the same as the "rounded_grid" format but doesn't draw lines between rows:
1997
1998 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
1999 ... ["strings", "numbers"], "rounded_outline"))
2000 ╭───────────┬───────────╮
2001 │ strings │ numbers │
2002 ├───────────┼───────────┤
2003 │ spam │ 41.9999 │
2004 │ eggs │ 451 │
2005 ╰───────────┴───────────╯
2006
2007 "heavy_outline" is the same as the "heavy_grid" format but doesn't draw lines between rows:
2008
2009 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2010 ... ["strings", "numbers"], "heavy_outline"))
2011 ┏━━━━━━━━━━━┳━━━━━━━━━━━┓
2012 ┃ strings ┃ numbers ┃
2013 ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
2014 ┃ spam ┃ 41.9999 ┃
2015 ┃ eggs ┃ 451 ┃
2016 ┗━━━━━━━━━━━┻━━━━━━━━━━━┛
2017
2018 "mixed_outline" is the same as the "mixed_grid" format but doesn't draw lines between rows:
2019
2020 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2021 ... ["strings", "numbers"], "mixed_outline"))
2022 ┍━━━━━━━━━━━┯━━━━━━━━━━━┑
2023 │ strings │ numbers │
2024 ┝━━━━━━━━━━━┿━━━━━━━━━━━┥
2025 │ spam │ 41.9999 │
2026 │ eggs │ 451 │
2027 ┕━━━━━━━━━━━┷━━━━━━━━━━━┙
2028
2029 "double_outline" is the same as the "double_grid" format but doesn't draw lines between rows:
2030
2031 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2032 ... ["strings", "numbers"], "double_outline"))
2033 ╔═══════════╦═══════════╗
2034 ║ strings ║ numbers ║
2035 ╠═══════════╬═══════════╣
2036 ║ spam ║ 41.9999 ║
2037 ║ eggs ║ 451 ║
2038 ╚═══════════╩═══════════╝
2039
2040 "fancy_outline" is the same as the "fancy_grid" format but doesn't draw lines between rows:
2041
2042 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2043 ... ["strings", "numbers"], "fancy_outline"))
2044 ╒═══════════╤═══════════╕
2045 │ strings │ numbers │
2046 ╞═══════════╪═══════════╡
2047 │ spam │ 41.9999 │
2048 │ eggs │ 451 │
2049 ╘═══════════╧═══════════╛
2050
2051 "pipe" is like tables in PHP Markdown Extra extension or Pandoc
2052 pipe_tables:
2053
2054 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2055 ... ["strings", "numbers"], "pipe"))
2056 | strings | numbers |
2057 |:----------|----------:|
2058 | spam | 41.9999 |
2059 | eggs | 451 |
2060
    "presto" is like tables produced by the Presto CLI:
2062
2063 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2064 ... ["strings", "numbers"], "presto"))
2065 strings | numbers
2066 -----------+-----------
2067 spam | 41.9999
2068 eggs | 451
2069
2070 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="pipe"))
2071 |:-----|---------:|
2072 | spam | 41.9999 |
2073 | eggs | 451 |
2074
2075 "orgtbl" is like tables in Emacs org-mode and orgtbl-mode. They
2076 are slightly different from "pipe" format by not using colons to
2077 define column alignment, and using a "+" sign to indicate line
2078 intersections:
2079
2080 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2081 ... ["strings", "numbers"], "orgtbl"))
2082 | strings | numbers |
2083 |-----------+-----------|
2084 | spam | 41.9999 |
2085 | eggs | 451 |
2086
2087
2088 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="orgtbl"))
2089 | spam | 41.9999 |
2090 | eggs | 451 |
2091
2092 "rst" is like a simple table format from reStructuredText; please
2093 note that reStructuredText accepts also "grid" tables:
2094
2095 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
2096 ... ["strings", "numbers"], "rst"))
2097 ========= =========
2098 strings numbers
2099 ========= =========
2100 spam 41.9999
2101 eggs 451
2102 ========= =========
2103
2104 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="rst"))
2105 ==== ========
2106 spam 41.9999
2107 eggs 451
2108 ==== ========
2109
2110 "mediawiki" produces a table markup used in Wikipedia and on other
2111 MediaWiki-based sites:
2112
2113 >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
2114 ... headers="firstrow", tablefmt="mediawiki"))
2115 {| class="wikitable" style="text-align: left;"
2116 |+ <!-- caption -->
2117 |-
2118 ! strings !! style="text-align: right;"| numbers
2119 |-
2120 | spam || style="text-align: right;"| 41.9999
2121 |-
2122 | eggs || style="text-align: right;"| 451
2123 |}
2124
2125 "html" produces HTML markup as an html.escape'd str
2126 with a ._repr_html_ method so that Jupyter Lab and Notebook display the HTML
2127 and a .str property so that the raw HTML remains accessible
2128 the unsafehtml table format can be used if an unescaped HTML format is required:
2129
2130 >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
2131 ... headers="firstrow", tablefmt="html"))
2132 <table>
2133 <thead>
2134 <tr><th>strings </th><th style="text-align: right;"> numbers</th></tr>
2135 </thead>
2136 <tbody>
2137 <tr><td>spam </td><td style="text-align: right;"> 41.9999</td></tr>
2138 <tr><td>eggs </td><td style="text-align: right;"> 451 </td></tr>
2139 </tbody>
2140 </table>
2141
2142 "latex" produces a tabular environment of LaTeX document markup:
2143
2144 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex"))
2145 \\begin{tabular}{lr}
2146 \\hline
2147 spam & 41.9999 \\\\
2148 eggs & 451 \\\\
2149 \\hline
2150 \\end{tabular}
2151
2152 "latex_raw" is similar to "latex", but doesn't escape special characters,
    such as backslash and underscore, so LaTeX commands may be embedded into
2154 cells' values:
2155
2156 >>> print(tabulate([["spam$_9$", 41.9999], ["\\\\emph{eggs}", "451.0"]], tablefmt="latex_raw"))
2157 \\begin{tabular}{lr}
2158 \\hline
2159 spam$_9$ & 41.9999 \\\\
2160 \\emph{eggs} & 451 \\\\
2161 \\hline
2162 \\end{tabular}
2163
2164 "latex_booktabs" produces a tabular environment of LaTeX document markup
2165 using the booktabs.sty package:
2166
2167 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_booktabs"))
2168 \\begin{tabular}{lr}
2169 \\toprule
2170 spam & 41.9999 \\\\
2171 eggs & 451 \\\\
2172 \\bottomrule
2173 \\end{tabular}
2174
2175 "latex_longtable" produces a tabular environment that can stretch along
2176 multiple pages, using the longtable package for LaTeX.
2177
2178 >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_longtable"))
2179 \\begin{longtable}{lr}
2180 \\hline
2181 spam & 41.9999 \\\\
2182 eggs & 451 \\\\
2183 \\hline
2184 \\end{longtable}
2185
2186
2187 Number parsing
2188 --------------
2189 By default, anything which can be parsed as a number is a number.
2190 This ensures numbers represented as strings are aligned properly.
2191 This can lead to weird results for particular strings such as
2192 specific git SHAs e.g. "42992e1" will be parsed into the number
2193 429920 and aligned as such.
2194
2195 To completely disable number parsing (and alignment), use
    `disable_numparse=True`. For more fine grained control, a list of column
    indices can be used to disable number parsing only on those columns
2198 e.g. `disable_numparse=[0, 2]` would disable number parsing only on the
2199 first and third columns.
2200
2201 Column Widths and Auto Line Wrapping
2202 ------------------------------------
2203 Tabulate will, by default, set the width of each column to the length of the
2204 longest element in that column. However, in situations where fields are expected
2205 to reasonably be too long to look good as a single line, tabulate can help automate
    word wrapping long fields for you. Use the parameter `maxcolwidths` to provide a
2207 list of maximal column widths
2208
2209 >>> print(tabulate( \
2210 [('1', 'John Smith', \
2211 'This is a rather long description that might look better if it is wrapped a bit')], \
2212 headers=("Issue Id", "Author", "Description"), \
2213 maxcolwidths=[None, None, 30], \
2214 tablefmt="grid" \
2215 ))
2216 +------------+------------+-------------------------------+
2217 | Issue Id | Author | Description |
2218 +============+============+===============================+
2219 | 1 | John Smith | This is a rather long |
2220 | | | description that might look |
2221 | | | better if it is wrapped a bit |
2222 +------------+------------+-------------------------------+
2223
    Header column width can be specified in a similar way using `maxheadercolwidths`
2225
2226 """
2227
2228 if tabular_data is None:
2229 tabular_data = []
2230
2231 list_of_lists, headers, headers_pad = _normalize_tabular_data(
2232 tabular_data, headers, showindex=showindex
2233 )
2234 list_of_lists, separating_lines = _remove_separating_lines(list_of_lists)
2235
2236 if maxcolwidths is not None:
2237 if type(maxcolwidths) is tuple: # Check if tuple, convert to list if so
2238 maxcolwidths = list(maxcolwidths)
2239 if len(list_of_lists):
2240 num_cols = len(list_of_lists[0])
2241 else:
2242 num_cols = 0
2243 if isinstance(maxcolwidths, int): # Expand scalar for all columns
2244 maxcolwidths = _expand_iterable(maxcolwidths, num_cols, maxcolwidths)
2245 else: # Ignore col width for any 'trailing' columns
2246 maxcolwidths = _expand_iterable(maxcolwidths, num_cols, None)
2247
2248 numparses = _expand_numparse(disable_numparse, num_cols)
2249 list_of_lists = _wrap_text_to_colwidths(
2250 list_of_lists, maxcolwidths, numparses=numparses
2251 )
2252
2253 if maxheadercolwidths is not None:
2254 num_cols = len(list_of_lists[0])
2255 if isinstance(maxheadercolwidths, int): # Expand scalar for all columns
2256 maxheadercolwidths = _expand_iterable(
2257 maxheadercolwidths, num_cols, maxheadercolwidths
2258 )
2259 else: # Ignore col width for any 'trailing' columns
2260 maxheadercolwidths = _expand_iterable(maxheadercolwidths, num_cols, None)
2261
2262 numparses = _expand_numparse(disable_numparse, num_cols)
2263 headers = _wrap_text_to_colwidths(
2264 [headers], maxheadercolwidths, numparses=numparses
2265 )[0]
2266
2267 # empty values in the first column of RST tables should be escaped (issue #82)
2268 # "" should be escaped as "\\ " or ".."
2269 if tablefmt == "rst":
2270 list_of_lists, headers = _rst_escape_first_column(list_of_lists, headers)
2271
2272 # PrettyTable formatting does not use any extra padding.
2273 # Numbers are not parsed and are treated the same as strings for alignment.
2274 # Check if pretty is the format being used and override the defaults so it
2275 # does not impact other formats.
2276 min_padding = MIN_PADDING
2277 if tablefmt == "pretty":
2278 min_padding = 0
2279 disable_numparse = True
2280 numalign = "center" if numalign == _DEFAULT_ALIGN else numalign
2281 stralign = "center" if stralign == _DEFAULT_ALIGN else stralign
2282 else:
2283 numalign = "decimal" if numalign == _DEFAULT_ALIGN else numalign
2284 stralign = "left" if stralign == _DEFAULT_ALIGN else stralign
2285
2286 # 'colon_grid' uses colons in the line beneath the header to represent a column's
2287 # alignment instead of literally aligning the text differently. Hence,
2288 # left alignment of the data in the text output is enforced.
2289 if tablefmt == "colon_grid":
2290 colglobalalign = "left"
2291 headersglobalalign = "left"
2292
2293 # optimization: look for ANSI control codes once,
2294 # enable smart width functions only if a control code is found
2295 #
2296 # convert the headers and rows into a single, tab-delimited string ensuring
2297 # that any bytestrings are decoded safely (i.e. errors ignored)
2298 plain_text = "\t".join(
2299 chain(
2300 # headers
2301 map(_to_str, headers),
2302 # rows: chain the rows together into a single iterable after mapping
2303 # the bytestring conversino to each cell value
2304 chain.from_iterable(map(_to_str, row) for row in list_of_lists),
2305 )
2306 )
2307
2308 has_invisible = _ansi_codes.search(plain_text) is not None
2309
2310 enable_widechars = wcwidth is not None and WIDE_CHARS_MODE
2311 if (
2312 not isinstance(tablefmt, TableFormat)
2313 and tablefmt in multiline_formats
2314 and _is_multiline(plain_text)
2315 ):
2316 tablefmt = multiline_formats.get(tablefmt, tablefmt)
2317 is_multiline = True
2318 else:
2319 is_multiline = False
2320 width_fn = _choose_width_fn(has_invisible, enable_widechars, is_multiline)
2321
2322 # format rows and columns, convert numeric values to strings
2323 cols = list(izip_longest(*list_of_lists))
2324 numparses = _expand_numparse(disable_numparse, len(cols))
2325 coltypes = [_column_type(col, numparse=np) for col, np in zip(cols, numparses)]
2326 if isinstance(floatfmt, str): # old version
2327 float_formats = len(cols) * [
2328 floatfmt
2329 ] # just duplicate the string to use in each column
2330 else: # if floatfmt is list, tuple etc we have one per column
2331 float_formats = list(floatfmt)
2332 if len(float_formats) < len(cols):
2333 float_formats.extend((len(cols) - len(float_formats)) * [_DEFAULT_FLOATFMT])
2334 if isinstance(intfmt, str): # old version
2335 int_formats = len(cols) * [
2336 intfmt
2337 ] # just duplicate the string to use in each column
2338 else: # if intfmt is list, tuple etc we have one per column
2339 int_formats = list(intfmt)
2340 if len(int_formats) < len(cols):
2341 int_formats.extend((len(cols) - len(int_formats)) * [_DEFAULT_INTFMT])
2342 if isinstance(missingval, str):
2343 missing_vals = len(cols) * [missingval]
2344 else:
2345 missing_vals = list(missingval)
2346 if len(missing_vals) < len(cols):
2347 missing_vals.extend((len(cols) - len(missing_vals)) * [_DEFAULT_MISSINGVAL])
2348 cols = [
2349 [_format(v, ct, fl_fmt, int_fmt, miss_v, has_invisible) for v in c]
2350 for c, ct, fl_fmt, int_fmt, miss_v in zip(
2351 cols, coltypes, float_formats, int_formats, missing_vals
2352 )
2353 ]
2354
2355 # align columns
2356 # first set global alignment
2357 if colglobalalign is not None: # if global alignment provided
2358 aligns = [colglobalalign] * len(cols)
2359 else: # default
2360 aligns = [numalign if ct in [int, float] else stralign for ct in coltypes]
2361 # then specific alignments
2362 if colalign is not None:
2363 assert isinstance(colalign, Iterable)
2364 if isinstance(colalign, str):
2365 warnings.warn(
2366 f"As a string, `colalign` is interpreted as {[c for c in colalign]}. "
2367 f'Did you mean `colglobalalign = "{colalign}"` or `colalign = ("{colalign}",)`?',
2368 stacklevel=2,
2369 )
2370 for idx, align in enumerate(colalign):
2371 if not idx < len(aligns):
2372 break
2373 elif align != "global":
2374 aligns[idx] = align
2375 minwidths = (
2376 [width_fn(h) + min_padding for h in headers] if headers else [0] * len(cols)
2377 )
2378 aligns_copy = aligns.copy()
2379 # Reset alignments in copy of alignments list to "left" for 'colon_grid' format,
2380 # which enforces left alignment in the text output of the data.
2381 if tablefmt == "colon_grid":
2382 aligns_copy = ["left"] * len(cols)
2383 cols = [
2384 _align_column(
2385 c,
2386 a,
2387 minw,
2388 has_invisible,
2389 enable_widechars,
2390 is_multiline,
2391 preserve_whitespace,
2392 )
2393 for c, a, minw in zip(cols, aligns_copy, minwidths)
2394 ]
2395
2396 aligns_headers = None
2397 if headers:
2398 # align headers and add headers
2399 t_cols = cols or [[""]] * len(headers)
2400 # first set global alignment
2401 if headersglobalalign is not None: # if global alignment provided
2402 aligns_headers = [headersglobalalign] * len(t_cols)
2403 else: # default
2404 aligns_headers = aligns or [stralign] * len(headers)
2405 # then specific header alignments
2406 if headersalign is not None:
2407 assert isinstance(headersalign, Iterable)
2408 if isinstance(headersalign, str):
2409 warnings.warn(
2410 f"As a string, `headersalign` is interpreted as {[c for c in headersalign]}. "
2411 f'Did you mean `headersglobalalign = "{headersalign}"` '
2412 f'or `headersalign = ("{headersalign}",)`?',
2413 stacklevel=2,
2414 )
2415 for idx, align in enumerate(headersalign):
2416 hidx = headers_pad + idx
2417 if not hidx < len(aligns_headers):
2418 break
2419 elif align == "same" and hidx < len(aligns): # same as column align
2420 aligns_headers[hidx] = aligns[hidx]
2421 elif align != "global":
2422 aligns_headers[hidx] = align
2423 minwidths = [
2424 max(minw, max(width_fn(cl) for cl in c))
2425 for minw, c in zip(minwidths, t_cols)
2426 ]
2427 headers = [
2428 _align_header(h, a, minw, width_fn(h), is_multiline, width_fn)
2429 for h, a, minw in zip(headers, aligns_headers, minwidths)
2430 ]
2431 rows = list(zip(*cols))
2432 else:
2433 minwidths = [max(width_fn(cl) for cl in c) for c in cols]
2434 rows = list(zip(*cols))
2435
2436 if not isinstance(tablefmt, TableFormat):
2437 tablefmt = _table_formats.get(tablefmt, _table_formats["simple"])
2438
2439 ra_default = rowalign if isinstance(rowalign, str) else None
2440 rowaligns = _expand_iterable(rowalign, len(rows), ra_default)
2441 _reinsert_separating_lines(rows, separating_lines)
2442
2443 return _format_table(
2444 tablefmt,
2445 headers,
2446 aligns_headers,
2447 rows,
2448 minwidths,
2449 aligns,
2450 is_multiline,
2451 rowaligns=rowaligns,
2452 )
2453
2454
2455def _expand_numparse(disable_numparse, column_count):
2456 """
2457 Return a list of bools of length `column_count` which indicates whether
2458 number parsing should be used on each column.
2459 If `disable_numparse` is a list of indices, each of those indices are False,
2460 and everything else is True.
2461 If `disable_numparse` is a bool, then the returned list is all the same.
2462 """
2463 if isinstance(disable_numparse, Iterable):
2464 numparses = [True] * column_count
2465 for index in disable_numparse:
2466 numparses[index] = False
2467 return numparses
2468 else:
2469 return [not disable_numparse] * column_count
2470
2471
2472def _expand_iterable(original, num_desired, default):
2473 """
2474 Expands the `original` argument to return a return a list of
2475 length `num_desired`. If `original` is shorter than `num_desired`, it will
2476 be padded with the value in `default`.
2477 If `original` is not a list to begin with (i.e. scalar value) a list of
2478 length `num_desired` completely populated with `default will be returned
2479 """
2480 if isinstance(original, Iterable) and not isinstance(original, str):
2481 return original + [default] * (num_desired - len(original))
2482 else:
2483 return [default] * num_desired
2484
2485
2486def _pad_row(cells, padding):
2487 if cells:
2488 if cells == SEPARATING_LINE:
2489 return SEPARATING_LINE
2490 pad = " " * padding
2491 padded_cells = [pad + cell + pad for cell in cells]
2492 return padded_cells
2493 else:
2494 return cells
2495
2496
2497def _build_simple_row(padded_cells, rowfmt):
2498 "Format row according to DataRow format without padding."
2499 begin, sep, end = rowfmt
2500 return (begin + sep.join(padded_cells) + end).rstrip()
2501
2502
2503def _build_row(padded_cells, colwidths, colaligns, rowfmt):
2504 "Return a string which represents a row of data cells."
2505 if not rowfmt:
2506 return None
2507 if hasattr(rowfmt, "__call__"):
2508 return rowfmt(padded_cells, colwidths, colaligns)
2509 else:
2510 return _build_simple_row(padded_cells, rowfmt)
2511
2512
def _append_basic_row(lines, padded_cells, colwidths, colaligns, rowfmt, rowalign=None):
    """Build one row with `_build_row`, append it to `lines`, and return `lines`.

    `rowalign` is accepted only so that the signature is compatible with
    `_append_multiline_row`; it is not used.
    """
    rendered = _build_row(padded_cells, colwidths, colaligns, rowfmt)
    lines.append(rendered)
    return lines
2517
2518
2519def _align_cell_veritically(text_lines, num_lines, column_width, row_alignment):
2520 delta_lines = num_lines - len(text_lines)
2521 blank = [" " * column_width]
2522 if row_alignment == "bottom":
2523 return blank * delta_lines + text_lines
2524 elif row_alignment == "center":
2525 top_delta = delta_lines // 2
2526 bottom_delta = delta_lines - top_delta
2527 return top_delta * blank + text_lines + bottom_delta * blank
2528 else:
2529 return text_lines + blank * delta_lines
2530
2531
def _append_multiline_row(
    lines, padded_multiline_cells, padded_widths, colaligns, rowfmt, pad, rowalign=None
):
    """Append a row whose cells may span several text lines; return `lines`.

    Each cell is split into its text lines, shorter cells are filled up with
    blank lines according to `rowalign` (see `_align_cell_veritically`), and
    one output line per text line is appended through `_append_basic_row`.
    The column widths used for the blank lines are `padded_widths` with the
    padding on both sides subtracted.
    """
    inner_widths = [width - 2 * pad for width in padded_widths]
    split_cells = [cell.splitlines() for cell in padded_multiline_cells]
    # number of text lines in the tallest cell of the row
    height = max(len(cell_lines) for cell_lines in split_cells)
    aligned_cells = [
        _align_cell_veritically(cell_lines, height, width, rowalign)
        for cell_lines, width in zip(split_cells, inner_widths)
    ]
    for line_no in range(height):
        # the line_no-th text line of every cell forms one output line
        line_cells = [cell_lines[line_no] for cell_lines in aligned_cells]
        _append_basic_row(
            lines, _pad_row(line_cells, pad), inner_widths, colaligns, rowfmt
        )
    return lines
2552
2553
2554def _build_line(colwidths, colaligns, linefmt):
2555 "Return a string which represents a horizontal line."
2556 if not linefmt:
2557 return None
2558 if hasattr(linefmt, "__call__"):
2559 return linefmt(colwidths, colaligns)
2560 else:
2561 begin, fill, sep, end = linefmt
2562 cells = [fill * w for w in colwidths]
2563 return _build_simple_row(cells, (begin, sep, end))
2564
2565
def _append_line(lines, colwidths, colaligns, linefmt):
    """Build a horizontal line with `_build_line`, append it to `lines`, and
    return `lines`."""
    rendered = _build_line(colwidths, colaligns, linefmt)
    lines.append(rendered)
    return lines
2569
2570
class JupyterHTMLStr(str):
    """A str subclass exposing a `_repr_html_` method, so that Jupyter
    displays the HTML table instead of its source text."""

    @property
    def str(self):
        """The raw string, still accessible as `.str`."""
        return self

    def _repr_html_(self):
        """Return the string itself as the HTML representation."""
        return self
2582
2583
def _format_table(
    fmt, headers, headersaligns, rows, colwidths, colaligns, is_multiline, rowaligns
):
    """Produce a plain-text representation of the table.

    `fmt` supplies the line and row formats plus the cell padding. The output
    is assembled in order: line above, header row and the line below it, the
    data rows (with lines in between when the format defines them), and the
    line below. Line elements named in `fmt.with_header_hide` are left out
    when there are headers. Returns "" when there are neither headers nor
    rows; the text is wrapped in JupyterHTMLStr when `fmt.lineabove` equals
    `_html_begin_table_without_header`.
    """
    lines = []
    if headers and fmt.with_header_hide:
        hidden = fmt.with_header_hide
    else:
        hidden = []
    pad = fmt.padding
    headerrow = fmt.headerrow
    padded_widths = [width + 2 * pad for width in colwidths]

    if is_multiline:

        def pad_row(row, _):
            # padding is applied later, in _append_multiline_row
            return row

        append_row = partial(_append_multiline_row, pad=pad)
    else:
        pad_row, append_row = _pad_row, _append_basic_row

    def shown(part):
        # a line element is drawn when the format defines it and it is not hidden
        return getattr(fmt, part) and part not in hidden

    def add_data_row(row, **extra):
        append_row(
            lines, pad_row(row, pad), padded_widths, colaligns, fmt.datarow, **extra
        )

    padded_headers = pad_row(headers, pad)

    if shown("lineabove"):
        _append_line(lines, padded_widths, colaligns, fmt.lineabove)

    if padded_headers:
        append_row(lines, padded_headers, padded_widths, headersaligns, headerrow)
        if shown("linebelowheader"):
            _append_line(lines, padded_widths, colaligns, fmt.linebelowheader)

    if rows and shown("linebetweenrows"):
        # every row but the last is followed by a line
        for row, ralign in zip(rows[:-1], rowaligns):
            if row != SEPARATING_LINE:
                add_data_row(row, rowalign=ralign)
            _append_line(lines, padded_widths, colaligns, fmt.linebetweenrows)
        # the last row has no line below it
        add_data_row(rows[-1], rowalign=rowaligns[-1])
    else:
        # the first line element the format defines stands in for
        # separating lines; an empty Line is the last resort
        candidates = (
            fmt.linebetweenrows,
            fmt.linebelowheader,
            fmt.linebelow,
            fmt.lineabove,
        )
        separating_line = next(
            (candidate for candidate in candidates if candidate),
            Line("", "", "", ""),
        )
        for row in rows:
            if _is_separating_line(row):
                _append_line(lines, padded_widths, colaligns, separating_line)
            else:
                add_data_row(row)

    if shown("linebelow"):
        _append_line(lines, padded_widths, colaligns, fmt.linebelow)

    if not (headers or rows):
        # a completely empty table
        return ""
    output = "\n".join(lines)
    if fmt.lineabove == _html_begin_table_without_header:
        return JupyterHTMLStr(output)
    return output
2662
2663
2664class _CustomTextWrap(textwrap.TextWrapper):
2665 """A custom implementation of CPython's textwrap.TextWrapper. This supports
2666 both wide characters (Korea, Japanese, Chinese) - including mixed string.
2667 For the most part, the `_handle_long_word` and `_wrap_chunks` functions were
2668 copy pasted out of the CPython baseline, and updated with our custom length
2669 and line appending logic.
2670 """
2671
2672 def __init__(self, *args, **kwargs):
2673 self._active_codes = []
2674 self.max_lines = None # For python2 compatibility
2675 textwrap.TextWrapper.__init__(self, *args, **kwargs)
2676
2677 @staticmethod
2678 def _len(item):
2679 """Custom len that gets console column width for wide
2680 and non-wide characters as well as ignores color codes"""
2681 stripped = _strip_ansi(item)
2682 if wcwidth:
2683 return wcwidth.wcswidth(stripped)
2684 else:
2685 return len(stripped)
2686
2687 def _update_lines(self, lines, new_line):
2688 """Adds a new line to the list of lines the text is being wrapped into
2689 This function will also track any ANSI color codes in this string as well
2690 as add any colors from previous lines order to preserve the same formatting
2691 as a single unwrapped string.
2692 """
2693 code_matches = [x for x in _ansi_codes.finditer(new_line)]
2694 color_codes = [
2695 code.string[code.span()[0] : code.span()[1]] for code in code_matches
2696 ]
2697
2698 # Add color codes from earlier in the unwrapped line, and then track any new ones we add.
2699 new_line = "".join(self._active_codes) + new_line
2700
2701 for code in color_codes:
2702 if code != _ansi_color_reset_code:
2703 self._active_codes.append(code)
2704 else: # A single reset code resets everything
2705 self._active_codes = []
2706
2707 # Always ensure each line is color terminated if any colors are
2708 # still active, otherwise colors will bleed into other cells on the console
2709 if len(self._active_codes) > 0:
2710 new_line = new_line + _ansi_color_reset_code
2711
2712 lines.append(new_line)
2713
2714 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2715 """_handle_long_word(chunks : [string],
2716 cur_line : [string],
2717 cur_len : int, width : int)
2718 Handle a chunk of text (most likely a word, not whitespace) that
2719 is too long to fit in any line.
2720 """
2721 # Figure out when indent is larger than the specified width, and make
2722 # sure at least one character is stripped off on every pass
2723 if width < 1:
2724 space_left = 1
2725 else:
2726 space_left = width - cur_len
2727
2728 # If we're allowed to break long words, then do so: put as much
2729 # of the next chunk onto the current line as will fit.
2730 if self.break_long_words:
2731 # Tabulate Custom: Build the string up piece-by-piece in order to
2732 # take each charcter's width into account
2733 chunk = reversed_chunks[-1]
2734 i = 1
2735 # Only count printable characters, so strip_ansi first, index later.
2736 while len(_strip_ansi(chunk)[:i]) <= space_left:
2737 i = i + 1
2738 # Consider escape codes when breaking words up
2739 total_escape_len = 0
2740 last_group = 0
2741 if _ansi_codes.search(chunk) is not None:
2742 for group, _, _, _ in _ansi_codes.findall(chunk):
2743 escape_len = len(group)
2744 if (
2745 group
2746 in chunk[last_group : i + total_escape_len + escape_len - 1]
2747 ):
2748 total_escape_len += escape_len
2749 found = _ansi_codes.search(chunk[last_group:])
2750 last_group += found.end()
2751 cur_line.append(chunk[: i + total_escape_len - 1])
2752 reversed_chunks[-1] = chunk[i + total_escape_len - 1 :]
2753
2754 # Otherwise, we have to preserve the long word intact. Only add
2755 # it to the current line if there's nothing already there --
2756 # that minimizes how much we violate the width constraint.
2757 elif not cur_line:
2758 cur_line.append(reversed_chunks.pop())
2759
2760 # If we're not allowed to break long words, and there's already
2761 # text on the current line, do nothing. Next time through the
2762 # main loop of _wrap_chunks(), we'll wind up here again, but
2763 # cur_len will be zero, so the next line will be entirely
2764 # devoted to the long word that we can't handle right now.
2765
2766 def _wrap_chunks(self, chunks):
2767 """_wrap_chunks(chunks : [string]) -> [string]
2768 Wrap a sequence of text chunks and return a list of lines of
2769 length 'self.width' or less. (If 'break_long_words' is false,
2770 some lines may be longer than this.) Chunks correspond roughly
2771 to words and the whitespace between them: each chunk is
2772 indivisible (modulo 'break_long_words'), but a line break can
2773 come between any two chunks. Chunks should not have internal
2774 whitespace; ie. a chunk is either all whitespace or a "word".
2775 Whitespace chunks will be removed from the beginning and end of
2776 lines, but apart from that whitespace is preserved.
2777 """
2778 lines = []
2779 if self.width <= 0:
2780 raise ValueError("invalid width %r (must be > 0)" % self.width)
2781 if self.max_lines is not None:
2782 if self.max_lines > 1:
2783 indent = self.subsequent_indent
2784 else:
2785 indent = self.initial_indent
2786 if self._len(indent) + self._len(self.placeholder.lstrip()) > self.width:
2787 raise ValueError("placeholder too large for max width")
2788
2789 # Arrange in reverse order so items can be efficiently popped
2790 # from a stack of chucks.
2791 chunks.reverse()
2792
2793 while chunks:
2794
2795 # Start the list of chunks that will make up the current line.
2796 # cur_len is just the length of all the chunks in cur_line.
2797 cur_line = []
2798 cur_len = 0
2799
2800 # Figure out which static string will prefix this line.
2801 if lines:
2802 indent = self.subsequent_indent
2803 else:
2804 indent = self.initial_indent
2805
2806 # Maximum width for this line.
2807 width = self.width - self._len(indent)
2808
2809 # First chunk on line is whitespace -- drop it, unless this
2810 # is the very beginning of the text (ie. no lines started yet).
2811 if self.drop_whitespace and chunks[-1].strip() == "" and lines:
2812 del chunks[-1]
2813
2814 while chunks:
2815 chunk_len = self._len(chunks[-1])
2816
2817 # Can at least squeeze this chunk onto the current line.
2818 if cur_len + chunk_len <= width:
2819 cur_line.append(chunks.pop())
2820 cur_len += chunk_len
2821
2822 # Nope, this line is full.
2823 else:
2824 break
2825
2826 # The current line is full, and the next chunk is too big to
2827 # fit on *any* line (not just this one).
2828 if chunks and self._len(chunks[-1]) > width:
2829 self._handle_long_word(chunks, cur_line, cur_len, width)
2830 cur_len = sum(map(self._len, cur_line))
2831
2832 # If the last chunk on this line is all whitespace, drop it.
2833 if self.drop_whitespace and cur_line and cur_line[-1].strip() == "":
2834 cur_len -= self._len(cur_line[-1])
2835 del cur_line[-1]
2836
2837 if cur_line:
2838 if (
2839 self.max_lines is None
2840 or len(lines) + 1 < self.max_lines
2841 or (
2842 not chunks
2843 or self.drop_whitespace
2844 and len(chunks) == 1
2845 and not chunks[0].strip()
2846 )
2847 and cur_len <= width
2848 ):
2849 # Convert current line back to a string and store it in
2850 # list of all lines (return value).
2851 self._update_lines(lines, indent + "".join(cur_line))
2852 else:
2853 while cur_line:
2854 if (
2855 cur_line[-1].strip()
2856 and cur_len + self._len(self.placeholder) <= width
2857 ):
2858 cur_line.append(self.placeholder)
2859 self._update_lines(lines, indent + "".join(cur_line))
2860 break
2861 cur_len -= self._len(cur_line[-1])
2862 del cur_line[-1]
2863 else:
2864 if lines:
2865 prev_line = lines[-1].rstrip()
2866 if (
2867 self._len(prev_line) + self._len(self.placeholder)
2868 <= self.width
2869 ):
2870 lines[-1] = prev_line + self.placeholder
2871 break
2872 self._update_lines(lines, indent + self.placeholder.lstrip())
2873 break
2874
2875 return lines
2876
2877
def _main():
    """\
    Usage: tabulate [options] [FILE ...]

    Pretty-print tabular data.
    See also https://github.com/astanin/python-tabulate

    FILE                        a filename of the file with tabular data;
                                if "-" or missing, read data from stdin.

    Options:

    -h, --help                  show this message
    -1, --header                use the first row of data as a table header
    -o FILE, --output FILE      print table to FILE (default: stdout)
    -s REGEXP, --sep REGEXP     use a custom column separator (default: whitespace)
    -F FPFMT, --float FPFMT     floating point number format (default: g)
    -I INTFMT, --int INTFMT     integer point number format (default: "")
    -C ALIGN, --colalign ALIGN  column alignments as one whitespace-separated
                                argument, e.g. "right left center"
    -f FMT, --format FMT        set output table format; supported formats:
                                plain, simple, grid, fancy_grid, pipe, orgtbl,
                                rst, mediawiki, html, latex, latex_raw,
                                latex_booktabs, latex_longtable, tsv
                                (default: simple)
    """
    # NOTE: the docstring above doubles as the usage message printed by
    # --help and on option errors, so it contains user-facing text only.
    #
    # Command-line entry point: parses sys.argv, then tabulates every input
    # file (or stdin) into the output file (or stdout). Exits with status 2
    # on an invalid option, 3 on an unsupported table format and 0 after
    # printing the help message.
    import contextlib
    import getopt

    usage = textwrap.dedent(_main.__doc__)
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            # "C:" must be listed here, otherwise getopt rejects the -C
            # option handled in the loop below.
            "h1o:s:F:I:C:f:",
            [
                "help",
                "header",
                "output=",
                "sep=",
                "float=",
                "int=",
                "colalign=",
                "format=",
            ],
        )
    except getopt.GetoptError as e:
        print(e)
        print(usage)
        sys.exit(2)
    headers = []
    floatfmt = _DEFAULT_FLOATFMT
    intfmt = _DEFAULT_INTFMT
    colalign = None
    tablefmt = "simple"
    sep = r"\s+"
    outfile = "-"
    for opt, value in opts:
        if opt in ["-1", "--header"]:
            headers = "firstrow"
        elif opt in ["-o", "--output"]:
            outfile = value
        elif opt in ["-F", "--float"]:
            floatfmt = value
        elif opt in ["-I", "--int"]:
            intfmt = value
        elif opt in ["-C", "--colalign"]:
            colalign = value.split()
        elif opt in ["-f", "--format"]:
            if value not in tabulate_formats:
                print("%s is not a supported table format" % value)
                print(usage)
                sys.exit(3)
            tablefmt = value
        elif opt in ["-s", "--sep"]:
            sep = value
        elif opt in ["-h", "--help"]:
            print(usage)
            sys.exit(0)
    files = [sys.stdin] if not args else args
    # options shared by every call to _pprint_file
    print_options = dict(
        headers=headers,
        tablefmt=tablefmt,
        sep=sep,
        floatfmt=floatfmt,
        intfmt=intfmt,
        colalign=colalign,
    )
    if outfile == "-":
        # Do not use sys.stdout itself as the context manager: leaving the
        # `with` block would close the interpreter's standard output.
        out_context = contextlib.nullcontext(sys.stdout)
    else:
        out_context = open(outfile, "w")
    with out_context as out:
        for f in files:
            if f == "-":
                f = sys.stdin
            if _is_file(f):
                _pprint_file(f, file=out, **print_options)
            else:
                with open(f) as fobj:
                    _pprint_file(fobj, file=out, **print_options)
2981
2982
def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, intfmt, file, colalign):
    """Tabulate the text read from `fobject` and print the result to `file`.

    Every line that is not blank has its trailing whitespace removed and is
    split into cells with the regular expression `sep`. The remaining
    arguments are passed on to `tabulate`.
    """
    table = []
    for line in fobject.readlines():
        if line.strip():
            table.append(re.split(sep, line.rstrip()))
    rendered = tabulate(
        table,
        headers,
        tablefmt,
        floatfmt=floatfmt,
        intfmt=intfmt,
        colalign=colalign,
    )
    print(rendered, file=file)
2997
2998
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    _main()