1# Tables Extension for Python-Markdown
2# ====================================
3
4# Adds parsing of tables to Python-Markdown.
5
6# See https://Python-Markdown.github.io/extensions/tables
7# for documentation.
8
9# Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)
10
11# All changes Copyright 2008-2014 The Python Markdown Project
12
13# License: [BSD](https://opensource.org/licenses/bsd-license.php)
14
15"""
16Adds parsing of tables to Python-Markdown.
17
18See the [documentation](https://Python-Markdown.github.io/extensions/tables)
19for details.
20"""
21
22from __future__ import annotations
23
24from . import Extension
25from ..blockprocessors import BlockProcessor
26import xml.etree.ElementTree as etree
27import re
28from typing import TYPE_CHECKING, Any, Sequence
29
30if TYPE_CHECKING: # pragma: no cover
31 from .. import blockparser
32
# Bit flags recording which outer border pipes the header row carries.
# `TableProcessor.test` starts from `PIPE_NONE` and ORs the others in.
PIPE_NONE = 0b00   # no border pipe found
PIPE_LEFT = 0b01   # header row starts with a pipe
PIPE_RIGHT = 0b10  # header row ends with an unescaped pipe
36
37
class TableProcessor(BlockProcessor):
    """ Process Tables. """

    # Row tokenizer. Exactly one group is set per match:
    #   1: escaped backslash (`\\`)
    #   2: escaped backtick run (`\` followed by one or more backticks)
    #   3: backtick run
    #   4: escaped pipe (`\|`)
    #   5: bare pipe
    RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
    # A pipe at the very end of the row that is preceded by an even number
    # (possibly zero) of backslashes, i.e. a pipe that is not itself escaped.
    RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')

    def __init__(self, parser: blockparser.BlockParser, config: dict[str, Any]):
        """
        Store the extension configuration and reset the per-table state.

        `border` and `separator` are filled in by `test` and consumed by `run`
        and `_split_row`.
        """
        self.border: bool | int = False
        self.separator: Sequence[str] = ''
        self.config = config

        super().__init__(parser)

    def test(self, parent: etree.Element, block: str) -> bool:
        """
        Ensure first two rows (column header and separator row) are valid table rows.

        Keep border check and separator row to avoid repeating the work.
        """
        rows = [row.strip(' ') for row in block.split('\n')]
        if len(rows) < 2:
            # A table needs at least a header row and a separator row.
            return False

        header = rows[0]
        # The border flags must be set before `_split_row` is called, as it
        # reads `self.border` to decide whether to strip the outer pipes.
        self.border = PIPE_NONE
        if header.startswith('|'):
            self.border |= PIPE_LEFT
        if self.RE_END_BORDER.search(header) is not None:
            self.border |= PIPE_RIGHT
        column_count = len(self._split_row(header))
        is_table = column_count > 1

        # Each row in a single column table needs at least one pipe.
        if column_count == 1 and self.border:
            is_table = all(
                row.startswith('|') or self.RE_END_BORDER.search(row) is not None
                for row in rows[1:]
            )

        if not is_table:
            return False

        # The separator row must have as many cells as the header and may
        # only be made of pipes, colons, dashes and spaces.
        separator = self._split_row(rows[1])
        if len(separator) != column_count:
            return False
        if not set(''.join(separator)) <= set('|:- '):
            return False

        self.separator = separator
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """
        Parse a table block and build table.

        Removes the first block from `blocks` and appends a `table` element
        to `parent`. Column alignment is read from `self.separator`, which
        `test` stored for this block.
        """
        block = blocks.pop(0).split('\n')
        header = block[0].strip(' ')
        # Line 0 is the header and line 1 the separator; the rest is the body.
        rows = block[2:]

        # Get alignment of columns
        align: list[str | None] = []
        for cell in self.separator:
            cell = cell.strip(' ')
            starts = cell.startswith(':')
            ends = cell.endswith(':')
            if starts and ends:
                align.append('center')
            elif starts:
                align.append('left')
            elif ends:
                align.append('right')
            else:
                align.append(None)

        # Build table
        table = etree.SubElement(parent, 'table')
        thead = etree.SubElement(table, 'thead')
        self._build_row(header, thead, align)
        tbody = etree.SubElement(table, 'tbody')
        if rows:
            for row in rows:
                self._build_row(row.strip(' '), tbody, align)
        else:
            # Handle empty table
            self._build_empty_row(tbody, align)

    def _build_empty_row(self, parent: etree.Element, align: Sequence[str | None]) -> None:
        """ Build a row of empty `td` cells, one per entry in `align`. """
        tr = etree.SubElement(parent, 'tr')
        for _ in align:
            etree.SubElement(tr, 'td')

    def _build_row(self, row: str, parent: etree.Element, align: Sequence[str | None]) -> None:
        """
        Given a row of text, build table cells.

        Cells are `th` when `parent` is a `thead` element and `td` otherwise.
        """
        tr = etree.SubElement(parent, 'tr')
        tag = 'th' if parent.tag == 'thead' else 'td'
        cells = self._split_row(row)
        # We use align here rather than cells to ensure every row
        # contains the same number of columns.
        for i, a in enumerate(align):
            c = etree.SubElement(tr, tag)
            # Rows shorter than the header are padded with empty cells.
            c.text = cells[i].strip(' ') if i < len(cells) else ""
            if a:
                if self.config['use_align_attribute']:
                    c.set('align', a)
                else:
                    c.set('style', f'text-align: {a};')

    def _split_row(self, row: str) -> list[str]:
        """ Split a row of text into a list of cells, ignoring border pipes. """
        if self.border:
            if row.startswith('|'):
                row = row[1:]
            border = self.RE_END_BORDER.search(row)
            if border is not None:
                # Drop only the border pipe itself. The match can also span
                # escaped backslashes (`\\`) directly before the pipe; those
                # are content of the last cell and must be kept.
                pipe = border.end() - 1
                row = row[:pipe] + row[pipe + 1:]
        return self._split(row)

    def _split(self, row: str) -> list[str]:
        """ Split a row of text with some code into a list of cells. """
        pipes: list[int] = []
        tics: list[int] = []
        tic_points: list[tuple[int, int, int]] = []

        # Parse row
        # Throw out \\, and \|
        for m in self.RE_CODE_PIPES.finditer(row):
            if m.group(2):
                # \`+
                # Store length of each tic group: subtract \
                tics.append(len(m.group(2)) - 1)
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(2), m.end(2) - 1, 1))
            elif m.group(3):
                # `+
                # Store length of each tic group
                tics.append(len(m.group(3)))
                # Store start of group, end of group, and escape length
                tic_points.append((m.start(3), m.end(3) - 1, 0))
            elif m.group(5):
                # Store pipe location
                pipes.append(m.start(5))

        # Pair up tics according to size if possible
        # Subtract the escape length *only* from the opening.
        # Walk through tic list and see if tic has a close.
        # Store the tic region (start of region, end of region).
        tic_region: list[tuple[int, int]] = []
        tic_len = len(tics)
        pos = 0
        while pos < tic_len:
            tic_size = tics[pos] - tic_points[pos][2]
            close = None
            if tic_size:
                # First later group whose length equals the opening size.
                close = next(
                    (i for i in range(pos + 1, tic_len) if tics[i] == tic_size),
                    None,
                )
            if close is None:
                # No usable opening here (fully escaped or unmatched).
                pos += 1
                continue
            tic_region.append((tic_points[pos][0], tic_points[close][1]))
            pos = close + 1

        # Resolve pipes. Check if they are within a tic pair region.
        # - If it is within a region, we don't want it, so throw it out
        # - If we didn't throw it out, it must be a table pipe
        # (A pipe positioned at less than a region's start is not in it.)
        good_pipes = [
            pipe for pipe in pipes
            if not any(start <= pipe <= end for start, end in tic_region)
        ]

        # Split row according to table delimiters.
        elements = []
        pos = 0
        for pipe in good_pipes:
            elements.append(row[pos:pipe])
            pos = pipe + 1
        elements.append(row[pos:])
        return elements
226
227
class TableExtension(Extension):
    """ Add tables to Markdown. """

    def __init__(self, **kwargs):
        """ Declare the default options, then forward `kwargs` to `Extension`. """
        defaults = {
            'use_align_attribute': [False, 'True to use align attribute instead of style.'],
        }
        self.config = defaults
        """ Default configuration options. """

        super().__init__(**kwargs)

    def extendMarkdown(self, md):
        """ Add an instance of `TableProcessor` to `BlockParser`. """
        # Make the pipe an escapable character, without adding it twice.
        if '|' not in md.ESCAPED_CHARS:
            md.ESCAPED_CHARS.append('|')
        table_processor = TableProcessor(md.parser, self.getConfigs())
        registry = md.parser.blockprocessors
        registry.register(table_processor, 'table', 75)
245
246
def makeExtension(**kwargs):  # pragma: no cover
    """ Return a `TableExtension` built from the given keyword options. """
    extension = TableExtension(**kwargs)
    return extension