1# GFM table, https://github.github.com/gfm/#tables-extension-
2from __future__ import annotations
3
4import re
5
6from ..common.utils import charStrAt, isStrSpace
7from .state_block import StateBlock
8
# Matches one cell of the delimiter row: one or more dashes with an optional
# leading and/or trailing colon (e.g. `---`, `:--`, `--:`, `:-:`).
headerLineRe = re.compile(r"^:?-+:?$")
# Matches a single pipe at the start or at the end of a string.
# NOTE(review): not referenced by the code visible in this module - confirm
# whether it is still needed.
enclosingPipesRe = re.compile(r"^\||\|$")

# Limit the number of empty autocompleted cells in a table,
# see https://github.com/markdown-it/markdown-it/issues/1000.
# Both pulldown-cmark and commonmark-hs limit the number of cells this way to ~200k.
# We set it to 65k, which can expand user input by a factor of x370
# (256x256 square is 1.8kB expanded into 650kB).
MAX_AUTOCOMPLETED_CELLS = 0x10000
18
19
def getLine(state: StateBlock, line: int) -> str:
    """Return the slice of ``state.src`` that holds the content of ``line``.

    The slice starts at ``state.bMarks[line] + state.tShift[line]`` and ends
    just before ``state.eMarks[line]``.
    """
    return state.src[state.bMarks[line] + state.tShift[line] : state.eMarks[line]]
26
27
def escapedSplit(string: str) -> list[str]:
    r"""Split ``string`` into cells on every pipe that is not escaped.

    A pipe directly preceded by a backslash does not end the cell: the
    backslash is dropped and the pipe is kept as a literal ``|``.
    Only the immediately preceding character is inspected, so the pipe in
    ``\\|`` (two backslashes) is treated as escaped as well.

    :param string: the text of one table row.
    :return: the cells in order; always contains at least one element, and a
        leading or trailing pipe yields an empty first or last cell.
    """
    result: list[str] = []
    # text of the current cell collected so far (only non-empty after an escaped pipe)
    current = ""
    # start of the part of the current cell that has not been copied yet
    lastPos = 0
    isEscaped = False

    for pos, ch in enumerate(string):
        if ch == "|":
            if not isEscaped:
                # pipe separating cells, '|'
                result.append(current + string[lastPos:pos])
                current = ""
                lastPos = pos + 1
            else:
                # escaped pipe, '\|': copy everything before the backslash and
                # resume at the pipe itself, so the backslash is skipped
                current += string[lastPos : pos - 1]
                lastPos = pos

        isEscaped = ch == "\\"

    result.append(current + string[lastPos:])

    return result
57
58
def _splitRow(lineText: str) -> list[str]:
    """Split a row into cells, dropping the empty cell made by a leading/trailing pipe."""
    cells = escapedSplit(lineText)
    if cells and cells[0] == "":
        cells.pop(0)
    if cells and cells[-1] == "":
        cells.pop()
    return cells


def _pushCell(state: StateBlock, tag: str, align: str, content: str, line: int) -> None:
    """Push the open / inline / close tokens for a single ``th`` or ``td`` cell."""
    token = state.push(tag + "_open", tag, 1)
    if align:
        token.attrs = {"style": "text-align:" + align}

    token = state.push("inline", "", 0)
    # note in markdown-it this map was removed in v12.0.0 however, we keep it,
    # since it is helpful to propagate to children tokens
    token.map = [line, line + 1]
    token.content = content
    token.children = []

    state.push(tag + "_close", tag, -1)


def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule for GFM tables.

    Treats ``startLine`` as the header row and ``startLine + 1`` as the
    delimiter row. When both are valid, pushes the ``table`` / ``thead`` /
    ``tbody`` / ``tr`` / ``th`` / ``td`` / ``inline`` tokens onto ``state``
    and sets ``state.line`` to the first line after the table.

    :param state: block parser state to read lines from and push tokens to.
    :param startLine: index of the candidate header row.
    :param endLine: index at which scanning stops (exclusive).
    :param silent: when true, only validate; return ``True`` without pushing
        any token or modifying ``state``.
    :return: ``True`` if a table starts at ``startLine``, otherwise ``False``.
    """
    # should have at least two lines
    if startLine + 2 > endLine:
        return False

    nextLine = startLine + 1

    if state.sCount[nextLine] < state.blkIndent:
        return False

    if state.is_code_block(nextLine):
        return False

    # first character of the second line should be '|', '-', ':',
    # and no other characters are allowed but spaces;
    # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp
    delimiterChars = ("|", "-", ":")
    delimiterText = getLine(state, nextLine)

    # the delimiter row needs at least two characters
    if len(delimiterText) < 2:
        return False

    first_ch = delimiterText[0]
    if first_ch not in delimiterChars:
        return False

    # if first character is '-', then second character must not be a space
    # (due to parsing ambiguity with list)
    if first_ch == "-" and isStrSpace(delimiterText[1]):
        return False

    if any(
        ch not in delimiterChars and not isStrSpace(ch) for ch in delimiterText[1:]
    ):
        return False

    delimiterCells = delimiterText.split("|")
    lastIndex = len(delimiterCells) - 1
    aligns: list[str] = []
    for index, cell in enumerate(delimiterCells):
        marker = cell.strip()
        if not marker:
            # allow empty columns before and after table, but not in between columns;
            # e.g. allow ` |---| `, disallow ` ---||--- `
            if index == 0 or index == lastIndex:
                continue
            return False

        if not headerLineRe.search(marker):
            return False
        if marker.endswith(":"):
            aligns.append("center" if marker.startswith(":") else "right")
        elif marker.startswith(":"):
            aligns.append("left")
        else:
            aligns.append("")

    headerText = getLine(state, startLine).strip()
    if "|" not in headerText:
        return False
    if state.is_code_block(startLine):
        return False
    headerCells = _splitRow(headerText)

    # header row will define an amount of columns in the entire table,
    # and align row should be exactly the same (the rest of the rows can differ)
    columnCount = len(headerCells)
    if columnCount == 0 or columnCount != len(aligns):
        return False

    if silent:
        return True

    oldParentType = state.parentType
    state.parentType = "table"

    # use 'blockquote' lists for termination because it's
    # the most similar to tables
    terminatorRules = state.md.block.ruler.getRules("blockquote")

    token = state.push("table_open", "table", 1)
    # the end line is filled in once the last row is known
    token.map = tableLines = [startLine, 0]

    token = state.push("thead_open", "thead", 1)
    token.map = [startLine, startLine + 1]

    token = state.push("tr_open", "tr", 1)
    token.map = [startLine, startLine + 1]

    for cell, align in zip(headerCells, aligns):
        _pushCell(state, "th", align, cell.strip(), startLine)

    state.push("tr_close", "tr", -1)
    state.push("thead_close", "thead", -1)

    tbodyLines: list[int] | None = None
    autocompleted_cells = 0
    nextLine = startLine + 2
    while nextLine < endLine:
        if state.sCount[nextLine] < state.blkIndent:
            break

        if any(rule(state, nextLine, endLine, True) for rule in terminatorRules):
            break

        rowText = getLine(state, nextLine).strip()
        if not rowText:
            break
        if state.is_code_block(nextLine):
            break
        rowCells = _splitRow(rowText)

        # note: autocomplete count can be negative if user specifies more columns than header,
        # but that does not affect intended use (which is limiting expansion)
        autocompleted_cells += columnCount - len(rowCells)
        if autocompleted_cells > MAX_AUTOCOMPLETED_CELLS:
            break

        # tbody is opened lazily, only once the first body row is accepted
        if nextLine == startLine + 2:
            token = state.push("tbody_open", "tbody", 1)
            token.map = tbodyLines = [startLine + 2, 0]

        token = state.push("tr_open", "tr", 1)
        token.map = [nextLine, nextLine + 1]

        # every row gets exactly columnCount cells: missing cells are emitted
        # empty and cells beyond the header width are dropped
        for index, align in enumerate(aligns):
            content = rowCells[index].strip() if index < len(rowCells) else ""
            _pushCell(state, "td", align, content, nextLine)

        state.push("tr_close", "tr", -1)

        nextLine += 1

    if tbodyLines is not None:
        state.push("tbody_close", "tbody", -1)
        tbodyLines[1] = nextLine

    state.push("table_close", "table", -1)

    tableLines[1] = nextLine
    state.parentType = oldParentType
    state.line = nextLine
    return True