1# GFM table, https://github.github.com/gfm/#tables-extension-
2from __future__ import annotations
3
4import re
5
6from ..common.utils import charStrAt, isStrSpace
7from .state_block import StateBlock
8
# Matches one whitespace-stripped cell of the delimiter row: one or more
# dashes, optionally preceded and/or followed by a single colon.
headerLineRe = re.compile(r"^:?-+:?$")
# Matches a pipe at the very start or at the very end of a string.
# NOTE(review): not referenced by the code shown here - presumably kept for
# external importers; confirm before removing.
enclosingPipesRe = re.compile(r"^\||\|$")
11
12
def getLine(state: StateBlock, line: int) -> str:
    """Return the slice of ``state.src`` that belongs to ``line``.

    The slice starts at ``bMarks[line] + tShift[line]`` and stops just
    before ``eMarks[line]``.
    """
    start = state.bMarks[line] + state.tShift[line]
    return state.src[start : state.eMarks[line]]
19
20
def escapedSplit(string: str) -> list[str]:
    """Split ``string`` into cells on every pipe that is not escaped.

    A pipe counts as escaped when the character immediately before it is a
    backslash; in that case the backslash is dropped and the pipe is kept as
    part of the cell text. Cells are not stripped, and a leading or trailing
    pipe yields an empty first or last cell.

    :param string: text of one table row
    :return: the list of cell texts; always contains at least one item
        (``[""]`` for an empty string)
    """
    cells: list[str] = []
    # fragments of the cell currently being assembled, joined once per cell
    fragments: list[str] = []
    segment_start = 0
    escaped = False

    for index, char in enumerate(string):
        if char == "|":
            if escaped:
                # escaped pipe, '\|': keep the text before the backslash and
                # restart the segment at the pipe so the backslash is skipped
                fragments.append(string[segment_start : index - 1])
                segment_start = index
            else:
                # pipe separating cells, '|'
                fragments.append(string[segment_start:index])
                cells.append("".join(fragments))
                fragments = []
                segment_start = index + 1

        escaped = char == "\\"

    # whatever follows the last separator is the final cell
    fragments.append(string[segment_start:])
    cells.append("".join(fragments))

    return cells
50
51
def _split_row(row_text: str) -> list[str]:
    """Split a row with ``escapedSplit`` and drop an empty first/last cell."""
    cells = escapedSplit(row_text)
    if cells[:1] == [""]:
        del cells[0]
    if cells[-1:] == [""]:
        del cells[-1]
    return cells


def table(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool:
    """Block rule: try to parse a GFM table whose header row is ``startLine``.

    The line after ``startLine`` must be a delimiter row (cells made of
    dashes with optional colons) with as many cells as the header row.
    Following lines are consumed as body rows until a line is indented less
    than ``state.blkIndent``, a "blockquote" terminator rule matches, the
    line is empty, or the line is a code block. Every body row is emitted
    with exactly as many cells as the header: missing cells become empty,
    surplus cells are ignored.

    :param state: block parser state; tokens are pushed onto it
    :param startLine: index of the candidate header row
    :param endLine: index at which parsing must stop (exclusive)
    :param silent: when true, only validate; no tokens are pushed and
        ``state`` is left unmodified
    :return: ``True`` if a table was recognised, otherwise ``False``; on a
        non-silent match ``state.line`` is set to the first line after the
        table
    """
    # should have at least two lines
    if startLine + 2 > endLine:
        return False

    delimiter_line = startLine + 1

    if state.sCount[delimiter_line] < state.blkIndent:
        return False
    if state.is_code_block(delimiter_line):
        return False

    # The delimiter row must be at least two characters long, start with
    # '|', '-' or ':' and otherwise contain only those characters or spaces;
    # basically, this is the equivalent of /^[-:|][-:|\s]*$/ regexp
    delimiter_chars = ("|", "-", ":")
    row_start = state.bMarks[delimiter_line] + state.tShift[delimiter_line]
    row_end = state.eMarks[delimiter_line]
    if row_end - row_start < 2:
        return False

    first_ch = state.src[row_start]
    if first_ch not in delimiter_chars:
        return False

    # if first character is '-', then second character must not be a space
    # (due to parsing ambiguity with list)
    if first_ch == "-" and isStrSpace(state.src[row_start + 1]):
        return False

    for index in range(row_start + 1, row_end):
        ch = state.src[index]
        if not (ch in delimiter_chars or isStrSpace(ch)):
            return False

    # Derive one alignment per delimiter cell from where its colons sit;
    # the key is (colon at start, colon at end).
    align_by_colons = {
        (True, True): "center",
        (False, True): "right",
        (True, False): "left",
        (False, False): "",
    }
    delimiter_cells = getLine(state, delimiter_line).split("|")
    last_index = len(delimiter_cells) - 1
    aligns: list[str] = []
    for index, raw_cell in enumerate(delimiter_cells):
        spec = raw_cell.strip()
        if not spec:
            # allow empty columns before and after table, but not in between
            # columns; e.g. allow ` |---| `, disallow ` ---||--- `
            if index in (0, last_index):
                continue
            return False

        if not headerLineRe.search(spec):
            return False

        colon_left = charStrAt(spec, 0) == ":"
        colon_right = charStrAt(spec, len(spec) - 1) == ":"
        aligns.append(align_by_colons[colon_left, colon_right])

    header_text = getLine(state, startLine).strip()
    if "|" not in header_text or state.is_code_block(startLine):
        return False
    header_cells = _split_row(header_text)

    # header row will define an amount of columns in the entire table,
    # and align row should be exactly the same (the rest of the rows can differ)
    if not header_cells or len(header_cells) != len(aligns):
        return False

    if silent:
        return True

    saved_parent_type = state.parentType
    state.parentType = "table"

    # use 'blockquote' lists for termination because it's
    # the most similar to tables
    terminator_rules = state.md.block.ruler.getRules("blockquote")

    # the end line of this span is filled in once the table extent is known
    table_span = [startLine, 0]
    state.push("table_open", "table", 1).map = table_span
    state.push("thead_open", "thead", 1).map = [startLine, startLine + 1]
    state.push("tr_open", "tr", 1).map = [startLine, startLine + 1]

    for cell_text, align in zip(header_cells, aligns):
        cell_open = state.push("th_open", "th", 1)
        if align:
            cell_open.attrs = {"style": "text-align:" + align}

        inline = state.push("inline", "", 0)
        # note in markdown-it this map was removed in v12.0.0 however, we keep it,
        # since it is helpful to propagate to children tokens
        inline.map = [startLine, startLine + 1]
        inline.content = cell_text.strip()
        inline.children = []

        state.push("th_close", "th", -1)

    state.push("tr_close", "tr", -1)
    state.push("thead_close", "thead", -1)

    first_body_line = startLine + 2
    tbody_span = None  # stays None when the table has no body rows
    current_line = first_body_line
    while current_line < endLine:
        if state.sCount[current_line] < state.blkIndent:
            break
        if any(rule(state, current_line, endLine, True) for rule in terminator_rules):
            break

        row_text = getLine(state, current_line).strip()
        if not row_text or state.is_code_block(current_line):
            break
        row_cells = _split_row(row_text)

        if current_line == first_body_line:
            # open the body lazily, on the first accepted row
            tbody_span = [first_body_line, 0]
            state.push("tbody_open", "tbody", 1).map = tbody_span

        state.push("tr_open", "tr", 1).map = [current_line, current_line + 1]

        for column, align in enumerate(aligns):
            cell_open = state.push("td_open", "td", 1)
            if align:
                cell_open.attrs = {"style": "text-align:" + align}

            inline = state.push("inline", "", 0)
            # note in markdown-it this map was removed in v12.0.0 however, we keep it,
            # since it is helpful to propagate to children tokens
            inline.map = [current_line, current_line + 1]
            # rows shorter than the header are padded with empty cells
            inline.content = (
                row_cells[column].strip() if column < len(row_cells) else ""
            )
            inline.children = []

            state.push("td_close", "td", -1)

        state.push("tr_close", "tr", -1)

        current_line += 1

    if tbody_span is not None:
        state.push("tbody_close", "tbody", -1)
        tbody_span[1] = current_line

    state.push("table_close", "table", -1)

    table_span[1] = current_line
    state.parentType = saved_parent_type
    state.line = current_line
    return True