1from __future__ import annotations
2
3from typing import TYPE_CHECKING, Literal
4
5from ..common.utils import isStrSpace
6from ..ruler import StateBase
7from ..token import Token
8from ..utils import EnvType
9
10if TYPE_CHECKING:
11 from markdown_it.main import MarkdownIt
12
13
class StateBlock(StateBase):
    """Parsing state for block rules: source text, output tokens and line caches.

    On construction the source is scanned once and, for every line, its begin
    offset, end offset, leading-whitespace length and expanded indent width
    are recorded so that rules can jump between lines without rescanning.
    """

    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, tokens: list[Token]
    ) -> None:
        """Store the inputs and build the per-line marker caches from ``src``."""
        self.src = src
        self.md = md  # link to parser instance
        self.env = env

        #
        # Internal state variables
        #

        self.tokens = tokens

        # Per-line caches, all indexed by line number.
        self.bMarks: list[int] = []  # offset at which each line begins
        self.eMarks: list[int] = []  # offset at which each line ends
        # count of leading whitespace characters (a tab counts as one)
        self.tShift: list[int] = []
        self.sCount: list[int] = []  # indent width of each line (tabs expanded)

        # Number of virtual spaces (tabs expanded) between the recorded
        # beginning of each line (bMarks) and the real beginning of that line.
        #
        # This is a hack: blockquotes override bMarks and lose information
        # while doing so.
        #
        # It matters only when expanding tabs; think of it as an initial tab
        # length, e.g. bsCount=21 applied to string `\t123` means the first
        # tab should be expanded to 4-21%4 === 3 spaces.
        self.bsCount: list[int] = []

        # block parser variables
        # required block content indent (for example, inside a list it is
        # positioned after the list marker)
        self.blkIndent = 0
        self.line = 0  # line index in src
        self.lineMax = 0  # lines count
        self.tight = False  # loose/tight mode for lists
        self.ddIndent = -1  # indent of the current dd block (-1 if none)
        self.listIndent = -1  # indent of the current list block (-1 if none)

        # one of 'blockquote', 'list', 'root', 'paragraph' or 'reference';
        # lists use it to decide whether they may interrupt a paragraph
        self.parentType = "root"

        self.level = 0

        # renderer
        self.result = ""

        # Build the caches in a single pass over the source.
        text_len = len(self.src)
        final_index = text_len - 1
        line_begin = 0
        lead_chars = 0  # whitespace characters seen at the start of the line
        lead_width = 0  # the same run measured in columns, tab stops every 4
        in_indent = True  # still inside the line's leading whitespace run?

        for index, char in enumerate(self.src):
            if in_indent and isStrSpace(char):
                lead_chars += 1
                lead_width += (4 - lead_width % 4) if char == "\t" else 1
                # NOTE: leading whitespace never reaches the end-of-line check
                # below, exactly as in the original control flow.
                continue
            in_indent = False

            # Only a newline, or the very last character, terminates a line.
            if char != "\n" and index != final_index:
                continue

            # A line closed by the end of the text ends one past its last
            # character; a line closed by a newline ends on the newline.
            line_end = index if char == "\n" else index + 1
            self.bMarks.append(line_begin)
            self.eMarks.append(line_end)
            self.tShift.append(lead_chars)
            self.sCount.append(lead_width)
            self.bsCount.append(0)

            # reset the per-line counters for the next line
            line_begin = line_end + 1
            lead_chars = lead_width = 0
            in_indent = True

        # Push fake entry to simplify cache bounds checks
        self.bMarks.append(text_len)
        self.eMarks.append(text_len)
        self.tShift.append(0)
        self.sCount.append(0)
        self.bsCount.append(0)

        self.lineMax = len(self.bMarks) - 1  # the fake line is not counted

        # looked up once here so that is_code_block stays cheap
        self._code_enabled = "code" in self.md["block"].ruler.get_active_rules()

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        details = f"(line={self.line},level={self.level},tokens={len(self.tokens)})"
        return f"{cls_name}" + details

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Create a block token, append it to ``self.tokens`` and return it.

        A closing token (``nesting < 0``) lowers ``self.level`` before taking
        its level; an opening token (``nesting > 0``) takes its level and then
        raises ``self.level``.
        """
        token = Token(ttype, tag, nesting)
        token.block = True
        if nesting < 0:
            # closing tag
            self.level -= 1
            token.level = self.level
        elif nesting > 0:
            # opening tag
            token.level = self.level
            self.level += 1
        else:
            token.level = self.level
        self.tokens.append(token)
        return token

    def isEmpty(self, line: int) -> bool:
        """Return True if nothing follows the leading whitespace of ``line``."""
        content_start = self.bMarks[line] + self.tShift[line]
        return content_start >= self.eMarks[line]

    def skipEmptyLines(self, from_pos: int) -> int:
        """Return the first line index at or after ``from_pos`` that has content.

        Returns ``self.lineMax`` (or ``from_pos`` if it is already larger) when
        no such line exists.
        """
        line = from_pos
        while line < self.lineMax:
            try:
                has_content = (
                    self.bMarks[line] + self.tShift[line] < self.eMarks[line]
                )
            except IndexError:
                # an index outside the caches is treated like an empty line
                has_content = False
            if has_content:
                return line
            line += 1
        return line

    def skipSpaces(self, pos: int) -> int:
        """Return the first offset at or after ``pos`` that is not a space.

        Stops quietly when the offset runs outside the source.
        """
        try:
            while isStrSpace(self.src[pos]):
                pos += 1
        except IndexError:
            pass
        return pos

    def skipSpacesBack(self, pos: int, minimum: int) -> int:
        """Walk back from ``pos`` over spaces, never going below ``minimum``.

        Returns the offset just after the last non-space character found,
        or ``minimum`` if only spaces lie in between.
        """
        while pos > minimum and isStrSpace(self.src[pos - 1]):
            pos -= 1
        return pos

    def skipChars(self, pos: int, code: int) -> int:
        """Return the first offset at or after ``pos`` whose char code is not ``code``.

        Stops quietly when the offset runs outside the source.
        """
        try:
            while self.srcCharCode[pos] == code:
                pos += 1
        except IndexError:
            pass
        return pos

    def skipCharsStr(self, pos: int, ch: str) -> int:
        """Return the first offset at or after ``pos`` whose character is not ``ch``.

        Stops quietly when the offset runs outside the source.
        """
        try:
            while self.src[pos] == ch:
                pos += 1
        except IndexError:
            pass
        return pos

    def skipCharsBack(self, pos: int, code: int, minimum: int) -> int:
        """Walk back from ``pos - 1`` over char code ``code``, down to ``minimum``.

        Returns the offset just after the last differing character found,
        or ``minimum`` if every character in between matches.
        """
        while pos > minimum and self.srcCharCode[pos - 1] == code:
            pos -= 1
        return pos

    def skipCharsStrBack(self, pos: int, ch: str, minimum: int) -> int:
        """Walk back from ``pos - 1`` over character ``ch``, down to ``minimum``.

        Returns the offset just after the last differing character found,
        or ``minimum`` if every character in between matches.
        """
        while pos > minimum and self.src[pos - 1] == ch:
            pos -= 1
        return pos

    def getLines(self, begin: int, end: int, indent: int, keepLastLF: bool) -> str:
        """Return the text of lines ``begin`` to ``end - 1`` with ``indent`` columns removed.

        Every line but the last keeps its trailing newline; the last one keeps
        it only when ``keepLastLF`` is true. An empty range gives ``""``.
        """
        if begin >= end:
            return ""

        pieces: list[str] = []
        final_line = end - 1

        for line_no in range(begin, end):
            line_start = self.bMarks[line_no]
            stop = self.eMarks[line_no]
            if line_no < final_line or keepLastLF:
                stop += 1  # include the line terminator

            cursor = line_start
            removed = 0  # columns of indentation consumed so far
            while cursor < stop and removed < indent:
                char = self.src[cursor]
                if isStrSpace(char):
                    if char == "\t":
                        removed += 4 - (removed + self.bsCount[line_no]) % 4
                    else:
                        removed += 1
                elif cursor - line_start >= self.tShift[line_no]:
                    # real content reached before the indent was used up
                    break
                else:
                    # a non-space character still inside tShift counts as
                    # a single column
                    removed += 1
                cursor += 1

            remainder = self.src[cursor:stop]
            if removed > indent:
                # partially expanding tabs in code blocks, e.g '\t\tfoobar'
                # with indent=2 becomes '  \tfoobar'
                remainder = " " * (removed - indent) + remainder
            pieces.append(remainder)

        return "".join(pieces)

    def is_code_block(self, line: int) -> bool:
        """Tell whether ``line`` qualifies as an indented code block.

        That requires the code block rule to be enabled and the line to be
        indented at least 4 columns beyond ``self.blkIndent``.
        """
        enabled = self._code_enabled
        return enabled and self.sCount[line] - self.blkIndent > 3