1from __future__ import annotations
2
3from dataclasses import dataclass
4from typing import TYPE_CHECKING, Any, Literal, NamedTuple
5
6from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
7from ..ruler import StateBase
8from ..token import Token
9from ..utils import EnvType
10
11if TYPE_CHECKING:
12 from markdown_it import MarkdownIt
13
14
@dataclass(slots=True)
class Delimiter:
    """One entry in the list of emphasis-like delimiters collected while
    parsing inline content (see ``StateInline.delimiters``)."""

    # Char code of the starting marker (number).
    marker: int

    # Total length of these series of delimiters.
    length: int

    # A position of the token this delimiter corresponds to.
    token: int

    # If this delimiter is matched as a valid opener, `end` will be
    # equal to its position, otherwise it's `-1`.
    end: int

    # Boolean flags that determine if this delimiter could open or close
    # an emphasis.
    open: bool
    close: bool

    # NOTE(review): annotated ``bool | None`` but the name suggests a nesting
    # depth (int); it is neither read nor written anywhere in this module —
    # confirm the intended type before relying on it.
    level: bool | None = None
36
37
class Scanned(NamedTuple):
    """Result of ``StateInline.scanDelims``."""

    # Whether the scanned delimiter run may open an emphasis sequence.
    can_open: bool
    # Whether the scanned delimiter run may close an emphasis sequence.
    can_close: bool
    # Number of consecutive marker characters in the run.
    length: int
42
43
class StateInline(StateBase):
    """Mutable state for one inline-level parsing pass over ``src``."""

    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> None:
        self.src = src
        self.md = md
        self.env = env
        self.tokens = outTokens
        self.tokens_meta: list[dict[str, Any] | None] = [None] * len(outTokens)

        # Current scan position within ``src`` and the exclusive end bound.
        self.pos = 0
        self.posMax = len(self.src)

        # Nesting level recorded on emitted tokens.
        self.level = 0

        # Literal text accumulated but not yet flushed as a "text" token,
        # plus the level it will carry once flushed.
        self.pending = ""
        self.pendingLevel = 0

        # Maps a start position to its matched end position, so pair
        # parsers (emphasis, strikethrough) can skip work on backtrack.
        self.cache: dict[int, int] = {}

        # Emphasis-like delimiters collected for the currently open tag.
        self.delimiters: list[Delimiter] = []

        # Delimiter lists saved for each enclosing, not-yet-closed tag.
        self._prev_delimiters: list[list[Delimiter]] = []

        # backtick run length -> last position such a run was seen at
        self.backticks: dict[int, int] = {}
        self.backticksScanned = False

        # While non-zero, inline linkify execution is disabled (we are
        # inside an <a> element or a markdown link).
        self.linkLevel = 0

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f"{cls_name}(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"

    def pushPending(self) -> Token:
        """Flush the accumulated ``pending`` text as a "text" token."""
        tok = Token("text", "", 0)
        tok.content = self.pending
        tok.level = self.pendingLevel
        self.tokens.append(tok)
        self.pending = ""
        return tok

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Append a new token to the stream.

        Any pending literal text is flushed as a "text" token first.
        """
        if self.pending:
            self.pushPending()

        tok = Token(ttype, tag, nesting)
        meta = None

        # A closing tag lowers the level *before* it is recorded on the
        # token, and restores the delimiter list of the enclosing tag.
        if nesting < 0:
            self.level -= 1
            self.delimiters = self._prev_delimiters.pop()

        tok.level = self.level

        # An opening tag raises the level *after* recording it, saves the
        # current delimiter list, and starts a fresh one which is exposed
        # to rules through the token meta.
        if nesting > 0:
            self.level += 1
            self._prev_delimiters.append(self.delimiters)
            self.delimiters = []
            meta = {"delimiters": self.delimiters}

        self.pendingLevel = self.level
        self.tokens.append(tok)
        self.tokens_meta.append(meta)
        return tok

    def scanDelims(self, start: int, canSplitWord: bool) -> Scanned:
        """Measure the run of identical markers beginning at ``start`` and
        decide whether it may open and/or close an emphasis sequence.

        - start - position to scan from (it should point at a valid marker);
        - canSplitWord - determine if these markers can be found inside a word

        """
        limit = self.posMax
        marker = self.src[start]

        # The beginning of the line counts as whitespace for flanking.
        prev_char = " " if start == 0 else self.src[start - 1]

        idx = start
        while idx < limit and self.src[idx] == marker:
            idx += 1
        run_length = idx - start

        # The end of the line likewise counts as whitespace.
        next_char = self.src[idx] if idx < limit else " "

        prev_is_punct = isMdAsciiPunct(ord(prev_char)) or isPunctChar(prev_char)
        next_is_punct = isMdAsciiPunct(ord(next_char)) or isPunctChar(next_char)
        prev_is_space = isWhiteSpace(ord(prev_char))
        next_is_space = isWhiteSpace(ord(next_char))

        left_flanking = not (
            next_is_space
            or (next_is_punct and not (prev_is_space or prev_is_punct))
        )
        right_flanking = not (
            prev_is_space
            or (prev_is_punct and not (next_is_space or next_is_punct))
        )

        can_open = left_flanking and (
            canSplitWord or not right_flanking or prev_is_punct
        )
        can_close = right_flanking and (
            canSplitWord or not left_flanking or next_is_punct
        )

        return Scanned(can_open, can_close, run_length)