1from __future__ import annotations
2
3from collections import namedtuple
4from dataclasses import dataclass
5from typing import TYPE_CHECKING, Any, Literal
6
7from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
8from ..ruler import StateBase
9from ..token import Token
10from ..utils import EnvType
11
12if TYPE_CHECKING:
13 from markdown_it import MarkdownIt
14
15
@dataclass(slots=True)
class Delimiter:
    """Record describing one series of emphasis-like delimiter markers.

    Instances are collected in ``StateInline.delimiters``.
    """

    # Character code (an integer, not a one-character string) of the
    # marker that starts the series.
    marker: int

    # Total length of this series of delimiters.
    length: int

    # Position of the token this delimiter corresponds to.
    token: int

    # Position of this delimiter when it has been matched as a valid
    # opener; `-1` otherwise.
    end: int

    # Whether this delimiter could open (`open`) or close (`close`)
    # an emphasis.
    open: bool
    close: bool

    # NOTE(review): nothing in the visible code sets or reads `level`.
    # It is annotated `bool | None` although the name suggests a nesting
    # depth -- confirm the intended type against the code that uses it.
    level: bool | None = None
37
38
# Result of ``StateInline.scanDelims``: whether the scanned marker run can
# open and/or close an emphasis sequence, and how many markers it contains.
Scanned = namedtuple("Scanned", "can_open can_close length")
40
41
class StateInline(StateBase):
    """Mutable state carried through inline parsing of one source string.

    Holds the source text, the scan position, the output token list and
    the delimiter bookkeeping that the methods below read and update.
    """

    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> None:
        """Set up the state for ``src``; new tokens are appended to ``outTokens``."""
        # Parser inputs.
        self.src = src
        self.env = env
        self.md = md

        # Output stream, with one (empty) metadata slot for every token
        # that is already in it.
        self.tokens = outTokens
        self.tokens_meta: list[dict[str, Any] | None] = [None for _ in outTokens]

        # Scan cursor and its upper bound.
        self.pos = 0
        self.posMax = len(self.src)

        # Current nesting level, plus the text accumulated since the last
        # token and the level that text will be emitted at.
        self.level = 0
        self.pending = ""
        self.pendingLevel = 0

        # Maps start -> end. Lets pair parsing (emphasis, strikes) avoid
        # repeating work when it backtracks.
        self.cache: dict[int, int] = {}

        # Emphasis-like delimiters collected for the current tag.
        self.delimiters: list[Delimiter] = []

        # Delimiter lists of the enclosing tags, innermost last.
        self._prev_delimiters: list[list[Delimiter]] = []

        # Maps backtick run length -> last position it was seen at.
        self.backticks: dict[int, int] = {}
        self.backticksScanned = False

        # Counter used to switch off inline linkify-it execution while
        # inside <a> and markdown links.
        self.linkLevel = 0

    def __repr__(self) -> str:
        """Return the class name with the scan position and token count."""
        name = self.__class__.__name__
        position = f"pos=[{self.pos} of {self.posMax}]"
        emitted = f"token={len(self.tokens)}"
        return f"{name}({position}, {emitted})"

    def pushPending(self) -> Token:
        """Emit the pending text as a ``text`` token and clear the buffer.

        The token takes ``pendingLevel`` as its level and is appended to
        ``tokens``.

        :return: the token that was appended.
        """
        text_token = Token("text", "", 0)
        text_token.content = self.pending
        text_token.level = self.pendingLevel
        self.tokens.append(text_token)
        self.pending = ""
        return text_token

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Append a new token to the output stream.

        Any pending text is flushed as a text token first.

        :param ttype: first argument passed to ``Token``.
        :param tag: second argument passed to ``Token``.
        :param nesting: negative for a closing tag, positive for an
            opening tag, zero otherwise.
        :return: the token that was appended.
        """
        if self.pending:
            self.pushPending()

        new_token = Token(ttype, tag, nesting)
        meta: dict[str, Any] | None = None

        if nesting < 0:
            # Closing tag: step out one level *before* stamping the token
            # and bring back the delimiter list of the enclosing tag.
            self.level -= 1
            self.delimiters = self._prev_delimiters.pop()

        new_token.level = self.level

        if nesting > 0:
            # Opening tag: step in one level *after* stamping the token,
            # park the current delimiter list and start an empty one that
            # is also reachable through this token's metadata.
            self.level += 1
            self._prev_delimiters.append(self.delimiters)
            fresh_delimiters: list[Delimiter] = []
            self.delimiters = fresh_delimiters
            meta = {"delimiters": fresh_delimiters}

        self.pendingLevel = self.level
        self.tokens.append(new_token)
        self.tokens_meta.append(meta)
        return new_token

    def scanDelims(self, start: int, canSplitWord: bool) -> Scanned:
        """Measure the marker run at ``start`` and classify it.

        The run is the stretch of characters equal to ``src[start]``,
        bounded by ``posMax``. The characters on either side of the run
        decide whether it is left- and/or right-flanking, and from that
        whether it can start or end an emphasis sequence.

        :param start: position to scan from; it should point at a valid
            marker.
        :param canSplitWord: whether these markers can be found inside a
            word.
        :return: ``Scanned(can_open, can_close, length)``.
        """
        text = self.src
        limit = self.posMax
        marker = text[start]

        # The beginning of the line counts as whitespace.
        prev_char = text[start - 1] if start > 0 else " "

        run_end = start
        while run_end < limit and text[run_end] == marker:
            run_end += 1

        # The end of the line counts as whitespace too.
        next_char = text[run_end] if run_end < limit else " "

        prev_is_punct = isMdAsciiPunct(ord(prev_char)) or isPunctChar(prev_char)
        next_is_punct = isMdAsciiPunct(ord(next_char)) or isPunctChar(next_char)

        prev_is_space = isWhiteSpace(ord(prev_char))
        next_is_space = isWhiteSpace(ord(next_char))

        # Left-flanking: not followed by whitespace, and if followed by
        # punctuation then preceded by whitespace or punctuation.
        if next_is_space:
            left_flanking = False
        elif next_is_punct and not (prev_is_space or prev_is_punct):
            left_flanking = False
        else:
            left_flanking = True

        # Right-flanking: the mirror image of the rule above.
        if prev_is_space:
            right_flanking = False
        elif prev_is_punct and not (next_is_space or next_is_punct):
            right_flanking = False
        else:
            right_flanking = True

        can_open = (
            (canSplitWord or not right_flanking or prev_is_punct)
            if left_flanking
            else False
        )
        can_close = (
            (canSplitWord or not left_flanking or next_is_punct)
            if right_flanking
            else False
        )

        return Scanned(can_open, can_close, run_end - start)