1from __future__ import annotations
2
3from collections import namedtuple
4from dataclasses import dataclass
5from typing import TYPE_CHECKING, Any, Literal
6
7from .._compat import DATACLASS_KWARGS
8from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
9from ..ruler import StateBase
10from ..token import Token
11from ..utils import EnvType
12
13if TYPE_CHECKING:
14 from markdown_it import MarkdownIt
15
16
@dataclass(**DATACLASS_KWARGS)
class Delimiter:
    """A potential emphasis opener/closer recorded while scanning a run of
    marker characters; collected in ``StateInline.delimiters`` for later
    pairing by the emphasis-like rules.
    """

    # Char code of the starting marker (number).
    marker: int

    # Total length of these series of delimiters.
    length: int

    # A position of the token this delimiter corresponds to.
    token: int

    # If this delimiter is matched as a valid opener, `end` will be
    # equal to its position, otherwise it's `-1`.
    end: int

    # Boolean flags that determine if this delimiter could open or close
    # an emphasis.
    open: bool
    close: bool

    # NOTE(review): annotated `bool | None`, but the field is never written
    # in this file; presumably a nesting level (int) when set — confirm at
    # the call sites before relying on this annotation.
    level: bool | None = None
39
# Result of ``StateInline.scanDelims``: whether the scanned run of markers
# may open and/or close emphasis, and how many marker chars it spans.
Scanned = namedtuple("Scanned", "can_open can_close length")
42
class StateInline(StateBase):
    """Mutable state threaded through all inline-level parsing rules."""

    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> None:
        self.src = src
        self.env = env
        self.md = md
        self.tokens = outTokens
        # One meta slot per pre-existing token; `push` appends new slots.
        self.tokens_meta: list[dict[str, Any] | None] = [None] * len(outTokens)

        self.pos = 0
        self.posMax = len(src)
        self.level = 0
        self.pending = ""
        self.pendingLevel = 0

        # Maps a start offset to its matching end offset, so pair rules
        # (emphasis, strikethrough) can backtrack without re-scanning.
        self.cache: dict[int, int] = {}

        # Emphasis-like delimiters collected for the tag currently open.
        self.delimiters: list[Delimiter] = []

        # Saved delimiter lists of enclosing, not-yet-closed tags.
        self._prev_delimiters: list[list[Delimiter]] = []

        # backtick run length -> last position already scanned
        self.backticks: dict[int, int] = {}
        self.backticksScanned = False

        # Non-zero while inside <a> / markdown links; used to disable
        # inline linkify-it execution there.
        self.linkLevel = 0

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f"{cls_name}(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"

    def pushPending(self) -> Token:
        """Flush accumulated plain text into a ``text`` token and reset it."""
        text_token = Token("text", "", 0)
        text_token.content = self.pending
        text_token.level = self.pendingLevel
        self.tokens.append(text_token)
        self.pending = ""
        return text_token

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Append a new token to the stream.

        Any pending text is flushed as a ``text`` token first.
        """
        if self.pending:
            self.pushPending()

        token = Token(ttype, tag, nesting)
        token_meta: dict[str, Any] | None = None

        if nesting < 0:
            # Closing tag: drop back one level and restore the delimiter
            # list that was active before the matching opener.
            self.level -= 1
            self.delimiters = self._prev_delimiters.pop()

        token.level = self.level

        if nesting > 0:
            # Opening tag: go one level deeper, remember the current
            # delimiter list and start a fresh one for the new scope.
            self.level += 1
            self._prev_delimiters.append(self.delimiters)
            self.delimiters = []
            token_meta = {"delimiters": self.delimiters}

        self.pendingLevel = self.level
        self.tokens.append(token)
        self.tokens_meta.append(token_meta)
        return token

    def scanDelims(self, start: int, canSplitWord: bool) -> Scanned:
        """Scan a run of identical emphasis-like markers.

        Determines whether the run can start and/or end an emphasis
        sequence (CommonMark left-/right-flanking rules).

        - start - position to scan from (must point at a valid marker);
        - canSplitWord - whether these markers may appear inside a word
        """
        maximum = self.posMax
        marker = self.src[start]

        # Characters just outside the run; line edges count as whitespace.
        previous = self.src[start - 1] if start > 0 else " "

        scan = start
        while scan < maximum and self.src[scan] == marker:
            scan += 1
        count = scan - start

        following = self.src[scan] if scan < maximum else " "

        prev_is_punct = isMdAsciiPunct(ord(previous)) or isPunctChar(previous)
        next_is_punct = isMdAsciiPunct(ord(following)) or isPunctChar(following)
        prev_is_space = isWhiteSpace(ord(previous))
        next_is_space = isWhiteSpace(ord(following))

        # De Morgan form of the CommonMark flanking conditions.
        left_flanking = not next_is_space and (
            not next_is_punct or prev_is_space or prev_is_punct
        )
        right_flanking = not prev_is_space and (
            not prev_is_punct or next_is_space or next_is_punct
        )

        if canSplitWord:
            can_open = left_flanking
            can_close = right_flanking
        else:
            can_open = left_flanking and (not right_flanking or prev_is_punct)
            can_close = right_flanking and (not left_flanking or next_is_punct)

        return Scanned(can_open, can_close, count)