Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/markdown_it/rules_inline/state_inline.py: 98%

97 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:07 +0000

from __future__ import annotations

from collections import namedtuple
from collections.abc import MutableMapping
from dataclasses import dataclass
from typing import TYPE_CHECKING, NamedTuple

from .._compat import DATACLASS_KWARGS
from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
from ..ruler import StateBase
from ..token import Token

12 

13if TYPE_CHECKING: 

14 from markdown_it import MarkdownIt 

15 

16 

@dataclass(**DATACLASS_KWARGS)
class Delimiter:
    """A single emphasis-like delimiter run entry recorded during inline parsing."""

    # Char code of the starting marker (number).
    marker: int

    # Total length of these series of delimiters.
    length: int

    # An amount of characters before this one that's equivalent to
    # current one. In plain English: if this delimiter does not open
    # an emphasis, neither do previous `jump` characters.
    #
    # Used to skip sequences like "*****" in one step, for 1st asterisk
    # value will be 0, for 2nd it's 1 and so on.
    jump: int

    # A position of the token this delimiter corresponds to.
    token: int

    # If this delimiter is matched as a valid opener, `end` will be
    # equal to its position, otherwise it's `-1`.
    end: int

    # Boolean flags that determine if this delimiter could open or close
    # an emphasis.
    open: bool
    close: bool

    # NOTE(review): annotated ``bool | None`` but never read in this file;
    # the name suggests a nesting level (an int) — confirm against the
    # rules that populate it before relying on the annotation.
    level: bool | None = None

46 

47 

class Scanned(NamedTuple):
    """Result of ``StateInline.scanDelims``.

    A typed ``NamedTuple`` (rather than an untyped ``collections.namedtuple``)
    so the field types are checkable; construction, attribute access and
    tuple behavior are unchanged for existing callers.
    """

    # Whether the delimiter run may open an emphasis sequence.
    can_open: bool
    # Whether the delimiter run may close an emphasis sequence.
    can_close: bool
    # Number of marker characters in the run.
    length: int

49 

50 

51class StateInline(StateBase): 

    def __init__(
        self, src: str, md: MarkdownIt, env: MutableMapping, outTokens: list[Token]
    ):
        """Initialise the inline parsing state.

        :param src: the source string being parsed
        :param md: the parent ``MarkdownIt`` instance
        :param env: sandbox mapping for cross-rule data exchange
        :param outTokens: list that produced tokens are appended to
        """
        self.src = src
        self.env = env
        self.md = md
        self.tokens = outTokens
        # One meta entry (dict or None) per entry already in ``tokens``;
        # kept in lock-step with ``tokens`` by ``push``.
        self.tokens_meta: list[dict | None] = [None] * len(outTokens)

        self.pos = 0  # current scan position within ``src``
        self.posMax = len(self.src)  # scan limit (exclusive)
        self.level = 0  # current nesting depth of open tokens
        self.pending = ""  # literal text accumulated, awaiting flush
        self.pendingLevel = 0  # nesting level the pending text belongs to

        # Stores { start: end } pairs. Useful for backtrack
        # optimization of pairs parse (emphasis, strikes).
        self.cache: dict[int, int] = {}

        # List of emphasis-like delimiters for current tag
        self.delimiters: list[Delimiter] = []

        # Stack of delimiter lists for upper level tags
        self._prev_delimiters: list[list[Delimiter]] = []

        # backtick length => last seen position
        self.backticks: dict[int, int] = {}
        self.backticksScanned = False

80 

81 def __repr__(self): 

82 return ( 

83 f"{self.__class__.__name__}" 

84 f"(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})" 

85 ) 

86 

87 def pushPending(self): 

88 token = Token("text", "", 0) 

89 token.content = self.pending 

90 token.level = self.pendingLevel 

91 self.tokens.append(token) 

92 self.pending = "" 

93 return token 

94 

    def push(self, ttype, tag, nesting):
        """Push new token to "stream".
        If pending text exists - flush it as text token

        :param ttype: token type name
        :param tag: associated HTML tag
        :param nesting: 1 for an opening tag, -1 for closing, 0 for self-closing
        :returns: the newly created token
        """
        if self.pending:
            self.pushPending()

        token = Token(ttype, tag, nesting)
        token_meta = None

        if nesting < 0:
            # closing tag: leave the nested scope *before* recording the
            # token's level, and restore the parent's delimiter list that
            # was saved when the matching opener was pushed
            self.level -= 1
            self.delimiters = self._prev_delimiters.pop()

        # a closer carries the (already decremented) outer level; an opener
        # carries the level *before* the increment below
        token.level = self.level

        if nesting > 0:
            # opening tag: enter a new scope — stash the current delimiter
            # list and start a fresh one. The meta dict deliberately aliases
            # the new (empty) list, so delimiters found later inside this tag
            # remain reachable through the token's meta.
            self.level += 1
            self._prev_delimiters.append(self.delimiters)
            self.delimiters = []
            token_meta = {"delimiters": self.delimiters}

        self.pendingLevel = self.level
        self.tokens.append(token)
        self.tokens_meta.append(token_meta)
        return token

123 

    def scanDelims(self, start: int, canSplitWord: bool) -> Scanned:
        """
        Scan a sequence of emphasis-like markers, and determine whether
        it can start an emphasis sequence or end an emphasis sequence.

        - start - position to scan from (it should point at a valid marker);
        - canSplitWord - determine if these markers can be found inside a word

        :returns: ``Scanned(can_open, can_close, length)`` for the run
        """
        pos = start
        left_flanking = True
        right_flanking = True
        maximum = self.posMax
        # NOTE(review): ``srcCharCode`` is not defined in this file —
        # presumably a char-code view of ``src`` provided by the base class;
        # confirm in StateBase.
        marker = self.srcCharCode[start]

        # treat beginning of the line as a whitespace
        lastChar = self.srcCharCode[start - 1] if start > 0 else 0x20

        # consume the whole run of identical marker characters
        while pos < maximum and self.srcCharCode[pos] == marker:
            pos += 1

        count = pos - start  # length of the delimiter run

        # treat end of the line as a whitespace
        nextChar = self.srcCharCode[pos] if pos < maximum else 0x20

        isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
        isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))

        isLastWhiteSpace = isWhiteSpace(lastChar)
        isNextWhiteSpace = isWhiteSpace(nextChar)

        # left-flanking: not followed by whitespace, and not followed by
        # punctuation unless preceded by whitespace or punctuation
        if isNextWhiteSpace:
            left_flanking = False
        elif isNextPunctChar:
            if not (isLastWhiteSpace or isLastPunctChar):
                left_flanking = False

        # right-flanking: the mirror image of the rule above
        if isLastWhiteSpace:
            right_flanking = False
        elif isLastPunctChar:
            if not (isNextWhiteSpace or isNextPunctChar):
                right_flanking = False

        if not canSplitWord:
            # intraword emphasis is disallowed: additionally require the run
            # not to be flanking on the other side, unless adjacent to punctuation
            can_open = left_flanking and ((not right_flanking) or isLastPunctChar)
            can_close = right_flanking and ((not left_flanking) or isNextPunctChar)
        else:
            can_open = left_flanking
            can_close = right_flanking

        return Scanned(can_open, can_close, count)