Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown_it/rules

1"""Convert straight quotation marks to typographic ones"""

3from __future__ import annotations

5import re

6from typing import Any

8from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace

9from ..token import Token

10from .state_core import StateCore

# Pattern matching a single straight quote character, either ' or ".
# NOTE(review): QUOTE_TEST_RE is not referenced by the code visible in this
# file; it is kept because other modules may import it.
QUOTE_TEST_RE = re.compile(r"['\"]")
# Same pattern; used both to pre-filter inline tokens and to locate each
# quote while scanning text tokens.
QUOTE_RE = re.compile(r"['\"]")
# U+2019 RIGHT SINGLE QUOTATION MARK, substituted for a single straight quote
# that is treated as an apostrophe rather than as part of a quote pair.
APOSTROPHE = "\u2019"  # ’

def replaceAt(string: str, index: int, ch: str) -> str:
    """Return *string* with the single character at *index* replaced by *ch*.

    *ch* may be any length (including empty), so the result can be longer or
    shorter than the input.  If *index* is at or past the end of *string*,
    *ch* is simply appended.

    :param string: the text to modify (not mutated; a new string is returned)
    :param index: zero-based position of the character to replace
    :param ch: the replacement text
    :raises AssertionError: if *index* is negative
    """
    # When the index is negative, the behavior is different from the js version.
    # But basically, the index will not be negative.
    #
    # An explicit check is used instead of an ``assert`` statement so that the
    # guard is still enforced when Python runs with ``-O`` (which strips
    # asserts); the exception type is kept for backward compatibility.
    if index < 0:
        raise AssertionError(f"index must be non-negative, got {index}")
    return string[:index] + ch + string[index + 1 :]

def _char_code_before_token(tokens: list[Token], i: int) -> int | None:
    """Return the code of the character that precedes ``tokens[i]`` on its line.

    Siblings before index *i* are scanned from nearest to farthest.  Tokens
    with empty ``content`` are skipped; a ``softbreak``/``hardbreak`` stops the
    scan.  When nothing is found the result is a space (0x20).
    """
    for k in range(i - 1, -1, -1):
        sibling = tokens[k]
        if sibling.type == "softbreak" or sibling.type == "hardbreak":
            break
        # should skip all tokens except 'text', 'html_inline' or 'code_inline'
        if not sibling.content:
            continue
        return charCodeAt(sibling.content, len(sibling.content) - 1)
    return 0x20


def _char_code_after_token(tokens: list[Token], i: int) -> int | None:
    """Return the code of the character that follows ``tokens[i]`` on its line.

    Siblings after index *i* are scanned in order.  Tokens with empty
    ``content`` are skipped; a ``softbreak``/``hardbreak`` stops the scan.
    When nothing is found the result is a space (0x20).
    """
    for k in range(i + 1, len(tokens)):
        sibling = tokens[k]
        if sibling.type == "softbreak" or sibling.type == "hardbreak":
            break
        # should skip all tokens except 'text', 'html_inline' or 'code_inline'
        if not sibling.content:
            continue
        return charCodeAt(sibling.content, 0)
    return 0x20


def process_inlines(tokens: list[Token], state: StateCore) -> None:
    """Replace straight quotes in the ``text`` tokens of *tokens*, in place.

    Each ``'`` or ``"`` found in a ``text`` token is classified, from the
    characters on either side of it, as able to open and/or close a quote.
    Candidate openers are remembered on a stack together with their nesting
    level; when a matching closer of the same kind and level is found, both
    characters are rewritten with the configured typographic quotes.  Single
    quotes that cannot pair up are turned into ``APOSTROPHE``.

    :param tokens: sibling inline tokens; the ``content`` of ``text`` tokens
        is modified in place
    :param state: only ``state.md.options.quotes`` is read: items 0/1 are the
        opening/closing replacements for ``"`` and items 2/3 those for ``'``
    """
    # Each entry describes an unmatched opening quote:
    #   token - index into `tokens`, pos - offset inside that token's content,
    #   single - True for ', level - nesting level of the owning token.
    stack: list[dict[str, Any]] = []

    for i, token in enumerate(tokens):
        thisLevel = token.level

        # Discard openers recorded at a deeper nesting level than this token:
        # keep everything up to and including the topmost entry whose level is
        # <= thisLevel (nothing at all if there is no such entry).
        keep = 0
        for k in range(len(stack) - 1, -1, -1):
            if stack[k]["level"] <= thisLevel:
                keep = k + 1
                break
        stack = stack[:keep]

        if token.type != "text":
            continue

        text = token.content
        pos = 0
        maximum = len(text)

        while pos < maximum:
            # Search from `pos` without slicing, so no copy of the tail is made
            # and the match offsets are absolute positions in `text`.
            t = QUOTE_RE.search(text, pos)
            if not t:
                break

            canOpen = canClose = True
            quotePos = t.start(0)
            pos = quotePos + 1
            isSingle = t.group(0) == "'"

            # Find previous character,
            # default to space if it's the beginning of the line
            lastChar: None | int
            if quotePos - 1 >= 0:
                lastChar = charCodeAt(text, quotePos - 1)
            else:
                lastChar = _char_code_before_token(tokens, i)

            # Find next character,
            # default to space if it's the end of the line
            nextChar: None | int
            if pos < maximum:
                nextChar = charCodeAt(text, pos)
            else:
                nextChar = _char_code_after_token(tokens, i)

            isLastPunctChar = lastChar is not None and (
                isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
            )
            isNextPunctChar = nextChar is not None and (
                isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))
            )

            isLastWhiteSpace = lastChar is not None and isWhiteSpace(lastChar)
            isNextWhiteSpace = nextChar is not None and isWhiteSpace(nextChar)

            if isNextWhiteSpace:  # noqa: SIM114
                canOpen = False
            elif isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar):
                canOpen = False

            if isLastWhiteSpace:  # noqa: SIM114
                canClose = False
            elif isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar):
                canClose = False

            if (
                nextChar == 0x22  # 0x22: "
                and t.group(0) == '"'
                and lastChar is not None
                and 0x30 <= lastChar <= 0x39  # 0x30: 0, 0x39: 9
            ):
                # special case: 1"" - count first quote as an inch
                canClose = canOpen = False

            if canOpen and canClose:
                # Replace quotes in the middle of punctuation sequence, but not
                # in the middle of the words, i.e.:
                #
                # 1. foo " bar " baz - not replaced
                # 2. foo-"-bar-"-baz - replaced
                # 3. foo"bar"baz     - not replaced
                canOpen = isLastPunctChar
                canClose = isNextPunctChar

            if not canOpen and not canClose:
                # middle of word
                if isSingle:
                    token.content = replaceAt(token.content, quotePos, APOSTROPHE)
                continue

            if canClose:
                # this could be a closing quote, rewind the stack to get a match
                matched = False
                for k in range(len(stack) - 1, -1, -1):
                    item = stack[k]
                    if item["level"] < thisLevel:
                        break
                    if item["single"] == isSingle and item["level"] == thisLevel:
                        if isSingle:
                            openQuote = state.md.options.quotes[2]
                            closeQuote = state.md.options.quotes[3]
                        else:
                            openQuote = state.md.options.quotes[0]
                            closeQuote = state.md.options.quotes[1]

                        # replace token.content *before* tokens[item.token].content,
                        # because, if they are pointing at the same token, replaceAt
                        # could mess up indices when quote length != 1
                        token.content = replaceAt(token.content, quotePos, closeQuote)
                        tokens[item["token"]].content = replaceAt(
                            tokens[item["token"]].content, item["pos"], openQuote
                        )

                        # Skip past the inserted text; the opener only shifts
                        # our position when it lives in this same token.
                        pos += len(closeQuote) - 1
                        if item["token"] == i:
                            pos += len(openQuote) - 1

                        text = token.content
                        maximum = len(text)

                        # The matched opener and everything above it is consumed.
                        stack = stack[:k]
                        matched = True
                        break
                if matched:
                    # Pair handled; move on to the next quote in this token.
                    continue

            if canOpen:
                stack.append(
                    {
                        "token": i,
                        "pos": quotePos,
                        "single": isSingle,
                        "level": thisLevel,
                    }
                )
            elif canClose and isSingle:
                # A lone closing single quote is rendered as an apostrophe.
                token.content = replaceAt(token.content, quotePos, APOSTROPHE)

192

193

def smartquotes(state: StateCore) -> None:
    """Core rule: convert straight quotes to typographic ones.

    Does nothing unless ``state.md.options.typographer`` is truthy.  Otherwise
    every ``inline`` token in ``state.tokens`` whose content contains a
    straight quote and which has children is handed to ``process_inlines``.
    """
    if state.md.options.typographer:
        for block_token in state.tokens:
            # Only inline tokens that actually contain ' or " need any work.
            has_quote = block_token.type == "inline" and QUOTE_RE.search(
                block_token.content
            )
            if has_quote and block_token.children is not None:
                process_inlines(block_token.children, state)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown_it/rules_core/smartquotes.py: 11%

116 statements