Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/markdown_it/rules_core/smartquotes.py: 10%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

115 statements  

1"""Convert straight quotation marks to typographic ones 

2""" 

3from __future__ import annotations 

4 

5import re 

6from typing import Any 

7 

8from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace 

9from ..token import Token 

10from .state_core import StateCore 

11 

# Matches one straight quote character: ' or ".
# NOTE(review): not referenced in the visible part of this module; the
# identical QUOTE_RE below is used for both testing and scanning.
QUOTE_TEST_RE = re.compile(r"['\"]")
# Same pattern as QUOTE_TEST_RE; used to locate each straight quote in a text.
QUOTE_RE = re.compile(r"['\"]")
# Replacement for a single quote that does not pair up with an opening quote.
APOSTROPHE = "\u2019"  # ’

15 

16 

def replaceAt(string: str, index: int, ch: str) -> str:
    """Return a copy of *string* with the character at *index* swapped for *ch*.

    *ch* may have any length, so the result can be longer or shorter than
    the input.  An *index* at or past the end of *string* appends *ch*.
    """
    # A negative index would slice from the end here, which is not what the
    # JavaScript original does; callers are expected never to pass one.
    assert index >= 0
    head = string[:index]
    tail = string[index + 1 :]
    return "".join((head, ch, tail))

22 

23 

def _char_before(tokens: list[Token], index: int) -> int | None:
    """Return the code of the nearest character preceding ``tokens[index]``.

    Walks backwards over the earlier sibling tokens, skipping those whose
    content is empty, and stops at a soft or hard line break.  When no
    character is found the quote is treated as starting a line and a space
    (0x20) is returned.
    """
    for j in range(index - 1, -1, -1):
        prev = tokens[j]
        if prev.type == "softbreak" or prev.type == "hardbreak":
            break
        # should skip all tokens except 'text', 'html_inline' or 'code_inline'
        if not prev.content:
            continue
        return charCodeAt(prev.content, len(prev.content) - 1)
    return 0x20


def _char_after(tokens: list[Token], index: int) -> int | None:
    """Return the code of the nearest character following ``tokens[index]``.

    Walks forwards over the later sibling tokens, skipping those whose
    content is empty, and stops at a soft or hard line break.  When no
    character is found the quote is treated as ending a line and a space
    (0x20) is returned.
    """
    for j in range(index + 1, len(tokens)):
        nxt = tokens[j]
        if nxt.type == "softbreak" or nxt.type == "hardbreak":
            break
        # should skip all tokens except 'text', 'html_inline' or 'code_inline'
        if not nxt.content:
            continue
        return charCodeAt(nxt.content, 0)
    return 0x20


def process_inlines(tokens: list[Token], state: StateCore) -> None:
    """Replace straight quotes in the ``text`` tokens of one inline run.

    The contents of the tokens are rewritten in place.  Each ``'`` or ``"``
    is classified by the characters around it as able to open and/or close
    a quotation.  A closing quote is paired with the most recent pending
    opening quote of the same kind at the same nesting level, and both are
    replaced with the entries of ``state.md.options.quotes`` (indices 0/1:
    double open/close, 2/3: single open/close).  A single quote that cannot
    be paired and cannot open a quotation becomes ``APOSTROPHE``.

    :param tokens: sibling inline tokens; only ``type == "text"`` tokens are
        scanned, the others are consulted for neighbouring characters.
    :param state: core state, read only for ``state.md.options.quotes``.
    """
    # Opening quotes still waiting for a partner.  Each entry records the
    # token index, the position inside that token, the quote kind and the
    # nesting level it was found at.
    stack: list[dict[str, Any]] = []

    for i, token in enumerate(tokens):
        thisLevel = token.level

        # Forget pending quotes that were opened deeper than this token.
        while stack and stack[-1]["level"] > thisLevel:
            stack.pop()

        if token.type != "text":
            continue

        text = token.content
        pos = 0
        maximum = len(text)

        while pos < maximum:
            # Search in place from ``pos`` instead of slicing ``text``, so
            # the remainder of the string is not copied for every quote.
            t = QUOTE_RE.search(text, pos)
            if not t:
                break

            quotePos = t.start(0)
            pos = quotePos + 1
            isSingle = t.group(0) == "'"
            canOpen = canClose = True

            # Find previous character,
            # default to space if it's the beginning of the line
            lastChar: int | None
            if quotePos > 0:
                lastChar = charCodeAt(text, quotePos - 1)
            else:
                lastChar = _char_before(tokens, i)

            # Find next character,
            # default to space if it's the end of the line
            nextChar: int | None
            if pos < maximum:
                nextChar = charCodeAt(text, pos)
            else:
                nextChar = _char_after(tokens, i)

            isLastPunctChar = lastChar is not None and (
                isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
            )
            isNextPunctChar = nextChar is not None and (
                isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))
            )

            isLastWhiteSpace = lastChar is not None and isWhiteSpace(lastChar)
            isNextWhiteSpace = nextChar is not None and isWhiteSpace(nextChar)

            if isNextWhiteSpace or (
                isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar)
            ):
                canOpen = False

            if isLastWhiteSpace or (
                isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar)
            ):
                canClose = False

            if (
                nextChar == 0x22  # 0x22: "
                and not isSingle
                and lastChar is not None
                and 0x30 <= lastChar <= 0x39  # 0x30: 0, 0x39: 9
            ):
                # special case: 1"" - count first quote as an inch
                canClose = canOpen = False

            if canOpen and canClose:
                # Replace quotes in the middle of punctuation sequence, but not
                # in the middle of the words, i.e.:
                #
                # 1. foo " bar " baz - not replaced
                # 2. foo-"-bar-"-baz - replaced
                # 3. foo"bar"baz     - not replaced
                canOpen = isLastPunctChar
                canClose = isNextPunctChar

            if not canOpen and not canClose:
                # middle of word
                if isSingle:
                    token.content = replaceAt(token.content, quotePos, APOSTROPHE)
                continue

            if canClose:
                # this could be a closing quote, rewind the stack to get a match
                matched = False
                for j in range(len(stack) - 1, -1, -1):
                    item = stack[j]
                    if item["level"] < thisLevel:
                        break
                    if item["single"] == isSingle and item["level"] == thisLevel:
                        quotes = state.md.options.quotes
                        if isSingle:
                            openQuote = quotes[2]
                            closeQuote = quotes[3]
                        else:
                            openQuote = quotes[0]
                            closeQuote = quotes[1]

                        # replace token.content *before* the opener's content,
                        # because, if they are pointing at the same token,
                        # replaceAt could mess up indices when quote length != 1
                        token.content = replaceAt(
                            token.content, quotePos, closeQuote
                        )
                        opener = tokens[item["token"]]
                        opener.content = replaceAt(
                            opener.content, item["pos"], openQuote
                        )

                        # Keep ``pos`` just past the closing quote after the
                        # replacements changed the length of this token.
                        pos += len(closeQuote) - 1
                        if item["token"] == i:
                            pos += len(openQuote) - 1

                        text = token.content
                        maximum = len(text)

                        # The matched opener and everything pushed after it
                        # are consumed.
                        del stack[j:]
                        matched = True
                        break
                if matched:
                    continue

            if canOpen:
                stack.append(
                    {
                        "token": i,
                        "pos": quotePos,
                        "single": isSingle,
                        "level": thisLevel,
                    }
                )
            elif canClose and isSingle:
                token.content = replaceAt(token.content, quotePos, APOSTROPHE)

192 

193 

def smartquotes(state: StateCore) -> None:
    """Core rule: apply typographic quote replacement to every inline token.

    Does nothing unless ``state.md.options.typographer`` is enabled.  Only
    ``inline`` tokens whose content contains a straight quote and that have
    children are handed to ``process_inlines``.
    """
    if state.md.options.typographer:
        for block_token in state.tokens:
            has_quotes = block_token.type == "inline" and QUOTE_RE.search(
                block_token.content
            )
            if has_quotes and block_token.children is not None:
                process_inlines(block_token.children, state)