Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/markdown_it/rules_core/smartquotes.py: 10%

118 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:07 +0000

1"""Convert straight quotation marks to typographic ones 

2""" 

3from __future__ import annotations 

4 

5import re 

6from typing import Any 

7 

8from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace 

9from ..token import Token 

10from .state_core import StateCore 

11 

# Both patterns match a single straight quote character (' or ").
# Two names are kept so that other code referring to either one keeps
# working; within this module only QUOTE_RE is used.
QUOTE_TEST_RE = re.compile(r"['\"]")
QUOTE_RE = re.compile(r"['\"]")

# Typographic apostrophe substituted for a straight single quote that
# cannot act as an opening quote.
APOSTROPHE = "\u2019"  # ’

15 

16 

def replaceAt(string: str, index: int, ch: str) -> str:
    """Return ``string`` with the single character at ``index`` replaced by ``ch``.

    ``ch`` may be any length, so the result can be longer or shorter than
    ``string``. ``index`` must be non-negative.
    """
    # When the index is negative, the behavior is different from the js version.
    # But basically, the index will not be negative.
    assert index >= 0, "index must not be negative"
    return string[:index] + ch + string[index + 1 :]

22 

23 

def process_inlines(tokens: list[Token], state: StateCore) -> None:
    """Replace straight quotes in the ``text`` tokens of ``tokens`` in place.

    Each ``'`` or ``"`` found in a ``text`` token is classified as able to
    open and/or close a quotation, based on the characters on either side of
    it (looking into neighbouring tokens when the quote sits at a token edge).
    A closing quote is paired with the nearest pending opening quote of the
    same kind and nesting level; both are then rewritten using
    ``state.md.options.quotes`` (indices 0/1 for double quotes, 2/3 for
    single quotes). A single quote that cannot open a quotation and finds no
    partner is rewritten to ``APOSTROPHE``.

    :param tokens: inline child tokens; their ``content`` is modified in place.
    :param state: core state, used only to read ``state.md.options.quotes``.
    """
    # Opening quotes still waiting for a partner. Each entry records the
    # token index, the offset inside that token, the quote kind and the level.
    stack: list[dict[str, Any]] = []

    for i, token in enumerate(tokens):
        thisLevel = token.level

        # Discard pending openers that sit at a deeper level than this token.
        while stack and stack[-1]["level"] > thisLevel:
            stack.pop()

        if token.type != "text":
            continue

        text = token.content
        pos = 0
        maximum = len(text)

        while pos < maximum:
            # Search from ``pos`` directly rather than slicing ``text``, so
            # the tail of the string is not copied for every quote found.
            t = QUOTE_RE.search(text, pos)
            if not t:
                break

            quotePos = t.start(0)
            canOpen = canClose = True
            pos = quotePos + 1
            isSingle = t.group(0) == "'"

            # Find previous character,
            # default to space if it's the beginning of the line
            lastChar = 0x20

            if quotePos > 0:
                lastChar = charCodeAt(text, quotePos - 1)
            else:
                for j in range(i - 1, -1, -1):
                    # lastChar defaults to 0x20
                    if tokens[j].type in ("softbreak", "hardbreak"):
                        break
                    # should skip all tokens except 'text', 'html_inline' or 'code_inline'
                    if not tokens[j].content:
                        continue

                    lastChar = charCodeAt(tokens[j].content, len(tokens[j].content) - 1)
                    break

            # Find next character,
            # default to space if it's the end of the line
            nextChar = 0x20

            if pos < maximum:
                nextChar = charCodeAt(text, pos)
            else:
                for j in range(i + 1, len(tokens)):
                    # nextChar defaults to 0x20
                    if tokens[j].type in ("softbreak", "hardbreak"):
                        break
                    # should skip all tokens except 'text', 'html_inline' or 'code_inline'
                    if not tokens[j].content:
                        continue

                    nextChar = charCodeAt(tokens[j].content, 0)
                    break

            isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
            isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))

            isLastWhiteSpace = isWhiteSpace(lastChar)
            isNextWhiteSpace = isWhiteSpace(nextChar)

            if isNextWhiteSpace:
                canOpen = False
            elif isNextPunctChar:
                if not (isLastWhiteSpace or isLastPunctChar):
                    canOpen = False

            if isLastWhiteSpace:
                canClose = False
            elif isLastPunctChar:
                if not (isNextWhiteSpace or isNextPunctChar):
                    canClose = False

            if nextChar == 0x22 and not isSingle:  # 0x22: "
                if lastChar >= 0x30 and lastChar <= 0x39:  # 0x30: 0, 0x39: 9
                    # special case: 1"" - count first quote as an inch
                    canClose = canOpen = False

            if canOpen and canClose:
                # Replace quotes in the middle of punctuation sequence, but not
                # in the middle of the words, i.e.:
                #
                # 1. foo " bar " baz - not replaced
                # 2. foo-"-bar-"-baz - replaced
                # 3. foo"bar"baz     - not replaced
                canOpen = isLastPunctChar
                canClose = isNextPunctChar

            if not canOpen and not canClose:
                # middle of word
                if isSingle:
                    token.content = replaceAt(token.content, quotePos, APOSTROPHE)
                continue

            if canClose:
                # this could be a closing quote, rewind the stack to get a match
                matched = False
                for j in range(len(stack) - 1, -1, -1):
                    item = stack[j]
                    if item["level"] < thisLevel:
                        break
                    if item["single"] == isSingle and item["level"] == thisLevel:
                        if isSingle:
                            openQuote = state.md.options.quotes[2]
                            closeQuote = state.md.options.quotes[3]
                        else:
                            openQuote = state.md.options.quotes[0]
                            closeQuote = state.md.options.quotes[1]

                        # replace token.content *before* tokens[item.token].content,
                        # because, if they are pointing at the same token, replaceAt
                        # could mess up indices when quote length != 1
                        token.content = replaceAt(token.content, quotePos, closeQuote)
                        tokens[item["token"]].content = replaceAt(
                            tokens[item["token"]].content, item["pos"], openQuote
                        )

                        # Skip past the inserted text when the replacement
                        # quotes are longer than one character.
                        pos += len(closeQuote) - 1
                        if item["token"] == i:
                            pos += len(openQuote) - 1

                        text = token.content
                        maximum = len(text)

                        # The matched opener and everything pushed after it
                        # are no longer pending.
                        del stack[j:]
                        matched = True
                        break
                if matched:
                    continue

            if canOpen:
                stack.append(
                    {
                        "token": i,
                        "pos": quotePos,
                        "single": isSingle,
                        "level": thisLevel,
                    }
                )
            elif canClose and isSingle:
                token.content = replaceAt(token.content, quotePos, APOSTROPHE)

191 

192 

def smartquotes(state: StateCore) -> None:
    """Convert straight quotes to typographic ones in every inline token.

    Does nothing unless ``state.md.options.typographer`` is truthy. Only
    tokens of type ``inline`` whose content contains a straight quote are
    processed, and only when they have children; the children are modified
    in place by ``process_inlines``.
    """
    if not state.md.options.typographer:
        return

    for token in state.tokens:
        # Cheap pre-check: skip tokens that cannot contain a quote to convert.
        if token.type != "inline" or not QUOTE_RE.search(token.content):
            continue
        if token.children is not None:
            process_inlines(token.children, state)