Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/markdown_it/rules_core/smartquotes.py: 10%
118 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:07 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:07 +0000
1"""Convert straight quotation marks to typographic ones
2"""
3from __future__ import annotations
5import re
6from typing import Any
8from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace
9from ..token import Token
10from .state_core import StateCore
# Matches a single straight quote character (' or ").
# NOTE(review): not referenced in the visible code; presumably kept for
# parity with the JS implementation - confirm before removing.
QUOTE_TEST_RE = re.compile(r"['\"]")
# Matches a single straight quote character (' or "); used both to pre-filter
# inline tokens and to locate each quote while scanning text content.
QUOTE_RE = re.compile(r"['\"]")
# Replacement used for single quotes that are treated as apostrophes.
APOSTROPHE = "\u2019"  # ’
def replaceAt(string: str, index: int, ch: str) -> str:
    """Return a copy of *string* with the character at *index* replaced by *ch*.

    *ch* may be of any length (including empty), so the result can be longer
    or shorter than the input. An *index* at or past the end of *string*
    simply appends *ch*.
    """
    # A negative index would slice from the end of the string, which is not
    # what the JS version does; callers are expected never to pass one.
    assert index >= 0
    head = string[:index]
    tail = string[index + 1 :]
    return head + ch + tail
def process_inlines(tokens: list[Token], state: StateCore) -> None:
    """Replace straight quotes with typographic ones in a run of inline tokens.

    Every ``'`` and ``"`` found in a ``text`` token is classified as a
    possible opener and/or closer based on the characters immediately before
    and after it (looking into neighbouring tokens when the quote sits at a
    token boundary). A closer is paired with the nearest unmatched opener of
    the same kind at the same nesting level, and both are rewritten using
    ``state.md.options.quotes``. A single quote that can neither open nor be
    paired as a closer is rewritten as an apostrophe.

    :param tokens: child tokens of one inline token; ``content`` of text
        tokens is modified in place.
    :param state: core state; ``state.md.options.quotes`` is read as
        (double-open, double-close, single-open, single-close).
    """
    # Unmatched opening-quote candidates, innermost last. Each entry records
    # the token index, the offset inside that token's content, whether it is
    # a single quote, and the token's nesting level.
    stack: list[dict[str, Any]] = []

    for i, token in enumerate(tokens):
        thisLevel = token.level

        # Forget trailing candidates opened at a deeper level than this token.
        while stack and stack[-1]["level"] > thisLevel:
            stack.pop()

        if token.type != "text":
            continue

        text = token.content
        pos = 0
        maximum = len(text)

        while pos < maximum:
            # Search in place from ``pos`` instead of slicing ``text``: this
            # avoids copying the tail of the string for every quote and makes
            # the match offset an absolute position in ``text``.
            t = QUOTE_RE.search(text, pos)
            if not t:
                break

            canOpen = canClose = True
            quotePos = t.start(0)
            pos = quotePos + 1
            isSingle = t.group(0) == "'"

            # Find previous character,
            # default to space if it's the beginning of the line
            lastChar = 0x20

            if quotePos >= 1:
                lastChar = charCodeAt(text, quotePos - 1)
            else:
                for j in range(i)[::-1]:
                    # lastChar defaults to 0x20
                    if tokens[j].type in ("softbreak", "hardbreak"):
                        break
                    # should skip all tokens except 'text', 'html_inline' or 'code_inline'
                    if not tokens[j].content:
                        continue

                    lastChar = charCodeAt(tokens[j].content, len(tokens[j].content) - 1)
                    break

            # Find next character,
            # default to space if it's the end of the line
            nextChar = 0x20

            if pos < maximum:
                nextChar = charCodeAt(text, pos)
            else:
                for j in range(i + 1, len(tokens)):
                    # nextChar defaults to 0x20
                    if tokens[j].type in ("softbreak", "hardbreak"):
                        break
                    # should skip all tokens except 'text', 'html_inline' or 'code_inline'
                    if not tokens[j].content:
                        continue

                    nextChar = charCodeAt(tokens[j].content, 0)
                    break

            isLastPunctChar = isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
            isNextPunctChar = isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))

            isLastWhiteSpace = isWhiteSpace(lastChar)
            isNextWhiteSpace = isWhiteSpace(nextChar)

            if isNextWhiteSpace:
                canOpen = False
            elif isNextPunctChar:
                if not (isLastWhiteSpace or isLastPunctChar):
                    canOpen = False

            if isLastWhiteSpace:
                canClose = False
            elif isLastPunctChar:
                if not (isNextWhiteSpace or isNextPunctChar):
                    canClose = False

            if nextChar == 0x22 and t.group(0) == '"':  # 0x22: "
                if 0x30 <= lastChar <= 0x39:  # 0x30: 0, 0x39: 9
                    # special case: 1"" - count first quote as an inch
                    canClose = canOpen = False

            if canOpen and canClose:
                # Replace quotes in the middle of punctuation sequence, but not
                # in the middle of the words, i.e.:
                #
                # 1. foo " bar " baz - not replaced
                # 2. foo-"-bar-"-baz - replaced
                # 3. foo"bar"baz     - not replaced
                canOpen = isLastPunctChar
                canClose = isNextPunctChar

            if not canOpen and not canClose:
                # middle of word
                if isSingle:
                    token.content = replaceAt(token.content, quotePos, APOSTROPHE)
                continue

            if canClose:
                # this could be a closing quote, rewind the stack to get a match
                matched = False
                for j in range(len(stack))[::-1]:
                    item = stack[j]
                    if item["level"] < thisLevel:
                        break
                    if item["single"] == isSingle and item["level"] == thisLevel:
                        if isSingle:
                            openQuote = state.md.options.quotes[2]
                            closeQuote = state.md.options.quotes[3]
                        else:
                            openQuote = state.md.options.quotes[0]
                            closeQuote = state.md.options.quotes[1]

                        # replace token.content *before* tokens[item.token].content,
                        # because, if they are pointing at the same token, replaceAt
                        # could mess up indices when quote length != 1
                        token.content = replaceAt(token.content, quotePos, closeQuote)
                        opener = tokens[item["token"]]
                        opener.content = replaceAt(
                            opener.content, item["pos"], openQuote
                        )

                        # Account for replacement strings longer than one char
                        # so scanning resumes right after the closing quote.
                        pos += len(closeQuote) - 1
                        if item["token"] == i:
                            pos += len(openQuote) - 1

                        text = token.content
                        maximum = len(text)

                        # Discard the matched opener and everything above it.
                        stack = stack[:j]
                        matched = True
                        break
                if matched:
                    continue

            if canOpen:
                stack.append(
                    {
                        "token": i,
                        "pos": quotePos,
                        "single": isSingle,
                        "level": thisLevel,
                    }
                )
            elif canClose and isSingle:
                token.content = replaceAt(token.content, quotePos, APOSTROPHE)
def smartquotes(state: StateCore) -> None:
    """Core rule: apply smart-quote replacement to every inline token.

    Does nothing unless ``state.md.options.typographer`` is truthy. Otherwise
    each ``inline`` token whose content contains a straight quote and that
    has children gets those children processed by ``process_inlines``.
    """
    if state.md.options.typographer:
        for token in state.tokens:
            # Cheap pre-filter: only inline tokens that contain a quote at all.
            has_quote = token.type == "inline" and QUOTE_RE.search(token.content)
            if has_quote and token.children is not None:
                process_inlines(token.children, state)