1"""Convert straight quotation marks to typographic ones
2"""
3from __future__ import annotations
4
5import re
6from typing import Any
7
8from ..common.utils import charCodeAt, isMdAsciiPunct, isPunctChar, isWhiteSpace
9from ..token import Token
10from .state_core import StateCore
11
# Both patterns match a single straight quote character: ' or ".
# NOTE(review): QUOTE_TEST_RE is identical to QUOTE_RE and is not referenced
# in the visible part of this file - confirm whether anything else imports it.
QUOTE_TEST_RE = re.compile(r"['\"]")
QUOTE_RE = re.compile(r"['\"]")
# Right single quotation mark; substituted for a straight ' that is treated
# as an apostrophe rather than as an opening/closing quote.
APOSTROPHE = "\u2019"  # ’
15
16
def replaceAt(string: str, index: int, ch: str) -> str:
    """Return ``string`` with the one character at ``index`` swapped for ``ch``.

    ``ch`` may be of any length (including empty), so the result can be
    longer or shorter than ``string``.  When ``index`` is past the end of
    ``string``, ``ch`` is simply appended.
    """
    # A negative index would not behave like the js version does; callers
    # are not expected to ever pass one, so it is only asserted.
    assert index >= 0
    head = string[:index]
    tail = string[index + 1 :]
    return "".join((head, ch, tail))
22
23
def _char_before_token(tokens: list[Token], index: int) -> int | None:
    """Return the code of the last character preceding ``tokens[index]``.

    Scans backwards from ``tokens[index - 1]``, skipping tokens whose content
    is empty, and stops at a ``softbreak``/``hardbreak``.  When nothing is
    found the line is considered to start here and a space (0x20) is returned.
    """
    for k in range(index - 1, -1, -1):
        prev = tokens[k]
        if prev.type == "softbreak" or prev.type == "hardbreak":
            break
        # should skip all tokens except 'text', 'html_inline' or 'code_inline'
        if not prev.content:
            continue
        return charCodeAt(prev.content, len(prev.content) - 1)
    return 0x20


def _char_after_token(tokens: list[Token], index: int) -> int | None:
    """Return the code of the first character following ``tokens[index]``.

    Scans forwards from ``tokens[index + 1]``, skipping tokens whose content
    is empty, and stops at a ``softbreak``/``hardbreak``.  When nothing is
    found the line is considered to end here and a space (0x20) is returned.
    """
    for k in range(index + 1, len(tokens)):
        following = tokens[k]
        if following.type == "softbreak" or following.type == "hardbreak":
            break
        # should skip all tokens except 'text', 'html_inline' or 'code_inline'
        if not following.content:
            continue
        return charCodeAt(following.content, 0)
    return 0x20


def process_inlines(tokens: list[Token], state: StateCore) -> None:
    """Replace straight quotes in the ``text`` tokens of ``tokens`` in place.

    Every ``'`` / ``"`` found in a token of type ``"text"`` is classified from
    the characters on either side of it (whitespace / punctuation / other) as
    a possible opening quote, a possible closing quote, both, or neither:

    * a closing quote is paired with the nearest pending opening quote of the
      same kind and the same ``level``; both are then rewritten using
      ``state.md.options.quotes`` (``[0]``/``[1]`` open/close for double
      quotes, ``[2]``/``[3]`` open/close for single quotes);
    * a single quote that cannot open anything is rewritten to ``APOSTROPHE``;
    * anything else is left untouched.

    :param tokens: the token list to process; ``content`` of its text tokens
        is mutated.
    :param state: only ``state.md.options.quotes`` is read.
    """
    # Pending opening quotes: dicts with "token" (index into ``tokens``),
    # "pos" (offset in that token's content), "single" and "level".
    stack: list[dict[str, Any]] = []

    for i, token in enumerate(tokens):
        thisLevel = token.level

        # Forget pending openers that belong to a deeper nesting level.
        keep = len(stack)
        while keep > 0 and stack[keep - 1]["level"] > thisLevel:
            keep -= 1
        del stack[keep:]

        if token.type != "text":
            continue

        text = token.content
        pos = 0
        maximum = len(text)

        while pos < maximum:
            # Search from ``pos`` inside ``text`` instead of slicing it: no
            # copy of the remaining text per quote, and the match offsets are
            # already absolute.
            t = QUOTE_RE.search(text, pos)
            if not t:
                break

            quotePos = t.start(0)
            pos = quotePos + 1
            isSingle = t.group(0) == "'"
            canOpen = canClose = True

            # Find previous character,
            # default to space if it's the beginning of the line
            lastChar: None | int
            if quotePos > 0:
                lastChar = charCodeAt(text, quotePos - 1)
            else:
                lastChar = _char_before_token(tokens, i)

            # Find next character,
            # default to space if it's the end of the line
            nextChar: None | int
            if pos < maximum:
                nextChar = charCodeAt(text, pos)
            else:
                nextChar = _char_after_token(tokens, i)

            isLastPunctChar = lastChar is not None and (
                isMdAsciiPunct(lastChar) or isPunctChar(chr(lastChar))
            )
            isNextPunctChar = nextChar is not None and (
                isMdAsciiPunct(nextChar) or isPunctChar(chr(nextChar))
            )

            isLastWhiteSpace = lastChar is not None and isWhiteSpace(lastChar)
            isNextWhiteSpace = nextChar is not None and isWhiteSpace(nextChar)

            if isNextWhiteSpace or (
                isNextPunctChar and not (isLastWhiteSpace or isLastPunctChar)
            ):
                canOpen = False

            if isLastWhiteSpace or (
                isLastPunctChar and not (isNextWhiteSpace or isNextPunctChar)
            ):
                canClose = False

            # special case: 1"" - count first quote as an inch
            # (0x22: ", 0x30: 0, 0x39: 9)
            if (
                not isSingle
                and nextChar == 0x22
                and lastChar is not None
                and 0x30 <= lastChar <= 0x39
            ):
                canClose = canOpen = False

            if canOpen and canClose:
                # Replace quotes in the middle of punctuation sequence, but not
                # in the middle of the words, i.e.:
                #
                # 1. foo " bar " baz - not replaced
                # 2. foo-"-bar-"-baz - replaced
                # 3. foo"bar"baz     - not replaced
                canOpen = isLastPunctChar
                canClose = isNextPunctChar

            if not canOpen and not canClose:
                # middle of word
                if isSingle:
                    token.content = replaceAt(token.content, quotePos, APOSTROPHE)
                continue

            if canClose:
                # this could be a closing quote, rewind the stack to get a match
                matched = False
                for j in range(len(stack) - 1, -1, -1):
                    item = stack[j]
                    if item["level"] < thisLevel:
                        break
                    if item["single"] != isSingle or item["level"] != thisLevel:
                        continue

                    if isSingle:
                        openQuote = state.md.options.quotes[2]
                        closeQuote = state.md.options.quotes[3]
                    else:
                        openQuote = state.md.options.quotes[0]
                        closeQuote = state.md.options.quotes[1]

                    # replace token.content *before* tokens[item.token].content,
                    # because, if they are pointing at the same token, replaceAt
                    # could mess up indices when quote length != 1
                    token.content = replaceAt(token.content, quotePos, closeQuote)
                    opener = tokens[item["token"]]
                    opener.content = replaceAt(opener.content, item["pos"], openQuote)

                    # Keep ``pos`` pointing just past the closing quote even
                    # when the replacements are not one character long.
                    pos += len(closeQuote) - 1
                    if item["token"] == i:
                        pos += len(openQuote) - 1

                    text = token.content
                    maximum = len(text)

                    # The matched opener and everything opened after it are
                    # now consumed.
                    del stack[j:]
                    matched = True
                    break

                if matched:
                    continue

            if canOpen:
                stack.append(
                    {
                        "token": i,
                        "pos": quotePos,
                        "single": isSingle,
                        "level": thisLevel,
                    }
                )
            elif canClose and isSingle:
                token.content = replaceAt(token.content, quotePos, APOSTROPHE)
192
193
def smartquotes(state: StateCore) -> None:
    """Core rule: run quote replacement over every inline token of ``state``.

    Does nothing unless ``state.md.options.typographer`` is truthy.  Otherwise
    each token of type ``"inline"`` whose content contains a straight quote
    has its children (when present) handed to ``process_inlines``.
    """
    if state.md.options.typographer:
        for candidate in state.tokens:
            if candidate.type != "inline":
                continue
            # Cheap pre-check: skip inline tokens without any ' or ".
            if QUOTE_RE.search(candidate.content) is None:
                continue
            children = candidate.children
            if children is not None:
                process_inlines(children, state)