1"""Simple typographic replacements
2
3* ``(c)``, ``(C)`` → ©
4* ``(tm)``, ``(TM)`` → ™
5* ``(r)``, ``(R)`` → ®
6* ``+-`` → ±
7* ``...`` → …
8* ``?....`` → ?..
9* ``!....`` → !..
10* ``????????`` → ???
11* ``!!!!!`` → !!!
12* ``,,,`` → ,
* ``--`` → – (en dash)
* ``---`` → — (em dash)
15"""
16from __future__ import annotations
17
18import logging
19import re
20
21from ..token import Token
22from .state_core import StateCore
23
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)

# TODO:
# - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾
# - multiplication 2 x 4 -> 2 × 4

# Cheap pre-check: matches when the text contains any sequence the "rare"
# replacements act on (``+-``, ``..``, ``????``, ``!!!!``, ``,,`` or ``--``).
RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")

# Workaround for phantomjs - need regex without /g flag,
# or root check will fail every second time
# SCOPED_ABBR_TEST_RE = r"\((c|tm|r)\)"
# NOTE(review): the three lines above look like a leftover from a JavaScript
# implementation; nothing in the visible code uses SCOPED_ABBR_TEST_RE.

# ``(c)``, ``(tm)`` or ``(r)`` in any letter case; group 1 is the abbreviation
# without the parentheses.
SCOPED_ABBR_RE = re.compile(r"\((c|tm|r)\)", flags=re.IGNORECASE)

# The literal two-character sequence ``+-``.
PLUS_MINUS_RE = re.compile(r"\+-")

# A run of two or more dots.
ELLIPSIS_RE = re.compile(r"\.{2,}")

# ``?`` or ``!`` immediately followed by an ellipsis character; group 1 is the
# punctuation mark.
ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…")

# A run of four or more ``?``/``!`` characters (they may be mixed).  Because
# the group is repeated, group 1 holds only the last character of the run.
QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}")

# A run of two or more commas.
COMMA_RE = re.compile(r",{2,}")

# Exactly three hyphens with no further hyphen on either side.  Group 1 is the
# preceding character (empty at the start of a line); the following character
# is only looked at, not consumed.  MULTILINE lets ``^``/``$`` match at every
# line boundary.
EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", flags=re.MULTILINE)

# Two hyphens with whitespace or a line boundary on both sides.  Group 1 is
# the preceding whitespace character (empty at the start of a line).
EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", flags=re.MULTILINE)

# Two hyphens whose neighbours are neither a hyphen nor whitespace (or are a
# line boundary).  Group 1 is the preceding character (empty at line start).
EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE)


# Lower-cased abbreviation (as captured by SCOPED_ABBR_RE) -> replacement symbol.
SCOPED_ABBR = {"c": "©", "r": "®", "tm": "™"}
56
57
def replaceFn(match: re.Match[str]) -> str:
    """Return the symbol for the abbreviation captured in group 1 of *match*.

    The captured text is lower-cased before the ``SCOPED_ABBR`` lookup, so
    ``(C)`` and ``(c)`` resolve to the same symbol.  Intended for use as the
    replacement callable of ``SCOPED_ABBR_RE.sub``.
    """
    abbreviation = match[1].lower()
    return SCOPED_ABBR[abbreviation]
60
61
def replace_scoped(inlineTokens: list[Token]) -> None:
    """Replace ``(c)``, ``(r)`` and ``(tm)`` (any letter case) in text tokens.

    The ``content`` of each ``text`` token is rewritten in place; nothing is
    returned.  Text tokens met while a link whose ``info`` is ``"auto"`` is
    open are left untouched.
    """
    # Non-zero while the walk is between an auto ``link_open`` and its
    # ``link_close``.  The counter dips below zero instead of rising above
    # it; that is harmless because only its truthiness is ever tested.
    autolink_balance = 0

    for tok in inlineTokens:
        kind = tok.type

        if kind == "text":
            if autolink_balance == 0:
                tok.content = SCOPED_ABBR_RE.sub(replaceFn, tok.content)
        elif kind == "link_open":
            if tok.info == "auto":
                autolink_balance -= 1
        elif kind == "link_close":
            if tok.info == "auto":
                autolink_balance += 1
74
75
def replace_rare(inlineTokens: list[Token]) -> None:
    """Apply the punctuation and dash replacements to text tokens, in place.

    A ``text`` token is rewritten only when it is outside a link whose
    ``info`` is ``"auto"`` and ``RARE_RE`` finds something in its ``content``.
    The substitutions run in a fixed order because later ones depend on the
    output of earlier ones (dots become an ellipsis before ``?…``/``!…`` is
    shortened).  Nothing is returned.
    """
    # Non-zero between an auto ``link_open`` and its ``link_close``.  It goes
    # negative rather than positive; only its truthiness is tested.
    autolink_balance = 0

    for tok in inlineTokens:
        kind = tok.type

        if kind == "link_open":
            if tok.info == "auto":
                autolink_balance -= 1
            continue

        if kind == "link_close":
            if tok.info == "auto":
                autolink_balance += 1
            continue

        if kind != "text" or autolink_balance != 0:
            continue

        # Skip the whole substitution chain when there is nothing to replace.
        if not RARE_RE.search(tok.content):
            continue

        # "+-" becomes the plus-minus sign.
        tok.content = PLUS_MINUS_RE.sub("±", tok.content)

        # Any run of two or more dots collapses to a single ellipsis ...
        tok.content = ELLIPSIS_RE.sub("…", tok.content)

        # ... except directly after "?" or "!", where it becomes two dots.
        tok.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub("\\1..", tok.content)

        # Four or more "?"/"!" shrink to three copies of the run's last mark.
        tok.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", tok.content)

        # Repeated commas collapse to one.
        tok.content = COMMA_RE.sub(",", tok.content)

        # "---" becomes an em dash; group 1 restores the preceding character.
        tok.content = EM_DASH_RE.sub("\\1\u2014", tok.content)

        # "--" becomes an en dash, first the whitespace-delimited form and
        # then the form wedged between two non-space characters.
        tok.content = EN_DASH_RE.sub("\\1\u2013", tok.content)
        tok.content = EN_DASH_INDENT_RE.sub("\\1\u2013", tok.content)
110
111
def replace(state: StateCore) -> None:
    """Run the typographic replacements over the inline tokens of *state*.

    Nothing happens unless ``state.md.options.typographer`` is truthy.  Only
    tokens of type ``"inline"`` that have children are considered.  The
    token's own ``content`` is searched first, so the children are walked
    only when a replacement can actually apply.
    """
    if state.md.options.typographer:
        for block_token in state.tokens:
            if block_token.type == "inline" and block_token.children is not None:
                children = block_token.children

                if SCOPED_ABBR_RE.search(block_token.content):
                    replace_scoped(children)

                if RARE_RE.search(block_token.content):
                    replace_rare(children)