# Coverage report residue (source recovered from a coverage.py HTML page):
#   File: /usr/local/lib/python3.8/site-packages/markdown_it/rules_core/linkify.py
#   Coverage: 17% of 92 statements
#   Generated by coverage.py v7.2.7 at 2023-06-07 06:15 +0000
1from __future__ import annotations
3import re
4from typing import Protocol
6from ..common.utils import arrayReplaceAt, isLinkClose, isLinkOpen
7from ..token import Token
8from .state_core import StateCore
10HTTP_RE = re.compile(r"^http://")
11MAILTO_RE = re.compile(r"^mailto:")
12TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE)
def linkify(state: StateCore) -> None:
    """Rule for identifying plain-text links.

    Scans the children of every ``inline`` token for bare URLs / e-mail
    addresses (via the linkify engine attached to ``state.md``) and replaces
    each matching "text" token with ``link_open`` / ``text`` / ``link_close``
    token triples, splitting surrounding plain text into separate tokens.

    :param state: core state; ``inline_token.children`` lists are replaced
        in place via ``arrayReplaceAt``.
    :raises ModuleNotFoundError: if the ``linkify`` option is enabled but no
        linkify engine is installed on ``state.md``.
    """
    if not state.md.options.linkify:
        return

    if not state.md.linkify:
        raise ModuleNotFoundError("Linkify enabled but not installed.")

    for inline_token in state.tokens:
        # Cheap pretest filters out tokens that cannot contain a link,
        # avoiding the full matcher on most content.
        if inline_token.type != "inline" or not state.md.linkify.pretest(
            inline_token.content
        ):
            continue

        tokens = inline_token.children

        # Nesting depth of raw-HTML <a> tags; >0 means we are inside one.
        htmlLinkLevel = 0

        # We scan from the end, to keep position when new tags added.
        # Use reversed logic in links start/end match
        assert tokens is not None
        i = len(tokens)
        while i >= 1:
            i -= 1
            assert isinstance(tokens, list)
            currentToken = tokens[i]

            # Skip content of markdown links
            if currentToken.type == "link_close":
                i -= 1
                # Walk back to the matching link_open (same nesting level).
                while (
                    tokens[i].level != currentToken.level
                    and tokens[i].type != "link_open"
                ):
                    i -= 1
                continue

            # Skip content of html tag links
            if currentToken.type == "html_inline":
                # Because we scan backwards, a closing </a> *increases* the
                # nesting depth and an opening <a> decreases it.
                if isLinkOpen(currentToken.content) and htmlLinkLevel > 0:
                    htmlLinkLevel -= 1
                if isLinkClose(currentToken.content):
                    htmlLinkLevel += 1
            if htmlLinkLevel > 0:
                continue

            if currentToken.type == "text" and state.md.linkify.test(
                currentToken.content
            ):
                text = currentToken.content
                links: list[_LinkType] = state.md.linkify.match(text) or []

                # Now split string to nodes
                nodes = []
                level = currentToken.level
                lastPos = 0

                # forbid escape sequence at the start of the string,
                # this avoids http\://example.com/ from being linkified as
                # http:<a href="//example.com/">//example.com/</a>
                if (
                    links
                    and links[0].index == 0
                    and i > 0
                    and tokens[i - 1].type == "text_special"
                ):
                    links = links[1:]

                for link in links:
                    url = link.url
                    fullUrl = state.md.normalizeLink(url)
                    # Drop matches whose normalized URL fails validation.
                    if not state.md.validateLink(fullUrl):
                        continue

                    urlText = link.text

                    # Linkifier might send raw hostnames like "example.com", where url
                    # starts with domain name. So we prepend http:// in those cases,
                    # and remove it afterwards.
                    if not link.schema:
                        urlText = HTTP_RE.sub(
                            "", state.md.normalizeLinkText("http://" + urlText)
                        )
                    elif link.schema == "mailto:" and TEST_MAILTO_RE.search(urlText):
                        urlText = MAILTO_RE.sub(
                            "", state.md.normalizeLinkText("mailto:" + urlText)
                        )
                    else:
                        urlText = state.md.normalizeLinkText(urlText)

                    pos = link.index

                    # Plain text between the previous link (or start) and
                    # this one becomes its own text token.
                    if pos > lastPos:
                        token = Token("text", "", 0)
                        token.content = text[lastPos:pos]
                        token.level = level
                        nodes.append(token)

                    token = Token("link_open", "a", 1)
                    token.attrs = {"href": fullUrl}
                    token.level = level
                    level += 1
                    token.markup = "linkify"
                    token.info = "auto"
                    nodes.append(token)

                    token = Token("text", "", 0)
                    token.content = urlText
                    token.level = level
                    nodes.append(token)

                    token = Token("link_close", "a", -1)
                    level -= 1
                    token.level = level
                    token.markup = "linkify"
                    token.info = "auto"
                    nodes.append(token)

                    lastPos = link.last_index

                # Trailing plain text after the final link.
                if lastPos < len(text):
                    token = Token("text", "", 0)
                    token.content = text[lastPos:]
                    token.level = level
                    nodes.append(token)

                # Splice the replacement nodes in place of the text token;
                # rebind `tokens` since arrayReplaceAt returns a new list.
                inline_token.children = tokens = arrayReplaceAt(tokens, i, nodes)
class _LinkType(Protocol):
    """Structural type of one match object returned by ``linkify.match()``."""

    url: str  # the full matched URL (used for normalizeLink/validateLink)
    text: str  # the matched text as it appeared in the source string
    index: int  # start offset of the match within the scanned string
    last_index: int  # end offset (one past the last matched character)
    schema: str | None  # scheme such as "mailto:"; falsy for bare domains