1"""Functions for manipulating strings and bytes."""
2
3import re
4
5from icalendar.parser_tools import DEFAULT_ENCODING
6
7
8def escape_char(text: str | bytes) -> str | bytes:
9 r"""Format value according to iCalendar TEXT escaping rules.
10
11 Escapes special characters in text values according to :rfc:`5545#section-3.3.11`
12 rules.
13 The order of replacements matters to avoid double-escaping.
14
15 Parameters:
16 text: The text to escape.
17
18 Returns:
19 The escaped text with special characters escaped.
20
21 Note:
22 The replacement order is critical:
23
24 1. ``\N`` -> ``\n`` (normalize newlines to lowercase)
25 2. ``\`` -> ``\\`` (escape backslashes)
26 3. ``;`` -> ``\;`` (escape semicolons)
27 4. ``,`` -> ``\,`` (escape commas)
28 5. ``\r\n`` -> ``\n`` (normalize line endings)
29 6. ``"\n"`` -> ``r"\n"`` (transform a newline character to a literal, or raw,
30 newline character)
31 """
32 assert isinstance(text, (str, bytes))
33 # NOTE: ORDER MATTERS!
34 return (
35 text.replace(r"\N", "\n")
36 .replace("\\", "\\\\")
37 .replace(";", r"\;")
38 .replace(",", r"\,")
39 .replace("\r\n", r"\n")
40 .replace("\n", r"\n")
41 )
42
43
44def unescape_char(text: str | bytes) -> str | bytes | None:
45 r"""Unescape iCalendar TEXT values.
46
47 Reverses the escaping applied by :func:`escape_char` according to
48 :rfc:`5545#section-3.3.11` TEXT escaping rules.
49
50 Parameters:
51 text: The escaped text.
52
53 Returns:
54 The unescaped text, or ``None`` if ``text`` is neither ``str`` nor ``bytes``.
55
56 Note:
57 The replacement order is critical to avoid double-unescaping:
58
59 1. ``\N`` -> ``\n`` (intermediate step)
60 2. ``\r\n`` -> ``\n`` (normalize line endings)
61 3. ``\n`` -> newline (unescape newlines)
62 4. ``\,`` -> ``,`` (unescape commas)
63 5. ``\;`` -> ``;`` (unescape semicolons)
64 6. ``\\`` -> ``\`` (unescape backslashes last)
65 """
66 assert isinstance(text, (str, bytes))
67 # NOTE: ORDER MATTERS!
68 if isinstance(text, str):
69 return (
70 text.replace("\\N", "\\n")
71 .replace("\r\n", "\n")
72 .replace("\\n", "\n")
73 .replace("\\,", ",")
74 .replace("\\;", ";")
75 .replace("\\\\", "\\")
76 )
77 if isinstance(text, bytes):
78 return (
79 text.replace(b"\\N", b"\\n")
80 .replace(b"\r\n", b"\n")
81 .replace(b"\\n", b"\n")
82 .replace(b"\\,", b",")
83 .replace(b"\\;", b";")
84 .replace(b"\\\\", b"\\")
85 )
86 return None
87
88
def foldline(line: str, limit: int = 75, fold_sep: str = "\r\n ") -> str:
    """Fold a content line as defined in RFC 5545.

    Lines of text SHOULD NOT be longer than 75 octets, excluding the line
    break. Longer content lines SHOULD be split over several lines with the
    "folding" technique: a line may be broken between any two characters by
    inserting a CRLF immediately followed by one linear white-space
    character (SPACE or HTAB).

    Parameters:
        line: The unfolded line; it must not contain a newline character.
        limit: Octet limit per line. Every emitted segment holds fewer than
            ``limit`` octets of content.
        fold_sep: The separator inserted at each fold.

    Returns:
        The folded line.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    # Common case: pure ASCII means one character is one octet, so the line
    # can be cut by plain slicing.
    if line.isascii():
        step = limit - 1
        pieces = [line[start : start + step] for start in range(0, len(line), step)]
        return fold_sep.join(pieces)

    # General case: count encoded octets so that a multi-byte character is
    # never split and no segment reaches the limit.
    folded: list[str] = []
    used = 0
    for ch in line:
        width = len(ch.encode(DEFAULT_ENCODING))
        if used + width >= limit:
            folded.append(fold_sep)
            used = 0
        used += width
        folded.append(ch)

    return "".join(folded)
122
123
def escape_string(val: str) -> str:
    r"""Turn backslash escape sequences into percent-encoded hex values.

    Each backslash-escaped character is replaced by its percent-encoded
    equivalent. Parameter parsing uses this to keep escaped characters
    intact while the value is processed.

    Parameters:
        val: The string with backslash escapes.

    Returns:
        The string with backslash escapes converted to percent encoding.

    Note:
        Conversions, applied in this order:

        - ``\,`` -> ``%2C``
        - ``\:`` -> ``%3A``
        - ``\;`` -> ``%3B``
        - ``\\`` -> ``%5C``
    """
    # Each code is the two-digit uppercase hex value of the escaped character.
    conversions = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    encoded = val
    for sequence, percent_code in conversions:
        encoded = encoded.replace(sequence, percent_code)
    return encoded
152
153
def unescape_string(val: str) -> str:
    r"""Turn percent-encoded hex values back into their characters.

    This is the inverse of :func:`escape_string` and is used for parameter
    parsing.

    Parameters:
        val: The string with percent-encoded values.

    Returns:
        The string with percent encoding converted to characters.

    Note:
        Conversions, applied in this order:

        - ``%2C`` -> ``,``
        - ``%3A`` -> ``:``
        - ``%3B`` -> ``;``
        - ``%5C`` -> ``\``
    """
    decoded = val.replace("%2C", ",")
    decoded = decoded.replace("%3A", ":")
    decoded = decoded.replace("%3B", ";")
    decoded = decoded.replace("%5C", "\\")
    return decoded
180
181
# ``\w`` and ``-`` are accepted because of the iCalendar RFC;
# ``.`` is accepted in addition because of the vCard RFC.
NAME = re.compile(r"[\w.-]+")


def validate_token(name: str) -> None:
    r"""Check that a name is a valid iCalendar token.

    The whole name has to match the NAME regex pattern (``[\w.-]+``), which
    models the :rfc:`5545` token syntax.

    Parameters:
        name: The token name to validate.

    Raises:
        ValueError: If the name is not a valid token. The offending name is
            the exception's only argument.
    """
    # A full match means the name is non-empty and consists solely of
    # permitted characters.
    if NAME.fullmatch(name) is None:
        raise ValueError(name)
203
204
# Public API of this module: the names exported by a star-import.
__all__ = [
    "escape_char",
    "escape_string",
    "foldline",
    "unescape_char",
    "unescape_string",
    "validate_token",
]