1"""Functions for manipulating strings and bytes."""
2
3import re
4import warnings
5
6from icalendar.parser_tools import DEFAULT_ENCODING
7
8
9def _escape_char(text: str | bytes) -> str | bytes:
10 r"""Format value according to iCalendar TEXT escaping rules.
11
12 Escapes special characters in text values according to :rfc:`5545#section-3.3.11`
13 rules.
14 The order of replacements matters to avoid double-escaping.
15
16 Parameters:
17 text: The text to escape.
18
19 Returns:
20 The escaped text with special characters escaped.
21
22 Note:
23 The replacement order is critical:
24
25 1. ``\N`` -> ``\n`` (normalize newlines to lowercase)
26 2. ``\`` -> ``\\`` (escape backslashes)
27 3. ``;`` -> ``\;`` (escape semicolons)
28 4. ``,`` -> ``\,`` (escape commas)
29 5. ``\r\n`` -> ``\n`` (normalize line endings)
30 6. ``"\n"`` -> ``r"\n"`` (transform a newline character to a literal, or raw,
31 newline character)
32 """
33 assert isinstance(text, (str, bytes))
34 # NOTE: ORDER MATTERS!
35 return (
36 text.replace(r"\N", "\n")
37 .replace("\\", "\\\\")
38 .replace(";", r"\;")
39 .replace(",", r"\,")
40 .replace("\r\n", r"\n")
41 .replace("\n", r"\n")
42 )
43
44
def escape_char(text: str | bytes) -> str | bytes:
    r"""Format value according to iCalendar TEXT escaping rules.

    .. deprecated:: 7.0.0
        Use the private :func:`_escape_char` internally. For external use,
        this function is deprecated. Please use alternative escaping methods
        or contact the maintainers.

    Emits a :class:`DeprecationWarning` attributed to the caller and then
    returns whatever :func:`_escape_char` returns for ``text``.

    Parameters:
        text: The text to escape.

    Returns:
        The result of :func:`_escape_char` for ``text``.
    """
    message = (
        "escape_char is deprecated and will be removed in a future version. "
        "If you are using this function externally, please contact the maintainers."
    )
    # stacklevel=2 makes the warning point at the caller, not at this wrapper.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _escape_char(text)
81
82
83def _unescape_char(text: str | bytes) -> str | bytes | None:
84 r"""Unescape iCalendar TEXT values.
85
86 Reverses the escaping applied by :func:`_escape_char` according to
87 :rfc:`5545#section-3.3.11` TEXT escaping rules.
88
89 Parameters:
90 text: The escaped text.
91
92 Returns:
93 The unescaped text, or ``None`` if ``text`` is neither ``str`` nor ``bytes``.
94
95 Note:
96 The replacement order is critical to avoid double-unescaping:
97
98 1. ``\N`` -> ``\n`` (intermediate step)
99 2. ``\r\n`` -> ``\n`` (normalize line endings)
100 3. ``\n`` -> newline (unescape newlines)
101 4. ``\,`` -> ``,`` (unescape commas)
102 5. ``\;`` -> ``;`` (unescape semicolons)
103 6. ``\\`` -> ``\`` (unescape backslashes last)
104 """
105 assert isinstance(text, (str, bytes))
106 # NOTE: ORDER MATTERS!
107 if isinstance(text, str):
108 return (
109 text.replace("\\N", "\\n")
110 .replace("\r\n", "\n")
111 .replace("\\n", "\n")
112 .replace("\\,", ",")
113 .replace("\\;", ";")
114 .replace("\\\\", "\\")
115 )
116 if isinstance(text, bytes):
117 return (
118 text.replace(b"\\N", b"\\n")
119 .replace(b"\r\n", b"\n")
120 .replace(b"\\n", b"\n")
121 .replace(b"\\,", b",")
122 .replace(b"\\;", b";")
123 .replace(b"\\\\", b"\\")
124 )
125 return None
126
127
def unescape_char(text: str | bytes) -> str | bytes | None:
    r"""Unescape iCalendar TEXT values.

    .. deprecated:: 7.0.0
        Use the private :func:`_unescape_char` internally. For external use,
        this function is deprecated. Please use alternative unescaping methods
        or contact the maintainers.

    Emits a :class:`DeprecationWarning` attributed to the caller and then
    returns whatever :func:`_unescape_char` returns for ``text``.

    Parameters:
        text: The escaped text.

    Returns:
        The result of :func:`_unescape_char` for ``text``.
    """
    message = (
        "unescape_char is deprecated and will be removed in a future version. "
        "If you are using this function externally, please contact the maintainers."
    )
    # stacklevel=2 makes the warning point at the caller, not at this wrapper.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _unescape_char(text)
162
163
def foldline(line: str, limit: int = 75, fold_sep: str = "\r\n ") -> str:
    """Fold a content line as described in RFC 5545.

    Lines of text SHOULD NOT be longer than 75 octets, excluding the line
    break. Longer lines are split by inserting ``fold_sep`` (by default a
    CRLF followed by a single space) between two characters.

    Parameters:
        line: The unfolded line. It must not contain a newline character.
        limit: The octet limit per line; each piece holds fewer than
            ``limit`` octets of ``line``.
        fold_sep: The separator inserted at every fold.

    Returns:
        The folded line.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    # Fast path for the common case: in pure ASCII text one character is one
    # octet, so the line can simply be cut into fixed-width slices.
    if line.isascii():
        width = limit - 1
        pieces = []
        for start in range(0, len(line), width):
            pieces.append(line[start : start + width])
        return fold_sep.join(pieces)

    # General path: count encoded octets so that a fold is only ever inserted
    # between two characters, never inside a multi-byte character.
    folded: list[str] = []
    used = 0
    for character in line:
        size = len(character.encode(DEFAULT_ENCODING))
        if used + size >= limit:
            folded.append(fold_sep)
            used = size
        else:
            used += size
        folded.append(character)

    return "".join(folded)
197
198
def escape_string(val: str) -> str:
    r"""Replace backslash escapes with percent-encoded hex values.

    Each backslash-escaped character listed below is rewritten to its
    percent-encoded equivalent. This is used for parameter parsing to
    preserve escaped characters during processing.

    Parameters:
        val: The string with backslash escapes.

    Returns:
        The string with backslash escapes converted to percent encoding.

    Note:
        Conversions, applied one after another in this order:

        - ``\,`` -> ``%2C``
        - ``\:`` -> ``%3A``
        - ``\;`` -> ``%3B``
        - ``\\`` -> ``%5C``
    """
    conversions = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    encoded = val
    for escaped, percent_code in conversions:
        encoded = encoded.replace(escaped, percent_code)
    return encoded
227
228
def unescape_string(val: str) -> str:
    r"""Replace percent-encoded hex values with their characters.

    Reverses :func:`escape_string` by converting the percent-encoded values
    listed below back to plain characters. This is used for parameter parsing.

    Parameters:
        val: The string with percent-encoded values.

    Returns:
        The string with percent encoding converted to characters.

    Note:
        Conversions, applied one after another in this order:

        - ``%2C`` -> ``,``
        - ``%3A`` -> ``:``
        - ``%3B`` -> ``;``
        - ``%5C`` -> ``\``
    """
    conversions = (
        ("%2C", ","),
        ("%3A", ":"),
        ("%3B", ";"),
        ("%5C", "\\"),
    )
    decoded = val
    for percent_code, character in conversions:
        decoded = decoded.replace(percent_code, character)
    return decoded
255
256
# ``\w`` and ``-`` are accepted because of the iCalendar RFC;
# ``.`` is accepted because of the vCard RFC.
NAME = re.compile(r"[\w.-]+")


def validate_token(name: str) -> None:
    r"""Check that a name is a valid iCalendar token.

    The name is valid when the NAME regex pattern (``[\w.-]+``), which
    follows the :rfc:`5545` token syntax, matches it from the first to the
    last character.

    Parameters:
        name: The token name to validate.

    Raises:
        ValueError: If the name is not a valid token. The offending name is
            the only exception argument.
    """
    if NAME.fullmatch(name) is None:
        raise ValueError(name)
278
279
# Names exported by a star-import of this module. The underscore-prefixed
# helpers are listed on purpose, next to their deprecated public aliases.
__all__ = [
    "_escape_char",
    "_unescape_char",
    "escape_char",
    "escape_string",
    "foldline",
    "unescape_char",
    "unescape_string",
    "validate_token",
]