1"""Functions for manipulating strings and bytes."""
2
3import re
4import warnings
5
6from icalendar.parser_tools import DEFAULT_ENCODING, to_unicode
7
8
def _escape_char(text: str | bytes) -> str:
    r"""Escape a value so it can be written as an iCalendar TEXT value.

    Applies the TEXT escaping rules of :rfc:`5545#section-3.3.11`. The input
    is first passed through ``to_unicode`` and the substitutions below are
    then applied one after another.

    Parameters:
        text: The text to escape.

    Returns:
        The text with its special characters escaped.

    Note:
        The substitutions run in exactly this order; reordering them would
        escape some characters twice:

        1. ``\N`` -> newline character (written out as ``\n`` by step 6)
        2. ``\`` -> ``\\`` (escape backslashes)
        3. ``;`` -> ``\;`` (escape semicolons)
        4. ``,`` -> ``\,`` (escape commas)
        5. ``\r\n`` -> ``\n`` (CRLF line endings become a literal ``\n``)
        6. ``"\n"`` -> ``r"\n"`` (remaining newline characters become a
           literal backslash followed by ``n``)
    """
    assert isinstance(text, (str, bytes))
    escaped = to_unicode(text)
    # NOTE: the pairs are applied top to bottom and the order matters.
    substitutions = (
        (r"\N", "\n"),
        ("\\", "\\\\"),
        (";", r"\;"),
        (",", r"\,"),
        ("\r\n", r"\n"),
        ("\n", r"\n"),
    )
    for needle, replacement in substitutions:
        escaped = escaped.replace(needle, replacement)
    return escaped
44
45
def escape_char(text: str | bytes) -> str:
    r"""Format value according to iCalendar TEXT escaping rules (deprecated).

    .. deprecated:: 7.0.0
        Use the private :func:`_escape_char` internally. For external use,
        this function is deprecated. Please use alternative escaping methods
        or contact the maintainers.

    Emits a :class:`DeprecationWarning` attributed to the caller and then
    delegates to :func:`_escape_char`, which performs the escaping described
    in :rfc:`5545#section-3.3.11`.

    Parameters:
        text: The text to escape.

    Returns:
        The result of :func:`_escape_char` for ``text``.
    """
    message = (
        "escape_char is deprecated and will be removed in a future version. "
        "If you are using this function externally, please contact the maintainers."
    )
    # stacklevel=2 makes the warning point at the code that called this wrapper.
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _escape_char(text)
82
83
84def _unescape_char(text: str | bytes) -> str | bytes | None:
85 r"""Unescape iCalendar TEXT values.
86
87 Reverses the escaping applied by :func:`_escape_char` according to
88 :rfc:`5545#section-3.3.11` TEXT escaping rules.
89
90 Parameters:
91 text: The escaped text.
92
93 Returns:
94 The unescaped text, or ``None`` if ``text`` is neither ``str`` nor ``bytes``.
95
96 Note:
97 The replacement order is critical to avoid double-unescaping:
98
99 1. ``\N`` -> ``\n`` (intermediate step)
100 2. ``\r\n`` -> ``\n`` (normalize line endings)
101 3. ``\n`` -> newline (unescape newlines)
102 4. ``\,`` -> ``,`` (unescape commas)
103 5. ``\;`` -> ``;`` (unescape semicolons)
104 6. ``\\`` -> ``\`` (unescape backslashes last)
105 """
106 assert isinstance(text, (str, bytes))
107 # NOTE: ORDER MATTERS!
108 if isinstance(text, str):
109 return (
110 text.replace("\\N", "\\n")
111 .replace("\r\n", "\n")
112 .replace("\\n", "\n")
113 .replace("\\,", ",")
114 .replace("\\;", ";")
115 .replace("\\\\", "\\")
116 )
117 if isinstance(text, bytes):
118 return (
119 text.replace(b"\\N", b"\\n")
120 .replace(b"\r\n", b"\n")
121 .replace(b"\\n", b"\n")
122 .replace(b"\\,", b",")
123 .replace(b"\\;", b";")
124 .replace(b"\\\\", b"\\")
125 )
126 return None
127
128
def unescape_char(text: str | bytes) -> str | bytes | None:
    r"""Unescape iCalendar TEXT values (deprecated).

    .. deprecated:: 7.0.0
        Use the private :func:`_unescape_char` internally. For external use,
        this function is deprecated. Please use alternative unescaping methods
        or contact the maintainers.

    Emits a :class:`DeprecationWarning` attributed to the caller and then
    delegates to :func:`_unescape_char`, which reverses the TEXT escaping of
    :rfc:`5545#section-3.3.11`.

    Parameters:
        text: The escaped text.

    Returns:
        The result of :func:`_unescape_char` for ``text``.
    """
    message = (
        "unescape_char is deprecated and will be removed in a future version. "
        "If you are using this function externally, please contact the maintainers."
    )
    # stacklevel=2 makes the warning point at the code that called this wrapper.
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _unescape_char(text)
163
164
def foldline(line: str, limit: int = 75, fold_sep: str = "\r\n ") -> str:
    """Fold a content line as defined in RFC 5545.

    From the RFC: lines of text SHOULD NOT be longer than 75 octets,
    excluding the line break. Long content lines SHOULD be split into a
    multiple line representation using a line "folding" technique. That is, a
    long line can be split between any two characters by inserting a CRLF
    immediately followed by a single linear white-space character (i.e.,
    SPACE or HTAB).

    Parameters:
        line: The unfolded line. Must be a ``str`` without ``"\\n"``.
        limit: The octet limit; each piece of ``line`` between two
            separators holds fewer than ``limit`` octets.
        fold_sep: The separator inserted at every fold.

    Returns:
        ``line`` with ``fold_sep`` inserted at the fold positions.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    if line.isascii():
        # Common case: one character is one octet, so slice at a fixed width.
        width = limit - 1
        chunks = [line[start : start + width] for start in range(0, len(line), width)]
        return fold_sep.join(chunks)

    # General case: count encoded octets so that no character is ever split.
    pieces: list[str] = []
    used = 0
    for ch in line:
        size = len(ch.encode(DEFAULT_ENCODING))
        if used + size >= limit:
            # This character would reach the limit: fold in front of it and
            # start counting the new piece from zero.
            pieces.append(fold_sep)
            used = 0
        used += size
        pieces.append(ch)

    return "".join(pieces)
198
199
200def _escape_string(val: str) -> str:
201 r"""Escape backslash sequences to URL-encoded hex values.
202
203 Converts backslash-escaped characters to their percent-encoded hex
204 equivalents. This is used for parameter parsing to preserve escaped
205 characters during processing.
206
207 Parameters:
208 val: The string with backslash escapes.
209
210 Returns:
211 The string with backslash escapes converted to percent encoding.
212
213 Note:
214 Conversions:
215
216 - ``\,`` -> ``%2C``
217 - ``\:`` -> ``%3A``
218 - ``\;`` -> ``%3B``
219 - ``\\`` -> ``%5C``
220 """
221 # f'{i:02X}'
222 return (
223 val.replace(r"\,", "%2C")
224 .replace(r"\:", "%3A")
225 .replace(r"\;", "%3B")
226 .replace(r"\\", "%5C")
227 )
228
229
def escape_string(val: str) -> str:
    r"""Escape backslash sequences to URL-encoded hex values (deprecated).

    .. deprecated:: 7.0.0
        Use the private :func:`_escape_string` internally. For external use,
        this function is deprecated.

    Emits a :class:`DeprecationWarning` attributed to the caller and then
    delegates to :func:`_escape_string`.

    Parameters:
        val: The string with backslash escapes.

    Returns:
        The result of :func:`_escape_string` for ``val``.
    """
    message = (
        "escape_string is deprecated and will be removed in icalendar 8. "
        "If you are using this function externally, please contact the maintainers."
    )
    # stacklevel=2 makes the warning point at the code that called this wrapper.
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _escape_string(val)
244
245
246def _unescape_string(val: str) -> str:
247 r"""Unescape URL-encoded hex values to their original characters.
248
249 Reverses :func:`_escape_string` by converting percent-encoded hex values
250 back to their original characters. This is used for parameter parsing.
251
252 Parameters:
253 val: The string with percent-encoded values.
254
255 Returns:
256 The string with percent encoding converted to characters.
257
258 Note:
259 Conversions:
260
261 - ``%2C`` -> ``,``
262 - ``%3A`` -> ``:``
263 - ``%3B`` -> ``;``
264 - ``%5C`` -> ``\``
265 """
266 return (
267 val.replace("%2C", ",")
268 .replace("%3A", ":")
269 .replace("%3B", ";")
270 .replace("%5C", "\\")
271 )
272
273
def unescape_string(val: str) -> str:
    r"""Unescape URL-encoded hex values to their characters (deprecated).

    .. deprecated:: 7.0.0
        Use the private :func:`_unescape_string` internally. For external use,
        this function is deprecated.

    Emits a :class:`DeprecationWarning` attributed to the caller and then
    delegates to :func:`_unescape_string`.

    Parameters:
        val: The string with percent-encoded values.

    Returns:
        The result of :func:`_unescape_string` for ``val``.
    """
    message = (
        "unescape_string is deprecated and will be removed in icalendar 8. "
        "If you are using this function externally, please contact the maintainers."
    )
    # stacklevel=2 makes the warning point at the code that called this wrapper.
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return _unescape_string(val)
288
289
# Compiled pattern matching one run of one or more name characters:
# - [\w-] because of the iCalendar RFC
# - . because of the vCard RFC
NAME = re.compile(r"[\w.-]+")
293
294
def validate_token(name: str) -> None:
    r"""Check that a name is a valid iCalendar token.

    The name is accepted only when the module-level ``NAME`` pattern
    (``[\w.-]+``, used for the :rfc:`5545` token syntax) matches it from the
    first character to the last.

    Parameters:
        name: The token name to validate.

    Raises:
        ValueError: If the name is not a valid token. The offending name is
            the exception's argument.
    """
    # A full match is required: an empty name or one containing any other
    # character is rejected.
    if NAME.fullmatch(name) is None:
        raise ValueError(name)
311
312
# Names exported by a star-import of this module: the public functions
# (including the deprecated wrappers) and the underscore-prefixed helpers
# those wrappers delegate to.
__all__ = [
    "_escape_char",
    "_escape_string",
    "_unescape_char",
    "_unescape_string",
    "escape_char",
    "escape_string",
    "foldline",
    "unescape_char",
    "unescape_string",
    "validate_token",
]