1"""Functions for manipulating strings and bytes."""
2
3import re
4
5from icalendar.compatibility import deprecate_for_version_8
6from icalendar.parser_tools import DEFAULT_ENCODING, to_unicode
7
8
def _escape_char(text: str | bytes) -> str:
    r"""Escape a value following the iCalendar TEXT rules.

    Applies the escaping described in :rfc:`5545#section-3.3.11` to ``text``
    after passing it through ``to_unicode``.

    Parameters:
        text: The ``str`` or ``bytes`` value to escape.

    Returns:
        The escaped text.

    Note:
        The substitutions run strictly in this sequence, and the sequence
        must not be changed:

        1. a literal ``\N`` becomes a real newline character, so that it is
           written out as ``\n`` by the last step instead of having its
           backslash doubled
        2. ``\`` -> ``\\``
        3. ``;`` -> ``\;``
        4. ``,`` -> ``\,``
        5. a carriage return + newline pair becomes the two characters ``\n``
        6. every remaining newline character becomes the two characters
           ``\n``
    """
    assert isinstance(text, (str, bytes))
    escaped = to_unicode(text)
    # NOTE: order-sensitive table. Backslashes are doubled before any escape
    # sequence is introduced so that the inserted backslashes stay single.
    substitutions = (
        ("\\N", "\n"),
        ("\\", "\\\\"),
        (";", "\\;"),
        (",", "\\,"),
        ("\r\n", "\\n"),
        ("\n", "\\n"),
    )
    for needle, substitute in substitutions:
        escaped = escaped.replace(needle, substitute)
    return escaped
44
45
# Public name kept for backwards compatibility: the private implementation
# wrapped by ``deprecate_for_version_8`` (imported from
# ``icalendar.compatibility``).
# NOTE(review): presumably the wrapper emits a deprecation warning on call —
# confirm against ``icalendar.compatibility``.
escape_char = deprecate_for_version_8(_escape_char)
"""Format value according to iCalendar TEXT escaping rules.

.. deprecated:: 7.0.0
    Use the private :func:`_escape_char` internally. For external use,
    this function is deprecated. Please use alternative escaping methods
    or contact the maintainers.
"""
54
55
56def _unescape_char(text: str | bytes) -> str | bytes | None:
57 r"""Unescape iCalendar TEXT values.
58
59 Reverses the escaping applied by :func:`_escape_char` according to
60 :rfc:`5545#section-3.3.11` TEXT escaping rules.
61
62 Parameters:
63 text: The escaped text.
64
65 Returns:
66 The unescaped text, or ``None`` if ``text`` is neither ``str`` nor ``bytes``.
67
68 Note:
69 The replacement order is critical to avoid double-unescaping:
70
71 1. ``\N`` -> ``\n`` (intermediate step)
72 2. ``\r\n`` -> ``\n`` (normalize line endings)
73 3. ``\n`` -> newline (unescape newlines)
74 4. ``\,`` -> ``,`` (unescape commas)
75 5. ``\;`` -> ``;`` (unescape semicolons)
76 6. ``\\`` -> ``\`` (unescape backslashes last)
77 """
78 assert isinstance(text, (str, bytes))
79 # NOTE: ORDER MATTERS!
80 if isinstance(text, str):
81 return (
82 text.replace("\\N", "\\n")
83 .replace("\r\n", "\n")
84 .replace("\\n", "\n")
85 .replace("\\,", ",")
86 .replace("\\;", ";")
87 .replace("\\\\", "\\")
88 )
89 if isinstance(text, bytes):
90 return (
91 text.replace(b"\\N", b"\\n")
92 .replace(b"\r\n", b"\n")
93 .replace(b"\\n", b"\n")
94 .replace(b"\\,", b",")
95 .replace(b"\\;", b";")
96 .replace(b"\\\\", b"\\")
97 )
98 return None
99
100
# Public name kept for backwards compatibility: the private implementation
# wrapped by ``deprecate_for_version_8`` (imported from
# ``icalendar.compatibility``).
# NOTE(review): presumably the wrapper emits a deprecation warning on call —
# confirm against ``icalendar.compatibility``.
unescape_char = deprecate_for_version_8(_unescape_char)
"""Unescape iCalendar TEXT values.

.. deprecated:: 7.0.0
    Use the private :func:`_unescape_char` internally. For external use,
    this function is deprecated. Please use alternative unescaping methods
    or contact the maintainers.
"""
109
110
def _foldline(line: str, limit: int = 75, fold_sep: str = "\r\n ") -> str:
    """Fold a content line as described in RFC5545.

    RFC5545 says that lines of text SHOULD NOT be longer than 75 octets,
    excluding the line break, and that longer content lines SHOULD be split
    by inserting a CRLF immediately followed by a single linear white-space
    character (SPACE or HTAB) between any two characters.

    Parameters:
        line: The unfolded line. It must not contain a newline character.
        limit: The octet limit per line; each piece holds fewer than
            ``limit`` octets of the original text.
        fold_sep: The text inserted at every fold position.

    Returns:
        The folded line.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    # Common case: with ASCII-only text one character is one octet, so the
    # line can simply be cut into fixed-width slices.
    if line.isascii():
        width = limit - 1
        slices = [line[start : start + width] for start in range(0, len(line), width)]
        return fold_sep.join(slices)

    # General case: track the encoded size of the current piece and fold
    # before the character that would bring it up to the limit.
    pieces: list[str] = []
    octets = 0
    for character in line:
        size = len(character.encode(DEFAULT_ENCODING))
        if octets + size >= limit:
            pieces.append(fold_sep)
            octets = 0
        octets += size
        pieces.append(character)
    return "".join(pieces)
145
146
# Public name kept for backwards compatibility: the private implementation
# wrapped by ``deprecate_for_version_8`` (imported from
# ``icalendar.compatibility``).
# NOTE(review): presumably the wrapper emits a deprecation warning on call —
# confirm against ``icalendar.compatibility``.
foldline = deprecate_for_version_8(_foldline)
"""Make a string folded as defined in RFC5545.

.. deprecated:: 7.0.0
    Use the private :func:`_foldline` internally.
"""
153
154
def _escape_string(val: str) -> str:
    r"""Turn backslash escape sequences into percent-encoded hex values.

    Each supported two-character backslash sequence is replaced by ``%``
    followed by the two-digit uppercase hex code of the escaped character.
    This is used for parameter parsing to preserve escaped characters
    during processing.

    Parameters:
        val: The string containing backslash escapes.

    Returns:
        The string with the supported backslash escapes percent-encoded.

    Note:
        Conversions, applied in this sequence:

        - ``\,`` -> ``%2C``
        - ``\:`` -> ``%3A``
        - ``\;`` -> ``%3B``
        - ``\\`` -> ``%5C``
    """
    conversions = (
        ("\\,", "%2C"),
        ("\\:", "%3A"),
        ("\\;", "%3B"),
        ("\\\\", "%5C"),
    )
    encoded = val
    for sequence, percent_code in conversions:
        encoded = encoded.replace(sequence, percent_code)
    return encoded
183
184
# Public name kept for backwards compatibility: the private implementation
# wrapped by ``deprecate_for_version_8`` (imported from
# ``icalendar.compatibility``).
# NOTE(review): presumably the wrapper emits a deprecation warning on call —
# confirm against ``icalendar.compatibility``.
escape_string = deprecate_for_version_8(_escape_string)
"""Escape backslash sequences to URL-encoded hex values.

.. deprecated:: 7.0.0
    Use the private :func:`_escape_string` internally. For external use,
    this function is deprecated.
"""
192
193
194def _unescape_string(val: str) -> str:
195 r"""Unescape URL-encoded hex values to their original characters.
196
197 Reverses :func:`_escape_string` by converting percent-encoded hex values
198 back to their original characters. This is used for parameter parsing.
199
200 Parameters:
201 val: The string with percent-encoded values.
202
203 Returns:
204 The string with percent encoding converted to characters.
205
206 Note:
207 Conversions:
208
209 - ``%2C`` -> ``,``
210 - ``%3A`` -> ``:``
211 - ``%3B`` -> ``;``
212 - ``%5C`` -> ``\``
213 """
214 return (
215 val.replace("%2C", ",")
216 .replace("%3A", ":")
217 .replace("%3B", ";")
218 .replace("%5C", "\\")
219 )
220
221
# Public name kept for backwards compatibility: the private implementation
# wrapped by ``deprecate_for_version_8`` (imported from
# ``icalendar.compatibility``).
# NOTE(review): presumably the wrapper emits a deprecation warning on call —
# confirm against ``icalendar.compatibility``.
unescape_string = deprecate_for_version_8(_unescape_string)
"""Unescape URL-encoded hex values to their original characters.

.. deprecated:: 7.0.0
    Use the private :func:`_unescape_string` internally. For external use,
    this function is deprecated.
"""
229
230
# Characters allowed in a token:
# [\w-] because of the iCalendar RFC
# . because of the vCard RFC
NAME = re.compile(r"[\w.-]+")


def validate_token(name: str) -> None:
    r"""Check that a name is a valid iCalendar token.

    The name is accepted only when the whole string consists of characters
    allowed by the NAME regex pattern (``[\w.-]+``), which follows the
    :rfc:`5545` token syntax.

    Parameters:
        name: The token name to validate.

    Raises:
        ValueError: If the name is empty or contains any character outside
            the pattern. The offending name is the exception argument.
    """
    # A full match is the same condition as "exactly one run of allowed
    # characters, and that run is the entire string".
    if NAME.fullmatch(name) is None:
        raise ValueError(name)
252
253
# Names exported by a star-import of this module: the private helpers, their
# deprecated public aliases, and ``validate_token``.
__all__ = [
    "_escape_char",
    "_escape_string",
    "_foldline",
    "_unescape_char",
    "_unescape_string",
    "escape_char",
    "escape_string",
    "foldline",
    "unescape_char",
    "unescape_string",
    "validate_token",
]