1"""This module parses and generates contentlines as defined in RFC 5545
(iCalendar), but will probably work for other MIME types with similar syntax,
e.g. RFC 2426 (vCard).
4
5It is stupid in the sense that it treats the content purely as strings. No type
6conversion is attempted.
7"""
8
9from __future__ import annotations
10
11import functools
12import os
13import re
14from typing import TYPE_CHECKING
15
16from icalendar.caselessdict import CaselessDict
17from icalendar.parser_tools import (
18 DEFAULT_ENCODING,
19 ICAL_TYPE,
20 SEQUENCE_TYPES,
21 to_unicode,
22)
23
24if TYPE_CHECKING:
25 from icalendar.enums import VALUE
26
27
def escape_char(text):
    """Format value according to iCalendar TEXT escaping rules.

    Backslashes, semicolons and commas are prefixed with a backslash and
    line breaks (CRLF or LF) are written as the two characters ``\\n``.
    A literal ``\\N`` in the input is treated as a line break.

    :param text: The ``str`` or ``bytes`` value to escape.
    :return: The escaped value, of the same type as ``text``.
    """
    assert isinstance(text, (str, bytes))
    # NOTE: ORDER MATTERS! Backslashes must be doubled before the other
    # replacements introduce new backslashes.
    if isinstance(text, bytes):
        # bytes.replace() only accepts bytes arguments, so the bytes input
        # that the assertion allows needs its own set of literals.
        return (
            text.replace(b"\\N", b"\n")
            .replace(b"\\", b"\\\\")
            .replace(b";", b"\\;")
            .replace(b",", b"\\,")
            .replace(b"\r\n", b"\\n")
            .replace(b"\n", b"\\n")
        )
    return (
        text.replace(r"\N", "\n")
        .replace("\\", "\\\\")
        .replace(";", r"\;")
        .replace(",", r"\,")
        .replace("\r\n", r"\n")
        .replace("\n", r"\n")
    )
40
41
# One alternative per escape sequence so that the input is decoded in a single
# left-to-right pass; a literal CRLF is normalised to LF as well.
_UNESCAPE_CHAR_STR = re.compile(r"\\([\\;,nN])|\r\n")
_UNESCAPE_CHAR_BYTES = re.compile(rb"\\([\\;,nN])|\r\n")


def unescape_char(text):
    """Reverse the iCalendar TEXT escaping applied by ``escape_char``.

    ``\\n`` and ``\\N`` become a line feed, ``\\,``, ``\\;`` and ``\\\\``
    become the bare character, and a literal CRLF becomes a line feed.
    Any other backslash is left untouched.

    The value is scanned exactly once. Chained ``replace()`` calls would
    re-scan their own output and turn an escaped backslash followed by
    ``n`` (``\\\\n``) into backslash + line feed instead of backslash + n.

    :param text: The escaped ``str`` or ``bytes`` value.
    :return: The unescaped value, of the same type as ``text``.
    """
    assert isinstance(text, (str, bytes))
    if isinstance(text, str):
        pattern, newline, newline_codes = _UNESCAPE_CHAR_STR, "\n", "nN"
    elif isinstance(text, bytes):
        pattern, newline, newline_codes = _UNESCAPE_CHAR_BYTES, b"\n", b"nN"
    else:
        # Only reachable when assertions are disabled (python -O).
        return None

    def decode(match):
        escaped = match.group(1)
        # group(1) is None when the CRLF alternative matched.
        if escaped is None or escaped in newline_codes:
            return newline
        return escaped

    return pattern.sub(decode, text)
64
65
def foldline(line, limit=75, fold_sep="\r\n "):
    """Fold a content line as described in RFC 5545.

    Lines of text SHOULD NOT be longer than 75 octets, excluding the line
    break. Longer lines are split by inserting ``fold_sep`` (by default a
    CRLF followed by a single space) between two characters.

    :param line: The unfolded line; it must not contain a line feed.
    :param limit: Maximum number of octets per physical line.
    :param fold_sep: Separator inserted at every fold.
    :return: The folded line as a string.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    # Pure ASCII is the common case: one character is one octet, so the line
    # can simply be sliced into equally sized pieces.
    if line.isascii():
        step = limit - 1
        pieces = [line[start : start + step] for start in range(0, len(line), step)]
        return fold_sep.join(pieces)

    # Otherwise count encoded octets so that a multi-byte character is never
    # split across two physical lines.
    folded = []
    octets = 0
    for character in line:
        width = len(character.encode(DEFAULT_ENCODING))
        if octets + width >= limit:
            folded.append(fold_sep)
            octets = 0
        octets += width
        folded.append(character)
    return "".join(folded)
99
100
101#################################################################
102# Property parameter stuff
103
104
def param_value(value, always_quote=False):
    """Return a parameter value in its serialised text form.

    The value is escaped with ``rfc_6868_escape`` and then double-quoted
    where necessary.

    :param value: A sequence of strings (joined with commas), a string, or
        any object whose ``to_ical()`` returns encoded bytes.
    :param always_quote: Enclose the value(s) in double quotes even if they
        contain no character that requires quoting.
    :return: The escaped and possibly quoted value.
    """
    if isinstance(value, SEQUENCE_TYPES):
        return q_join(map(rfc_6868_escape, value), always_quote=always_quote)
    if isinstance(value, str):
        return dquote(rfc_6868_escape(value), always_quote=always_quote)
    # Objects that serialise themselves must honour always_quote as well,
    # otherwise parameters that require quoting are emitted unquoted.
    return dquote(
        rfc_6868_escape(value.to_ical().decode(DEFAULT_ENCODING)),
        always_quote=always_quote,
    )
112
113
# Could be improved

# A token: one or more word characters, dots or hyphens.
# [\w-] because of the iCalendar RFC
# . because of the vCard RFC
NAME = re.compile(r"[\w.-]+")

# Characters rejected in an unquoted parameter value: control characters
# (except HTAB, \x09), DEL, and the double quote, comma, colon and semicolon.
UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f",:;]')
# Characters rejected in a quoted parameter value: control characters
# (except HTAB, \x09), DEL, and the double quote.
QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f"]')
# A fold: one or more line breaks (LF or CRLF) followed by one space or tab.
# FOLD matches bytes, UFOLD matches str.
FOLD = re.compile(b"(\r?\n)+[ \t]")
UFOLD = re.compile("(\r?\n)+[ \t]")
# A single line break, either LF or CRLF.
NEWLINE = re.compile(r"\r?\n")
125
126
def validate_token(name):
    """Check that ``name`` consists solely of characters allowed by ``NAME``.

    :param name: The property or parameter name to check.
    :raises ValueError: If ``name`` is empty or contains any other character.
    """
    if NAME.fullmatch(name) is None:
        raise ValueError(name)
132
133
def validate_param_value(value, quoted=True):
    """Reject parameter values that contain forbidden characters.

    :param value: The parameter value, without surrounding quotes.
    :param quoted: Whether the value was enclosed in double quotes; quoted
        values are checked against ``QUNSAFE_CHAR``, others against
        ``UNSAFE_CHAR``.
    :raises ValueError: If a forbidden character is found.
    """
    forbidden = UNSAFE_CHAR
    if quoted:
        forbidden = QUNSAFE_CHAR
    if forbidden.search(value) is not None:
        raise ValueError(value)
138
139
# Characters whose presence in a parameter value causes the value to be
# enclosed in double quotes: comma, semicolon, colon and the right single
# quotation mark (U+2019).
QUOTABLE = re.compile("[,;:’]")  # noqa: RUF001
143
144
def dquote(val, always_quote=False):
    """Enclose a parameter value in double quotes when required.

    A value is quoted when it contains a character matched by ``QUOTABLE``
    or when ``always_quote`` is set.

    :param val: The parameter value.
    :param always_quote: Quote the value unconditionally.
    :return: The value, quoted if necessary.
    """
    # A double quote must not appear inside a parameter value, so every one
    # is replaced by a single quote first.
    sanitized = val.replace('"', "'")
    needs_quotes = QUOTABLE.search(sanitized) or always_quote
    return f'"{sanitized}"' if needs_quotes else sanitized
153
154
155# parsing helper
def q_split(st, sep=",", maxsplit=-1):
    """Split a string on ``sep`` while ignoring separators inside double quotes.

    :param st: The string to split.
    :param sep: The single-character separator.
    :param maxsplit: Maximum number of splits; ``-1`` means no limit and
        ``0`` returns the input unsplit.
    :return: The list of pieces. An empty input yields an empty list.
    """
    if maxsplit == 0:
        return [st]

    pieces = []
    start = 0
    quoted = False
    done = 0
    last = len(st) - 1
    for pos, char in enumerate(st):
        if char == '"':
            quoted = not quoted
        if char == sep and not quoted:
            pieces.append(st[start:pos])
            start = pos + 1
            done += 1
        # Flush the remainder at the end of the input or once the split
        # budget is used up.
        if done == maxsplit or pos == last:
            pieces.append(st[start:])
            break
    return pieces
177
178
def q_join(lst, sep=",", always_quote=False):
    """Join the items with ``sep``, passing each one through ``dquote``.

    :param lst: Iterable of string values.
    :param sep: Separator placed between the items.
    :param always_quote: Forwarded to ``dquote`` for every item.
    :return: The joined string.
    """
    quoted_items = [dquote(item, always_quote=always_quote) for item in lst]
    return sep.join(quoted_items)
182
183
def single_string_parameter(func):
    """Turn ``func`` into a property backed by a single string parameter.

    The name of ``func`` is used as the dictionary key and its docstring
    becomes the documentation of the property. Reading returns ``None``
    when the key is absent; assigning ``None`` removes the key.
    """
    key = func.__name__

    def fdel(self: Parameters):
        """Delete the value."""
        self.pop(key, None)

    def fset(self: Parameters, value: str | None):
        """Set the value"""
        if value is not None:
            self[key] = value
            return
        fdel(self)

    @functools.wraps(func)
    def fget(self: Parameters):
        """Get the value."""
        return self.get(key)

    return property(fget, fset, fdel, doc=func.__doc__)
206
class Parameters(CaselessDict):
    """Parse and generate property parameter strings.

    The class knows nothing about data types; it only deals with the
    textual structure of the parameters.
    """

    # Parameters whose values must always be enclosed in double quotes.
    always_quoted = (
        "ALTREP",
        "DELEGATED-FROM",
        "DELEGATED-TO",
        "DIR",
        "MEMBER",
        "SENT-BY",
        # Part of X-APPLE-STRUCTURED-LOCATION
        "X-ADDRESS",
        "X-TITLE",
    )
    # Parameters that are quoted as soon as their value contains one of the
    # listed characters.
    quote_also = {
        # This is escaped in the RFC
        "CN": " '",
    }

    def params(self):
        """Return the parameter names.

        RFC 5545 calls the keys "parameters"; this method exists to be
        consistent with that naming convention.
        """
        return self.keys()

    def to_ical(self, sorted: bool = True):  # noqa: A002, FBT001
        """Serialise the parameters to ``KEY=value`` pairs joined by ``;``.

        :param sorted: Sort the items before serialising them.
        :return: The encoded parameter string as bytes.
        """
        pairs = list(self.items())
        if sorted:
            pairs.sort()

        encoded = []
        for key, value in pairs:
            upper_key = key.upper()
            trigger_chars = self.quote_also.get(upper_key)
            force_quotes = upper_key in self.always_quoted or (
                trigger_chars and any(char in value for char in trigger_chars)
            )
            text = param_value(value, always_quote=force_quotes)
            if isinstance(text, str):
                text = text.encode(DEFAULT_ENCODING)
            # CaselessDict keys are always unicode
            encoded.append(upper_key.encode(DEFAULT_ENCODING) + b"=" + text)
        return b";".join(encoded)

    @classmethod
    def from_ical(cls, st, strict=False):
        """Parse the textual parameter format into a new instance.

        :param st: The parameter part of a content line.
        :param strict: Upper-case values that are not enclosed in quotes.
        :raises ValueError: If a parameter is malformed.
        """
        parsed = cls()
        for param in q_split(st, ";"):
            try:
                key, raw_value = q_split(param, "=", maxsplit=1)
                validate_token(key)
                items = []
                for item in q_split(raw_value, ","):
                    if item.startswith('"') and item.endswith('"'):
                        bare = item.strip('"')
                        validate_param_value(bare, quoted=True)
                        items.append(rfc_6868_unescape(bare))
                        continue
                    validate_param_value(item, quoted=False)
                    # Property parameter values that are not in quoted
                    # strings are case insensitive.
                    items.append(rfc_6868_unescape(item.upper() if strict else item))
                if len(items) == 1:
                    parsed[key] = items[0]
                elif items:
                    parsed[key] = items
                else:
                    # An empty value splits into no items; keep it as is.
                    parsed[key] = raw_value
            except ValueError as exc:  # noqa: PERF203
                raise ValueError(
                    f"{param!r} is not a valid parameter string: {exc}"
                ) from exc
        return parsed

    @single_string_parameter
    def value(self) -> VALUE | str | None:
        """The VALUE parameter from :rfc:`5545`.

        Description:
            This parameter specifies the value type and format of
            the property value.  The property values MUST be of a single value
            type.  For example, a "RDATE" property cannot have a combination
            of DATE-TIME and TIME value types.

            If the property's value is the default value type, then this
            parameter need not be specified.  However, if the property's
            default value type is overridden by some other allowable value
            type, then this parameter MUST be specified.

            Applications MUST preserve the value data for x-name and iana-
            token values that they don't recognize without attempting to
            interpret or parse the value data.
        """
313
314
def escape_string(val):
    """Replace backslash-escaped ``,``, ``:``, ``;`` and ``\\`` by ``%XX`` codes.

    The code is the upper-case hexadecimal value of the escaped character
    (``f'{i:02X}'``).

    :param val: The string to encode.
    :return: The string with the escape sequences replaced.
    """
    # The escaped backslash must come last so that it does not consume the
    # backslash of another escape sequence.
    substitutions = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    encoded = val
    for sequence, code in substitutions:
        encoded = encoded.replace(sequence, code)
    return encoded
323
324
def unescape_string(val):
    """Replace the ``%2C``, ``%3A``, ``%3B`` and ``%5C`` codes by their characters.

    :param val: The string to decode.
    :return: The string with ``,``, ``:``, ``;`` and ``\\`` restored.
    """
    substitutions = (
        ("%2C", ","),
        ("%3A", ":"),
        ("%3B", ";"),
        ("%5C", "\\"),
    )
    decoded = val
    for code, character in substitutions:
        decoded = decoded.replace(code, character)
    return decoded
332
333
RFC_6868_UNESCAPE_REGEX = re.compile(r"\^\^|\^n|\^'")


def rfc_6868_unescape(param_value: str) -> str:
    """Take care of :rfc:`6868` unescaping.

    - ^^ -> ^
    - ^n -> system specific newline
    - ^' -> "
    - ^ with others stay intact
    """

    def decode(match):
        sequence = match.group(0)
        if sequence == "^^":
            return "^"
        if sequence == "^n":
            return os.linesep
        if sequence == "^'":
            return '"'
        return sequence

    return RFC_6868_UNESCAPE_REGEX.sub(decode, param_value)
353
354
RFC_6868_ESCAPE_REGEX = re.compile(r'\^|\r\n|\r|\n|"')


def rfc_6868_escape(param_value: str) -> str:
    """Take care of :rfc:`6868` escaping.

    - ^ -> ^^
    - " -> ^'
    - newline -> ^n
    """

    def encode(match):
        found = match.group(0)
        if found == "^":
            return "^^"
        if found == '"':
            return "^'"
        if found in ("\r\n", "\r", "\n"):
            return "^n"
        return found

    return RFC_6868_ESCAPE_REGEX.sub(encode, param_value)
375
376
def unescape_list_or_string(val):
    """Apply ``unescape_string`` to a string or to every item of a list.

    :param val: A string or a list of strings.
    :return: The unescaped string, or a new list of unescaped strings.
    """
    if not isinstance(val, list):
        return unescape_string(val)
    return list(map(unescape_string, val))
381
382
383#########################################
384# parsing and generation of content lines
385
386
class Contentline(str):
    """A content line is basically a string that can be folded and parsed into
    parts.

    The ``strict`` flag given to the constructor is stored on the instance
    and passed on to ``Parameters.from_ical`` by :meth:`parts`.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create a content line from ``value``.

        ``value`` is converted with ``to_unicode`` using ``encoding``; the
        result must not contain a line feed.
        """
        value = to_unicode(value, encoding=encoding)
        assert "\n" not in value, (
            "Content line can not contain unescaped new line characters."
        )
        self = super().__new__(cls, value)
        self.strict = strict
        return self

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002, FBT001
    ):
        """Turn a parts into a content line.

        :param name: The property name.
        :param params: The property parameters; omitted from the line when
            empty.
        :param values: An object with a ``to_ical()`` method, or anything
            accepted by ``vText``.
        :param sorted: Passed on to ``Parameters.to_ical``.
        """
        assert isinstance(params, Parameters)
        if hasattr(values, "to_ical"):
            values = values.to_ical()
        else:
            # Imported here rather than at module level.
            from icalendar.prop import vText

            values = vText(values).to_ical()
        # elif isinstance(values, basestring):
        #    values = escape_char(values)

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        values = to_unicode(values)
        if params:
            params = to_unicode(params.to_ical(sorted=sorted))
            return cls(f"{name};{params}:{values}")
        return cls(f"{name}:{values}")

    def parts(self):
        """Split the content line up into (name, parameters, values) parts.

        :return: A tuple of the property name, a ``Parameters`` instance and
            the value string.
        :raises ValueError: If the line has no name, the name is not a valid
            token, the parameter section is empty or malformed.
        """
        try:
            # Hide backslash-escaped separators behind %XX codes so that the
            # scan below does not mistake them for structure.
            st = escape_string(self)
            name_split = None
            value_split = None
            in_quotes = False
            for i, ch in enumerate(st):
                if not in_quotes:
                    # The first ':' or ';' outside quotes ends the name ...
                    if ch in ":;" and not name_split:
                        name_split = i
                    # ... and the first ':' outside quotes starts the value.
                    if ch == ":" and not value_split:
                        value_split = i
                if ch == '"':
                    in_quotes = not in_quotes
            name = unescape_string(st[:name_split])
            if not name:
                raise ValueError("Key name is required")  # noqa: TRY301
            validate_token(name)
            if not value_split:
                # No ':' found: the value starts past the end of the line.
                # ``i`` still holds the index of the last character.
                value_split = i + 1
            if not name_split or name_split + 1 == value_split:
                raise ValueError("Invalid content line")  # noqa: TRY301
            params = Parameters.from_ical(
                st[name_split + 1 : value_split], strict=self.strict
            )
            # Restore the characters hidden by escape_string in keys and values.
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in iter(params.items())
            )
            values = unescape_string(st[value_split + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Unfold the content lines in an iCalendar into long content lines."""
        ical = to_unicode(ical)
        # a fold is one or more line breaks followed by either a space or a tab
        return cls(UFOLD.sub("", ical), strict=strict)

    def to_ical(self):
        """Fold the content line with ``foldline`` and return it encoded
        with ``DEFAULT_ENCODING``.
        """
        return foldline(self).encode(DEFAULT_ENCODING)
480
481
class Contentlines(list):
    """A list of content lines.

    iCalendar files are assumed to be a few kilobytes in size, for which
    a plain list is efficient enough. For huge files an iterator should
    probably be used instead.
    """

    def to_ical(self):
        """Serialise all non-empty lines, each terminated by CRLF."""
        encoded = [line.to_ical() for line in self if line]
        return b"\r\n".join(encoded) + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parse a string into content lines.

        :param st: The iCalendar text.
        :raises ValueError: If the text cannot be unfolded and split.
        """
        text = to_unicode(st)
        try:
            # a fold is one or more line breaks followed by a space or a tab
            joined = UFOLD.sub("", text)
            non_empty = [Contentline(raw) for raw in NEWLINE.split(joined) if raw]
            lines = cls(non_empty)
            lines.append("")  # '\r\n' at the end of every content line
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return lines
504
505
# Names exported by ``from <this module> import *``.
__all__ = [
    "FOLD",
    "NAME",
    "NEWLINE",
    "QUNSAFE_CHAR",
    "QUOTABLE",
    "UFOLD",
    "UNSAFE_CHAR",
    "Contentline",
    "Contentlines",
    "Parameters",
    "dquote",
    "escape_char",
    "escape_string",
    "foldline",
    "param_value",
    "q_join",
    "q_split",
    "rfc_6868_escape",
    "rfc_6868_unescape",
    "unescape_char",
    "unescape_list_or_string",
    "unescape_string",
    "validate_param_value",
    "validate_token",
]