Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser.py: 82%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""This module parses and generates contentlines as defined in RFC 5545
2(iCalendar), but will probably work for other MIME types with similar syntax.
3Eg. RFC 2426 (vCard)
5It is stupid in the sense that it treats the content purely as strings. No type
6conversion is attempted.
7"""
9from __future__ import annotations
11import functools
12import os
13import re
14from collections.abc import Sequence
15from datetime import datetime, time
16from typing import TYPE_CHECKING, Any, Callable, Protocol
18from icalendar.caselessdict import CaselessDict
19from icalendar.error import JCalParsingError
20from icalendar.parser_tools import (
21 DEFAULT_ENCODING,
22 ICAL_TYPE,
23 SEQUENCE_TYPES,
24 to_unicode,
25)
26from icalendar.timezone.tzid import tzid_from_dt
28if TYPE_CHECKING:
29 from icalendar.enums import VALUE
30 from icalendar.prop import VPROPERTY
class HasToIcal(Protocol):
    """Structural type for any object that can serialize itself via ``to_ical()``."""

    def to_ical(self) -> bytes:
        """Return the iCalendar representation of the object as bytes."""
def escape_char(text: str | bytes) -> str | bytes:
    r"""Format value according to iCalendar TEXT escaping rules.

    Escapes special characters in text values according to
    :rfc:`5545#section-3.3.11` rules. Both ``str`` and ``bytes`` input are
    supported; the result has the same type as the input.

    Parameters:
        text: The text to escape.

    Returns:
        The escaped text with special characters escaped.

    Note:
        The replacement order is critical to avoid double-escaping:

        1. literal ``\N`` -> newline character (so it is not escaped as a backslash)
        2. ``\`` -> ``\\`` (escape backslashes)
        3. ``;`` -> ``\;`` (escape semicolons)
        4. ``,`` -> ``\,`` (escape commas)
        5. CRLF -> literal ``\n`` (normalize line endings)
        6. newline character -> literal ``\n``
    """
    assert isinstance(text, (str, bytes))
    # NOTE: ORDER MATTERS!
    if isinstance(text, bytes):
        # bytes.replace() only accepts bytes arguments, so the str chain
        # below would raise TypeError for bytes input.
        return (
            text.replace(rb"\N", b"\n")
            .replace(b"\\", b"\\\\")
            .replace(b";", rb"\;")
            .replace(b",", rb"\,")
            .replace(b"\r\n", rb"\n")
            .replace(b"\n", rb"\n")
        )
    return (
        text.replace(r"\N", "\n")
        .replace("\\", "\\\\")
        .replace(";", r"\;")
        .replace(",", r"\,")
        .replace("\r\n", r"\n")
        .replace("\n", r"\n")
    )
# One alternation handles every escape in a single left-to-right pass:
# a backslash followed by one of ``\ , ; n N`` (captured), or a raw CRLF.
_UNESCAPE_CHAR_STR_REGEX = re.compile(r"\\([\\,;nN])|\r\n")
_UNESCAPE_CHAR_BYTES_REGEX = re.compile(rb"\\([\\,;nN])|\r\n")


def unescape_char(text: str | bytes) -> str | bytes | None:
    r"""Unescape iCalendar TEXT values.

    Reverses the escaping applied by :func:`escape_char` according to
    :rfc:`5545#section-3.3.11` TEXT escaping rules.

    Parameters:
        text: The escaped text.

    Returns:
        The unescaped text with the same type as the input, or ``None`` if
        ``text`` is neither ``str`` nor ``bytes`` (only reachable when
        assertions are disabled).

    Note:
        The text is processed in a single pass so that an escaped backslash
        followed by ``n`` (``\\n``) decodes to a backslash and the letter
        ``n`` rather than to a backslash and a newline:

        - ``\n`` and ``\N`` -> newline
        - CRLF -> newline (normalize line endings)
        - ``\,`` -> ``,``
        - ``\;`` -> ``;``
        - ``\\`` -> ``\``
    """
    assert isinstance(text, (str, bytes))
    if isinstance(text, str):

        def _replace_str(match):
            char = match.group(1)
            # group(1) is None when the raw CRLF alternative matched
            if char is None or char in "nN":
                return "\n"
            return char

        return _UNESCAPE_CHAR_STR_REGEX.sub(_replace_str, text)
    if isinstance(text, bytes):

        def _replace_bytes(match):
            char = match.group(1)
            # group(1) is None when the raw CRLF alternative matched
            if char is None or char in b"nN":
                return b"\n"
            return char

        return _UNESCAPE_CHAR_BYTES_REGEX.sub(_replace_bytes, text)
    return None
def foldline(line: str, limit: int = 75, fold_sep: str = "\r\n ") -> str:
    """Fold a content line as described in RFC 5545.

    RFC 5545 says lines SHOULD NOT be longer than 75 octets (excluding the
    line break) and that longer lines SHOULD be split by inserting a CRLF
    followed by a single linear white-space character (SPACE or HTAB).

    Parameters:
        line: The unfolded line. It must not contain a newline character.
        limit: The octet limit per line.
        fold_sep: The separator inserted at every fold.

    Returns:
        The folded line.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    if line.isascii():
        # Fast path: one character is one octet, so plain slicing is enough.
        step = limit - 1
        chunks = [line[start : start + step] for start in range(0, len(line), step)]
        return fold_sep.join(chunks)

    # Slow path: count encoded octets so a multi-byte character is never split.
    pieces: list[str] = []
    used = 0
    for char in line:
        width = len(char.encode(DEFAULT_ENCODING))
        used += width
        if used >= limit:
            pieces.append(fold_sep)
            used = width
        pieces.append(char)

    return "".join(pieces)
155#################################################################
156# Property parameter stuff
def param_value(value: Sequence[str] | str | HasToIcal, always_quote: bool = False) -> str:
    """Convert a parameter value to its iCalendar representation.

    Applies :rfc:`6868` escaping and optionally quotes the value according
    to :rfc:`5545` parameter value formatting rules.

    Parameters:
        value: The parameter value to convert. Can be a sequence, string, or
            object with a ``to_ical()`` method.
        always_quote: If ``True``, always enclose the value in double quotes.
            Defaults to ``False`` (only quote when necessary).

    Returns:
        The formatted parameter value, escaped and quoted as needed.
    """
    if isinstance(value, SEQUENCE_TYPES):
        return q_join(map(rfc_6868_escape, value), always_quote=always_quote)
    if isinstance(value, str):
        return dquote(rfc_6868_escape(value), always_quote=always_quote)
    # Any other object is serialized through its to_ical() method first.
    # always_quote is forwarded here too, so the quoting contract holds for
    # every supported value type.
    text = value.to_ical().decode(DEFAULT_ENCODING)
    return dquote(rfc_6868_escape(text), always_quote=always_quote)
# Could be improved

# [\w-] because of the iCalendar RFC
# . because of the vCard RFC
NAME = re.compile(r"[\w.-]+")

# Characters rejected in an unquoted parameter value: control characters
# (except HTAB), DEL, double quote, comma, colon and semicolon.
UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f",:;]')
# Characters rejected in a quoted parameter value: control characters
# (except HTAB), DEL and double quote.
QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f"]')
# A fold: one or more line breaks (LF or CRLF) followed by a space or tab.
# FOLD is the bytes variant, UFOLD the str variant.
FOLD = re.compile(b"(\r?\n)+[ \t]")
UFOLD = re.compile("(\r?\n)+[ \t]")
# A single line break, LF or CRLF.
NEWLINE = re.compile(r"\r?\n")
def validate_token(name: str) -> None:
    r"""Check that ``name`` consists solely of token characters.

    A valid name matches the ``NAME`` pattern (``[\w.-]+``) from its first
    to its last character.

    Parameters:
        name: The token name to validate.

    Raises:
        ValueError: If the name is empty or contains any other character.
    """
    if NAME.fullmatch(name) is None:
        raise ValueError(name)
def validate_param_value(value: str, quoted: bool = True) -> None:
    """Reject parameter values that contain unsafe characters.

    Quoted values are checked against ``QUNSAFE_CHAR``; unquoted values are
    checked against the stricter ``UNSAFE_CHAR``.

    Parameters:
        value: The parameter value to validate.
        quoted: Whether the value is validated as a quoted value.
            Defaults to ``True``.

    Raises:
        ValueError: If the value contains a character that is unsafe for
            its quote state.
    """
    pattern = UNSAFE_CHAR if not quoted else QUNSAFE_CHAR
    if pattern.search(value) is not None:
        raise ValueError(value)
# Characters whose presence in a parameter value causes the value to be
# enclosed in double quotes.
# NOTE(review): the last character in the class is U+2019 (right single
# quotation mark), not the ASCII apostrophe - confirm this is intended.
QUOTABLE = re.compile("[,;:’]")  # noqa: RUF001
def dquote(val: str, always_quote: bool = False) -> str:
    """Wrap a parameter value in double quotes when required.

    Double-quote characters are not allowed inside a parameter value, so
    they are replaced with single quotes first. The result is then quoted
    if it contains a character matched by ``QUOTABLE`` or if quoting is
    requested explicitly.

    Parameters:
        val: The parameter value to quote.
        always_quote: If ``True``, quote regardless of content.
            Defaults to ``False`` (only quote when necessary).

    Returns:
        The sanitized value, enclosed in double quotes if needed or requested.
    """
    sanitized = val.replace('"', "'")
    needs_quotes = QUOTABLE.search(sanitized) or always_quote
    if not needs_quotes:
        return sanitized
    return '"' + sanitized + '"'
260# parsing helper
def q_split(st: str, sep: str = ",", maxsplit: int = -1) -> list[str]:
    """Split a string on a separator while honouring double quotes.

    A separator that appears between a pair of double quotes does not split
    the string. The quotes themselves are kept in the returned parts.

    Parameters:
        st: The string to split.
        sep: The separator character. Defaults to ``,``.
        maxsplit: Maximum number of splits to perform. If ``-1`` (default),
            then perform all possible splits.

    Returns:
        The split string parts. An empty input string yields an empty list.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import q_split
            >>> q_split('a,b,c')
            ['a', 'b', 'c']
            >>> q_split('a,"b,c",d')
            ['a', '"b,c"', 'd']
            >>> q_split('a;b;c', sep=';')
            ['a', 'b', 'c']
    """
    if maxsplit == 0:
        return [st]

    parts: list[str] = []
    start = 0
    quoted = False
    for pos, char in enumerate(st):
        if char == '"':
            quoted = not quoted
        if char == sep and not quoted:
            parts.append(st[start:pos])
            start = pos + 1
            if len(parts) == maxsplit:
                # split budget used up; the remainder is one final part
                break
    if st:
        # the tail after the last split point (possibly empty)
        parts.append(st[start:])
    return parts
def q_join(lst: list[str], sep: str = ",", always_quote: bool = False) -> str:
    """Join items with a separator, quoting each one when necessary.

    Every item is passed through :func:`dquote` before joining.

    Parameters:
        lst: The list of items to join.
        sep: The separator to use. Defaults to ``,``.
        always_quote: If ``True``, always quote all items. Defaults to ``False``
            (only quote when necessary).

    Returns:
        The joined string with items quoted as needed.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import q_join
            >>> q_join(['a', 'b', 'c'])
            'a,b,c'
            >>> q_join(['plain', 'has,comma'])
            'plain,"has,comma"'
    """
    quoted_items = [dquote(item, always_quote=always_quote) for item in lst]
    return sep.join(quoted_items)
def single_string_parameter(func: Callable | None = None, upper=False):
    """Build a property that stores a single string under the function's name.

    The decorated function only provides the name and the docstring. The
    value itself is read from and written to the mapping the property is
    attached to.

    Parameters:
        upper: Convert the value to uppercase
        func: The function to decorate.

    Returns:
        The property for the parameter or a decorator for the parameter
        if func is ``None``.
    """

    def decorator(func):
        name = func.__name__

        def fdel(self: Parameters):
            """Delete the value."""
            self.pop(name, None)

        def fset(self: Parameters, value: str | None):
            """Set the value"""
            if value is None:
                # assigning None removes the parameter
                fdel(self)
                return
            self[name] = value.upper() if upper else value

        @functools.wraps(func)
        def fget(self: Parameters):
            """Get the value."""
            value = self.get(name)
            if value is None or not upper:
                return value
            return value.upper()

        return property(fget, fset, fdel, doc=func.__doc__)

    return decorator if func is None else decorator(func)
class Parameters(CaselessDict):
    """Parser and generator of Property parameter strings.

    It knows nothing of datatypes.
    Its main concern is textual structure.

    Examples:

        Modify parameters:

        .. code-block:: pycon

            >>> from icalendar import Parameters
            >>> params = Parameters()
            >>> params['VALUE'] = 'TEXT'
            >>> params.value
            'TEXT'
            >>> params
            Parameters({'VALUE': 'TEXT'})

        Create new parameters:

        .. code-block:: pycon

            >>> params = Parameters(value="BINARY")
            >>> params.value
            'BINARY'

        Set a default:

        .. code-block:: pycon

            >>> params = Parameters(value="BINARY", default_value="TEXT")
            >>> params
            Parameters({'VALUE': 'BINARY'})

    """

    def __init__(self, *args, **kwargs):
        """Create new parameters.

        A leading positional ``None`` is ignored. Keyword arguments whose
        name starts with ``default_`` (case-insensitive) are not stored
        directly: the remainder of the name is used as the key and the value
        is only applied if that key was not set otherwise.
        """
        if args and args[0] is None:
            # allow passing None
            args = args[1:]
        # key[8:] strips the 8-character "default_" prefix
        defaults = {
            key[8:]: kwargs.pop(key)
            for key in list(kwargs.keys())
            if key.lower().startswith("default_")
        }
        super().__init__(*args, **kwargs)
        for key, value in defaults.items():
            self.setdefault(key, value)

    # The following parameters must always be enclosed in double quotes
    always_quoted = (
        "ALTREP",
        "DELEGATED-FROM",
        "DELEGATED-TO",
        "DIR",
        "MEMBER",
        "SENT-BY",
        # Part of X-APPLE-STRUCTURED-LOCATION
        "X-ADDRESS",
        "X-TITLE",
        # RFC 9253
        "LINKREL",
    )
    # Maps a parameter name to characters; the parameter value is quoted
    # if any one of these characters is present in it.
    quote_also = {
        # This is escaped in the RFC
        "CN": " '",
    }

    def params(self):
        """In RFC 5545 keys are called parameters, so this is to be consistent
        with the naming conventions.
        """
        return self.keys()

    def to_ical(self, sorted: bool = True) -> bytes:  # noqa: A002, FBT001
        """Returns an :rfc:`5545` representation of the parameters.

        A ``TZID`` parameter whose value is ``"UTC"`` is always left out.

        Parameters:
            sorted (bool): Sort the parameters before encoding.

        Returns:
            The ``KEY=value`` pairs encoded as bytes and joined with ``;``.
        """
        result = []
        items = list(self.items())
        if sorted:
            items.sort()

        for key, value in items:
            if key == "TZID" and value == "UTC":
                # The "TZID" property parameter MUST NOT be applied to DATE-TIME
                # properties whose time values are specified in UTC.
                continue
            upper_key = key.upper()
            check_quoteable_characters = self.quote_also.get(key.upper())
            # Quote if the parameter is in always_quoted, or if the value
            # contains one of the characters registered in quote_also.
            always_quote = upper_key in self.always_quoted or (
                check_quoteable_characters
                and any(c in value for c in check_quoteable_characters)
            )
            quoted_value = param_value(value, always_quote=always_quote)
            if isinstance(quoted_value, str):
                quoted_value = quoted_value.encode(DEFAULT_ENCODING)
            # CaselessDict keys are always unicode
            result.append(upper_key.encode(DEFAULT_ENCODING) + b"=" + quoted_value)
        return b";".join(result)

    @classmethod
    def from_ical(cls, st, strict=False):
        """Parses the parameter format from ical text format.

        Parameters:
            st: The parameter string, with parameters separated by ``;``.
            strict: If true, unquoted values are converted to uppercase.

        Returns:
            The parsed parameters. A parameter with several comma-separated
            values is stored as a list, otherwise as a single string.

        Raises:
            ValueError: If a parameter cannot be split into key and value,
                or if its key or one of its values fails validation.
        """

        # parse into strings
        result = cls()
        for param in q_split(st, ";"):
            try:
                key, val = q_split(param, "=", maxsplit=1)
                validate_token(key)
                # Property parameter values that are not in quoted
                # strings are case insensitive.
                vals = []
                for v in q_split(val, ","):
                    if v.startswith('"') and v.endswith('"'):
                        v2 = v.strip('"')
                        validate_param_value(v2, quoted=True)
                        vals.append(rfc_6868_unescape(v2))
                    else:
                        validate_param_value(v, quoted=False)
                        if strict:
                            vals.append(rfc_6868_unescape(v.upper()))
                        else:
                            vals.append(rfc_6868_unescape(v))
                if not vals:
                    # q_split returns no parts for an empty value;
                    # keep the raw (empty) value in that case
                    result[key] = val
                elif len(vals) == 1:
                    result[key] = vals[0]
                else:
                    result[key] = vals
            except ValueError as exc:  # noqa: PERF203
                raise ValueError(
                    f"{param!r} is not a valid parameter string: {exc}"
                ) from exc
        return result

    @single_string_parameter(upper=True)
    def value(self) -> VALUE | str | None:
        """The VALUE parameter from :rfc:`5545`.

        Description:
            This parameter specifies the value type and format of
            the property value.  The property values MUST be of a single value
            type.  For example, a "RDATE" property cannot have a combination
            of DATE-TIME and TIME value types.

            If the property's value is the default value type, then this
            parameter need not be specified.  However, if the property's
            default value type is overridden by some other allowable value
            type, then this parameter MUST be specified.

            Applications MUST preserve the value data for x-name and iana-
            token values that they don't recognize without attempting to
            interpret or parse the value data.

        For convenience, using this property, the value will be converted to
        an uppercase string.

        .. code-block:: pycon

            >>> from icalendar import Parameters
            >>> params = Parameters()
            >>> params.value = "unknown"
            >>> params
            Parameters({'VALUE': 'UNKNOWN'})

        """

    def _parameter_value_to_jcal(
        self, value: str | float | list | VPROPERTY
    ) -> str | int | float | list[str] | list[int] | list[float]:
        """Convert a parameter value to jCal format.

        Parameters:
            value: The parameter value

        Returns:
            The jCal representation of the parameter value

        Raises:
            TypeError: If the value is not a list, has no ``to_jcal``
                method and is not an ``int``, ``float`` or ``str``.
        """
        if isinstance(value, list):
            return [self._parameter_value_to_jcal(v) for v in value]
        if hasattr(value, "to_jcal"):
            # property values respond to this
            jcal = value.to_jcal()
            # we only need the value part
            if len(jcal) == 4:
                return jcal[3]
            return jcal[3:]
        # convert subclasses to the plain built-in type
        for t in (int, float, str):
            if isinstance(value, t):
                return t(value)
        raise TypeError(
            "Unsupported parameter value type for jCal conversion: "
            f"{type(value)} {value!r}"
        )

    def to_jcal(self, exclude_utc=False) -> dict[str, str]:
        """Return the jCal representation of the parameters.

        Keys are lowercased and the ``VALUE`` parameter is left out.

        Parameters:
            exclude_utc (bool): Exclude the TZID parameter if it is UTC
        """
        jcal = {
            k.lower(): self._parameter_value_to_jcal(v)
            for k, v in self.items()
            if k.lower() != "value"
        }
        if exclude_utc and jcal.get("tzid") == "UTC":
            del jcal["tzid"]
        return jcal

    @single_string_parameter
    def tzid(self) -> str | None:
        """The TZID parameter from :rfc:`5545`."""

    def is_utc(self) -> bool:
        """Whether the TZID parameter is UTC."""
        return self.tzid == "UTC"

    def update_tzid_from(self, dt: datetime | time | Any) -> None:
        """Update the TZID parameter from a datetime object.

        This sets the TZID parameter or deletes it according to the datetime.
        Objects that are neither ``datetime`` nor ``time`` are ignored.
        """
        if isinstance(dt, (datetime, time)):
            self.tzid = tzid_from_dt(dt)

    @classmethod
    def from_jcal(cls, jcal: dict[str, str | list[str]]):
        """Parse jCal parameters.

        Parameters:
            jcal: A mapping of parameter names to values.

        Raises:
            JCalParsingError: If ``jcal`` is not a dict, if a name is not a
                string, or if a value is neither a string, integer or float
                nor a non-empty list of those.
        """
        if not isinstance(jcal, dict):
            raise JCalParsingError("The parameters must be a mapping.", cls)
        for name, value in jcal.items():
            if not isinstance(name, str):
                raise JCalParsingError(
                    "All parameter names must be strings.", cls, value=name
                )
            if not (
                (
                    isinstance(value, list)
                    and all(isinstance(v, (str, int, float)) for v in value)
                    and value
                )
                or isinstance(value, (str, int, float))
            ):
                raise JCalParsingError(
                    "Parameter values must be a string, integer or "
                    "float or a list of those.",
                    cls,
                    name,
                    value=value,
                )
        return cls(jcal)

    @classmethod
    def from_jcal_property(cls, jcal_property: list):
        """Create the parameters for a jCal property.

        The parameters are read from the second item of the property.
        A ``TZID`` parameter with the value ``"UTC"`` is removed.

        Parameters:
            jcal_property (list): The jCal property [name, params, value, ...]

        Raises:
            JCalParsingError: If the property is not a list with at least
                4 items.
        """
        if not isinstance(jcal_property, list) or len(jcal_property) < 4:
            raise JCalParsingError(
                "The property must be a list with at least 4 items.", cls
            )
        jcal_params = jcal_property[1]
        with JCalParsingError.reraise_with_path_added(1):
            self = cls.from_jcal(jcal_params)
        if self.is_utc():
            del self.tzid  # we do not want this parameter
        return self
def escape_string(val: str) -> str:
    r"""Turn backslash escape sequences into percent-encoded hex values.

    This is used for parameter parsing so that escaped characters are not
    mistaken for delimiters during processing.

    Parameters:
        val: The string with backslash escapes.

    Returns:
        The string with backslash escapes converted to percent encoding.

    Note:
        Conversions, applied in this order:

        - ``\,`` -> ``%2C``
        - ``\:`` -> ``%3A``
        - ``\;`` -> ``%3B``
        - ``\\`` -> ``%5C``
    """
    conversions = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    encoded = val
    for sequence, replacement in conversions:
        encoded = encoded.replace(sequence, replacement)
    return encoded
def unescape_string(val: str) -> str:
    r"""Turn percent-encoded hex values back into their characters.

    Reverses :func:`escape_string`. This is used for parameter parsing.

    Parameters:
        val: The string with percent-encoded values.

    Returns:
        The string with percent encoding converted to characters.

    Note:
        Conversions, applied in this order:

        - ``%2C`` -> ``,``
        - ``%3A`` -> ``:``
        - ``%3B`` -> ``;``
        - ``%5C`` -> ``\``
    """
    conversions = (
        ("%2C", ","),
        ("%3A", ":"),
        ("%3B", ";"),
        ("%5C", "\\"),
    )
    decoded = val
    for code, char in conversions:
        decoded = decoded.replace(code, char)
    return decoded
_unescape_backslash_regex = re.compile(r"\\([\\,;:nN])")


def unescape_backslash(val: str):
    r"""Unescape backslash sequences in iCalendar text.

    Unlike :py:meth:`unescape_string`, this only handles actual backslash escapes
    per :rfc:`5545`, not URL encoding. This preserves URL-encoded values
    like ``%3A`` in URLs.

    Processes backslash escape sequences in a single pass using regex matching:
    ``\n`` and ``\N`` become a newline, while ``\\``, ``\,``, ``\;`` and ``\:``
    become the character after the backslash. Any other backslash is kept.

    Parameters:
        val: The text with backslash escapes.

    Returns:
        The text with the backslash escapes resolved.
    """
    return _unescape_backslash_regex.sub(
        lambda m: "\n" if m.group(1) in "nN" else m.group(1), val
    )


def _split_on_unescaped(text: str, sep: str) -> list[str]:
    """Split ``text`` on every ``sep`` not preceded by a backslash; unescape each part."""
    if not text:
        return [""]

    parts: list[str] = []
    current: list[str] = []
    i = 0
    length = len(text)

    while i < length:
        char = text[i]
        if char == "\\" and i + 1 < length:
            # Escaped character - keep both the backslash and the next char
            # so that unescape_backslash() can resolve the pair afterwards.
            current.append(text[i : i + 2])
            i += 2
        elif char == sep:
            # Unescaped separator - split point
            parts.append(unescape_backslash("".join(current)))
            current = []
            i += 1
        else:
            # Ordinary character (or a lone backslash at the very end)
            current.append(char)
            i += 1

    # Add final part
    parts.append(unescape_backslash("".join(current)))
    return parts


def split_on_unescaped_comma(text: str) -> list[str]:
    r"""Split text on unescaped commas and unescape each part.

    Splits only on commas not preceded by backslash.
    After splitting, unescapes backslash sequences in each part.

    Parameters:
        text: Text with potential escaped commas (e.g., "foo\\, bar,baz")

    Returns:
        List of unescaped category strings. Empty input yields ``[""]``.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_comma
            >>> split_on_unescaped_comma(r"foo\, bar,baz")
            ['foo, bar', 'baz']
            >>> split_on_unescaped_comma("a,b,c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_comma(r"a\,b\,c")
            ['a,b,c']
            >>> split_on_unescaped_comma(r"Work,Personal\,Urgent")
            ['Work', 'Personal,Urgent']
    """
    return _split_on_unescaped(text, ",")


def split_on_unescaped_semicolon(text: str) -> list[str]:
    r"""Split text on unescaped semicolons and unescape each part.

    Splits only on semicolons not preceded by a backslash.
    After splitting, unescapes backslash sequences in each part.
    Used by vCard structured properties (ADR, N, ORG) per :rfc:`6350`.

    Parameters:
        text: Text with potential escaped semicolons (e.g., "field1\\;with;field2")

    Returns:
        List of unescaped field strings. Empty input yields ``[""]``.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_semicolon
            >>> split_on_unescaped_semicolon(r"field1\;with;field2")
            ['field1;with', 'field2']
            >>> split_on_unescaped_semicolon("a;b;c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_semicolon(r"a\;b\;c")
            ['a;b;c']
            >>> split_on_unescaped_semicolon(r"PO Box 123\;Suite 200;City")
            ['PO Box 123;Suite 200', 'City']
    """
    return _split_on_unescaped(text, ";")
RFC_6868_UNESCAPE_REGEX = re.compile(r"\^\^|\^n|\^'")


def rfc_6868_unescape(param_value: str) -> str:
    """Decode the caret escapes defined by :rfc:`6868`.

    - ^^ -> ^
    - ^n -> system specific newline
    - ^' -> "
    - ^ with others stay intact
    """

    def _decode(match):
        # the pattern only ever matches one of the three sequences below
        token = match.group(0)
        if token == "^^":
            return "^"
        if token == "^n":
            return os.linesep
        return '"'

    return RFC_6868_UNESCAPE_REGEX.sub(_decode, param_value)
RFC_6868_ESCAPE_REGEX = re.compile(r'\^|\r\n|\r|\n|"')


def rfc_6868_escape(param_value: str) -> str:
    """Encode a parameter value with the caret escapes of :rfc:`6868`.

    - ^ -> ^^
    - " -> ^'
    - newline -> ^n
    """

    def _encode(match):
        found = match.group(0)
        if found == "^":
            return "^^"
        if found == '"':
            return "^'"
        # the remaining alternatives are the line breaks CRLF, CR and LF
        return "^n"

    return RFC_6868_ESCAPE_REGEX.sub(_encode, param_value)
def unescape_list_or_string(val: str | list[str]) -> str | list[str]:
    """Apply :func:`unescape_string` to a string or to every item of a list.

    Parameters:
        val: A string or list of strings to unescape.

    Returns:
        The unescaped string, or a new list with the unescaped items.
    """
    if not isinstance(val, list):
        return unescape_string(val)
    unescaped = []
    for item in val:
        unescaped.append(unescape_string(item))
    return unescaped
904#########################################
905# parsing and generation of content lines
class Contentline(str):
    """A content line is basically a string that can be folded and parsed into
    parts.

    The ``strict`` attribute is passed on to :meth:`Parameters.from_ical`
    when the line is split into parts.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create a content line from ``value``, converted with ``to_unicode``.

        The value must not contain a newline character.
        """
        value = to_unicode(value, encoding=encoding)
        assert "\n" not in value, (
            "Content line can not contain unescaped new line characters."
        )
        self = super().__new__(cls, value)
        self.strict = strict
        return self

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002, FBT001
    ):
        """Turn parts into a content line.

        Parameters:
            name: The property name.
            params: The property parameters.
            values: The property value. Objects without a ``to_ical``
                method are wrapped in ``vText`` first.
            sorted: Passed on to :meth:`Parameters.to_ical`.

        Returns:
            ``name;params:values``, or ``name:values`` if the serialized
            parameters are empty.
        """
        assert isinstance(params, Parameters)
        if hasattr(values, "to_ical"):
            values = values.to_ical()
        else:
            # imported locally - NOTE(review): presumably to avoid a circular
            # import with icalendar.prop; confirm
            from icalendar.prop import vText

            values = vText(values).to_ical()
        # elif isinstance(values, basestring):
        #     values = escape_char(values)

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        values = to_unicode(values)
        if params:
            params = to_unicode(params.to_ical(sorted=sorted))
            if params:
                # some parameter values can be skipped during serialization
                return cls(f"{name};{params}:{values}")
        return cls(f"{name}:{values}")

    def parts(self) -> tuple[str, Parameters, str]:
        """Split the content line into ``name``, ``parameters``, and ``values`` parts.

        Properly handles escaping with backslashes and double-quote sections
        to avoid corrupting URL-encoded characters in values.

        Example with parameter:

        .. code-block:: text

            DESCRIPTION;ALTREP="cid:part1.0001@example.org":The Fall'98 Wild

        Example without parameters:

        .. code-block:: text

            DESCRIPTION:The Fall'98 Wild

        Raises:
            ValueError: If the line cannot be split, or if the name or the
                parameters fail validation.
        """
        try:
            name_split: int | None = None
            value_split: int | None = None
            in_quotes: bool = False
            escaped: bool = False

            # Scan for the first ";" or ":" (end of the name) and the first
            # ":" (start of the value), ignoring delimiters inside double
            # quotes and delimiters directly preceded by a backslash.
            for i, ch in enumerate(self):
                if ch == '"' and not escaped:
                    in_quotes = not in_quotes
                elif ch == "\\" and not in_quotes:
                    # remember the backslash for the next character only
                    escaped = True
                    continue
                elif not in_quotes and not escaped:
                    # Find first delimiter for name
                    if ch in ":;" and name_split is None:
                        name_split = i
                    # Find value delimiter (first colon)
                    if ch == ":" and value_split is None:
                        value_split = i

                escaped = False

            # Validate parsing results
            # NOTE: the truthiness checks below also treat a delimiter at
            # index 0 as "not found".
            if not value_split:
                # No colon found - value is empty, use end of string
                value_split = len(self)

            # Extract name - if no delimiter,
            # take whole string for validate_token to reject
            name = self[:name_split] if name_split else self
            validate_token(name)

            if not name_split or name_split + 1 == value_split:
                # No delimiter or empty parameter section
                raise ValueError("Invalid content line")  # noqa: TRY301
            # Parse parameters - they still need to be escaped/unescaped
            # for proper handling of commas, semicolons, etc. in parameter values
            param_str = escape_string(self[name_split + 1 : value_split])
            params = Parameters.from_ical(param_str, strict=self.strict)
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in iter(params.items())
            )
            # Unescape backslash sequences in values but preserve URL encoding
            values = unescape_backslash(self[value_split + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Unfold the content lines in an iCalendar into long content lines."""
        ical = to_unicode(ical)
        # a fold is a line break (LF or CRLF) followed by either a space or a tab
        return cls(UFOLD.sub("", ical), strict=strict)

    def to_ical(self):
        """Fold the content line with :func:`foldline` and encode it to bytes."""
        return foldline(self).encode(DEFAULT_ENCODING)
class Contentlines(list):
    """A list of content lines.

    The whole calendar is held in memory at once. This is fine under the
    assumption that iCalendar files are generally a few kilobytes in size;
    for huge files an iterator should probably be used instead.
    """

    def to_ical(self):
        """Serialize all non-empty lines, each terminated by CRLF."""
        encoded = [line.to_ical() for line in self if line]
        return b"\r\n".join(encoded) + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parse a string into content lines.

        Raises:
            ValueError: If unfolding or splitting the input fails.
        """
        text = to_unicode(st)
        try:
            lines = cls()
            # a fold is a line break followed by either a space or a tab
            for raw in NEWLINE.split(UFOLD.sub("", text)):
                if raw:
                    lines.append(Contentline(raw))
            lines.append("")  # '\r\n' at the end of every content line
        except Exception as err:
            raise ValueError("Expected StringType with content lines") from err
        return lines
# Names exported by ``from icalendar.parser import *``.
__all__ = [
    "FOLD",
    "NAME",
    "NEWLINE",
    "QUNSAFE_CHAR",
    "QUOTABLE",
    "UFOLD",
    "UNSAFE_CHAR",
    "Contentline",
    "Contentlines",
    "Parameters",
    "dquote",
    "escape_char",
    "escape_string",
    "foldline",
    "param_value",
    "q_join",
    "q_split",
    "rfc_6868_escape",
    "rfc_6868_unescape",
    "split_on_unescaped_comma",
    "split_on_unescaped_semicolon",
    "unescape_backslash",
    "unescape_char",
    "unescape_list_or_string",
    "unescape_string",
    "validate_param_value",
    "validate_token",
]