1"""parsing and generation of content lines"""
2
3import re
4
5from icalendar.parser.parameter import Parameters
6from icalendar.parser.property import unescape_backslash, unescape_list_or_string
7from icalendar.parser.string import (
8 escape_string,
9 foldline,
10 unescape_string,
11 validate_token,
12)
13from icalendar.parser_tools import DEFAULT_ENCODING, ICAL_TYPE, to_unicode
14
# Matches a fold: one or more line breaks (CRLF or bare LF) followed by a
# single space or tab. Substituting "" for every match unfolds the text.
UFOLD = re.compile("(\r?\n)+[ \t]")
# Matches a single line break (CRLF or bare LF); used to split unfolded text
# into individual content lines.
NEWLINE = re.compile(r"\r?\n")
17
18
class Contentline(str):
    """A single, unfolded content line.

    The instance is an ordinary string with one extra attribute, ``strict``,
    which is forwarded to ``Parameters.from_ical`` when the line is parsed
    with :meth:`parts`.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create a content line from ``value``.

        :param value: The line content; passed through ``to_unicode`` with
            ``encoding`` before the string is created.
        :param strict: Stored on the instance and used by :meth:`parts`.
        :param encoding: Encoding handed to ``to_unicode``.
        :raises AssertionError: If the converted value contains ``"\\n"``.
        """
        value = to_unicode(value, encoding=encoding)
        # NOTE: kept as an assert so callers keep seeing AssertionError; be
        # aware that the check is skipped when Python runs with -O.
        assert "\n" not in value, (
            "Content line can not contain unescaped new line characters."
        )
        self = super().__new__(cls, value)
        self.strict = strict
        return self

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002
    ):
        """Build a content line from its name, parameters and value.

        :param name: The property name.
        :param params: The property parameters; must be a ``Parameters``
            instance.
        :param values: The property value. Objects with a ``to_ical`` method
            are serialized through it; anything else is wrapped in ``vText``
            first.
        :param sorted: Forwarded to ``params.to_ical``.
        :return: ``NAME;PARAMS:VALUE`` if the parameters serialize to a
            non-empty string, otherwise ``NAME:VALUE``.
        :raises AssertionError: If ``params`` is not a ``Parameters``.
        """
        assert isinstance(params, Parameters)
        if hasattr(values, "to_ical"):
            values = values.to_ical()
        else:
            # Imported here rather than at module level, as in the original
            # code (presumably to avoid a circular import - TODO confirm).
            from icalendar.prop import vText

            values = vText(values).to_ical()

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        values = to_unicode(values)
        if params:
            serialized_params = to_unicode(params.to_ical(sorted=sorted))
            if serialized_params:
                return cls(f"{name};{serialized_params}:{values}")
            # Some parameter values can be skipped during serialization, so
            # a non-empty Parameters may still serialize to an empty string.
        return cls(f"{name}:{values}")

    def parts(self) -> tuple[str, Parameters, str]:
        """Split the content line into ``name``, ``parameters``, and ``values`` parts.

        Properly handles escaping with backslashes and double-quote sections
        to avoid corrupting URL-encoded characters in values.

        Example with parameter:

        .. code-block:: text

            DESCRIPTION;ALTREP="cid:part1.0001@example.org":The Fall'98 Wild

        Example without parameters:

        .. code-block:: text

            DESCRIPTION:The Fall'98 Wild

        :return: A ``(name, params, values)`` tuple.
        :raises ValueError: If the line has no name delimiter, has an empty
            parameter section, or any of the helpers rejects a part. The
            original error is chained as the cause.
        """
        try:
            name_split: int | None = None
            value_split: int | None = None
            in_quotes: bool = False
            escaped: bool = False

            for i, ch in enumerate(self):
                if ch == '"' and not escaped:
                    in_quotes = not in_quotes
                elif ch == "\\" and not in_quotes:
                    # The next character is escaped; ``continue`` skips the
                    # reset of ``escaped`` at the bottom of the loop.
                    escaped = True
                    continue
                elif not in_quotes and not escaped:
                    # The first ":" or ";" ends the name.
                    if ch in ":;" and name_split is None:
                        name_split = i
                    # The first ":" starts the value. Both indices are fixed
                    # from here on, so there is no need to scan the value.
                    if ch == ":":
                        value_split = i
                        break

                escaped = False

            if value_split is None:
                # No colon found - value is empty, use end of string.
                value_split = len(self)

            # Extract name - if there is no delimiter (or the line starts
            # with one, i.e. index 0), take the whole string for
            # validate_token to reject.
            name = self[:name_split] if name_split else self
            validate_token(name)

            if not name_split or name_split + 1 == value_split:
                # No delimiter or empty parameter section
                raise ValueError("Invalid content line")  # noqa: TRY301
            # Parse parameters - they still need to be escaped/unescaped
            # for proper handling of commas, semicolons, etc. in parameter values
            param_str = escape_string(self[name_split + 1 : value_split])
            params = Parameters.from_ical(param_str, strict=self.strict)
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in params.items()
            )
            # Unescape backslash sequences in values but preserve URL encoding
            values = unescape_backslash(self[value_split + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Unfold the content lines in an iCalendar into long content lines.

        :param ical: The folded content line; passed through ``to_unicode``.
        :param strict: Stored on the resulting instance.
        :return: A new instance with every fold removed.
        """
        ical = to_unicode(ical)
        # A fold is a line break (CRLF or bare LF, possibly repeated)
        # followed by a single space or tab; removing it joins the pieces.
        return cls(UFOLD.sub("", ical), strict=strict)

    def to_ical(self):
        """Return the folded line encoded with ``DEFAULT_ENCODING``.

        Folding is delegated to ``foldline``, which is expected to keep each
        physical line within the 75-octet limit of RFC 5545.
        """
        return foldline(self).encode(DEFAULT_ENCODING)
146
147
class Contentlines(list):
    """A list of content lines that is held completely in memory.

    This is meant for iCalendar data of modest size (a few kilobytes); for
    very large files an iterator-based approach would be a better fit.
    """

    def to_ical(self):
        """Serialize all non-empty lines, each terminated by CRLF.

        Falsy entries (such as the empty-string sentinel appended by
        :meth:`from_ical`) are skipped.
        """
        terminator = b"\r\n"
        encoded = [entry.to_ical() for entry in self if entry]
        return terminator.join(encoded) + terminator

    @classmethod
    def from_ical(cls, st):
        """Parse ``st`` into a list of unfolded content lines.

        :param st: The iCalendar data; passed through ``to_unicode``.
        :return: A new instance holding one ``Contentline`` per non-empty
            line, followed by a single empty string.
        :raises ValueError: If unfolding, splitting or constructing the
            lines fails for any reason; the original error is chained.
        """
        st = to_unicode(st)
        try:
            # Remove folds first: a line break followed by a space or tab
            # continues the previous line.
            joined = UFOLD.sub("", st)
            parsed = [Contentline(raw) for raw in NEWLINE.split(joined) if raw]
            lines = cls(parsed)
            # '\r\n' at the end of every content line
            lines.append("")
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return lines
170
171
# Names exported by ``from <this module> import *``.
__all__ = ["Contentline", "Contentlines"]