1"""Parsing a component's iCalendar data."""
2
3from __future__ import annotations
4
5from typing import TYPE_CHECKING, ClassVar
6
7from icalendar.parser.content_line import Contentline, Contentlines
8from icalendar.parser.property import split_on_unescaped_comma
9from icalendar.prop import vBroken
10from icalendar.timezone import tzp
11
12if TYPE_CHECKING:
13 from icalendar.cal import Component, ComponentFactory
14 from icalendar.parser.parameter import Parameters
15 from icalendar.prop import VPROPERTY, TypesFactory
16
17
class ComponentIcalParser:
    """A parser for a component's iCalendar data.

    This uses the template method pattern, where the main parsing
    logic can be refined in subclasses: :meth:`parse` drives the work
    and delegates to :meth:`initialize_parsing`,
    :meth:`parse_content_lines`, the ``handle_*`` hooks and
    :meth:`prepare_components`.
    """

    datetime_names: ClassVar[tuple[str, ...]] = (
        "DTSTART",
        "DTEND",
        "RECURRENCE-ID",
        "DUE",
        "RDATE",
        "EXDATE",
    )
    """Names to check for TZID parameter when parsing datetimes.

    Their ``from_ical`` methods take an optional ``tzid`` argument,
    which is used if the property has a TZID parameter.
    """

    def __init__(
        self,
        data: bytes | str | list[Contentline],
        component_factory: ComponentFactory,
        types_factory: TypesFactory,
    ):
        """Initialize the parser with the raw data.

        Parameters:
            data: The raw iCalendar data to parse, either as bytes,
                as a string, or as a list of content lines.
            component_factory: The factory to use for creating components.
            types_factory: The factory to use for creating property values.
        """
        self._data = data
        self._component_factory = component_factory
        self._types_factory = types_factory
        self._tzp = tzp

    _content_lines: list[Contentline]

    def contains_component(self, name: str) -> bool:
        """Check if the parser contains a component.

        Parameters:
            name: The component name, compared case-insensitively.

        Returns:
            ``True`` if any content line equals ``BEGIN:<name>``
            (ignoring case), else ``False``.
        """
        self.initialize_parsing()
        begin_line = "BEGIN:" + name.upper()
        expected_length = len(begin_line)
        return any(
            # The length comparison is a cheap pre-filter before uppercasing.
            len(content_line) == expected_length
            and content_line.upper() == begin_line
            for content_line in self._content_lines
        )

    def contains_uid(self, uid: str) -> bool:
        """Determines whether the component contains a ``uid``.

        This is a plain substring test against every content line, so it
        is also ``True`` when the text occurs outside of a UID property.

        Returns:
            ``True`` if the component contains a ``uid``, else ``False``.
        """
        self.initialize_parsing()
        return any(uid in line for line in self._content_lines)

    def initialize_parsing(self) -> None:
        """Reset the parsing state and make the content lines available.

        Data that is not already a list is converted with
        ``Contentlines.from_ical``. The result is stored back in
        ``self._data`` so that a later call can reuse it when it is a list.
        """
        self._stack: list[Component] = []
        self._components: list[Component] = []
        if not isinstance(self._data, list):
            self._data = Contentlines.from_ical(self._data)
        self._content_lines = self._data
        self._content_lines_iterator = iter(self._content_lines)

    def handle_line_parse_error(self, exception: Exception):
        """Handle a line parsing error.

        If the line belongs to a component that ignores exceptions,
        the error is recorded in the component's ``errors`` and the line
        is skipped by the caller.

        Parameters:
            exception: The error raised while splitting the content line.

        Raises:
            Exception: ``exception`` itself, if there is no current
                component or it does not ignore exceptions.
        """
        component = self.component
        if not component or not component.ignore_exceptions:
            raise exception
        # No property name is known for an unparsable line, hence ``None``.
        component.errors.append((None, str(exception)))

    def handle_begin_component(self, vals: str) -> None:
        """Handle the beginning of a component.

        Parameters:
            vals: The value of the ``BEGIN`` line, i.e. the component name.
        """
        # Try and create one of the components defined in the spec,
        # otherwise get a general component for robustness.
        c_name = vals.upper()
        c_class = self._component_factory.get_component_class(c_name)
        component = c_class()
        # A component class without a name of its own (undefined
        # components) takes the name found in the data.
        if not getattr(component, "name", ""):
            component.name = c_name
        self._stack.append(component)

    def handle_end_component(self, vals: str) -> None:
        """Handle the end of a component.

        The finished component is popped from the stack and either added
        to its parent or, if it was the outermost one, to the results.

        Parameters:
            vals: The value of the ``END`` line, i.e. the component name.

        Raises:
            ValueError: If there is no open component to end.
        """
        if not self._stack:
            # The stack is currently empty, the input must be invalid
            raise ValueError("END encountered without an accompanying BEGIN!")

        component = self._stack.pop()
        if self._stack:
            self._stack[-1].add_component(component)
        else:  # we are at the end
            self._components.append(component)
        if vals.upper() == "VTIMEZONE" and "TZID" in component:
            # Use the provider stored on the instance, not the module global.
            self._tzp.cache_timezone_component(component)

    def prepare_components(self) -> None:
        """Prepare the parsed components.

        This is called when all components are parsed.
        The default implementation does nothing.
        """

    def parse(self) -> list[Component]:
        """Parse the raw data.

        Returns:
            The top-level components found in the data.
        """
        self.initialize_parsing()
        self.parse_content_lines()
        self.prepare_components()
        return self._components

    def parse_content_lines(self) -> None:
        """Parse the content lines.

        Empty lines are skipped. ``BEGIN`` and ``END`` lines open and
        close components; every other line is handled as a property.
        """
        for line in self._content_lines_iterator:
            if not line:
                continue
            try:
                name, params, vals = line.parts()
            except ValueError as e:
                self.handle_line_parse_error(e)
                continue

            uname = name.upper()
            if uname == "BEGIN":
                self.handle_begin_component(vals)
            elif uname == "END":
                self.handle_end_component(vals)
            else:
                self.handle_property(uname, params, vals, line)

    @property
    def component(self) -> Component | None:
        """The component currently being parsed, or ``None`` if none is open."""
        return self._stack[-1] if self._stack else None

    def get_factory_for_property(self, name: str, params: Parameters) -> VPROPERTY:
        """Get the factory for a property.

        Parameters:
            name: The name of the property.
            params: The parameters of the property; ``params.value``
                is passed on to the types factory.
        """
        return self._types_factory.for_property(name, params.value)

    def handle_property(
        self, name: str, params: Parameters, vals: str, line: Contentline
    ) -> None:
        """Handle a property line.

        Add properties to the top of the current stack.

        Parameters:
            name: The name of the property, uppercased.
            params: The parameters of the property.
            vals: The value of the property.
            line: The original content line.

        Raises:
            ValueError: If the property is outside of any component
                and is not an ``X-COMMENT``.
        """
        if not self.component:
            # only accept X-COMMENT at the end of the .ics file
            # ignore these components in parsing
            if name == "X-COMMENT":
                return
            raise ValueError(f'Property "{name}" does not have a parent component.')
        # Determine TZID for datetime properties
        tzid = params.get("TZID") if params and name in self.datetime_names else None

        # Handle special cases for value list preparation
        if name == "CATEGORIES":
            if self.handle_categories(params, vals, line):
                return
            # Fallback to normal processing if we can't find colon
            vals_list = [vals]
        elif name == "FREEBUSY":
            # Handle FREEBUSY comma-separated values
            vals_list = vals.split(",")
        elif name == "RDATE" and vals == "":
            # Workaround broken ICS files with empty RDATE
            # (not EXDATE - let it parse and fail)
            vals_list = []
        else:
            vals_list = [vals]

        # Parse all properties eagerly
        for val in vals_list:
            self.parse_and_add_property(name, params, val, tzid, line)

    def parse_and_add_property(
        self,
        name: str,
        params: Parameters,
        val: str,
        tzid: str | None,
        line: Contentline,
    ) -> None:
        """Parse a property value and add it to the current component.

        Parameters:
            name: The name of the property, uppercased.
            params: The parameters of the property.
            val: The single raw value to parse.
            tzid: The TZID to pass to ``from_ical``, if any.
            line: The original content line.
        """
        factory = self.get_factory_for_property(name, params)
        try:
            # Only pass ``tzid`` when there is one: not every factory's
            # ``from_ical`` accepts a second argument.
            if tzid:
                parsed_val = factory.from_ical(val, tzid)
            else:
                parsed_val = factory.from_ical(val)
        except (ValueError, TypeError) as e:
            self.handle_property_parse_error(e, name, params, val, line)
        else:
            vals_inst = factory(parsed_val)
            vals_inst.params = params
            self.component.add(name, vals_inst, encode=False)

    def handle_property_parse_error(
        self,
        exception: Exception,
        name: str,
        params: Parameters,
        val: str,
        line: Contentline,
    ) -> None:
        """Handle the parse error for a property.

        In error-tolerant components and for ``X-`` properties, the
        error is recorded and a ``vBroken`` value is stored instead.

        Parameters:
            exception: The error raised while parsing the value.
            name: The name of the property.
            params: The parameters of the property.
            val: The raw value that failed to parse.
            line: The original content line.

        Raises:
            Exception: ``exception`` itself, if the current component does
                not ignore exceptions and the property is not an ``X-`` one.
        """
        is_x_property = name[:2].upper() == "X-"
        if not self.component.ignore_exceptions and not is_x_property:
            raise exception
        # Error-tolerant mode: create vBroken
        factory = self.get_factory_for_property(name, params)
        expected_type = getattr(factory, "__name__", "unknown")
        broken_prop = vBroken.from_parse_error(
            raw_value=val,
            params=params,
            property_name=name,
            expected_type=expected_type,
            error=exception,
        )
        self.component.errors.append((name, str(exception)))
        self.component.add(name, broken_prop, encode=False)

    @staticmethod
    def _find_value_separator(line: str) -> int:
        """Return the index of the colon that starts the property value.

        This is the first colon outside of a double-quoted section, so
        colons inside quoted parameter values (``ALTREP="http://..."``)
        and colons inside the value itself are both handled.

        Returns:
            The index of the separator, or ``-1`` if there is none.
        """
        in_quotes = False
        for index, char in enumerate(line):
            if char == '"':
                in_quotes = not in_quotes
            elif char == ":" and not in_quotes:
                return index
        return -1

    def handle_categories(
        self, params: Parameters, vals: str, line: Contentline
    ) -> bool:
        """Handle the special case of CATEGORIES property.

        CATEGORIES needs the raw value before unescaping, to properly
        split on unescaped commas. Categories are parsed immediately
        (not lazily) for both strict and tolerant components.

        Parameters:
            params: The parameters of the property.
            vals: The already unescaped value; not used here.
            line: The original content line the raw value is taken from.

        Returns:
            ``True`` if handled, else ``False``.
        """
        line_str = str(line)
        colon_idx = self._find_value_separator(line_str)
        if colon_idx <= 0:
            return False
        raw_value = line_str[colon_idx + 1 :]
        try:
            category_list = split_on_unescaped_comma(raw_value)
            factory = self.get_factory_for_property("CATEGORIES", params)
            vals_inst = factory(category_list)
            vals_inst.params = params
            self.component.add("CATEGORIES", vals_inst, encode=False)
        except ValueError as e:
            self.handle_property_parse_error(
                e, "CATEGORIES", params, raw_value, line
            )
        return True