Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser.py: 82%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

344 statements  

1"""This module parses and generates contentlines as defined in RFC 5545 

2(iCalendar), but will probably work for other MIME types with similar syntax. 

3Eg. RFC 2426 (vCard) 

4 

5It is stupid in the sense that it treats the content purely as strings. No type 

6conversion is attempted. 

7""" 

8 

9from __future__ import annotations 

10 

11import functools 

12import os 

13import re 

14from collections.abc import Sequence 

15from datetime import datetime, time 

16from typing import TYPE_CHECKING, Any, Callable, Protocol 

17 

18from icalendar.caselessdict import CaselessDict 

19from icalendar.error import JCalParsingError 

20from icalendar.parser_tools import ( 

21 DEFAULT_ENCODING, 

22 ICAL_TYPE, 

23 SEQUENCE_TYPES, 

24 to_unicode, 

25) 

26from icalendar.timezone.tzid import tzid_from_dt 

27 

28if TYPE_CHECKING: 

29 from icalendar.enums import VALUE 

30 from icalendar.prop import VPROPERTY 

31 

32 

class HasToIcal(Protocol):
    """Structural type for any object that exposes a ``to_ical()`` method."""

    def to_ical(self) -> bytes:
        """Return the iCalendar representation of the object as bytes."""

39 

40 

def escape_char(text: str | bytes) -> str | bytes:
    r"""Format a value according to the iCalendar TEXT escaping rules.

    Escapes special characters in text values according to
    :rfc:`5545#section-3.3.11`. Both ``str`` and ``bytes`` input are
    supported; the result has the same type as the input.

    Parameters:
        text: The text to escape.

    Returns:
        The text with special characters escaped.

    Raises:
        AssertionError: If ``text`` is neither ``str`` nor ``bytes``.

    Note:
        The replacement order is critical to avoid double-escaping:

        1. ``\N`` (backslash + ``N``) -> newline character, so that it ends
           up as a lowercase ``\n`` in step 6 instead of a doubled backslash
        2. ``\`` -> ``\\`` (escape backslashes)
        3. ``;`` -> ``\;`` (escape semicolons)
        4. ``,`` -> ``\,`` (escape commas)
        5. CRLF -> ``\n`` (backslash + ``n``)
        6. newline character -> ``\n`` (backslash + ``n``)
    """
    assert isinstance(text, (str, bytes))
    # NOTE: ORDER MATTERS!
    if isinstance(text, bytes):
        # bytes.replace() only accepts bytes arguments, so the same chain
        # has to be spelled with bytes literals.
        return (
            text.replace(rb"\N", b"\n")
            .replace(b"\\", b"\\\\")
            .replace(b";", rb"\;")
            .replace(b",", rb"\,")
            .replace(b"\r\n", rb"\n")
            .replace(b"\n", rb"\n")
        )
    return (
        text.replace(r"\N", "\n")
        .replace("\\", "\\\\")
        .replace(";", r"\;")
        .replace(",", r"\,")
        .replace("\r\n", r"\n")
        .replace("\n", r"\n")
    )

73 

74 

def unescape_char(text: str | bytes) -> str | bytes | None:
    r"""Undo iCalendar TEXT escaping.

    This is the counterpart of :func:`escape_char` and follows the TEXT
    rules of :rfc:`5545#section-3.3.11`.

    Parameters:
        text: The escaped text, either ``str`` or ``bytes``.

    Returns:
        The unescaped text in the same type as the input, or ``None`` if
        ``text`` is neither ``str`` nor ``bytes`` (only reachable when
        assertions are disabled).

    Note:
        The substitutions run strictly in this order:

        1. ``\N`` -> ``\n`` (still escaped, just lowercased)
        2. CRLF -> newline
        3. ``\n`` -> newline
        4. ``\,`` -> ``,``
        5. ``\;`` -> ``;``
        6. ``\\`` -> ``\`` (backslashes last)
    """
    assert isinstance(text, (str, bytes))
    # NOTE: ORDER MATTERS!
    substitutions = (
        ("\\N", "\\n"),
        ("\r\n", "\n"),
        ("\\n", "\n"),
        ("\\,", ","),
        ("\\;", ";"),
        ("\\\\", "\\"),
    )
    if isinstance(text, bytes):
        # All patterns are pure ASCII, so they can be encoded one-to-one.
        substitutions = tuple(
            (old.encode("ascii"), new.encode("ascii")) for old, new in substitutions
        )
    elif not isinstance(text, str):
        return None

    unescaped = text
    for old, new in substitutions:
        unescaped = unescaped.replace(old, new)
    return unescaped

118 

119 

def foldline(line: str, limit: int=75, fold_sep: str="\r\n ") -> str:
    """Fold a content line as described in RFC 5545.

    Lines of text SHOULD NOT be longer than 75 octets, excluding the line
    break. A longer line is split by inserting ``fold_sep`` (by default a
    CRLF followed by one space) between characters.

    Parameters:
        line: The unfolded line. It must not contain a newline character.
        limit: The octet limit per line.
        fold_sep: The separator inserted at every fold.

    Returns:
        The folded line.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    # Fast path for the common all-ASCII case: one character is one octet,
    # so the line can simply be cut into fixed-size slices.
    if line.isascii():
        width = limit - 1
        return fold_sep.join(
            line[start : start + width] for start in range(0, len(line), width)
        )

    # General case: count encoded octets so that a multi-byte character is
    # never split and a fold happens before the limit is reached.
    pieces: list[str] = []
    octets = 0
    for character in line:
        size = len(character.encode(DEFAULT_ENCODING))
        octets += size
        if octets >= limit:
            pieces.append(fold_sep)
            octets = size
        pieces.append(character)
    return "".join(pieces)

153 

154 

155################################################################# 

156# Property parameter stuff 

157 

158 

def param_value(value: Sequence[str] | str | HasToIcal, always_quote: bool = False) -> str:
    """Convert a parameter value to its iCalendar representation.

    Applies :rfc:`6868` escaping and optionally quotes the value according
    to :rfc:`5545` parameter value formatting rules.

    Parameters:
        value: The parameter value to convert. Can be a sequence, string, or
            object with a ``to_ical()`` method.
        always_quote: If ``True``, always enclose the value in double quotes.
            Defaults to ``False`` (only quote when necessary).

    Returns:
        The formatted parameter value, escaped and quoted as needed.
    """
    if isinstance(value, SEQUENCE_TYPES):
        return q_join(map(rfc_6868_escape, value), always_quote=always_quote)
    if not isinstance(value, str):
        # Objects are serialized through to_ical() and then treated exactly
        # like a plain string, so ``always_quote`` is honoured for them too
        # (previously it was silently dropped on this branch).
        value = value.to_ical().decode(DEFAULT_ENCODING)
    return dquote(rfc_6868_escape(value), always_quote=always_quote)

179 

180 

# Could be improved

# [\w-] because of the iCalendar RFC
# . because of the vCard RFC
# Matches a run of characters allowed in a token (used by validate_token).
NAME = re.compile(r"[\w.-]+")

# Characters rejected in an unquoted parameter value: control characters
# (except TAB, \x09), DEL, the double quote, comma, colon and semicolon.
UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f",:;]')
# Characters rejected in a quoted parameter value: control characters
# (except TAB, \x09), DEL and the double quote.
QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f"]')
# A fold in bytes: one or more line breaks (LF or CRLF) followed by a
# single space or tab.
FOLD = re.compile(b"(\r?\n)+[ \t]")
# The same fold pattern for str input.
UFOLD = re.compile("(\r?\n)+[ \t]")
# A single line break, either LF or CRLF.
NEWLINE = re.compile(r"\r?\n")

192 

193 

def validate_token(name: str) -> None:
    r"""Check that a name is a valid iCalendar token.

    The whole name has to consist of characters accepted by the NAME
    regex pattern (``[\w.-]+``), following the :rfc:`5545` token syntax.

    Parameters:
        name: The token name to validate.

    Raises:
        ValueError: If the name is empty or contains any other character.
    """
    # A single match that spans the entire string is exactly a full match.
    if NAME.fullmatch(name) is None:
        raise ValueError(name)

210 

211 

def validate_param_value(value: str, quoted: bool = True) -> None:
    """Reject parameter values that contain unsafe characters.

    Which characters are forbidden depends on whether the value was written
    inside double quotes, as defined by :rfc:`5545`.

    Parameters:
        value: The parameter value to validate.
        quoted: If ``True`` (the default), apply the rules for quoted values
            (``QUNSAFE_CHAR``). If ``False``, apply the stricter rules for
            unquoted values (``UNSAFE_CHAR``).

    Raises:
        ValueError: If the value contains a character that is unsafe for
            its quote state.
    """
    if quoted:
        forbidden = QUNSAFE_CHAR
    else:
        forbidden = UNSAFE_CHAR
    # Both patterns match exactly one character, so a single hit suffices.
    if forbidden.search(value) is not None:
        raise ValueError(value)

229 

230 

# Characters whose presence in a parameter value causes the value
# to be enclosed in double quotes.
QUOTABLE = re.compile("[,;:’]")  # noqa: RUF001


def dquote(val: str, always_quote: bool = False) -> str:
    """Wrap a parameter value in double quotes when that is required.

    A value is quoted if it contains one of the ``QUOTABLE`` characters
    (``,``, ``;``, ``:`` or the typographic apostrophe U+2019) or if the
    caller asks for it. Because a double quote must not appear inside a
    parameter value, every ``"`` is first replaced by ``'``.

    Parameters:
        val: The parameter value to quote.
        always_quote: If ``True``, quote the value regardless of its content.
            Defaults to ``False`` (only quote when necessary).

    Returns:
        The sanitized value, enclosed in double quotes if needed or requested.
    """
    # a double-quote character is forbidden to appear in a parameter value
    # so replace it with a single-quote character
    sanitized = val.replace('"', "'")
    if not always_quote and QUOTABLE.search(sanitized) is None:
        return sanitized
    return f'"{sanitized}"'

258 

259 

260# parsing helper 

def q_split(st: str, sep: str = ",", maxsplit: int = -1) -> list[str]:
    """Split a string on a separator while honouring double quotes.

    A separator that appears between a pair of double quotes does not split
    the string. This is required for parameter values, which may contain
    quoted strings.

    Parameters:
        st: The string to split.
        sep: The separator character. Defaults to ``,``.
        maxsplit: Maximum number of splits to perform. If ``-1`` (default),
            then perform all possible splits.

    Returns:
        The split string parts. An empty input string yields an empty list.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import q_split
            >>> q_split('a,b,c')
            ['a', 'b', 'c']
            >>> q_split('a,"b,c",d')
            ['a', '"b,c"', 'd']
            >>> q_split('a;b;c', sep=';')
            ['a', 'b', 'c']
    """
    if maxsplit == 0:
        return [st]

    pieces = []
    start = 0
    last_index = len(st) - 1
    quoted = False
    performed = 0
    for position, character in enumerate(st):
        if character == '"':
            quoted = not quoted
        if character == sep and not quoted:
            pieces.append(st[start:position])
            start = position + 1
            performed += 1
        # Flush the remainder at the end of the string or once the
        # requested number of splits has been made.
        if position == last_index or performed == maxsplit:
            pieces.append(st[start:])
            break
    return pieces

307 

308 

def q_join(lst: list[str], sep: str = ",", always_quote: bool = False) -> str:
    """Join items with a separator, quoting each item where necessary.

    Every item is passed through :func:`dquote` before joining, so items
    that contain special characters end up in double quotes.

    Parameters:
        lst: The items to join.
        sep: The separator to use. Defaults to ``,``.
        always_quote: If ``True``, always quote all items. Defaults to ``False``
            (only quote when necessary).

    Returns:
        The joined string with items quoted as needed.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import q_join
            >>> q_join(['a', 'b', 'c'])
            'a,b,c'
            >>> q_join(['plain', 'has,comma'])
            'plain,"has,comma"'
    """
    quoted_items = [dquote(item, always_quote=always_quote) for item in lst]
    return sep.join(quoted_items)

334 

335 

def single_string_parameter(func: Callable | None = None, upper=False):
    """Turn a method stub into a property backed by one string parameter.

    The name of the decorated function is used as the key under which the
    value is read from, written to and removed from the mapping.

    Parameters:
        func: The function to decorate. Its name and docstring are reused.
        upper: Convert the value to uppercase when reading and writing.

    Returns:
        The property for the parameter, or a decorator producing that
        property if ``func`` is ``None``.
    """

    def decorator(func):
        key = func.__name__

        def remove(self: Parameters):
            """Delete the value."""
            self.pop(key, None)

        def store(self: Parameters, value: str | None):
            """Set the value"""
            # Assigning None is the same as deleting the parameter.
            if value is None:
                remove(self)
                return
            self[key] = value.upper() if upper else value

        @functools.wraps(func)
        def load(self: Parameters):
            """Get the value."""
            current = self.get(key)
            if upper and current is not None:
                return current.upper()
            return current

        return property(load, store, remove, doc=func.__doc__)

    return decorator if func is None else decorator(func)

377 

378 

class Parameters(CaselessDict):
    """Parser and generator of Property parameter strings.

    It knows nothing of datatypes.
    Its main concern is textual structure.

    Examples:

        Modify parameters:

        .. code-block:: pycon

            >>> from icalendar import Parameters
            >>> params = Parameters()
            >>> params['VALUE'] = 'TEXT'
            >>> params.value
            'TEXT'
            >>> params
            Parameters({'VALUE': 'TEXT'})

        Create new parameters:

        .. code-block:: pycon

            >>> params = Parameters(value="BINARY")
            >>> params.value
            'BINARY'

        Set a default:

        .. code-block:: pycon

            >>> params = Parameters(value="BINARY", default_value="TEXT")
            >>> params
            Parameters({'VALUE': 'BINARY'})

    """

    def __init__(self, *args, **kwargs):
        """Create new parameters.

        A leading positional ``None`` is ignored. Keyword arguments whose
        name starts with ``default_`` (case-insensitive) are not stored
        directly; the remainder of the name is used as a key that is only
        set if it is not already present after normal initialization.
        """
        if args and args[0] is None:
            # allow passing None
            args = args[1:]
        # Strip the 8-character "default_" prefix and take these entries out
        # of kwargs so the base class does not see them.
        defaults = {
            key[8:]: kwargs.pop(key)
            for key in list(kwargs.keys())
            if key.lower().startswith("default_")
        }
        super().__init__(*args, **kwargs)
        for key, value in defaults.items():
            self.setdefault(key, value)

    # The following parameters must always be enclosed in double quotes
    always_quoted = (
        "ALTREP",
        "DELEGATED-FROM",
        "DELEGATED-TO",
        "DIR",
        "MEMBER",
        "SENT-BY",
        # Part of X-APPLE-STRUCTURED-LOCATION
        "X-ADDRESS",
        "X-TITLE",
        # RFC 9253
        "LINKREL",
    )
    # Maps a parameter name to characters; the parameter value is quoted
    # if it contains any one of these characters.
    quote_also = {
        # This is escaped in the RFC
        "CN": " '",
    }

    def params(self):
        """In RFC 5545 keys are called parameters, so this is to be consistent
        with the naming conventions.
        """
        return self.keys()

    def to_ical(self, sorted: bool = True):  # noqa: A002, FBT001
        """Returns an :rfc:`5545` representation of the parameters.

        A ``TZID`` parameter whose value is ``"UTC"`` is always left out.

        Parameters:
            sorted (bool): Sort the parameters before encoding.

        Returns:
            The ``KEY=value`` pairs as bytes, joined by ``;``.
        """
        result = []
        items = list(self.items())
        if sorted:
            items.sort()

        for key, value in items:
            if key == "TZID" and value == "UTC":
                # The "TZID" property parameter MUST NOT be applied to DATE-TIME
                # properties whose time values are specified in UTC.
                continue
            upper_key = key.upper()
            check_quoteable_characters = self.quote_also.get(key.upper())
            # Quote when the parameter is in ``always_quoted`` or when the
            # value contains one of the extra characters listed for this
            # parameter in ``quote_also``.
            always_quote = upper_key in self.always_quoted or (
                check_quoteable_characters
                and any(c in value for c in check_quoteable_characters)
            )
            quoted_value = param_value(value, always_quote=always_quote)
            if isinstance(quoted_value, str):
                quoted_value = quoted_value.encode(DEFAULT_ENCODING)
            # CaselessDict keys are always unicode
            result.append(upper_key.encode(DEFAULT_ENCODING) + b"=" + quoted_value)
        return b";".join(result)

    @classmethod
    def from_ical(cls, st, strict=False):
        """Parses the parameter format from ical text format.

        Parameters:
            st: The parameter section of a content line, i.e. ``;``-separated
                ``KEY=value`` pairs.
            strict: If true, unquoted values are converted to uppercase.

        Returns:
            A new instance. A parameter with several comma-separated values
            is stored as a list, a parameter with one value as a string.

        Raises:
            ValueError: If a parameter cannot be split into key and value,
                or its key or one of its values fails validation.
        """

        # parse into strings
        result = cls()
        for param in q_split(st, ";"):
            try:
                # Unpacking fails with ValueError if there is no "=".
                key, val = q_split(param, "=", maxsplit=1)
                validate_token(key)
                # Property parameter values that are not in quoted
                # strings are case insensitive.
                vals = []
                for v in q_split(val, ","):
                    if v.startswith('"') and v.endswith('"'):
                        v2 = v.strip('"')
                        validate_param_value(v2, quoted=True)
                        vals.append(rfc_6868_unescape(v2))
                    else:
                        validate_param_value(v, quoted=False)
                        if strict:
                            vals.append(rfc_6868_unescape(v.upper()))
                        else:
                            vals.append(rfc_6868_unescape(v))
                if not vals:
                    # q_split returns no parts for an empty value.
                    result[key] = val
                elif len(vals) == 1:
                    result[key] = vals[0]
                else:
                    result[key] = vals
            except ValueError as exc:  # noqa: PERF203
                raise ValueError(
                    f"{param!r} is not a valid parameter string: {exc}"
                ) from exc
        return result

    @single_string_parameter(upper=True)
    def value(self) -> VALUE | str | None:
        """The VALUE parameter from :rfc:`5545`.

        Description:
            This parameter specifies the value type and format of
            the property value.  The property values MUST be of a single value
            type.  For example, a "RDATE" property cannot have a combination
            of DATE-TIME and TIME value types.

            If the property's value is the default value type, then this
            parameter need not be specified.  However, if the property's
            default value type is overridden by some other allowable value
            type, then this parameter MUST be specified.

            Applications MUST preserve the value data for x-name and iana-
            token values that they don't recognize without attempting to
            interpret or parse the value data.

        For convenience, using this property, the value will be converted to
        an uppercase string.

        .. code-block:: pycon

            >>> from icalendar import Parameters
            >>> params = Parameters()
            >>> params.value = "unknown"
            >>> params
            Parameters({'VALUE': 'UNKNOWN'})

        """

    def _parameter_value_to_jcal(
        self, value: str | float | list | VPROPERTY
    ) -> str | int | float | list[str] | list[int] | list[float]:
        """Convert a parameter value to jCal format.

        Parameters:
            value: The parameter value

        Returns:
            The jCal representation of the parameter value

        Raises:
            TypeError: If the value is not a list, has no ``to_jcal``
                method and is not an ``int``, ``float`` or ``str``.
        """
        if isinstance(value, list):
            return [self._parameter_value_to_jcal(v) for v in value]
        if hasattr(value, "to_jcal"):
            # property values respond to this
            jcal = value.to_jcal()
            # we only need the value part
            if len(jcal) == 4:
                # exactly one value after the first three entries
                return jcal[3]
            return jcal[3:]
        # Convert instances of subclasses to the plain builtin type.
        for t in (int, float, str):
            if isinstance(value, t):
                return t(value)
        raise TypeError(
            "Unsupported parameter value type for jCal conversion: "
            f"{type(value)} {value!r}"
        )

    def to_jcal(self, exclude_utc=False) -> dict[str, str]:
        """Return the jCal representation of the parameters.

        Keys are lowercased and the ``VALUE`` parameter is left out.

        Parameters:
            exclude_utc (bool): Exclude the TZID parameter if it is UTC
        """
        jcal = {
            k.lower(): self._parameter_value_to_jcal(v)
            for k, v in self.items()
            if k.lower() != "value"
        }
        if exclude_utc and jcal.get("tzid") == "UTC":
            del jcal["tzid"]
        return jcal

    @single_string_parameter
    def tzid(self) -> str | None:
        """The TZID parameter from :rfc:`5545`."""

    def is_utc(self):
        """Whether the TZID parameter is UTC."""
        return self.tzid == "UTC"

    def update_tzid_from(self, dt: datetime | time | Any) -> None:
        """Update the TZID parameter from a datetime object.

        This sets the TZID parameter or deletes it according to the datetime.
        Values that are neither ``datetime`` nor ``time`` leave the
        parameters untouched.
        """
        if isinstance(dt, (datetime, time)):
            # Assigning None through the property removes the parameter.
            self.tzid = tzid_from_dt(dt)

    @classmethod
    def from_jcal(cls, jcal: dict[str, str | list[str]]):
        """Parse jCal parameters.

        Parameters:
            jcal: A mapping from parameter names to values.

        Returns:
            A new instance created from ``jcal``.

        Raises:
            JCalParsingError: If ``jcal`` is not a ``dict``, a name is not a
                string, or a value is not a string, integer, float or a
                non-empty list of those.
        """
        if not isinstance(jcal, dict):
            raise JCalParsingError("The parameters must be a mapping.", cls)
        for name, value in jcal.items():
            if not isinstance(name, str):
                raise JCalParsingError(
                    "All parameter names must be strings.", cls, value=name
                )
            # Accept a scalar or a non-empty list that contains only scalars.
            if not (
                (
                    isinstance(value, list)
                    and all(isinstance(v, (str, int, float)) for v in value)
                    and value
                )
                or isinstance(value, (str, int, float))
            ):
                raise JCalParsingError(
                    "Parameter values must be a string, integer or "
                    "float or a list of those.",
                    cls,
                    name,
                    value=value,
                )
        return cls(jcal)

    @classmethod
    def from_jcal_property(cls, jcal_property: list):
        """Create the parameters for a jCal property.

        A ``TZID`` parameter with the value ``"UTC"`` is removed from the
        result.

        Parameters:
            jcal_property (list): The jCal property [name, params, value, ...]

        Raises:
            JCalParsingError: If ``jcal_property`` is not a list with at
                least 4 items or its parameters are invalid.
        """
        if not isinstance(jcal_property, list) or len(jcal_property) < 4:
            raise JCalParsingError(
                "The property must be a list with at least 4 items.", cls
            )
        jcal_params = jcal_property[1]
        # The parameters live at index 1 of the property; pass that index
        # along for errors raised while parsing them.
        with JCalParsingError.reraise_with_path_added(1):
            self = cls.from_jcal(jcal_params)
        if self.is_utc():
            del self.tzid  # we do not want this parameter
        return self

660 

661 

def escape_string(val: str) -> str:
    r"""Replace backslash escape sequences with percent-encoded hex values.

    This is used while parsing parameters so that escaped delimiters do not
    take part in splitting. :func:`unescape_string` turns the percent codes
    back into characters.

    Parameters:
        val: The string with backslash escapes.

    Returns:
        The string with backslash escapes converted to percent encoding.

    Note:
        Conversions, applied in this order:

        - ``\,`` -> ``%2C``
        - ``\:`` -> ``%3A``
        - ``\;`` -> ``%3B``
        - ``\\`` -> ``%5C``
    """
    # f'{i:02X}'
    conversions = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    encoded = val
    for sequence, percent_code in conversions:
        encoded = encoded.replace(sequence, percent_code)
    return encoded

690 

691 

def unescape_string(val: str) -> str:
    r"""Replace percent-encoded hex values with the characters they stand for.

    This reverses :func:`escape_string` and is used for parameter parsing.

    Parameters:
        val: The string with percent-encoded values.

    Returns:
        The string with percent encoding converted to characters.

    Note:
        Conversions, applied in this order:

        - ``%2C`` -> ``,``
        - ``%3A`` -> ``:``
        - ``%3B`` -> ``;``
        - ``%5C`` -> ``\``
    """
    conversions = (
        ("%2C", ","),
        ("%3A", ":"),
        ("%3B", ";"),
        ("%5C", "\\"),
    )
    decoded = val
    for percent_code, character in conversions:
        decoded = decoded.replace(percent_code, character)
    return decoded

718 

719 

# Matches a backslash followed by one of the characters that may be escaped.
_unescape_backslash_regex = re.compile(r"\\([\\,;:nN])")


def unescape_backslash(val: str):
    r"""Unescape backslash sequences in iCalendar text.

    Unlike :py:meth:`unescape_string`, this only handles actual backslash escapes
    per :rfc:`5545`, not URL encoding. This preserves URL-encoded values
    like ``%3A`` in URLs.

    Processes backslash escape sequences in a single pass using regex matching:
    ``\n`` and ``\N`` become a newline character, while ``\\``, ``\,``, ``\;``
    and ``\:`` become the character after the backslash. Any other backslash
    is left untouched.
    """
    return _unescape_backslash_regex.sub(
        lambda m: "\n" if m.group(1) in "nN" else m.group(1), val
    )


def _split_on_unescaped(text: str, sep: str) -> list[str]:
    """Split ``text`` on every ``sep`` not preceded by a backslash, then unescape each part."""
    if not text:
        return [""]

    parts = []
    current = []
    size = len(text)
    i = 0

    while i < size:
        ch = text[i]
        if ch == "\\" and i + 1 < size:
            # Escaped character - keep both backslash and next char so that
            # unescape_backslash can resolve the pair afterwards.
            current.append(text[i : i + 2])
            i += 2
        elif ch == sep:
            # Unescaped separator - split point
            parts.append(unescape_backslash("".join(current)))
            current = []
            i += 1
        else:
            current.append(ch)
            i += 1

    # Add final part
    parts.append(unescape_backslash("".join(current)))
    return parts


def split_on_unescaped_comma(text: str) -> list[str]:
    r"""Split text on unescaped commas and unescape each part.

    Splits only on commas not preceded by backslash.
    After splitting, unescapes backslash sequences in each part.

    Parameters:
        text: Text with potential escaped commas (e.g., "foo\\, bar,baz")

    Returns:
        List of unescaped category strings. An empty ``text`` yields ``[""]``.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_comma
            >>> split_on_unescaped_comma(r"foo\, bar,baz")
            ['foo, bar', 'baz']
            >>> split_on_unescaped_comma("a,b,c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_comma(r"a\,b\,c")
            ['a,b,c']
            >>> split_on_unescaped_comma(r"Work,Personal\,Urgent")
            ['Work', 'Personal,Urgent']
    """
    return _split_on_unescaped(text, ",")


def split_on_unescaped_semicolon(text: str) -> list[str]:
    r"""Split text on unescaped semicolons and unescape each part.

    Splits only on semicolons not preceded by a backslash.
    After splitting, unescapes backslash sequences in each part.
    Used by vCard structured properties (ADR, N, ORG) per :rfc:`6350`.

    Parameters:
        text: Text with potential escaped semicolons (e.g., "field1\\;with;field2")

    Returns:
        List of unescaped field strings. An empty ``text`` yields ``[""]``.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_semicolon
            >>> split_on_unescaped_semicolon(r"field1\;with;field2")
            ['field1;with', 'field2']
            >>> split_on_unescaped_semicolon("a;b;c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_semicolon(r"a\;b\;c")
            ['a;b;c']
            >>> split_on_unescaped_semicolon(r"PO Box 123\;Suite 200;City")
            ['PO Box 123;Suite 200', 'City']
    """
    return _split_on_unescaped(text, ";")

842 

843 

# Matches the three caret sequences that RFC 6868 defines.
RFC_6868_UNESCAPE_REGEX = re.compile(r"\^\^|\^n|\^'")


def rfc_6868_unescape(param_value: str) -> str:
    """Decode the :rfc:`6868` caret escapes of a parameter value.

    - ^^ -> ^
    - ^n -> system specific newline
    - ^' -> "
    - ^ with others stay intact

    Parameters:
        param_value: The escaped parameter value.

    Returns:
        The parameter value with the caret sequences decoded.
    """

    def decode(match):
        sequence = match.group(0)
        if sequence == "^^":
            return "^"
        if sequence == "^n":
            return os.linesep
        if sequence == "^'":
            return '"'
        return sequence

    return RFC_6868_UNESCAPE_REGEX.sub(decode, param_value)

863 

864 

# Matches every character sequence that RFC 6868 requires to be escaped.
# CRLF is listed before CR and LF so that it is treated as one line break.
RFC_6868_ESCAPE_REGEX = re.compile(r'\^|\r\n|\r|\n|"')


def rfc_6868_escape(param_value: str) -> str:
    """Encode a parameter value with the :rfc:`6868` caret escapes.

    - ^ -> ^^
    - " -> ^'
    - newline -> ^n

    Parameters:
        param_value: The raw parameter value.

    Returns:
        The parameter value with carets, double quotes and line breaks
        (CRLF, CR or LF) escaped.
    """

    def encode(match):
        found = match.group(0)
        if found == "^":
            return "^^"
        if found == '"':
            return "^'"
        if found in ("\r\n", "\r", "\n"):
            return "^n"
        return found

    return RFC_6868_ESCAPE_REGEX.sub(encode, param_value)

885 

886 

def unescape_list_or_string(val: str | list[str]) -> str | list[str]:
    """Apply :func:`unescape_string` to a string or to every item of a list.

    Parameters:
        val: A string or list of strings to unescape.

    Returns:
        The unescaped string, or a new list with each element unescaped.
    """
    if not isinstance(val, list):
        return unescape_string(val)
    return list(map(unescape_string, val))

902 

903 

904######################################### 

905# parsing and generation of content lines 

906 

907 

class Contentline(str):
    """A content line is basically a string that can be folded and parsed into
    parts.

    Instances carry a ``strict`` attribute that is passed on to
    ``Parameters.from_ical`` when the line is parsed.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create a content line from ``value``.

        Parameters:
            value: The unfolded line; converted with ``to_unicode``.
            strict: Stored on the instance as ``strict``.
            encoding: The encoding passed to ``to_unicode``.
        """
        value = to_unicode(value, encoding=encoding)
        assert "\n" not in value, (
            "Content line can not contain unescaped new line characters."
        )
        self = super().__new__(cls, value)
        self.strict = strict
        return self

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002, FBT001
    ):
        """Turn a parts into a content line.

        Parameters:
            name: The property name.
            params: The parameters of the property.
            values: The property value. Objects without a ``to_ical``
                method are wrapped in ``vText`` first.
            sorted: Passed to ``Parameters.to_ical`` to sort the parameters.

        Returns:
            ``name;params:values`` or, if the serialized parameters are
            empty, ``name:values``.
        """
        assert isinstance(params, Parameters)
        if hasattr(values, "to_ical"):
            values = values.to_ical()
        else:
            # Imported here rather than at module level.
            # NOTE(review): presumably to avoid a circular import - confirm.
            from icalendar.prop import vText

            values = vText(values).to_ical()
            # elif isinstance(values, basestring):
            #   values = escape_char(values)

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        values = to_unicode(values)
        if params:
            params = to_unicode(params.to_ical(sorted=sorted))
            if params:
                # some parameter values can be skipped during serialization
                return cls(f"{name};{params}:{values}")
        return cls(f"{name}:{values}")

    def parts(self) -> tuple[str, Parameters, str]:
        """Split the content line into ``name``, ``parameters``, and ``values`` parts.

        Properly handles escaping with backslashes and double-quote sections
        to avoid corrupting URL-encoded characters in values.

        Example with parameter:

        .. code-block:: text

            DESCRIPTION;ALTREP="cid:part1.0001@example.org":The Fall'98 Wild

        Example without parameters:

        .. code-block:: text

            DESCRIPTION:The Fall'98 Wild

        Raises:
            ValueError: If the line has no valid name, no delimiter, an
                empty parameter section, or invalid parameters.
        """
        try:
            name_split: int | None = None
            value_split: int | None = None
            in_quotes: bool = False
            escaped: bool = False

            # Single pass over the line: track whether we are inside a
            # double-quoted section or right after a backslash, and record
            # the first delimiter positions seen outside of both.
            for i, ch in enumerate(self):
                if ch == '"' and not escaped:
                    in_quotes = not in_quotes
                elif ch == "\\" and not in_quotes:
                    escaped = True
                    # skip the reset below so the next character is escaped
                    continue
                elif not in_quotes and not escaped:
                    # Find first delimiter for name
                    if ch in ":;" and name_split is None:
                        name_split = i
                    # Find value delimiter (first colon)
                    if ch == ":" and value_split is None:
                        value_split = i

                # an escape only covers the character directly after it
                escaped = False

            # Validate parsing results
            if not value_split:
                # No colon found - value is empty, use end of string
                value_split = len(self)

            # Extract name - if no delimiter,
            # take whole string for validate_token to reject
            name = self[:name_split] if name_split else self
            validate_token(name)

            if not name_split or name_split + 1 == value_split:
                # No delimiter or empty parameter section
                raise ValueError("Invalid content line")  # noqa: TRY301
            # Parse parameters - they still need to be escaped/unescaped
            # for proper handling of commas, semicolons, etc. in parameter values
            param_str = escape_string(self[name_split + 1 : value_split])
            params = Parameters.from_ical(param_str, strict=self.strict)
            # Turn the percent codes produced by escape_string back into
            # characters now that splitting is done.
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in iter(params.items())
            )
            # Unescape backslash sequences in values but preserve URL encoding
            values = unescape_backslash(self[value_split + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Unfold the content lines in an iCalendar into long content lines."""
        ical = to_unicode(ical)
        # a fold is one or more line breaks followed by either a space or a tab
        return cls(UFOLD.sub("", ical), strict=strict)

    def to_ical(self):
        """Fold the content line with ``foldline`` and encode it to bytes.

        Long content lines are folded so that they stay within the default
        limit of 75 octets per line.
        """
        return foldline(self).encode(DEFAULT_ENCODING)

1035 

1036 

class Contentlines(list):
    """A list of content lines.

    I assume that iCalendar files generally are a few kilobytes in size.
    Then this should be efficient. For huge files, an iterator should probably
    be used instead.
    """

    def to_ical(self):
        """Serialize all non-empty lines, each terminated by CRLF."""
        encoded_lines = [line.to_ical() for line in self if line]
        return b"\r\n".join(encoded_lines) + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parse a string into content lines.

        Raises:
            ValueError: If unfolding or splitting the input fails.
        """
        st = to_unicode(st)
        try:
            # a fold is a line break followed by either a space or a tab
            unfolded = UFOLD.sub("", st)
            lines = cls(
                Contentline(raw_line)
                for raw_line in NEWLINE.split(unfolded)
                if raw_line
            )
            lines.append("")  # '\r\n' at the end of every content line
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return lines

1059 

1060 

# Names exported by ``from icalendar.parser import *``:
# constants first, then classes, then functions.
__all__ = [
    "FOLD",
    "NAME",
    "NEWLINE",
    "QUNSAFE_CHAR",
    "QUOTABLE",
    "UFOLD",
    "UNSAFE_CHAR",
    "Contentline",
    "Contentlines",
    "Parameters",
    "dquote",
    "escape_char",
    "escape_string",
    "foldline",
    "param_value",
    "q_join",
    "q_split",
    "rfc_6868_escape",
    "rfc_6868_unescape",
    "split_on_unescaped_comma",
    "split_on_unescaped_semicolon",
    "unescape_backslash",
    "unescape_char",
    "unescape_list_or_string",
    "unescape_string",
    "validate_param_value",
    "validate_token",
]