Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser.py: 82%

1"""This module parses and generates contentlines as defined in RFC 5545

2(iCalendar), but will probably work for other MIME types with similar syntax.

3Eg. RFC 2426 (vCard)

5It is stupid in the sense that it treats the content purely as strings. No type

6conversion is attempted.

7"""

9from __future__ import annotations

11import functools

12import os

13import re

14from collections.abc import Sequence

15from datetime import datetime, time

16from typing import TYPE_CHECKING, Any, Callable, Protocol

18from icalendar.caselessdict import CaselessDict

19from icalendar.error import JCalParsingError

20from icalendar.parser_tools import (

21 DEFAULT_ENCODING,

22 ICAL_TYPE,

23 SEQUENCE_TYPES,

24 to_unicode,

25)

26from icalendar.timezone.tzid import tzid_from_dt

28if TYPE_CHECKING:

29 from icalendar.enums import VALUE

30 from icalendar.prop import VPROPERTY

class HasToIcal(Protocol):
    """Structural type for any object that exposes a ``to_ical()`` method."""

    def to_ical(self) -> bytes:
        """Return the iCalendar representation of the object as bytes."""

# Ordered ``(old, new)`` replacement steps used by ``escape_char``.
# NOTE: ORDER MATTERS! Backslashes must be doubled before any step that
# introduces new backslashes, otherwise those would be escaped a second time.
_ESCAPE_CHAR_STEPS = (
    (r"\N", "\n"),
    ("\\", "\\\\"),
    (";", r"\;"),
    (",", r"\,"),
    ("\r\n", r"\n"),
    ("\n", r"\n"),
)
# The same steps for ``bytes`` input (all involved characters are ASCII).
_ESCAPE_CHAR_STEPS_BYTES = tuple(
    (old.encode("ascii"), new.encode("ascii")) for old, new in _ESCAPE_CHAR_STEPS
)


def escape_char(text: str | bytes) -> str | bytes:
    r"""Format value according to iCalendar TEXT escaping rules.

    Escapes special characters in text values according to
    :rfc:`5545#section-3.3.11` rules. Both ``str`` and ``bytes`` are
    accepted; the result has the same type as the input.

    Parameters:
        text: The text to escape.

    Returns:
        The escaped text with special characters escaped.

    Raises:
        AssertionError: If ``text`` is neither ``str`` nor ``bytes``.

    Note:
        The replacements are applied one after another, in this order:

        1. a literal ``\N`` -> a newline character (which step 6 then
           writes as ``\n``, so ``\N`` is normalized to lowercase)
        2. ``\`` -> ``\\`` (escape backslashes)
        3. ``;`` -> ``\;`` (escape semicolons)
        4. ``,`` -> ``\,`` (escape commas)
        5. CRLF -> a literal ``\n`` (normalize line endings)
        6. a newline character -> a literal ``\n``
    """
    assert isinstance(text, (str, bytes))
    # Pick the replacement table matching the input type; str.replace and
    # bytes.replace both reject arguments of the other type.
    steps = _ESCAPE_CHAR_STEPS_BYTES if isinstance(text, bytes) else _ESCAPE_CHAR_STEPS
    for old, new in steps:
        text = text.replace(old, new)
    return text

# One escape sequence (backslash followed by ``\``, ``,``, ``;``, ``n`` or
# ``N``) or a CRLF pair. Matching them in a single pass guarantees that the
# output of one replacement is never re-interpreted as part of another escape.
_UNESCAPE_CHAR_STR = re.compile(r"\\([\\,;nN])|\r\n")
_UNESCAPE_CHAR_BYTES = re.compile(rb"\\([\\,;nN])|\r\n")


def unescape_char(text: str | bytes) -> str | bytes | None:
    r"""Unescape iCalendar TEXT values.

    Reverses the escaping applied by :func:`escape_char` according to
    :rfc:`5545#section-3.3.11` TEXT escaping rules.

    Parameters:
        text: The escaped text.

    Returns:
        The unescaped text, of the same type as ``text``, or ``None`` if
        ``text`` is neither ``str`` nor ``bytes`` (only reachable when
        assertions are disabled).

    Raises:
        AssertionError: If ``text`` is neither ``str`` nor ``bytes``.

    Note:
        The text is scanned once from left to right:

        - ``\n`` and ``\N`` -> newline
        - CRLF -> newline (normalize line endings)
        - ``\,`` -> ``,``
        - ``\;`` -> ``;``
        - ``\\`` -> ``\``

        Because every escape is consumed exactly once, an escaped backslash
        followed by ``n`` (``\\n``) yields a backslash and the letter ``n``
        instead of a backslash and a newline.
    """
    assert isinstance(text, (str, bytes))
    if isinstance(text, str):
        pattern, newline, newline_letters = _UNESCAPE_CHAR_STR, "\n", ("n", "N")
    elif isinstance(text, bytes):
        pattern, newline, newline_letters = _UNESCAPE_CHAR_BYTES, b"\n", (b"n", b"N")
    else:
        return None

    def replace(match):
        escaped = match.group(1)
        # group(1) is None when the CRLF alternative matched.
        if escaped is None or escaped in newline_letters:
            return newline
        return escaped

    return pattern.sub(replace, text)

118

119

def foldline(line: str, limit: int = 75, fold_sep: str = "\r\n ") -> str:
    """Fold a content line as described in RFC 5545.

    RFC 5545 recommends that lines are not longer than 75 octets (line break
    excluded) and that longer lines are split by inserting a CRLF followed by
    a single white-space character.

    Parameters:
        line: The unfolded line; it must not contain a newline character.
        limit: The octet limit per line.
        fold_sep: The separator inserted at every fold.

    Returns:
        The folded line.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    if line.isascii():
        # Common case: one character is one octet, so plain slicing works.
        width = limit - 1
        chunks = [line[start : start + width] for start in range(0, len(line), width)]
        return fold_sep.join(chunks)

    # Slow path: count encoded octets so no multi-byte character is split.
    pieces: list[str] = []
    used = 0
    for char in line:
        size = len(char.encode(DEFAULT_ENCODING))
        used += size
        if used >= limit:
            pieces.append(fold_sep)
            used = size
        pieces.append(char)
    return "".join(pieces)

153

154

155#################################################################

156# Property parameter stuff

157

158

def param_value(value: Sequence[str] | str | HasToIcal, always_quote: bool = False) -> str:
    """Convert a parameter value to its iCalendar representation.

    Applies :rfc:`6868` escaping and optionally quotes the value according
    to :rfc:`5545` parameter value formatting rules.

    Parameters:
        value: The parameter value to convert. Can be a sequence, string, or
            object with a ``to_ical()`` method.
        always_quote: If ``True``, always enclose the value in double quotes.
            Defaults to ``False`` (only quote when necessary). This applies
            to every kind of ``value``, including objects converted through
            ``to_ical()``.

    Returns:
        The formatted parameter value, escaped and quoted as needed.
    """
    if isinstance(value, SEQUENCE_TYPES):
        return q_join(map(rfc_6868_escape, value), always_quote=always_quote)
    if not isinstance(value, str):
        # Anything else is expected to serialize itself to bytes.
        value = value.to_ical().decode(DEFAULT_ENCODING)
    return dquote(rfc_6868_escape(value), always_quote=always_quote)

179

180

181# Could be improved

182

# Token names: ``[\w-]`` because of the iCalendar RFC, ``.`` because of the
# vCard RFC.
NAME = re.compile(r"[\w.-]+")

# Characters that must not appear in an unquoted parameter value.
UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f",:;]')
# Characters that must not appear in a quoted parameter value.
QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f"]')
# A fold: one or more line breaks followed by a space or a tab (bytes / str).
FOLD = re.compile(b"(\r?\n)+[ \t]")
UFOLD = re.compile("(\r?\n)+[ \t]")
# A line break, with or without a carriage return.
NEWLINE = re.compile(r"\r?\n")

192

193

def validate_token(name: str) -> None:
    r"""Check that ``name`` is a valid iCalendar token.

    A name is accepted when it is non-empty and consists entirely of
    characters matched by the ``NAME`` pattern (``[\w.-]+``).

    Parameters:
        name: The token name to validate.

    Raises:
        ValueError: If the name is not a valid token.
    """
    if NAME.fullmatch(name) is None:
        raise ValueError(name)

210

211

def validate_param_value(value: str, quoted: bool = True) -> None:
    """Reject parameter values that contain unsafe characters.

    Quoted values are checked against ``QUNSAFE_CHAR``, unquoted values
    against the stricter ``UNSAFE_CHAR``.

    Parameters:
        value: The parameter value to validate.
        quoted: If ``True`` (the default), validate as a quoted value.
            If ``False``, validate as an unquoted value.

    Raises:
        ValueError: If the value contains unsafe characters for its quote state.
    """
    if quoted:
        forbidden = QUNSAFE_CHAR
    else:
        forbidden = UNSAFE_CHAR
    if forbidden.search(value) is not None:
        raise ValueError(value)

229

230

# Characters whose presence in a parameter value causes the value to be
# enclosed in double quotes. Note that the last character is the typographic
# apostrophe U+2019, not the ASCII single quote.
QUOTABLE = re.compile("[,;:’]")  # noqa: RUF001


def dquote(val: str, always_quote: bool = False) -> str:
    """Enclose a parameter value in double quotes when needed.

    Double-quote characters are forbidden inside parameter values, so they
    are replaced with ASCII single quotes first. The result is then wrapped
    in double quotes if it contains a character matched by ``QUOTABLE``
    (``,``, ``;``, ``:`` or the typographic apostrophe U+2019) or if
    ``always_quote`` is set.

    Parameters:
        val: The parameter value to quote.
        always_quote: If ``True``, always enclose in quotes regardless of
            content. Defaults to ``False`` (only quote when necessary).

    Returns:
        The value, enclosed in double quotes if needed or requested.
    """
    sanitized = val.replace('"', "'")
    needs_quotes = always_quote or QUOTABLE.search(sanitized) is not None
    return f'"{sanitized}"' if needs_quotes else sanitized

258

259

260# parsing helper

def q_split(st: str, sep: str = ",", maxsplit: int = -1) -> list[str]:
    """Split a string on a separator while honouring double quotes.

    Separators located inside a double-quoted section are not treated as
    split points. An empty input string yields an empty list.

    Parameters:
        st: The string to split.
        sep: The separator character. Defaults to ``,``.
        maxsplit: Maximum number of splits to perform. If ``-1`` (default),
            then perform all possible splits. If ``0``, the string is
            returned unsplit as the only list item.

    Returns:
        The split string parts.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import q_split
            >>> q_split('a,b,c')
            ['a', 'b', 'c']
            >>> q_split('a,"b,c",d')
            ['a', '"b,c"', 'd']
            >>> q_split('a;b;c', sep=';')
            ['a', 'b', 'c']
    """
    if maxsplit == 0:
        return [st]

    parts = []
    start = 0
    quoted = False
    done = 0
    last = len(st) - 1
    for pos, char in enumerate(st):
        if char == '"':
            quoted = not quoted
        if char == sep and not quoted:
            parts.append(st[start:pos])
            start = pos + 1
            done += 1
        # Flush the remainder at the end of the input or once the split
        # budget is used up.
        if pos == last or done == maxsplit:
            parts.append(st[start:])
            break
    return parts

307

308

def q_join(lst: list[str], sep: str = ",", always_quote: bool = False) -> str:
    """Join items with a separator, quoting each item as needed.

    Every item is passed through :func:`dquote` before joining.

    Parameters:
        lst: The list of items to join.
        sep: The separator to use. Defaults to ``,``.
        always_quote: If ``True``, always quote all items. Defaults to ``False``
            (only quote when necessary).

    Returns:
        The joined string with items quoted as needed.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import q_join
            >>> q_join(['a', 'b', 'c'])
            'a,b,c'
            >>> q_join(['plain', 'has,comma'])
            'plain,"has,comma"'
    """
    quoted_items = [dquote(item, always_quote=always_quote) for item in lst]
    return sep.join(quoted_items)

334

335

def single_string_parameter(func: Callable | None = None, upper=False):
    """Build a property that reads and writes one string parameter.

    The parameter is stored under the name of the decorated function.
    Assigning ``None`` to the property removes the parameter.

    Parameters:
        upper: Convert the value to uppercase
        func: The function to decorate.

    Returns:
        The property for the parameter or a decorator for the parameter
        if func is ``None``.
    """

    def build_property(wrapped):
        key = wrapped.__name__

        def remove(self: Parameters):
            """Delete the value."""
            self.pop(key, None)

        @functools.wraps(wrapped)
        def read(self: Parameters):
            """Get the value."""
            stored = self.get(key)
            if upper and stored is not None:
                return stored.upper()
            return stored

        def write(self: Parameters, value: str | None):
            """Set the value"""
            if value is None:
                remove(self)
                return
            self[key] = value.upper() if upper else value

        return property(read, write, remove, doc=wrapped.__doc__)

    # Support both ``@single_string_parameter`` and
    # ``@single_string_parameter(upper=True)``.
    return build_property if func is None else build_property(func)

377

378

class Parameters(CaselessDict):
    """Parser and generator of Property parameter strings.

    It knows nothing of datatypes.
    Its main concern is textual structure.

    Examples:

        Modify parameters:

        .. code-block:: pycon

            >>> from icalendar import Parameters
            >>> params = Parameters()
            >>> params['VALUE'] = 'TEXT'
            >>> params.value
            'TEXT'
            >>> params
            Parameters({'VALUE': 'TEXT'})

        Create new parameters:

        .. code-block:: pycon

            >>> params = Parameters(value="BINARY")
            >>> params.value
            'BINARY'

        Set a default:

        .. code-block:: pycon

            >>> params = Parameters(value="BINARY", default_value="TEXT")
            >>> params
            Parameters({'VALUE': 'BINARY'})

    """

    def __init__(self, *args, **kwargs):
        """Create new parameters.

        A leading ``None`` positional argument is ignored. Keyword arguments
        whose name starts with ``default_`` (case-insensitive) are not stored
        directly; the rest of their name is used as a key that is only set
        if it is not already present.
        """
        if args and args[0] is None:
            # allow passing None
            args = args[1:]
        # Strip the 8-character "default_" prefix to obtain the real key.
        defaults = {
            key[8:]: kwargs.pop(key)
            for key in list(kwargs.keys())
            if key.lower().startswith("default_")
        }
        super().__init__(*args, **kwargs)
        for key, value in defaults.items():
            self.setdefault(key, value)

    # The following parameters must always be enclosed in double quotes
    always_quoted = (
        "ALTREP",
        "DELEGATED-FROM",
        "DELEGATED-TO",
        "DIR",
        "MEMBER",
        "SENT-BY",
        # Part of X-APPLE-STRUCTURED-LOCATION
        "X-ADDRESS",
        "X-TITLE",
        # RFC 9253
        "LINKREL",
    )
    # A parameter listed here is quoted if its value contains one of the
    # given characters.
    quote_also = {
        # This is escaped in the RFC
        "CN": " '",
    }

    def params(self):
        """In RFC 5545 keys are called parameters, so this is to be consistent
        with the naming conventions.
        """
        return self.keys()

    def to_ical(self, sorted: bool = True):  # noqa: A002, FBT001
        """Returns an :rfc:`5545` representation of the parameters.

        A ``TZID`` parameter whose value is ``"UTC"`` is left out of the
        result.

        Parameters:
            sorted (bool): Sort the parameters before encoding.

        Returns:
            The encoded ``KEY=value`` pairs joined by ``;`` as bytes.
        """
        result = []
        items = list(self.items())
        if sorted:
            items.sort()

        for key, value in items:
            upper_key = key.upper()
            if upper_key == "TZID" and value == "UTC":
                # The "TZID" property parameter MUST NOT be applied to DATE-TIME
                # properties whose time values are specified in UTC.
                continue
            check_quoteable_characters = self.quote_also.get(upper_key)
            always_quote = upper_key in self.always_quoted or bool(
                check_quoteable_characters
                and any(c in value for c in check_quoteable_characters)
            )
            quoted_value = param_value(value, always_quote=always_quote)
            if isinstance(quoted_value, str):
                quoted_value = quoted_value.encode(DEFAULT_ENCODING)
            # CaselessDict keys are always unicode
            result.append(upper_key.encode(DEFAULT_ENCODING) + b"=" + quoted_value)
        return b";".join(result)

    @classmethod
    def from_ical(cls, st, strict=False):
        """Parses the parameter format from ical text format.

        Parameters:
            st: The parameter section of a content line, e.g. ``A=1;B="x,y"``.
            strict: If true, unquoted values are converted to uppercase.

        Returns:
            The parsed parameters. A parameter with several comma-separated
            values is stored as a list.

        Raises:
            ValueError: If a parameter cannot be parsed or validated.
        """

        # parse into strings
        result = cls()
        for param in q_split(st, ";"):
            try:
                key, val = q_split(param, "=", maxsplit=1)
                validate_token(key)
                # Property parameter values that are not in quoted
                # strings are case insensitive.
                vals = []
                for v in q_split(val, ","):
                    if v.startswith('"') and v.endswith('"'):
                        v2 = v.strip('"')
                        validate_param_value(v2, quoted=True)
                        vals.append(rfc_6868_unescape(v2))
                    else:
                        validate_param_value(v, quoted=False)
                        unescaped = rfc_6868_unescape(v)
                        # Unescape before changing the case: upper-casing
                        # first would turn "^n" into "^N", which is no
                        # longer recognized as an escape sequence.
                        vals.append(unescaped.upper() if strict else unescaped)
                if not vals:
                    result[key] = val
                elif len(vals) == 1:
                    result[key] = vals[0]
                else:
                    result[key] = vals
            except ValueError as exc:  # noqa: PERF203
                raise ValueError(
                    f"{param!r} is not a valid parameter string: {exc}"
                ) from exc
        return result

    @single_string_parameter(upper=True)
    def value(self) -> VALUE | str | None:
        """The VALUE parameter from :rfc:`5545`.

        Description:
            This parameter specifies the value type and format of
            the property value.  The property values MUST be of a single value
            type.  For example, a "RDATE" property cannot have a combination
            of DATE-TIME and TIME value types.

            If the property's value is the default value type, then this
            parameter need not be specified.  However, if the property's
            default value type is overridden by some other allowable value
            type, then this parameter MUST be specified.

            Applications MUST preserve the value data for x-name and iana-
            token values that they don't recognize without attempting to
            interpret or parse the value data.

        For convenience, using this property, the value will be converted to
        an uppercase string.

        .. code-block:: pycon

            >>> from icalendar import Parameters
            >>> params = Parameters()
            >>> params.value = "unknown"
            >>> params
            Parameters({'VALUE': 'UNKNOWN'})

        """

    def _parameter_value_to_jcal(
        self, value: str | float | list | VPROPERTY
    ) -> str | int | float | list:
        """Convert a parameter value to jCal format.

        Parameters:
            value: The parameter value

        Returns:
            The jCal representation of the parameter value

        Raises:
            TypeError: If the value is of an unsupported type.
        """
        if isinstance(value, list):
            return [self._parameter_value_to_jcal(v) for v in value]
        if hasattr(value, "to_jcal"):
            # property values respond to this
            jcal = value.to_jcal()
            # we only need the value part
            if len(jcal) == 4:
                return jcal[3]
            return jcal[3:]
        for t in (int, float, str):
            if isinstance(value, t):
                # Convert subclasses to the plain built-in type.
                return t(value)
        raise TypeError(
            "Unsupported parameter value type for jCal conversion: "
            f"{type(value)} {value!r}"
        )

    def to_jcal(self, exclude_utc=False) -> dict[str, str]:
        """Return the jCal representation of the parameters.

        The ``VALUE`` parameter is never included in the result.

        Parameters:
            exclude_utc (bool): Exclude the TZID parameter if it is UTC
        """
        jcal = {
            k.lower(): self._parameter_value_to_jcal(v)
            for k, v in self.items()
            if k.lower() != "value"
        }
        if exclude_utc and jcal.get("tzid") == "UTC":
            del jcal["tzid"]
        return jcal

    @single_string_parameter
    def tzid(self) -> str | None:
        """The TZID parameter from :rfc:`5545`."""

    def is_utc(self):
        """Whether the TZID parameter is UTC."""
        return self.tzid == "UTC"

    def update_tzid_from(self, dt: datetime | time | Any) -> None:
        """Update the TZID parameter from a datetime object.

        This sets the TZID parameter or deletes it according to the datetime.
        Values that are neither ``datetime`` nor ``time`` are ignored.
        """
        if isinstance(dt, (datetime, time)):
            self.tzid = tzid_from_dt(dt)

    @classmethod
    def from_jcal(cls, jcal: dict[str, str | list[str]]):
        """Parse jCal parameters.

        Parameters:
            jcal: A mapping of parameter names to a string, integer or float,
                or to a non-empty list of those.

        Raises:
            JCalParsingError: If ``jcal`` is not a mapping, a name is not a
                string or a value has an unsupported type.
        """
        if not isinstance(jcal, dict):
            raise JCalParsingError("The parameters must be a mapping.", cls)
        for name, value in jcal.items():
            if not isinstance(name, str):
                raise JCalParsingError(
                    "All parameter names must be strings.", cls, value=name
                )
            if not (
                (
                    isinstance(value, list)
                    and all(isinstance(v, (str, int, float)) for v in value)
                    and value
                )
                or isinstance(value, (str, int, float))
            ):
                raise JCalParsingError(
                    "Parameter values must be a string, integer or "
                    "float or a list of those.",
                    cls,
                    name,
                    value=value,
                )
        return cls(jcal)

    @classmethod
    def from_jcal_property(cls, jcal_property: list):
        """Create the parameters for a jCal property.

        A ``TZID`` parameter that is set to UTC is removed from the result.

        Parameters:
            jcal_property (list): The jCal property [name, params, value, ...]

        Raises:
            JCalParsingError: If ``jcal_property`` is not a list with at
                least 4 items or its parameters are invalid.
        """
        if not isinstance(jcal_property, list) or len(jcal_property) < 4:
            raise JCalParsingError(
                "The property must be a list with at least 4 items.", cls
            )
        jcal_params = jcal_property[1]
        with JCalParsingError.reraise_with_path_added(1):
            self = cls.from_jcal(jcal_params)
        if self.is_utc():
            del self.tzid  # we do not want this parameter
        return self

660

661

def escape_string(val: str) -> str:
    r"""Replace backslash escapes with percent-encoded hex values.

    This is used for parameter parsing so that escaped characters are not
    mistaken for separators during processing.

    Parameters:
        val: The string with backslash escapes.

    Returns:
        The string with backslash escapes converted to percent encoding.

    Note:
        The conversions are applied one after another, in this order:

        - ``\,`` -> ``%2C``
        - ``\:`` -> ``%3A``
        - ``\;`` -> ``%3B``
        - ``\\`` -> ``%5C``
    """
    conversions = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    for escaped, encoded in conversions:
        val = val.replace(escaped, encoded)
    return val

690

691

def unescape_string(val: str) -> str:
    r"""Replace percent-encoded hex values with their characters.

    Reverses :func:`escape_string`. This is used for parameter parsing.

    Parameters:
        val: The string with percent-encoded values.

    Returns:
        The string with percent encoding converted to characters.

    Note:
        The conversions are applied one after another, in this order:

        - ``%2C`` -> ``,``
        - ``%3A`` -> ``:``
        - ``%3B`` -> ``;``
        - ``%5C`` -> ``\``
    """
    conversions = (
        ("%2C", ","),
        ("%3A", ":"),
        ("%3B", ";"),
        ("%5C", "\\"),
    )
    for encoded, char in conversions:
        val = val.replace(encoded, char)
    return val

718

719

# A backslash followed by one of the characters that may be escaped.
_unescape_backslash_regex = re.compile(r"\\([\\,;:nN])")


def unescape_backslash(val: str):
    r"""Unescape backslash sequences in iCalendar text.

    Unlike :py:meth:`unescape_string`, this only handles actual backslash escapes
    per :rfc:`5545`, not URL encoding. This preserves URL-encoded values
    like ``%3A`` in URLs.

    Processes backslash escape sequences in a single pass using regex matching:
    ``\n`` and ``\N`` become a newline, while ``\\``, ``\,``, ``\;`` and
    ``\:`` become the character after the backslash.
    """
    return _unescape_backslash_regex.sub(
        lambda m: "\n" if m.group(1) in "nN" else m.group(1), val
    )


def _split_on_unescaped(text: str, sep: str) -> list[str]:
    """Split ``text`` on every ``sep`` not preceded by a backslash and unescape the parts."""
    if not text:
        return [""]

    parts = []
    start = 0
    i = 0
    length = len(text)
    while i < length:
        char = text[i]
        if char == "\\":
            # The character after a backslash is escaped and therefore
            # never a split point; skip over both.
            i += 2
            continue
        if char == sep:
            parts.append(unescape_backslash(text[start:i]))
            start = i + 1
        i += 1

    # Add the final part, which is empty if the text ends with a separator.
    parts.append(unescape_backslash(text[start:]))
    return parts


def split_on_unescaped_comma(text: str) -> list[str]:
    r"""Split text on unescaped commas and unescape each part.

    Splits only on commas not preceded by backslash.
    After splitting, unescapes backslash sequences in each part.

    Parameters:
        text: Text with potential escaped commas (e.g., "foo\\, bar,baz")

    Returns:
        List of unescaped category strings

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_comma
            >>> split_on_unescaped_comma(r"foo\, bar,baz")
            ['foo, bar', 'baz']
            >>> split_on_unescaped_comma("a,b,c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_comma(r"a\,b\,c")
            ['a,b,c']
            >>> split_on_unescaped_comma(r"Work,Personal\,Urgent")
            ['Work', 'Personal,Urgent']
    """
    return _split_on_unescaped(text, ",")


def split_on_unescaped_semicolon(text: str) -> list[str]:
    r"""Split text on unescaped semicolons and unescape each part.

    Splits only on semicolons not preceded by a backslash.
    After splitting, unescapes backslash sequences in each part.
    Used by vCard structured properties (ADR, N, ORG) per :rfc:`6350`.

    Parameters:
        text: Text with potential escaped semicolons (e.g., "field1\\;with;field2")

    Returns:
        List of unescaped field strings

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_semicolon
            >>> split_on_unescaped_semicolon(r"field1\;with;field2")
            ['field1;with', 'field2']
            >>> split_on_unescaped_semicolon("a;b;c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_semicolon(r"a\;b\;c")
            ['a;b;c']
            >>> split_on_unescaped_semicolon(r"PO Box 123\;Suite 200;City")
            ['PO Box 123;Suite 200', 'City']
    """
    return _split_on_unescaped(text, ";")

842

843

# The three escape sequences defined by RFC 6868.
RFC_6868_UNESCAPE_REGEX = re.compile(r"\^\^|\^n|\^'")


def rfc_6868_unescape(param_value: str) -> str:
    """Take care of :rfc:`6868` unescaping.

    - ^^ -> ^
    - ^n -> system specific newline
    - ^' -> "
    - ^ with others stay intact
    """
    decoded = {
        "^^": "^",
        "^n": os.linesep,
        "^'": '"',
    }

    def expand(match):
        token = match.group(0)
        return decoded.get(token, token)

    return RFC_6868_UNESCAPE_REGEX.sub(expand, param_value)

863

864

# Every character (sequence) that RFC 6868 requires to be escaped; CRLF is
# listed before the single CR and LF so that it is replaced as one unit.
RFC_6868_ESCAPE_REGEX = re.compile(r'\^|\r\n|\r|\n|"')


def rfc_6868_escape(param_value: str) -> str:
    """Take care of :rfc:`6868` escaping.

    - ^ -> ^^
    - " -> ^'
    - newline -> ^n
    """
    encoded = {
        "^": "^^",
        "\n": "^n",
        "\r": "^n",
        "\r\n": "^n",
        '"': "^'",
    }

    def shrink(match):
        found = match.group(0)
        return encoded.get(found, found)

    return RFC_6868_ESCAPE_REGEX.sub(shrink, param_value)

885

886

def unescape_list_or_string(val: str | list[str]) -> str | list[str]:
    """Apply :func:`unescape_string` to a string or to each string of a list.

    Parameters:
        val: A string or list of strings to unescape.

    Returns:
        The unescaped values: a new list if ``val`` is a list, otherwise
        a single string.
    """
    if not isinstance(val, list):
        return unescape_string(val)
    return list(map(unescape_string, val))

902

903

904#########################################

905# parsing and generation of content lines

906

907

class Contentline(str):
    """A single unfolded content line.

    It is a string that can be folded for output and split into its
    name, parameters and value.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create a content line from ``value`` after converting it to unicode."""
        text = to_unicode(value, encoding=encoding)
        assert "\n" not in text, (
            "Content line can not contain unescaped new line characters."
        )
        line = super().__new__(cls, text)
        # Remembered so that parts() can hand it on to the parameter parser.
        line.strict = strict
        return line

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002, FBT001
    ):
        """Build a content line from a name, parameters and a value.

        Values without a ``to_ical()`` method are wrapped in ``vText`` first.
        """
        assert isinstance(params, Parameters)
        if not hasattr(values, "to_ical"):
            from icalendar.prop import vText

            values = vText(values)
        encoded_value = values.to_ical()

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        value_text = to_unicode(encoded_value)
        param_text = to_unicode(params.to_ical(sorted=sorted)) if params else ""
        if not param_text:
            # No parameters, or all of them were skipped during serialization.
            return cls(f"{name}:{value_text}")
        return cls(f"{name};{param_text}:{value_text}")

    def parts(self) -> tuple[str, Parameters, str]:
        """Split the content line into ``name``, ``parameters``, and ``values`` parts.

        Backslash escapes and double-quoted sections are taken into account
        when the delimiters are located, and URL-encoded characters in the
        value are left untouched.

        Example with parameter:

        .. code-block:: text

            DESCRIPTION;ALTREP="cid:part1.0001@example.org":The Fall'98 Wild

        Example without parameters:

        .. code-block:: text

            DESCRIPTION:The Fall'98 Wild

        Raises:
            ValueError: If the line cannot be split into valid parts.
        """
        try:
            name_end: int | None = None
            value_start: int | None = None
            inside_quotes: bool = False
            after_backslash: bool = False

            for index, char in enumerate(self):
                if char == '"' and not after_backslash:
                    inside_quotes = not inside_quotes
                elif char == "\\" and not inside_quotes:
                    # Keep the flag set for the next character.
                    after_backslash = True
                    continue
                elif not inside_quotes and not after_backslash:
                    # The first ":" or ";" ends the name.
                    if name_end is None and char in ":;":
                        name_end = index
                    # The first ":" starts the value.
                    if value_start is None and char == ":":
                        value_start = index

                after_backslash = False

            if not value_start:
                # No colon found - value is empty, use end of string
                value_start = len(self)

            # Without a delimiter the whole line is handed to
            # validate_token so that it can reject it.
            name = self[:name_end] if name_end else self
            validate_token(name)

            if not name_end or name_end + 1 == value_start:
                # No delimiter or empty parameter section
                raise ValueError("Invalid content line")  # noqa: TRY301

            # Parameters still go through escape/unescape so that escaped
            # commas, semicolons, etc. do not act as separators.
            raw_params = escape_string(self[name_end + 1 : value_start])
            parsed = Parameters.from_ical(raw_params, strict=self.strict)
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in iter(parsed.items())
            )
            # Unescape backslash sequences in values but preserve URL encoding
            values = unescape_backslash(self[value_start + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Unfold the content lines in an iCalendar into long content lines."""
        text = to_unicode(ical)
        # a fold is a line break followed by either a space or a tab
        unfolded = UFOLD.sub("", text)
        return cls(unfolded, strict=strict)

    def to_ical(self):
        """Fold the line with ``foldline`` and encode it with the default encoding."""
        folded = foldline(self)
        return folded.encode(DEFAULT_ENCODING)

1035

1036

class Contentlines(list):
    """A list of content lines.

    I assume that iCalendar files generally are a few kilobytes in size.
    Then this should be efficient. For huge files, an iterator should probably
    be used instead.
    """

    def to_ical(self):
        """Join the encoded non-empty lines with CRLF and end with a CRLF."""
        encoded = [line.to_ical() for line in self if line]
        return b"\r\n".join(encoded) + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parses a string into content lines.

        Raises:
            ValueError: If unfolding or splitting the input fails.
        """
        st = to_unicode(st)
        try:
            # a fold is a line break followed by either a space or a tab
            joined = UFOLD.sub("", st)
            lines = cls(Contentline(raw) for raw in NEWLINE.split(joined) if raw)
            lines.append("")  # '\r\n' at the end of every content line
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return lines

1059

1060

# Public names of this module, i.e. what ``from icalendar.parser import *``
# makes available.
__all__ = [
    "FOLD",
    "NAME",
    "NEWLINE",
    "QUNSAFE_CHAR",
    "QUOTABLE",
    "UFOLD",
    "UNSAFE_CHAR",
    "Contentline",
    "Contentlines",
    "Parameters",
    "dquote",
    "escape_char",
    "escape_string",
    "foldline",
    "param_value",
    "q_join",
    "q_split",
    "rfc_6868_escape",
    "rfc_6868_unescape",
    "split_on_unescaped_comma",
    "split_on_unescaped_semicolon",
    "unescape_backslash",
    "unescape_char",
    "unescape_list_or_string",
    "unescape_string",
    "validate_param_value",
    "validate_token",
]

1089]