Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser.py: 88%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

242 statements  

1"""This module parses and generates contentlines as defined in RFC 5545 

2(iCalendar), but will probably work for other MIME types with similar syntax. 

3Eg. RFC 2426 (vCard) 

4 

5It is stupid in the sense that it treats the content purely as strings. No type 

6conversion is attempted. 

7""" 

8 

9from __future__ import annotations 

10 

11import functools 

12import os 

13import re 

14from typing import TYPE_CHECKING 

15 

16from icalendar.caselessdict import CaselessDict 

17from icalendar.parser_tools import ( 

18 DEFAULT_ENCODING, 

19 ICAL_TYPE, 

20 SEQUENCE_TYPES, 

21 to_unicode, 

22) 

23 

24if TYPE_CHECKING: 

25 from icalendar.enums import VALUE 

26 

27 

def escape_char(text):
    """Format value according to iCalendar TEXT escaping rules.

    Accepts ``str`` or ``bytes`` and returns the same type.

    The replacements are applied in this order, which matters:

    1. a literal backslash-``N`` is turned into a real newline,
    2. every backslash is doubled,
    3. ``;`` and ``,`` get a leading backslash,
    4. CRLF and bare LF are written as the two characters backslash-``n``.
    """
    assert isinstance(text, (str, bytes))
    if isinstance(text, bytes):
        # bytes.replace() rejects str arguments, so bytes input needs its own
        # chain of bytes literals; the steps mirror the str chain below.
        return (
            text.replace(rb"\N", b"\n")
            .replace(b"\\", b"\\\\")
            .replace(b";", rb"\;")
            .replace(b",", rb"\,")
            .replace(b"\r\n", rb"\n")
            .replace(b"\n", rb"\n")
        )
    # NOTE: ORDER MATTERS!
    return (
        text.replace(r"\N", "\n")
        .replace("\\", "\\\\")
        .replace(";", r"\;")
        .replace(",", r"\,")
        .replace("\r\n", r"\n")
        .replace("\n", r"\n")
    )

40 

41 

# One alternation handles every escape in a single left-to-right pass, so an
# escaped backslash can never be re-read as the start of another escape.
_UNESCAPE_CHAR_STR = re.compile(r"\\([\\,;nN])|\r\n")
_UNESCAPE_CHAR_BYTES = re.compile(rb"\\([\\,;nN])|\r\n")


def unescape_char(text):
    """Reverse the iCalendar TEXT escaping applied by ``escape_char``.

    Accepts ``str`` or ``bytes`` and returns the same type.

    - backslash-``n`` and backslash-``N`` become a newline,
    - backslash-``,`` / backslash-``;`` / doubled backslash lose the
      leading backslash,
    - a raw CRLF is normalised to a single newline.

    The text is scanned once. A chain of ``replace()`` calls would decode an
    escaped backslash followed by ``n`` as backslash + newline, instead of the
    literal two characters backslash and ``n``.
    """
    assert isinstance(text, (str, bytes))
    if isinstance(text, str):
        # group(1) is None when the CRLF alternative matched.
        return _UNESCAPE_CHAR_STR.sub(
            lambda m: "\n" if m.group(1) in (None, "n", "N") else m.group(1),
            text,
        )
    if isinstance(text, bytes):
        return _UNESCAPE_CHAR_BYTES.sub(
            lambda m: b"\n" if m.group(1) in (None, b"n", b"N") else m.group(1),
            text,
        )
    # Only reachable when assertions are disabled (python -O).
    return None

64 

65 

def foldline(line, limit=75, fold_sep="\r\n "):
    """Fold a content line as described in RFC 5545.

    The RFC recommends that lines stay within 75 octets (line break
    excluded) and allows a long line to be split between any two characters
    by inserting CRLF followed by one whitespace character.

    ``line`` must be a ``str`` without newlines. Each output segment holds at
    most ``limit - 1`` encoded bytes, and segments are joined by ``fold_sep``.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    # Pure-ASCII text: one character is one byte, so plain slicing is enough.
    if line.isascii():
        step = limit - 1
        chunks = []
        for start in range(0, len(line), step):
            chunks.append(line[start : start + step])
        return fold_sep.join(chunks)

    # Otherwise count encoded bytes per character so that a multi-byte
    # character is never pushed over the limit or split.
    pieces = []
    used = 0
    for char in line:
        width = len(char.encode(DEFAULT_ENCODING))
        if used + width >= limit:
            pieces.append(fold_sep)
            used = width
        else:
            used += width
        pieces.append(char)

    return "".join(pieces)

99 

100 

101################################################################# 

102# Property parameter stuff 

103 

104 

def param_value(value, always_quote=False):
    """Return the serialised text for a property parameter value.

    - A sequence (anything matching ``SEQUENCE_TYPES``) is RFC 6868-escaped
      item by item and joined with ``q_join``.
    - A ``str`` is RFC 6868-escaped and passed through ``dquote``.
    - Anything else is expected to provide ``to_ical()`` returning bytes,
      which are decoded with ``DEFAULT_ENCODING`` and then treated like a
      ``str``.

    ``always_quote`` forces double quotes around the value(s) in every
    branch, including the ``to_ical()`` fallback.
    """
    if isinstance(value, SEQUENCE_TYPES):
        return q_join(map(rfc_6868_escape, value), always_quote=always_quote)
    if isinstance(value, str):
        return dquote(rfc_6868_escape(value), always_quote=always_quote)
    # Forward always_quote here as well so that parameters which must be
    # quoted stay quoted when the value is a property object, not a str.
    return dquote(
        rfc_6868_escape(value.to_ical().decode(DEFAULT_ENCODING)),
        always_quote=always_quote,
    )

112 

113 

# Could be improved

# [\w-] because of the iCalendar RFC
# . because of the vCard RFC
NAME = re.compile(r"[\w.-]+")

# Characters rejected in unquoted / quoted parameter values respectively.
UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f",:;]')
QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f"]')
# A fold: one or more line breaks followed by a single space or tab.
FOLD = re.compile(b"(\r?\n)+[ \t]")
UFOLD = re.compile("(\r?\n)+[ \t]")
NEWLINE = re.compile(r"\r?\n")


def validate_token(name):
    """Raise ``ValueError(name)`` unless ``name`` consists solely of ``NAME`` characters.

    Returns ``None`` for a valid, non-empty token.
    """
    if NAME.fullmatch(name) is None:
        raise ValueError(name)

132 

133 

def validate_param_value(value, quoted=True):
    """Raise ``ValueError(value)`` if ``value`` contains a forbidden character.

    Quoted values are checked against ``QUNSAFE_CHAR``; unquoted values
    against ``UNSAFE_CHAR``.
    """
    pattern = UNSAFE_CHAR
    if quoted:
        pattern = QUNSAFE_CHAR
    if pattern.search(value) is not None:
        raise ValueError(value)

138 

139 

# Characters whose presence in a parameter value causes the value
# to be enclosed in double quotes.
QUOTABLE = re.compile("[,;:’]")  # noqa: RUF001


def dquote(val, always_quote=False):
    """Wrap a parameter value in double quotes when required.

    Double quotes are not allowed inside a parameter value, so each one is
    first replaced by a single quote. The result is wrapped in double quotes
    if it contains a ``QUOTABLE`` character or ``always_quote`` is truthy;
    otherwise it is returned without surrounding quotes.
    """
    sanitized = val.replace('"', "'")
    needs_quotes = always_quote or QUOTABLE.search(sanitized) is not None
    return f'"{sanitized}"' if needs_quotes else sanitized

153 

154 

155# parsing helper 

def q_split(st, sep=",", maxsplit=-1):
    """Split ``st`` on ``sep``, ignoring separators inside double quotes.

    ``maxsplit`` limits the number of splits; ``0`` returns ``[st]`` and the
    default ``-1`` means no limit. Quote characters are kept in the returned
    pieces. An empty ``st`` yields an empty list.
    """
    if maxsplit == 0:
        return [st]

    pieces = []
    start = 0
    last_index = len(st) - 1
    quoted = False
    done = 0
    for pos, char in enumerate(st):
        if char == '"':
            quoted = not quoted
        if char == sep and not quoted:
            pieces.append(st[start:pos])
            start = pos + 1
            done += 1
        # Flush the remainder at the end of input or once the split budget
        # is used up.
        if pos == last_index or done == maxsplit:
            pieces.append(st[start:])
            break
    return pieces

177 

178 

def q_join(lst, sep=",", always_quote=False):
    """Join the items of ``lst`` with ``sep`` after passing each through ``dquote``.

    ``always_quote`` is forwarded to ``dquote`` for every item.
    """
    quoted_items = [dquote(item, always_quote=always_quote) for item in lst]
    return sep.join(quoted_items)

182 

183 

def single_string_parameter(func):
    """Build a property that stores one string parameter on a mapping.

    The decorated function contributes only its name, which is used as the
    mapping key, and its docstring. Reading returns ``self.get(name)``.
    Assigning ``None`` or deleting removes the key if it is present.
    """
    key = func.__name__

    def fdel(self: Parameters):
        """Delete the value."""
        self.pop(key, None)

    def fset(self: Parameters, value: str | None):
        """Set the value"""
        if value is None:
            fdel(self)
            return
        self[key] = value

    @functools.wraps(func)
    def fget(self: Parameters):
        """Get the value."""
        return self.get(key)

    return property(fget, fset, fdel, doc=func.__doc__)

206 

207 

class Parameters(CaselessDict):
    """Parser and generator of Property parameter strings.

    It knows nothing of datatypes.
    Its main concern is textual structure.

    Examples:

        Modify parameters:

        .. code-block:: pycon

            >>> from icalendar import Parameters
            >>> params = Parameters()
            >>> params['VALUE'] = 'TEXT'
            >>> params.value
            'TEXT'
            >>> params
            Parameters({'VALUE': 'TEXT'})

        Create new parameters:

        .. code-block:: pycon

            >>> params = Parameters(value="BINARY")
            >>> params.value
            'BINARY'

        Set a default:

        .. code-block:: pycon

            >>> params = Parameters(value="BINARY", default_value="TEXT")
            >>> params
            Parameters({'VALUE': 'BINARY'})

    """

    def __init__(self, *args, **kwargs):
        """Create new parameters.

        A leading positional ``None`` is ignored. Keyword arguments whose
        name starts with ``default_`` (case-insensitive) are removed from
        ``kwargs`` and applied with ``setdefault`` after normal
        initialisation, so they never override an explicit value.
        """
        if args and args[0] is None:
            # allow passing None
            args = args[1:]
        defaults = {
            # Strip the 8-character "default_" prefix to get the real key.
            key[8:]: kwargs.pop(key)
            for key in list(kwargs.keys())
            if key.lower().startswith("default_")
        }
        super().__init__(*args, **kwargs)
        for key, value in defaults.items():
            self.setdefault(key, value)

    # The following parameters must always be enclosed in double quotes
    always_quoted = (
        "ALTREP",
        "DELEGATED-FROM",
        "DELEGATED-TO",
        "DIR",
        "MEMBER",
        "SENT-BY",
        # Part of X-APPLE-STRUCTURED-LOCATION
        "X-ADDRESS",
        "X-TITLE",
        # RFC 9253
        "LINKREL",
    )
    # Maps a parameter name to characters that force quoting of its value
    # when at least one of them occurs in the value.
    quote_also = {
        # This is escaped in the RFC
        "CN": " '",
    }

    def params(self):
        """In RFC 5545 keys are called parameters, so this is to be consistent
        with the naming conventions.
        """
        return self.keys()

    def to_ical(self, sorted: bool = True):  # noqa: A002, FBT001
        """Serialise all parameters to bytes as ``KEY=value`` joined by ``;``.

        Keys are upper-cased. With ``sorted`` true (the default) the items
        are sorted before serialising. Values are quoted when the key is in
        ``always_quoted`` or when the value contains one of the characters
        listed for that key in ``quote_also``.
        """
        result = []
        items = list(self.items())
        if sorted:
            items.sort()

        for key, value in items:
            upper_key = key.upper()
            check_quoteable_characters = self.quote_also.get(upper_key)
            always_quote = upper_key in self.always_quoted or (
                check_quoteable_characters
                and any(c in value for c in check_quoteable_characters)
            )
            quoted_value = param_value(value, always_quote=always_quote)
            if isinstance(quoted_value, str):
                quoted_value = quoted_value.encode(DEFAULT_ENCODING)
            # CaselessDict keys are always unicode
            result.append(upper_key.encode(DEFAULT_ENCODING) + b"=" + quoted_value)
        return b";".join(result)

    @classmethod
    def from_ical(cls, st, strict=False):
        """Parses the parameter format from ical text format.

        ``st`` is split on ``;`` into ``key=value`` pairs and each value is
        split on ``,``; quoted sections are respected by both splits. A key
        with one value stores that value, a key with several stores a list.
        With ``strict`` true, unquoted values are upper-cased.

        Raises:
            ValueError: if a pair cannot be split, the key is not a valid
                token, or a value contains a forbidden character.
        """

        # parse into strings
        result = cls()
        for param in q_split(st, ";"):
            try:
                key, val = q_split(param, "=", maxsplit=1)
                validate_token(key)
                # Property parameter values that are not in quoted
                # strings are case insensitive.
                vals = []
                for v in q_split(val, ","):
                    if v.startswith('"') and v.endswith('"'):
                        v2 = v.strip('"')
                        validate_param_value(v2, quoted=True)
                        vals.append(rfc_6868_unescape(v2))
                    else:
                        validate_param_value(v, quoted=False)
                        # Unescape BEFORE upper-casing: upper-casing first
                        # would turn the escape "^n" into "^N", which the
                        # RFC 6868 decoder does not recognise.
                        unescaped = rfc_6868_unescape(v)
                        vals.append(unescaped.upper() if strict else unescaped)
                if not vals:
                    # q_split returns [] for an empty value; keep it as is.
                    result[key] = val
                elif len(vals) == 1:
                    result[key] = vals[0]
                else:
                    result[key] = vals
            except ValueError as exc:  # noqa: PERF203
                raise ValueError(
                    f"{param!r} is not a valid parameter string: {exc}"
                ) from exc
        return result

    @single_string_parameter
    def value(self) -> VALUE | str | None:
        """The VALUE parameter from :rfc:`5545`.

        Description:
            This parameter specifies the value type and format of
            the property value.  The property values MUST be of a single value
            type.  For example, a "RDATE" property cannot have a combination
            of DATE-TIME and TIME value types.

            If the property's value is the default value type, then this
            parameter need not be specified.  However, if the property's
            default value type is overridden by some other allowable value
            type, then this parameter MUST be specified.

            Applications MUST preserve the value data for x-name and iana-
            token values that they don't recognize without attempting to
            interpret or parse the value data.
        """

361 

362 

def escape_string(val):
    """Replace backslash-escaped ``,`` ``:`` ``;`` and backslash with percent codes.

    The substitutions run in a fixed order: backslash-comma, backslash-colon,
    backslash-semicolon, then a doubled backslash.
    """
    # f'{i:02X}'
    for sequence, encoded in (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    ):
        val = val.replace(sequence, encoded)
    return val

371 

372 

def unescape_string(val):
    """Turn the percent codes ``%2C`` ``%3A`` ``%3B`` ``%5C`` back into characters.

    They map to ``,`` ``:`` ``;`` and a backslash respectively and are
    replaced in that order. Only the upper-case spellings are recognised.
    """
    for encoded, char in (
        ("%2C", ","),
        ("%3A", ":"),
        ("%3B", ";"),
        ("%5C", "\\"),
    ):
        val = val.replace(encoded, char)
    return val

380 

381 

_unescape_backslash_regex = re.compile(r"\\([\\,;:nN])")


def unescape_backslash(val: str):
    r"""Decode backslash escape sequences in iCalendar text.

    In contrast to :py:meth:`unescape_string`, only real backslash escapes
    per :rfc:`5545` are touched; percent-encoded sequences such as ``%3A``
    in URLs are left alone.

    The text is scanned once: backslash-``n`` and backslash-``N`` become a
    newline, and a backslash before ``\``, ``,``, ``;`` or ``:`` is dropped.
    """

    def _resolve(match):
        char = match.group(1)
        if char in "nN":
            return "\n"
        return char

    return _unescape_backslash_regex.sub(_resolve, val)

397 

398 

RFC_6868_UNESCAPE_REGEX = re.compile(r"\^\^|\^n|\^'")


def rfc_6868_unescape(param_value: str) -> str:
    """Decode :rfc:`6868` caret escapes in a parameter value.

    - ``^^`` becomes ``^``
    - ``^n`` becomes the platform line separator (``os.linesep``)
    - ``^'`` becomes ``"``
    - a caret followed by anything else is left untouched
    """

    def _decode(match):
        token = match.group(0)
        if token == "^^":
            return "^"
        if token == "^n":
            return os.linesep
        if token == "^'":
            return '"'
        return token

    return RFC_6868_UNESCAPE_REGEX.sub(_decode, param_value)

418 

419 

RFC_6868_ESCAPE_REGEX = re.compile(r'\^|\r\n|\r|\n|"')


def rfc_6868_escape(param_value: str) -> str:
    """Encode a parameter value with :rfc:`6868` caret escapes.

    - ``^`` becomes ``^^``
    - ``"`` becomes ``^'``
    - CRLF, a lone CR, or a lone LF each become ``^n``
    """

    def _encode(match):
        found = match.group(0)
        if found == "^":
            return "^^"
        if found == '"':
            return "^'"
        # The remaining alternatives of the pattern are all line breaks.
        return "^n"

    return RFC_6868_ESCAPE_REGEX.sub(_encode, param_value)

440 

441 

def unescape_list_or_string(val):
    """Apply ``unescape_string`` to ``val``, or to each item when ``val`` is a list.

    A list input produces a new list; any other input is passed to
    ``unescape_string`` directly.
    """
    if not isinstance(val, list):
        return unescape_string(val)
    return list(map(unescape_string, val))

446 

447 

448######################################### 

449# parsing and generation of content lines 

450 

451 

class Contentline(str):
    """A single content line: a string that can be folded and split into
    its name, parameters, and value.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create the line from ``value`` after converting it with ``to_unicode``.

        ``strict`` is stored on the instance and later forwarded to
        ``Parameters.from_ical`` by :meth:`parts`.
        """
        text = to_unicode(value, encoding=encoding)
        assert "\n" not in text, (
            "Content line can not contain unescaped new line characters."
        )
        line = super().__new__(cls, text)
        line.strict = strict
        return line

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002, FBT001
    ):
        """Assemble a content line from a name, parameters, and a value.

        ``values`` is serialised through its own ``to_ical()`` when it has
        one, and is wrapped in ``vText`` first otherwise. ``sorted`` is
        forwarded to ``params.to_ical``.
        """
        assert isinstance(params, Parameters)
        if not hasattr(values, "to_ical"):
            from icalendar.prop import vText

            values = vText(values)
        encoded_values = values.to_ical()

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        values = to_unicode(encoded_values)
        if not params:
            return cls(f"{name}:{values}")
        params = to_unicode(params.to_ical(sorted=sorted))
        return cls(f"{name};{params}:{values}")

    def parts(self) -> tuple[str, Parameters, str]:
        """Split the content line into ``name``, ``parameters``, and ``values`` parts.

        Backslash escapes and double-quoted sections are honoured while
        looking for the delimiters, and URL-encoded characters in the value
        are left intact.

        Example with parameter:

        .. code-block:: text

            DESCRIPTION;ALTREP="cid:part1.0001@example.org":The Fall'98 Wild

        Example without parameters:

        .. code-block:: text

            DESCRIPTION:The Fall'98 Wild

        Raises:
            ValueError: if the line cannot be split into valid parts.
        """
        try:
            first_delim: int | None = None
            colon_at: int | None = None
            quoted: bool = False
            after_backslash: bool = False

            for index, char in enumerate(self):
                if char == '"' and not after_backslash:
                    quoted = not quoted
                elif char == "\\" and not quoted:
                    # The next character is escaped; skip the reset below.
                    after_backslash = True
                    continue
                elif not (quoted or after_backslash):
                    # The first ':' or ';' ends the name.
                    if first_delim is None and char in ":;":
                        first_delim = index
                    # The first ':' starts the value.
                    if colon_at is None and char == ":":
                        colon_at = index

                after_backslash = False

            if not colon_at:
                # No colon found - value is empty, use end of string
                colon_at = len(self)

            # Without a delimiter the whole string is handed to
            # validate_token so that it gets rejected there.
            name = self[:first_delim] if first_delim else self
            validate_token(name)

            if not first_delim or first_delim + 1 == colon_at:
                # No delimiter or empty parameter section
                raise ValueError("Invalid content line")  # noqa: TRY301
            # Parameters are percent-escaped before parsing and unescaped
            # afterwards so escaped commas, semicolons, etc. in parameter
            # values are not mistaken for separators.
            raw_params = escape_string(self[first_delim + 1 : colon_at])
            parsed = Parameters.from_ical(raw_params, strict=self.strict)
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in parsed.items()
            )
            # Unescape backslash sequences in values but preserve URL encoding
            values = unescape_backslash(self[colon_at + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Build a content line from folded iCalendar text by removing the folds."""
        text = to_unicode(ical)
        # A fold is a line break followed by either a space or a tab.
        unfolded = UFOLD.sub("", text)
        return cls(unfolded, strict=strict)

    def to_ical(self):
        """Return the line folded with ``foldline`` and encoded to bytes."""
        folded = foldline(self)
        return folded.encode(DEFAULT_ENCODING)

577 

578 

class Contentlines(list):
    """A list of content lines.

    This assumes iCalendar files are generally a few kilobytes in size, for
    which a plain list is efficient. For huge files an iterator would
    probably be a better fit.
    """

    def to_ical(self):
        """Serialise every non-empty line and join them with CRLF.

        The result always ends with a trailing CRLF.
        """
        encoded = [line.to_ical() for line in self if line]
        return b"\r\n".join(encoded) + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parse text into content lines.

        The text is unfolded, split on line breaks, and each non-empty piece
        becomes a ``Contentline``. An empty string is appended as the last
        element.

        Raises:
            ValueError: if anything goes wrong while unfolding or splitting.
        """
        text = to_unicode(st)
        try:
            # A fold is a line break followed by either a space or a tab.
            raw_lines = NEWLINE.split(UFOLD.sub("", text))
            lines = cls(Contentline(raw) for raw in raw_lines if raw)
            lines.append("")  # '\r\n' at the end of every content line
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return lines

601 

602 

# Names exported by ``from <this module> import *``: the module-level regex
# constants first, then the classes, then the helper functions.
__all__ = [
    "FOLD",
    "NAME",
    "NEWLINE",
    "QUNSAFE_CHAR",
    "QUOTABLE",
    "UFOLD",
    "UNSAFE_CHAR",
    "Contentline",
    "Contentlines",
    "Parameters",
    "dquote",
    "escape_char",
    "escape_string",
    "foldline",
    "param_value",
    "q_join",
    "q_split",
    "rfc_6868_escape",
    "rfc_6868_unescape",
    "unescape_backslash",
    "unescape_char",
    "unescape_list_or_string",
    "unescape_string",
    "validate_param_value",
    "validate_token",
]