Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser.py: 88%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

235 statements  

1"""This module parses and generates contentlines as defined in RFC 5545 

2(iCalendar), but will probably work for other MIME types with similar syntax. 

3Eg. RFC 2426 (vCard) 

4 

5It is stupid in the sense that it treats the content purely as strings. No type 

6conversion is attempted. 

7""" 

8 

9from __future__ import annotations 

10 

11import functools 

12import os 

13import re 

14from typing import TYPE_CHECKING 

15 

16from icalendar.caselessdict import CaselessDict 

17from icalendar.parser_tools import ( 

18 DEFAULT_ENCODING, 

19 ICAL_TYPE, 

20 SEQUENCE_TYPES, 

21 to_unicode, 

22) 

23 

24if TYPE_CHECKING: 

25 from icalendar.enums import VALUE 

26 

27 

def escape_char(text):
    """Format value according to iCalendar TEXT escaping rules.

    Backslashes, semicolons and commas are prefixed with a backslash and
    line breaks (``\\r\\n`` or ``\\n``) are written as the two characters
    backslash + ``n``. A literal backslash + ``N`` in the input is treated
    as a line break.

    :param text: The ``str`` or ``bytes`` value to escape.
    :return: The escaped value, of the same type as ``text``.
    """
    assert isinstance(text, (str, bytes))
    # NOTE: ORDER MATTERS!
    # The backslash + "N" form is turned into a real newline first so that
    # doubling the backslashes afterwards does not mangle it; real newlines
    # are converted last.
    if isinstance(text, bytes):
        # bytes.replace() only accepts bytes arguments, so the bytes case
        # needs its own set of literals.
        return (
            text.replace(rb"\N", b"\n")
            .replace(b"\\", b"\\\\")
            .replace(b";", rb"\;")
            .replace(b",", rb"\,")
            .replace(b"\r\n", rb"\n")
            .replace(b"\n", rb"\n")
        )
    return (
        text.replace(r"\N", "\n")
        .replace("\\", "\\\\")
        .replace(";", r"\;")
        .replace(",", r"\,")
        .replace("\r\n", r"\n")
        .replace("\n", r"\n")
    )

40 

41 

def unescape_char(text):
    """Undo iCalendar TEXT escaping on a ``str`` or ``bytes`` value.

    Escaped line breaks become ``\\n``, ``\\r\\n`` is normalised to ``\\n``
    and escaped commas, semicolons and backslashes lose their leading
    backslash.

    :param text: The escaped ``str`` or ``bytes`` value.
    :return: The unescaped value of the same type, or ``None`` when the
        input is neither ``str`` nor ``bytes`` (only reachable when
        assertions are disabled).
    """
    assert isinstance(text, (str, bytes))
    # NOTE: ORDER MATTERS!
    if isinstance(text, str):
        substitutions = (
            ("\\N", "\\n"),
            ("\r\n", "\n"),
            ("\\n", "\n"),
            ("\\,", ","),
            ("\\;", ";"),
            ("\\\\", "\\"),
        )
    elif isinstance(text, bytes):
        substitutions = (
            (b"\\N", b"\\n"),
            (b"\r\n", b"\n"),
            (b"\\n", b"\n"),
            (b"\\,", b","),
            (b"\\;", b";"),
            (b"\\\\", b"\\"),
        )
    else:
        return None

    for escaped, plain in substitutions:
        text = text.replace(escaped, plain)
    return text

64 

65 

def foldline(line, limit=75, fold_sep="\r\n "):
    """Fold a content line as described in RFC 5545.

    Lines of text SHOULD NOT be longer than 75 octets, excluding the line
    break. Longer lines are split by inserting ``fold_sep`` (by default a
    CRLF followed by a single space) between two characters.

    :param line: The unfolded line; must be a ``str`` without ``\\n``.
    :param limit: Octet limit per physical line.
    :param fold_sep: The separator inserted at every fold.
    :return: The folded line as ``str``.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    # Pure ASCII is the common case: one character is one octet, so the
    # line can simply be sliced into fixed-size pieces.
    try:
        line.encode("ascii")
    except (UnicodeEncodeError, UnicodeDecodeError):
        ascii_only = False
    else:
        ascii_only = True

    if ascii_only:
        step = limit - 1
        pieces = (line[start : start + step] for start in range(0, len(line), step))
        return fold_sep.join(pieces)

    # Otherwise count the encoded size of every character so that a fold is
    # never placed inside a multi-byte sequence.
    output = []
    used = 0
    for char in line:
        width = len(char.encode(DEFAULT_ENCODING))
        if used + width >= limit:
            output.append(fold_sep)
            used = 0
        used += width
        output.append(char)

    return "".join(output)

99 

100 

101################################################################# 

102# Property parameter stuff 

103 

104 

def param_value(value, always_quote=False):
    """Return the serialized form of a parameter value.

    :param value: A sequence of strings, a single string, or an object
        offering ``to_ical()`` that returns encoded bytes.
    :param always_quote: Wrap the result in double quotes even if it holds
        no character that would require quoting.
    :return: The RFC 6868 escaped and, where needed, quoted value.
    """
    if isinstance(value, SEQUENCE_TYPES):
        return q_join(map(rfc_6868_escape, value), always_quote=always_quote)
    if not isinstance(value, str):
        # Objects are serialized first and then treated like plain strings,
        # so that ``always_quote`` is honoured for them as well.
        value = value.to_ical().decode(DEFAULT_ENCODING)
    return dquote(rfc_6868_escape(value), always_quote=always_quote)

112 

113 

# Could be improved

# Pattern for a property or parameter name (token).
# [\w-] because of the iCalendar RFC
# . because of the vCard RFC
NAME = re.compile(r"[\w.-]+")

# Characters rejected in an unquoted parameter value: control characters
# (the ranges skip \x09, so a tab is accepted), DEL, the double quote and the
# separators "," ":" ";".
UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f",:;]')
# Characters rejected in a quoted parameter value: the same control
# characters and the double quote; the separators are allowed here.
QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f"]')
# A fold: one or more line breaks followed by a space or a tab.
# FOLD is the bytes pattern, UFOLD the str pattern.
FOLD = re.compile(b"(\r?\n)+[ \t]")
UFOLD = re.compile("(\r?\n)+[ \t]")
# A single line break, with or without the carriage return.
NEWLINE = re.compile(r"\r?\n")

125 

126 

def validate_token(name):
    """Check that ``name`` consists solely of characters allowed by ``NAME``.

    :param name: The property or parameter name to check.
    :raises ValueError: If ``name`` is empty or contains any other character.
    """
    if NAME.fullmatch(name) is None:
        raise ValueError(name)

132 

133 

def validate_param_value(value, quoted=True):
    """Reject parameter values that contain forbidden characters.

    :param value: The parameter value without surrounding quotes.
    :param quoted: Whether the value was enclosed in double quotes; quoted
        values are checked against ``QUNSAFE_CHAR``, others against
        ``UNSAFE_CHAR``.
    :raises ValueError: If a forbidden character is present.
    """
    if quoted:
        forbidden = QUNSAFE_CHAR
    else:
        forbidden = UNSAFE_CHAR
    if forbidden.search(value) is not None:
        raise ValueError(value)

138 

139 

# Characters whose presence in a parameter value causes the value
# to be enclosed in double quotes.
QUOTABLE = re.compile("[,;:’]")  # noqa: RUF001


def dquote(val, always_quote=False):
    """Enclose parameter values containing [,;:] in double quotes.

    :param val: The parameter value.
    :param always_quote: Quote the value even without a quotable character.
    :return: The value with double quotes replaced by single quotes, wrapped
        in double quotes when required.
    """
    # A double-quote character must not appear inside a parameter value,
    # so it is swapped for a single-quote character.
    sanitized = val.replace('"', "'")
    if not (QUOTABLE.search(sanitized) or always_quote):
        return sanitized
    return f'"{sanitized}"'

153 

154 

155# parsing helper 

def q_split(st, sep=",", maxsplit=-1):
    """Split ``st`` on ``sep`` while ignoring separators inside double quotes.

    :param st: The string to split.
    :param sep: The single-character separator.
    :param maxsplit: Maximum number of splits; ``0`` returns ``[st]`` and a
        negative value means no limit.
    :return: The list of parts. An empty ``st`` yields an empty list.
    """
    if maxsplit == 0:
        return [st]

    pieces = []
    start = 0
    quoted = False
    done = 0
    last = len(st) - 1
    for pos, char in enumerate(st):
        if char == '"':
            quoted = not quoted
        if char == sep and not quoted:
            pieces.append(st[start:pos])
            start = pos + 1
            done += 1
        # The remainder is added once the end of the string is reached or
        # the split budget is used up.
        if pos == last or done == maxsplit:
            pieces.append(st[start:])
            break
    return pieces

177 

178 

def q_join(lst, sep=",", always_quote=False):
    """Join the items of ``lst`` with ``sep``, quoting each one via ``dquote``.

    :param lst: Iterable of string values.
    :param sep: Separator placed between the items.
    :param always_quote: Passed through to ``dquote`` for every item.
    :return: The joined string.
    """
    quoted_items = [dquote(item, always_quote=always_quote) for item in lst]
    return sep.join(quoted_items)

182 

183 

def single_string_parameter(func):
    """Build a property that stores a single string parameter.

    The decorated function only supplies the name (used as the mapping key)
    and the docstring of the resulting property.

    :param func: The function to turn into a property.
    :return: A ``property`` that reads, writes and deletes the mapping entry
        named after ``func``.
    """
    key = func.__name__

    @functools.wraps(func)
    def fget(self: Parameters):
        """Get the value."""
        return self.get(key)

    def fdel(self: Parameters):
        """Delete the value."""
        self.pop(key, None)

    def fset(self: Parameters, value: str | None):
        """Set the value"""
        # Assigning None is the same as deleting the entry.
        if value is not None:
            self[key] = value
        else:
            fdel(self)

    return property(fget, fset, fdel, doc=func.__doc__)

206 

207 

class Parameters(CaselessDict):
    """Parse and generate property parameter strings.

    The class knows nothing about data types; it only deals with the
    textual structure of the parameters.
    """

    # Parameters whose values are always enclosed in double quotes.
    always_quoted = (
        "ALTREP",
        "DELEGATED-FROM",
        "DELEGATED-TO",
        "DIR",
        "MEMBER",
        "SENT-BY",
        # Part of X-APPLE-STRUCTURED-LOCATION
        "X-ADDRESS",
        "X-TITLE",
    )
    # Parameters that are quoted as soon as the value contains one of the
    # listed characters.
    quote_also = {
        # This is escaped in the RFC
        "CN": " '",
    }

    def params(self):
        """Return the keys; RFC 5545 calls them parameters."""
        return self.keys()

    def to_ical(self, sorted: bool = True):  # noqa: A002, FBT001
        """Serialize the parameters as ``KEY=value`` pairs joined by ``;``.

        :param sorted: Sort the items before serializing them.
        :return: The encoded parameter string as ``bytes``.
        """
        pairs = list(self.items())
        if sorted:
            pairs.sort()

        chunks = []
        for key, value in pairs:
            upper_key = key.upper()
            trigger_chars = self.quote_also.get(upper_key)
            always_quote = upper_key in self.always_quoted or (
                trigger_chars and any(c in value for c in trigger_chars)
            )
            rendered = param_value(value, always_quote=always_quote)
            if isinstance(rendered, str):
                rendered = rendered.encode(DEFAULT_ENCODING)
            # CaselessDict keys are always unicode
            chunks.append(upper_key.encode(DEFAULT_ENCODING) + b"=" + rendered)
        return b";".join(chunks)

    @classmethod
    def from_ical(cls, st, strict=False):
        """Parse a parameter string in iCalendar text format.

        :param st: The parameter section of a content line.
        :param strict: Upper-case unquoted values before storing them.
        :return: A new instance holding the parsed parameters.
        :raises ValueError: If a parameter cannot be parsed or validated.
        """
        parsed = cls()
        for param in q_split(st, ";"):
            try:
                key, val = q_split(param, "=", maxsplit=1)
                validate_token(key)
                # Property parameter values that are not in quoted
                # strings are case insensitive.
                vals = []
                for v in q_split(val, ","):
                    if v.startswith('"') and v.endswith('"'):
                        inner = v.strip('"')
                        validate_param_value(inner, quoted=True)
                    else:
                        validate_param_value(v, quoted=False)
                        inner = v.upper() if strict else v
                    vals.append(rfc_6868_unescape(inner))
                if len(vals) > 1:
                    parsed[key] = vals
                elif vals:
                    parsed[key] = vals[0]
                else:
                    # Nothing was split off; keep the raw value.
                    parsed[key] = val
            except ValueError as exc:  # noqa: PERF203
                raise ValueError(
                    f"{param!r} is not a valid parameter string: {exc}"
                ) from exc
        return parsed

    @single_string_parameter
    def value(self) -> VALUE | str | None:
        """The VALUE parameter from :rfc:`5545`.

        Description:
            This parameter specifies the value type and format of
            the property value. The property values MUST be of a single value
            type. For example, a "RDATE" property cannot have a combination
            of DATE-TIME and TIME value types.

            If the property's value is the default value type, then this
            parameter need not be specified. However, if the property's
            default value type is overridden by some other allowable value
            type, then this parameter MUST be specified.

            Applications MUST preserve the value data for x-name and iana-
            token values that they don't recognize without attempting to
            interpret or parse the value data.
        """

312 

313 

def escape_string(val):
    """Replace backslash-escaped separators with percent codes.

    The sequences backslash-comma, backslash-colon, backslash-semicolon and
    a double backslash become ``%2C``, ``%3A``, ``%3B`` and ``%5C``.

    :param val: The string to convert.
    :return: The string with the four escape sequences replaced.
    """
    # f'{i:02X}'
    table = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    for sequence, code in table:
        val = val.replace(sequence, code)
    return val

322 

323 

def unescape_string(val):
    """Turn the percent codes ``%2C``, ``%3A``, ``%3B`` and ``%5C`` back into
    a comma, colon, semicolon and backslash.

    :param val: The string to convert.
    :return: The string with the four percent codes replaced.
    """
    table = (
        ("%2C", ","),
        ("%3A", ":"),
        ("%3B", ";"),
        ("%5C", "\\"),
    )
    for code, char in table:
        val = val.replace(code, char)
    return val

331 

332 

_unescape_backslash_regex = re.compile(r"\\([\\,;:nN])")


def unescape_backslash(val: str):
    r"""Resolve backslash escape sequences in iCalendar text.

    In contrast to :py:meth:`unescape_string`, only real backslash escapes
    per :rfc:`5545` are touched. URL-encoded values such as ``%3A`` stay as
    they are.

    All sequences are handled in one regex pass, so the output of one
    replacement is never interpreted as the start of another.
    """

    def _resolve(match):
        char = match.group(1)
        if char in "nN":
            return "\n"
        return char

    return _unescape_backslash_regex.sub(_resolve, val)

348 

349 

RFC_6868_UNESCAPE_REGEX = re.compile(r"\^\^|\^n|\^'")


def rfc_6868_unescape(param_value: str) -> str:
    """Take care of :rfc:`6868` unescaping.

    - ^^ -> ^
    - ^n -> system specific newline
    - ^' -> "
    - ^ with others stay intact
    """

    def _decode(match):
        token = match.group(0)
        if token == "^n":
            return os.linesep
        if token == "^'":
            return '"'
        # The only remaining sequence the pattern can match is "^^".
        return "^"

    return RFC_6868_UNESCAPE_REGEX.sub(_decode, param_value)

369 

370 

RFC_6868_ESCAPE_REGEX = re.compile(r'\^|\r\n|\r|\n|"')


def rfc_6868_escape(param_value: str) -> str:
    """Take care of :rfc:`6868` escaping.

    - ^ -> ^^
    - " -> ^'
    - newline -> ^n
    """

    def _encode(match):
        found = match.group(0)
        if found == "^":
            return "^^"
        if found == '"':
            return "^'"
        # Everything else the pattern matches is a line break
        # (CRLF, CR or LF).
        return "^n"

    return RFC_6868_ESCAPE_REGEX.sub(_encode, param_value)

391 

392 

def unescape_list_or_string(val):
    """Apply ``unescape_string`` to a string or to every item of a list.

    :param val: A string or a list of strings.
    :return: The unescaped string, or a new list of unescaped strings.
    """
    if not isinstance(val, list):
        return unescape_string(val)
    return list(map(unescape_string, val))

397 

398 

399######################################### 

400# parsing and generation of content lines 

401 

402 

class Contentline(str):
    """A content line: a string that can be folded and split into its
    name, parameters and value.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create a content line from ``value``.

        :param value: The line content; converted with ``to_unicode``.
        :param strict: Stored on the instance and passed to the parameter
            parser by :meth:`parts`.
        :param encoding: Encoding handed to ``to_unicode``.
        """
        text = to_unicode(value, encoding=encoding)
        assert "\n" not in text, (
            "Content line can not contain unescaped new line characters."
        )
        instance = super().__new__(cls, text)
        instance.strict = strict
        return instance

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002, FBT001
    ):
        """Assemble a content line from name, parameters and value.

        :param name: The property name.
        :param params: The property parameters.
        :param values: The value; objects without ``to_ical`` are wrapped
            in ``vText`` first.
        :param sorted: Passed on to ``Parameters.to_ical``.
        :return: The new content line.
        """
        assert isinstance(params, Parameters)
        if not hasattr(values, "to_ical"):
            from icalendar.prop import vText

            values = vText(values)
        raw_value = values.to_ical()

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        value_text = to_unicode(raw_value)
        if not params:
            return cls(f"{name}:{value_text}")
        param_text = to_unicode(params.to_ical(sorted=sorted))
        return cls(f"{name};{param_text}:{value_text}")

    def parts(self) -> tuple[str, Parameters, str]:
        """Split the content line into ``name``, ``parameters``, and ``values`` parts.

        Backslash escapes and double-quoted sections are respected while
        looking for the delimiters, so URL-encoded characters in values are
        not corrupted.

        Example with parameter:

        .. code-block:: text

            DESCRIPTION;ALTREP="cid:part1.0001@example.org":The Fall'98 Wild

        Example without parameters:

        .. code-block:: text

            DESCRIPTION:The Fall'98 Wild

        :raises ValueError: If the line cannot be split into valid parts.
        """
        try:
            name_end: int | None = None
            colon_at: int | None = None
            quoted: bool = False
            after_backslash: bool = False

            for index, char in enumerate(self):
                if char == '"' and not after_backslash:
                    quoted = not quoted
                elif char == "\\" and not quoted:
                    # Remember the backslash for the next character only.
                    after_backslash = True
                    continue
                elif not (quoted or after_backslash):
                    # The first ":" or ";" ends the name.
                    if name_end is None and char in ":;":
                        name_end = index
                    # The first ":" starts the value.
                    if colon_at is None and char == ":":
                        colon_at = index

                after_backslash = False

            if not colon_at:
                # No colon found - value is empty, use end of string
                colon_at = len(self)

            # Without a delimiter the whole string is handed to
            # validate_token so that it can reject it.
            name = self[:name_end] if name_end else self
            validate_token(name)

            if not name_end or name_end + 1 == colon_at:
                # No delimiter or empty parameter section
                raise ValueError("Invalid content line")  # noqa: TRY301
            # Parameters still go through escape/unescape so that commas,
            # semicolons, etc. in parameter values are handled properly.
            param_str = escape_string(self[name_end + 1 : colon_at])
            parsed = Parameters.from_ical(param_str, strict=self.strict)
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in parsed.items()
            )
            # Unescape backslash sequences in values but preserve URL encoding
            values = unescape_backslash(self[colon_at + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Unfold the content lines in an iCalendar into long content lines."""
        text = to_unicode(ical)
        # A fold is a line break followed by either a space or a tab.
        unfolded = UFOLD.sub("", text)
        return cls(unfolded, strict=strict)

    def to_ical(self):
        """Fold the line with ``foldline`` and return it encoded as bytes."""
        folded = foldline(self)
        return folded.encode(DEFAULT_ENCODING)

528 

529 

class Contentlines(list):
    """A list of content lines.

    iCalendar files are assumed to be a few kilobytes in size, for which a
    list is efficient. For huge files an iterator should probably be used
    instead.
    """

    def to_ical(self):
        """Join the encoded, non-empty lines with CRLF, ending with a CRLF."""
        encoded = [line.to_ical() for line in self if line]
        return b"\r\n".join(encoded) + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parse a string into content lines.

        :param st: The iCalendar text; converted with ``to_unicode``.
        :return: The content lines followed by one empty string.
        :raises ValueError: If unfolding or splitting the text fails.
        """
        text = to_unicode(st)
        try:
            # A fold is a line break followed by either a space or a tab.
            joined = UFOLD.sub("", text)
            non_empty = filter(None, NEWLINE.split(joined))
            lines = cls(map(Contentline, non_empty))
            lines.append("")  # '\r\n' at the end of every content line
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return lines

552 

553 

# Names exported by ``from icalendar.parser import *``: the module-level
# regex constants first, then the classes, then the helper functions.
__all__ = [
    "FOLD",
    "NAME",
    "NEWLINE",
    "QUNSAFE_CHAR",
    "QUOTABLE",
    "UFOLD",
    "UNSAFE_CHAR",
    "Contentline",
    "Contentlines",
    "Parameters",
    "dquote",
    "escape_char",
    "escape_string",
    "foldline",
    "param_value",
    "q_join",
    "q_split",
    "rfc_6868_escape",
    "rfc_6868_unescape",
    "unescape_backslash",
    "unescape_char",
    "unescape_list_or_string",
    "unescape_string",
    "validate_param_value",
    "validate_token",
]