Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser.py: 87%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

229 statements  

1"""This module parses and generates contentlines as defined in RFC 5545 

2(iCalendar), but will probably work for other MIME types with similar syntax. 

3Eg. RFC 2426 (vCard) 

4 

5It is stupid in the sense that it treats the content purely as strings. No type 

6conversion is attempted. 

7""" 

8 

9from __future__ import annotations 

10 

11import functools 

12import os 

13import re 

14from typing import TYPE_CHECKING 

15 

16from icalendar.caselessdict import CaselessDict 

17from icalendar.parser_tools import ( 

18 DEFAULT_ENCODING, 

19 ICAL_TYPE, 

20 SEQUENCE_TYPES, 

21 to_unicode, 

22) 

23 

24if TYPE_CHECKING: 

25 from icalendar.enums import VALUE 

26 

27 

def escape_char(text):
    r"""Format value according to iCalendar TEXT escaping rules.

    Accepts ``str`` or ``bytes`` and returns the same kind of object.

    The replacements are applied one after another, in this order:

    1. a literal backslash + ``N`` becomes a real newline,
    2. every backslash is doubled,
    3. ``;`` becomes ``\;``,
    4. ``,`` becomes ``\,``,
    5. CRLF becomes the two characters ``\n``,
    6. a remaining newline becomes the two characters ``\n``.

    Raises:
        AssertionError: if ``text`` is neither ``str`` nor ``bytes``.
    """
    assert isinstance(text, (str, bytes))
    # NOTE: ORDER MATTERS!
    # Backslashes are doubled before the steps that introduce new
    # backslashes, otherwise those new backslashes would be doubled too.
    steps = (
        (r"\N", "\n"),
        ("\\", "\\\\"),
        (";", r"\;"),
        (",", r"\,"),
        ("\r\n", r"\n"),
        ("\n", r"\n"),
    )
    if isinstance(text, bytes):
        # bytes.replace() only accepts bytes arguments; all patterns are ASCII.
        steps = tuple(
            (old.encode("ascii"), new.encode("ascii")) for old, new in steps
        )
    for old, new in steps:
        text = text.replace(old, new)
    return text

40 

41 

# One escape sequence (backslash followed by ``\``, ``n``, ``N``, ``,`` or
# ``;``) or a raw CRLF. Matching them in a single left-to-right pass keeps an
# escaped backslash from being re-read as the start of another escape.
_UNESCAPE_CHAR_STR_REGEX = re.compile(r"\\[\\nN,;]|\r\n")
_UNESCAPE_CHAR_STR_MAP = {
    "\\\\": "\\",
    "\\n": "\n",
    "\\N": "\n",
    "\\,": ",",
    "\\;": ";",
    "\r\n": "\n",
}
# The same table for bytes input; every pattern is plain ASCII.
_UNESCAPE_CHAR_BYTES_REGEX = re.compile(
    _UNESCAPE_CHAR_STR_REGEX.pattern.encode("ascii")
)
_UNESCAPE_CHAR_BYTES_MAP = {
    escaped.encode("ascii"): plain.encode("ascii")
    for escaped, plain in _UNESCAPE_CHAR_STR_MAP.items()
}


def unescape_char(text):
    r"""Reverse the iCalendar TEXT escaping for ``str`` or ``bytes``.

    Decodes ``\\`` to a backslash, ``\n`` and ``\N`` to a newline, ``\,`` to
    a comma and ``\;`` to a semicolon, and normalises a raw CRLF to a single
    newline. The result has the same type as the input.

    All sequences are decoded in one pass. Chaining ``replace()`` calls would
    decode an escaped backslash followed by ``n`` (``\\n``) into a backslash
    plus a newline instead of a backslash plus the letter ``n``.

    Raises:
        AssertionError: if ``text`` is neither ``str`` nor ``bytes``.
    """
    assert isinstance(text, (str, bytes))
    if isinstance(text, str):
        return _UNESCAPE_CHAR_STR_REGEX.sub(
            lambda match: _UNESCAPE_CHAR_STR_MAP[match.group(0)], text
        )
    if isinstance(text, bytes):
        return _UNESCAPE_CHAR_BYTES_REGEX.sub(
            lambda match: _UNESCAPE_CHAR_BYTES_MAP[match.group(0)], text
        )
    # Only reachable when assertions are disabled (python -O).
    return None

64 

65 

def foldline(line, limit=75, fold_sep="\r\n "):
    """Fold a content line as described in RFC 5545.

    Lines of text SHOULD NOT be longer than 75 octets, excluding the line
    break. A long line is split by inserting ``fold_sep`` (by default a CRLF
    followed by a single space) between two characters.

    ``line`` must be a ``str`` without any newline character. The folded
    string is returned; a line that is short enough comes back unchanged.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    if line.isascii():
        # Fast path: one character is one octet, so plain slicing is enough.
        width = limit - 1
        chunks = (
            line[start : start + width] for start in range(0, len(line), width)
        )
        return fold_sep.join(chunks)

    # Slow path: count encoded octets so a multi-byte character is never cut.
    pieces = []
    used = 0
    for char in line:
        size = len(char.encode(DEFAULT_ENCODING))
        used += size
        if used >= limit:
            pieces.append(fold_sep)
            # The character that triggered the fold opens the next line.
            used = size
        pieces.append(char)

    return "".join(pieces)

99 

100 

101################################################################# 

102# Property parameter stuff 

103 

104 

def param_value(value, always_quote=False):
    """Return ``value`` rendered as a property parameter value.

    - Instances of ``SEQUENCE_TYPES`` are RFC 6868 escaped item by item and
      joined with ``q_join``.
    - Strings are RFC 6868 escaped and passed through ``dquote``.
    - Anything else is serialised with its ``to_ical()`` method, decoded
      with ``DEFAULT_ENCODING`` and then treated like a string.

    ``always_quote`` forces double quotes around the result. It is honoured
    in all three branches, so a parameter that must be quoted stays quoted
    whatever the Python type of its value is.
    """
    if isinstance(value, SEQUENCE_TYPES):
        return q_join(map(rfc_6868_escape, value), always_quote=always_quote)
    if isinstance(value, str):
        return dquote(rfc_6868_escape(value), always_quote=always_quote)
    return dquote(
        rfc_6868_escape(value.to_ical().decode(DEFAULT_ENCODING)),
        always_quote=always_quote,
    )

112 

113 

# Could be improved
#
# Pattern for a name token:
#   [\w-]  because of the iCalendar RFC
#   .      because of the vCard RFC
NAME = re.compile(r"[\w.-]+")

# Characters rejected in a parameter value that is not quoted.
UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f",:;]')
# Characters rejected in a parameter value that is quoted.
QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f"]')
# A fold: one or more line breaks followed by a space or a tab.
# FOLD works on bytes, UFOLD on str.
FOLD = re.compile(b"(\r?\n)+[ \t]")
UFOLD = re.compile("(\r?\n)+[ \t]")
# A single line break, with or without the carriage return.
NEWLINE = re.compile(r"\r?\n")

125 

126 

def validate_token(name):
    """Check that ``name`` consists only of characters matched by ``NAME``.

    Returns ``None`` for a valid token.

    Raises:
        ValueError: with ``name`` as its argument when the string is empty
            or contains any character outside the ``NAME`` pattern.
    """
    if NAME.fullmatch(name) is None:
        raise ValueError(name)

132 

133 

def validate_param_value(value, quoted=True):
    """Reject parameter values that contain forbidden characters.

    ``QUNSAFE_CHAR`` is used when ``quoted`` is true and ``UNSAFE_CHAR``
    otherwise.

    Raises:
        ValueError: with ``value`` as its argument if a forbidden character
            occurs anywhere in ``value``.
    """
    pattern = UNSAFE_CHAR
    if quoted:
        pattern = QUNSAFE_CHAR
    if pattern.search(value) is not None:
        raise ValueError(value)

138 

139 

# Characters whose presence in a parameter value causes the value to be
# enclosed in double quotes.
QUOTABLE = re.compile("[,;:’]")  # noqa: RUF001

143 

144 

def dquote(val, always_quote=False):
    """Wrap a parameter value in double quotes when that is required.

    A double-quote character is forbidden inside a parameter value, so each
    one is first replaced by a single quote. The result is then enclosed in
    double quotes if it contains a ``QUOTABLE`` character or if
    ``always_quote`` is true; otherwise it is returned bare.
    """
    cleaned = val.replace('"', "'")
    must_quote = QUOTABLE.search(cleaned) or always_quote
    return f'"{cleaned}"' if must_quote else cleaned

153 

154 

155# parsing helper 

def q_split(st, sep=",", maxsplit=-1):
    """Split a string on ``sep``, taking double (q)uotes into consideration.

    A separator inside a pair of double quotes does not split. The quotes
    themselves are kept in the returned pieces. At most ``maxsplit`` splits
    are made; ``0`` returns ``[st]`` and a negative value means no limit.
    An empty ``st`` yields an empty list.
    """
    if maxsplit == 0:
        return [st]

    pieces = []
    start = 0
    done = 0
    quoted = False
    last = len(st) - 1
    for pos, char in enumerate(st):
        if char == '"':
            quoted = not quoted
        if char == sep and not quoted:
            pieces.append(st[start:pos])
            start = pos + 1
            done += 1
        # Flush the remainder at the end of the input or once the split
        # budget is used up.
        if pos == last or done == maxsplit:
            pieces.append(st[start:])
            break
    return pieces

177 

178 

def q_join(lst, sep=",", always_quote=False):
    """Join the items of ``lst`` with ``sep`` after passing each through ``dquote``.

    ``always_quote`` is forwarded to ``dquote`` for every item.
    """
    quoted_items = [dquote(item, always_quote=always_quote) for item in lst]
    return sep.join(quoted_items)

182 

183 

def single_string_parameter(func):
    """Create a parameter getter/setter for a single string parameter.

    The decorated function only supplies a name and a docstring: the
    returned property looks up ``func.__name__`` in the mapping it is
    attached to and carries ``func.__doc__`` as its documentation.

    - Reading returns ``self.get(name)``.
    - Assigning a value stores it under ``name``; assigning ``None`` removes
      the entry.
    - Deleting removes the entry and is a no-op when it is absent.
    """
    key = func.__name__

    def fdel(self: Parameters):
        """Delete the value."""
        self.pop(key, None)

    def fset(self: Parameters, value: str | None):
        """Set the value"""
        if value is not None:
            self[key] = value
            return
        fdel(self)

    @functools.wraps(func)
    def fget(self: Parameters):
        """Get the value."""
        return self.get(key)

    return property(fget, fset, fdel, doc=func.__doc__)

206 

class Parameters(CaselessDict):
    """Parser and generator of Property parameter strings.

    It knows nothing of datatypes. Its main concern is textual structure.
    """

    # The following parameters must always be enclosed in double quotes
    always_quoted = (
        "ALTREP",
        "DELEGATED-FROM",
        "DELEGATED-TO",
        "DIR",
        "MEMBER",
        "SENT-BY",
        # Part of X-APPLE-STRUCTURED-LOCATION
        "X-ADDRESS",
        "X-TITLE",
    )
    # Maps a parameter name to a string of characters; the value of that
    # parameter is quoted as soon as it contains one of them.
    quote_also = {
        # This is escaped in the RFC
        "CN": " '",
    }

    def params(self):
        """Return the parameter names.

        In RFC 5545 keys are called parameters, so this is to be consistent
        with the naming conventions.
        """
        return self.keys()

    def to_ical(self, sorted: bool = True):  # noqa: A002, FBT001
        """Serialise the parameters to bytes of the form ``KEY=value;KEY=value``.

        Keys are written in upper case and values are rendered with
        ``param_value``. When ``sorted`` is true the items are sorted before
        being written; otherwise the mapping's own order is used.
        """
        result = []
        items = list(self.items())
        if sorted:
            items.sort()

        for key, value in items:
            upper_key = key.upper()
            check_quoteable_characters = self.quote_also.get(upper_key)
            # Quote when the parameter always requires it, or when the value
            # contains one of the characters registered for this parameter.
            always_quote = upper_key in self.always_quoted or (
                check_quoteable_characters
                and any(c in value for c in check_quoteable_characters)
            )
            quoted_value = param_value(value, always_quote=always_quote)
            if isinstance(quoted_value, str):
                quoted_value = quoted_value.encode(DEFAULT_ENCODING)
            # CaselessDict keys are always unicode
            result.append(upper_key.encode(DEFAULT_ENCODING) + b"=" + quoted_value)
        return b";".join(result)

    @classmethod
    def from_ical(cls, st, strict=False):
        """Parse the parameter format from ical text format.

        ``st`` is split on ``;`` into ``key=value`` parameters and each value
        on ``,``; separators inside double quotes are ignored. A parameter
        with one value is stored as a string, one with several values as a
        list. With ``strict`` set, unquoted values are upper-cased.

        Raises:
            ValueError: if a parameter has no ``=``, has an invalid name, or
                has a value containing forbidden characters.
        """

        # parse into strings
        result = cls()
        for param in q_split(st, ";"):
            try:
                key, val = q_split(param, "=", maxsplit=1)
                validate_token(key)
                # Property parameter values that are not in quoted
                # strings are case insensitive.
                vals = []
                for v in q_split(val, ","):
                    if v.startswith('"') and v.endswith('"'):
                        v2 = v.strip('"')
                        validate_param_value(v2, quoted=True)
                        vals.append(rfc_6868_unescape(v2))
                    else:
                        validate_param_value(v, quoted=False)
                        if strict:
                            # Unescape before upper-casing: upper-casing
                            # first would turn "^n" into "^N", which the
                            # RFC 6868 decoder does not recognise.
                            vals.append(rfc_6868_unescape(v).upper())
                        else:
                            vals.append(rfc_6868_unescape(v))
                if not vals:
                    # q_split returns no pieces for an empty value.
                    result[key] = val
                elif len(vals) == 1:
                    result[key] = vals[0]
                else:
                    result[key] = vals
            except ValueError as exc:  # noqa: PERF203
                raise ValueError(
                    f"{param!r} is not a valid parameter string: {exc}"
                ) from exc
        return result

    @single_string_parameter
    def value(self) -> VALUE | str | None:
        """The VALUE parameter from :rfc:`5545`.

        Description:
            This parameter specifies the value type and format of
            the property value.  The property values MUST be of a single value
            type.  For example, a "RDATE" property cannot have a combination
            of DATE-TIME and TIME value types.

            If the property's value is the default value type, then this
            parameter need not be specified.  However, if the property's
            default value type is overridden by some other allowable value
            type, then this parameter MUST be specified.

            Applications MUST preserve the value data for x-name and iana-
            token values that they don't recognize without attempting to
            interpret or parse the value data.
        """

313 

314 

def escape_string(val):
    r"""Hide backslash-escaped separators behind percent codes.

    ``\,`` becomes ``%2C``, ``\:`` becomes ``%3A``, ``\;`` becomes ``%3B``
    and ``\\`` becomes ``%5C``. The replacements run one after another in
    that order.
    """
    # The codes follow the pattern f'{i:02X}'
    substitutions = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    for sequence, code in substitutions:
        val = val.replace(sequence, code)
    return val

323 

324 

def unescape_string(val):
    r"""Turn the percent codes written by ``escape_string`` back into characters.

    ``%2C`` becomes ``,``, ``%3A`` becomes ``:``, ``%3B`` becomes ``;`` and
    ``%5C`` becomes a single backslash. The replacements run one after
    another in that order.
    """
    substitutions = (
        ("%2C", ","),
        ("%3A", ":"),
        ("%3B", ";"),
        ("%5C", "\\"),
    )
    for code, char in substitutions:
        val = val.replace(code, char)
    return val

332 

333 

RFC_6868_UNESCAPE_REGEX = re.compile(r"\^\^|\^n|\^'")


def rfc_6868_unescape(param_value: str) -> str:
    """Take care of :rfc:`6868` unescaping.

    - ^^ -> ^
    - ^n -> system specific newline
    - ^' -> "
    - ^ with others stay intact
    """

    def _decode(match):
        """Return the plain text for one matched escape sequence."""
        token = match.group(0)
        if token == "^^":
            return "^"
        if token == "^n":
            return os.linesep
        if token == "^'":
            return '"'
        return token

    return RFC_6868_UNESCAPE_REGEX.sub(_decode, param_value)

353 

354 

RFC_6868_ESCAPE_REGEX = re.compile(r'\^|\r\n|\r|\n|"')


def rfc_6868_escape(param_value: str) -> str:
    """Take care of :rfc:`6868` escaping.

    - ^ -> ^^
    - " -> ^'
    - newline -> ^n

    A newline is any of CRLF, CR or LF; a CRLF pair yields a single ``^n``.
    """

    def _encode(match):
        """Return the escape sequence for one matched character or CRLF."""
        found = match.group(0)
        if found == "^":
            return "^^"
        if found == '"':
            return "^'"
        if found in ("\r\n", "\r", "\n"):
            return "^n"
        return found

    return RFC_6868_ESCAPE_REGEX.sub(_encode, param_value)

375 

376 

def unescape_list_or_string(val):
    """Apply ``unescape_string`` to a value or to every item of a list.

    A ``list`` yields a new list of unescaped items; anything else is passed
    to ``unescape_string`` directly.
    """
    if not isinstance(val, list):
        return unescape_string(val)
    return list(map(unescape_string, val))

381 

382 

383######################################### 

384# parsing and generation of content lines 

385 

386 

class Contentline(str):
    """A content line is basically a string that can be folded and parsed
    into parts.

    The ``strict`` attribute is handed to ``Parameters.from_ical`` when the
    line is split with ``parts()``.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create the line from ``value`` after converting it with ``to_unicode``."""
        text = to_unicode(value, encoding=encoding)
        assert "\n" not in text, (
            "Content line can not contain unescaped new line characters."
        )
        instance = super().__new__(cls, text)
        instance.strict = strict
        return instance

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002, FBT001
    ):
        """Turn a name, its parameters and a value into a content line.

        ``values`` is serialised through its own ``to_ical()`` if it has one
        and through ``vText`` otherwise. ``sorted`` is forwarded to
        ``params.to_ical``.
        """
        assert isinstance(params, Parameters)
        if not hasattr(values, "to_ical"):
            from icalendar.prop import vText

            values = vText(values)
        values = values.to_ical()

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        values = to_unicode(values)
        if not params:
            return cls(f"{name}:{values}")
        params = to_unicode(params.to_ical(sorted=sorted))
        return cls(f"{name};{params}:{values}")

    def parts(self):
        """Split the content line up into (name, parameters, values) parts.

        Raises:
            ValueError: if the name is missing or invalid, if no separator
                follows the name, if the parameter section is empty, or if
                the parameters cannot be parsed.
        """
        try:
            escaped = escape_string(self)
            name_end = None
            value_start = None
            quoted = False
            for pos, char in enumerate(escaped):
                if char == '"':
                    quoted = not quoted
                elif not quoted:
                    # Separators only count outside double quotes.
                    if char in ":;" and not name_end:
                        name_end = pos
                    if char == ":" and not value_start:
                        value_start = pos
            name = unescape_string(escaped[:name_end])
            if not name:
                raise ValueError("Key name is required")  # noqa: TRY301
            validate_token(name)
            if not value_start:
                # No ":" found: the value is empty and starts past the end.
                value_start = pos + 1
            if not name_end or name_end + 1 == value_start:
                raise ValueError("Invalid content line")  # noqa: TRY301
            parsed = Parameters.from_ical(
                escaped[name_end + 1 : value_start], strict=self.strict
            )
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in parsed.items()
            )
            values = unescape_string(escaped[value_start + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Unfold the content lines in an iCalendar into long content lines."""
        text = to_unicode(ical)
        # a fold is a line break followed by either a space or a tab
        unfolded = UFOLD.sub("", text)
        return cls(unfolded, strict=strict)

    def to_ical(self):
        """Return the line folded with ``foldline`` and encoded as bytes.

        Long content lines are folded so they are less than 75 characters
        wide.
        """
        folded = foldline(self)
        return folded.encode(DEFAULT_ENCODING)

480 

481 

class Contentlines(list):
    """A list of content lines.

    I assume that iCalendar files generally are a few kilobytes in size.
    Then this should be efficient. For huge files, an iterator should
    probably be used instead.
    """

    def to_ical(self):
        """Join the non-empty lines with CRLF and end the result with CRLF."""
        encoded = [line.to_ical() for line in self if line]
        return b"\r\n".join(encoded) + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parse a string into content lines.

        The text is unfolded, split at line breaks, and every non-empty line
        becomes a ``Contentline``. An empty string is appended at the end.

        Raises:
            ValueError: if unfolding, splitting or building the lines fails.
        """
        st = to_unicode(st)
        try:
            # a fold is a line break followed by either a space or a tab
            unfolded = UFOLD.sub("", st)
            non_empty = filter(None, NEWLINE.split(unfolded))
            lines = cls(map(Contentline, non_empty))
            lines.append("")  # '\r\n' at the end of every content line
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return lines

504 

505 

# Public API of this module: constants first, then classes, then functions.
__all__ = [
    # compiled regular expressions
    "FOLD",
    "NAME",
    "NEWLINE",
    "QUNSAFE_CHAR",
    "QUOTABLE",
    "UFOLD",
    "UNSAFE_CHAR",
    # classes
    "Contentline",
    "Contentlines",
    "Parameters",
    # functions
    "dquote",
    "escape_char",
    "escape_string",
    "foldline",
    "param_value",
    "q_join",
    "q_split",
    "rfc_6868_escape",
    "rfc_6868_unescape",
    "unescape_char",
    "unescape_list_or_string",
    "unescape_string",
    "validate_param_value",
    "validate_token",
]