Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser.py: 87%

1"""This module parses and generates contentlines as defined in RFC 5545

2(iCalendar), but will probably work for other MIME types with similar syntax.

3Eg. RFC 2426 (vCard)

5It is stupid in the sense that it treats the content purely as strings. No type

6conversion is attempted.

7"""

9from __future__ import annotations

11import functools

12import os

13import re

14from typing import TYPE_CHECKING

16from icalendar.caselessdict import CaselessDict

17from icalendar.parser_tools import (

18 DEFAULT_ENCODING,

19 ICAL_TYPE,

20 SEQUENCE_TYPES,

21 to_unicode,

22)

24if TYPE_CHECKING:

25 from icalendar.enums import VALUE

def escape_char(text):
    """Format value according to iCalendar TEXT escaping rules.

    Backslashes, semicolons and commas are prefixed with a backslash and
    line breaks (``CRLF`` or ``LF``) are written as the two characters
    ``\\n``. A literal ``\\N`` in the input is treated as a line break.

    :param text: the ``str`` or ``bytes`` value to escape.
    :returns: the escaped value, of the same type as ``text``.
    """
    assert isinstance(text, (str, bytes))
    # NOTE: ORDER MATTERS!
    # The backslash has to be doubled before any new backslash is added,
    # and CRLF has to be handled before a bare LF.
    if isinstance(text, bytes):
        # bytes.replace() only accepts bytes arguments, so bytes input
        # needs its own set of literals.
        return (
            text.replace(b"\\N", b"\n")
            .replace(b"\\", b"\\\\")
            .replace(b";", b"\\;")
            .replace(b",", b"\\,")
            .replace(b"\r\n", b"\\n")
            .replace(b"\n", b"\\n")
        )
    return (
        text.replace(r"\N", "\n")
        .replace("\\", "\\\\")
        .replace(";", r"\;")
        .replace(",", r"\,")
        .replace("\r\n", r"\n")
        .replace("\n", r"\n")
    )

# One escape sequence (backslash followed by one of ``\ , ; n N``) or a raw
# CRLF. Matching all of them in a single pass keeps the output of one
# replacement from being re-interpreted by the next one.
_UNESCAPE_CHAR_STR = re.compile(r"\\[\\,;nN]|\r\n")
_UNESCAPE_CHAR_BYTES = re.compile(rb"\\[\\,;nN]|\r\n")


def _unescape_char_str(match):
    """Return the replacement for one sequence matched in a ``str``."""
    # The second character decides: "n"/"N" (escaped newline) and "\n"
    # (tail of a raw CRLF) all become a line feed, anything else is the
    # escaped character itself.
    second = match.group(0)[1]
    return "\n" if second in "nN\n" else second


def _unescape_char_bytes(match):
    """Return the replacement for one sequence matched in ``bytes``."""
    second = match.group(0)[1:]
    return b"\n" if second in (b"n", b"N", b"\n") else second


def unescape_char(text):
    """Reverse the iCalendar TEXT escaping applied by ``escape_char``.

    ``\\n`` and ``\\N`` become a line feed, ``\\,`` ``\\;`` and ``\\\\``
    become the bare character, and a raw ``CRLF`` is normalised to ``LF``.
    The input is scanned once from left to right, so an escaped backslash
    followed by ``n`` yields a backslash and the letter ``n`` rather than a
    backslash and a line feed.

    :param text: the ``str`` or ``bytes`` value to unescape.
    :returns: the unescaped value, of the same type as ``text``.
    """
    assert isinstance(text, (str, bytes))
    if isinstance(text, str):
        return _UNESCAPE_CHAR_STR.sub(_unescape_char_str, text)
    if isinstance(text, bytes):
        return _UNESCAPE_CHAR_BYTES.sub(_unescape_char_bytes, text)
    # Only reachable when assertions are disabled (python -O).
    return None

def foldline(line, limit=75, fold_sep="\r\n "):
    """Fold a content line as described in RFC 5545.

    Lines of text SHOULD NOT be longer than 75 octets, excluding the line
    break. A long line is split by inserting ``fold_sep`` (by default a
    CRLF followed by a single space) between two characters.

    :param line: the unfolded line; must be a ``str`` without ``"\\n"``.
    :param limit: the octet limit per line.
    :param fold_sep: the text inserted at every fold.
    :returns: the folded line as ``str``.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    # Fast path: in pure ASCII text one character is one octet, so the
    # line can simply be sliced into equally sized pieces.
    if line.isascii():
        step = limit - 1
        pieces = [line[start : start + step] for start in range(0, len(line), step)]
        return fold_sep.join(pieces)

    # Slow path: count encoded octets so a fold never lands inside a
    # multi-byte character.
    folded = []
    octets = 0
    for character in line:
        width = len(character.encode(DEFAULT_ENCODING))
        octets += width
        if octets >= limit:
            folded.append(fold_sep)
            octets = width
        folded.append(character)
    return "".join(folded)

100

101#################################################################

102# Property parameter stuff

103

104

def param_value(value, always_quote=False):
    """Return the textual form of a property parameter value.

    Sequences are escaped item by item and joined with ``q_join``; strings
    are escaped and passed to ``dquote``. Any other object is serialised
    with its ``to_ical()`` method first.

    :param value: a string, a sequence of strings, or an object offering
        ``to_ical()``.
    :param always_quote: forwarded to the quoting helper for strings and
        sequences.
    """
    if isinstance(value, SEQUENCE_TYPES):
        escaped_items = map(rfc_6868_escape, value)
        return q_join(escaped_items, always_quote=always_quote)
    if not isinstance(value, str):
        # Not a string: serialise it and decode the resulting bytes.
        serialised = value.to_ical().decode(DEFAULT_ENCODING)
        return dquote(rfc_6868_escape(serialised))
    return dquote(rfc_6868_escape(value), always_quote=always_quote)

112

113

# Could be improved

# Characters allowed in a token (see validate_token):
# [\w-] because of the iCalendar RFC
# . because of the vCard RFC
NAME = re.compile(r"[\w.-]+")

# Characters rejected by validate_param_value in an unquoted value.
UNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f",:;]')
# Characters rejected by validate_param_value in a quoted value.
QUNSAFE_CHAR = re.compile('[\x00-\x08\x0a-\x1f\x7f"]')
# One or more line breaks followed by a space or tab, for bytes input.
FOLD = re.compile(b"(\r?\n)+[ \t]")
# Same pattern as FOLD, for str input.
UFOLD = re.compile("(\r?\n)+[ \t]")
# A single line break, with or without a carriage return.
NEWLINE = re.compile(r"\r?\n")

125

126

def validate_token(name):
    """Check that ``name`` is made up only of characters allowed by ``NAME``.

    :raises ValueError: if ``name`` is empty or contains any other
        character; the offending name is the exception argument.
    """
    if NAME.fullmatch(name) is None:
        raise ValueError(name)

132

133

def validate_param_value(value, quoted=True):
    """Check a parameter value for forbidden characters.

    :param value: the parameter value without surrounding quotes.
    :param quoted: whether the value came from a quoted string; unquoted
        values are checked against the stricter ``UNSAFE_CHAR`` pattern.
    :raises ValueError: if a forbidden character is present.
    """
    pattern = UNSAFE_CHAR if not quoted else QUNSAFE_CHAR
    if pattern.search(value) is not None:
        raise ValueError(value)

138

139

# Characters whose presence in a parameter value causes the value
# to be enclosed in double quotes.
QUOTABLE = re.compile("[,;:’]")  # noqa: RUF001


def dquote(val, always_quote=False):
    """Wrap a parameter value in double quotes when that is required.

    A double-quote character is forbidden inside a parameter value, so
    every ``"`` is first replaced with a single quote. The result is then
    quoted if it contains a ``QUOTABLE`` character or if ``always_quote``
    is set; otherwise it is returned bare.
    """
    cleaned = val.replace('"', "'")
    must_quote = always_quote or QUOTABLE.search(cleaned)
    return f'"{cleaned}"' if must_quote else cleaned

153

154

155# parsing helper

def q_split(st, sep=",", maxsplit=-1):
    """Split ``st`` on ``sep`` while ignoring separators inside double quotes.

    :param st: the string to split.
    :param sep: the single separator character.
    :param maxsplit: maximum number of splits; ``0`` returns ``[st]`` and a
        negative value means no limit.
    :returns: the list of pieces; an empty ``st`` yields an empty list.
    """
    if maxsplit == 0:
        return [st]

    pieces = []
    start = 0
    final_index = len(st) - 1
    quoted = False
    done = 0
    for index, char in enumerate(st):
        if char == '"':
            quoted = not quoted
        if char == sep and not quoted:
            pieces.append(st[start:index])
            start = index + 1
            done += 1
        # Flush the remainder at the end of the input or once the split
        # budget is used up.
        if index == final_index or done == maxsplit:
            pieces.append(st[start:])
            break
    return pieces

177

178

def q_join(lst, sep=",", always_quote=False):
    """Join the items of ``lst`` with ``sep`` after passing each to ``dquote``.

    ``always_quote`` is forwarded to ``dquote`` for every item.
    """
    quoted_items = [dquote(entry, always_quote=always_quote) for entry in lst]
    return sep.join(quoted_items)

182

183

def single_string_parameter(func):
    """Turn ``func`` into a property backed by a single mapping entry.

    The entry is stored under the key ``func.__name__`` and the property
    takes its documentation from ``func.__doc__``. Reading a missing entry
    gives ``None``; assigning ``None`` removes the entry.
    """
    name = func.__name__

    def fdel(self: Parameters):
        """Delete the value."""
        self.pop(name, None)

    def fset(self: Parameters, value: str | None):
        """Set the value"""
        if value is not None:
            self[name] = value
            return
        fdel(self)

    @functools.wraps(func)
    def fget(self: Parameters):
        """Get the value."""
        return self.get(name)

    return property(fget, fset, fdel, doc=func.__doc__)

206

class Parameters(CaselessDict):
    """Parser and generator of Property parameter strings.

    It knows nothing of datatypes. Its main concern is textual structure.
    """

    # The following parameters must always be enclosed in double quotes
    always_quoted = (
        "ALTREP",
        "DELEGATED-FROM",
        "DELEGATED-TO",
        "DIR",
        "MEMBER",
        "SENT-BY",
        # Part of X-APPLE-STRUCTURED-LOCATION
        "X-ADDRESS",
        "X-TITLE",
    )
    # this is quoted should one of the values be present
    quote_also = {
        # This is escaped in the RFC
        "CN": " '",
    }

    def params(self):
        """In RFC 5545 keys are called parameters, so this is to be consistent
        with the naming conventions.
        """
        return self.keys()

    def to_ical(self, sorted: bool = True):  # noqa: A002, FBT001
        """Serialise the parameters as ``KEY=value;KEY=value`` bytes.

        :param sorted: sort the parameters before writing them.
        :returns: the encoded parameter string; keys are upper-cased.
        """
        result = []
        items = list(self.items())
        if sorted:
            items.sort()

        for key, value in items:
            upper_key = key.upper()
            check_quoteable_characters = self.quote_also.get(upper_key)
            # Quote when the parameter is listed in always_quoted, or when
            # the value contains one of the characters registered for this
            # parameter in quote_also.
            always_quote = upper_key in self.always_quoted or (
                check_quoteable_characters
                and any(c in value for c in check_quoteable_characters)
            )
            quoted_value = param_value(value, always_quote=always_quote)
            if isinstance(quoted_value, str):
                quoted_value = quoted_value.encode(DEFAULT_ENCODING)
            # CaselessDict keys are always unicode
            result.append(upper_key.encode(DEFAULT_ENCODING) + b"=" + quoted_value)
        return b";".join(result)

    @classmethod
    def from_ical(cls, st, strict=False):
        """Parses the parameter format from ical text format.

        :param st: the text between the property name and the value, e.g.
            ``KEY=value;OTHER="a","b"``.
        :param strict: upper-case values that are not enclosed in quotes.
        :returns: a new instance; a parameter with several comma-separated
            values is stored as a list.
        :raises ValueError: if a parameter cannot be parsed.
        """
        # parse into strings
        result = cls()
        for param in q_split(st, ";"):
            try:
                key, val = q_split(param, "=", maxsplit=1)
                validate_token(key)
                # Property parameter values that are not in quoted
                # strings are case insensitive.
                vals = []
                for v in q_split(val, ","):
                    if v.startswith('"') and v.endswith('"'):
                        v2 = v.strip('"')
                        validate_param_value(v2, quoted=True)
                        vals.append(rfc_6868_unescape(v2))
                    else:
                        validate_param_value(v, quoted=False)
                        unescaped = rfc_6868_unescape(v)
                        # Upper-case only after unescaping: "^n" is an
                        # escape sequence but "^N" is not, so changing the
                        # case first would lose encoded line breaks.
                        vals.append(unescaped.upper() if strict else unescaped)
                if not vals:
                    result[key] = val
                elif len(vals) == 1:
                    result[key] = vals[0]
                else:
                    result[key] = vals
            except ValueError as exc:  # noqa: PERF203
                raise ValueError(
                    f"{param!r} is not a valid parameter string: {exc}"
                ) from exc
        return result

    @single_string_parameter
    def value(self) -> VALUE | str | None:
        """The VALUE parameter from :rfc:`5545`.

        Description:
            This parameter specifies the value type and format of
            the property value.  The property values MUST be of a single value
            type.  For example, a "RDATE" property cannot have a combination
            of DATE-TIME and TIME value types.

            If the property's value is the default value type, then this
            parameter need not be specified.  However, if the property's
            default value type is overridden by some other allowable value
            type, then this parameter MUST be specified.

            Applications MUST preserve the value data for x-name and iana-
            token values that they don't recognize without attempting to
            interpret or parse the value data.
        """

313

314

def escape_string(val):
    """Replace backslash-escaped ``, : ;`` and ``\\`` with percent placeholders.

    Each two-character sequence is swapped for ``%`` plus the hexadecimal
    code of the escaped character (the ``f'{i:02X}'`` form).
    """
    substitutions = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    # Applied in this exact order, one after the other.
    for sequence, placeholder in substitutions:
        val = val.replace(sequence, placeholder)
    return val

323

324

def unescape_string(val):
    """Turn the percent placeholders ``%2C %3A %3B %5C`` into ``, : ; \\``."""
    substitutions = (
        ("%2C", ","),
        ("%3A", ":"),
        ("%3B", ";"),
        ("%5C", "\\"),
    )
    for placeholder, character in substitutions:
        val = val.replace(placeholder, character)
    return val

332

333

RFC_6868_UNESCAPE_REGEX = re.compile(r"\^\^|\^n|\^'")


def rfc_6868_unescape(param_value: str) -> str:
    """Decode the :rfc:`6868` caret sequences in a parameter value.

    - ``^^`` becomes ``^``
    - ``^n`` becomes the system specific newline (``os.linesep``)
    - ``^'`` becomes ``"``
    - a ``^`` followed by anything else is left untouched
    """
    decoded = {
        "^'": '"',
        "^n": os.linesep,
        "^^": "^",
    }

    def substitute(match):
        sequence = match.group(0)
        return decoded.get(sequence, sequence)

    return RFC_6868_UNESCAPE_REGEX.sub(substitute, param_value)

353

354

RFC_6868_ESCAPE_REGEX = re.compile(r'\^|\r\n|\r|\n|"')


def rfc_6868_escape(param_value: str) -> str:
    """Encode a parameter value with the :rfc:`6868` caret sequences.

    - ``^`` becomes ``^^``
    - ``"`` becomes ``^'``
    - a line break (``CRLF``, ``CR`` or ``LF``) becomes ``^n``
    """
    encoded = {
        '"': "^'",
        "^": "^^",
        "\r\n": "^n",
        "\r": "^n",
        "\n": "^n",
    }

    def substitute(match):
        found = match.group(0)
        return encoded.get(found, found)

    return RFC_6868_ESCAPE_REGEX.sub(substitute, param_value)

375

376

def unescape_list_or_string(val):
    """Apply ``unescape_string`` to ``val``, or to each item if it is a list."""
    if not isinstance(val, list):
        return unescape_string(val)
    return list(map(unescape_string, val))

381

382

383#########################################

384# parsing and generation of content lines

385

386

class Contentline(str):
    """A single content line: a string that can be folded for output and
    split into its name, parameters and value.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create the line from ``value`` after converting it with ``to_unicode``.

        ``strict`` is stored on the instance and handed to
        ``Parameters.from_ical`` by ``parts()``.
        """
        text = to_unicode(value, encoding=encoding)
        assert "\n" not in text, (
            "Content line can not contain unescaped new line characters."
        )
        line = super().__new__(cls, text)
        line.strict = strict
        return line

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002, FBT001
    ):
        """Build a content line from a name, parameters and a value.

        ``values`` is serialised with its own ``to_ical()`` when it has one
        and is wrapped in ``vText`` otherwise. ``sorted`` is forwarded to
        ``params.to_ical``.
        """
        assert isinstance(params, Parameters)
        if not hasattr(values, "to_ical"):
            from icalendar.prop import vText

            values = vText(values)
        serialised = values.to_ical()

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name_text = to_unicode(name)
        value_text = to_unicode(serialised)
        if not params:
            return cls(f"{name_text}:{value_text}")
        params_text = to_unicode(params.to_ical(sorted=sorted))
        return cls(f"{name_text};{params_text}:{value_text}")

    def parts(self):
        """Split the content line into ``(name, parameters, values)``.

        :raises ValueError: if the line has no name, the name is not a
            valid token, or the parameters cannot be parsed.
        """
        try:
            escaped = escape_string(self)
            name_end = None
            value_start = None
            quoted = False
            # Find the end of the name (first ":" or ";") and the start of
            # the value (first ":"), skipping anything in double quotes.
            for index, char in enumerate(escaped):
                if not quoted:
                    if char in ":;" and not name_end:
                        name_end = index
                    if char == ":" and not value_start:
                        value_start = index
                if char == '"':
                    quoted = not quoted
            name = unescape_string(escaped[:name_end])
            if not name:
                raise ValueError("Key name is required")  # noqa: TRY301
            validate_token(name)
            if not value_start:
                # No colon found: treat the value as starting past the end.
                value_start = index + 1
            if not name_end or name_end + 1 == value_start:
                raise ValueError("Invalid content line")  # noqa: TRY301
            parsed = Parameters.from_ical(
                escaped[name_end + 1 : value_start], strict=self.strict
            )
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in parsed.items()
            )
            values = unescape_string(escaped[value_start + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Create a content line from folded text by removing the folds."""
        text = to_unicode(ical)
        # a fold is a line break followed by either a space or a tab
        unfolded = UFOLD.sub("", text)
        return cls(unfolded, strict=strict)

    def to_ical(self):
        """Return the line folded with ``foldline`` and encoded as bytes."""
        folded = foldline(self)
        return folded.encode(DEFAULT_ENCODING)

480

481

class Contentlines(list):
    """A list of content lines.

    iCalendar files are assumed to be a few kilobytes in size, for which a
    plain list is efficient. For huge files an iterator should probably be
    used instead.
    """

    def to_ical(self):
        """Serialise all non-empty lines, each terminated by CRLF."""
        encoded = [line.to_ical() for line in self if line]
        return b"\r\n".join(encoded) + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parse text into content lines.

        :raises ValueError: if unfolding or splitting the text fails.
        """
        text = to_unicode(st)
        try:
            # a fold is a line break followed by either a space or a tab
            unfolded = UFOLD.sub("", text)
            lines = cls(
                [Contentline(raw) for raw in NEWLINE.split(unfolded) if raw]
            )
            lines.append("")  # '\r\n' at the end of every content line
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return lines

504

505

# Names exported by a star-import of this module.
__all__ = [
    "FOLD",
    "NAME",
    "NEWLINE",
    "QUNSAFE_CHAR",
    "QUOTABLE",
    "UFOLD",
    "UNSAFE_CHAR",
    "Contentline",
    "Contentlines",
    "Parameters",
    "dquote",
    "escape_char",
    "escape_string",
    "foldline",
    "param_value",
    "q_join",
    "q_split",
    "rfc_6868_escape",
    "rfc_6868_unescape",
    "unescape_char",
    "unescape_list_or_string",
    "unescape_string",
    "validate_param_value",
    "validate_token",
]

531]