Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser/content_line.py

1"""parsing and generation of content lines"""

3import re

5from icalendar.parser.parameter import Parameters

6from icalendar.parser.property import unescape_backslash, unescape_list_or_string

7from icalendar.parser.string import (

8 escape_string,

9 foldline,

10 unescape_string,

11 validate_token,

12)

13from icalendar.parser_tools import DEFAULT_ENCODING, ICAL_TYPE, to_unicode

15UFOLD = re.compile("(\r?\n)+[ \t]")

16NEWLINE = re.compile(r"\r?\n")

18OWS = " \t"

19OWS_AROUND_DELIMITERS_RE = re.compile(r"[ \t]*([;=])[ \t]*")

22def _strip_ows_around_delimiters(st: str, delimiters: str = ";=") -> str:

23 """Strip optional whitespace around delimiters outside of quoted sections,

24 respecting backslash escapes so that escaped delimiters are not treated as

25 separators.

27 This is a lenient parsing helper (used when strict=False) to support

28 iCalendar content lines that contain extra whitespace around tokens.

29 """

30 if not st:

31 return st

33 # Fast path for the common case in non-strict mode:

34 # no whitespace in the parameter section means there is nothing to normalize.

35 if " " not in st and "\t" not in st:

36 return st

38 # Fast regex-based path for simple parameter sections without quoting/escaping.

39 if delimiters == ";=" and '"' not in st and "\\" not in st:

40 return OWS_AROUND_DELIMITERS_RE.sub(r"\1", st).strip()

42 out: list[str] = []

43 pending_ws: list[str] = []

44 in_quotes = False

45 escaped = False

46 # True only if the last appended char was a raw delimiter.

47 last_was_delimiter = False

49 def flush_pending() -> None:

50 nonlocal pending_ws

51 if not pending_ws:

52 return

53 if not last_was_delimiter:

54 out.extend(pending_ws)

55 pending_ws.clear()

57 for ch in st:

58 # Handle escaped character (the backslash set escaped in previous iteration)

59 if escaped:

60 flush_pending()

61 out.append(ch)

62 escaped = False

63 last_was_delimiter = False

64 continue

66 # Handle backslash to escape next character

67 if ch == "\\" and not in_quotes:

68 flush_pending()

69 out.append(ch)

70 escaped = True

71 last_was_delimiter = False

72 continue

74 # Handle quote toggling

75 if ch == '"' and not escaped:

76 in_quotes = not in_quotes

77 flush_pending()

78 out.append(ch)

79 last_was_delimiter = False

80 continue

82 # Whitespace outside quotes is buffered

83 if not in_quotes and not escaped and ch in OWS:

84 pending_ws.append(ch)

85 continue

87 # Raw delimiter (unescaped and outside quotes)

88 if not in_quotes and not escaped and ch in delimiters:

89 pending_ws.clear()

90 while out and out[-1] in OWS:

91 out.pop()

92 out.append(ch)

93 last_was_delimiter = True

94 continue

96 # Regular character

97 flush_pending()

98 out.append(ch)

99 last_was_delimiter = False

100

101 if pending_ws and not last_was_delimiter:

102 out.extend(pending_ws)

103

104 return "".join(out).strip()

105

106

class Contentline(str):
    """A content line is basically a string that can be folded and parsed into
    parts.

    The ``strict`` attribute selects the parsing mode used by :meth:`parts`:
    when it is false, whitespace inside the name and around parameter
    delimiters is removed before the line is interpreted.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create a content line from ``value``.

        :param value: The text of the line; converted with ``to_unicode``.
        :param strict: Stored on the instance and consulted by :meth:`parts`.
        :param encoding: Passed to ``to_unicode`` when converting ``value``.
        :raises AssertionError: If the converted value contains ``"\\n"``.
        """
        value = to_unicode(value, encoding=encoding)
        # NOTE: assert statements are skipped when Python runs with -O, so
        # this check is not enforced in optimized mode.
        assert "\n" not in value, (
            "Content line can not contain unescaped new line characters."
        )
        self = super().__new__(cls, value)
        self.strict = strict
        return self

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002
    ):
        """Turn a parts into a content line.

        :param name: The property name.
        :param params: The parameters; must be a ``Parameters`` instance.
        :param values: The property value.  If it has a ``to_ical`` method
            that method is used, otherwise the value is wrapped in ``vText``.
        :param sorted: Forwarded to ``params.to_ical(sorted=...)``.
        :return: ``name;params:values`` if the parameters serialize to a
            non-empty string, ``name:values`` otherwise.
        """
        assert isinstance(params, Parameters)
        if hasattr(values, "to_ical"):
            values = values.to_ical()
        else:
            from icalendar.prop import vText

            values = vText(values).to_ical()

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        values = to_unicode(values)
        if params:
            params = to_unicode(params.to_ical(sorted=sorted))
            if params:
                # some parameter values can be skipped during serialization
                return cls(f"{name};{params}:{values}")
        return cls(f"{name}:{values}")

    def parts(self) -> tuple[str, Parameters, str]:
        """Split the content line into ``name``, ``parameters``, and ``values`` parts.

        Properly handles escaping with backslashes and double-quote sections
        to avoid corrupting URL-encoded characters in values.

        Example with parameter:

        .. code-block:: ics

            DESCRIPTION;ALTREP="cid:part1.0001@example.org":The Fall'98 Wild

        Example without parameters:

        .. code-block:: ics

            DESCRIPTION:The Fall'98 Wild

        :return: A ``(name, parameters, values)`` tuple.
        :raises ValueError: If the line cannot be split, for example when it
            contains no delimiter, has an empty parameter section, or a
            helper rejects the name or the parameters.
        """
        try:
            name_split: int | None = None
            value_split: int | None = None
            in_quotes: bool = False
            escaped: bool = False

            for i, ch in enumerate(self):
                if ch == '"' and not escaped:
                    in_quotes = not in_quotes
                elif ch == "\\" and not in_quotes:
                    escaped = True
                    continue
                elif not in_quotes and not escaped:
                    # Find first delimiter for name
                    if ch in ":;" and name_split is None:
                        name_split = i
                    # Find value delimiter (first colon)
                    if ch == ":":
                        value_split = i
                        # Both split points are known now; the value itself
                        # does not need to be scanned.
                        break

                escaped = False

            # Validate parsing results
            if not value_split:
                # No colon found - value is empty, use end of string.
                # A colon at index 0 takes this branch as well.
                value_split = len(self)

            # Extract name - if no delimiter,
            # take whole string for validate_token to reject
            name = self[:name_split] if name_split else self
            if not self.strict:
                name = re.sub(r"[ \t]+", "", name.strip())
            validate_token(name)

            if not name_split or name_split + 1 == value_split:
                # No delimiter or empty parameter section
                raise ValueError("Invalid content line")  # noqa: TRY301
            # Parse parameters - they still need to be escaped/unescaped
            # for proper handling of commas, semicolons, etc. in parameter values
            raw_param_str = self[name_split + 1 : value_split]
            if not self.strict:
                raw_param_str = _strip_ows_around_delimiters(raw_param_str)
            param_str = escape_string(raw_param_str)
            params = Parameters.from_ical(param_str, strict=self.strict)
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in iter(params.items())
            )
            # Unescape backslash sequences in values but preserve URL encoding
            values = unescape_backslash(self[value_split + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Unfold the content lines in an iCalendar into long content lines.

        :param ical: The folded text; converted with ``to_unicode``.
        :param strict: Passed on to the constructor.
        :return: A new content line with all folds removed.
        """
        ical = to_unicode(ical)
        # a fold is one or more line breaks (LF, optionally preceded by CR)
        # followed by either a space or a tab
        return cls(UFOLD.sub("", ical), strict=strict)

    def to_ical(self):
        """Long content lines are folded so they are less than 75 characters
        wide.

        :return: The output of ``foldline`` encoded with ``DEFAULT_ENCODING``.
        """
        return foldline(self).encode(DEFAULT_ENCODING)

239

240

class Contentlines(list[Contentline]):
    """A list of content lines that can be parsed from and serialized to
    iCalendar text.

    The whole input is held in memory.  The original author assumed
    iCalendar files are generally a few kilobytes in size and noted that an
    iterator should probably be used for huge files instead.
    """

    def to_ical(self):
        """Serialize every non-empty line and join the results with CRLF.

        :return: The encoded lines, each terminated by ``b"\\r\\n"``.
        """
        encoded = [entry.to_ical() for entry in self if entry]
        return b"\r\n".join(encoded) + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parses a string into content lines.

        :param st: The iCalendar text; converted with ``to_unicode``.
        :return: The non-empty unfolded lines followed by one empty string.
        :raises ValueError: If unfolding or splitting the input fails.
        """
        st = to_unicode(st)
        try:
            # Remove the folds first, then split at the remaining line breaks.
            pieces = NEWLINE.split(UFOLD.sub("", st))
            # filter(None, ...) skips the empty strings the split produces.
            lines = cls(map(Contentline, filter(None, pieces)))
            # '\r\n' at the end of every content line
            lines.append("")
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return lines

263

264

# Names exported by ``from <this module> import *``.
__all__ = [
    "Contentline",
    "Contentlines",
]

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser/content_line.py: 96%

141 statements