Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser/content_line.py: 96%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

141 statements  

1"""parsing and generation of content lines""" 

2 

3import re 

4 

5from icalendar.parser.parameter import Parameters 

6from icalendar.parser.property import unescape_backslash, unescape_list_or_string 

7from icalendar.parser.string import ( 

8 escape_string, 

9 foldline, 

10 unescape_string, 

11 validate_token, 

12) 

13from icalendar.parser_tools import DEFAULT_ENCODING, ICAL_TYPE, to_unicode 

14 

# A fold: one or more line breaks (CRLF or bare LF) followed by exactly one
# space or tab. Substituting matches with "" joins a folded line back together.
UFOLD: re.Pattern[str] = re.compile("(\r?\n)+[ \t]")

# A single line break, either CRLF or bare LF; used to split text into lines.
NEWLINE: re.Pattern[str] = re.compile(r"\r?\n")

17 

18OWS = " \t" 

19OWS_AROUND_DELIMITERS_RE = re.compile(r"[ \t]*([;=])[ \t]*") 

20 

21 

22def _strip_ows_around_delimiters(st: str, delimiters: str = ";=") -> str: 

23 """Strip optional whitespace around delimiters outside of quoted sections, 

24 respecting backslash escapes so that escaped delimiters are not treated as 

25 separators. 

26 

27 This is a lenient parsing helper (used when strict=False) to support 

28 iCalendar content lines that contain extra whitespace around tokens. 

29 """ 

30 if not st: 

31 return st 

32 

33 # Fast path for the common case in non-strict mode: 

34 # no whitespace in the parameter section means there is nothing to normalize. 

35 if " " not in st and "\t" not in st: 

36 return st 

37 

38 # Fast regex-based path for simple parameter sections without quoting/escaping. 

39 if delimiters == ";=" and '"' not in st and "\\" not in st: 

40 return OWS_AROUND_DELIMITERS_RE.sub(r"\1", st).strip() 

41 

42 out: list[str] = [] 

43 pending_ws: list[str] = [] 

44 in_quotes = False 

45 escaped = False 

46 # True only if the last appended char was a raw delimiter. 

47 last_was_delimiter = False 

48 

49 def flush_pending() -> None: 

50 nonlocal pending_ws 

51 if not pending_ws: 

52 return 

53 if not last_was_delimiter: 

54 out.extend(pending_ws) 

55 pending_ws.clear() 

56 

57 for ch in st: 

58 # Handle escaped character (the backslash set escaped in previous iteration) 

59 if escaped: 

60 flush_pending() 

61 out.append(ch) 

62 escaped = False 

63 last_was_delimiter = False 

64 continue 

65 

66 # Handle backslash to escape next character 

67 if ch == "\\" and not in_quotes: 

68 flush_pending() 

69 out.append(ch) 

70 escaped = True 

71 last_was_delimiter = False 

72 continue 

73 

74 # Handle quote toggling 

75 if ch == '"' and not escaped: 

76 in_quotes = not in_quotes 

77 flush_pending() 

78 out.append(ch) 

79 last_was_delimiter = False 

80 continue 

81 

82 # Whitespace outside quotes is buffered 

83 if not in_quotes and not escaped and ch in OWS: 

84 pending_ws.append(ch) 

85 continue 

86 

87 # Raw delimiter (unescaped and outside quotes) 

88 if not in_quotes and not escaped and ch in delimiters: 

89 pending_ws.clear() 

90 while out and out[-1] in OWS: 

91 out.pop() 

92 out.append(ch) 

93 last_was_delimiter = True 

94 continue 

95 

96 # Regular character 

97 flush_pending() 

98 out.append(ch) 

99 last_was_delimiter = False 

100 

101 if pending_ws and not last_was_delimiter: 

102 out.extend(pending_ws) 

103 

104 return "".join(out).strip() 

105 

106 

class Contentline(str):
    """A single, unfolded content line.

    Behaves like ``str`` and additionally knows how to fold itself for
    output and how to split itself into name, parameters and value.
    """

    # ``strict`` is the only per-instance attribute; ``parts`` reads it to
    # choose between strict and lenient handling.
    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create a content line from ``value``, converted with ``to_unicode``."""
        text = to_unicode(value, encoding=encoding)
        assert "\n" not in text, (
            "Content line can not contain unescaped new line characters."
        )
        line = super().__new__(cls, text)
        line.strict = strict
        return line

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002
    ):
        """Assemble a content line from a name, parameters and a value.

        ``values`` is serialized through its own ``to_ical`` method when it
        has one; otherwise it is wrapped in ``vText`` first. The parameter
        section is left out when ``params`` is empty or serializes to an
        empty string.
        """
        assert isinstance(params, Parameters)
        if not hasattr(values, "to_ical"):
            from icalendar.prop import vText

            values = vText(values)
        encoded_value = values.to_ical()

        # TODO: after unicode only, remove this
        # ``to_ical`` returns encoded data; convert everything back to text.
        name_text = to_unicode(name)
        value_text = to_unicode(encoded_value)
        param_text = to_unicode(params.to_ical(sorted=sorted)) if params else ""

        if param_text:
            return cls(f"{name_text};{param_text}:{value_text}")
        # No parameters, or all of them were skipped during serialization.
        return cls(f"{name_text}:{value_text}")

    def parts(self) -> tuple[str, Parameters, str]:
        """Split the line into its ``name``, ``parameters`` and ``values``.

        The line is scanned once. Delimiters inside double quotes, and any
        character directly following a backslash outside quotes, are not
        treated as separators. The name ends at the first ``:`` or ``;``;
        the value starts after the first ``:``.

        With a parameter:

        .. code-block:: ics

            DESCRIPTION;ALTREP="cid:part1.0001@example.org":The Fall'98 Wild

        Without parameters:

        .. code-block:: ics

            DESCRIPTION:The Fall'98 Wild

        Raises:
            ValueError: if the line has no usable delimiter, the parameter
                section after a ``;`` is empty, or a helper raises
                ``ValueError`` while parsing. The original error is chained.
        """
        try:
            name_end: int | None = None
            value_delim: int | None = None
            quoted = False
            after_backslash = False

            for pos, char in enumerate(self):
                if char == "\\" and not quoted:
                    # Outside quotes every backslash (re)arms the escape,
                    # including one that directly follows another backslash.
                    after_backslash = True
                    continue
                if char == '"' and not after_backslash:
                    quoted = not quoted
                elif not (quoted or after_backslash):
                    if name_end is None and char in ":;":
                        name_end = pos
                    if value_delim is None and char == ":":
                        value_delim = pos
                # The escape only ever covers a single character.
                after_backslash = False

            # No colon found (or only at position 0): treat the value as
            # empty and let the parameter section run to the end.
            value_delim = value_delim or len(self)

            # Without a delimiter the whole line is handed to
            # validate_token so that it can reject it.
            if name_end:
                name = self[:name_end]
            else:
                name = self
            if not self.strict:
                name = re.sub(r"[ \t]+", "", name.strip())
            validate_token(name)

            if not name_end or name_end + 1 == value_delim:
                # No delimiter or empty parameter section
                raise ValueError("Invalid content line")  # noqa: TRY301

            # Parameters are escaped before parsing and unescaped afterwards
            # so that commas, semicolons, etc. in parameter values survive.
            param_section = self[name_end + 1 : value_delim]
            if not self.strict:
                param_section = _strip_ows_around_delimiters(param_section)
            parsed = Parameters.from_ical(
                escape_string(param_section), strict=self.strict
            )
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in parsed.items()
            )
            # Only backslash sequences are unescaped in the value.
            values = unescape_backslash(self[value_delim + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Build a content line from folded text by removing every fold."""
        # A fold is a line break followed by a single space or tab.
        unfolded = UFOLD.sub("", to_unicode(ical))
        return cls(unfolded, strict=strict)

    def to_ical(self):
        """Return the line folded by ``foldline`` and encoded to bytes."""
        folded = foldline(self)
        return folded.encode(DEFAULT_ENCODING)

239 

240 

class Contentlines(list[Contentline]):
    """A list of content lines.

    Everything is held in memory at once, on the assumption that iCalendar
    files are generally a few kilobytes in size. For huge files an iterator
    would probably be the better choice.
    """

    def to_ical(self):
        """Serialize all non-empty lines, each terminated by CRLF."""
        crlf = b"\r\n"
        body = crlf.join(line.to_ical() for line in self if line)
        return body + crlf

    @classmethod
    def from_ical(cls, st):
        """Parse text into content lines.

        The text is unfolded and split at line breaks; empty lines are
        skipped. A trailing empty string is appended to the result.

        Raises:
            ValueError: if unfolding, splitting or line construction fails
                for any reason. The original error is chained.
        """
        text = to_unicode(st)
        try:
            # A fold is a line break followed by a single space or tab.
            unfolded = UFOLD.sub("", text)
            non_empty = filter(None, NEWLINE.split(unfolded))
            lines = cls(map(Contentline, non_empty))
            lines.append("")  # '\r\n' at the end of every content line
        except Exception as e:
            raise ValueError("Expected StringType with content lines") from e
        return lines

263 

264 

# Names exported by ``from <module> import *``.
__all__ = [
    "Contentline",
    "Contentlines",
]