Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser/string.py: 76%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

54 statements  

1"""Functions for manipulating strings and bytes.""" 

2 

3import re 

4 

5from icalendar.compatibility import deprecate_for_version_8 

6from icalendar.parser_tools import DEFAULT_ENCODING, to_unicode 

7 

8 

def _escape_char(text: str | bytes) -> str:
    r"""Escape a value as an iCalendar TEXT value.

    Applies the escaping rules of :rfc:`5545#section-3.3.11`. The input is
    first converted with ``to_unicode`` and then run through a fixed,
    ordered list of substitutions.

    Parameters:
        text: The text to escape.

    Returns:
        The text with special characters escaped.

    Note:
        The substitutions are applied one after another, and the order is
        essential to avoid double-escaping:

        1. literal ``\N`` -> newline character (so it is later emitted as
           lowercase ``\n``)
        2. ``\`` -> ``\\``
        3. ``;`` -> ``\;``
        4. ``,`` -> ``\,``
        5. CRLF -> literal ``\n``
        6. newline character -> literal ``\n``
    """
    assert isinstance(text, (str, bytes))
    escaped = to_unicode(text)
    # NOTE: ORDER MATTERS! Each pair is (needle, substitute), applied in turn.
    substitutions = (
        (r"\N", "\n"),
        ("\\", "\\\\"),
        (";", r"\;"),
        (",", r"\,"),
        ("\r\n", r"\n"),
        ("\n", r"\n"),
    )
    for needle, substitute in substitutions:
        escaped = escaped.replace(needle, substitute)
    return escaped

44 

45 

# Public name for ``_escape_char``, wrapped by ``deprecate_for_version_8``
# (imported from ``icalendar.compatibility``).
# NOTE(review): presumably the wrapper emits a deprecation warning when the
# alias is called -- confirm against ``icalendar.compatibility``.
escape_char = deprecate_for_version_8(_escape_char)
"""Format value according to iCalendar TEXT escaping rules.

.. deprecated:: 7.0.0
    Use the private :func:`_escape_char` internally. For external use,
    this function is deprecated. Please use alternative escaping methods
    or contact the maintainers.
"""

54 

55 

56def _unescape_char(text: str | bytes) -> str | bytes | None: 

57 r"""Unescape iCalendar TEXT values. 

58 

59 Reverses the escaping applied by :func:`_escape_char` according to 

60 :rfc:`5545#section-3.3.11` TEXT escaping rules. 

61 

62 Parameters: 

63 text: The escaped text. 

64 

65 Returns: 

66 The unescaped text, or ``None`` if ``text`` is neither ``str`` nor ``bytes``. 

67 

68 Note: 

69 The replacement order is critical to avoid double-unescaping: 

70 

71 1. ``\N`` -> ``\n`` (intermediate step) 

72 2. ``\r\n`` -> ``\n`` (normalize line endings) 

73 3. ``\n`` -> newline (unescape newlines) 

74 4. ``\,`` -> ``,`` (unescape commas) 

75 5. ``\;`` -> ``;`` (unescape semicolons) 

76 6. ``\\`` -> ``\`` (unescape backslashes last) 

77 """ 

78 assert isinstance(text, (str, bytes)) 

79 # NOTE: ORDER MATTERS! 

80 if isinstance(text, str): 

81 return ( 

82 text.replace("\\N", "\\n") 

83 .replace("\r\n", "\n") 

84 .replace("\\n", "\n") 

85 .replace("\\,", ",") 

86 .replace("\\;", ";") 

87 .replace("\\\\", "\\") 

88 ) 

89 if isinstance(text, bytes): 

90 return ( 

91 text.replace(b"\\N", b"\\n") 

92 .replace(b"\r\n", b"\n") 

93 .replace(b"\\n", b"\n") 

94 .replace(b"\\,", b",") 

95 .replace(b"\\;", b";") 

96 .replace(b"\\\\", b"\\") 

97 ) 

98 return None 

99 

100 

# Public name for ``_unescape_char``, wrapped by ``deprecate_for_version_8``
# (imported from ``icalendar.compatibility``).
# NOTE(review): presumably the wrapper emits a deprecation warning when the
# alias is called -- confirm against ``icalendar.compatibility``.
unescape_char = deprecate_for_version_8(_unescape_char)
"""Unescape iCalendar TEXT values.

.. deprecated:: 7.0.0
    Use the private :func:`_unescape_char` internally. For external use,
    this function is deprecated. Please use alternative unescaping methods
    or contact the maintainers.
"""

109 

110 

111def _foldline(line: str, limit: int = 75, fold_sep: str = "\r\n ") -> str: 

112 """Make a string folded as defined in RFC5545. 

113 

114 Lines of text SHOULD NOT be longer than 75 octets, excluding the line 

115 break. Long content lines SHOULD be split into a multiple line 

116 representations using a line "folding" technique. That is, a long 

117 line can be split between any two characters by inserting a CRLF 

118 immediately followed by a single linear white-space character (i.e., 

119 SPACE or HTAB). 

120 """ 

121 assert isinstance(line, str) 

122 assert "\n" not in line 

123 

124 # Use a fast and simple variant for the common case that line is all ASCII. 

125 try: 

126 line.encode("ascii") 

127 except (UnicodeEncodeError, UnicodeDecodeError): 

128 pass 

129 else: 

130 return fold_sep.join( 

131 line[i : i + limit - 1] for i in range(0, len(line), limit - 1) 

132 ) 

133 

134 ret_chars: list[str] = [] 

135 byte_count = 0 

136 for char in line: 

137 char_byte_len = len(char.encode(DEFAULT_ENCODING)) 

138 byte_count += char_byte_len 

139 if byte_count >= limit: 

140 ret_chars.append(fold_sep) 

141 byte_count = char_byte_len 

142 ret_chars.append(char) 

143 

144 return "".join(ret_chars) 

145 

146 

# Public name for ``_foldline``, wrapped by ``deprecate_for_version_8``
# (imported from ``icalendar.compatibility``).
# NOTE(review): presumably the wrapper emits a deprecation warning when the
# alias is called -- confirm against ``icalendar.compatibility``.
foldline = deprecate_for_version_8(_foldline)
"""Make a string folded as defined in RFC5545.

.. deprecated:: 7.0.0
    Use the private :func:`_foldline` internally.
"""

153 

154 

155def _escape_string(val: str) -> str: 

156 r"""Escape backslash sequences to URL-encoded hex values. 

157 

158 Converts backslash-escaped characters to their percent-encoded hex 

159 equivalents. This is used for parameter parsing to preserve escaped 

160 characters during processing. 

161 

162 Parameters: 

163 val: The string with backslash escapes. 

164 

165 Returns: 

166 The string with backslash escapes converted to percent encoding. 

167 

168 Note: 

169 Conversions: 

170 

171 - ``\,`` -> ``%2C`` 

172 - ``\:`` -> ``%3A`` 

173 - ``\;`` -> ``%3B`` 

174 - ``\\`` -> ``%5C`` 

175 """ 

176 # f'{i:02X}' 

177 return ( 

178 val.replace(r"\,", "%2C") 

179 .replace(r"\:", "%3A") 

180 .replace(r"\;", "%3B") 

181 .replace(r"\\", "%5C") 

182 ) 

183 

184 

# Public name for ``_escape_string``, wrapped by ``deprecate_for_version_8``
# (imported from ``icalendar.compatibility``).
# NOTE(review): presumably the wrapper emits a deprecation warning when the
# alias is called -- confirm against ``icalendar.compatibility``.
escape_string = deprecate_for_version_8(_escape_string)
"""Escape backslash sequences to URL-encoded hex values.

.. deprecated:: 7.0.0
    Use the private :func:`_escape_string` internally. For external use,
    this function is deprecated.
"""

192 

193 

194def _unescape_string(val: str) -> str: 

195 r"""Unescape URL-encoded hex values to their original characters. 

196 

197 Reverses :func:`_escape_string` by converting percent-encoded hex values 

198 back to their original characters. This is used for parameter parsing. 

199 

200 Parameters: 

201 val: The string with percent-encoded values. 

202 

203 Returns: 

204 The string with percent encoding converted to characters. 

205 

206 Note: 

207 Conversions: 

208 

209 - ``%2C`` -> ``,`` 

210 - ``%3A`` -> ``:`` 

211 - ``%3B`` -> ``;`` 

212 - ``%5C`` -> ``\`` 

213 """ 

214 return ( 

215 val.replace("%2C", ",") 

216 .replace("%3A", ":") 

217 .replace("%3B", ";") 

218 .replace("%5C", "\\") 

219 ) 

220 

221 

# Public name for ``_unescape_string``, wrapped by ``deprecate_for_version_8``
# (imported from ``icalendar.compatibility``).
# NOTE(review): presumably the wrapper emits a deprecation warning when the
# alias is called -- confirm against ``icalendar.compatibility``.
unescape_string = deprecate_for_version_8(_unescape_string)
"""Unescape URL-encoded hex values to their original characters.

.. deprecated:: 7.0.0
    Use the private :func:`_unescape_string` internally. For external use,
    this function is deprecated.
"""

229 

230 

# Pattern for one run of token characters:
# - ``\w`` and ``-`` because of the iCalendar RFC
# - ``.`` because of the vCard RFC
NAME = re.compile(r"[\w.-]+")


def validate_token(name: str) -> None:
    r"""Check that a name is a valid iCalendar token.

    A name is accepted when it is non-empty and consists entirely of
    characters matched by the ``NAME`` pattern (``[\w.-]+``), which models
    the :rfc:`5545` token syntax.

    Parameters:
        name: The token name to validate.

    Raises:
        ValueError: If the name is not a valid token. The offending name is
            the only exception argument.
    """
    # A full match means the whole name is a single run of allowed characters.
    if NAME.fullmatch(name) is None:
        raise ValueError(name)

252 

253 

# Names exported by ``from <this module> import *``: the private
# implementations, their deprecated public aliases, and ``validate_token``.
__all__ = [
    "_escape_char",
    "_escape_string",
    "_foldline",
    "_unescape_char",
    "_unescape_string",
    "escape_char",
    "escape_string",
    "foldline",
    "unescape_char",
    "unescape_string",
    "validate_token",
]

266]