Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser/string.py: 69%

1"""Functions for manipulating strings and bytes."""

3import re

5from icalendar.parser_tools import DEFAULT_ENCODING

8def escape_char(text: str | bytes) -> str | bytes:

9 r"""Format value according to iCalendar TEXT escaping rules.

11 Escapes special characters in text values according to :rfc:`5545#section-3.3.11`

12 rules.

13 The order of replacements matters to avoid double-escaping.

15 Parameters:

16 text: The text to escape.

18 Returns:

19 The escaped text with special characters escaped.

21 Note:

22 The replacement order is critical:

24 1. ``\N`` -> ``\n`` (normalize newlines to lowercase)

25 2. ``\`` -> ``\\`` (escape backslashes)

26 3. ``;`` -> ``\;`` (escape semicolons)

27 4. ``,`` -> ``\,`` (escape commas)

28 5. ``\r\n`` -> ``\n`` (normalize line endings)

29 6. ``"\n"`` -> ``r"\n"`` (transform a newline character to a literal, or raw,

30 newline character)

31 """

32 assert isinstance(text, (str, bytes))

33 # NOTE: ORDER MATTERS!

34 return (

35 text.replace(r"\N", "\n")

36 .replace("\\", "\\\\")

37 .replace(";", r"\;")

38 .replace(",", r"\,")

39 .replace("\r\n", r"\n")

40 .replace("\n", r"\n")

41 )

44def unescape_char(text: str | bytes) -> str | bytes | None:

45 r"""Unescape iCalendar TEXT values.

47 Reverses the escaping applied by :func:`escape_char` according to

48 :rfc:`5545#section-3.3.11` TEXT escaping rules.

50 Parameters:

51 text: The escaped text.

53 Returns:

54 The unescaped text, or ``None`` if ``text`` is neither ``str`` nor ``bytes``.

56 Note:

57 The replacement order is critical to avoid double-unescaping:

59 1. ``\N`` -> ``\n`` (intermediate step)

60 2. ``\r\n`` -> ``\n`` (normalize line endings)

61 3. ``\n`` -> newline (unescape newlines)

62 4. ``\,`` -> ``,`` (unescape commas)

63 5. ``\;`` -> ``;`` (unescape semicolons)

64 6. ``\\`` -> ``\`` (unescape backslashes last)

65 """

66 assert isinstance(text, (str, bytes))

67 # NOTE: ORDER MATTERS!

68 if isinstance(text, str):

69 return (

70 text.replace("\\N", "\\n")

71 .replace("\r\n", "\n")

72 .replace("\\n", "\n")

73 .replace("\\,", ",")

74 .replace("\\;", ";")

75 .replace("\\\\", "\\")

76 )

77 if isinstance(text, bytes):

78 return (

79 text.replace(b"\\N", b"\\n")

80 .replace(b"\r\n", b"\n")

81 .replace(b"\\n", b"\n")

82 .replace(b"\\,", b",")

83 .replace(b"\\;", b";")

84 .replace(b"\\\\", b"\\")

85 )

86 return None

def foldline(line: str, limit: int = 75, fold_sep: str = "\r\n ") -> str:
    """Fold a content line as described in RFC 5545.

    RFC 5545 says content lines SHOULD NOT exceed 75 octets (line break
    excluded) and that a longer line may be split between any two
    characters by inserting a CRLF followed by one linear white-space
    character (SPACE or HTAB).

    Parameters:
        line: The unfolded line. It must not contain a newline character.
        limit: The octet limit per line.
        fold_sep: The separator inserted at every fold.

    Returns:
        The line with ``fold_sep`` inserted at the fold positions.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    # Common case: in pure-ASCII text one character is one octet, so the
    # line can simply be sliced into chunks of ``limit - 1`` characters.
    if line.isascii():
        step = limit - 1
        starts = range(0, len(line), step)
        return fold_sep.join(line[start : start + step] for start in starts)

    # General case: measure every character in its encoded form and fold
    # before the character that would make the running total reach the limit.
    pieces: list[str] = []
    used = 0
    for ch in line:
        width = len(ch.encode(DEFAULT_ENCODING))
        if used + width >= limit:
            pieces.append(fold_sep)
            used = width
        else:
            used += width
        pieces.append(ch)

    return "".join(pieces)

122

123

def escape_string(val: str) -> str:
    r"""Turn backslash escape sequences into percent-encoded hex values.

    Each supported backslash-escaped character is replaced by its
    percent-encoded form. This is used for parameter parsing to preserve
    escaped characters during processing.

    Parameters:
        val: The string with backslash escapes.

    Returns:
        The string with backslash escapes converted to percent encoding.

    Note:
        The replacements are applied one after another, in this order:

        - ``\,`` -> ``%2C``
        - ``\:`` -> ``%3A``
        - ``\;`` -> ``%3B``
        - ``\\`` -> ``%5C``
    """
    substitutions = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    encoded = val
    for sequence, replacement in substitutions:
        encoded = encoded.replace(sequence, replacement)
    return encoded

152

153

def unescape_string(val: str) -> str:
    r"""Turn percent-encoded hex values back into plain characters.

    Reverses :func:`escape_string` by replacing each supported
    percent-encoded value with the character it stands for. This is used
    for parameter parsing.

    Parameters:
        val: The string with percent-encoded values.

    Returns:
        The string with percent encoding converted to characters.

    Note:
        The replacements are applied one after another, in this order:

        - ``%2C`` -> ``,``
        - ``%3A`` -> ``:``
        - ``%3B`` -> ``;``
        - ``%5C`` -> ``\``
    """
    substitutions = (
        ("%2C", ","),
        ("%3A", ":"),
        ("%3B", ";"),
        ("%5C", "\\"),
    )
    decoded = val
    for encoded, character in substitutions:
        decoded = decoded.replace(encoded, character)
    return decoded

180

181

# [\w-] because of the iCalendar RFC
# . because of the vCard RFC
NAME = re.compile(r"[\w.-]+")


def validate_token(name: str) -> None:
    r"""Check that a name is a valid iCalendar token.

    The name is accepted only if the NAME regex pattern (``[\w.-]+``)
    matches it from its first to its last character.

    Parameters:
        name: The token name to validate.

    Raises:
        ValueError: If the name is not a valid token. The offending name is
            passed as the exception argument.
    """
    if NAME.fullmatch(name) is None:
        raise ValueError(name)

203

204

# Names exported by a star-import of this module, in alphabetical order.
__all__ = [
    "escape_char",
    "escape_string",
    "foldline",
    "unescape_char",
    "unescape_string",
    "validate_token",
]