Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser/string.py: 69%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

42 statements  

1"""Functions for manipulating strings and bytes.""" 

2 

3import re 

4 

5from icalendar.parser_tools import DEFAULT_ENCODING 

6 

7 

8def escape_char(text: str | bytes) -> str | bytes: 

9 r"""Format value according to iCalendar TEXT escaping rules. 

10 

11 Escapes special characters in text values according to :rfc:`5545#section-3.3.11` 

12 rules. 

13 The order of replacements matters to avoid double-escaping. 

14 

15 Parameters: 

16 text: The text to escape. 

17 

18 Returns: 

19 The escaped text with special characters escaped. 

20 

21 Note: 

22 The replacement order is critical: 

23 

24 1. ``\N`` -> ``\n`` (normalize newlines to lowercase) 

25 2. ``\`` -> ``\\`` (escape backslashes) 

26 3. ``;`` -> ``\;`` (escape semicolons) 

27 4. ``,`` -> ``\,`` (escape commas) 

28 5. ``\r\n`` -> ``\n`` (normalize line endings) 

29 6. ``"\n"`` -> ``r"\n"`` (transform a newline character to a literal, or raw, 

30 newline character) 

31 """ 

32 assert isinstance(text, (str, bytes)) 

33 # NOTE: ORDER MATTERS! 

34 return ( 

35 text.replace(r"\N", "\n") 

36 .replace("\\", "\\\\") 

37 .replace(";", r"\;") 

38 .replace(",", r"\,") 

39 .replace("\r\n", r"\n") 

40 .replace("\n", r"\n") 

41 ) 

42 

43 

44def unescape_char(text: str | bytes) -> str | bytes | None: 

45 r"""Unescape iCalendar TEXT values. 

46 

47 Reverses the escaping applied by :func:`escape_char` according to 

48 :rfc:`5545#section-3.3.11` TEXT escaping rules. 

49 

50 Parameters: 

51 text: The escaped text. 

52 

53 Returns: 

54 The unescaped text, or ``None`` if ``text`` is neither ``str`` nor ``bytes``. 

55 

56 Note: 

57 The replacement order is critical to avoid double-unescaping: 

58 

59 1. ``\N`` -> ``\n`` (intermediate step) 

60 2. ``\r\n`` -> ``\n`` (normalize line endings) 

61 3. ``\n`` -> newline (unescape newlines) 

62 4. ``\,`` -> ``,`` (unescape commas) 

63 5. ``\;`` -> ``;`` (unescape semicolons) 

64 6. ``\\`` -> ``\`` (unescape backslashes last) 

65 """ 

66 assert isinstance(text, (str, bytes)) 

67 # NOTE: ORDER MATTERS! 

68 if isinstance(text, str): 

69 return ( 

70 text.replace("\\N", "\\n") 

71 .replace("\r\n", "\n") 

72 .replace("\\n", "\n") 

73 .replace("\\,", ",") 

74 .replace("\\;", ";") 

75 .replace("\\\\", "\\") 

76 ) 

77 if isinstance(text, bytes): 

78 return ( 

79 text.replace(b"\\N", b"\\n") 

80 .replace(b"\r\n", b"\n") 

81 .replace(b"\\n", b"\n") 

82 .replace(b"\\,", b",") 

83 .replace(b"\\;", b";") 

84 .replace(b"\\\\", b"\\") 

85 ) 

86 return None 

87 

88 

def foldline(line: str, limit: int = 75, fold_sep: str = "\r\n ") -> str:
    """Fold a content line as described in RFC 5545.

    RFC 5545 says lines of text SHOULD NOT be longer than 75 octets,
    excluding the line break, and that longer lines SHOULD be split by
    inserting a CRLF immediately followed by a single linear white-space
    character (SPACE or HTAB).

    Parameters:
        line: The unfolded line. It must not contain a newline character.
        limit: The size limit; every emitted piece holds fewer than
            ``limit`` characters (ASCII input) or encoded bytes (otherwise).
        fold_sep: The separator inserted at every fold.

    Returns:
        The folded line.
    """
    assert isinstance(line, str)
    assert "\n" not in line

    # Fast path: for pure-ASCII text one character is one byte, so the line
    # can simply be cut into fixed-width slices.
    if line.isascii():
        width = limit - 1
        slices = [
            line[start : start + width] for start in range(0, len(line), width)
        ]
        return fold_sep.join(slices)

    # General path: count encoded bytes so that a multi-byte character is
    # never split across a fold.
    pieces: list[str] = []
    used = 0
    for character in line:
        size = len(character.encode(DEFAULT_ENCODING))
        if used + size >= limit:
            pieces.append(fold_sep)
            # The new physical line starts with this character; the
            # separator itself is not counted.
            used = size
        else:
            used += size
        pieces.append(character)

    return "".join(pieces)

122 

123 

def escape_string(val: str) -> str:
    r"""Replace backslash escape sequences with percent-encoded hex values.

    Each supported backslash-escaped character is swapped for its
    percent-encoded equivalent. This is used for parameter parsing to
    preserve escaped characters during processing.

    Parameters:
        val: The string with backslash escapes.

    Returns:
        The string with backslash escapes converted to percent encoding.

    Note:
        Conversions, applied in this order:

        - ``\,`` -> ``%2C``
        - ``\:`` -> ``%3A``
        - ``\;`` -> ``%3B``
        - ``\\`` -> ``%5C``
    """
    # Each replacement runs on the output of the previous one, so the order
    # of this table is part of the behavior.
    substitutions = (
        (r"\,", "%2C"),
        (r"\:", "%3A"),
        (r"\;", "%3B"),
        (r"\\", "%5C"),
    )
    encoded = val
    for escaped, percent_code in substitutions:
        encoded = encoded.replace(escaped, percent_code)
    return encoded

152 

153 

def unescape_string(val: str) -> str:
    r"""Turn percent-encoded hex values back into their characters.

    Reverses :func:`escape_string` by converting the percent-encoded hex
    values back to the characters they stand for. This is used for
    parameter parsing.

    Parameters:
        val: The string with percent-encoded values.

    Returns:
        The string with percent encoding converted to characters.

    Note:
        Conversions, applied in this order:

        - ``%2C`` -> ``,``
        - ``%3A`` -> ``:``
        - ``%3B`` -> ``;``
        - ``%5C`` -> ``\``
    """
    commas_restored = val.replace("%2C", ",")
    colons_restored = commas_restored.replace("%3A", ":")
    semicolons_restored = colons_restored.replace("%3B", ";")
    return semicolons_restored.replace("%5C", "\\")

180 

181 

# ``[\w-]`` is allowed because of the iCalendar RFC;
# ``.`` is allowed in addition because of the vCard RFC.
NAME = re.compile(r"[\w.-]+")


def validate_token(name: str) -> None:
    r"""Check that a name is a valid iCalendar token.

    The whole name has to match the NAME regex pattern (``[\w.-]+``),
    which is used here for the :rfc:`5545` token syntax.

    Parameters:
        name: The token name to validate.

    Raises:
        ValueError: If the name is not a valid token. The offending name
            is passed as the exception argument.
    """
    # Guard clause: anything that is not matched from start to end by the
    # pattern (including the empty string) is rejected.
    if NAME.fullmatch(name) is None:
        raise ValueError(name)

203 

204 

# Names exported by a star import of this module.
__all__ = [
    "escape_char",
    "escape_string",
    "foldline",
    "unescape_char",
    "unescape_string",
    "validate_token",
]