1"""Tools for parsing properties."""
2
3import re
4
5from icalendar.parser.string import unescape_string
6
7
def unescape_list_or_string(val: str | list[str]) -> str | list[str]:
    """Unescape either a single string or every string in a list.

    The actual unescaping is delegated to :func:`unescape_string`; this
    function only decides whether to apply it once or element by element.

    Parameters:
        val: A string, or a list of strings, to unescape.

    Returns:
        The unescaped string when ``val`` is not a list, otherwise a new
        list holding the unescaped elements in their original order.
    """
    # Anything that is not a list is handed to unescape_string as-is.
    if not isinstance(val, list):
        return unescape_string(val)
    return list(map(unescape_string, val))
23
24
# A backslash followed by one of: backslash, comma, semicolon, colon, n or N.
# The character after the backslash is captured as group 1.
_unescape_backslash_regex = re.compile(r"\\([\\,;:nN])")


def unescape_backslash(val: str):
    r"""Resolve backslash escape sequences in iCalendar text.

    In contrast to :py:meth:`unescape_string`, only real backslash escapes
    (see :rfc:`5545`) are processed and URL encoding is left alone, so
    URL-encoded values such as ``%3A`` inside URLs survive unchanged.

    All escape sequences are replaced in one regex pass, which means the
    output of one replacement is never scanned again for further escapes.

    Parameters:
        val: The text containing backslash escape sequences.

    Returns:
        The text with ``\n`` and ``\N`` turned into a newline and every
        other recognised sequence reduced to the character that follows
        the backslash. Unrecognised sequences are kept as they are.
    """

    def _replacement(match):
        escaped_char = match.group(1)
        if escaped_char in "nN":
            return "\n"
        return escaped_char

    return _unescape_backslash_regex.sub(_replacement, val)
40
41
def _split_on_unescaped(text: str, separator: str) -> list[str]:
    r"""Split text on unescaped occurrences of a separator and unescape parts.

    A backslash escapes the character that directly follows it, so an
    escaped separator never splits the text and an escaped backslash does
    not escape whatever comes after it. Each resulting part is passed
    through :func:`unescape_backslash`.

    Parameters:
        text: The text to split.
        separator: The single character to split on.

    Returns:
        The unescaped parts in order. Empty (falsy) input yields ``[""]``.
    """
    if not text:
        return [""]

    parts: list[str] = []
    start = 0  # Index at which the part currently being scanned begins.
    escaped = False  # True when the previous character was an escaping backslash.

    for index, char in enumerate(text):
        if escaped:
            # This character is consumed by the preceding backslash, so it
            # can neither split the text nor start a new escape sequence.
            escaped = False
        elif char == "\\":
            escaped = True
        elif char == separator:
            parts.append(unescape_backslash(text[start:index]))
            start = index + 1

    # Whatever follows the last separator (possibly nothing) is the final part.
    parts.append(unescape_backslash(text[start:]))
    return parts


def split_on_unescaped_comma(text: str) -> list[str]:
    r"""Split text on unescaped commas and unescape each part.

    Splits only on commas that are not escaped by a backslash. An escaped
    backslash directly before a comma does not escape that comma.
    After splitting, unescapes backslash sequences in each part.

    Parameters:
        text: Text with potential escaped commas (e.g., ``foo\, bar,baz``)

    Returns:
        List of unescaped strings. Empty input yields ``[""]``.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_comma
            >>> split_on_unescaped_comma(r"foo\, bar,baz")
            ['foo, bar', 'baz']
            >>> split_on_unescaped_comma("a,b,c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_comma(r"a\,b\,c")
            ['a,b,c']
            >>> split_on_unescaped_comma(r"Work,Personal\,Urgent")
            ['Work', 'Personal,Urgent']
            >>> split_on_unescaped_comma(r"a\\,b")
            ['a\\', 'b']
    """
    return _split_on_unescaped(text, ",")


def split_on_unescaped_semicolon(text: str) -> list[str]:
    r"""Split text on unescaped semicolons and unescape each part.

    Splits only on semicolons that are not escaped by a backslash. An
    escaped backslash directly before a semicolon does not escape that
    semicolon.
    After splitting, unescapes backslash sequences in each part.
    Used by vCard structured properties (ADR, N, ORG) per :rfc:`6350`.

    Parameters:
        text: Text with potential escaped semicolons
            (e.g., ``field1\;with;field2``)

    Returns:
        List of unescaped field strings. Empty input yields ``[""]``.

    Examples:
        .. code-block:: pycon

            >>> from icalendar.parser import split_on_unescaped_semicolon
            >>> split_on_unescaped_semicolon(r"field1\;with;field2")
            ['field1;with', 'field2']
            >>> split_on_unescaped_semicolon("a;b;c")
            ['a', 'b', 'c']
            >>> split_on_unescaped_semicolon(r"a\;b\;c")
            ['a;b;c']
            >>> split_on_unescaped_semicolon(r"PO Box 123\;Suite 200;City")
            ['PO Box 123;Suite 200', 'City']
    """
    return _split_on_unescaped(text, ";")
147
148
# Names exported by ``from <this module> import *``.
__all__ = [
    "split_on_unescaped_comma",
    "split_on_unescaped_semicolon",
    "unescape_backslash",
    "unescape_list_or_string",
]