Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/icalendar/parser/content_line.py: 95%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

81 statements  

1"""parsing and generation of content lines""" 

2 

3import re 

4 

5from icalendar.parser.parameter import Parameters 

6from icalendar.parser.property import unescape_backslash, unescape_list_or_string 

7from icalendar.parser.string import ( 

8 escape_string, 

9 foldline, 

10 unescape_string, 

11 validate_token, 

12) 

13from icalendar.parser_tools import DEFAULT_ENCODING, ICAL_TYPE, to_unicode 

14 

# Matches a fold: one or more line breaks (LF or CRLF) followed by a single
# space or tab. Replacing every match with "" unfolds a folded content line.
UFOLD = re.compile(r"(\r?\n)+[ \t]")
# Matches one line break, either LF or CRLF; used to split unfolded text
# into individual content lines.
NEWLINE = re.compile(r"\r?\n")

17 

18 

class Contentline(str):
    """A content line: a string that can be folded and parsed into parts.

    The text is stored unfolded. ``strict`` is kept on the instance and is
    forwarded to ``Parameters.from_ical`` when the line is parsed.
    """

    __slots__ = ("strict",)

    def __new__(cls, value, strict=False, encoding=DEFAULT_ENCODING):
        """Create a content line from ``value``.

        ``value`` is converted with ``to_unicode`` using ``encoding``. The
        converted text must not contain a line feed character.
        """
        value = to_unicode(value, encoding=encoding)
        # NOTE(review): an assert is skipped under ``python -O``. It is kept
        # as an assert here so the exception type seen by callers is unchanged.
        assert "\n" not in value, (
            "Content line can not contain unescaped new line characters."
        )
        self = super().__new__(cls, value)
        self.strict = strict
        return self

    @classmethod
    def from_parts(
        cls,
        name: ICAL_TYPE,
        params: Parameters,
        values,
        sorted: bool = True,  # noqa: A002
    ):
        """Turn a name, parameters and a value into a content line.

        ``values`` is serialized with its own ``to_ical`` method when it has
        one; otherwise it is wrapped in ``vText`` first. ``sorted`` is passed
        through to ``params.to_ical``.

        The result is ``name;params:values``, or ``name:values`` when
        ``params`` is empty or serializes to an empty string.
        """
        assert isinstance(params, Parameters)
        if hasattr(values, "to_ical"):
            values = values.to_ical()
        else:
            # Imported at call time rather than at module level
            # (presumably to avoid a circular import -- TODO confirm).
            from icalendar.prop import vText

            values = vText(values).to_ical()

        # TODO: after unicode only, remove this
        # Convert back to unicode, after to_ical encoded it.
        name = to_unicode(name)
        values = to_unicode(values)
        if params:
            params = to_unicode(params.to_ical(sorted=sorted))
            if params:
                # some parameter values can be skipped during serialization,
                # so the serialized parameters can be empty even when
                # ``params`` was not
                return cls(f"{name};{params}:{values}")
        return cls(f"{name}:{values}")

    def parts(self) -> tuple[str, Parameters, str]:
        """Split the content line into ``name``, ``parameters``, and ``values`` parts.

        Properly handles escaping with backslashes and double-quote sections
        to avoid corrupting URL-encoded characters in values.

        Example with parameter:

        .. code-block:: text

            DESCRIPTION;ALTREP="cid:part1.0001@example.org":The Fall'98 Wild

        Example without parameters:

        .. code-block:: text

            DESCRIPTION:The Fall'98 Wild

        Raises:
            ValueError: If the line cannot be split into parts. Any
                ``ValueError`` raised while parsing is re-raised with the
                offending line included in the message.
        """
        try:
            name_split: int | None = None
            value_split: int | None = None
            in_quotes: bool = False
            escaped: bool = False

            for i, ch in enumerate(self):
                if ch == '"' and not escaped:
                    in_quotes = not in_quotes
                elif ch == "\\" and not in_quotes:
                    # A backslash outside quotes protects the next character.
                    escaped = True
                    continue
                elif not in_quotes and not escaped:
                    # Find first delimiter for name
                    if ch in ":;" and name_split is None:
                        name_split = i
                    # Find value delimiter (first colon). Both split points
                    # are known once it is found, so the rest of the line
                    # (the value, which may be long) is not scanned.
                    if ch == ":":
                        value_split = i
                        break

                escaped = False

            # Validate parsing results
            if not value_split:
                # No colon found - value is empty, use end of string.
                # An index of 0 (line starts with ":") is treated the same.
                value_split = len(self)

            # Extract name - if no delimiter,
            # take whole string for validate_token to reject
            name = self[:name_split] if name_split else self
            validate_token(name)

            if not name_split or name_split + 1 == value_split:
                # No delimiter or empty parameter section
                raise ValueError("Invalid content line")  # noqa: TRY301
            # Parse parameters - they still need to be escaped/unescaped
            # for proper handling of commas, semicolons, etc. in parameter values
            param_str = escape_string(self[name_split + 1 : value_split])
            params = Parameters.from_ical(param_str, strict=self.strict)
            params = Parameters(
                (unescape_string(key), unescape_list_or_string(value))
                for key, value in params.items()
            )
            # Unescape backslash sequences in values but preserve URL encoding
            values = unescape_backslash(self[value_split + 1 :])
        except ValueError as exc:
            raise ValueError(
                f"Content line could not be parsed into parts: '{self}': {exc}"
            ) from exc
        return (name, params, values)

    @classmethod
    def from_ical(cls, ical, strict=False):
        """Unfold the content lines in an iCalendar into long content lines."""
        ical = to_unicode(ical)
        # a fold is one or more line breaks followed by either a space or a tab
        return cls(UFOLD.sub("", ical), strict=strict)

    def to_ical(self):
        """Return the line folded with ``foldline`` and encoded as bytes.

        Long content lines are folded so that they stay within the line
        width enforced by ``foldline``; the result is encoded with
        ``DEFAULT_ENCODING``.
        """
        return foldline(self).encode(DEFAULT_ENCODING)

146 

147 

class Contentlines(list):
    """A list of content lines.

    I assume that iCalendar files generally are a few kilobytes in size.
    Then this should be efficient. For huge files, an iterator should
    probably be used instead.
    """

    def to_ical(self):
        """Serialize every non-empty entry and join them with CRLF.

        The result always ends with a trailing CRLF.
        """
        body = b"\r\n".join(entry.to_ical() for entry in self if entry)
        return body + b"\r\n"

    @classmethod
    def from_ical(cls, st):
        """Parse a string into content lines.

        The text is unfolded, split on line breaks, and every non-empty
        piece becomes a ``Contentline``. An empty string is appended as the
        final entry.

        Raises:
            ValueError: If unfolding, splitting or line construction fails
                for any reason.
        """
        text = to_unicode(st)
        try:
            # a fold is a line break followed by either a space or a tab
            pieces = NEWLINE.split(UFOLD.sub("", text))
            result = cls(map(Contentline, filter(None, pieces)))
            result.append("")  # '\r\n' at the end of every content line
        except Exception as err:
            raise ValueError("Expected StringType with content lines") from err
        return result

170 

171 

# Public names of this module.
__all__ = [
    "Contentline",
    "Contentlines",
]