Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/rfc3986/iri.py: 46%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

54 statements  

1"""Module containing the implementation of the IRIReference class.""" 

2 

3# Copyright (c) 2014 Rackspace 

4# Copyright (c) 2015 Ian Stapleton Cordasco 

5# Licensed under the Apache License, Version 2.0 (the "License"); 

6# you may not use this file except in compliance with the License. 

7# You may obtain a copy of the License at 

8# 

9# http://www.apache.org/licenses/LICENSE-2.0 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 

14# implied. 

15# See the License for the specific language governing permissions and 

16# limitations under the License. 

17import typing as t 

18 

19from . import compat 

20from . import exceptions 

21from . import misc 

22from . import normalizers 

23from . import uri 

24from ._typing_compat import Self as _Self 

25 

26try: 

27 import idna 

28except ImportError: # pragma: no cover 

29 idna = None 

30 

31 

class IRIReference(misc.URIReferenceBase, uri.URIMixin):
    """Immutable object representing a parsed IRI Reference.

    Can be encoded into an URIReference object via the procedure
    specified in RFC 3987 Section 3.1

    .. note::
        The IRI submodule is a new interface and may possibly change in
        the future. Check for changes to the interface when upgrading.
    """

    # Text encoding associated with this reference. It is assigned as a
    # plain attribute in ``__new__`` after the base object is constructed.
    encoding: str

    def __new__(
        cls,
        scheme: t.Optional[str],
        authority: t.Optional[str],
        path: t.Optional[str],
        query: t.Optional[str],
        fragment: t.Optional[str],
        encoding: str = "utf-8",
    ) -> _Self:
        """Create a new IRIReference.

        Falsy ``scheme``, ``authority`` and ``path`` values (for example
        the empty string) are stored as ``None``; ``query`` and
        ``fragment`` are stored exactly as given.

        :param scheme: Scheme component, if any.
        :param authority: Authority component, if any.
        :param path: Path component, if any.
        :param query: Query component, if any.
        :param fragment: Fragment component, if any.
        :param str encoding: Encoding recorded on the new reference.
        :returns: The newly created reference.
        """
        ref = super().__new__(
            cls,
            scheme or None,
            authority or None,
            path or None,
            query,
            fragment,
        )
        ref.encoding = encoding
        return ref

    # Defining ``__eq__`` would otherwise disable hashing, so the tuple
    # hash is restored explicitly.
    __hash__ = tuple.__hash__

    def __eq__(self, other: object) -> bool:
        """Compare this reference to another.

        Tuples are unpacked into a new reference of this type; any other
        object that is not an :class:`IRIReference` is parsed with
        :meth:`from_string`. The comparison itself is done on the tuple
        forms of both references.

        :param other: Reference, tuple or string to compare against.
        :returns: ``True`` when both references have equal tuple forms.
        :raises TypeError: If parsing ``other`` raises ``TypeError``.
        """
        other_ref = other
        if isinstance(other, tuple):
            other_ref = type(self)(*other)
        elif not isinstance(other, IRIReference):
            try:
                other_ref = self.from_string(other)
            except TypeError as exc:
                raise TypeError(
                    f"Unable to compare {type(self).__name__}() "
                    f"to {type(other).__name__}()"
                ) from exc

        # See http://tools.ietf.org/html/rfc3986#section-6.2
        return tuple(self) == tuple(other_ref)

    def _match_subauthority(self) -> t.Optional[t.Match[str]]:
        """Match the authority against ``misc.ISUBAUTHORITY_MATCHER``."""
        return misc.ISUBAUTHORITY_MATCHER.match(self.authority)

    @classmethod
    def from_string(
        cls,
        iri_string: t.Union[str, bytes],
        encoding: str = "utf-8",
    ) -> _Self:
        """Parse an IRI reference from the given unicode IRI string.

        The scheme and authority are kept as matched, while the path,
        query and fragment are passed through
        ``normalizers.encode_component`` before being stored.

        :param str iri_string: Unicode IRI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :returns: :class:`IRIReference` or subclass thereof
        """
        iri_string = compat.to_str(iri_string, encoding)

        split_iri = misc.IRI_MATCHER.match(iri_string).groupdict()
        return cls(
            split_iri["scheme"],
            split_iri["authority"],
            normalizers.encode_component(split_iri["path"], encoding),
            normalizers.encode_component(split_iri["query"], encoding),
            normalizers.encode_component(split_iri["fragment"], encoding),
            encoding,
        )

    def encode(  # noqa: C901
        self,
        idna_encoder: t.Optional[
            t.Callable[[str], t.Union[str, bytes]]
        ] = None,
    ) -> "uri.URIReference":
        """Encode an IRIReference into a URIReference instance.

        If the ``idna`` module is installed or the ``rfc3986[idna]``
        extra is used then unicode characters in the IRI host
        component will be encoded with IDNA2008.

        :param idna_encoder:
            Function that encodes each dot-separated part of the host
            component. If not given, a default encoder based on the
            ``idna`` module is used for references that have an
            authority.
        :rtype: uri.URIReference
        :returns: A URI reference
        :raises exceptions.MissingDependencyError:
            If the reference has an authority, no ``idna_encoder`` was
            given and the ``idna`` module could not be imported.
        :raises exceptions.InvalidAuthority:
            If the default encoder fails to IDNA-encode a host label.
        """
        authority = self.authority
        if authority:
            if idna_encoder is None:
                if idna is None:  # pragma: no cover
                    raise exceptions.MissingDependencyError(
                        "Could not import the 'idna' module "
                        "and the IRI hostname requires encoding"
                    )

                def _default_encoder(name: str) -> t.Union[str, bytes]:
                    assert idna  # Known to not be None at this point.

                    # ASCII ends at code point 127, so every character
                    # from U+0080 upwards must go through IDNA encoding.
                    if name.isascii():
                        return name
                    try:
                        return idna.encode(
                            name.lower(), strict=True, std3_rules=True
                        )
                    except idna.IDNAError as exc:
                        raise exceptions.InvalidAuthority(
                            self.authority
                        ) from exc

                idna_encoder = _default_encoder

            # Rebuild the authority as [userinfo@]host[:port] from its
            # individually encoded parts.
            authority = ""
            if self.host:
                authority = ".".join(
                    [
                        compat.to_str(idna_encoder(part))
                        for part in self.host.split(".")
                    ]
                )

            if self.userinfo is not None:
                authority = (
                    normalizers.encode_component(self.userinfo, self.encoding)
                    + "@"
                    + authority
                )

            if self.port is not None:
                authority += ":" + str(self.port)

        return uri.URIReference(
            self.scheme,
            authority,
            path=self.path,
            query=self.query,
            fragment=self.fragment,
            encoding=self.encoding,
        )

179 )