Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/rfc3986/normalizers.py: 20%

79 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-03-26 06:12 +0000

1# -*- coding: utf-8 -*- 

2# Copyright (c) 2014 Rackspace 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 

12# implied. 

13# See the License for the specific language governing permissions and 

14# limitations under the License. 

15"""Module with functions to normalize components.""" 

16import re 

17 

18from . import compat 

19from . import misc 

20 

21 

def normalize_scheme(scheme):
    """Normalize the scheme component.

    The normalized form of a scheme is its lower-cased spelling.

    :param scheme: The scheme string; may be empty or ``None``.
    :returns: The lower-cased scheme, or the input unchanged when it is
        falsy (``""`` or ``None``).
    """
    # Same guard as normalize_path/normalize_query/normalize_fragment: a
    # missing component is passed through instead of raising AttributeError.
    if not scheme:
        return scheme
    return scheme.lower()

25 

26 

def normalize_authority(authority):
    """Build the normalized authority string from its three parts.

    :param authority: A ``(userinfo, host, port)`` triple.
    :returns: The parts joined as ``userinfo@host:port``; any falsy part
        (and its delimiter) is left out.
    """
    userinfo, host, port = authority
    pieces = []
    if userinfo:
        # Userinfo only gets its percent-escapes upper-cased.
        pieces.append(normalize_percent_characters(userinfo))
        pieces.append("@")
    if host:
        pieces.append(normalize_host(host))
    if port:
        pieces.append(":")
        pieces.append(port)
    return "".join(pieces)

38 

39 

def normalize_username(username):
    """Quote a username so that it is safe to embed in userinfo.

    :param username: The raw username.
    :returns: The result of ``compat.urlquote`` applied to the username.
    """
    quoted = compat.urlquote(username)
    return quoted

43 

44 

def normalize_password(password):
    """Quote a password so that it is safe to embed in userinfo.

    :param password: The raw password.
    :returns: The result of ``compat.urlquote`` applied to the password.
    """
    quoted = compat.urlquote(password)
    return quoted

48 

49 

def normalize_host(host):
    """Normalize a host string.

    Hosts are lower-cased. For a host matched by ``misc.IPv6_MATCHER`` that
    contains a ``%`` (Zone ID delimiter), the delimiter is rewritten to
    ``%25`` where needed and the Zone ID keeps its original casing.

    :param host: The host component.
    :returns: The normalized host string.
    """
    if not misc.IPv6_MATCHER.match(host):
        return host.lower()

    zone_start = host.find("%")
    if zone_start == -1:
        # IPv6 literal without a Zone ID: plain lower-casing is enough.
        return host.lower()

    encoded_start = host.find("%25")

    # Replace the RFC 4007 IPv6 Zone ID delimiter '%' with '%25' from
    # RFC 6874. A host of the form '[<IPv6 addr>%25]' is assumed to be
    # RFC 4007 (Zone ID "25") and becomes '[<IPv6 addr>%2525]'.
    needs_encoding = (
        encoded_start == -1
        or zone_start < encoded_start
        or (zone_start == encoded_start and encoded_start == len(host) - 4)
    )
    if needs_encoding:
        host = host.replace("%", "%25", 1)

    # Only the address part is lower-cased; the Zone ID casing is preserved.
    return host[:zone_start].lower() + host[zone_start:]

71 

72 

def normalize_path(path):
    """Normalize the path string.

    Percent-escapes are upper-cased first, then dot segments are removed.

    :param path: The path component; may be empty or ``None``.
    :returns: The normalized path, or the input unchanged when it is falsy.
    """
    if path:
        return remove_dot_segments(normalize_percent_characters(path))
    return path

80 

81 

def normalize_query(query):
    """Normalize the query string.

    :param query: The query component; may be empty or ``None``.
    :returns: The query with its percent-escapes upper-cased, or the input
        unchanged when it is falsy.
    """
    return normalize_percent_characters(query) if query else query

87 

88 

def normalize_fragment(fragment):
    """Normalize the fragment string.

    :param fragment: The fragment component; may be empty or ``None``.
    :returns: The fragment with its percent-escapes upper-cased, or the
        input unchanged when it is falsy.
    """
    if fragment:
        return normalize_percent_characters(fragment)
    return fragment

94 

95 

# Matches one percent-encoded triplet: '%' followed by two hex digits.
PERCENT_MATCHER = re.compile(r"%[A-Fa-f0-9]{2}")


def _upper_case_match(match):
    """Return the text of a percent-escape match in upper case."""
    return match.group(0).upper()


def normalize_percent_characters(s):
    """All percent characters should be upper-cased.

    For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``.

    :param s: The string whose percent-escapes should be normalized.
    :returns: ``s`` with every ``%XX`` escape upper-cased; text that is not
        a valid escape (such as a lone ``%``) is left untouched.
    """
    # A single substitution pass rewrites every escape in place, instead of
    # collecting the distinct escapes and rescanning the string for each.
    return PERCENT_MATCHER.sub(_upper_case_match, s)

109 

110 

def remove_dot_segments(s):
    """Remove dot segments from the string.

    See also Section 5.2.4 of :rfc:`3986`.

    :param s: The path to process.
    :returns: The path with ``.`` segments dropped and each ``..`` segment
        removed together with the segment before it (when there is one).
    """
    # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
    kept = []
    for part in s.split("/"):
        if part == "..":
            # Step back one level, if there is anything left to drop.
            if kept:
                kept.pop()
        elif part != ".":
            # '.' is the current directory and is superfluous; every other
            # segment (including empty ones) is retained.
            kept.append(part)

    # An absolute path must keep its leading '/': add an empty first
    # segment unless one is already there.
    if s.startswith("/") and kept[:1] != [""]:
        kept.insert(0, "")

    # If the path ends with '/.' or '/..', add one more empty segment so the
    # result ends with a trailing '/'.
    if s.endswith("/.") or s.endswith("/.."):
        kept.append("")

    return "/".join(kept)

143 

144 

def encode_component(uri_component, encoding):
    """Encode the specific component in the provided encoding.

    :param uri_component: The component to encode, or ``None``.
    :param encoding: The encoding used to convert the component to and from
        bytes.
    :returns: ``None`` when the component is ``None``; otherwise the
        component with every byte percent-encoded unless it is a character
        in ``misc.NON_PCT_ENCODED`` or a ``%`` of an already-encoded
        component.
    """
    if uri_component is None:
        return uri_component

    # Detect whether the component is already percent-encoded: that is the
    # case when every '%' byte starts a valid escape. If so, '%' characters
    # are kept as-is while all other characters are still encoded.
    text = compat.to_str(uri_component, encoding)
    escape_count = len(PERCENT_MATCHER.findall(text))

    raw = compat.to_bytes(uri_component, encoding)
    already_encoded = escape_count == raw.count(b"%")

    result = bytearray()
    for offset in range(len(raw)):
        # Slicing yields a one-byte bytestring on both Python 2 & 3
        chunk = raw[offset : offset + 1]
        code = ord(chunk)
        keep_percent = already_encoded and chunk == b"%"
        if keep_percent or (
            code < 128 and chunk.decode() in misc.NON_PCT_ENCODED
        ):
            result.extend(chunk)
        else:
            result.extend("%{0:02x}".format(code).encode().upper())

    return result.decode(encoding)