Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/idna/core.py: 45%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

309 statements  

1import bisect 

2import re 

3import unicodedata 

4import warnings 

5from typing import Optional, Union 

6 

7from . import idnadata 

8from .intranges import intranges_contain 

9 

# Canonical combining class that valid_contextj() compares against when it
# looks at the codepoint preceding a joiner
_virama_combining_class = 9
# ACE prefix that marks a Punycode-encoded A-label
_alabel_prefix = b"xn--"
# Defensive upper bound on the length of any label or domain accepted for
# processing
_max_input_length = 1024
# Label separators recognised in non-strict mode by encode() and decode()
_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]")


# Bidirectional category sets used by check_bidi() (RFC 5893); built once at
# import time so they are not rebuilt for every codepoint examined
_bidi_rtl_first = frozenset(("R", "AL"))
_bidi_rtl_categories = frozenset(("R", "AL", "AN"))
_bidi_rtl_allowed = frozenset(("R", "AL", "AN", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"))
_bidi_rtl_valid_ending = frozenset(("R", "AL", "EN", "AN"))
_bidi_rtl_numeric = frozenset(("AN", "EN"))
_bidi_ltr_allowed = frozenset(("L", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"))
_bidi_ltr_valid_ending = frozenset(("L", "EN"))
# Despite the "_bidi" prefix, these two hold joining types: valid_contextj()
# tests them against the value returned by _joining_type()
_bidi_joiner_l_or_d = frozenset(("L", "D"))
_bidi_joiner_r_or_d = frozenset(("R", "D"))

26 

27 

def _joining_type(cp: int) -> Optional[str]:
    """Look up the joining type of a codepoint.

    :param cp: The codepoint, as an integer.
    :returns: The key of the first entry in ``idnadata.joining_types`` whose
        ranges contain ``cp``, or ``None`` when no entry matches.
    """
    matches = (
        name
        for name, spans in idnadata.joining_types.items()
        if intranges_contain(cp, spans)
    )
    return next(matches, None)

33 

34 

class IDNAError(UnicodeError):
    """Common base class for every error raised while processing IDNA input."""

37 

38 

class IDNABidiError(IDNAError):
    """Raised when a label does not meet the bidirectional (Bidi Rule) requirements."""

41 

42 

class InvalidCodepoint(IDNAError):
    """Raised when the input contains a disallowed or unallocated codepoint."""

45 

46 

class InvalidCodepointContext(IDNAError):
    """Raised when a codepoint is not permitted in the context where it appears."""

49 

50 

def _combining_class(cp: int) -> int:
    """Return the canonical combining class of a codepoint.

    :param cp: The codepoint, as an integer.
    :returns: The value of :func:`unicodedata.combining` for the codepoint.
    :raises ValueError: If the combining class is ``0`` and the codepoint
        has no name in :mod:`unicodedata`, which is taken to mean the
        character is unknown to the local Unicode database.
    """
    char = chr(cp)
    v = unicodedata.combining(char)
    # unicodedata.name() raises its own ValueError for a nameless character
    # unless a default is given. Supplying "" makes the check below reachable
    # so the intended error message is the one callers see.
    if v == 0 and not unicodedata.name(char, ""):
        raise ValueError("Unknown character in unicodedata")
    return v

56 

57 

def _is_script(cp: str, script: str) -> bool:
    """Report whether a character falls within the ranges of a named script.

    :param cp: A single character.
    :param script: A key of ``idnadata.scripts``.
    :returns: ``True`` if the character's codepoint lies in the ranges
        stored for ``script``.
    """
    code = ord(cp)
    script_ranges = idnadata.scripts[script]
    return intranges_contain(code, script_ranges)

60 

61 

def _punycode(s: str) -> bytes:
    """Encode ``s`` with Python's ``punycode`` codec (no ACE prefix is added)."""
    encoded = s.encode(encoding="punycode")
    return encoded

64 

65 

def _unot(s: int) -> str:
    """Format a codepoint in ``U+XXXX`` notation (at least four uppercase hex digits)."""
    hex_digits = format(s, "04X")
    return "U+" + hex_digits

68 

69 

def valid_label_length(label: Union[bytes, str]) -> bool:
    """Tell whether a label fits within the DNS label length limit.

    :rfc:`1035` (see also :rfc:`5891` §4.2.4) caps a DNS label at 63
    octets. The length is taken with :func:`len`, so a :class:`str`
    argument is measured in characters and a :class:`bytes` argument in
    octets.

    :param label: The label to measure.
    :returns: ``True`` when the label has at most 63 units, ``False``
        otherwise.
    """
    too_long = len(label) > 63
    return not too_long

83 

84 

def valid_string_length(domain: Union[bytes, str], trailing_dot: bool) -> bool:
    """Tell whether a whole domain name fits within the DNS length limit.

    Following :rfc:`1035`, the limit is 253 when the name carries no
    trailing dot and 254 when it does. The length is taken with
    :func:`len` on the argument as given.

    :param domain: The complete domain name (one or more labels).
    :param trailing_dot: ``True`` if ``domain`` ends with a ``.``.
    :returns: ``True`` when the domain is within the applicable limit,
        ``False`` otherwise.
    """
    if trailing_dot:
        limit = 254
    else:
        limit = 253
    return len(domain) <= limit

97 

98 

def check_bidi(label: str, check_ltr: bool = False) -> bool:
    """Check a single label against the Bidi Rule of :rfc:`5893`.

    The label is first scanned for right-to-left content (bidirectional
    categories ``R``, ``AL`` or ``AN``). If none is found and ``check_ltr``
    is ``False``, the label is accepted without further checks. Otherwise
    the first character decides whether the label is treated as
    right-to-left or left-to-right, and every character is then checked
    against the corresponding rules.

    :param label: The label to validate, as a Unicode string.
    :param check_ltr: If ``True``, apply the rules even when the label has
        no right-to-left characters.
    :returns: ``True`` if the label passes.
    :raises IDNABidiError: If a character has no bidirectional category,
        or if any of Bidi Rule conditions 1-6 is violated.
    :raises IDNAError: If ``label`` exceeds the defensive input length
        limit.
    """
    if len(label) > _max_input_length:
        raise IDNAError("Label too long")

    # The Bidi Rule only applies when the label contains RTL characters
    has_rtl = False
    for position, char in enumerate(label, 1):
        category = unicodedata.bidirectional(char)
        if category == "":
            # String likely comes from a newer version of Unicode
            raise IDNABidiError(f"Unknown directionality in label {label!r} at position {position}")
        if category in _bidi_rtl_categories:
            has_rtl = True
    if not (has_rtl or check_ltr):
        return True

    # Bidi rule 1: the first character fixes the direction of the label
    first_category = unicodedata.bidirectional(label[0])
    if first_category in _bidi_rtl_first:
        rtl = True
    elif first_category == "L":
        rtl = False
    else:
        raise IDNABidiError(f"First codepoint in label {label!r} must be directionality L, R or AL")

    # Tracks whether the most recent non-NSM character is an allowed ending
    ends_validly = False
    seen_number: Optional[str] = None
    for position, char in enumerate(label, 1):
        category = unicodedata.bidirectional(char)

        if not rtl:
            # Bidi rule 5
            if category not in _bidi_ltr_allowed:
                raise IDNABidiError(f"Invalid direction for codepoint at position {position} in a left-to-right label")
            # Bidi rule 6
            if category in _bidi_ltr_valid_ending:
                ends_validly = True
            elif category != "NSM":
                ends_validly = False
            continue

        # Bidi rule 2
        if category not in _bidi_rtl_allowed:
            raise IDNABidiError(f"Invalid direction for codepoint at position {position} in a right-to-left label")
        # Bidi rule 3
        if category in _bidi_rtl_valid_ending:
            ends_validly = True
        elif category != "NSM":
            ends_validly = False
        # Bidi rule 4: AN and EN must not both appear
        if category in _bidi_rtl_numeric:
            if not seen_number:
                seen_number = category
            elif seen_number != category:
                raise IDNABidiError("Can not mix numeral types in a right-to-left label")

    if not ends_validly:
        raise IDNABidiError("Label ends with illegal codepoint directionality")

    return True

172 

173 

def check_initial_combiner(label: str) -> bool:
    """Ensure a label does not start with a combining mark.

    :rfc:`5891` §4.2.3.2 forbids a label whose first character has a
    Unicode general category beginning with ``M`` (Mark).

    :param label: The label to check; its first character is inspected.
    :returns: ``True`` if the first character is not a mark.
    :raises IDNAError: If the first character is a combining character.
    """
    first_category = unicodedata.category(label[0])
    if first_category.startswith("M"):
        raise IDNAError("Label begins with an illegal combining character")
    return True

187 

188 

def check_hyphen_ok(label: str) -> bool:
    """Enforce the hyphen placement rules for a label.

    Following :rfc:`5891` §4.2.3.1, a label may not have hyphens
    (``U+002D``) in both its third and fourth positions (the pattern
    reserved for A-labels), and may not begin or end with a hyphen.

    :param label: The label to check.
    :returns: ``True`` if no hyphen restriction is violated.
    :raises IDNAError: If any hyphen restriction is violated.
    """
    third_and_fourth = label[2:4]
    if third_and_fourth == "--":
        raise IDNAError("Label has disallowed hyphens in 3rd and 4th position")
    if "-" in (label[0], label[-1]):
        raise IDNAError("Label must not start or end with a hyphen")
    return True

205 

206 

def check_nfc(label: str) -> None:
    """Verify that a label is already in Unicode Normalization Form C.

    :param label: The label to check.
    :raises IDNAError: If ``label`` exceeds the defensive input length
        limit, or if NFC-normalising it would change it.
    """
    if len(label) > _max_input_length:
        raise IDNAError("Label too long")
    normalised = unicodedata.normalize("NFC", label)
    if normalised == label:
        return
    raise IDNAError("Label must be in Normalization Form C")

217 

218 

def valid_contextj(label: str, pos: int) -> bool:
    """Apply the CONTEXTJ rules of :rfc:`5892` Appendix A to one codepoint.

    Handles the two joiner codepoints: ``U+200C`` (ZERO WIDTH NON-JOINER,
    Appendix A.1) and ``U+200D`` (ZERO WIDTH JOINER, Appendix A.2).

    :param label: The label that contains the codepoint.
    :param pos: Index of the joiner within ``label``.
    :returns: ``True`` if the codepoint at ``pos`` satisfies its rule;
        ``False`` if it does not, or if it is neither of the two joiners.
    :raises ValueError: If the combining class of the preceding codepoint
        cannot be determined because it is unknown to ``unicodedata``.
    :raises IDNAError: If ``label`` exceeds the defensive input length
        limit.
    """
    if len(label) > _max_input_length:
        raise IDNAError("Label too long")

    def follows_virama() -> bool:
        # True when the joiner is immediately preceded by a virama
        return pos > 0 and _combining_class(ord(label[pos - 1])) == _virama_combining_class

    def first_non_transparent(indices) -> Optional[str]:
        # Joining type of the first character (in the given index order)
        # that is not Transparent ("T"); None if there is no such character
        for index in indices:
            found = _joining_type(ord(label[index]))
            if found != "T":
                return found
        return None

    cp_value = ord(label[pos])

    if cp_value == 0x200C:
        if follows_virama():
            return True

        # Otherwise the nearest non-transparent neighbour before the joiner
        # must have joining type L or D ...
        before = first_non_transparent(range(pos - 1, -1, -1))
        if before not in _bidi_joiner_l_or_d:
            return False

        # ... and the nearest one after it must have joining type R or D
        after = first_non_transparent(range(pos + 1, len(label)))
        return after in _bidi_joiner_r_or_d

    if cp_value == 0x200D:
        return follows_virama()

    return False

271 

272 

def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
    """Apply the CONTEXTO rules of :rfc:`5892` Appendix A to one codepoint.

    Rules implemented here:

    * ``U+00B7``: must sit between two ``U+006C`` characters.
    * ``U+0375``: the following character must be in the Greek script.
    * ``U+05F3`` / ``U+05F4``: the preceding character must be in the
      Hebrew script.
    * ``U+30FB``: the label must contain at least one Hiragana, Katakana
      or Han character.
    * ``U+0660``-``U+0669`` and ``U+06F0``-``U+06F9``: digits from the two
      ranges must not be mixed within a label.

    :param label: The label that contains the codepoint.
    :param pos: Index of the codepoint within ``label``.
    :param exception: Not used by this function.
    :returns: ``True`` if the codepoint at ``pos`` satisfies its rule;
        ``False`` if it does not, or if no rule here covers it.
    :raises IDNAError: If ``label`` exceeds the defensive input length
        limit.
    """
    if len(label) > _max_input_length:
        raise IDNAError("Label too long")
    cp_value = ord(label[pos])
    last_index = len(label) - 1

    if cp_value == 0x00B7:
        if not 0 < pos < last_index:
            return False
        neighbours = (ord(label[pos - 1]), ord(label[pos + 1]))
        return neighbours == (0x006C, 0x006C)

    if cp_value == 0x0375:
        if pos >= last_index or len(label) <= 1:
            return False
        return _is_script(label[pos + 1], "Greek")

    if cp_value in {0x05F3, 0x05F4}:
        if pos <= 0:
            return False
        return _is_script(label[pos - 1], "Hebrew")

    if cp_value == 0x30FB:
        return any(
            _is_script(ch, "Hiragana") or _is_script(ch, "Katakana") or _is_script(ch, "Han")
            for ch in label
            if ch != "\u30fb"
        )

    if 0x660 <= cp_value <= 0x669:
        return all(not (0x6F0 <= ord(ch) <= 0x06F9) for ch in label)

    if 0x6F0 <= cp_value <= 0x6F9:
        return all(not (0x660 <= ord(ch) <= 0x0669) for ch in label)

    return False

320 

321 

def check_label(label: Union[str, bytes, bytearray]) -> None:
    """Run the full set of IDNA 2008 validity checks on a single label.

    Applies, in order: NFC normalisation (:func:`check_nfc`), hyphen
    restrictions (:func:`check_hyphen_ok`), the no-leading-combiner rule
    (:func:`check_initial_combiner`), per-codepoint validity (PVALID,
    CONTEXTJ, CONTEXTO classes from :rfc:`5892`), and the Bidi Rule
    (:func:`check_bidi`).

    :param label: The label to validate. ``bytes`` or ``bytearray`` input
        is decoded as UTF-8 first.
    :raises IDNAError: If the label is empty, too long, fails a structural
        rule, or has a joiner next to a codepoint that is unknown to
        ``unicodedata``.
    :raises InvalidCodepoint: If the label contains a codepoint that is
        not PVALID, CONTEXTJ or CONTEXTO.
    :raises InvalidCodepointContext: If a CONTEXTJ or CONTEXTO codepoint
        is not valid in its context.
    :raises IDNABidiError: If the Bidi Rule is violated.
    """
    if len(label) > _max_input_length:
        raise IDNAError("Label too long")
    if isinstance(label, (bytes, bytearray)):
        label = label.decode("utf-8")
    if len(label) == 0:
        raise IDNAError("Empty Label")

    # Reject on domain length rather than label length to support some
    # UTS 46 use cases, while still bounding the work done by the
    # contextual rules below
    if not valid_string_length(label, trailing_dot=True):
        raise IDNAError("Label too long")

    check_nfc(label)
    check_hyphen_ok(label)
    check_initial_combiner(label)

    for pos, cp in enumerate(label):
        cp_value = ord(cp)
        if intranges_contain(cp_value, idnadata.codepoint_classes["PVALID"]):
            continue
        if intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTJ"]):
            # Only the lookup is guarded. IDNAError derives from UnicodeError,
            # which derives from ValueError, so raising
            # InvalidCodepointContext inside this try would be caught by the
            # ValueError handler and reported as an "unknown codepoint".
            try:
                joiner_ok = valid_contextj(label, pos)
            except IDNAError:
                # Already an IDNA-specific error; do not relabel it
                raise
            except ValueError as err:
                raise IDNAError(
                    f"Unknown codepoint adjacent to joiner {_unot(cp_value)} at position {pos + 1} in {label!r}"
                ) from err
            if not joiner_ok:
                raise InvalidCodepointContext(f"Joiner {_unot(cp_value)} not allowed at position {pos + 1} in {label!r}")
        elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTO"]):
            if not valid_contexto(label, pos):
                raise InvalidCodepointContext(f"Codepoint {_unot(cp_value)} not allowed at position {pos + 1} in {label!r}")
        else:
            raise InvalidCodepoint(f"Codepoint {_unot(cp_value)} at position {pos + 1} of {label!r} not allowed")

    check_bidi(label)

375 

376 

def alabel(label: str) -> bytes:
    """Produce the A-label form of a single label.

    A label that encodes cleanly to ASCII is validated with
    :func:`ulabel` and returned as those ASCII bytes, unchanged. Any other
    label is validated with :func:`check_label`, Punycode-encoded, and
    prefixed with ``xn--`` (the ASCII-Compatible Encoding of :rfc:`5891`
    §4).

    :param label: The label to convert, as a Unicode string.
    :returns: The A-label as ASCII :class:`bytes`.
    :raises IDNAError: If the input exceeds the defensive length limit,
        the label is invalid, or the result is longer than 63 octets.
    """
    if len(label) > _max_input_length:
        raise IDNAError("Label too long")

    try:
        ascii_form = label.encode("ascii")
    except UnicodeEncodeError:
        ascii_form = None

    if ascii_form is None:
        # Non-ASCII content: validate, then build the ACE form
        check_label(label)
        encoded = _alabel_prefix + _punycode(label)
    else:
        # Pure ASCII: validate only; the bytes are returned as they are
        ulabel(ascii_form)
        encoded = ascii_form

    if not valid_label_length(encoded):
        raise IDNAError("Label too long")
    return encoded

409 

410 

def ulabel(label: Union[str, bytes, bytearray]) -> str:
    """Produce the U-label form of a single label.

    The inverse of :func:`alabel`. A label with the ``xn--`` prefix
    (matched case-insensitively) has the prefix removed and the remainder
    Punycode-decoded and validated. A non-ASCII string is validated and
    returned unchanged. Any other ASCII label is lower-cased, validated,
    and returned as a string.

    :param label: The label to convert. ``bytes`` or ``bytearray`` input
        is used as given; ``str`` input is first encoded to ASCII where
        possible.
    :returns: The U-label as a Unicode string.
    :raises IDNAError: If the input exceeds the defensive length limit,
        the A-label is malformed, or the label fails validation.
    """
    if len(label) > _max_input_length:
        raise IDNAError("Label too long")

    if isinstance(label, (bytes, bytearray)):
        raw = bytes(label)
    else:
        try:
            raw = label.encode("ascii")
        except UnicodeEncodeError:
            # Already a non-ASCII Unicode label: just validate it
            check_label(label)
            return label

    lowered = raw.lower()
    if not lowered.startswith(_alabel_prefix):
        check_label(lowered)
        return lowered.decode("ascii")

    payload = lowered[len(_alabel_prefix) :]
    if not payload:
        raise IDNAError("Malformed A-label, no Punycode eligible content found")
    if payload.endswith(b"-"):
        raise IDNAError("A-label must not end with a hyphen")

    try:
        decoded = payload.decode("punycode")
    except UnicodeError as err:
        raise IDNAError("Invalid A-label") from err
    check_label(decoded)
    return decoded

452 

453 

def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str:
    """Run a domain string through the UTS #46 character mapping.

    Follows the mapping table of `UTS #46 §4
    <https://www.unicode.org/reports/tr46/>`_. Each character is looked up
    in the tables of ``uts46data`` and, depending on its status (``V``,
    ``M``, ``D``, ``3``, ``I``), is kept, replaced, dropped, or rejected.
    The collected output is NFC-normalised before it is returned.

    :param domain: The full domain name to remap.
    :param std3_rules: If ``True``, status ``3`` codepoints are rejected
        rather than kept or mapped.
    :param transitional: If ``True``, status ``D`` codepoints are mapped
        rather than kept. Transitional processing has been removed from
        UTS #46; the option remains only for backwards compatibility.
    :returns: The remapped domain, in Normalisation Form C.
    :raises InvalidCodepoint: If a codepoint is not allowed under the
        chosen rules.
    :raises IDNAError: If ``domain`` exceeds the defensive input length
        limit.
    """
    if len(domain) > _max_input_length:
        raise IDNAError("Domain too long")
    from .uts46data import uts46_replacements, uts46_starts, uts46_statuses

    pieces = []

    for pos, char in enumerate(domain):
        code_point = ord(char)
        # Codepoints below 256 index the tables directly; others are found
        # by bisecting the table of range starts
        if code_point < 256:
            row = code_point
        else:
            row = bisect.bisect_right(uts46_starts, code_point) - 1
        status = chr(uts46_statuses[row])
        replacement: Optional[str] = uts46_replacements[row]

        if status == "I":
            # Ignored: contributes nothing to the output
            continue

        # UTS #46 §4: V is always valid, D is a deviation (kept unless
        # transitional), 3 is disallowed-STD3 (kept unmapped when
        # std3_rules is off and it has no mapping).
        if status == "V":
            unchanged = True
        elif status == "D":
            unchanged = not transitional
        elif status == "3":
            unchanged = not std3_rules and replacement is None
        else:
            unchanged = False

        if unchanged:
            pieces.append(char)
        elif replacement is not None and (
            status == "M" or (status == "3" and not std3_rules) or (status == "D" and transitional)
        ):
            # M is mapped; 3-with-replacement and transitional D share the
            # same replacement path
            pieces.append(replacement)
        else:
            raise InvalidCodepoint(f"Codepoint {_unot(code_point)} not allowed at position {pos + 1} in {domain!r}")

    return unicodedata.normalize("NFC", "".join(pieces))

508 

509 

def encode(
    s: Union[str, bytes, bytearray],
    strict: bool = False,
    uts46: bool = False,
    std3_rules: bool = False,
    transitional: bool = False,
) -> bytes:
    """Convert a domain name to its ASCII (A-label) representation.

    The input is split into labels, each label is converted with
    :func:`alabel`, and the results are joined with ``.``. In strict mode
    only ``U+002E`` separates labels; otherwise ``U+3002``, ``U+FF0E``
    and ``U+FF61`` are accepted as separators too. A single trailing
    separator is preserved as a trailing ``.``. When ``uts46`` is set,
    the input is first passed through :func:`uts46_remap`.

    :param s: The domain name to encode. Non-``str`` input is decoded as
        ASCII.
    :param strict: If ``True``, only ``U+002E`` is a label separator.
    :param uts46: If ``True``, apply UTS #46 mapping before encoding.
    :param std3_rules: Passed to :func:`uts46_remap` when ``uts46`` is
        ``True``.
    :param transitional: Passed to :func:`uts46_remap` when ``uts46`` is
        ``True``. Deprecated: a :class:`DeprecationWarning` is emitted
        whenever it is set.
    :returns: The encoded domain as ASCII :class:`bytes`.
    :raises IDNAError: If the input cannot be decoded as ASCII, is empty,
        is too long, or contains an invalid label.
    """
    if transitional:
        warnings.warn(
            "Transitional processing has been removed from UTS #46. "
            "The transitional argument will be removed in a future version.",
            DeprecationWarning,
            stacklevel=2,
        )
    if not isinstance(s, str):
        try:
            s = str(s, "ascii")
        except (UnicodeDecodeError, TypeError) as err:
            raise IDNAError("should pass a unicode string to the function rather than a byte string.") from err
    if len(s) > _max_input_length:
        raise IDNAError("Domain too long")
    if uts46:
        s = uts46_remap(s, std3_rules, transitional)

    # Bail out early on anything longer than the maximum DNS domain length
    # so that no per-label work is spent on oversized input
    if not valid_string_length(s, trailing_dot=True):
        raise IDNAError("Domain too long")

    labels = s.split(".") if strict else _unicode_dots_re.split(s)
    if not labels or labels == [""]:
        raise IDNAError("Empty domain")

    # An empty final label means the input ended with a separator
    has_trailing_dot = labels[-1] == ""
    if has_trailing_dot:
        labels.pop()

    encoded_labels = []
    for label in labels:
        encoded = alabel(label)
        if not encoded:
            raise IDNAError("Empty label")
        encoded_labels.append(encoded)
    if has_trailing_dot:
        encoded_labels.append(b"")

    domain = b".".join(encoded_labels)
    if not valid_string_length(domain, has_trailing_dot):
        raise IDNAError("Domain too long")
    return domain

580 

581 

def decode(
    s: Union[str, bytes, bytearray],
    strict: bool = False,
    uts46: bool = False,
    std3_rules: bool = False,
) -> str:
    """Convert an A-label-encoded domain name back to Unicode.

    The input is split into labels, each label is converted with
    :func:`ulabel`, and the results are joined with ``.``. In strict mode
    only ``U+002E`` separates labels; otherwise ``U+3002``, ``U+FF0E``
    and ``U+FF61`` are accepted as separators too. A single trailing
    separator is preserved as a trailing ``.``. When ``uts46`` is set,
    the input is first passed through :func:`uts46_remap` with
    transitional processing disabled.

    :param s: The domain name to decode. Non-``str`` input is decoded as
        ASCII.
    :param strict: If ``True``, only ``U+002E`` is a label separator.
    :param uts46: If ``True``, apply UTS #46 mapping before decoding.
    :param std3_rules: Passed to :func:`uts46_remap` when ``uts46`` is
        ``True``.
    :returns: The decoded domain as a Unicode string.
    :raises IDNAError: If the input is not valid ASCII, is empty, is too
        long, or contains an invalid label.
    """
    if not isinstance(s, str):
        try:
            s = str(s, "ascii")
        except (UnicodeDecodeError, TypeError) as err:
            raise IDNAError("Invalid ASCII in A-label") from err
    if len(s) > _max_input_length:
        raise IDNAError("Domain too long")
    if uts46:
        s = uts46_remap(s, std3_rules, False)
    # Bail out early on anything longer than the maximum DNS domain length
    # so that no per-label work is spent on oversized input
    if not valid_string_length(s, trailing_dot=True):
        raise IDNAError("Domain too long")

    labels = s.split(".") if strict else _unicode_dots_re.split(s)
    if not labels or labels == [""]:
        raise IDNAError("Empty domain")

    # An empty final label means the input ended with a separator
    has_trailing_dot = not labels[-1]
    if has_trailing_dot:
        labels.pop()

    decoded_labels = []
    for label in labels:
        decoded = ulabel(label)
        if not decoded:
            raise IDNAError("Empty label")
        decoded_labels.append(decoded)
    if has_trailing_dot:
        decoded_labels.append("")
    return ".".join(decoded_labels)