Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/idna/core.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

289 statements  

1import bisect 

2import re 

3import unicodedata 

4import warnings 

5from typing import Optional, Union 

6 

7from . import idnadata 

8from .intranges import intranges_contain 

9 

# Combining-class value that valid_contextj() compares against the result of
# _combining_class() for the codepoint preceding a joiner.
# NOTE(review): presumably the canonical combining class of Virama — confirm.
_virama_combining_class = 9
# ACE prefix: prepended by alabel() and stripped by ulabel().
_alabel_prefix = b"xn--"
# Characters accepted as label separators when not in strict mode:
# U+002E, U+3002, U+FF0E and U+FF61.
_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]")

13 

14 

15# Bidi category sets from RFC 5893, hoisted out of the per-codepoint loop 

16_bidi_rtl_first = frozenset({"R", "AL"}) 

17_bidi_rtl_categories = frozenset({"R", "AL", "AN"}) 

18_bidi_rtl_allowed = frozenset({"R", "AL", "AN", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"}) 

19_bidi_rtl_valid_ending = frozenset({"R", "AL", "EN", "AN"}) 

20_bidi_rtl_numeric = frozenset({"AN", "EN"}) 

21_bidi_ltr_allowed = frozenset({"L", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"}) 

22_bidi_ltr_valid_ending = frozenset({"L", "EN"}) 

23_bidi_joiner_l_or_d = frozenset({ord("L"), ord("D")}) 

24_bidi_joiner_r_or_d = frozenset({ord("R"), ord("D")}) 

25 

26 

class IDNAError(UnicodeError):
    """Root of the exception hierarchy for every IDNA processing failure."""

31 

32 

class IDNABidiError(IDNAError):
    """Raised when a label does not meet the bidirectional requirements."""

37 

38 

class InvalidCodepoint(IDNAError):
    """Raised when a label uses a disallowed or unallocated codepoint."""

43 

44 

class InvalidCodepointContext(IDNAError):
    """Raised when a codepoint is not permitted in the context where it appears."""

49 

50 

51def _combining_class(cp: int) -> int: 

52 v = unicodedata.combining(chr(cp)) 

53 if v == 0 and not unicodedata.name(chr(cp)): 

54 raise ValueError("Unknown character in unicodedata") 

55 return v 

56 

57 

def _is_script(cp: str, script: str) -> bool:
    """Tell whether the character ``cp`` falls in the ranges listed for ``script``.

    :param cp: A single character.
    :param script: Key into ``idnadata.scripts``.
    :returns: The result of ``intranges_contain`` for the character's codepoint.
    """
    codepoint = ord(cp)
    return intranges_contain(codepoint, idnadata.scripts[script])

60 

61 

62def _punycode(s: str) -> bytes: 

63 return s.encode("punycode") 

64 

65 

66def _unot(s: int) -> str: 

67 return f"U+{s:04X}" 

68 

69 

def valid_label_length(label: Union[bytes, str]) -> bool:
    """Tell whether a single label fits within the DNS label size limit.

    The limit is 63 (:rfc:`1035`, :rfc:`5891` §4.2.4). The check uses
    ``len(label)``, so a :class:`str` is measured in characters and
    :class:`bytes` in octets.

    :param label: The label to measure.
    :returns: ``True`` when ``len(label)`` is at most 63, ``False``
        otherwise.
    """
    max_label_length = 63
    return not len(label) > max_label_length

83 

84 

def valid_string_length(domain: Union[bytes, str], trailing_dot: bool) -> bool:
    """Tell whether a whole domain name fits within the DNS name size limit.

    The limit (:rfc:`1035`) is 253 without a trailing dot and 254 when the
    trailing dot is counted. The check uses ``len(domain)``.

    :param domain: The complete domain name, possibly with several labels.
    :param trailing_dot: ``True`` when ``domain`` carries a trailing ``.``.
    :returns: ``True`` when the name is within the applicable limit,
        ``False`` otherwise.
    """
    limit = 253
    if trailing_dot:
        limit = 254
    return len(domain) <= limit

97 

98 

def check_bidi(label: str, check_ltr: bool = False) -> bool:
    """Check a single label against the Bidi Rule of :rfc:`5893`.

    The rule is enforced only if the label holds at least one character of
    bidirectional class ``R``, ``AL`` or ``AN``, unless ``check_ltr`` asks
    for it to be enforced on left-to-right-only labels too.

    :param label: The label to check, as a Unicode string.
    :param check_ltr: When ``True``, enforce the rule even if no
        right-to-left character is present.
    :returns: ``True`` when the label passes (or the rule does not apply).
    :raises IDNABidiError: If a codepoint has no known bidirectional class
        or any of conditions 1-6 of the Bidi Rule is broken.
    """
    # First pass: every codepoint must have a known class, and we note
    # whether any of them makes this a bidirectional label.
    has_rtl = False
    for idx, char in enumerate(label, 1):
        bidi_class = unicodedata.bidirectional(char)
        if not bidi_class:
            # String likely comes from a newer version of Unicode
            raise IDNABidiError(f"Unknown directionality in label {repr(label)} at position {idx}")
        if bidi_class in _bidi_rtl_categories:
            has_rtl = True
    if not (has_rtl or check_ltr):
        return True

    # Bidi rule 1: the first codepoint fixes the direction of the label.
    first_class = unicodedata.bidirectional(label[0])
    if first_class == "L":
        rtl = False
    elif first_class in _bidi_rtl_first:
        rtl = True
    else:
        raise IDNABidiError(f"First codepoint in label {repr(label)} must be directionality L, R or AL")

    # Tracks whether the last non-NSM codepoint seen so far is an allowed
    # final codepoint for the label's direction.
    ends_validly = False

    if rtl:
        seen_number_type: Optional[str] = None
        for idx, char in enumerate(label, 1):
            bidi_class = unicodedata.bidirectional(char)
            # Bidi rule 2
            if bidi_class not in _bidi_rtl_allowed:
                raise IDNABidiError(f"Invalid direction for codepoint at position {idx} in a right-to-left label")
            # Bidi rule 3 (trailing NSM codepoints leave the state untouched)
            if bidi_class in _bidi_rtl_valid_ending:
                ends_validly = True
            elif bidi_class != "NSM":
                ends_validly = False
            # Bidi rule 4: AN and EN must not both occur
            if bidi_class not in _bidi_rtl_numeric:
                continue
            if not seen_number_type:
                seen_number_type = bidi_class
            elif seen_number_type != bidi_class:
                raise IDNABidiError("Can not mix numeral types in a right-to-left label")
    else:
        for idx, char in enumerate(label, 1):
            bidi_class = unicodedata.bidirectional(char)
            # Bidi rule 5
            if bidi_class not in _bidi_ltr_allowed:
                raise IDNABidiError(f"Invalid direction for codepoint at position {idx} in a left-to-right label")
            # Bidi rule 6 (trailing NSM codepoints leave the state untouched)
            if bidi_class in _bidi_ltr_valid_ending:
                ends_validly = True
            elif bidi_class != "NSM":
                ends_validly = False

    if ends_validly:
        return True
    raise IDNABidiError("Label ends with illegal codepoint directionality")

171 

172 

def check_initial_combiner(label: str) -> bool:
    """Ensure a label does not open with a combining mark.

    :rfc:`5891` §4.2.3.2 forbids a first character whose Unicode general
    category is one of the ``M`` (Mark) categories.

    :param label: The label to inspect.
    :returns: ``True`` when the first character is not a mark.
    :raises IDNAError: If the first character is a combining character.
    """
    general_category = unicodedata.category(label[0])
    if general_category[:1] == "M":
        raise IDNAError("Label begins with an illegal combining character")
    return True

186 

187 

def check_hyphen_ok(label: str) -> bool:
    """Enforce the hyphen placement rules for a label.

    :rfc:`5891` §4.2.3.1: no hyphen (``U+002D``) in both the third and
    fourth positions (that pattern is reserved for A-labels), and no hyphen
    as the first or last character.

    :param label: The label to inspect.
    :returns: ``True`` when all hyphen rules hold.
    :raises IDNAError: If one of the hyphen rules is broken.
    """
    third_and_fourth = label[2:4]
    if third_and_fourth == "--":
        raise IDNAError("Label has disallowed hyphens in 3rd and 4th position")
    if "-" in (label[0], label[-1]):
        raise IDNAError("Label must not start or end with a hyphen")
    return True

204 

205 

def check_nfc(label: str) -> None:
    """Verify that a label is already in Unicode Normalization Form C.

    :param label: The label to inspect.
    :raises IDNAError: If NFC-normalising ``label`` would change it.
    """
    normalized = unicodedata.normalize("NFC", label)
    if label == normalized:
        return
    raise IDNAError("Label must be in Normalization Form C")

214 

215 

def valid_contextj(label: str, pos: int) -> bool:
    """Apply the CONTEXTJ rules of :rfc:`5892` Appendix A to one codepoint.

    Two joiner codepoints are handled: ``U+200C`` (ZERO WIDTH NON-JOINER,
    Appendix A.1) and ``U+200D`` (ZERO WIDTH JOINER, Appendix A.2).

    :param label: The label that holds the codepoint.
    :param pos: Index into ``label`` of the codepoint to validate.
    :returns: ``True`` if the codepoint at ``pos`` is a joiner that meets
        its rule; ``False`` if it breaks the rule or is not one of the two
        joiners.
    :raises ValueError: Propagated from the combining-class lookup of the
        preceding codepoint when that codepoint is unknown.
    """
    cp_value = ord(label[pos])
    if cp_value not in (0x200C, 0x200D):
        return False

    # Both joiners are accepted directly after a codepoint whose combining
    # class equals the virama class.
    after_virama = pos > 0 and _combining_class(ord(label[pos - 1])) == _virama_combining_class
    if cp_value == 0x200D:
        return after_virama
    if after_virama:
        return True

    transparent = ord("T")

    # U+200C, left context: skipping joining type T, the nearest preceding
    # codepoint must have joining type L or D.
    for i in range(pos - 1, -1, -1):
        joining_type = idnadata.joining_types().get(ord(label[i]))
        if joining_type == transparent:
            continue
        if joining_type not in _bidi_joiner_l_or_d:
            return False
        break
    else:
        # Ran out of preceding codepoints without finding L or D.
        return False

    # U+200C, right context: skipping joining type T, the nearest following
    # codepoint must have joining type R or D.
    for i in range(pos + 1, len(label)):
        joining_type = idnadata.joining_types().get(ord(label[i]))
        if joining_type == transparent:
            continue
        return joining_type in _bidi_joiner_r_or_d
    return False

268 

269 

def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
    """Apply the CONTEXTO rules of :rfc:`5892` Appendix A to one codepoint.

    Handles ``U+00B7``, ``U+0375``, ``U+05F3``/``U+05F4``, ``U+30FB`` and
    the digit ranges ``U+0660..U+0669`` and ``U+06F0..U+06F9``.

    :param label: The label that holds the codepoint.
    :param pos: Index into ``label`` of the codepoint to validate.
    :param exception: Not used by this function.
    :returns: ``True`` if the codepoint at ``pos`` meets its rule;
        ``False`` if it breaks the rule or is not one of the codepoints
        handled here.
    """
    cp_value = ord(label[pos])

    if cp_value == 0x00B7:
        # Must be flanked on both sides by U+006C ("l").
        if not 0 < pos < len(label) - 1:
            return False
        return label[pos - 1] == "\u006c" and label[pos + 1] == "\u006c"

    if cp_value == 0x0375:
        # The following character must be Greek.
        has_next = pos < len(label) - 1 and len(label) > 1
        return _is_script(label[pos + 1], "Greek") if has_next else False

    if cp_value in (0x05F3, 0x05F4):
        # The preceding character must be Hebrew.
        return _is_script(label[pos - 1], "Hebrew") if pos > 0 else False

    if cp_value == 0x30FB:
        # At least one other character must be Hiragana, Katakana or Han.
        return any(
            ch != "\u30fb" and (_is_script(ch, "Hiragana") or _is_script(ch, "Katakana") or _is_script(ch, "Han"))
            for ch in label
        )

    if 0x660 <= cp_value <= 0x669:
        # Must not share a label with any digit from U+06F0..U+06F9.
        return all(not (0x6F0 <= ord(ch) <= 0x06F9) for ch in label)

    if 0x6F0 <= cp_value <= 0x6F9:
        # Must not share a label with any digit from U+0660..U+0669.
        return all(not (0x660 <= ord(ch) <= 0x0669) for ch in label)

    return False

314 

315 

def check_label(label: Union[str, bytes, bytearray]) -> None:
    """Run the full set of IDNA 2008 validity checks on a single label.

    Applies, in order: NFC normalisation (:func:`check_nfc`), hyphen
    restrictions (:func:`check_hyphen_ok`), the no-leading-combiner rule
    (:func:`check_initial_combiner`), per-codepoint validity (PVALID,
    CONTEXTJ, CONTEXTO classes from :rfc:`5892`), and the Bidi Rule
    (:func:`check_bidi`).

    :param label: The label to validate. ``bytes`` or ``bytearray`` input
        is decoded as UTF-8 first.
    :raises IDNAError: If the label is empty, too long, fails a structural
        rule, or has an unknown codepoint next to a joiner.
    :raises InvalidCodepoint: If the label contains a codepoint that is in
        none of the PVALID, CONTEXTJ or CONTEXTO classes.
    :raises InvalidCodepointContext: If a CONTEXTJ or CONTEXTO codepoint
        is not valid in its context.
    :raises IDNABidiError: If the Bidi Rule is violated.
    """
    if isinstance(label, (bytes, bytearray)):
        label = label.decode("utf-8")
    if len(label) == 0:
        raise IDNAError("Empty Label")

    # Reject on domain length rather than label length to support some UTS 46
    # use cases, while still bounding the work done by the contextual rules.
    if not valid_string_length(label, trailing_dot=True):
        raise IDNAError("Label too long")

    check_nfc(label)
    check_hyphen_ok(label)
    check_initial_combiner(label)

    for pos, cp in enumerate(label):
        cp_value = ord(cp)
        if intranges_contain(cp_value, idnadata.codepoint_classes["PVALID"]):
            continue
        elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTJ"]):
            # Only the lookup is guarded: InvalidCodepointContext is itself a
            # ValueError (via IDNAError -> UnicodeError), so raising it inside
            # this try would be caught below and turned into the wrong error.
            try:
                joiner_ok = valid_contextj(label, pos)
            except ValueError as err:
                raise IDNAError(
                    f"Unknown codepoint adjacent to joiner {_unot(cp_value)} at position {pos + 1} in {repr(label)}"
                ) from err
            if not joiner_ok:
                raise InvalidCodepointContext(
                    f"Joiner {_unot(cp_value)} not allowed at position {pos + 1} in {repr(label)}"
                )
        elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTO"]):
            if not valid_contexto(label, pos):
                raise InvalidCodepointContext(
                    f"Codepoint {_unot(cp_value)} not allowed at position {pos + 1} in {repr(label)}"
                )
        else:
            raise InvalidCodepoint(f"Codepoint {_unot(cp_value)} at position {pos + 1} of {repr(label)} not allowed")

    check_bidi(label)

371 

372 

def alabel(label: str) -> bytes:
    """Produce the A-label form of a single label.

    A label that encodes as ASCII is validated through :func:`ulabel` and
    returned as those ASCII bytes. Any other label is validated with
    :func:`check_label`, Punycode-encoded and prefixed with ``xn--``
    (the ACE form of :rfc:`5891` §4).

    :param label: The label to convert, as a Unicode string.
    :returns: The A-label as :class:`bytes`.
    :raises IDNAError: If validation fails or the result is longer than
        63 octets.
    """
    candidate: Optional[bytes]
    try:
        candidate = label.encode("ascii")
        # ASCII-only input: validate only; the encoded bytes are the result.
        ulabel(candidate)
    except UnicodeEncodeError:
        candidate = None

    if candidate is None:
        check_label(label)
        candidate = _alabel_prefix + _punycode(label)

    if valid_label_length(candidate):
        return candidate
    raise IDNAError("Label too long")

402 

403 

def ulabel(label: Union[str, bytes, bytearray]) -> str:
    """Produce the U-label form of a single label.

    This is the counterpart of :func:`alabel`. A label carrying the
    ``xn--`` prefix (matched case-insensitively) is Punycode-decoded and
    validated. A non-ASCII string is validated and returned as is. An
    ASCII label without the prefix is lower-cased, validated and returned
    as a string.

    :param label: The label to convert. ``bytes``/``bytearray`` input is
        treated as ASCII.
    :returns: The U-label as a Unicode string.
    :raises IDNAError: If the label is malformed or fails validation.
    """
    if isinstance(label, (bytes, bytearray)):
        raw = bytes(label)
    else:
        try:
            raw = label.encode("ascii")
        except UnicodeEncodeError:
            # Already a Unicode label: validate it, nothing to decode.
            check_label(label)
            return label

    raw = raw.lower()
    if not raw.startswith(_alabel_prefix):
        check_label(raw)
        return raw.decode("ascii")

    payload = raw[len(_alabel_prefix) :]
    if not payload:
        raise IDNAError("Malformed A-label, no Punycode eligible content found")
    if payload[-1:] == b"-":
        raise IDNAError("A-label must not end with a hyphen")

    try:
        decoded = payload.decode("punycode")
    except UnicodeError as err:
        raise IDNAError("Invalid A-label") from err
    check_label(decoded)
    return decoded

443 

444 

def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str:
    """Apply the UTS #46 character mapping to a domain string.

    Implements the mapping table from `UTS #46 §4
    <https://www.unicode.org/reports/tr46/>`_: each character is kept,
    replaced, dropped, or rejected based on its status (``V``, ``M``,
    ``D``, ``3``, ``I``). The result is returned in Normalisation Form C.

    :param domain: The full domain name to remap.
    :param std3_rules: If ``True``, apply the stricter STD3 ASCII rules
        (status ``3`` codepoints raise instead of being kept or mapped).
    :param transitional: If ``True``, use transitional processing (status
        ``D`` codepoints are mapped instead of kept). Transitional
        processing has been removed from UTS #46 and this option is
        retained only for backwards compatibility.
    :returns: The remapped domain, in Normalisation Form C.
    :raises InvalidCodepoint: If the domain contains a disallowed
        codepoint under the chosen rules.
    """
    from .uts46data import uts46data

    # Collect the output pieces and join once at the end, rather than
    # growing a string with += on every character.
    pieces = []

    for pos, char in enumerate(domain):
        code_point = ord(char)
        # Codepoints below 256 index the table directly; the rest are
        # located with a binary search for the row covering the codepoint.
        uts46row = uts46data[code_point if code_point < 256 else bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
        status = uts46row[1]
        replacement: Optional[str] = None
        if len(uts46row) == 3:
            replacement = uts46row[2]  # ty: ignore[index-out-of-bounds]

        # UTS #46 §4: V is always valid, D is deviation (kept unless transitional),
        # 3 is disallowed-STD3 (kept unmapped if std3_rules is off and no mapping).
        keep_as_is = (
            status == "V" or (status == "D" and not transitional) or (status == "3" and not std3_rules and replacement is None)
        )
        # M is mapped, 3-with-replacement and transitional D fall through to the
        # same replacement output path.
        use_replacement = replacement is not None and (
            status == "M" or (status == "3" and not std3_rules) or (status == "D" and transitional)
        )

        if keep_as_is:
            pieces.append(char)
        elif use_replacement:
            assert replacement is not None  # narrowed by use_replacement
            pieces.append(replacement)
        elif status == "I":
            # Ignored codepoints contribute nothing to the output.
            continue
        else:
            raise InvalidCodepoint(f"Codepoint {_unot(code_point)} not allowed at position {pos + 1} in {repr(domain)}")

    return unicodedata.normalize("NFC", "".join(pieces))

498 

499 

def encode(
    s: Union[str, bytes, bytearray],
    strict: bool = False,
    uts46: bool = False,
    std3_rules: bool = False,
    transitional: bool = False,
) -> bytes:
    """Convert a domain name to its ASCII (A-label) representation.

    The input is split into labels, each label is passed through
    :func:`alabel`, and the results are joined with ``.``. With ``strict``
    only ``U+002E`` separates labels; otherwise ``U+3002``, ``U+FF0E`` and
    ``U+FF61`` are separators too. When ``uts46`` is set the input is
    first run through :func:`uts46_remap`.

    :param s: The domain name to encode. Non-``str`` input is decoded as
        ASCII.
    :param strict: If ``True``, only ``U+002E`` is a label separator.
    :param uts46: If ``True``, apply UTS #46 mapping before encoding.
    :param std3_rules: Passed to :func:`uts46_remap` when ``uts46`` is
        ``True``.
    :param transitional: Passed to :func:`uts46_remap` when ``uts46`` is
        ``True``. Deprecated: a truthy value emits a
        :class:`DeprecationWarning`.
    :returns: The encoded domain as :class:`bytes`.
    :raises IDNAError: If the domain is empty, has an invalid or empty
        label, cannot be decoded as ASCII, or is too long.
    """
    if transitional:
        warnings.warn(
            "Transitional processing has been removed from UTS #46. "
            "The transitional argument will be removed in a future version.",
            DeprecationWarning,
            stacklevel=2,
        )

    if isinstance(s, str):
        domain = s
    else:
        try:
            domain = str(s, "ascii")
        except (UnicodeDecodeError, TypeError) as err:
            raise IDNAError("should pass a unicode string to the function rather than a byte string.") from err
    if uts46:
        domain = uts46_remap(domain, std3_rules, transitional)

    # Cheap up-front bound so over-long input is rejected before any
    # per-label work is done.
    if not valid_string_length(domain, trailing_dot=True):
        raise IDNAError("Domain too long")

    labels = domain.split(".") if strict else _unicode_dots_re.split(domain)
    if not labels or labels == [""]:
        raise IDNAError("Empty domain")

    # A final empty label means the name ended with a separator.
    trailing_dot = labels[-1] == ""
    if trailing_dot:
        labels.pop()

    encoded_labels = []
    for label in labels:
        encoded = alabel(label)
        if not encoded:
            raise IDNAError("Empty label")
        encoded_labels.append(encoded)
    if trailing_dot:
        encoded_labels.append(b"")

    result = b".".join(encoded_labels)
    if not valid_string_length(result, trailing_dot):
        raise IDNAError("Domain too long")
    return result

568 

569 

def decode(
    s: Union[str, bytes, bytearray],
    strict: bool = False,
    uts46: bool = False,
    std3_rules: bool = False,
) -> str:
    """Convert an A-label-encoded domain name back to Unicode.

    The input is split into labels (with ``strict`` only on ``U+002E``;
    otherwise also on ``U+3002``, ``U+FF0E`` and ``U+FF61``), each label is
    passed through :func:`ulabel`, and the results are joined with ``.``.
    When ``uts46`` is set the input is first run through
    :func:`uts46_remap`.

    :param s: The domain name to decode. Non-``str`` input is decoded as
        ASCII.
    :param strict: If ``True``, only ``U+002E`` is a label separator.
    :param uts46: If ``True``, apply UTS #46 mapping before decoding.
    :param std3_rules: Passed to :func:`uts46_remap` when ``uts46`` is
        ``True``.
    :returns: The decoded domain as a Unicode string.
    :raises IDNAError: If the input is not valid ASCII, is empty or too
        long, or has an invalid or empty label.
    """
    if isinstance(s, str):
        domain = s
    else:
        try:
            domain = str(s, "ascii")
        except (UnicodeDecodeError, TypeError) as err:
            raise IDNAError("Invalid ASCII in A-label") from err
    if uts46:
        domain = uts46_remap(domain, std3_rules, False)

    # Cheap up-front bound so over-long input is rejected before any
    # per-label work is done.
    if not valid_string_length(domain, trailing_dot=True):
        raise IDNAError("Domain too long")

    labels = domain.split(".") if strict else _unicode_dots_re.split(domain)
    if not labels or labels == [""]:
        raise IDNAError("Empty domain")

    # A final empty label means the name ended with a separator.
    trailing_dot = not labels[-1]
    if trailing_dot:
        labels.pop()

    decoded_labels = []
    for label in labels:
        decoded = ulabel(label)
        if not decoded:
            raise IDNAError("Empty label")
        decoded_labels.append(decoded)
    if trailing_dot:
        decoded_labels.append("")
    return ".".join(decoded_labels)