Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbformat/validator.py: 32%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

274 statements  

1"""Notebook format validators.""" 

2 

3# Copyright (c) IPython Development Team. 

4# Distributed under the terms of the Modified BSD License. 

5from __future__ import annotations 

6 

7import json 

8import pprint 

9import time 

10import warnings 

11from copy import deepcopy 

12from pathlib import Path 

13from textwrap import dedent 

14from typing import Any 

15 

16from ._imports import import_item 

17from .corpus.words import generate_corpus_id 

18from .json_compat import ValidationError, _validator_for_name, get_current_validator 

19from .reader import get_version 

20from .warnings import DuplicateCellId, MissingIDFieldWarning 

21 

# Cache of validator instances built by ``get_validator``; entries are
# looked up by a tuple that starts with the validator backend name.
validators = {}
# Sentinel used as the default for deprecated keyword arguments of
# ``validate`` so that "not passed" can be told apart from any real value.
_deprecated = object()


# Public names exported by ``from nbformat.validator import *``.
__all__ = [
    "NotebookValidationError",
    "ValidationError",
    "better_validation_error",
    "get_validator",
    "isvalid",
    "iter_validate",
    "normalize",
    "validate",
]

36 

37 

38def _relax_additional_properties(obj): 

39 """relax any `additionalProperties`""" 

40 if isinstance(obj, dict): 

41 for key, value in obj.items(): 

42 value = ( # noqa: PLW2901 

43 True if key == "additionalProperties" else _relax_additional_properties(value) 

44 ) 

45 obj[key] = value 

46 elif isinstance(obj, list): 

47 for i, value in enumerate(obj): 

48 obj[i] = _relax_additional_properties(value) 

49 return obj 

50 

51 

52def _allow_undefined(schema): 

53 schema["definitions"]["cell"]["oneOf"].append({"$ref": "#/definitions/unrecognized_cell"}) 

54 schema["definitions"]["output"]["oneOf"].append({"$ref": "#/definitions/unrecognized_output"}) 

55 return schema 

56 

57 

def get_validator(version=None, version_minor=None, relax_add_props=False, name=None):
    """Load the JSON schema into a Validator.

    Parameters
    ----------
    version : int, optional
        Major notebook format version; defaults to ``current_nbformat``.
    version_minor : int, optional
        Minor notebook format version; defaults to the ``nbformat_minor``
        attribute of the ``nbformat.v<version>`` module (0 if absent).
    relax_add_props : bool
        When True, return a validator whose schema has every
        ``additionalProperties`` entry relaxed to True.
    name : str, optional
        Name of the validator backend to use; when falsy the current
        validator backend is used.

    Returns
    -------
    A validator instance, or None when no schema file can be found for the
    requested version.

    Notes
    -----
    Validators are cached in the module-level ``validators`` dict. Strict and
    relaxed validators are stored under different keys so that asking for a
    relaxed validator never replaces the strict one handed out to later
    callers.
    """
    if version is None:
        from . import current_nbformat  # noqa:PLC0415

        version = current_nbformat

    v = import_item("nbformat.v%s" % version)
    current_minor = getattr(v, "nbformat_minor", 0)
    if version_minor is None:
        version_minor = current_minor

    current_validator = _validator_for_name(name) if name else get_current_validator()

    version_tuple = (current_validator.name, version, version_minor)
    # Relaxed validators get their own cache slot; strict ones keep the
    # historical three-element key.
    cache_key = version_tuple + ("relax_add_props",) if relax_add_props else version_tuple

    if cache_key not in validators:
        try:
            schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
        except AttributeError:
            return None

        if relax_add_props:
            # this allows properties to be added for intermediate
            # representations while validating for all other kinds of errors
            schema_json = _relax_additional_properties(schema_json)
        elif current_minor < version_minor:
            # notebook from the future, relax all `additionalProperties: False` requirements
            schema_json = _relax_additional_properties(schema_json)
            # and allow undefined cell types and outputs
            schema_json = _allow_undefined(schema_json)

        validators[cache_key] = current_validator(schema_json)

    return validators[cache_key]

100 

101 

102def _get_schema_json(v, version=None, version_minor=None): 

103 """ 

104 Gets the json schema from a given imported library and nbformat version. 

105 """ 

106 if (version, version_minor) in v.nbformat_schema: 

107 schema_path = str(Path(v.__file__).parent / v.nbformat_schema[(version, version_minor)]) 

108 elif version_minor > v.nbformat_minor: 

109 # load the latest schema 

110 schema_path = str(Path(v.__file__).parent / v.nbformat_schema[(None, None)]) 

111 else: 

112 msg = "Cannot find appropriate nbformat schema file." 

113 raise AttributeError(msg) 

114 with Path(schema_path).open(encoding="utf8") as f: 

115 schema_json = json.load(f) 

116 return schema_json # noqa: RET504 

117 

118 

def isvalid(nbjson, ref=None, version=None, version_minor=None):
    """Report whether notebook JSON conforms to the notebook format schema.

    Returns True when ``validate`` accepts the input and False when it raises
    a ValidationError. DeprecationWarning and MissingIDFieldWarning are
    silenced for the duration of the check.

    An AssertionError is raised if the check modified ``nbjson``.

    Use ``validate`` instead to see the individual errors.
    """
    snapshot = deepcopy(nbjson)
    try:
        with warnings.catch_warnings():
            for silenced in (DeprecationWarning, MissingIDFieldWarning):
                warnings.filterwarnings("ignore", category=silenced)
            validate(nbjson, ref, version, version_minor, repair_duplicate_cell_ids=False)
    except ValidationError:
        outcome = False
    else:
        outcome = True
    finally:
        # Validation must never alter the caller's notebook.
        if nbjson != snapshot:
            raise AssertionError
    return outcome

140 

141 

142def _format_as_index(indices): 

143 """ 

144 (from jsonschema._utils.format_as_index, copied to avoid relying on private API) 

145 

146 Construct a single string containing indexing operations for the indices. 

147 

148 For example, [1, 2, "foo"] -> [1][2]["foo"] 

149 """ 

150 

151 if not indices: 

152 return "" 

153 return "[%s]" % "][".join(repr(index) for index in indices) 

154 

155 

156_ITEM_LIMIT = 16 

157_STR_LIMIT = 64 

158 

159 

160def _truncate_obj(obj): 

161 """Truncate objects for use in validation tracebacks 

162 

163 Cell and output lists are squashed, as are long strings, lists, and dicts. 

164 """ 

165 if isinstance(obj, dict): 

166 truncated_dict = {k: _truncate_obj(v) for k, v in list(obj.items())[:_ITEM_LIMIT]} 

167 if isinstance(truncated_dict.get("cells"), list): 

168 truncated_dict["cells"] = ["...%i cells..." % len(obj["cells"])] 

169 if isinstance(truncated_dict.get("outputs"), list): 

170 truncated_dict["outputs"] = ["...%i outputs..." % len(obj["outputs"])] 

171 

172 if len(obj) > _ITEM_LIMIT: 

173 truncated_dict["..."] = "%i keys truncated" % (len(obj) - _ITEM_LIMIT) 

174 return truncated_dict 

175 if isinstance(obj, list): 

176 truncated_list = [_truncate_obj(item) for item in obj[:_ITEM_LIMIT]] 

177 if len(obj) > _ITEM_LIMIT: 

178 truncated_list.append("...%i items truncated..." % (len(obj) - _ITEM_LIMIT)) 

179 return truncated_list 

180 if isinstance(obj, str): 

181 truncated_str = obj[:_STR_LIMIT] 

182 if len(obj) > _STR_LIMIT: 

183 truncated_str += "..." 

184 return truncated_str 

185 return obj 

186 

187 

class NotebookValidationError(ValidationError):  # type:ignore[misc]
    """Schema ValidationError with truncated representation

    to avoid massive verbose tracebacks.

    The instance wraps another validation error (``original``) and forwards
    every attribute it does not define itself to that wrapped error.
    """

    def __init__(self, original, ref=None):
        """Wrap ``original``.

        ``ref`` names the schema definition that was validated against; a
        ``ref`` attribute already present on ``original`` takes precedence
        over the argument.
        """
        self.original = original
        self.ref = getattr(self.original, "ref", ref)
        self.message = self.original.message

    def __getattr__(self, key):
        """Get an attribute from the wrapped error.

        Only called for attributes not found on this instance.
        """
        if key == "original":
            # ``original`` itself is missing (instance created without
            # ``__init__``); delegating would recurse without end.
            raise AttributeError(key)
        return getattr(self.original, key)

    def __unicode__(self):
        """Custom str for validation errors

        avoids dumping full schema and notebook to logs
        """
        error = self.original
        instance = _truncate_obj(error.instance)

        return "\n".join(
            [
                error.message,
                "",
                "Failed validating {!r} in {}{}:".format(
                    error.validator,
                    self.ref or "notebook",
                    # the last schema path element is dropped from the display
                    _format_as_index(list(error.relative_schema_path)[:-1]),
                ),
                "",
                "On instance%s:" % _format_as_index(error.relative_path),
                pprint.pformat(instance, width=78),
            ]
        )

    __str__ = __unicode__

228 

229 

def better_validation_error(error, version, version_minor):
    """Get better ValidationError on oneOf failures

    oneOf errors aren't informative.
    if it's a cell type or output_type error,
    try validating directly based on the type for a better error message

    Parameters
    ----------
    error
        The validation error to improve.
    version : int
    version_minor : int
        Notebook format version used when re-validating the failing instance.

    Returns
    -------
    ``error`` itself when its schema path is empty; otherwise a
    NotebookValidationError (or whatever the recursive call produced for the
    more specific sub-error).
    """
    if not error.schema_path:
        return error
    key = error.schema_path[-1]
    ref = None
    if isinstance(key, str) and key.endswith("Of") and isinstance(error.instance, dict):
        # Only string type names can be turned into a schema reference;
        # a malformed notebook may carry any JSON value here.
        if "cell_type" in error.instance:
            cell_type = error.instance["cell_type"]
            if isinstance(cell_type, str):
                ref = cell_type + "_cell"
        elif "output_type" in error.instance:
            output_type = error.instance["output_type"]
            if isinstance(output_type, str):
                ref = output_type

    if ref:
        try:
            validate(
                error.instance,
                ref,
                version=version,
                version_minor=version_minor,
            )
        except ValidationError as sub_error:
            # keep extending relative path
            error.relative_path.extend(sub_error.relative_path)
            sub_error.relative_path = error.relative_path
            better = better_validation_error(sub_error, version, version_minor)
            # The recursive call may hand back an error without a ``ref``
            # attribute (empty schema path), so do not assume it exists.
            if getattr(better, "ref", None) is None:
                better.ref = ref
            return better
        except Exception:  # noqa: S110
            # if it fails for some reason,
            # let the original error through
            pass
    return NotebookValidationError(error, ref)

269 

270 

def normalize(
    nbdict: Any,
    version: int | None = None,
    version_minor: int | None = None,
    *,
    relax_add_props: bool = False,
    strip_invalid_metadata: bool = False,
) -> tuple[int, Any]:
    """
    Normalise a notebook prior to validation.

    A deep copy of the notebook is normalised, so the argument itself is left
    untouched. Missing and duplicate cell ids are always repaired.

    Do not rely on this function in general: notebooks reaching nbformat
    should already be in normal form. If they are not, you likely have a bug
    and may have security issues.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
        Major format version; taken from the notebook when None.
    version_minor : int
        Minor format version; taken from the notebook when None.
    relax_add_props : bool
        Whether to allow extra property in the Json schema validating the
        notebook.
    strip_invalid_metadata : bool
        Whether to strip metadata that does not exist in the Json schema when
        validating the notebook.

    Returns
    -------
    changes : int
        number of changes in the notebooks
    notebook : dict
        deep-copy of the original object with relevant changes.

    """
    notebook = deepcopy(nbdict)
    detected_major, detected_minor = get_version(notebook)
    return _normalize(
        notebook,
        detected_major if version is None else version,
        detected_minor if version_minor is None else version_minor,
        True,
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    )

324 

325 

326def _normalize( 

327 nbdict: Any, 

328 version: int, 

329 version_minor: int, 

330 repair_duplicate_cell_ids: bool, 

331 relax_add_props: bool, 

332 strip_invalid_metadata: bool, 

333) -> tuple[int, Any]: 

334 """ 

335 Private normalisation routine. 

336 

337 This function attempts to normalize the `nbdict` passed to it. 

338 

339 As `_normalize()` is currently used both in `validate()` (for 

340 historical reasons), and in the `normalize()` public function, 

341 `_normalize()` does currently mutate `nbdict`. 

342 Ideally, once `validate()` stops calling `_normalize()`, `_normalize()` 

343 may stop mutating `nbdict`. 

344 

345 """ 

346 changes = 0 

347 

348 if (version, version_minor) >= (4, 5): 

349 # if we support cell ids ensure default ids are provided 

350 for cell in nbdict["cells"]: 

351 if "id" not in cell: 

352 warnings.warn( 

353 "Cell is missing an id field, this will become" 

354 " a hard error in future nbformat versions. You may want" 

355 " to use `normalize()` on your notebooks before validations" 

356 " (available since nbformat 5.1.4). Previous versions of nbformat" 

357 " are fixing this issue transparently, and will stop doing so" 

358 " in the future.", 

359 MissingIDFieldWarning, 

360 stacklevel=3, 

361 ) 

362 # Generate cell ids if any are missing 

363 if repair_duplicate_cell_ids: 

364 cell["id"] = generate_corpus_id() 

365 changes += 1 

366 

367 # if we support cell ids check for uniqueness when validating the whole notebook 

368 seen_ids = set() 

369 for cell in nbdict["cells"]: 

370 if "id" not in cell: 

371 continue 

372 cell_id = cell["id"] 

373 if cell_id in seen_ids: 

374 # Best effort to repair if we find a duplicate id 

375 if repair_duplicate_cell_ids: 

376 new_id = generate_corpus_id() 

377 cell["id"] = new_id 

378 changes += 1 

379 warnings.warn( 

380 f"Non-unique cell id {cell_id!r} detected. Corrected to {new_id!r}.", 

381 DuplicateCellId, 

382 stacklevel=3, 

383 ) 

384 else: 

385 msg = f"Non-unique cell id '{cell_id}' detected." 

386 raise ValidationError(msg) 

387 seen_ids.add(cell_id) 

388 if strip_invalid_metadata: 

389 changes += _strip_invalida_metadata( 

390 nbdict, version, version_minor, relax_add_props=relax_add_props 

391 ) 

392 return changes, nbdict 

393 

394 

def _dep_warn(field):
    """Emit the DeprecationWarning for a deprecated ``validate`` keyword.

    ``field`` is the keyword argument name interpolated into the message.
    The call first blocks for two seconds, then warns with ``stacklevel=3``.
    """
    # Deprecated since 2023 and security issue start to annoy people.
    # The sleep is a deliberate nuisance for callers still passing the
    # deprecated keyword.
    time.sleep(2)
    # regularly bump this by 1 sec.

    warnings.warn(
        dedent(
            f"""`{field}` kwargs of validate has been deprecated for security
        reasons, and will be removed soon.

        Please explicitly use the `n_changes, new_notebook = nbformat.validator.normalize(old_notebook, ...)` if you wish to
        normalise your notebook. `normalize` is available since nbformat 5.5.0

        """
        ),
        DeprecationWarning,
        stacklevel=3,
    )

413 

414 

def validate(
    nbdict: Any = None,
    ref: str | None = None,
    version: int | None = None,
    version_minor: int | None = None,
    relax_add_props: bool = False,
    nbjson: Any = None,
    repair_duplicate_cell_ids: bool = _deprecated,  # type: ignore[assignment]
    strip_invalid_metadata: bool = _deprecated,  # type: ignore[assignment]
) -> None:
    """Check a notebook dict-like object against the relevant format schema.

    Parameters
    ----------
    nbdict : dict
        notebook document
    ref : optional, str
        reference to the subset of the schema we want to validate against.
        for example ``"markdown_cell"``, `"code_cell"` ....
    version : int
    version_minor : int
        Format version to validate against. For a whole notebook
        (``ref is None``) missing values are read from the notebook; with a
        ``ref`` and no ``version``, 1.0 is assumed.
    relax_add_props : bool
        Whether to allow extra properties in the JSON schema validating the notebook.
        When True, all known fields are validated, but unknown fields are ignored.
    nbjson
        Backwards-compatible alias for ``nbdict``; used only when ``nbdict``
        is None.
    repair_duplicate_cell_ids : bool
        Deprecated since 5.5.0 - will be removed in the future.
    strip_invalid_metadata : bool
        Deprecated since 5.5.0 - will be removed in the future.

    Returns
    -------
    None

    Raises
    ------
    ValidationError if not valid.
    TypeError if neither ``nbdict`` nor ``nbjson`` is given.

    Notes
    -----
    Prior to Nbformat 5.5.0 the `validate` and `isvalid` method would silently
    try to fix invalid notebook and mutate arguments. This behavior is deprecated
    and will be removed in a near future.

    Please explicitly call `normalize` if you need to normalize notebooks.
    """
    assert ref is None or isinstance(ref, str)

    # Passing either deprecated keyword explicitly triggers the warning;
    # leaving it out selects the historical default.
    if strip_invalid_metadata is not _deprecated:
        _dep_warn("strip_invalid_metadata")
    else:
        strip_invalid_metadata = False

    if repair_duplicate_cell_ids is not _deprecated:
        _dep_warn("repair_duplicate_cell_ids")
    else:
        repair_duplicate_cell_ids = True

    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if ref is not None:
        # if ref is specified, and we don't have a version number, assume we're validating against 1.0
        if version is None:
            version, version_minor = 1, 0
    else:
        # if ref is not specified, we have a whole notebook, so we can get the version
        notebook_major, notebook_minor = get_version(nbdict)
        if version is None:
            version = notebook_major
        if version_minor is None:
            version_minor = notebook_minor
        assert isinstance(version, int)
        assert isinstance(version_minor, int)
        _normalize(
            nbdict,
            version,
            version_minor,
            repair_duplicate_cell_ids,
            relax_add_props=relax_add_props,
            strip_invalid_metadata=strip_invalid_metadata,
        )

    # Only the first reported error is raised.
    first_error = next(
        iter_validate(
            nbdict,
            ref=ref,
            version=version,
            version_minor=version_minor,
            relax_add_props=relax_add_props,
            strip_invalid_metadata=strip_invalid_metadata,
        ),
        None,
    )
    if first_error is not None:
        raise first_error

515 

516 

def _get_errors(
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool, *args: Any
) -> Any:
    """Return an iterator over the validation errors found in ``nbdict``.

    Extra positional ``args`` are forwarded to the validator's
    ``iter_errors``. When the current validator is not jsonschema and it
    reports errors, validation is redone with the jsonschema validator and
    its errors are returned instead.

    Raises ValidationError when no validator exists for the version.
    """
    validator = get_validator(version, version_minor, relax_add_props=relax_add_props)
    if not validator:
        msg = f"No schema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)

    found = list(validator.iter_errors(nbdict, *args))
    if not found or validator.name == "jsonschema":
        return iter(found)

    # jsonschema gives the best error messages.
    fallback = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    return fallback.iter_errors(nbdict, *args)

536 

537 

def _strip_invalida_metadata(
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool
) -> int:
    """
    This function tries to extract metadata errors from the validator and fix
    them if necessary. This mostly mean stripping unknown keys from metadata
    fields, or removing metadata fields altogether.

    ``nbdict`` is modified in place. Cells that are not dicts, or whose
    ``cell_type`` is missing or not a string, are left untouched because no
    cell schema can be selected for them.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
    version_minor : int
    relax_add_props : bool
        Whether to allow extra property in the Json schema validating the
        notebook.

    Returns
    -------
    int
        number of modifications

    Raises
    ------
    ValidationError
        If no jsonschema validator is available for the given version.

    """
    errors = _get_errors(nbdict, version, version_minor, relax_add_props)
    # Only the presence of at least one error matters at this point.
    if next(iter(errors), None) is None:
        return 0

    # jsonschema gives a better error tree.
    validator = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    if not validator:
        msg = f"No jsonschema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)
    error_tree = validator.error_tree(validator.iter_errors(nbdict))

    changes = 0
    if "metadata" in error_tree:
        for key in error_tree["metadata"]:
            nbdict["metadata"].pop(key, None)
            changes += 1

    if "cells" not in error_tree:
        return changes

    cells = nbdict.get("cells", [])
    if not isinstance(cells, list):
        # A malformed ``cells`` value has no per-cell metadata to repair.
        return changes

    for cell_idx, cell in enumerate(cells):
        # Cells don't report individual metadata keys as having failed validation
        # Instead it reports that it failed to validate against each cell-type definition.
        # We have to delve into why those definitions failed to uncover which metadata
        # keys are misbehaving.
        cell_errors = error_tree["cells"][cell_idx].errors
        if "oneOf" not in cell_errors:
            continue
        intended_cell_type = cell.get("cell_type") if isinstance(cell, dict) else None
        if not isinstance(intended_cell_type, str):
            continue

        one_of_error = cell_errors["oneOf"]
        schemas_by_index = [ref["$ref"] for ref in one_of_error.schema["oneOf"]]
        cell_type_definition_name = f"#/definitions/{intended_cell_type}_cell"
        if cell_type_definition_name not in schemas_by_index:
            continue
        schema_index = schemas_by_index.index(cell_type_definition_name)

        for error in one_of_error.context:
            rel_path = error.relative_path
            error_for_intended_schema = error.schema_path[0] == schema_index
            is_top_level_metadata_key = len(rel_path) == 2 and rel_path[0] == "metadata"
            if error_for_intended_schema and is_top_level_metadata_key:
                cell["metadata"].pop(rel_path[1], None)
                changes += 1

    return changes

609 

610 

def iter_validate(
    nbdict=None,
    ref=None,
    version=None,
    version_minor=None,
    relax_add_props=False,
    nbjson=None,
    strip_invalid_metadata=False,
):
    """Yield every ValidationError found in a notebook dict-like object.

    With ``ref``, the object is validated against that schema definition
    only. Without it, the whole notebook is validated, after optionally
    stripping invalid metadata from ``nbdict`` in place. ``nbjson`` is a
    backwards-compatible alias used when ``nbdict`` is None; a TypeError is
    raised when both are missing.

    Notes
    -----
    To fix: For security reasons, this function should *never* mutate its `nbdict` argument, and
    should *never* try to validate a mutated or modified version of its notebook.

    """
    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "iter_validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if version is None:
        version, version_minor = get_version(nbdict)

    schema_args = ()
    if ref:
        schema_args = ({"$ref": "#/definitions/%s" % ref},)
    elif strip_invalid_metadata:
        _strip_invalida_metadata(nbdict, version, version_minor, relax_add_props)
        # The validation below then runs on the stripped notebook, to ensure
        # that removing metadata didn't cause another complex validation
        # issue in the schema, and that higher-level errors produced by
        # individual metadata validation failures are removed.

    try:
        errors = _get_errors(nbdict, version, version_minor, relax_add_props, *schema_args)
    except ValidationError as failure:
        yield failure
        return

    for error in errors:
        yield better_validation_error(error, version, version_minor)

671 yield better_validation_error(error, version, version_minor)