Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbformat/validator.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

271 statements  

1"""Notebook format validators.""" 

2 

3# Copyright (c) IPython Development Team. 

4# Distributed under the terms of the Modified BSD License. 

5from __future__ import annotations 

6 

7import json 

8import pprint 

9import warnings 

10from copy import deepcopy 

11from pathlib import Path 

12from textwrap import dedent 

13from typing import Any, Optional 

14 

15from ._imports import import_item 

16from .corpus.words import generate_corpus_id 

17from .json_compat import ValidationError, _validator_for_name, get_current_validator 

18from .reader import get_version 

19from .warnings import DuplicateCellId, MissingIDFieldWarning 

20 

# Cache of instantiated schema validators, populated and read by get_validator().
validators = {}
# Sentinel used as the default of validate()'s deprecated keyword arguments so
# that "argument not passed" can be told apart from any explicit value.
_deprecated = object()


# Names exported by `from nbformat.validator import *`.
__all__ = [
    "ValidationError",
    "get_validator",
    "isvalid",
    "NotebookValidationError",
    "better_validation_error",
    "normalize",
    "validate",
    "iter_validate",
]

35 

36 

37def _relax_additional_properties(obj): 

38 """relax any `additionalProperties`""" 

39 if isinstance(obj, dict): 

40 for key, value in obj.items(): 

41 value = ( # noqa: PLW2901 

42 True if key == "additionalProperties" else _relax_additional_properties(value) 

43 ) 

44 obj[key] = value 

45 elif isinstance(obj, list): 

46 for i, value in enumerate(obj): 

47 obj[i] = _relax_additional_properties(value) 

48 return obj 

49 

50 

51def _allow_undefined(schema): 

52 schema["definitions"]["cell"]["oneOf"].append({"$ref": "#/definitions/unrecognized_cell"}) 

53 schema["definitions"]["output"]["oneOf"].append({"$ref": "#/definitions/unrecognized_output"}) 

54 return schema 

55 

56 

def get_validator(version=None, version_minor=None, relax_add_props=False, name=None):
    """Load the JSON schema into a Validator.

    Parameters
    ----------
    version : int, optional
        Major notebook format version; defaults to ``current_nbformat``.
    version_minor : int, optional
        Minor notebook format version; defaults to the ``nbformat_minor``
        attribute of the imported ``nbformat.v<version>`` module (0 if absent).
    relax_add_props : bool
        When True, return a validator whose schema has every
        ``additionalProperties`` set to ``True``.
    name : str, optional
        Name of the validator backend to use; when falsy the current
        validator backend is used.

    Returns
    -------
    The validator instance, or ``None`` when no schema file can be found for
    the requested version.
    """
    if version is None:
        from . import current_nbformat

        version = current_nbformat

    v = import_item(f"nbformat.v{version}")
    current_minor = getattr(v, "nbformat_minor", 0)
    if version_minor is None:
        version_minor = current_minor

    current_validator = _validator_for_name(name) if name else get_current_validator()

    version_tuple = (current_validator.name, version, version_minor)
    # Relaxed validators get their own cache slot. Storing them under the
    # strict key would make every later strict request silently receive the
    # relaxed validator.
    cache_key = (*version_tuple, "relax_add_props") if relax_add_props else version_tuple

    if cache_key not in validators:
        try:
            schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
        except AttributeError:
            return None

        if relax_add_props:
            # this allows properties to be added for intermediate
            # representations while validating for all other kinds of errors
            schema_json = _relax_additional_properties(schema_json)
        elif current_minor < version_minor:
            # notebook from the future, relax all `additionalProperties: False` requirements
            schema_json = _relax_additional_properties(schema_json)
            # and allow undefined cell types and outputs
            schema_json = _allow_undefined(schema_json)

        validators[cache_key] = current_validator(schema_json)

    return validators[cache_key]

99 

100 

101def _get_schema_json(v, version=None, version_minor=None): 

102 """ 

103 Gets the json schema from a given imported library and nbformat version. 

104 """ 

105 if (version, version_minor) in v.nbformat_schema: 

106 schema_path = str(Path(v.__file__).parent / v.nbformat_schema[(version, version_minor)]) 

107 elif version_minor > v.nbformat_minor: 

108 # load the latest schema 

109 schema_path = str(Path(v.__file__).parent / v.nbformat_schema[(None, None)]) 

110 else: 

111 msg = "Cannot find appropriate nbformat schema file." 

112 raise AttributeError(msg) 

113 with Path(schema_path).open(encoding="utf8") as f: 

114 schema_json = json.load(f) 

115 return schema_json # noqa: RET504 

116 

117 

def isvalid(nbjson, ref=None, version=None, version_minor=None):
    """Report whether the notebook JSON conforms to the notebook format schema.

    Returns True when `validate` raises no ``ValidationError`` and False when
    it does. Deprecation and missing-cell-id warnings are silenced for the
    duration of the check.

    Use `validate` directly to see the individual errors.

    Raises ``AssertionError`` if ``nbjson`` was modified by the validation.
    """
    snapshot = deepcopy(nbjson)
    try:
        with warnings.catch_warnings():
            for silenced in (DeprecationWarning, MissingIDFieldWarning):
                warnings.filterwarnings("ignore", category=silenced)
            validate(nbjson, ref, version, version_minor, repair_duplicate_cell_ids=False)
        return True
    except ValidationError:
        return False
    finally:
        # Validation must never leave the caller's notebook modified.
        if nbjson != snapshot:
            raise AssertionError

139 

140 

141def _format_as_index(indices): 

142 """ 

143 (from jsonschema._utils.format_as_index, copied to avoid relying on private API) 

144 

145 Construct a single string containing indexing operations for the indices. 

146 

147 For example, [1, 2, "foo"] -> [1][2]["foo"] 

148 """ 

149 

150 if not indices: 

151 return "" 

152 return "[%s]" % "][".join(repr(index) for index in indices) 

153 

154 

155_ITEM_LIMIT = 16 

156_STR_LIMIT = 64 

157 

158 

159def _truncate_obj(obj): 

160 """Truncate objects for use in validation tracebacks 

161 

162 Cell and output lists are squashed, as are long strings, lists, and dicts. 

163 """ 

164 if isinstance(obj, dict): 

165 truncated_dict = {k: _truncate_obj(v) for k, v in list(obj.items())[:_ITEM_LIMIT]} 

166 if isinstance(truncated_dict.get("cells"), list): 

167 truncated_dict["cells"] = ["...%i cells..." % len(obj["cells"])] 

168 if isinstance(truncated_dict.get("outputs"), list): 

169 truncated_dict["outputs"] = ["...%i outputs..." % len(obj["outputs"])] 

170 

171 if len(obj) > _ITEM_LIMIT: 

172 truncated_dict["..."] = "%i keys truncated" % (len(obj) - _ITEM_LIMIT) 

173 return truncated_dict 

174 if isinstance(obj, list): 

175 truncated_list = [_truncate_obj(item) for item in obj[:_ITEM_LIMIT]] 

176 if len(obj) > _ITEM_LIMIT: 

177 truncated_list.append("...%i items truncated..." % (len(obj) - _ITEM_LIMIT)) 

178 return truncated_list 

179 if isinstance(obj, str): 

180 truncated_str = obj[:_STR_LIMIT] 

181 if len(obj) > _STR_LIMIT: 

182 truncated_str += "..." 

183 return truncated_str 

184 return obj 

185 

186 

class NotebookValidationError(ValidationError):  # type:ignore[misc]
    """Wrapper around a schema ValidationError with a compact string form.

    The string representation shows a truncated view of the failing instance
    to avoid massive verbose tracebacks.
    """

    def __init__(self, original, ref=None):
        """Wrap ``original``; ``ref`` is used unless ``original`` has its own ``ref``."""
        self.original = original
        self.ref = getattr(original, "ref", ref)
        self.message = original.message

    def __getattr__(self, key):
        """Delegate attributes not found on this object to the wrapped error."""
        return getattr(self.original, key)

    def __unicode__(self):
        """Build a short, readable description of the failure.

        Avoids dumping the full schema and notebook to logs.
        """
        wrapped = self.original
        shown_instance = _truncate_obj(wrapped.instance)
        schema_location = _format_as_index(list(wrapped.relative_schema_path)[:-1])
        target = self.ref or "notebook"

        lines = [
            wrapped.message,
            "",
            f"Failed validating {wrapped.validator!r} in {target}{schema_location}:",
            "",
            "On instance%s:" % _format_as_index(wrapped.relative_path),
            pprint.pformat(shown_instance, width=78),
        ]
        return "\n".join(lines)

    __str__ = __unicode__

227 

228 

def better_validation_error(error, version, version_minor):
    """Get a more informative ValidationError for ``*Of`` schema failures.

    oneOf errors aren't informative. If the failing instance is a dict with a
    string ``cell_type`` or ``output_type``, it is validated again directly
    against the matching definition to obtain a better error message.

    Parameters
    ----------
    error : ValidationError
        The error reported by the schema validator.
    version, version_minor : int
        Notebook format version used for the follow-up validation.

    Returns
    -------
    ``error`` itself when its ``schema_path`` is empty, the more specific
    error found by the follow-up validation, or otherwise ``error`` wrapped in
    a `NotebookValidationError`.
    """
    if not error.schema_path:
        return error
    key = error.schema_path[-1]
    ref = None
    if isinstance(key, str) and key.endswith("Of") and isinstance(error.instance, dict):
        # Only string type names can be turned into a schema reference; the
        # instance may be arbitrary (invalid) data, so anything else is skipped
        # instead of raising TypeError here.
        if "cell_type" in error.instance:
            cell_type = error.instance["cell_type"]
            if isinstance(cell_type, str):
                ref = cell_type + "_cell"
        elif "output_type" in error.instance:
            output_type = error.instance["output_type"]
            if isinstance(output_type, str):
                ref = output_type

    if ref:
        try:
            validate(
                error.instance,
                ref,
                version=version,
                version_minor=version_minor,
            )
        except ValidationError as sub_error:
            # keep extending relative path
            error.relative_path.extend(sub_error.relative_path)
            sub_error.relative_path = error.relative_path
            better = better_validation_error(sub_error, version, version_minor)
            # The recursive call may hand back a plain ValidationError that
            # has no `ref` attribute yet.
            if getattr(better, "ref", None) is None:
                better.ref = ref
            return better
        except Exception:  # noqa: S110
            # if it fails for some reason,
            # let the original error through
            pass
    return NotebookValidationError(error, ref)

268 

269 

def normalize(
    nbdict: Any,
    version: Optional[int] = None,
    version_minor: Optional[int] = None,
    *,
    relax_add_props: bool = False,
    strip_invalid_metadata: bool = False,
) -> tuple[int, Any]:
    """
    Normalise a notebook prior to validation.

    A deep copy of the notebook is normalised, so the argument itself is left
    untouched. Missing and duplicate cell ids are always repaired.

    You should in general not rely on this function and make sure the notebooks
    that reach nbformat are already in a normal form. If not you likely have a bug,
    and may have security issues.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
        Major format version; read from the notebook when None.
    version_minor : int
        Minor format version; read from the notebook when None.
    relax_add_props : bool
        Whether to allow extra property in the Json schema validating the
        notebook.
    strip_invalid_metadata : bool
        Whether to strip metadata that does not exist in the Json schema when
        validating the notebook.

    Returns
    -------
    changes : int
        number of changes in the notebooks
    notebook : dict
        deep-copy of the original object with relevant changes.

    """
    working_copy = deepcopy(nbdict)
    detected_version, detected_minor = get_version(working_copy)
    return _normalize(
        working_copy,
        detected_version if version is None else version,
        detected_minor if version_minor is None else version_minor,
        True,
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    )

323 

324 

325def _normalize( 

326 nbdict: Any, 

327 version: int, 

328 version_minor: int, 

329 repair_duplicate_cell_ids: bool, 

330 relax_add_props: bool, 

331 strip_invalid_metadata: bool, 

332) -> tuple[int, Any]: 

333 """ 

334 Private normalisation routine. 

335 

336 This function attempts to normalize the `nbdict` passed to it. 

337 

338 As `_normalize()` is currently used both in `validate()` (for 

339 historical reasons), and in the `normalize()` public function, 

340 `_normalize()` does currently mutate `nbdict`. 

341 Ideally, once `validate()` stops calling `_normalize()`, `_normalize()` 

342 may stop mutating `nbdict`. 

343 

344 """ 

345 changes = 0 

346 

347 if (version, version_minor) >= (4, 5): 

348 # if we support cell ids ensure default ids are provided 

349 for cell in nbdict["cells"]: 

350 if "id" not in cell: 

351 warnings.warn( 

352 "Cell is missing an id field, this will become" 

353 " a hard error in future nbformat versions. You may want" 

354 " to use `normalize()` on your notebooks before validations" 

355 " (available since nbformat 5.1.4). Previous versions of nbformat" 

356 " are fixing this issue transparently, and will stop doing so" 

357 " in the future.", 

358 MissingIDFieldWarning, 

359 stacklevel=3, 

360 ) 

361 # Generate cell ids if any are missing 

362 if repair_duplicate_cell_ids: 

363 cell["id"] = generate_corpus_id() 

364 changes += 1 

365 

366 # if we support cell ids check for uniqueness when validating the whole notebook 

367 seen_ids = set() 

368 for cell in nbdict["cells"]: 

369 if "id" not in cell: 

370 continue 

371 cell_id = cell["id"] 

372 if cell_id in seen_ids: 

373 # Best effort to repair if we find a duplicate id 

374 if repair_duplicate_cell_ids: 

375 new_id = generate_corpus_id() 

376 cell["id"] = new_id 

377 changes += 1 

378 warnings.warn( 

379 f"Non-unique cell id {cell_id!r} detected. Corrected to {new_id!r}.", 

380 DuplicateCellId, 

381 stacklevel=3, 

382 ) 

383 else: 

384 msg = f"Non-unique cell id '{cell_id}' detected." 

385 raise ValidationError(msg) 

386 seen_ids.add(cell_id) 

387 if strip_invalid_metadata: 

388 changes += _strip_invalida_metadata( 

389 nbdict, version, version_minor, relax_add_props=relax_add_props 

390 ) 

391 return changes, nbdict 

392 

393 

394def _dep_warn(field): 

395 warnings.warn( 

396 dedent( 

397 f"""`{field}` kwargs of validate has been deprecated for security 

398 reasons, and will be removed soon. 

399 

400 Please explicitly use the `n_changes, new_notebook = nbformat.validator.normalize(old_notebook, ...)` if you wish to 

401 normalise your notebook. `normalize` is available since nbformat 5.5.0 

402 

403 """ 

404 ), 

405 DeprecationWarning, 

406 stacklevel=3, 

407 ) 

408 

409 

def validate(
    nbdict: Any = None,
    ref: Optional[str] = None,
    version: Optional[int] = None,
    version_minor: Optional[int] = None,
    relax_add_props: bool = False,
    nbjson: Any = None,
    repair_duplicate_cell_ids: bool = _deprecated,  # type: ignore[assignment]
    strip_invalid_metadata: bool = _deprecated,  # type: ignore[assignment]
) -> None:
    """Check that a notebook dict-like object conforms to the relevant
    notebook format schema.

    Parameters
    ----------
    nbdict : dict
        notebook document
    ref : optional, str
        reference to the subset of the schema we want to validate against.
        for example ``"markdown_cell"``, `"code_cell"` ....
    version : int
    version_minor : int
        When `ref` is None, missing version numbers are read from the
        notebook. When `ref` is given and `version` is None, version 1.0 is
        assumed.
    relax_add_props : bool
        Whether to allow extra properties in the JSON schema validating the notebook.
        When True, all known fields are validated, but unknown fields are ignored.
    nbjson
        Backwards-compatible alias used when `nbdict` is None.
    repair_duplicate_cell_ids : bool
        Deprecated since 5.5.0 - will be removed in the future.
    strip_invalid_metadata : bool
        Deprecated since 5.5.0 - will be removed in the future.

    Returns
    -------
    None

    Raises
    ------
    ValidationError if not valid.
    TypeError if neither `nbdict` nor `nbjson` is given.

    Notes
    -----
    Prior to Nbformat 5.5.0 the `validate` and `isvalid` method would silently
    try to fix invalid notebook and mutate arguments. This behavior is deprecated
    and will be removed in a near future.

    Please explicitly call `normalize` if you need to normalize notebooks.
    """
    assert ref is None or isinstance(ref, str)

    # Passing either deprecated keyword explicitly triggers a warning;
    # otherwise the historical default applies.
    if strip_invalid_metadata is not _deprecated:
        _dep_warn("strip_invalid_metadata")
    else:
        strip_invalid_metadata = False

    if repair_duplicate_cell_ids is not _deprecated:
        _dep_warn("repair_duplicate_cell_ids")
    else:
        repair_duplicate_cell_ids = True

    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if ref is None:
        # no ref means we have a whole notebook, so we can get the version
        detected_version, detected_minor = get_version(nbdict)
        if version is None:
            version = detected_version
        if version_minor is None:
            version_minor = detected_minor

        assert isinstance(version, int)
        assert isinstance(version_minor, int)
        _normalize(
            nbdict,
            version,
            version_minor,
            repair_duplicate_cell_ids,
            relax_add_props=relax_add_props,
            strip_invalid_metadata=strip_invalid_metadata,
        )
    elif version is None:
        # ref given without a version number: assume we're validating against 1.0
        version, version_minor = 1, 0

    # Only the first reported error is raised.
    for error in iter_validate(
        nbdict,
        ref=ref,
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    ):
        raise error

510 

511 

def _get_errors(
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool, *args: Any
) -> Any:
    """Return an iterator over the schema validation errors of ``nbdict``.

    Extra positional ``args`` are forwarded to the validator's ``iter_errors``.
    When the current validator reports errors and is not named
    ``"jsonschema"``, the errors are recomputed with the jsonschema validator.

    Raises ``ValidationError`` when no validator exists for the version.
    """
    validator = get_validator(version, version_minor, relax_add_props=relax_add_props)
    if not validator:
        msg = f"No schema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)

    found = list(validator.iter_errors(nbdict, *args))
    if not found or validator.name == "jsonschema":
        return iter(found)

    # jsonschema gives the best error messages.
    fallback = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    return fallback.iter_errors(nbdict, *args)

531 

532 

def _strip_invalida_metadata(
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool
) -> int:
    """
    This function tries to extract metadata errors from the validator and fix
    them if necessary. This mostly mean stripping unknown keys from metadata
    fields, or removing metadata fields altogether.

    `nbdict` is modified in place. Only dict-shaped metadata is touched, and
    only cells that are dicts with a string ``cell_type`` are inspected;
    anything else is left for schema validation to report instead of raising
    ``KeyError``/``TypeError`` here.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
    version_minor : int
    relax_add_props : bool
        Whether to allow extra property in the Json schema validating the
        notebook.

    Returns
    -------
    int
        number of modifications

    Raises
    ------
    ValidationError
        If no schema / jsonschema validator is available for the version.
    """
    errors = _get_errors(nbdict, version, version_minor, relax_add_props)
    changes = 0
    # Nothing to strip when the notebook already validates.
    if next(iter(errors), None) is None:
        return changes

    # jsonschema gives a better error tree.
    validator = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    if not validator:
        msg = f"No jsonschema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)
    error_tree = validator.error_tree(validator.iter_errors(nbdict))

    notebook_metadata = nbdict.get("metadata")
    if "metadata" in error_tree and isinstance(notebook_metadata, dict):
        for key in error_tree["metadata"]:
            notebook_metadata.pop(key, None)
            changes += 1

    cells = nbdict.get("cells")
    if "cells" in error_tree and isinstance(cells, list):
        cells_tree = error_tree["cells"]
        for cell_idx, cell in enumerate(cells):
            # Cells don't report individual metadata keys as having failed validation
            # Instead it reports that it failed to validate against each cell-type definition.
            # We have to delve into why those definitions failed to uncover which metadata
            # keys are misbehaving.
            cell_errors = cells_tree[cell_idx].errors
            if "oneOf" not in cell_errors:
                continue
            if not isinstance(cell, dict):
                continue
            intended_cell_type = cell.get("cell_type")
            if not isinstance(intended_cell_type, str):
                continue

            one_of_error = cell_errors["oneOf"]
            schemas_by_index = [ref.get("$ref") for ref in one_of_error.schema["oneOf"]]
            cell_type_definition_name = f"#/definitions/{intended_cell_type}_cell"
            if cell_type_definition_name not in schemas_by_index:
                continue
            schema_index = schemas_by_index.index(cell_type_definition_name)

            cell_metadata = cell.get("metadata")
            for error in one_of_error.context:
                rel_path = error.relative_path
                error_for_intended_schema = error.schema_path[0] == schema_index
                is_top_level_metadata_key = len(rel_path) == 2 and rel_path[0] == "metadata"
                if (
                    error_for_intended_schema
                    and is_top_level_metadata_key
                    and isinstance(cell_metadata, dict)
                ):
                    cell_metadata.pop(rel_path[1], None)
                    changes += 1

    return changes

604 

605 

def iter_validate(
    nbdict=None,
    ref=None,
    version=None,
    version_minor=None,
    relax_add_props=False,
    nbjson=None,
    strip_invalid_metadata=False,
):
    """Check a notebook dict-like object against the relevant notebook
    format schema.

    Returns a generator of all ValidationErrors if not valid.

    When `ref` is truthy the object is validated against that schema
    definition only. Otherwise the whole notebook is validated, after invalid
    metadata has been stripped if `strip_invalid_metadata` is set. When
    `version` is None both version numbers are read from the notebook.

    Raises ``TypeError`` (on first iteration) when neither `nbdict` nor
    `nbjson` is given.

    Notes
    -----
    To fix: For security reasons, this function should *never* mutate its `nbdict` argument, and
    should *never* try to validate a mutated or modified version of its notebook.

    """
    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "iter_validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if version is None:
        version, version_minor = get_version(nbdict)

    if ref:
        schema_args = ({"$ref": "#/definitions/%s" % ref},)
    else:
        schema_args = ()
        if strip_invalid_metadata:
            _strip_invalida_metadata(nbdict, version, version_minor, relax_add_props)
        # The validation below runs after any stripping, so it also confirms
        # that removing metadata didn't cause another complex validation issue
        # and drops higher-level errors produced by individual metadata
        # validation failures.

    try:
        errors = _get_errors(nbdict, version, version_minor, relax_add_props, *schema_args)
    except ValidationError as e:
        yield e
        return

    for error in errors:
        yield better_validation_error(error, version, version_minor)