Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbformat/validator.py: 13%

269 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1"""Notebook format validators.""" 

2# Copyright (c) IPython Development Team. 

3# Distributed under the terms of the Modified BSD License. 

4 

5 

6import json 

7import os 

8import pprint 

9import warnings 

10from copy import deepcopy 

11from textwrap import dedent 

12from typing import Any, Optional, Tuple 

13 

14from ._imports import import_item 

15from .corpus.words import generate_corpus_id 

16from .json_compat import ValidationError, _validator_for_name, get_current_validator 

17from .reader import get_version 

18from .warnings import DuplicateCellId, MissingIDFieldWarning 

19 

20validators = {} 

21_deprecated = object() 

22 

23 

24def _relax_additional_properties(obj): 

25 """relax any `additionalProperties`""" 

26 if isinstance(obj, dict): 

27 for key, value in obj.items(): 

28 value = ( # noqa 

29 True if key == "additionalProperties" else _relax_additional_properties(value) 

30 ) 

31 obj[key] = value 

32 elif isinstance(obj, list): 

33 for i, value in enumerate(obj): 

34 obj[i] = _relax_additional_properties(value) 

35 return obj 

36 

37 

38def _allow_undefined(schema): 

39 schema["definitions"]["cell"]["oneOf"].append({"$ref": "#/definitions/unrecognized_cell"}) 

40 schema["definitions"]["output"]["oneOf"].append({"$ref": "#/definitions/unrecognized_output"}) 

41 return schema 

42 

43 

def get_validator(version=None, version_minor=None, relax_add_props=False, name=None):
    """Load the JSON schema for a notebook format version into a validator.

    Parameters
    ----------
    version : int, optional
        Major notebook format version; defaults to ``current_nbformat``.
    version_minor : int, optional
        Minor notebook format version; defaults to the ``nbformat_minor``
        attribute of the ``nbformat.v<version>`` module (0 when absent).
    relax_add_props : bool
        When True, return a validator built from a schema in which every
        ``additionalProperties`` entry has been set to True.
    name : str, optional
        Name passed to ``_validator_for_name``; when falsy the validator
        returned by ``get_current_validator()`` is used.

    Returns
    -------
    A validator instance, or None when no schema file can be found for the
    requested version.
    """
    if version is None:
        from . import current_nbformat

        version = current_nbformat

    v = import_item("nbformat.v%s" % version)
    current_minor = getattr(v, "nbformat_minor", 0)
    if version_minor is None:
        version_minor = current_minor

    current_validator = _validator_for_name(name) if name else get_current_validator()

    version_tuple = (current_validator.name, version, version_minor)

    if version_tuple not in validators:
        try:
            schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
        except AttributeError:
            return None

        if current_minor < version_minor:
            # notebook from the future, relax all `additionalProperties: False` requirements
            schema_json = _relax_additional_properties(schema_json)
            # and allow undefined cell types and outputs
            schema_json = _allow_undefined(schema_json)

        validators[version_tuple] = current_validator(schema_json)

    if not relax_add_props:
        return validators[version_tuple]

    try:
        schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
    except AttributeError:
        return None

    # this allows properties to be added for intermediate
    # representations while validating for all other kinds of errors
    schema_json = _relax_additional_properties(schema_json)

    # The relaxed validator is deliberately NOT written to `validators`:
    # storing it under `version_tuple` would replace the strict validator and
    # make every later strict lookup silently accept unknown properties.
    return current_validator(schema_json)

86 

87 

88def _get_schema_json(v, version=None, version_minor=None): 

89 """ 

90 Gets the json schema from a given imported library and nbformat version. 

91 """ 

92 if (version, version_minor) in v.nbformat_schema: 

93 schema_path = os.path.join( 

94 os.path.dirname(v.__file__), v.nbformat_schema[(version, version_minor)] 

95 ) 

96 elif version_minor > v.nbformat_minor: 

97 # load the latest schema 

98 schema_path = os.path.join(os.path.dirname(v.__file__), v.nbformat_schema[(None, None)]) 

99 else: 

100 msg = "Cannot find appropriate nbformat schema file." 

101 raise AttributeError(msg) 

102 with open(schema_path) as f: 

103 schema_json = json.load(f) 

104 return schema_json 

105 

106 

def isvalid(nbjson, ref=None, version=None, version_minor=None):
    """Report whether the notebook JSON conforms to the notebook format schema.

    Returns True when validation succeeds and False when it raises a
    ValidationError. DeprecationWarning and MissingIDFieldWarning emitted
    during validation are suppressed.

    An AssertionError is raised if validation modified ``nbjson``.

    Use `validate` instead to see the individual errors.
    """
    snapshot = deepcopy(nbjson)
    try:
        with warnings.catch_warnings():
            for silenced in (DeprecationWarning, MissingIDFieldWarning):
                warnings.filterwarnings("ignore", category=silenced)
            validate(nbjson, ref, version, version_minor, repair_duplicate_cell_ids=False)
    except ValidationError:
        outcome = False
    else:
        outcome = True
    finally:
        # Validation must never have touched the caller's object.
        if nbjson != snapshot:
            raise AssertionError
    return outcome

128 

129 

130def _format_as_index(indices): 

131 """ 

132 (from jsonschema._utils.format_as_index, copied to avoid relying on private API) 

133 

134 Construct a single string containing indexing operations for the indices. 

135 

136 For example, [1, 2, "foo"] -> [1][2]["foo"] 

137 """ 

138 

139 if not indices: 

140 return "" 

141 return "[%s]" % "][".join(repr(index) for index in indices) 

142 

143 

144_ITEM_LIMIT = 16 

145_STR_LIMIT = 64 

146 

147 

148def _truncate_obj(obj): 

149 """Truncate objects for use in validation tracebacks 

150 

151 Cell and output lists are squashed, as are long strings, lists, and dicts. 

152 """ 

153 if isinstance(obj, dict): 

154 truncated_dict = {k: _truncate_obj(v) for k, v in list(obj.items())[:_ITEM_LIMIT]} 

155 if isinstance(truncated_dict.get("cells"), list): 

156 truncated_dict["cells"] = ["...%i cells..." % len(obj["cells"])] 

157 if isinstance(truncated_dict.get("outputs"), list): 

158 truncated_dict["outputs"] = ["...%i outputs..." % len(obj["outputs"])] 

159 

160 if len(obj) > _ITEM_LIMIT: 

161 truncated_dict["..."] = "%i keys truncated" % (len(obj) - _ITEM_LIMIT) 

162 return truncated_dict 

163 elif isinstance(obj, list): 

164 truncated_list = [_truncate_obj(item) for item in obj[:_ITEM_LIMIT]] 

165 if len(obj) > _ITEM_LIMIT: 

166 truncated_list.append("...%i items truncated..." % (len(obj) - _ITEM_LIMIT)) 

167 return truncated_list 

168 elif isinstance(obj, str): 

169 truncated_str = obj[:_STR_LIMIT] 

170 if len(obj) > _STR_LIMIT: 

171 truncated_str += "..." 

172 return truncated_str 

173 else: 

174 return obj 

175 

176 

class NotebookValidationError(ValidationError):
    """ValidationError wrapper with a compact string representation.

    Wraps another validation error and prints a truncated view of the failing
    instance, so that tracebacks stay short.
    """

    def __init__(self, original, ref=None):
        """Wrap *original*, preferring its own ``ref`` over the given one."""
        self.original = original
        self.ref = getattr(original, "ref", ref)
        self.message = original.message

    def __getattr__(self, key):
        """Fall back to the wrapped error for attributes not set here."""
        return getattr(self.original, key)

    def __unicode__(self):
        """Build the short message for the wrapped error.

        Avoids dumping the full schema and notebook to logs.
        """
        wrapped = self.original
        shown_instance = _truncate_obj(wrapped.instance)

        headline = wrapped.message
        failed_keyword = wrapped.validator
        target = self.ref or "notebook"
        schema_location = _format_as_index(list(wrapped.relative_schema_path)[:-1])
        instance_location = _format_as_index(wrapped.relative_path)
        rendered_instance = pprint.pformat(shown_instance, width=78)

        lines = [
            headline,
            "",
            f"Failed validating {failed_keyword!r} in {target}{schema_location}:",
            "",
            f"On instance{instance_location}:",
            rendered_instance,
        ]
        return "\n".join(lines)

    __str__ = __unicode__

217 

218 

def better_validation_error(error, version, version_minor):
    """Get a more informative ValidationError for ``*Of`` failures.

    oneOf errors aren't informative. If the failing instance is a dict with a
    string ``cell_type`` or ``output_type``, it is validated again directly
    against the definition for that type to obtain a better error message.

    Parameters
    ----------
    error : ValidationError
        The error produced by schema validation.
    version : int
    version_minor : int

    Returns
    -------
    ``error`` itself when its schema path is empty, otherwise a
    NotebookValidationError (or the improved error found by re-validating).
    """
    if not len(error.schema_path):
        return error
    key = error.schema_path[-1]
    ref = None
    if isinstance(key, str) and key.endswith("Of") and isinstance(error.instance, dict):
        # Only string type names can designate a schema definition; a
        # malformed notebook may carry any JSON value here, and building a
        # ref from it would raise TypeError instead of reporting the error.
        if "cell_type" in error.instance:
            cell_type = error.instance["cell_type"]
            if isinstance(cell_type, str):
                ref = cell_type + "_cell"
        elif "output_type" in error.instance:
            output_type = error.instance["output_type"]
            if isinstance(output_type, str):
                ref = output_type

    if ref:
        try:
            validate(
                error.instance,
                ref,
                version=version,
                version_minor=version_minor,
            )
        except ValidationError as sub_error:
            # keep extending relative path
            error.relative_path.extend(sub_error.relative_path)
            sub_error.relative_path = error.relative_path
            better = better_validation_error(sub_error, version, version_minor)
            # `better` can be a plain error without a `ref` attribute (it is
            # returned as-is when its schema path is empty).
            if getattr(better, "ref", None) is None:
                better.ref = ref
            return better
        except Exception:  # noqa
            # Deliberate best effort: if re-validation fails for any other
            # reason, let the original error through.
            pass
    return NotebookValidationError(error, ref)

258 

259 

def normalize(
    nbdict: Any,
    version: Optional[int] = None,
    version_minor: Optional[int] = None,
    *,
    relax_add_props: bool = False,
    strip_invalid_metadata: bool = False,
) -> Tuple[int, Any]:
    """
    Normalise a notebook prior to validation.

    Applies a few normalisation steps to a deep copy of the notebook, to
    standardise it and make validation easier.

    In general you should not rely on this function; make sure the notebooks
    that reach nbformat are already in a normal form. If they are not, you
    likely have a bug, and may have security issues.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
        defaults to the version read from the notebook
    version_minor : int
        defaults to the minor version read from the notebook
    relax_add_props : bool
        Whether to allow extra properties in the JSON schema validating the
        notebook.
    strip_invalid_metadata : bool
        Whether to strip metadata that does not exist in the JSON schema when
        validating the notebook.

    Returns
    -------
    changes : int
        number of changes in the notebook
    notebook : dict
        deep copy of the original object with relevant changes.

    """
    notebook = deepcopy(nbdict)
    detected_version, detected_minor = get_version(notebook)
    return _normalize(
        notebook,
        detected_version if version is None else version,
        detected_minor if version_minor is None else version_minor,
        repair_duplicate_cell_ids=True,
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    )

313 

314 

315def _normalize( 

316 nbdict: Any, 

317 version: int, 

318 version_minor: int, 

319 repair_duplicate_cell_ids: bool, 

320 relax_add_props: bool, 

321 strip_invalid_metadata: bool, 

322) -> Tuple[int, Any]: 

323 """ 

324 Private normalisation routine. 

325 

326 This function attempts to normalize the `nbdict` passed to it. 

327 

328 As `_normalize()` is currently used both in `validate()` (for 

329 historical reasons), and in the `normalize()` public function, 

330 `_normalize()` does currently mutate `nbdict`. 

331 Ideally, once `validate()` stops calling `_normalize()`, `_normalize()` 

332 may stop mutating `nbdict`. 

333 

334 """ 

335 changes = 0 

336 

337 if (version, version_minor) >= (4, 5): 

338 # if we support cell ids ensure default ids are provided 

339 for cell in nbdict["cells"]: 

340 if "id" not in cell: 

341 warnings.warn( 

342 "Code cell is missing an id field, this will become" 

343 " a hard error in future nbformat versions. You may want" 

344 " to use `normalize()` on your notebooks before validations" 

345 " (available since nbformat 5.1.4). Previous versions of nbformat" 

346 " are fixing this issue transparently, and will stop doing so" 

347 " in the future.", 

348 MissingIDFieldWarning, 

349 stacklevel=3, 

350 ) 

351 # Generate cell ids if any are missing 

352 if repair_duplicate_cell_ids: 

353 cell["id"] = generate_corpus_id() 

354 changes += 1 

355 

356 # if we support cell ids check for uniqueness when validating the whole notebook 

357 seen_ids = set() 

358 for cell in nbdict["cells"]: 

359 if "id" not in cell: 

360 continue 

361 cell_id = cell["id"] 

362 if cell_id in seen_ids: 

363 # Best effort to repair if we find a duplicate id 

364 if repair_duplicate_cell_ids: 

365 new_id = generate_corpus_id() 

366 cell["id"] = new_id 

367 changes += 1 

368 warnings.warn( 

369 f"Non-unique cell id {cell_id!r} detected. Corrected to {new_id!r}.", 

370 DuplicateCellId, 

371 stacklevel=3, 

372 ) 

373 else: 

374 msg = f"Non-unique cell id '{cell_id}' detected." 

375 raise ValidationError(msg) 

376 seen_ids.add(cell_id) 

377 if strip_invalid_metadata: 

378 changes += _strip_invalida_metadata( 

379 nbdict, version, version_minor, relax_add_props=relax_add_props 

380 ) 

381 return changes, nbdict 

382 

383 

384def _dep_warn(field): 

385 warnings.warn( 

386 dedent( 

387 f"""`{field}` kwargs of validate has been deprecated for security 

388 reasons, and will be removed soon. 

389 

390 Please explicitly use the `n_changes, new_notebook = nbformat.validator.normalize(old_notebook, ...)` if you wish to 

391 normalise your notebook. `normalize` is available since nbformat 5.5.0 

392 

393 """ 

394 ), 

395 DeprecationWarning, 

396 stacklevel=3, 

397 ) 

398 

399 

def validate(  # noqa
    nbdict: Any = None,
    ref: Optional[str] = None,
    version: Optional[int] = None,
    version_minor: Optional[int] = None,
    relax_add_props: bool = False,
    nbjson: Any = None,
    repair_duplicate_cell_ids: bool = _deprecated,  # type: ignore
    strip_invalid_metadata: bool = _deprecated,  # type: ignore
) -> None:
    """Check that a notebook dict-like object conforms to the relevant
    notebook format schema.

    Parameters
    ----------
    nbdict : dict
        notebook document
    ref : optional, str
        reference to the subset of the schema we want to validate against,
        for example ``"markdown_cell"``, ``"code_cell"`` ....
    version : int
    version_minor : int
    relax_add_props : bool
        Whether to allow extra properties in the JSON schema validating the notebook.
        When True, all known fields are validated, but unknown fields are ignored.
    nbjson
        Backwards-compatible alias for ``nbdict``, used when ``nbdict`` is None.
    repair_duplicate_cell_ids : bool
        Deprecated since 5.5.0 - will be removed in the future.
    strip_invalid_metadata : bool
        Deprecated since 5.5.0 - will be removed in the future.

    Returns
    -------
    None

    Raises
    ------
    ValidationError if not valid.
    TypeError if neither ``nbdict`` nor ``nbjson`` is given.

    Notes
    -----
    Prior to Nbformat 5.5.0 the `validate` and `isvalid` method would silently
    try to fix invalid notebook and mutate arguments. This behavior is deprecated
    and will be removed in a near future.

    Please explicitly call `normalize` if you need to normalize notebooks.
    """
    assert isinstance(ref, str) or ref is None  # noqa

    # Passing either deprecated keyword explicitly triggers a warning;
    # leaving it out selects the historical default.
    if strip_invalid_metadata is _deprecated:
        strip_invalid_metadata = False
    else:
        _dep_warn("strip_invalid_metadata")

    if repair_duplicate_cell_ids is _deprecated:
        repair_duplicate_cell_ids = True
    else:
        _dep_warn("repair_duplicate_cell_ids")

    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if ref is None:
        # No ref means a whole notebook, so the version can be read from it.
        detected_version, detected_minor = get_version(nbdict)
        if version is None:
            version = detected_version
        if version_minor is None:
            version_minor = detected_minor

        assert isinstance(version, int)  # noqa
        assert isinstance(version_minor, int)  # noqa
        _normalize(
            nbdict,
            version,
            version_minor,
            repair_duplicate_cell_ids,
            relax_add_props=relax_add_props,
            strip_invalid_metadata=strip_invalid_metadata,
        )
    elif version is None:
        # ref given without a version number: assume we're validating against 1.0
        version, version_minor = 1, 0

    # Only the first reported error is raised.
    first_error = next(
        iter_validate(
            nbdict,
            ref=ref,
            version=version,
            version_minor=version_minor,
            relax_add_props=relax_add_props,
            strip_invalid_metadata=strip_invalid_metadata,
        ),
        None,
    )
    if first_error is not None:
        raise first_error

503 

504 

def _get_errors(
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool, *args: Any
) -> Any:
    """Return an iterator over the schema validation errors of *nbdict*.

    Extra positional ``args`` are forwarded to the validator's
    ``iter_errors``. When the current validator reports errors and is not
    jsonschema, validation is run again with jsonschema and its errors are
    returned instead.

    Raises
    ------
    ValidationError
        If no validator is available for the requested version.
    """
    primary = get_validator(version, version_minor, relax_add_props=relax_add_props)
    if not primary:
        msg = f"No schema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)

    found = list(primary.iter_errors(nbdict, *args))
    if not found or primary.name == "jsonschema":
        return iter(found)

    # jsonschema gives the best error messages.
    reference = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    return reference.iter_errors(nbdict, *args)

524 

525 

def _strip_invalida_metadata(  # noqa
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool
) -> int:
    """
    This function tries to extract metadata errors from the validator and fix
    them if necessary. This mostly means stripping unknown keys from metadata
    fields, or removing metadata fields altogether.

    ``nbdict`` is modified in place.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
    version_minor : int
    relax_add_props : bool
        Whether to allow extra properties in the JSON schema validating the
        notebook.

    Returns
    -------
    int
        number of modifications

    Raises
    ------
    ValidationError
        If no validator is available for the requested version.
    """
    errors = _get_errors(nbdict, version, version_minor, relax_add_props)
    changes = 0
    if not list(errors):
        return changes

    # jsonschema gives a better error tree.
    validator = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    if not validator:
        msg = f"No jsonschema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)
    errors = validator.iter_errors(nbdict)
    error_tree = validator.error_tree(errors)

    if "metadata" in error_tree:
        for key in error_tree["metadata"]:
            nbdict["metadata"].pop(key, None)
            changes += 1

    if "cells" in error_tree:
        number_of_cells = len(nbdict.get("cells", []))
        for cell_idx in range(number_of_cells):
            # Cells don't report individual metadata keys as having failed validation
            # Instead it reports that it failed to validate against each cell-type definition.
            # We have to delve into why those definitions failed to uncover which metadata
            # keys are misbehaving.
            cell_errors = error_tree["cells"][cell_idx].errors
            if "oneOf" not in cell_errors:
                continue

            cell = nbdict["cells"][cell_idx]
            # A malformed cell (not a dict, or without `cell_type`) has no
            # intended schema to compare against; skip it so the caller gets
            # the validation error instead of a KeyError/TypeError here.
            if not isinstance(cell, dict) or "cell_type" not in cell:
                continue
            intended_cell_type = cell["cell_type"]

            one_of_error = cell_errors["oneOf"]
            schemas_by_index = [ref["$ref"] for ref in one_of_error.schema["oneOf"]]
            cell_type_definition_name = f"#/definitions/{intended_cell_type}_cell"
            if cell_type_definition_name not in schemas_by_index:
                continue

            schema_index = schemas_by_index.index(cell_type_definition_name)
            for error in one_of_error.context:
                rel_path = error.relative_path
                error_for_intended_schema = error.schema_path[0] == schema_index
                is_top_level_metadata_key = (
                    len(rel_path) == 2 and rel_path[0] == "metadata"  # noqa
                )
                if error_for_intended_schema and is_top_level_metadata_key:
                    cell["metadata"].pop(rel_path[1], None)
                    changes += 1

    return changes

597 

598 

def iter_validate(
    nbdict=None,
    ref=None,
    version=None,
    version_minor=None,
    relax_add_props=False,
    nbjson=None,
    strip_invalid_metadata=False,
):
    """Check that a notebook dict-like object conforms to the relevant
    notebook format schema.

    Yields every ValidationError found; yields nothing for a valid notebook.

    Notes
    -----
    To fix: For security reasons, this function should *never* mutate its `nbdict` argument, and
    should *never* try to validate a mutated or modified version of its notebook.

    """
    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "iter_validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if version is None:
        version, version_minor = get_version(nbdict)

    schema_args = ()
    if ref:
        # Validate against one named definition only.
        schema_args = ({"$ref": "#/definitions/%s" % ref},)
    elif strip_invalid_metadata:
        _strip_invalida_metadata(nbdict, version, version_minor, relax_add_props)
        # The validation below then runs on the stripped notebook, to ensure
        # that removing metadata didn't cause another complex validation
        # issue, and that higher-level errors produced by individual metadata
        # validation failures are gone.

    try:
        errors = _get_errors(nbdict, version, version_minor, relax_add_props, *schema_args)
    except ValidationError as e:
        yield e
        return

    for error in errors:
        yield better_validation_error(error, version, version_minor)

659 yield better_validation_error(error, version, version_minor)