Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbformat/validator.py: 30%

1"""Notebook format validators."""

3# Copyright (c) IPython Development Team.

4# Distributed under the terms of the Modified BSD License.

5from __future__ import annotations

7import json

8import pprint

9import warnings

10from copy import deepcopy

11from pathlib import Path

12from textwrap import dedent

13from typing import Any, Optional

15from ._imports import import_item

16from .corpus.words import generate_corpus_id

17from .json_compat import ValidationError, _validator_for_name, get_current_validator

18from .reader import get_version

19from .warnings import DuplicateCellId, MissingIDFieldWarning

# Cache of instantiated schema validators, filled lazily by get_validator().
validators = {}
# Sentinel default used by validate() to tell "argument not passed" apart from
# an explicitly supplied value for its deprecated keyword arguments.
_deprecated = object()


# Public names of this module.
__all__ = [
    "ValidationError",
    "get_validator",
    "isvalid",
    "NotebookValidationError",
    "better_validation_error",
    "normalize",
    "validate",
    "iter_validate",
]

37def _relax_additional_properties(obj):

38 """relax any `additionalProperties`"""

39 if isinstance(obj, dict):

40 for key, value in obj.items():

41 value = ( # noqa: PLW2901

42 True if key == "additionalProperties" else _relax_additional_properties(value)

43 )

44 obj[key] = value

45 elif isinstance(obj, list):

46 for i, value in enumerate(obj):

47 obj[i] = _relax_additional_properties(value)

48 return obj

51def _allow_undefined(schema):

52 schema["definitions"]["cell"]["oneOf"].append({"$ref": "#/definitions/unrecognized_cell"})

53 schema["definitions"]["output"]["oneOf"].append({"$ref": "#/definitions/unrecognized_output"})

54 return schema

def get_validator(version=None, version_minor=None, relax_add_props=False, name=None):
    """Load the JSON schema into a Validator.

    Parameters
    ----------
    version : int, optional
        Major notebook format version; defaults to ``current_nbformat``.
    version_minor : int, optional
        Minor notebook format version; defaults to the ``nbformat_minor``
        attribute of the imported version module (0 when absent).
    relax_add_props : bool
        When True, return a validator whose schema has every
        ``additionalProperties`` entry relaxed to ``True``.
    name : str, optional
        Name of the validator backend to use; the current backend is used
        when omitted.

    Returns
    -------
    A validator instance, or ``None`` when no schema could be located for
    the requested version.
    """
    if version is None:
        from . import current_nbformat

        version = current_nbformat

    v = import_item("nbformat.v%s" % version)
    current_minor = getattr(v, "nbformat_minor", 0)
    if version_minor is None:
        version_minor = current_minor

    current_validator = _validator_for_name(name) if name else get_current_validator()

    version_tuple = (current_validator.name, version, version_minor)
    # Relaxed validators get a cache slot of their own. Storing them under the
    # strict key would make every later strict lookup return the relaxed
    # validator.
    cache_key = (*version_tuple, "relax_add_props") if relax_add_props else version_tuple

    if cache_key not in validators:
        try:
            schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
        except AttributeError:
            return None

        if relax_add_props:
            # this allows properties to be added for intermediate
            # representations while validating for all other kinds of errors
            schema_json = _relax_additional_properties(schema_json)
        elif current_minor < version_minor:
            # notebook from the future, relax all `additionalProperties: False` requirements
            schema_json = _relax_additional_properties(schema_json)
            # and allow undefined cell types and outputs
            schema_json = _allow_undefined(schema_json)

        validators[cache_key] = current_validator(schema_json)

    return validators[cache_key]

100

101def _get_schema_json(v, version=None, version_minor=None):

102 """

103 Gets the json schema from a given imported library and nbformat version.

104 """

105 if (version, version_minor) in v.nbformat_schema:

106 schema_path = str(Path(v.__file__).parent / v.nbformat_schema[(version, version_minor)])

107 elif version_minor > v.nbformat_minor:

108 # load the latest schema

109 schema_path = str(Path(v.__file__).parent / v.nbformat_schema[(None, None)])

110 else:

111 msg = "Cannot find appropriate nbformat schema file."

112 raise AttributeError(msg)

113 with Path(schema_path).open(encoding="utf8") as f:

114 schema_json = json.load(f)

115 return schema_json # noqa: RET504

116

117

def isvalid(nbjson, ref=None, version=None, version_minor=None):
    """Report whether the given notebook JSON conforms to the notebook
    format schema.

    Returns True when validation succeeds and False when a
    ``ValidationError`` is raised. Deprecation and missing-id warnings are
    silenced while validating.

    To see the individual errors that were encountered, please use the
    `validate` function instead.

    Raises
    ------
    AssertionError
        If ``nbjson`` was modified during validation.
    """
    snapshot = deepcopy(nbjson)
    try:
        with warnings.catch_warnings():
            for silenced in (DeprecationWarning, MissingIDFieldWarning):
                warnings.filterwarnings("ignore", category=silenced)
            validate(nbjson, ref, version, version_minor, repair_duplicate_cell_ids=False)
        outcome = True
    except ValidationError:
        outcome = False
    finally:
        # Validation must leave the caller's notebook untouched.
        if nbjson != snapshot:
            raise AssertionError
    return outcome

139

140

141def _format_as_index(indices):

142 """

143 (from jsonschema._utils.format_as_index, copied to avoid relying on private API)

144

145 Construct a single string containing indexing operations for the indices.

146

147 For example, [1, 2, "foo"] -> [1][2]["foo"]

148 """

149

150 if not indices:

151 return ""

152 return "[%s]" % "][".join(repr(index) for index in indices)

153

154

155_ITEM_LIMIT = 16

156_STR_LIMIT = 64

157

158

159def _truncate_obj(obj):

160 """Truncate objects for use in validation tracebacks

161

162 Cell and output lists are squashed, as are long strings, lists, and dicts.

163 """

164 if isinstance(obj, dict):

165 truncated_dict = {k: _truncate_obj(v) for k, v in list(obj.items())[:_ITEM_LIMIT]}

166 if isinstance(truncated_dict.get("cells"), list):

167 truncated_dict["cells"] = ["...%i cells..." % len(obj["cells"])]

168 if isinstance(truncated_dict.get("outputs"), list):

169 truncated_dict["outputs"] = ["...%i outputs..." % len(obj["outputs"])]

170

171 if len(obj) > _ITEM_LIMIT:

172 truncated_dict["..."] = "%i keys truncated" % (len(obj) - _ITEM_LIMIT)

173 return truncated_dict

174 if isinstance(obj, list):

175 truncated_list = [_truncate_obj(item) for item in obj[:_ITEM_LIMIT]]

176 if len(obj) > _ITEM_LIMIT:

177 truncated_list.append("...%i items truncated..." % (len(obj) - _ITEM_LIMIT))

178 return truncated_list

179 if isinstance(obj, str):

180 truncated_str = obj[:_STR_LIMIT]

181 if len(obj) > _STR_LIMIT:

182 truncated_str += "..."

183 return truncated_str

184 return obj

185

186

class NotebookValidationError(ValidationError):  # type:ignore[misc]
    """Wrapper around a schema ValidationError with a compact string form.

    The string form shows a truncated instance rather than the full schema
    and notebook, which keeps tracebacks readable.
    """

    def __init__(self, original, ref=None):
        """Wrap ``original``; ``ref`` is used only if ``original`` has no ``ref``."""
        self.original = original
        self.ref = getattr(original, "ref", ref)
        self.message = original.message

    def __getattr__(self, key):
        """Delegate any attribute not found on the wrapper to the wrapped error."""
        return getattr(self.original, key)

    def __unicode__(self):
        """Build the compact, human-readable description of the error.

        avoids dumping full schema and notebook to logs
        """
        wrapped = self.original
        shown_instance = _truncate_obj(wrapped.instance)
        # The last element of the schema path is not shown in the location.
        schema_trail = _format_as_index(list(wrapped.relative_schema_path)[:-1])
        location = "Failed validating {!r} in {}{}:".format(
            wrapped.validator,
            self.ref or "notebook",
            schema_trail,
        )
        lines = [
            wrapped.message,
            "",
            location,
            "",
            "On instance%s:" % _format_as_index(wrapped.relative_path),
            pprint.pformat(shown_instance, width=78),
        ]
        return "\n".join(lines)

    __str__ = __unicode__

227

228

def better_validation_error(error, version, version_minor):
    """Get better ValidationError on oneOf failures

    oneOf errors aren't informative.
    if it's a cell type or output_type error,
    try validating directly based on the type for a better error message

    Parameters
    ----------
    error : ValidationError
        The error reported by the schema validator.
    version : int
    version_minor : int
        Notebook format version used for the more specific validation.

    Returns
    -------
    ``error`` itself when its schema path is empty, otherwise a
    ``NotebookValidationError`` (wrapping either ``error`` or the more
    specific error found by validating against the cell/output definition).
    """
    if not len(error.schema_path):
        return error
    key = error.schema_path[-1]
    ref = None
    if key.endswith("Of"):
        if isinstance(error.instance, dict):
            if "cell_type" in error.instance:
                cell_type = error.instance["cell_type"]
                # A malformed notebook may carry a non-string cell_type;
                # concatenating it would raise TypeError instead of
                # reporting the validation failure.
                if isinstance(cell_type, str):
                    ref = cell_type + "_cell"
            elif "output_type" in error.instance:
                ref = error.instance["output_type"]

    if ref:
        try:
            validate(
                error.instance,
                ref,
                version=version,
                version_minor=version_minor,
            )
        except ValidationError as sub_error:
            # keep extending relative path
            error.relative_path.extend(sub_error.relative_path)
            sub_error.relative_path = error.relative_path
            better = better_validation_error(sub_error, version, version_minor)
            # The recursive call hands back a plain ValidationError when the
            # schema path is empty, and that object has no `ref` attribute yet.
            if getattr(better, "ref", None) is None:
                better.ref = ref
            return better
        except Exception:  # noqa: S110
            # if it fails for some reason,
            # let the original error through
            pass
    return NotebookValidationError(error, ref)

268

269

def normalize(
    nbdict: Any,
    version: Optional[int] = None,
    version_minor: Optional[int] = None,
    *,
    relax_add_props: bool = False,
    strip_invalid_metadata: bool = False,
) -> tuple[int, Any]:
    """
    Normalise a notebook prior to validation.

    A few normalisation steps are applied to a deep copy of the notebook to
    standardise it and make validation easier; the object passed in is left
    untouched.

    You should in general not rely on this function and make sure the notebooks
    that reach nbformat are already in a normal form. If not you likely have a bug,
    and may have security issues.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
        Major format version; read from the notebook when omitted.
    version_minor : int
        Minor format version; read from the notebook when omitted.
    relax_add_props : bool
        Whether to allow extra property in the Json schema validating the
        notebook.
    strip_invalid_metadata : bool
        Whether to strip metadata that does not exist in the Json schema when
        validating the notebook.

    Returns
    -------
    changes : int
        number of changes in the notebooks
    notebook : dict
        deep-copy of the original object with relevant changes.

    """
    working_copy = deepcopy(nbdict)
    detected_major, detected_minor = get_version(working_copy)
    return _normalize(
        working_copy,
        detected_major if version is None else version,
        detected_minor if version_minor is None else version_minor,
        True,  # always repair missing / duplicate cell ids
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    )

323

324

def _normalize(
    nbdict: Any,
    version: int,
    version_minor: int,
    repair_duplicate_cell_ids: bool,
    relax_add_props: bool,
    strip_invalid_metadata: bool,
) -> tuple[int, Any]:
    """
    Private normalisation routine.

    This function attempts to normalize the `nbdict` passed to it.

    As `_normalize()` is currently used both in `validate()` (for
    historical reasons), and in the `normalize()` public function,
    `_normalize()` does currently mutate `nbdict`.
    Ideally, once `validate()` stops calling `_normalize()`, `_normalize()`
    may stop mutating `nbdict`.

    Parameters
    ----------
    nbdict : dict
        notebook document, modified in place
    version : int
    version_minor : int
    repair_duplicate_cell_ids : bool
        When True, missing cell ids are generated and duplicate ids are
        replaced; when False, a duplicate id raises ``ValidationError``.
    relax_add_props : bool
        Forwarded to the metadata stripping step.
    strip_invalid_metadata : bool
        Whether to strip metadata that fails schema validation.

    Returns
    -------
    changes : int
        number of modifications made
    notebook : dict
        the same ``nbdict`` object

    Raises
    ------
    ValidationError
        On a duplicate cell id when ``repair_duplicate_cell_ids`` is False.
    """
    changes = 0

    if (version, version_minor) >= (4, 5):
        # if we support cell ids ensure default ids are provided
        for cell in nbdict["cells"]:
            if "id" not in cell:
                warnings.warn(
                    "Cell is missing an id field, this will become"
                    " a hard error in future nbformat versions. You may want"
                    " to use `normalize()` on your notebooks before validations"
                    " (available since nbformat 5.1.4). Previous versions of nbformat"
                    " are fixing this issue transparently, and will stop doing so"
                    " in the future.",
                    MissingIDFieldWarning,
                    stacklevel=3,
                )
                # Generate cell ids if any are missing
                if repair_duplicate_cell_ids:
                    cell["id"] = generate_corpus_id()
                    changes += 1

        # if we support cell ids check for uniqueness when validating the whole notebook
        seen_ids = set()
        for cell in nbdict["cells"]:
            if "id" not in cell:
                continue
            cell_id = cell["id"]
            if cell_id in seen_ids:
                if not repair_duplicate_cell_ids:
                    msg = f"Non-unique cell id '{cell_id}' detected."
                    raise ValidationError(msg)
                # Best effort to repair if we find a duplicate id
                new_id = generate_corpus_id()
                cell["id"] = new_id
                changes += 1
                warnings.warn(
                    f"Non-unique cell id {cell_id!r} detected. Corrected to {new_id!r}.",
                    DuplicateCellId,
                    stacklevel=3,
                )
                # Record the replacement id, not the old one, so a later
                # cell carrying the same id is still detected as a duplicate.
                cell_id = new_id
            seen_ids.add(cell_id)
    if strip_invalid_metadata:
        changes += _strip_invalida_metadata(
            nbdict, version, version_minor, relax_add_props=relax_add_props
        )
    return changes, nbdict

392

393

394def _dep_warn(field):

395 warnings.warn(

396 dedent(

397 f"""`{field}` kwargs of validate has been deprecated for security

398 reasons, and will be removed soon.

399

400 Please explicitly use the `n_changes, new_notebook = nbformat.validator.normalize(old_notebook, ...)` if you wish to

401 normalise your notebook. `normalize` is available since nbformat 5.5.0

402

403 """

404 ),

405 DeprecationWarning,

406 stacklevel=3,

407 )

408

409

def validate(
    nbdict: Any = None,
    ref: Optional[str] = None,
    version: Optional[int] = None,
    version_minor: Optional[int] = None,
    relax_add_props: bool = False,
    nbjson: Any = None,
    repair_duplicate_cell_ids: bool = _deprecated,  # type: ignore[assignment]
    strip_invalid_metadata: bool = _deprecated,  # type: ignore[assignment]
) -> None:
    """Check that a notebook dict-like object conforms to the relevant
    notebook format schema, raising the first error found.

    Parameters
    ----------
    nbdict : dict
        notebook document
    ref : optional, str
        reference to the subset of the schema we want to validate against.
        for example ``"markdown_cell"``, `"code_cell"` ....
    version : int
    version_minor : int
        Format version to validate against. For a whole notebook
        (``ref is None``) missing values are read from the notebook; with a
        ``ref`` and no ``version``, 1.0 is assumed.
    relax_add_props : bool
        Whether to allow extra properties in the JSON schema validating the notebook.
        When True, all known fields are validated, but unknown fields are ignored.
    nbjson
        Backwards-compatible alias for ``nbdict``, used only when ``nbdict``
        is None.
    repair_duplicate_cell_ids : bool
        Deprecated since 5.5.0 - will be removed in the future.
    strip_invalid_metadata : bool
        Deprecated since 5.5.0 - will be removed in the future.

    Returns
    -------
    None

    Raises
    ------
    ValidationError if not valid.
    TypeError if neither ``nbdict`` nor ``nbjson`` is given.

    Notes
    -----
    Prior to Nbformat 5.5.0 the `validate` and `isvalid` method would silently
    try to fix invalid notebook and mutate arguments. This behavior is deprecated
    and will be removed in a near future.

    Please explicitly call `normalize` if you need to normalize notebooks.
    """
    assert ref is None or isinstance(ref, str)

    # Passing either deprecated keyword explicitly triggers a warning;
    # otherwise the historical defaults apply.
    if strip_invalid_metadata is not _deprecated:
        _dep_warn("strip_invalid_metadata")
    else:
        strip_invalid_metadata = False

    if repair_duplicate_cell_ids is not _deprecated:
        _dep_warn("repair_duplicate_cell_ids")
    else:
        repair_duplicate_cell_ids = True

    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if ref is None:
        # if ref is not specified, we have a whole notebook, so we can get the version
        found_major, found_minor = get_version(nbdict)
        if version is None:
            version = found_major
        if version_minor is None:
            version_minor = found_minor

        assert isinstance(version, int)
        assert isinstance(version_minor, int)
        _normalize(
            nbdict,
            version,
            version_minor,
            repair_duplicate_cell_ids,
            relax_add_props=relax_add_props,
            strip_invalid_metadata=strip_invalid_metadata,
        )
    elif version is None:
        # if ref is specified, and we don't have a version number, assume we're validating against 1.0
        version, version_minor = 1, 0

    # Only the first reported error is raised.
    for error in iter_validate(
        nbdict,
        ref=ref,
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    ):
        raise error

510

511

def _get_errors(
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool, *args: Any
) -> Any:
    """Return an iterator over the schema errors found in ``nbdict``.

    Extra positional ``args`` are forwarded to the validator's
    ``iter_errors``. When the active validator is not jsonschema and errors
    were found, validation is run again with jsonschema and those errors are
    returned instead.

    Raises
    ------
    ValidationError
        If no validator is available for the requested version.
    """
    validator = get_validator(version, version_minor, relax_add_props=relax_add_props)
    if not validator:
        msg = f"No schema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)
    found = list(validator.iter_errors(nbdict, *args))
    if not found or validator.name == "jsonschema":
        return iter(found)
    # jsonschema gives the best error messages.
    fallback = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    return fallback.iter_errors(nbdict, *args)

531

532

def _strip_invalida_metadata(
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool
) -> int:
    """
    This function tries to extract metadata errors from the validator and fix
    them if necessary. This mostly mean stripping unknown keys from metadata
    fields, or removing metadata fields altogether.

    ``nbdict`` is modified in place. Cells that are not dicts, or whose
    ``cell_type`` does not match one of the schema's cell definitions, are
    left untouched so that the regular validation pass reports them.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
    version_minor : int
    relax_add_props : bool
        Whether to allow extra property in the Json schema validating the
        notebook.

    Returns
    -------
    int
        number of modifications

    Raises
    ------
    ValidationError
        If no jsonschema validator is available for the requested version.
    """
    changes = 0
    if not list(_get_errors(nbdict, version, version_minor, relax_add_props)):
        return changes

    # jsonschema gives a better error tree.
    validator = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    if not validator:
        msg = f"No jsonschema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)
    error_tree = validator.error_tree(validator.iter_errors(nbdict))

    if "metadata" in error_tree:
        for key in error_tree["metadata"]:
            nbdict["metadata"].pop(key, None)
            changes += 1

    if "cells" in error_tree:
        cells = nbdict.get("cells", [])
        if not isinstance(cells, list):
            # Nothing to walk; the schema validation reports the wrong type.
            return changes
        cell_errors = error_tree["cells"]
        for cell_idx, cell in enumerate(cells):
            # Cells don't report individual metadata keys as having failed validation
            # Instead it reports that it failed to validate against each cell-type definition.
            # We have to delve into why those definitions failed to uncover which metadata
            # keys are misbehaving.
            one_of_error = cell_errors[cell_idx].errors.get("oneOf")
            if one_of_error is None or not isinstance(cell, dict):
                continue
            # A cell that fails `oneOf` may well lack `cell_type`; `.get`
            # avoids a KeyError and the lookup below then finds no match.
            intended_cell_type = cell.get("cell_type")
            schemas_by_index = [
                candidate.get("$ref") for candidate in one_of_error.schema["oneOf"]
            ]
            cell_type_definition_name = f"#/definitions/{intended_cell_type}_cell"
            if cell_type_definition_name not in schemas_by_index:
                continue
            schema_index = schemas_by_index.index(cell_type_definition_name)
            for error in one_of_error.context:
                rel_path = error.relative_path
                error_for_intended_schema = error.schema_path[0] == schema_index
                is_top_level_metadata_key = len(rel_path) == 2 and rel_path[0] == "metadata"
                if error_for_intended_schema and is_top_level_metadata_key:
                    cell["metadata"].pop(rel_path[1], None)
                    changes += 1

    return changes

604

605

def iter_validate(
    nbdict=None,
    ref=None,
    version=None,
    version_minor=None,
    relax_add_props=False,
    nbjson=None,
    strip_invalid_metadata=False,
):
    """Yield every ValidationError found when checking a notebook dict-like
    object against the relevant notebook format schema.

    Nothing is yielded for a valid document. ``nbjson`` is a
    backwards-compatible alias for ``nbdict``. When ``version`` is None both
    ``version`` and ``version_minor`` are read from the document. With a
    ``ref``, only that schema definition is validated against.

    Notes
    -----
    To fix: For security reasons, this function should *never* mutate its `nbdict` argument, and
    should *never* try to validate a mutated or modified version of its notebook.

    """
    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "iter_validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if version is None:
        version, version_minor = get_version(nbdict)

    if ref:
        # Validate against a single definition of the schema.
        extra_args = ({"$ref": "#/definitions/%s" % ref},)
    else:
        if strip_invalid_metadata:
            _strip_invalida_metadata(nbdict, version, version_minor, relax_add_props)
        # Validate one more time to ensure that us removing metadata
        # didn't cause another complex validation issue in the schema.
        # Also to ensure that higher-level errors produced by individual metadata validation
        # failures are removed.
        extra_args = ()

    try:
        errors = _get_errors(nbdict, version, version_minor, relax_add_props, *extra_args)
    except ValidationError as e:
        yield e
        return

    for error in errors:
        yield better_validation_error(error, version, version_minor)