Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/nbformat/validator.py: 32%

1"""Notebook format validators."""

3# Copyright (c) IPython Development Team.

4# Distributed under the terms of the Modified BSD License.

5from __future__ import annotations

7import json

8import pprint

9import time

10import warnings

11from copy import deepcopy

12from pathlib import Path

13from textwrap import dedent

14from typing import Any

16from ._imports import import_item

17from .corpus.words import generate_corpus_id

18from .json_compat import ValidationError, _validator_for_name, get_current_validator

19from .reader import get_version

20from .warnings import DuplicateCellId, MissingIDFieldWarning

# Cache of instantiated schema validators, filled lazily by `get_validator`
# and keyed by (validator name, notebook version, notebook minor version).
validators = {}

# Sentinel default that lets `validate` tell "argument not passed" apart from
# an explicit value for its deprecated keyword arguments.
_deprecated = object()


# Public names of this module.
__all__ = [
    "NotebookValidationError",
    "ValidationError",
    "better_validation_error",
    "get_validator",
    "isvalid",
    "iter_validate",
    "normalize",
    "validate",
]

38def _relax_additional_properties(obj):

39 """relax any `additionalProperties`"""

40 if isinstance(obj, dict):

41 for key, value in obj.items():

42 value = ( # noqa: PLW2901

43 True if key == "additionalProperties" else _relax_additional_properties(value)

44 )

45 obj[key] = value

46 elif isinstance(obj, list):

47 for i, value in enumerate(obj):

48 obj[i] = _relax_additional_properties(value)

49 return obj

52def _allow_undefined(schema):

53 schema["definitions"]["cell"]["oneOf"].append({"$ref": "#/definitions/unrecognized_cell"})

54 schema["definitions"]["output"]["oneOf"].append({"$ref": "#/definitions/unrecognized_output"})

55 return schema

def get_validator(version=None, version_minor=None, relax_add_props=False, name=None):
    """Load the JSON schema into a Validator.

    Parameters
    ----------
    version : int, optional
        Notebook format major version. Defaults to the package's
        ``current_nbformat``.
    version_minor : int, optional
        Notebook format minor version. Defaults to the ``nbformat_minor``
        attribute of the imported ``nbformat.v<version>`` module (0 when the
        module does not define it).
    relax_add_props : bool
        When true, return a validator built from a schema in which every
        ``additionalProperties`` entry has been set to ``True``.
    name : str, optional
        Name of the validator backend to use. When empty, the backend
        returned by ``get_current_validator()`` is used.

    Returns
    -------
    The validator instance, or ``None`` when no schema file can be located
    for the requested version.
    """
    if version is None:
        from . import current_nbformat  # noqa:PLC0415

        version = current_nbformat

    v = import_item(f"nbformat.v{version}")
    current_minor = getattr(v, "nbformat_minor", 0)
    if version_minor is None:
        version_minor = current_minor

    current_validator = _validator_for_name(name) if name else get_current_validator()

    version_tuple = (current_validator.name, version, version_minor)
    # Relaxed validators get their own cache slot. Storing them under the
    # strict key would make every later strict lookup silently return the
    # relaxed validator. Strict validators keep the plain 3-tuple key.
    cache_key = (*version_tuple, "relax_add_props") if relax_add_props else version_tuple

    if cache_key not in validators:
        try:
            schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
        except AttributeError:
            return None

        if relax_add_props:
            # this allows properties to be added for intermediate
            # representations while validating for all other kinds of errors
            schema_json = _relax_additional_properties(schema_json)
        elif current_minor < version_minor:
            # notebook from the future, relax all `additionalProperties: False` requirements
            schema_json = _relax_additional_properties(schema_json)
            # and allow undefined cell types and outputs
            schema_json = _allow_undefined(schema_json)

        validators[cache_key] = current_validator(schema_json)

    return validators[cache_key]

100

101

102def _get_schema_json(v, version=None, version_minor=None):

103 """

104 Gets the json schema from a given imported library and nbformat version.

105 """

106 if (version, version_minor) in v.nbformat_schema:

107 schema_path = str(Path(v.__file__).parent / v.nbformat_schema[(version, version_minor)])

108 elif version_minor > v.nbformat_minor:

109 # load the latest schema

110 schema_path = str(Path(v.__file__).parent / v.nbformat_schema[(None, None)])

111 else:

112 msg = "Cannot find appropriate nbformat schema file."

113 raise AttributeError(msg)

114 with Path(schema_path).open(encoding="utf8") as f:

115 schema_json = json.load(f)

116 return schema_json # noqa: RET504

117

118

def isvalid(nbjson, ref=None, version=None, version_minor=None):
    """Report whether the given notebook JSON passes schema validation.

    Runs `validate` with ``repair_duplicate_cell_ids=False`` while
    ``DeprecationWarning`` and ``MissingIDFieldWarning`` are silenced.
    To see the individual errors that were encountered, use `validate`
    instead.

    NOTE: passing ``repair_duplicate_cell_ids`` explicitly sends `validate`
    through `_dep_warn`, which sleeps before warning.

    Returns
    -------
    bool
        True when validation succeeds, False when it raises
        ``ValidationError``.

    Raises
    ------
    AssertionError
        If ``nbjson`` no longer compares equal to the deep copy taken before
        validation, i.e. validation mutated the argument.
    """
    snapshot = deepcopy(nbjson)
    try:
        with warnings.catch_warnings():
            for ignored in (DeprecationWarning, MissingIDFieldWarning):
                warnings.filterwarnings("ignore", category=ignored)
            validate(nbjson, ref, version, version_minor, repair_duplicate_cell_ids=False)
    except ValidationError:
        outcome = False
    else:
        outcome = True
    finally:
        # Runs on every exit path, including unexpected exceptions.
        if nbjson != snapshot:
            raise AssertionError
    return outcome

140

141

142def _format_as_index(indices):

143 """

144 (from jsonschema._utils.format_as_index, copied to avoid relying on private API)

145

146 Construct a single string containing indexing operations for the indices.

147

148 For example, [1, 2, "foo"] -> [1][2]["foo"]

149 """

150

151 if not indices:

152 return ""

153 return "[%s]" % "][".join(repr(index) for index in indices)

154

155

156_ITEM_LIMIT = 16

157_STR_LIMIT = 64

158

159

160def _truncate_obj(obj):

161 """Truncate objects for use in validation tracebacks

162

163 Cell and output lists are squashed, as are long strings, lists, and dicts.

164 """

165 if isinstance(obj, dict):

166 truncated_dict = {k: _truncate_obj(v) for k, v in list(obj.items())[:_ITEM_LIMIT]}

167 if isinstance(truncated_dict.get("cells"), list):

168 truncated_dict["cells"] = ["...%i cells..." % len(obj["cells"])]

169 if isinstance(truncated_dict.get("outputs"), list):

170 truncated_dict["outputs"] = ["...%i outputs..." % len(obj["outputs"])]

171

172 if len(obj) > _ITEM_LIMIT:

173 truncated_dict["..."] = "%i keys truncated" % (len(obj) - _ITEM_LIMIT)

174 return truncated_dict

175 if isinstance(obj, list):

176 truncated_list = [_truncate_obj(item) for item in obj[:_ITEM_LIMIT]]

177 if len(obj) > _ITEM_LIMIT:

178 truncated_list.append("...%i items truncated..." % (len(obj) - _ITEM_LIMIT))

179 return truncated_list

180 if isinstance(obj, str):

181 truncated_str = obj[:_STR_LIMIT]

182 if len(obj) > _STR_LIMIT:

183 truncated_str += "..."

184 return truncated_str

185 return obj

186

187

class NotebookValidationError(ValidationError):  # type:ignore[misc]
    """Schema ValidationError with a truncated string representation.

    Wraps another validation error and keeps tracebacks short by printing a
    shortened view of the offending instance instead of the whole notebook.
    """

    def __init__(self, original, ref=None):
        """Wrap ``original``.

        ``ref`` names the schema definition that was validated against; it is
        only used when ``original`` does not already carry a ``ref``
        attribute.
        """
        self.original = original
        self.ref = getattr(original, "ref", ref)
        self.message = original.message

    def __getattr__(self, key):
        """Fall back to the wrapped error for attributes not set on this object."""
        return getattr(self.original, key)

    def __unicode__(self):
        """Custom str for validation errors.

        Avoids dumping the full schema and notebook to logs.
        """
        wrapped = self.original
        preview = pprint.pformat(_truncate_obj(wrapped.instance), width=78)
        location = self.ref or "notebook"
        # The last schema path element is the failing keyword itself, which
        # is already shown separately, so it is dropped from the index.
        schema_index = _format_as_index(list(wrapped.relative_schema_path)[:-1])
        instance_index = _format_as_index(wrapped.relative_path)

        lines = (
            wrapped.message,
            "",
            f"Failed validating {wrapped.validator!r} in {location}{schema_index}:",
            "",
            f"On instance{instance_index}:",
            preview,
        )
        return "\n".join(lines)

    __str__ = __unicode__

228

229

def better_validation_error(error, version, version_minor):
    """Get better ValidationError on oneOf failures.

    oneOf errors aren't informative. If the failing instance is a dict that
    declares a ``cell_type`` or ``output_type``, validate it directly against
    the definition named by that type to obtain a more specific error.

    Parameters
    ----------
    error
        The validation error to improve. Its ``relative_path`` is extended
        in place when a more specific sub-error is found.
    version : int
    version_minor : int
        Notebook format version used for the re-validation.

    Returns
    -------
    ``error`` itself when its ``schema_path`` is empty; otherwise a
    `NotebookValidationError` wrapping either ``error`` or the more specific
    sub-error.
    """
    if not error.schema_path:
        return error
    key = error.schema_path[-1]
    ref = None
    if key.endswith("Of") and isinstance(error.instance, dict):
        instance = error.instance
        if "cell_type" in instance:
            declared_type, suffix = instance["cell_type"], "_cell"
        elif "output_type" in instance:
            declared_type, suffix = instance["output_type"], ""
        else:
            declared_type, suffix = None, ""
        # Only a string can name a schema definition. A malformed notebook
        # may carry any JSON value here; it must end up as a validation
        # error, not as a TypeError from string concatenation.
        if isinstance(declared_type, str):
            ref = declared_type + suffix

    if ref:
        try:
            validate(
                error.instance,
                ref,
                version=version,
                version_minor=version_minor,
            )
        except ValidationError as sub_error:
            # keep extending relative path
            error.relative_path.extend(sub_error.relative_path)
            sub_error.relative_path = error.relative_path
            better = better_validation_error(sub_error, version, version_minor)
            if better.ref is None:
                better.ref = ref
            return better
        except Exception:  # noqa: S110
            # if it fails for some reason,
            # let the original error through
            pass
    return NotebookValidationError(error, ref)

269

270

def normalize(
    nbdict: Any,
    version: int | None = None,
    version_minor: int | None = None,
    *,
    relax_add_props: bool = False,
    strip_invalid_metadata: bool = False,
) -> tuple[int, Any]:
    """
    Normalise a notebook prior to validation.

    This applies a couple of normalisation steps to standardise notebooks
    and make validation easier. It works on a deep copy, so the object
    passed in is left untouched.

    You should in general not rely on this function and make sure the notebooks
    that reach nbformat are already in a normal form. If not you likely have a bug,
    and may have security issues.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
        Defaults to the version read from the notebook itself.
    version_minor : int
        Defaults to the minor version read from the notebook itself.
    relax_add_props : bool
        Whether to allow extra property in the Json schema validating the
        notebook.
    strip_invalid_metadata : bool
        Whether to strip metadata that does not exist in the Json schema when
        validating the notebook.

    Returns
    -------
    changes : int
        number of changes in the notebooks
    notebook : dict
        deep-copy of the original object with relevant changes.

    """
    notebook = deepcopy(nbdict)
    detected_version, detected_minor = get_version(notebook)
    return _normalize(
        notebook,
        detected_version if version is None else version,
        detected_minor if version_minor is None else version_minor,
        True,  # repair_duplicate_cell_ids
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    )

324

325

326def _normalize(

327 nbdict: Any,

328 version: int,

329 version_minor: int,

330 repair_duplicate_cell_ids: bool,

331 relax_add_props: bool,

332 strip_invalid_metadata: bool,

333) -> tuple[int, Any]:

334 """

335 Private normalisation routine.

336

337 This function attempts to normalize the `nbdict` passed to it.

338

339 As `_normalize()` is currently used both in `validate()` (for

340 historical reasons), and in the `normalize()` public function,

341 `_normalize()` does currently mutate `nbdict`.

342 Ideally, once `validate()` stops calling `_normalize()`, `_normalize()`

343 may stop mutating `nbdict`.

344

345 """

346 changes = 0

347

348 if (version, version_minor) >= (4, 5):

349 # if we support cell ids ensure default ids are provided

350 for cell in nbdict["cells"]:

351 if "id" not in cell:

352 warnings.warn(

353 "Cell is missing an id field, this will become"

354 " a hard error in future nbformat versions. You may want"

355 " to use `normalize()` on your notebooks before validations"

356 " (available since nbformat 5.1.4). Previous versions of nbformat"

357 " are fixing this issue transparently, and will stop doing so"

358 " in the future.",

359 MissingIDFieldWarning,

360 stacklevel=3,

361 )

362 # Generate cell ids if any are missing

363 if repair_duplicate_cell_ids:

364 cell["id"] = generate_corpus_id()

365 changes += 1

366

367 # if we support cell ids check for uniqueness when validating the whole notebook

368 seen_ids = set()

369 for cell in nbdict["cells"]:

370 if "id" not in cell:

371 continue

372 cell_id = cell["id"]

373 if cell_id in seen_ids:

374 # Best effort to repair if we find a duplicate id

375 if repair_duplicate_cell_ids:

376 new_id = generate_corpus_id()

377 cell["id"] = new_id

378 changes += 1

379 warnings.warn(

380 f"Non-unique cell id {cell_id!r} detected. Corrected to {new_id!r}.",

381 DuplicateCellId,

382 stacklevel=3,

383 )

384 else:

385 msg = f"Non-unique cell id '{cell_id}' detected."

386 raise ValidationError(msg)

387 seen_ids.add(cell_id)

388 if strip_invalid_metadata:

389 changes += _strip_invalida_metadata(

390 nbdict, version, version_minor, relax_add_props=relax_add_props

391 )

392 return changes, nbdict

393

394

def _dep_warn(field):
    """Warn that the ``field`` keyword argument of `validate` is deprecated.

    Sleeps for two seconds before emitting the ``DeprecationWarning``.
    """
    # Deprecated since 2023 and security issue start to annoy people.
    # regularly bump this by 1 sec.
    time.sleep(2)

    message = dedent(
        f"""`{field}` kwargs of validate has been deprecated for security
        reasons, and will be removed soon.

        Please explicitly use the `n_changes, new_notebook = nbformat.validator.normalize(old_notebook, ...)` if you wish to
        normalise your notebook. `normalize` is available since nbformat 5.5.0

        """
    )
    warnings.warn(message, DeprecationWarning, stacklevel=3)

413

414

def validate(
    nbdict: Any = None,
    ref: str | None = None,
    version: int | None = None,
    version_minor: int | None = None,
    relax_add_props: bool = False,
    nbjson: Any = None,
    repair_duplicate_cell_ids: bool = _deprecated,  # type: ignore[assignment]
    strip_invalid_metadata: bool = _deprecated,  # type: ignore[assignment]
) -> None:
    """Check that a notebook dict-like object conforms to the relevant
    notebook format schema.

    Parameters
    ----------
    nbdict : dict
        notebook document
    ref : optional, str
        reference to the subset of the schema we want to validate against.
        for example ``"markdown_cell"``, `"code_cell"` ....
    version : int
    version_minor : int
        When ``ref`` is None, missing values are read from the notebook.
        When ``ref`` is given and ``version`` is None, 1.0 is assumed.
    relax_add_props : bool
        Whether to allow extra properties in the JSON schema validating the notebook.
        When True, all known fields are validated, but unknown fields are ignored.
    nbjson
        Backwards-compatible alias for ``nbdict``; used only when ``nbdict``
        is None.
    repair_duplicate_cell_ids : bool
        Deprecated since 5.5.0 - will be removed in the future.
    strip_invalid_metadata : bool
        Deprecated since 5.5.0 - will be removed in the future.

    Returns
    -------
    None

    Raises
    ------
    ValidationError if not valid.
    TypeError if neither ``nbdict`` nor ``nbjson`` is given.

    Notes
    -----
    Prior to Nbformat 5.5.0 the `validate` and `isvalid` method would silently
    try to fix invalid notebook and mutate arguments. This behavior is deprecated
    and will be removed in a near future.

    Please explicitly call `normalize` if you need to normalize notebooks.
    """
    assert ref is None or isinstance(ref, str)

    # Passing either deprecated keyword explicitly triggers `_dep_warn`.
    if strip_invalid_metadata is _deprecated:
        strip_invalid_metadata = False
    else:
        _dep_warn("strip_invalid_metadata")

    if repair_duplicate_cell_ids is _deprecated:
        repair_duplicate_cell_ids = True
    else:
        _dep_warn("repair_duplicate_cell_ids")

    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if ref is None:
        # No ref means a whole notebook, so the version can be read from it.
        detected_version, detected_minor = get_version(nbdict)
        if version is None:
            version = detected_version
        if version_minor is None:
            version_minor = detected_minor

        assert isinstance(version, int)
        assert isinstance(version_minor, int)
        # Historical behaviour: whole notebooks are normalised (and possibly
        # mutated) before being validated.
        _normalize(
            nbdict,
            version,
            version_minor,
            repair_duplicate_cell_ids,
            relax_add_props=relax_add_props,
            strip_invalid_metadata=strip_invalid_metadata,
        )
    elif version is None:
        # if ref is specified, and we don't have a version number, assume we're validating against 1.0
        version, version_minor = 1, 0

    # Raise the first reported error, if any.
    for error in iter_validate(
        nbdict,
        ref=ref,
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    ):
        raise error

515

516

def _get_errors(
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool, *args: Any
) -> Any:
    """Return an iterator over the validation errors of ``nbdict``.

    Extra positional ``args`` are forwarded to the validator's
    ``iter_errors``. When the current validator reports errors and is not
    ``jsonschema``, validation is run again with the ``jsonschema`` validator
    and its errors are returned instead.

    Raises ``ValidationError`` when no schema is available for the version.
    """
    validator = get_validator(version, version_minor, relax_add_props=relax_add_props)
    if not validator:
        msg = f"No schema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)

    found = list(validator.iter_errors(nbdict, *args))
    if not found or validator.name == "jsonschema":
        return iter(found)

    # jsonschema gives the best error messages.
    reporting_validator = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    return reporting_validator.iter_errors(nbdict, *args)

536

537

def _strip_invalida_metadata(
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool
) -> int:
    """
    This function tries to extract metadata errors from the validator and fix
    them if necessary. This mostly mean stripping unknown keys from metadata
    fields, or removing metadata fields altogether.

    ``nbdict`` is modified in place. Cells that are not dicts or that do not
    declare a ``cell_type`` are skipped: there is no intended schema to
    compare their metadata against.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
    version_minor : int
    relax_add_props : bool
        Whether to allow extra property in the Json schema validating the
        notebook.

    Returns
    -------
    int
        number of modifications

    Raises
    ------
    ValidationError
        If no jsonschema validator is available for the requested version.

    """
    errors = _get_errors(nbdict, version, version_minor, relax_add_props)
    changes = 0
    # Only the presence of at least one error matters here.
    if next(iter(errors), None) is None:
        return changes

    # jsonschema gives a better error tree.
    validator = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    if not validator:
        msg = f"No jsonschema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)
    error_tree = validator.error_tree(validator.iter_errors(nbdict))

    if "metadata" in error_tree:
        for key in error_tree["metadata"]:
            nbdict["metadata"].pop(key, None)
            changes += 1

    cells = nbdict.get("cells")
    # A malformed notebook may hold anything under "cells"; only a list can
    # be walked cell by cell.
    if "cells" in error_tree and isinstance(cells, list):
        cells_tree = error_tree["cells"]
        for cell_idx, cell in enumerate(cells):
            # Cells don't report individual metadata keys as having failed validation
            # Instead it reports that it failed to validate against each cell-type definition.
            # We have to delve into why those definitions failed to uncover which metadata
            # keys are misbehaving.
            cell_errors = cells_tree[cell_idx].errors
            if "oneOf" not in cell_errors:
                continue
            if not isinstance(cell, dict) or "cell_type" not in cell:
                continue

            one_of_error = cell_errors["oneOf"]
            intended_cell_type = cell["cell_type"]
            schemas_by_index = [ref["$ref"] for ref in one_of_error.schema["oneOf"]]
            cell_type_definition_name = f"#/definitions/{intended_cell_type}_cell"
            if cell_type_definition_name not in schemas_by_index:
                continue

            schema_index = schemas_by_index.index(cell_type_definition_name)
            for error in one_of_error.context:
                rel_path = error.relative_path
                error_for_intended_schema = error.schema_path[0] == schema_index
                is_top_level_metadata_key = len(rel_path) == 2 and rel_path[0] == "metadata"
                if error_for_intended_schema and is_top_level_metadata_key:
                    cell["metadata"].pop(rel_path[1], None)
                    changes += 1

    return changes

609

610

def iter_validate(
    nbdict=None,
    ref=None,
    version=None,
    version_minor=None,
    relax_add_props=False,
    nbjson=None,
    strip_invalid_metadata=False,
):
    """Check that a notebook dict-like object conforms to the relevant
    notebook format schema.

    Yields every ValidationError found; yields nothing for a valid notebook.
    ``nbjson`` is a backwards-compatible alias used when ``nbdict`` is None.
    When ``version`` is None, both version numbers are read from the
    notebook. With a ``ref``, validation is restricted to that schema
    definition.

    Notes
    -----
    To fix: For security reasons, this function should *never* mutate its `nbdict` argument, and
    should *never* try to validate a mutated or modified version of its notebook.

    """
    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "iter_validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if version is None:
        version, version_minor = get_version(nbdict)

    if ref:
        schema_args = ({"$ref": "#/definitions/%s" % ref},)
    else:
        schema_args = ()
        if strip_invalid_metadata:
            _strip_invalida_metadata(nbdict, version, version_minor, relax_add_props)
        # The validation below runs after any stripping, so it also checks
        # that removing metadata didn't cause another complex validation
        # issue, and drops higher-level errors that were only produced by
        # individual metadata validation failures.

    try:
        errors = _get_errors(nbdict, version, version_minor, relax_add_props, *schema_args)
    except ValidationError as e:
        yield e
        return

    for error in errors:
        yield better_validation_error(error, version, version_minor)