Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbformat/validator.py: 13%

1"""Notebook format validators."""

2# Copyright (c) IPython Development Team.

3# Distributed under the terms of the Modified BSD License.

6import json

7import os

8import pprint

9import warnings

10from copy import deepcopy

11from textwrap import dedent

12from typing import Any, Optional, Tuple

14from ._imports import import_item

15from .corpus.words import generate_corpus_id

16from .json_compat import ValidationError, _validator_for_name, get_current_validator

17from .reader import get_version

18from .warnings import DuplicateCellId, MissingIDFieldWarning

# Module-level cache of compiled schema validators. `get_validator` stores
# entries keyed by (validator name, nbformat major, nbformat minor).
validators = {}

# Unique sentinel used as the default for deprecated keyword arguments of
# `validate`, so "argument not passed" can be told apart from any real value.
_deprecated = object()

def _relax_additional_properties(obj):
    """Set every ``additionalProperties`` entry to ``True``, recursively.

    Dicts and lists are modified in place and the same object is returned.
    Any other value is returned unchanged.
    """
    if isinstance(obj, list):
        for position, element in enumerate(obj):
            obj[position] = _relax_additional_properties(element)
        return obj

    if isinstance(obj, dict):
        # Only existing keys are reassigned, so iterating while assigning is safe.
        for name in obj:
            if name == "additionalProperties":
                obj[name] = True
            else:
                obj[name] = _relax_additional_properties(obj[name])

    return obj

def _allow_undefined(schema):
    """Let the schema accept unrecognized cell and output types.

    Appends a reference to the ``unrecognized_cell`` / ``unrecognized_output``
    definitions to the ``oneOf`` list of the ``cell`` / ``output`` definitions.
    The schema is modified in place and returned.
    """
    definitions = schema["definitions"]
    fallbacks = (
        ("cell", "#/definitions/unrecognized_cell"),
        ("output", "#/definitions/unrecognized_output"),
    )
    for kind, target in fallbacks:
        definitions[kind]["oneOf"].append({"$ref": target})
    return schema

def get_validator(version=None, version_minor=None, relax_add_props=False, name=None):
    """Load the JSON schema into a Validator.

    Parameters
    ----------
    version : int, optional
        nbformat major version; defaults to ``current_nbformat``.
    version_minor : int, optional
        nbformat minor version; defaults to the ``nbformat_minor`` attribute
        of the ``nbformat.v<version>`` module (0 if it has none).
    relax_add_props : bool
        When True, return a validator built from a schema in which every
        ``additionalProperties`` has been set to ``True``.
    name : str, optional
        Name of the validator backend to use; when falsy the current
        validator is used.

    Returns
    -------
    A validator instance, or ``None`` when no schema could be located
    (``_get_schema_json`` raised ``AttributeError``).
    """
    if version is None:
        from . import current_nbformat

        version = current_nbformat

    v = import_item("nbformat.v%s" % version)
    current_minor = getattr(v, "nbformat_minor", 0)
    if version_minor is None:
        version_minor = current_minor

    current_validator = _validator_for_name(name) if name else get_current_validator()

    if relax_add_props:
        try:
            schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
        except AttributeError:
            return None

        # this allows properties to be added for intermediate
        # representations while validating for all other kinds of errors
        schema_json = _relax_additional_properties(schema_json)
        # The relaxed validator is deliberately NOT stored in `validators`:
        # the cache key does not encode `relax_add_props`, so caching it would
        # hand the relaxed validator to later strict callers.
        return current_validator(schema_json)

    version_tuple = (current_validator.name, version, version_minor)

    if version_tuple not in validators:
        try:
            schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
        except AttributeError:
            return None

        if current_minor < version_minor:
            # notebook from the future, relax all `additionalProperties: False` requirements
            schema_json = _relax_additional_properties(schema_json)
            # and allow undefined cell types and outputs
            schema_json = _allow_undefined(schema_json)

        validators[version_tuple] = current_validator(schema_json)

    return validators[version_tuple]

def _get_schema_json(v, version=None, version_minor=None):
    """
    Gets the json schema from a given imported library and nbformat version.

    ``v`` must expose ``__file__``, ``nbformat_minor`` and a ``nbformat_schema``
    mapping from ``(version, version_minor)`` to a file name relative to the
    directory of ``v.__file__``. When the exact pair is absent and
    ``version_minor`` is newer than ``v.nbformat_minor``, the entry stored
    under ``(None, None)`` is loaded instead.

    Raises
    ------
    AttributeError
        If no schema file matches the requested version.
    """
    schema_dir = os.path.dirname(v.__file__)
    if (version, version_minor) in v.nbformat_schema:
        schema_path = os.path.join(schema_dir, v.nbformat_schema[(version, version_minor)])
    elif version_minor > v.nbformat_minor:
        # load the latest schema
        schema_path = os.path.join(schema_dir, v.nbformat_schema[(None, None)])
    else:
        msg = "Cannot find appropriate nbformat schema file."
        raise AttributeError(msg)
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(schema_path, encoding="utf-8") as f:
        schema_json = json.load(f)
    return schema_json

105

106

def isvalid(nbjson, ref=None, version=None, version_minor=None):
    """Report whether the notebook JSON conforms to the notebook format schema.

    Returns True when `validate` accepts the input and False when it raises
    a ValidationError. Use `validate` directly to see the individual errors.

    DeprecationWarning and MissingIDFieldWarning are silenced during the
    check. An AssertionError is raised if the input no longer compares equal
    to a deep copy taken before validation.
    """
    snapshot = deepcopy(nbjson)
    try:
        with warnings.catch_warnings():
            for silenced in (DeprecationWarning, MissingIDFieldWarning):
                warnings.filterwarnings("ignore", category=silenced)
            validate(nbjson, ref, version, version_minor, repair_duplicate_cell_ids=False)
        verdict = True
    except ValidationError:
        verdict = False
    finally:
        # Validation must never have altered the caller's object.
        if nbjson != snapshot:
            raise AssertionError
    return verdict

128

129

def _format_as_index(indices):
    """
    (from jsonschema._utils.format_as_index, copied to avoid relying on private API)

    Render a sequence of indices as chained subscript operations, using the
    ``repr`` of each index. An empty sequence gives an empty string.

    For example, [1, 2, "foo"] -> [1][2]['foo']
    """
    if indices:
        return "[%s]" % "][".join(map(repr, indices))
    return ""

142

143

# Maximum number of list items / dict keys kept by `_truncate_obj`.
_ITEM_LIMIT = 16
# Maximum number of characters kept from a string by `_truncate_obj`.
_STR_LIMIT = 64


def _truncate_obj(obj):
    """Shorten an object so it can be shown in a validation traceback.

    Strings, lists and dicts are cut to the module limits, with a marker
    describing how much was dropped. List values stored under the ``cells``
    and ``outputs`` keys are replaced by a one-item summary. Other values are
    returned unchanged.
    """
    if isinstance(obj, str):
        head = obj[:_STR_LIMIT]
        return head + "..." if len(obj) > _STR_LIMIT else head

    if isinstance(obj, list):
        shortened_items = [_truncate_obj(member) for member in obj[:_ITEM_LIMIT]]
        overflow = len(obj) - _ITEM_LIMIT
        if overflow > 0:
            shortened_items.append("...%i items truncated..." % overflow)
        return shortened_items

    if isinstance(obj, dict):
        shortened = {}
        for position, (name, member) in enumerate(obj.items()):
            if position >= _ITEM_LIMIT:
                break
            shortened[name] = _truncate_obj(member)

        # Squash the potentially huge cell/output lists into a count.
        summaries = (("cells", "...%i cells..."), ("outputs", "...%i outputs..."))
        for squashed, template in summaries:
            if isinstance(shortened.get(squashed), list):
                shortened[squashed] = [template % len(obj[squashed])]

        hidden = len(obj) - _ITEM_LIMIT
        if hidden > 0:
            shortened["..."] = "%i keys truncated" % hidden
        return shortened

    return obj

175

176

class NotebookValidationError(ValidationError):
    """Schema ValidationError with truncated representation

    Wraps another validation error and renders a shortened message,
    to avoid massive verbose tracebacks.
    """

    def __init__(self, original, ref=None):
        """Wrap `original`; `ref` is used only if `original` has no ``ref``."""
        self.original = original
        self.ref = getattr(self.original, "ref", ref)
        self.message = self.original.message

    def __getattr__(self, key):
        """Fall back to the wrapped error for attributes not set here."""
        return getattr(self.original, key)

    def __unicode__(self):
        """Custom str for validation errors

        Shows the message, the failing validator with its schema location,
        and a truncated view of the offending instance. This avoids dumping
        the full schema and notebook to logs.
        """
        wrapped = self.original
        shortened_instance = _truncate_obj(wrapped.instance)

        # The final schema-path element is dropped from the rendered location.
        schema_location = _format_as_index(list(wrapped.relative_schema_path)[:-1])
        failure_line = "Failed validating {!r} in {}{}:".format(
            wrapped.validator,
            self.ref or "notebook",
            schema_location,
        )
        instance_line = "On instance%s:" % _format_as_index(wrapped.relative_path)

        sections = [
            wrapped.message,
            "",
            failure_line,
            "",
            instance_line,
            pprint.pformat(shortened_instance, width=78),
        ]
        return "\n".join(sections)

    __str__ = __unicode__

217

218

def better_validation_error(error, version, version_minor):
    """Get better ValidationError on oneOf failures

    oneOf errors aren't informative.
    if it's a cell type or output_type error,
    try validating directly based on the type for a better error message

    Parameters
    ----------
    error : ValidationError
        error reported by the schema validator
    version : int
    version_minor : int

    Returns
    -------
    The error itself when its schema path is empty, otherwise a
    NotebookValidationError (possibly wrapping a more specific sub-error).
    """
    if not len(error.schema_path):
        return error
    key = error.schema_path[-1]
    ref = None
    if isinstance(key, str) and key.endswith("Of") and isinstance(error.instance, dict):
        instance = error.instance
        # Only a string type can name a schema definition. The instance is
        # already known to be invalid, so a non-string value must not crash
        # the string concatenation or be passed on as a ref.
        if "cell_type" in instance:
            cell_type = instance["cell_type"]
            if isinstance(cell_type, str):
                ref = cell_type + "_cell"
        elif "output_type" in instance:
            output_type = instance["output_type"]
            if isinstance(output_type, str):
                ref = output_type

    if ref:
        try:
            validate(
                error.instance,
                ref,
                version=version,
                version_minor=version_minor,
            )
        except ValidationError as sub_error:
            # keep extending relative path
            error.relative_path.extend(sub_error.relative_path)
            sub_error.relative_path = error.relative_path
            better = better_validation_error(sub_error, version, version_minor)
            # `better` may be a plain error without a `ref` attribute.
            if getattr(better, "ref", None) is None:
                better.ref = ref
            return better
        except Exception:  # noqa
            # if it fails for some reason,
            # let the original error through
            pass
    return NotebookValidationError(error, ref)

258

259

def normalize(
    nbdict: Any,
    version: Optional[int] = None,
    version_minor: Optional[int] = None,
    *,
    relax_add_props: bool = False,
    strip_invalid_metadata: bool = False,
) -> Tuple[int, Any]:
    """
    Normalise a notebook prior to validation.

    Applies a few normalisation steps to a deep copy of the notebook so that
    validation is easier. The object passed in is left untouched.

    You should in general not rely on this function and make sure the notebooks
    that reach nbformat are already in a normal form. If not you likely have a bug,
    and may have security issues.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
        defaults to the major version read from the notebook
    version_minor : int
        defaults to the minor version read from the notebook
    relax_add_props : bool
        Whether to allow extra property in the Json schema validating the
        notebook.
    strip_invalid_metadata : bool
        Whether to strip metadata that does not exist in the Json schema when
        validating the notebook.

    Returns
    -------
    changes : int
        number of changes in the notebooks
    notebook : dict
        deep-copy of the original object with relevant changes.

    """
    notebook_copy = deepcopy(nbdict)
    detected_major, detected_minor = get_version(notebook_copy)
    effective_major = detected_major if version is None else version
    effective_minor = detected_minor if version_minor is None else version_minor
    # Missing and duplicate cell ids are always repaired here (4th argument).
    return _normalize(
        notebook_copy,
        effective_major,
        effective_minor,
        True,
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    )

313

314

def _normalize(
    nbdict: Any,
    version: int,
    version_minor: int,
    repair_duplicate_cell_ids: bool,
    relax_add_props: bool,
    strip_invalid_metadata: bool,
) -> Tuple[int, Any]:
    """
    Private normalisation routine.

    This function attempts to normalize the `nbdict` passed to it.

    As `_normalize()` is currently used both in `validate()` (for
    historical reasons), and in the `normalize()` public function,
    `_normalize()` does currently mutate `nbdict`.
    Ideally, once `validate()` stops calling `_normalize()`, `_normalize()`
    may stop mutating `nbdict`.

    Structurally broken input (no ``cells`` entry, ``cells`` that is not a
    list/tuple, cells that are not dicts, unhashable ids) is skipped here
    instead of raising ``KeyError``/``TypeError``; it is left for schema
    validation to report.

    Returns
    -------
    changes : int
        number of modifications made
    nbdict
        the (mutated) object that was passed in

    Raises
    ------
    ValidationError
        if a duplicate cell id is found and `repair_duplicate_cell_ids`
        is false.
    """
    changes = 0

    if (version, version_minor) >= (4, 5):
        try:
            raw_cells = nbdict["cells"]
        except (KeyError, TypeError):
            raw_cells = None
        if not isinstance(raw_cells, (list, tuple)):
            raw_cells = []
        # Only dict-like cells can carry an id.
        cells = [cell for cell in raw_cells if isinstance(cell, dict)]

        # if we support cell ids ensure default ids are provided
        for cell in cells:
            if "id" not in cell:
                warnings.warn(
                    "Code cell is missing an id field, this will become"
                    " a hard error in future nbformat versions. You may want"
                    " to use `normalize()` on your notebooks before validations"
                    " (available since nbformat 5.1.4). Previous versions of nbformat"
                    " are fixing this issue transparently, and will stop doing so"
                    " in the future.",
                    MissingIDFieldWarning,
                    stacklevel=3,
                )
                # Generate cell ids if any are missing
                if repair_duplicate_cell_ids:
                    cell["id"] = generate_corpus_id()
                    changes += 1

        # if we support cell ids check for uniqueness when validating the whole notebook
        seen_ids = set()
        for cell in cells:
            if "id" not in cell:
                continue
            cell_id = cell["id"]
            try:
                is_duplicate = cell_id in seen_ids
            except TypeError:
                # Unhashable id (e.g. a list): cannot be tracked in the set.
                continue
            if is_duplicate:
                # Best effort to repair if we find a duplicate id
                if repair_duplicate_cell_ids:
                    new_id = generate_corpus_id()
                    cell["id"] = new_id
                    changes += 1
                    warnings.warn(
                        f"Non-unique cell id {cell_id!r} detected. Corrected to {new_id!r}.",
                        DuplicateCellId,
                        stacklevel=3,
                    )
                    # Track the replacement too, so later cells are checked against it.
                    seen_ids.add(new_id)
                else:
                    msg = f"Non-unique cell id '{cell_id}' detected."
                    raise ValidationError(msg)
            else:
                seen_ids.add(cell_id)
    if strip_invalid_metadata:
        changes += _strip_invalida_metadata(
            nbdict, version, version_minor, relax_add_props=relax_add_props
        )
    return changes, nbdict

382

383

def _dep_warn(field):
    """Emit a DeprecationWarning for the deprecated `validate` kwarg `field`.

    The warning points callers at `normalize` as the replacement.
    """
    notice = dedent(
        f"""`{field}` kwargs of validate has been deprecated for security
        reasons, and will be removed soon.

        Please explicitly use the `n_changes, new_notebook = nbformat.validator.normalize(old_notebook, ...)` if you wish to
        normalise your notebook. `normalize` is available since nbformat 5.5.0

        """
    )
    warnings.warn(notice, DeprecationWarning, stacklevel=3)

398

399

def validate(  # noqa
    nbdict: Any = None,
    ref: Optional[str] = None,
    version: Optional[int] = None,
    version_minor: Optional[int] = None,
    relax_add_props: bool = False,
    nbjson: Any = None,
    repair_duplicate_cell_ids: bool = _deprecated,  # type: ignore
    strip_invalid_metadata: bool = _deprecated,  # type: ignore
) -> None:
    """Checks whether the given notebook dict-like object
    conforms to the relevant notebook format schema.

    Parameters
    ----------
    nbdict : dict
        notebook document
    ref : optional, str
        reference to the subset of the schema we want to validate against.
        for example ``"markdown_cell"``, `"code_cell"` ....
    version : int
    version_minor : int
    relax_add_props : bool
        Whether to allow extra properties in the JSON schema validating the notebook.
        When True, all known fields are validated, but unknown fields are ignored.
    nbjson
        Backwards-compatible alias for `nbdict`, used only when `nbdict` is None.
    repair_duplicate_cell_ids : bool
        Deprecated since 5.5.0 - will be removed in the future.
    strip_invalid_metadata : bool
        Deprecated since 5.5.0 - will be removed in the future.

    Returns
    -------
    None

    Raises
    ------
    ValidationError if not valid.
    TypeError if neither `nbdict` nor `nbjson` is given.

    Notes
    -----
    Prior to Nbformat 5.5.0 the `validate` and `isvalid` method would silently
    try to fix invalid notebook and mutate arguments. This behavior is deprecated
    and will be removed in a near future.

    Please explicitly call `normalize` if you need to normalize notebooks.
    """
    assert ref is None or isinstance(ref, str)  # noqa

    # Passing either deprecated kwarg explicitly triggers a warning; leaving
    # it out selects the historical default.
    if strip_invalid_metadata is not _deprecated:
        _dep_warn("strip_invalid_metadata")
    else:
        strip_invalid_metadata = False

    if repair_duplicate_cell_ids is not _deprecated:
        _dep_warn("repair_duplicate_cell_ids")
    else:
        repair_duplicate_cell_ids = True

    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if ref is None:
        # if ref is not specified, we have a whole notebook, so we can get the version
        nbdict_version, nbdict_version_minor = get_version(nbdict)
        version = nbdict_version if version is None else version
        version_minor = nbdict_version_minor if version_minor is None else version_minor

        assert isinstance(version, int)  # noqa
        assert isinstance(version_minor, int)  # noqa
        _normalize(
            nbdict,
            version,
            version_minor,
            repair_duplicate_cell_ids,
            relax_add_props=relax_add_props,
            strip_invalid_metadata=strip_invalid_metadata,
        )
    elif version is None:
        # if ref is specified, and we don't have a version number, assume we're validating against 1.0
        version, version_minor = 1, 0

    # Only the first reported error is raised.
    for error in iter_validate(
        nbdict,
        ref=ref,
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    ):
        raise error

503

504

def _get_errors(
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool, *args: Any
) -> Any:
    """Return an iterator over the schema validation errors of `nbdict`.

    Extra positional `args` are forwarded to the validator's `iter_errors`.
    When the active validator reports errors and is not the ``jsonschema``
    one, validation is re-run with the ``jsonschema`` validator and its
    errors are returned instead.

    Raises
    ------
    ValidationError
        if no validator is available for the requested version.
    """
    validator = get_validator(version, version_minor, relax_add_props=relax_add_props)
    if not validator:
        msg = f"No schema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)

    found = list(validator.iter_errors(nbdict, *args))
    if not found or validator.name == "jsonschema":
        return iter(found)

    # jsonschema gives the best error messages.
    fallback = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    return fallback.iter_errors(nbdict, *args)

524

525

def _strip_invalida_metadata(  # noqa
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool
) -> int:
    """
    This function tries to extract metadata errors from the validator and fix
    them if necessary. This mostly means stripping unknown keys from metadata
    fields, or removing metadata fields altogether.

    `nbdict` is modified in place. Malformed structures (``cells`` that is
    not a list, cells that are not dicts or lack ``cell_type``, metadata that
    is not a dict) are skipped rather than raising; they are left for schema
    validation to report.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
    version_minor : int
    relax_add_props : bool
        Whether to allow extra property in the Json schema validating the
        notebook.

    Returns
    -------
    int
        number of modifications

    Raises
    ------
    ValidationError
        if errors were found but no jsonschema validator is available.
    """
    errors = _get_errors(nbdict, version, version_minor, relax_add_props)
    changes = 0
    if not list(errors):
        return changes

    # jsonschema gives a better error tree.
    validator = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    if not validator:
        msg = f"No jsonschema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)
    errors = validator.iter_errors(nbdict)
    error_tree = validator.error_tree(errors)

    if "metadata" in error_tree:
        notebook_metadata = nbdict.get("metadata")
        if isinstance(notebook_metadata, dict):
            for key in error_tree["metadata"]:
                notebook_metadata.pop(key, None)
                changes += 1

    cells = nbdict.get("cells")
    if "cells" in error_tree and isinstance(cells, list):
        cells_tree = error_tree["cells"]
        for cell_idx, cell in enumerate(cells):
            # Cells don't report individual metadata keys as having failed validation
            # Instead it reports that it failed to validate against each cell-type definition.
            # We have to delve into why those definitions failed to uncover which metadata
            # keys are misbehaving.
            cell_errors = cells_tree[cell_idx].errors
            if "oneOf" not in cell_errors or not isinstance(cell, dict):
                continue
            one_of_error = cell_errors["oneOf"]

            # A cell without `cell_type` also fails `oneOf`; it simply matches
            # no definition below and is skipped.
            intended_cell_type = cell.get("cell_type")
            schemas_by_index = [ref["$ref"] for ref in one_of_error.schema["oneOf"]]
            cell_type_definition_name = f"#/definitions/{intended_cell_type}_cell"
            if cell_type_definition_name not in schemas_by_index:
                continue
            schema_index = schemas_by_index.index(cell_type_definition_name)

            cell_metadata = cell.get("metadata")
            if not isinstance(cell_metadata, dict):
                continue

            for error in one_of_error.context:
                rel_path = error.relative_path
                error_for_intended_schema = error.schema_path[0] == schema_index
                is_top_level_metadata_key = (
                    len(rel_path) == 2 and rel_path[0] == "metadata"  # noqa
                )
                if error_for_intended_schema and is_top_level_metadata_key:
                    cell_metadata.pop(rel_path[1], None)
                    changes += 1

    return changes

597

598

def iter_validate(
    nbdict=None,
    ref=None,
    version=None,
    version_minor=None,
    relax_add_props=False,
    nbjson=None,
    strip_invalid_metadata=False,
):
    """Checks whether the given notebook dict-like object conforms to the
    relevant notebook format schema.

    Returns a generator of all ValidationErrors if not valid. When `ref` is
    given, validation is done against that schema definition only. `nbjson`
    is a backwards-compatible alias used when `nbdict` is None.

    Notes
    -----
    To fix: For security reasons, this function should *never* mutate its `nbdict` argument, and
    should *never* try to validate a mutated or modified version of its notebook.

    """
    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "iter_validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if version is None:
        version, version_minor = get_version(nbdict)

    schema_args = ()
    if ref:
        schema_args = ({"$ref": "#/definitions/%s" % ref},)
    elif strip_invalid_metadata:
        _strip_invalida_metadata(nbdict, version, version_minor, relax_add_props)

    # For the whole-notebook case this runs after any metadata stripping, to
    # ensure that removing metadata didn't cause another complex validation
    # issue in the schema, and that higher-level errors produced by individual
    # metadata validation failures are removed.
    try:
        found = _get_errors(nbdict, version, version_minor, relax_add_props, *schema_args)
    except ValidationError as failure:
        yield failure
        return

    for problem in found:
        yield better_validation_error(problem, version, version_minor)