Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/nbformat/validator.py: 13%
269 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""Notebook format validators."""
2# Copyright (c) IPython Development Team.
3# Distributed under the terms of the Modified BSD License.
6import json
7import os
8import pprint
9import warnings
10from copy import deepcopy
11from textwrap import dedent
12from typing import Any, Optional, Tuple
14from ._imports import import_item
15from .corpus.words import generate_corpus_id
16from .json_compat import ValidationError, _validator_for_name, get_current_validator
17from .reader import get_version
18from .warnings import DuplicateCellId, MissingIDFieldWarning
# Module-level cache of instantiated validators, keyed by
# ``(validator name, version, version_minor)`` (see ``get_validator``).
validators = {}
# Unique sentinel used as the default of deprecated keyword arguments of
# ``validate`` so that "argument not passed" can be told apart (via ``is``)
# from any value a caller might pass explicitly.
_deprecated = object()
24def _relax_additional_properties(obj):
25 """relax any `additionalProperties`"""
26 if isinstance(obj, dict):
27 for key, value in obj.items():
28 value = ( # noqa
29 True if key == "additionalProperties" else _relax_additional_properties(value)
30 )
31 obj[key] = value
32 elif isinstance(obj, list):
33 for i, value in enumerate(obj):
34 obj[i] = _relax_additional_properties(value)
35 return obj
38def _allow_undefined(schema):
39 schema["definitions"]["cell"]["oneOf"].append({"$ref": "#/definitions/unrecognized_cell"})
40 schema["definitions"]["output"]["oneOf"].append({"$ref": "#/definitions/unrecognized_output"})
41 return schema
def get_validator(version=None, version_minor=None, relax_add_props=False, name=None):
    """Load the JSON schema into a Validator.

    Parameters
    ----------
    version : int, optional
        Major notebook format version. Defaults to ``current_nbformat``.
    version_minor : int, optional
        Minor notebook format version. Defaults to the ``nbformat_minor``
        attribute of the ``nbformat.v<version>`` module (0 if it has none).
    relax_add_props : bool
        When True, return a validator whose schema has every
        ``additionalProperties`` entry relaxed to ``True``.
    name : str, optional
        Name of the validator implementation to use; when falsy the current
        validator (``get_current_validator()``) is used.

    Returns
    -------
    A validator instance, or ``None`` when no schema file can be found for
    the requested version.

    Notes
    -----
    Strict validators are cached in the module-level ``validators`` dict
    under ``(validator name, version, version_minor)``. Relaxed validators are
    built on every call and deliberately *not* stored under that key:
    storing them there would make later ``relax_add_props=False`` calls
    silently return the relaxed validator.
    """
    if version is None:
        from . import current_nbformat

        version = current_nbformat

    v = import_item("nbformat.v%s" % version)
    current_minor = getattr(v, "nbformat_minor", 0)
    if version_minor is None:
        version_minor = current_minor

    current_validator = _validator_for_name(name) if name else get_current_validator()

    version_tuple = (current_validator.name, version, version_minor)
    # A minor version newer than the one this library knows about.
    from_the_future = current_minor < version_minor

    if relax_add_props:
        try:
            schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
        except AttributeError:
            return None

        # this allows properties to be added for intermediate
        # representations while validating for all other kinds of errors
        schema_json = _relax_additional_properties(schema_json)
        if from_the_future:
            # keep accepting undefined cell types and outputs, exactly as the
            # strict validator for a notebook from the future does
            schema_json = _allow_undefined(schema_json)
        # Not cached: see the Notes section of the docstring.
        return current_validator(schema_json)

    if version_tuple not in validators:
        try:
            schema_json = _get_schema_json(v, version=version, version_minor=version_minor)
        except AttributeError:
            return None

        if from_the_future:
            # notebook from the future, relax all `additionalProperties: False` requirements
            schema_json = _relax_additional_properties(schema_json)
            # and allow undefined cell types and outputs
            schema_json = _allow_undefined(schema_json)

        validators[version_tuple] = current_validator(schema_json)

    return validators[version_tuple]
88def _get_schema_json(v, version=None, version_minor=None):
89 """
90 Gets the json schema from a given imported library and nbformat version.
91 """
92 if (version, version_minor) in v.nbformat_schema:
93 schema_path = os.path.join(
94 os.path.dirname(v.__file__), v.nbformat_schema[(version, version_minor)]
95 )
96 elif version_minor > v.nbformat_minor:
97 # load the latest schema
98 schema_path = os.path.join(os.path.dirname(v.__file__), v.nbformat_schema[(None, None)])
99 else:
100 msg = "Cannot find appropriate nbformat schema file."
101 raise AttributeError(msg)
102 with open(schema_path) as f:
103 schema_json = json.load(f)
104 return schema_json
def isvalid(nbjson, ref=None, version=None, version_minor=None):
    """Tell whether the given notebook JSON conforms to the notebook schema.

    Returns True when `validate` accepts the notebook and False when it
    raises a ``ValidationError``. Use `validate` directly to see the
    individual errors.

    Deprecation and missing-id warnings are silenced for the duration of the
    check. An ``AssertionError`` is raised if the check ended up modifying
    ``nbjson``.
    """
    snapshot = deepcopy(nbjson)
    try:
        with warnings.catch_warnings():
            for silenced in (DeprecationWarning, MissingIDFieldWarning):
                warnings.filterwarnings("ignore", category=silenced)
            validate(nbjson, ref, version, version_minor, repair_duplicate_cell_ids=False)
        return True
    except ValidationError:
        return False
    finally:
        # Sanity check: validating must leave the caller's notebook untouched.
        if nbjson != snapshot:
            raise AssertionError
130def _format_as_index(indices):
131 """
132 (from jsonschema._utils.format_as_index, copied to avoid relying on private API)
134 Construct a single string containing indexing operations for the indices.
136 For example, [1, 2, "foo"] -> [1][2]["foo"]
137 """
139 if not indices:
140 return ""
141 return "[%s]" % "][".join(repr(index) for index in indices)
144_ITEM_LIMIT = 16
145_STR_LIMIT = 64
148def _truncate_obj(obj):
149 """Truncate objects for use in validation tracebacks
151 Cell and output lists are squashed, as are long strings, lists, and dicts.
152 """
153 if isinstance(obj, dict):
154 truncated_dict = {k: _truncate_obj(v) for k, v in list(obj.items())[:_ITEM_LIMIT]}
155 if isinstance(truncated_dict.get("cells"), list):
156 truncated_dict["cells"] = ["...%i cells..." % len(obj["cells"])]
157 if isinstance(truncated_dict.get("outputs"), list):
158 truncated_dict["outputs"] = ["...%i outputs..." % len(obj["outputs"])]
160 if len(obj) > _ITEM_LIMIT:
161 truncated_dict["..."] = "%i keys truncated" % (len(obj) - _ITEM_LIMIT)
162 return truncated_dict
163 elif isinstance(obj, list):
164 truncated_list = [_truncate_obj(item) for item in obj[:_ITEM_LIMIT]]
165 if len(obj) > _ITEM_LIMIT:
166 truncated_list.append("...%i items truncated..." % (len(obj) - _ITEM_LIMIT))
167 return truncated_list
168 elif isinstance(obj, str):
169 truncated_str = obj[:_STR_LIMIT]
170 if len(obj) > _STR_LIMIT:
171 truncated_str += "..."
172 return truncated_str
173 else:
174 return obj
class NotebookValidationError(ValidationError):
    """Schema ValidationError with truncated representation

    to avoid massive verbose tracebacks.

    The instance wraps another validation error (``original``); every
    attribute not found on the wrapper itself is looked up on the wrapped
    error.
    """

    def __init__(self, original, ref=None):
        """Initialize the error class.

        Parameters
        ----------
        original : ValidationError
            The error being wrapped.
        ref : str, optional
            Name of the schema definition the instance was validated against.
            Only used when ``original`` has no ``ref`` attribute of its own.
        """
        # NOTE(review): the parent ``__init__`` is not called; presumably on
        # purpose, so that unset attributes fall through to ``original`` via
        # ``__getattr__`` -- confirm before changing.
        self.original = original
        self.ref = getattr(self.original, "ref", ref)
        self.message = self.original.message

    def __getattr__(self, key):
        """Get an attribute from the wrapped error.

        Only invoked when normal lookup on the wrapper fails.
        """
        if key == "original":
            # ``original`` itself is missing (instance not initialised yet):
            # looking it up through ``self.original`` would recurse forever.
            raise AttributeError(key)
        return getattr(self.original, key)

    def __unicode__(self):
        """Custom str for validation errors

        avoids dumping full schema and notebook to logs
        """
        error = self.original
        instance = _truncate_obj(error.instance)

        return "\n".join(
            [
                error.message,
                "",
                "Failed validating {!r} in {}{}:".format(
                    error.validator,
                    self.ref or "notebook",
                    _format_as_index(list(error.relative_schema_path)[:-1]),
                ),
                "",
                "On instance%s:" % _format_as_index(error.relative_path),
                pprint.pformat(instance, width=78),
            ]
        )

    __str__ = __unicode__
def better_validation_error(error, version, version_minor):
    """Get better ValidationError on oneOf failures

    oneOf errors aren't informative.
    if it's a cell type or output_type error,
    try validating directly based on the type for a better error message

    Parameters
    ----------
    error : ValidationError
        The error to improve.
    version : int
    version_minor : int
        Notebook format version used for the more specific re-validation.

    Returns
    -------
    ``error`` itself when its ``schema_path`` is empty, the (recursively
    improved) error of the type-specific validation when that one fails, and
    otherwise ``error`` wrapped in a ``NotebookValidationError``.
    """
    if not len(error.schema_path):
        return error
    key = error.schema_path[-1]
    ref = None
    if isinstance(key, str) and key.endswith("Of") and isinstance(error.instance, dict):
        instance = error.instance
        # The instance already failed validation, so its type fields may hold
        # anything: only string values can name a schema definition.
        if "cell_type" in instance:
            cell_type = instance["cell_type"]
            if isinstance(cell_type, str):
                ref = cell_type + "_cell"
        elif "output_type" in instance:
            output_type = instance["output_type"]
            if isinstance(output_type, str):
                ref = output_type

    if ref:
        try:
            validate(
                error.instance,
                ref,
                version=version,
                version_minor=version_minor,
            )
        except ValidationError as sub_error:
            # keep extending relative path
            error.relative_path.extend(sub_error.relative_path)
            sub_error.relative_path = error.relative_path
            better = better_validation_error(sub_error, version, version_minor)
            # ``better`` may be a plain ValidationError without a ``ref``
            # attribute (returned unchanged when its schema path is empty).
            if getattr(better, "ref", None) is None:
                better.ref = ref
            return better
        except Exception:  # noqa
            # if it fails for some reason,
            # let the original error through
            pass
    return NotebookValidationError(error, ref)
def normalize(
    nbdict: Any,
    version: Optional[int] = None,
    version_minor: Optional[int] = None,
    *,
    relax_add_props: bool = False,
    strip_invalid_metadata: bool = False,
) -> Tuple[int, Any]:
    """
    Normalise a notebook prior to validation.

    A handful of normalisation steps are applied to a deep copy of the
    notebook in order to standardise it and make validation easier; the
    object passed in is left untouched.

    In general you should not rely on this function: make sure the notebooks
    reaching nbformat are already in normal form. If they are not, you likely
    have a bug, and may have security issues.

    Parameters
    ----------
    nbdict : dict
        notebook document
    version : int
        Major format version; read from the notebook when omitted.
    version_minor : int
        Minor format version; read from the notebook when omitted.
    relax_add_props : bool
        Whether to allow extra properties in the JSON schema validating the
        notebook.
    strip_invalid_metadata : bool
        Whether to strip metadata that does not exist in the JSON schema when
        validating the notebook.

    Returns
    -------
    changes : int
        number of changes in the notebook
    notebook : dict
        deep-copy of the original object with relevant changes.

    """
    working_copy = deepcopy(nbdict)
    detected_version, detected_minor = get_version(working_copy)
    return _normalize(
        working_copy,
        detected_version if version is None else version,
        detected_minor if version_minor is None else version_minor,
        True,  # repair_duplicate_cell_ids
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    )
315def _normalize(
316 nbdict: Any,
317 version: int,
318 version_minor: int,
319 repair_duplicate_cell_ids: bool,
320 relax_add_props: bool,
321 strip_invalid_metadata: bool,
322) -> Tuple[int, Any]:
323 """
324 Private normalisation routine.
326 This function attempts to normalize the `nbdict` passed to it.
328 As `_normalize()` is currently used both in `validate()` (for
329 historical reasons), and in the `normalize()` public function,
330 `_normalize()` does currently mutate `nbdict`.
331 Ideally, once `validate()` stops calling `_normalize()`, `_normalize()`
332 may stop mutating `nbdict`.
334 """
335 changes = 0
337 if (version, version_minor) >= (4, 5):
338 # if we support cell ids ensure default ids are provided
339 for cell in nbdict["cells"]:
340 if "id" not in cell:
341 warnings.warn(
342 "Code cell is missing an id field, this will become"
343 " a hard error in future nbformat versions. You may want"
344 " to use `normalize()` on your notebooks before validations"
345 " (available since nbformat 5.1.4). Previous versions of nbformat"
346 " are fixing this issue transparently, and will stop doing so"
347 " in the future.",
348 MissingIDFieldWarning,
349 stacklevel=3,
350 )
351 # Generate cell ids if any are missing
352 if repair_duplicate_cell_ids:
353 cell["id"] = generate_corpus_id()
354 changes += 1
356 # if we support cell ids check for uniqueness when validating the whole notebook
357 seen_ids = set()
358 for cell in nbdict["cells"]:
359 if "id" not in cell:
360 continue
361 cell_id = cell["id"]
362 if cell_id in seen_ids:
363 # Best effort to repair if we find a duplicate id
364 if repair_duplicate_cell_ids:
365 new_id = generate_corpus_id()
366 cell["id"] = new_id
367 changes += 1
368 warnings.warn(
369 f"Non-unique cell id {cell_id!r} detected. Corrected to {new_id!r}.",
370 DuplicateCellId,
371 stacklevel=3,
372 )
373 else:
374 msg = f"Non-unique cell id '{cell_id}' detected."
375 raise ValidationError(msg)
376 seen_ids.add(cell_id)
377 if strip_invalid_metadata:
378 changes += _strip_invalida_metadata(
379 nbdict, version, version_minor, relax_add_props=relax_add_props
380 )
381 return changes, nbdict
def _dep_warn(field):
    """Emit a ``DeprecationWarning`` for a deprecated keyword of ``validate``.

    Parameters
    ----------
    field : str
        Name of the deprecated keyword argument; interpolated into the
        warning message.
    """
    warnings.warn(
        dedent(
            f"""`{field}` kwargs of validate has been deprecated for security
        reasons, and will be removed soon.

        Please explicitly use the `n_changes, new_notebook = nbformat.validator.normalize(old_notebook, ...)` if you wish to
        normalise your notebook. `normalize` is available since nbformat 5.5.0

        """
        ),
        DeprecationWarning,
        # Two frames above this helper: when called directly from
        # ``validate`` the warning points at the caller of ``validate``.
        stacklevel=3,
    )
def validate(  # noqa
    nbdict: Any = None,
    ref: Optional[str] = None,
    version: Optional[int] = None,
    version_minor: Optional[int] = None,
    relax_add_props: bool = False,
    nbjson: Any = None,
    repair_duplicate_cell_ids: bool = _deprecated,  # type: ignore
    strip_invalid_metadata: bool = _deprecated,  # type: ignore
) -> None:
    """Check that a notebook dict-like object conforms to the relevant
    notebook format schema.

    Parameters
    ----------
    nbdict : dict
        notebook document
    ref : optional, str
        reference to the subset of the schema we want to validate against,
        for example ``"markdown_cell"``, ``"code_cell"`` ....
    version : int
    version_minor : int
        Format version to validate against. Without ``ref`` the missing parts
        are read from the notebook; with ``ref`` and no ``version``, 1.0 is
        assumed.
    relax_add_props : bool
        Whether to allow extra properties in the JSON schema validating the
        notebook. When True, all known fields are validated, but unknown
        fields are ignored.
    nbjson
        Backwards-compatible alias of ``nbdict``; only used when ``nbdict``
        is None.
    repair_duplicate_cell_ids : bool
        Deprecated since 5.5.0 - will be removed in the future.
    strip_invalid_metadata : bool
        Deprecated since 5.5.0 - will be removed in the future.

    Returns
    -------
    None

    Raises
    ------
    ValidationError if not valid.
    TypeError if neither ``nbdict`` nor ``nbjson`` is given.

    Notes
    -----
    Prior to Nbformat 5.5.0 the `validate` and `isvalid` method would silently
    try to fix invalid notebook and mutate arguments. This behavior is
    deprecated and will be removed in a near future.

    Please explicitly call `normalize` if you need to normalize notebooks.
    """
    assert isinstance(ref, str) or ref is None  # noqa

    # Deprecated keywords: warn when passed explicitly, otherwise fall back
    # to the historical defaults.
    if strip_invalid_metadata is not _deprecated:
        _dep_warn("strip_invalid_metadata")
    else:
        strip_invalid_metadata = False

    if repair_duplicate_cell_ids is not _deprecated:
        _dep_warn("repair_duplicate_cell_ids")
    else:
        repair_duplicate_cell_ids = True

    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if ref is None:
        # no ref means a whole notebook, which carries its own version
        detected_version, detected_minor = get_version(nbdict)
        if version is None:
            version = detected_version
        if version_minor is None:
            version_minor = detected_minor

        assert isinstance(version, int)  # noqa
        assert isinstance(version_minor, int)  # noqa
        _normalize(
            nbdict,
            version,
            version_minor,
            repair_duplicate_cell_ids,
            relax_add_props=relax_add_props,
            strip_invalid_metadata=strip_invalid_metadata,
        )
    elif version is None:
        # a ref without a version number: assume we're validating against 1.0
        version, version_minor = 1, 0

    # Only the first reported error is raised.
    for error in iter_validate(
        nbdict,
        ref=ref,
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        strip_invalid_metadata=strip_invalid_metadata,
    ):
        raise error
def _get_errors(
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool, *args: Any
) -> Any:
    """Return an iterator over the validation errors of ``nbdict``.

    The notebook is first checked with the current validator. When that
    reports errors and is not the ``jsonschema`` implementation, the errors
    are produced again with ``jsonschema``. Extra positional arguments are
    forwarded to the validator's ``iter_errors``.

    Raises
    ------
    ValidationError
        If no schema is available for the requested version.
    """
    validator = get_validator(version, version_minor, relax_add_props=relax_add_props)
    if not validator:
        msg = f"No schema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)

    found = list(validator.iter_errors(nbdict, *args))
    if not found or validator.name == "jsonschema":
        return iter(found)

    # jsonschema gives the best error messages.
    reporting_validator = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    return reporting_validator.iter_errors(nbdict, *args)
def _strip_invalida_metadata(  # noqa
    nbdict: Any, version: int, version_minor: int, relax_add_props: bool
) -> int:
    """
    This function tries to extract metadata errors from the validator and fix
    them if necessary. This mostly mean stripping unknown keys from metadata
    fields, or removing metadata fields altogether.

    Parameters
    ----------
    nbdict : dict
        notebook document; modified in place.
    version : int
    version_minor : int
    relax_add_props : bool
        Whether to allow extra property in the Json schema validating the
        notebook.

    Returns
    -------
    int
        number of modifications

    Raises
    ------
    ValidationError
        If no schema (or no jsonschema validator) is available for the
        requested version.
    """
    errors = _get_errors(nbdict, version, version_minor, relax_add_props)
    # Knowing whether there is at least one error is enough here.
    if next(iter(errors), None) is None:
        return 0

    # jsonschema gives a better error tree.
    validator = get_validator(
        version=version,
        version_minor=version_minor,
        relax_add_props=relax_add_props,
        name="jsonschema",
    )
    if not validator:
        msg = f"No jsonschema for validating v{version}.{version_minor} notebooks"
        raise ValidationError(msg)
    error_tree = validator.error_tree(validator.iter_errors(nbdict))

    changes = 0
    if "metadata" in error_tree:
        for key in error_tree["metadata"]:
            nbdict["metadata"].pop(key, None)
            changes += 1

    if "cells" not in error_tree:
        return changes

    cells = nbdict.get("cells", [])
    if not isinstance(cells, list):
        # Malformed ``cells`` entry: there are no per-cell metadata to strip.
        return changes

    cells_tree = error_tree["cells"]
    for cell_idx, cell in enumerate(cells):
        # Cells don't report individual metadata keys as having failed validation
        # Instead it reports that it failed to validate against each cell-type definition.
        # We have to delve into why those definitions failed to uncover which metadata
        # keys are misbehaving.
        one_of_error = cells_tree[cell_idx].errors.get("oneOf")
        if one_of_error is None or not isinstance(cell, dict):
            continue

        # A cell that is invalid may well lack its ``cell_type``; the lookup
        # below then simply finds no matching definition.
        intended_cell_type = cell.get("cell_type")
        schemas_by_index = [ref["$ref"] for ref in one_of_error.schema["oneOf"]]
        cell_type_definition_name = f"#/definitions/{intended_cell_type}_cell"
        if cell_type_definition_name not in schemas_by_index:
            continue

        schema_index = schemas_by_index.index(cell_type_definition_name)
        for error in one_of_error.context:
            rel_path = error.relative_path
            error_for_intended_schema = error.schema_path[0] == schema_index
            is_top_level_metadata_key = (
                len(rel_path) == 2 and rel_path[0] == "metadata"  # noqa
            )
            if error_for_intended_schema and is_top_level_metadata_key:
                cell["metadata"].pop(rel_path[1], None)
                changes += 1

    return changes
def iter_validate(
    nbdict=None,
    ref=None,
    version=None,
    version_minor=None,
    relax_add_props=False,
    nbjson=None,
    strip_invalid_metadata=False,
):
    """Check that a notebook dict-like object conforms to the relevant
    notebook format schema, yielding every ValidationError found.

    ``nbjson`` is a backwards-compatible alias of ``nbdict``. When ``version``
    is None, both ``version`` and ``version_minor`` are read from the
    notebook. With a truthy ``ref`` only that schema definition is validated
    against.

    Notes
    -----
    To fix: For security reasons, this function should *never* mutate its
    `nbdict` argument, and should *never* try to validate a mutated or
    modified version of its notebook.

    """
    # backwards compatibility for nbjson argument
    if nbdict is None:
        if nbjson is None:
            msg = "iter_validate() missing 1 required argument: 'nbdict'"
            raise TypeError(msg)
        nbdict = nbjson

    if version is None:
        version, version_minor = get_version(nbdict)

    if ref:
        extra_args = ({"$ref": "#/definitions/%s" % ref},)
    else:
        if strip_invalid_metadata:
            _strip_invalida_metadata(nbdict, version, version_minor, relax_add_props)
        # The errors are collected (again) after stripping to ensure that
        # removing metadata didn't cause another complex validation issue in
        # the schema, and that higher-level errors produced by individual
        # metadata validation failures are gone.
        extra_args = ()

    try:
        errors = _get_errors(nbdict, version, version_minor, relax_add_props, *extra_args)
    except ValidationError as e:
        yield e
        return

    for error in errors:
        yield better_validation_error(error, version, version_minor)