Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/c7n/schema.py: 13%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Copyright The Cloud Custodian Authors.
2# SPDX-License-Identifier: Apache-2.0
3"""
4Jsonschema validation of cloud custodian config.
6We start with a walkthrough of the various class registries
7of resource types and assemble and generate the schema.
9We do some specialization to reduce overall schema size
10via reference usage, although in some cases we prefer
11copies, due to issues with inheritance via reference (
12allowedProperties and enum extension).
14All filters and actions are annotated with schema typically using
15the utils.type_schema function.
16"""
17from collections import Counter
18import json
19import inspect
20import logging
22from jsonschema import Draft7Validator as JsonSchemaValidator
23from jsonschema.exceptions import best_match
25from c7n.policy import execution
26from c7n.provider import clouds
27from c7n.query import sources
28from c7n.resources import load_available
29from c7n.resolver import ValuesFrom
30from c7n.filters.core import (
31 ValueFilter,
32 EventFilter,
33 ReduceFilter,
34 OPERATORS,
35 VALUE_TYPES,
36)
37from c7n.structure import StructureParser # noqa
40def is_c7n_placeholder(instance):
41 """Is this schema element a Custodian variable placeholder?
43 Because policy validation can happen before we interpolate
44 variable values, there are cases where we validate non-string
45 types against variable placeholders. If a policy element is a string
46 that starts and ends with curly braces, we should avoid failing
47 failing type checks.
48 """
49 return (
50 isinstance(instance, str)
51 and instance.startswith('{')
52 and instance.endswith('}')
53 )
def validate(data, schema=None, resource_types=()):
    """Validate policy ``data`` against the custodian JSON schema.

    When ``schema`` is not supplied, one is generated for
    ``resource_types`` and checked for well-formedness before use.

    Returns:
        ``[]`` when the data is valid and policy names are unique,
        otherwise a list whose first item is the error. For the first
        schema error that can be refined this is
        ``[error, policy_name_or_'unknown']``; if refining raised for every
        error it is ``[first_error, best_match(errors)]`` with falsy
        entries dropped; for duplicate names it is whatever
        ``check_unique`` returns.
    """
    if schema is None:
        schema = generate(resource_types)
        JsonSchemaValidator.check_schema(schema)

    unrefined = []
    for error in JsonSchemaValidator(schema).iter_errors(data):
        try:
            error = specific_error(error)

            # Unexpanded variable references legitimately fail type
            # checks, so those errors are not reported.
            if error.validator == "type" and is_c7n_placeholder(error.instance):
                continue

            scoped = policy_error_scope(error, data)
            name = 'unknown'
            if isinstance(error.instance, dict):
                name = error.instance.get('name', 'unknown') or 'unknown'
            return [scoped, name]
        except Exception:
            logging.exception(
                "specific_error failed, traceback, followed by fallback")
            unrefined.append(error)

    if not unrefined:
        return check_unique(data) or []

    candidates = (unrefined[0], best_match(unrefined))
    return [candidate for candidate in candidates if candidate]
def check_unique(data):
    """Detect policies that share a name.

    Returns ``None`` when every policy name under ``data['policies']`` is
    unique. Otherwise returns a two-item list: a ``ValueError`` describing
    the duplicated names and the first duplicated name encountered.
    """
    tally = Counter(policy['name'] for policy in data.get('policies', []))
    duplicates = Counter(
        {name: count for name, count in tally.items() if count != 1})
    if not duplicates:
        return None
    message = "Only one policy with a given name allowed, duplicates: {}".format(
        duplicates)
    return [ValueError(message), next(iter(duplicates))]
def policy_error_scope(error, data):
    """Scope a schema error to its policy name and resource.

    If the error's absolute path points inside ``data['policies'][i]`` and
    that entry is a mapping, ``error.message`` is prefixed with the
    policy's ``name`` and ``resource`` (``'unknown'`` when absent). Errors
    that cannot be tied to a policy mapping are returned unchanged.

    Args:
        error: validation error exposing ``absolute_path`` and ``message``.
        data: the policy file content being validated.

    Returns:
        The same ``error`` object, possibly with an amended message.
    """
    err_path = list(error.absolute_path)
    # Root-level errors have an empty path, and an error on the
    # ``policies`` value itself carries no policy index.
    if len(err_path) < 2 or err_path[0] != 'policies':
        return error
    pdata = data['policies'][err_path[1]]
    # A malformed entry (e.g. a bare string) has no name/resource to report.
    if not isinstance(pdata, dict):
        return error
    error.message = "Error on policy:{} resource:{}\n".format(
        pdata.get('name', 'unknown'), pdata.get('resource', 'unknown')) + error.message
    return error
def specific_error(error):
    """Try to find the best error for humans to resolve.

    The jsonschema.exceptions.best_match error is based purely on a
    mix of a strong match (ie. not anyOf, oneOf) and schema depth,
    this often yields odd results that are semantically confusing,
    instead we can use a bit of structural knowledge of schema to
    provide better results.

    Only ``anyOf``/``oneOf`` errors are refined. The failing instance's
    ``resource`` or ``type`` value is used to pick the schema branch the
    author most likely intended, and the context error belonging to that
    branch is returned. When no branch can be identified the original
    ``error`` is returned.
    """
    if error.validator not in ('anyOf', 'oneOf'):
        return error

    r = t = None

    if isinstance(error.instance, dict):
        t = error.instance.get('type')
        r = error.instance.get('resource')

    # Only a string resource can name a schema branch; lists and malformed
    # values (numbers, mappings) fall through to the type lookup instead
    # of failing inside ``str.endswith``.
    if isinstance(r, str):
        found = None
        for idx, v in enumerate(error.validator_value):
            if '$ref' in v and v['$ref'].rsplit('/', 2)[1].endswith(r):
                found = idx
                break
        if found is not None:
            # error context is a flat list of all validation
            # failures, we have to index back to the policy
            # of interest.
            for e in error.context:
                # resource policies have a fixed path from
                # the top of the schema
                path = e.absolute_schema_path
                if len(path) > 4 and path[4] == found:
                    return specific_error(e)
            # Positional fallback, taken only when that index exists.
            if found < len(error.context):
                return specific_error(error.context[found])

    if t is not None:
        found = None
        for idx, v in enumerate(error.validator_value):
            if ('$ref' in v and
                    v['$ref'].rsplit('/', 2)[-1].rsplit('.', 1)[-1] == t):
                found = idx
                break
            # Primitive branches such as {'type': 'string'} carry no
            # 'properties' key, hence the defaulted lookup.
            elif 'type' in v and t in v.get(
                    'properties', {}).get('type', {}).get('enum', []):
                found = idx
                break

        if found is not None:
            for e in error.context:
                # The innermost integer in the schema path is the index of
                # the branch that produced this context error.
                for el in reversed(e.absolute_schema_path):
                    if isinstance(el, int):
                        if el == found:
                            return e
                        break
    return error
170def _get_attr_schema():
171 base_filters = [
172 {'$ref': '#/definitions/filters/value'},
173 {'$ref': '#/definitions/filters/valuekv'},
174 ]
175 any_of = []
176 any_of.extend(base_filters)
178 for op in ('and', 'or', 'not',):
179 any_of.append(
180 {
181 'additional_properties': False,
182 'properties': {
183 op: {
184 'type': 'array',
185 'items': {
186 'anyOf': base_filters
187 }
188 }
189 },
190 'type': 'object'
191 }
192 )
194 attr_schema = {
195 'items': {
196 'anyOf': any_of
197 },
198 'type': 'array',
199 }
200 return attr_schema
def get_default_definitions(resource_defs):
    """Build the shared ``definitions`` section of the policy schema.

    ``resource_defs`` is stored under the ``resources`` key as-is (not
    copied). The other entries are reusable sub-schemas: string/basic
    dicts, IAM statements, common filter schemas, the base policy, the
    policy modes and the ``max-resources`` options.
    """
    def typed(*type_names):
        # Fresh dicts on every call so no two sub-schemas share an object.
        return [{'type': type_name} for type_name in type_names]

    def conditions_block(op):
        return {
            'type': 'object', 'additionalProperties': False,
            'properties': {op: {
                '$ref': '#/definitions/policy/properties/conditions'}}}

    string_dict = {
        "type": "object",
        "patternProperties": {
            "": {"type": "string"},
        },
    }

    basic_dict = {
        "type": "object",
        "patternProperties": {
            "": {'oneOf': typed("string", "boolean", "number")},
        },
    }

    iam_statement = {
        'additionalProperties': False,
        'type': 'object',
        'properties': {
            'Sid': {'type': 'string'},
            'Effect': {'type': 'string', 'enum': ['Allow', 'Deny']},
            'Principal': {'anyOf': typed('string', 'object', 'array')},
            'NotPrincipal': {'anyOf': typed('object', 'array')},
            'Action': {'anyOf': typed('string', 'array')},
            'NotAction': {'anyOf': typed('string', 'array')},
            'Resource': {'anyOf': typed('string', 'array')},
            'NotResource': {'anyOf': typed('string', 'array')},
            'Condition': {'type': 'object'}
        },
        'required': ['Sid', 'Effect'],
        # One entry per combination of the plain/negated Principal,
        # Action and Resource keys.
        'oneOf': [
            {'required': [principal, action, resource]}
            for resource in ('Resource', 'NotResource')
            for action in ('Action', 'NotAction')
            for principal in ('Principal', 'NotPrincipal')
        ]
    }

    filters = {
        'value': ValueFilter.schema,
        'event': EventFilter.schema,
        'reduce': ReduceFilter.schema,
        # Shortcut form of value filter as k=v
        'valuekv': {
            'type': 'object',
            'additionalProperties': {'oneOf': [
                {'type': 'number'},
                {'type': 'null'},
                {'type': 'array', 'maxItems': 0},
                {'type': 'string'},
                {'type': 'boolean'}
            ]},
            'minProperties': 1,
            'maxProperties': 1},
    }

    filters_common = {
        'list_item_attrs': _get_attr_schema(),
        'comparison_operators': {
            'enum': list(OPERATORS.keys())},
        'value_types': {'enum': VALUE_TYPES},
        'value_from': ValuesFrom.schema,
        'value': {'oneOf': typed(
            'array', 'string', 'boolean', 'number', 'null')},
    }

    conditions = {
        'type': 'array',
        'items': {'anyOf': [
            conditions_block('or'),
            conditions_block('not'),
            conditions_block('and'),
            {'$ref': '#/definitions/filters/value'},
            {'$ref': '#/definitions/filters/event'},
            {'$ref': '#/definitions/filters/valuekv'}]}}

    policy_properties = {
        'name': {
            'type': 'string',
            'pattern': "^[A-z][A-z0-9]*(-[A-z0-9]+)*$"},
        'conditions': conditions,
        # these should be deprecated for conditions
        'region': {'type': 'string'},
        'tz': {'type': 'string'},
        'start': {'format': 'date-time'},
        'end': {'format': 'date-time'},
        'resource': {'oneOf': [
            {'type': 'string'},
            {'type': 'array', 'items': {'type': 'string'}}]},
        'max-resources': {'anyOf': [
            {'type': 'integer', 'minimum': 1},
            {'$ref': '#/definitions/max-resources-properties'}
        ]},
        'max-resources-percent': {'type': 'number', 'minimum': 0, 'maximum': 100},
        'comment': {'type': 'string'},
        'comments': {'type': 'string'},
        'description': {'type': 'string'},
        'tags': {'type': 'array', 'items': {'type': 'string'}},
        'metadata': {'type': 'object'},
        'mode': {'$ref': '#/definitions/policy-mode'},
        'source': {'enum': list(sources.keys())},
        'actions': {
            'type': 'array',
        },
        'filters': {
            'type': 'array'
        },
        #
        # TODO: source queries should really move under
        # source. This was initially used for describe sources
        # to expose server side query mechanisms, however its
        # important to note it also prevents resource cache
        # utilization between policies that have different
        # queries.
        'query': {
            'type': 'array', 'items': {'type': 'object'}}
    }

    policy = {
        'type': 'object',
        'required': ['name', 'resource'],
        'additionalProperties': False,
        'properties': policy_properties,
    }

    max_resources_properties = {
        'type': 'object',
        'additionalProperties': False,
        'properties': {
            'amount': {"type": 'integer', 'minimum': 1},
            'op': {'enum': ['or', 'and']},
            'percent': {'type': 'number', 'minimum': 0, 'maximum': 100}
        }
    }

    return {
        'resources': resource_defs,
        'string_dict': string_dict,
        'basic_dict': basic_dict,
        'iam-statement': iam_statement,
        'actions': {},
        'filters': filters,
        'filters_common': filters_common,
        'policy': policy,
        'policy-mode': {
            'anyOf': [mode.schema for _name, mode in execution.items()],
        },
        'max-resources-properties': max_resources_properties,
    }
def generate(resource_types=()):
    """Assemble the complete JSON schema for a policy file.

    Walks every registered cloud's resources in sorted order and adds a
    policy schema for each one. When ``resource_types`` is non-empty, a
    resource is included only if its ``cloud.type`` name, or one of its
    cloud-qualified aliases, appears in it. If no resource schema was
    produced, policy items are only required to be objects.
    """
    resource_defs = {}
    definitions = get_default_definitions(resource_defs)

    resource_refs = []
    for cloud_name, cloud_type in sorted(clouds.items()):
        is_aws = cloud_name == 'aws'
        for type_name, resource_type in sorted(cloud_type.resources.items()):
            r_type_name = "%s.%s" % (cloud_name, type_name)
            type_aliases = resource_type.type_aliases
            qualified_aliases = (
                ["%s.%s" % (cloud_name, a) for a in type_aliases]
                if type_aliases else [])

            if resource_types and r_type_name not in resource_types:
                requested_by_alias = bool(qualified_aliases) and bool(
                    set(qualified_aliases).intersection(resource_types))
                if not requested_by_alias:
                    continue

            aliases = list(qualified_aliases)
            if is_aws:
                # aws gets legacy aliases with no cloud prefix
                if type_aliases:
                    aliases.extend(type_aliases)
                # aws gets additional alias for default name
                aliases.append(type_name)

            ref = process_resource(
                r_type_name,
                resource_type,
                resource_defs,
                aliases,
                definitions,
                cloud_name
            )
            resource_refs.append(ref)

    if resource_refs:
        policy_items = {'anyOf': resource_refs}
    else:
        # allow empty policies with lazy load
        policy_items = {'type': 'object'}

    return {
        "$schema": "http://json-schema.org/draft-07/schema#",
        'id': 'http://schema.cloudcustodian.io/v0/custodian.json',
        'definitions': definitions,
        'type': 'object',
        'required': ['policies'],
        'additionalProperties': False,
        'properties': {
            'vars': {'type': 'object'},
            'policies': {
                'type': 'array',
                'additionalItems': False,
                'items': policy_items
            }
        }
    }
def process_resource(
        type_name, resource_type, resource_defs, aliases=None,
        definitions=None, provider_name=None):
    """Register one resource type's schema and return a ``$ref`` to it.

    Ensures ``resource_defs[type_name]`` exists. If the resource type
    provides ``get_schema`` it is delegated to; otherwise the action and
    filter schemas are collected here and a policy schema is stored under
    ``resource_defs[type_name]['policy']``. Elements with a
    ``schema_alias`` are stored once per provider under
    ``definitions['actions']`` / ``definitions['filters']`` and shared by
    reference.

    Args:
        type_name: resource name used as the definitions key.
        resource_type: resource class exposing ``action_registry`` and
            ``filter_registry`` (or ``get_schema``).
        resource_defs: mapping of resource name to schema fragments;
            updated in place.
        aliases: extra names accepted for the policy ``resource`` value.
        definitions: shared definitions mapping; updated in place for
            aliased elements.
        provider_name: prefix used to build shared alias keys.

    Returns:
        ``{'$ref': '#/definitions/resources/<type_name>/policy'}``.

    Raises:
        SyntaxError: two actions share an alias but differ in schema.
        AssertionError: two filters share an alias but differ in schema.
    """
    r = resource_defs.setdefault(type_name, {'actions': {}, 'filters': {}})

    if getattr(resource_type, "get_schema", None):
        resource_type.get_schema(
            type_name, resource_defs, definitions, provider_name
        )
        return {'$ref': '#/definitions/resources/%s/policy' % type_name}

    action_refs = []
    for a in ElementSchema.elements(resource_type.action_registry):
        action_name = a.type
        if a.schema_alias:
            action_alias = "%s.%s" % (provider_name, action_name)
            if action_alias in definitions['actions']:

                if definitions['actions'][action_alias] != a.schema:  # NOQA
                    msg = "Schema mismatch on type:{} action:{} w/ schema alias ".format(
                        type_name, action_name)
                    raise SyntaxError(msg)
            else:
                definitions['actions'][action_alias] = a.schema
            action_refs.append({'$ref': '#/definitions/actions/%s' % action_alias})
        else:
            r['actions'][action_name] = a.schema
            action_refs.append(
                {'$ref': '#/definitions/resources/%s/actions/%s' % (
                    type_name, action_name)})

    # one word action shortcuts
    action_refs.append(
        {'enum': list(resource_type.action_registry.keys())})

    filter_refs = []
    for f in ElementSchema.elements(resource_type.filter_registry):
        filter_name = f.type
        if filter_name == 'value':
            filter_refs.append({'$ref': '#/definitions/filters/value'})
            filter_refs.append({'$ref': '#/definitions/filters/valuekv'})
        elif filter_name == 'event':
            filter_refs.append({'$ref': '#/definitions/filters/event'})
        elif f.schema_alias:
            filter_alias = "%s.%s" % (provider_name, filter_name)
            if filter_alias in definitions['filters']:
                # Raised explicitly rather than via ``assert`` so the
                # consistency check still runs under ``python -O``.
                if definitions['filters'][filter_alias] != f.schema:
                    raise AssertionError(
                        "Schema mismatch on filter w/ schema alias")
            else:
                definitions['filters'][filter_alias] = f.schema
            filter_refs.append({'$ref': '#/definitions/filters/%s' % filter_alias})
        else:
            r['filters'][filter_name] = f.schema
            filter_refs.append(
                {'$ref': '#/definitions/resources/%s/filters/%s' % (
                    type_name, filter_name)})

    # one word filter shortcuts
    filter_refs.append(
        {'enum': list(resource_type.filter_registry.keys())})

    # and/or/not blocks point back at this resource's own filters schema.
    block_fref = '#/definitions/resources/%s/policy/allOf/1/properties/filters' % (
        type_name)
    filter_refs.extend([
        {'type': 'object', 'additionalProperties': False,
         'properties': {'or': {'$ref': block_fref}}},
        {'type': 'object', 'additionalProperties': False,
         'properties': {'and': {'$ref': block_fref}}},
        {'type': 'object', 'additionalProperties': False,
         'properties': {'not': {'$ref': block_fref}}}])

    resource_policy = {
        'allOf': [
            {'$ref': '#/definitions/policy'},
            {'properties': {
                'resource': {'enum': [type_name]},
                'filters': {
                    'type': 'array',
                    'items': {'anyOf': filter_refs}},
                'actions': {
                    'type': 'array',
                    'items': {'anyOf': action_refs}}}},
        ]
    }

    if aliases:
        resource_policy['allOf'][1]['properties'][
            'resource']['enum'].extend(aliases)

    if type_name == 'ec2':
        resource_policy['allOf'][1]['properties']['query'] = {}

    r['policy'] = resource_policy
    return {'$ref': '#/definitions/resources/%s/policy' % type_name}
def resource_outline(provider=None):
    """Summarize the filter and action names of every resource.

    Returns a mapping of cloud name to a mapping of ``cloud.resource``
    names, each holding sorted ``filters`` and ``actions`` name lists.
    When ``provider`` is given, only that cloud is included.
    """
    outline = {}
    for cname, ctype in sorted(clouds.items()):
        if provider and provider != cname:
            continue
        outline[cname] = {
            '%s.%s' % (cname, rname): {
                'filters': sorted(rtype.filter_registry.keys()),
                'actions': sorted(rtype.action_registry.keys()),
            }
            for rname, rtype in sorted(ctype.resources.items())
        }
    return outline
def resource_vocabulary(cloud_name=None, qualify_name=True, aliases=True):
    """Build a lookup of resources to their filter and action classes.

    Each resource entry holds the sorted ``filters`` and ``actions`` names
    and a ``classes`` mapping with the element classes and the resource
    class. With ``aliases`` enabled, alias names are registered under the
    ``aliases`` key and point at the same entry. Execution modes are listed
    under ``mode``.

    Args:
        cloud_name: restrict the vocabulary to one cloud when not None.
        qualify_name: key resources as ``cloud.resource`` rather than by
            bare resource name.
        aliases: include the ``aliases`` section.
    """
    vocabulary = {}
    if aliases:
        vocabulary['aliases'] = {}

    resources = {}
    for cname, ctype in clouds.items():
        if cloud_name is not None and cloud_name != cname:
            continue
        for rname, rtype in ctype.resources.items():
            key = '%s.%s' % (cname, rname) if qualify_name else rname
            resources[key] = rtype

    for type_name, resource_type in resources.items():
        action_classes = {
            ElementSchema.name(cls): cls
            for cls in ElementSchema.elements(resource_type.action_registry)}
        filter_classes = {
            ElementSchema.name(cls): cls
            for cls in ElementSchema.elements(resource_type.filter_registry)}

        entry = {
            'filters': sorted(filter_classes),
            'actions': sorted(action_classes),
            'classes': {
                'actions': action_classes,
                'filters': filter_classes,
                'resource': resource_type,
            },
        }
        vocabulary[type_name] = entry

        if aliases and resource_type.type_aliases:
            provider = type_name.split('.', 1)[0]
            alias_index = vocabulary['aliases']
            for type_alias in resource_type.type_aliases:
                alias_index["{}.{}".format(provider, type_alias)] = entry
                if provider == 'aws':
                    alias_index[type_alias] = entry
            entry['resource_type'] = type_name

    vocabulary["mode"] = {
        mode_name: cls for mode_name, cls in execution.items()}

    return vocabulary
class ElementSchema:
    """Utility functions for working with resource's filters and actions.
    """

    @staticmethod
    def elements(registry):
        """Given a resource registry return sorted de-aliased values.

        Several registry keys may map to the same class. Keying by the
        element's schema name collapses those aliases, and the result is
        ordered by that name. The boolean operators ``and``/``or``/``not``
        are skipped.
        """
        seen = {}
        for k, v in registry.items():
            if k in ('and', 'or', 'not'):
                continue
            seen[ElementSchema.name(v)] = v
        return [seen[k] for k in sorted(seen)]

    @staticmethod
    def resolve(vocabulary, schema_path):
        """Given a resource vocabulary and a dotted path, resolve an element.

        A leading cloud name is joined with the following segment to form
        the resource key; later segments are looked up under that
        resource's ``classes`` mapping.

        Raises:
            ValueError: a path segment is not present in the vocabulary.
        """
        current = vocabulary
        frag = None
        if schema_path.startswith('.'):
            # The prepended '.' is an odd artifact
            schema_path = schema_path[1:]
        parts = schema_path.split('.')
        while parts:
            k = parts.pop(0)
            if frag:
                k = "%s.%s" % (frag, k)
                frag = None
                parts.insert(0, 'classes')
            elif k in clouds:
                frag = k
                # "cloud.resource" alone resolves to the resource class.
                if len(parts) == 1:
                    parts.append('resource')
                continue
            if k not in current:
                raise ValueError("Invalid schema path %s" % schema_path)
            current = current[k]
        return current

    @staticmethod
    def name(cls):
        """For a filter or action return its name."""
        return cls.schema['properties']['type']['enum'][0]

    @staticmethod
    def doc(cls):
        """Return 'best' formatted doc string for a given class.

        Walks up class hierarchy, skipping known bad. Returns
        empty string if no suitable doc string found.
        """
        # walk up class hierarchy for nearest
        # good doc string, skip known
        if cls.__doc__ is not None:
            return inspect.cleandoc(cls.__doc__)
        for b in cls.__bases__:
            if b in (ValueFilter, object):
                continue
            doc = b.__doc__ or ElementSchema.doc(b)
            # The recursive call yields "" (never None) when a base has
            # nothing usable, so test truthiness to keep searching the
            # remaining bases.
            if doc:
                return inspect.cleandoc(doc)
        return ""

    @staticmethod
    def schema(definitions, cls):
        """Return a pretty'ified version of an element schema.

        ``cls`` may be an element class (its ``schema`` attribute is used)
        or a schema mapping. The top-level ``type`` and
        ``additionalProperties`` keys are dropped and ``$ref`` entries are
        expanded from ``definitions``. Neither the input schema nor
        ``definitions`` is modified.
        """
        schema = dict(cls.schema) if isinstance(cls, type) else dict(cls)
        schema.pop('type', None)
        schema.pop('additionalProperties', None)
        return ElementSchema._expand_schema(schema, definitions)

    @staticmethod
    def _expand_schema(schema, definitions):
        """Expand references in schema to their full schema.

        Returns a new mapping; nested mappings are expanded recursively
        while lists and scalars are carried over unchanged. A mapping
        containing ``$ref`` is replaced by the expanded target, or ``None``
        when the target is missing from ``definitions``.

        Raises:
            ValueError: a reference does not start with ``#/definitions``.
        """
        if '$ref' in schema:
            # the value here is in the form of: '#/definitions/path/to/key'
            ref = schema['$ref']
            parts = ref.split('/')
            if ['#', 'definitions'] != parts[0:2]:
                raise ValueError("Invalid Ref %s" % ref)
            current = definitions
            for p in parts[2:]:
                if p not in current:
                    return None
                current = current[p]
            return ElementSchema._expand_schema(current, definitions)

        expanded = {}
        for k, v in schema.items():
            if isinstance(v, dict):
                expanded[k] = ElementSchema._expand_schema(v, definitions)
            else:
                expanded[k] = v
        return expanded
def pprint_schema_summary(vocabulary):
    """Print per-provider resource, action and filter counts.

    Vocabulary keys containing a ``.`` are treated as ``provider.resource``
    entries and aggregated per provider; distinct action and filter names
    are counted. Any other key is reported with the length of its value.
    """
    providers = {}
    non_providers = {}

    for type_name, rv in vocabulary.items():
        provider, dot, _ = type_name.partition('.')
        if not dot:
            non_providers[type_name] = len(rv)
            continue
        stats = providers.setdefault(provider, {
            'resources': 0, 'actions': Counter(), 'filters': Counter()})
        stats['resources'] += 1
        for action_name in rv.get('actions'):
            stats['actions'][action_name] += 1
        for filter_name in rv.get('filters'):
            stats['filters'][filter_name] += 1

    for provider, stats in providers.items():
        print("%s:" % provider)
        print(" resource count: %d" % stats['resources'])
        print(" actions: %d" % len(stats['actions']))
        print(" filters: %d" % len(stats['filters']))

    for non_providers_type, length in non_providers.items():
        print("%s:" % non_providers_type)
        print(" count: %d" % length)
def json_dump(resource=None):
    """Load all available resources and print the schema as indented JSON.

    ``resource`` is passed to ``generate`` as its ``resource_types`` filter.
    """
    load_available()
    schema = generate(resource)
    rendered = json.dumps(schema, indent=2)
    print(rendered)
# When run as a script, print the generated schema as JSON to stdout.
if __name__ == '__main__':
    json_dump()