Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/c7n/schema.py: 13%
297 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:51 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:51 +0000
1# Copyright The Cloud Custodian Authors.
2# SPDX-License-Identifier: Apache-2.0
"""
Jsonschema validation of cloud custodian config.

We start with a walkthrough of the various class registries
of resource types and assemble and generate the schema.

We do some specialization to reduce overall schema size
via reference usage, although in some cases we prefer
copies, due to issues with inheritance via reference (
allowedProperties and enum extension).

All filters and actions are annotated with schema typically using
the utils.type_schema function.
"""
17from collections import Counter
18import json
19import inspect
20import logging
22from jsonschema import Draft7Validator as JsonSchemaValidator
23from jsonschema.exceptions import best_match
25from c7n.policy import execution
26from c7n.provider import clouds
27from c7n.query import sources
28from c7n.resources import load_available
29from c7n.resolver import ValuesFrom
30from c7n.filters.core import (
31 ValueFilter,
32 EventFilter,
33 AgeFilter,
34 ReduceFilter,
35 OPERATORS,
36 VALUE_TYPES,
37)
38from c7n.structure import StructureParser # noqa
def validate(data, schema=None, resource_types=()):
    """Validate policy file data against the custodian JSON schema.

    When ``schema`` is not supplied, one is generated for
    ``resource_types`` and checked for well-formedness before use.

    Returns:
        * ``[]`` when the data is schema-valid and policy names are unique,
        * the result of ``check_unique`` when names collide,
        * ``[error, policy_name]`` for the first schema error once it has
          been narrowed and scoped to its policy,
        * the first raw error plus jsonschema's ``best_match`` (falsy
          entries dropped) if narrowing the error itself fails.
    """
    if schema is None:
        schema = generate(resource_types)
        JsonSchemaValidator.check_schema(schema)

    validator = JsonSchemaValidator(schema)
    errors = list(validator.iter_errors(data))
    if not errors:
        return check_unique(data) or []

    first = errors[0]
    try:
        scoped = policy_error_scope(specific_error(first), data)
        instance = first.instance
        name = 'unknown'
        if isinstance(instance, dict):
            # an empty/falsy name is reported as 'unknown' as well
            name = instance.get('name', 'unknown') or 'unknown'
        return [scoped, name]
    except Exception:
        logging.exception(
            "specific_error failed, traceback, followed by fallback")

    fallback = [first, best_match(validator.iter_errors(data))]
    return [err for err in fallback if err]
def check_unique(data):
    """Report policy names that appear more than once in ``data``.

    Returns ``None`` when every policy name is unique. Otherwise returns
    a two item list: a ``ValueError`` describing the duplicated names and
    the first duplicated name in order of appearance.
    """
    tally = Counter(policy['name'] for policy in data.get('policies', []))
    duplicates = Counter(
        {name: count for name, count in tally.items() if count != 1})
    if not duplicates:
        return None
    message = "Only one policy with a given name allowed, duplicates: {}".format(
        duplicates)
    return [ValueError(message), next(iter(duplicates))]
def policy_error_scope(error, data):
    """Scope a schema error to its policy name and resource.

    ``error`` is expected to expose ``absolute_path`` and ``message`` the
    way a jsonschema validation error does. When the error path points
    into ``data['policies'][<index>]`` the message is prefixed, in place,
    with that policy's name and resource.

    The error is returned unchanged when it cannot be tied to a single
    policy mapping: the path is empty (error at the document root), names
    only the ``policies`` key, lies outside ``policies``, or the addressed
    policy is not a dict.

    Returns the same ``error`` object in every case.
    """
    err_path = list(error.absolute_path)
    # Need both the 'policies' key and an index to locate a policy.
    if len(err_path) < 2 or err_path[0] != 'policies':
        return error
    pdata = data['policies'][err_path[1]]
    if not isinstance(pdata, dict):
        # e.g. a bare string where a policy mapping was expected
        return error
    error.message = "Error on policy:{} resource:{}\n".format(
        pdata.get('name', 'unknown'), pdata.get('resource', 'unknown')) + error.message
    return error
def specific_error(error):
    """Try to find the best error for humans to resolve

    jsonschema's ``best_match`` ranks errors purely by match strength
    (i.e. not anyOf / oneOf) and schema depth, which often surfaces a
    semantically confusing error. Here the ``resource`` or ``type`` key
    of the failing instance is used to pick the anyOf/oneOf branch the
    user most likely intended, and an error from that branch is returned.

    Errors from any other validator, and errors for which no branch can
    be identified, are returned unchanged.
    """
    if error.validator not in ('anyOf', 'oneOf'):
        return error

    instance = error.instance
    element_type = resource = None
    if isinstance(instance, dict):
        element_type = instance.get('type')
        resource = instance.get('resource')

    if resource is not None:
        # index of the first branch whose $ref names the resource
        branch = next(
            (position
             for position, candidate in enumerate(error.validator_value)
             if '$ref' in candidate
             and candidate['$ref'].rsplit('/', 2)[1].endswith(resource)),
            None)
        if branch is not None:
            # error.context is a flat list of every sub-failure; resource
            # policies sit at a fixed position in the schema path, so the
            # branch index can be read off directly.
            for sub_error in error.context:
                if sub_error.absolute_schema_path[4] == branch:
                    return specific_error(sub_error)
            return specific_error(error.context[branch])

    if element_type is not None:

        def _names_type(candidate):
            # either a $ref whose last dotted segment is the type ...
            if ('$ref' in candidate and
                    candidate['$ref'].rsplit('/', 2)[-1].rsplit('.', 1)[-1]
                    == element_type):
                return True
            # ... or an inline schema that enumerates the type
            return ('type' in candidate and
                    element_type in candidate['properties']['type']['enum'])

        branch = next(
            (position
             for position, candidate in enumerate(error.validator_value)
             if _names_type(candidate)),
            None)

        if branch is not None:
            for sub_error in error.context:
                # the innermost integer in the schema path is the branch
                # index this sub-error belongs to
                innermost = next(
                    (part for part in reversed(sub_error.absolute_schema_path)
                     if isinstance(part, int)),
                    None)
                if innermost is not None and innermost == branch:
                    return sub_error
    return error
147def _get_attr_schema():
148 base_filters = [
149 {'$ref': '#/definitions/filters/value'},
150 {'$ref': '#/definitions/filters/valuekv'},
151 ]
152 any_of = []
153 any_of.extend(base_filters)
155 for op in ('and', 'or', 'not',):
156 any_of.append(
157 {
158 'additional_properties': False,
159 'properties': {
160 op: {
161 'type': 'array',
162 'items': {
163 'anyOf': base_filters
164 }
165 }
166 },
167 'type': 'object'
168 }
169 )
171 attr_schema = {
172 'items': {
173 'anyOf': any_of
174 },
175 'type': 'array',
176 }
177 return attr_schema
def generate(resource_types=()):
    """Assemble the complete JSON schema for a policy file.

    Shared definitions (common filters, the base policy shape, policy
    modes, helper types) are built first; then every resource registered
    on every provider in ``clouds`` contributes a per-resource policy
    definition through ``process_resource``.

    ``resource_types`` optionally restricts the schema to the given
    provider-qualified resource names; a resource is also kept when one
    of its provider-qualified aliases is listed. When no resource ends up
    in the schema, any object is accepted as a policy.
    """

    def _string_or_array():
        # fresh dict per use so no two properties share one object
        return {'anyOf': [{'type': 'string'}, {'type': 'array'}]}

    conditions_ref = '#/definitions/policy/properties/conditions'

    def _condition_block(op):
        return {'type': 'object', 'additionalProperties': False,
                'properties': {op: {'$ref': conditions_ref}}}

    resource_defs = {}

    iam_statement = {
        'additionalProperties': False,
        'type': 'object',
        'properties': {
            'Sid': {'type': 'string'},
            'Effect': {'type': 'string', 'enum': ['Allow', 'Deny']},
            'Principal': {'anyOf': [
                {'type': 'string'},
                {'type': 'object'}, {'type': 'array'}]},
            'NotPrincipal': {'anyOf': [{'type': 'object'}, {'type': 'array'}]},
            'Action': _string_or_array(),
            'NotAction': _string_or_array(),
            'Resource': _string_or_array(),
            'NotResource': _string_or_array(),
            'Condition': {'type': 'object'},
        },
        'required': ['Sid', 'Effect'],
        # exactly one of each (Not)Principal / (Not)Action / (Not)Resource
        'oneOf': [
            {'required': [principal, action, resource]}
            for resource in ('Resource', 'NotResource')
            for action in ('Action', 'NotAction')
            for principal in ('Principal', 'NotPrincipal')
        ],
    }

    shared_filters = {
        'value': ValueFilter.schema,
        'event': EventFilter.schema,
        'age': AgeFilter.schema,
        'reduce': ReduceFilter.schema,
        # Shortcut form of value filter as k=v
        'valuekv': {
            'type': 'object',
            'additionalProperties': {'oneOf': [
                {'type': 'number'},
                {'type': 'null'},
                {'type': 'array', 'maxItems': 0},
                {'type': 'string'},
                {'type': 'boolean'}]},
            'minProperties': 1,
            'maxProperties': 1,
        },
    }

    filters_common = {
        'list_item_attrs': _get_attr_schema(),
        'comparison_operators': {'enum': list(OPERATORS.keys())},
        'value_types': {'enum': VALUE_TYPES},
        'value_from': ValuesFrom.schema,
        'value': {'oneOf': [
            {'type': 'array'},
            {'type': 'string'},
            {'type': 'boolean'},
            {'type': 'number'},
            {'type': 'null'}]},
    }

    policy = {
        'type': 'object',
        'required': ['name', 'resource'],
        'additionalProperties': False,
        'properties': {
            'name': {
                'type': 'string',
                'pattern': "^[A-z][A-z0-9]*(-[A-z0-9]+)*$"},
            'conditions': {
                'type': 'array',
                'items': {'anyOf': [
                    _condition_block('or'),
                    _condition_block('not'),
                    _condition_block('and'),
                    {'$ref': '#/definitions/filters/value'},
                    {'$ref': '#/definitions/filters/event'},
                    {'$ref': '#/definitions/filters/valuekv'}]}},
            # these should be deprecated for conditions
            'region': {'type': 'string'},
            'tz': {'type': 'string'},
            'start': {'format': 'date-time'},
            'end': {'format': 'date-time'},
            'resource': {'oneOf': [
                {'type': 'string'},
                {'type': 'array', 'items': {'type': 'string'}}]},
            'max-resources': {'anyOf': [
                {'type': 'integer', 'minimum': 1},
                {'$ref': '#/definitions/max-resources-properties'},
            ]},
            'max-resources-percent': {'type': 'number', 'minimum': 0, 'maximum': 100},
            'comment': {'type': 'string'},
            'comments': {'type': 'string'},
            'description': {'type': 'string'},
            'tags': {'type': 'array', 'items': {'type': 'string'}},
            'metadata': {'type': 'object'},
            'mode': {'$ref': '#/definitions/policy-mode'},
            'source': {'enum': list(sources.keys())},
            'actions': {
                'type': 'array',
            },
            'filters': {
                'type': 'array'
            },
            # TODO: source queries should really move under source.
            # This was initially used for describe sources to expose
            # server side query mechanisms, however it is important to
            # note it also prevents resource cache utilization between
            # policies that have different queries.
            'query': {
                'type': 'array', 'items': {'type': 'object'}},
        },
    }

    definitions = {
        'resources': resource_defs,
        'string_dict': {
            "type": "object",
            "patternProperties": {
                "": {"type": "string"},
            },
        },
        'basic_dict': {
            "type": "object",
            "patternProperties": {
                "": {
                    'oneOf': [
                        {"type": "string"},
                        {"type": "boolean"},
                        {"type": "number"},
                    ],
                }
            },
        },
        'iam-statement': iam_statement,
        'actions': {},
        'filters': shared_filters,
        'filters_common': filters_common,
        'policy': policy,
        'policy-mode': {
            'anyOf': [mode.schema for _, mode in execution.items()],
        },
        'max-resources-properties': {
            'type': 'object',
            'additionalProperties': False,
            'properties': {
                'amount': {"type": 'integer', 'minimum': 1},
                'op': {'enum': ['or', 'and']},
                'percent': {'type': 'number', 'minimum': 0, 'maximum': 100},
            },
        },
    }

    policy_refs = []
    for cloud_name, cloud in sorted(clouds.items()):
        is_aws = cloud_name == 'aws'
        for short_name, resource_type in sorted(cloud.resources.items()):
            qualified = "%s.%s" % (cloud_name, short_name)
            type_aliases = resource_type.type_aliases

            if resource_types and qualified not in resource_types:
                # not requested by name; keep it only if an alias was
                if not type_aliases:
                    continue
                requested = {
                    "%s.%s" % (cloud_name, alias) for alias in type_aliases}
                if not requested.intersection(resource_types):
                    continue

            names = []
            if type_aliases:
                names.extend(
                    "%s.%s" % (cloud_name, alias) for alias in type_aliases)
                # aws gets legacy aliases with no cloud prefix
                if is_aws:
                    names.extend(type_aliases)
            # aws gets additional alias for default name
            if is_aws:
                names.append(short_name)

            policy_refs.append(
                process_resource(
                    qualified,
                    resource_type,
                    resource_defs,
                    names,
                    definitions,
                    cloud_name,
                ))

    # allow empty policies with lazy load
    if policy_refs:
        policy_items = {'anyOf': policy_refs}
    else:
        policy_items = {'type': 'object'}

    return {
        "$schema": "http://json-schema.org/draft-07/schema#",
        'id': 'http://schema.cloudcustodian.io/v0/custodian.json',
        'definitions': definitions,
        'type': 'object',
        'required': ['policies'],
        'additionalProperties': False,
        'properties': {
            'vars': {'type': 'object'},
            'policies': {
                'type': 'array',
                'additionalItems': False,
                'items': policy_items,
            },
        },
    }
def process_resource(
        type_name, resource_type, resource_defs, aliases=None,
        definitions=None, provider_name=None):
    """Register one resource type's schema and return a ``$ref`` to it.

    Populates ``resource_defs[type_name]`` with the resource's action and
    filter schemas and a ``policy`` schema that narrows the base policy
    definition to this resource. Elements that set ``schema_alias`` are
    stored once under ``definitions['actions']`` / ``definitions['filters']``
    keyed by ``"<provider_name>.<element type>"`` and shared by reference.

    Args:
        type_name: key used for this resource under ``resource_defs`` and
            in the generated ``$ref`` paths.
        resource_type: object exposing ``action_registry`` and
            ``filter_registry``.
        resource_defs: mapping updated in place with this resource's entry.
        aliases: optional extra names accepted for the policy ``resource``.
        definitions: shared schema definitions; it is indexed whenever an
            element sets ``schema_alias``, so it must be provided then.
        provider_name: prefix for shared (aliased) element definitions.

    Returns:
        ``{'$ref': '#/definitions/resources/<type_name>/policy'}``

    Raises:
        SyntaxError: an aliased action's schema differs from the one
            already registered under the same alias.
        AssertionError: the same mismatch for an aliased filter.
    """
    r = resource_defs.setdefault(type_name, {'actions': {}, 'filters': {}})

    action_refs = []
    for a in ElementSchema.elements(resource_type.action_registry):
        action_name = a.type
        if a.schema_alias:
            action_alias = "%s.%s" % (provider_name, action_name)
            if action_alias in definitions['actions']:
                if definitions['actions'][action_alias] != a.schema:  # NOQA
                    msg = "Schema mismatch on type:{} action:{} w/ schema alias ".format(
                        type_name, action_name)
                    raise SyntaxError(msg)
            else:
                definitions['actions'][action_alias] = a.schema
            action_refs.append({'$ref': '#/definitions/actions/%s' % action_alias})
        else:
            r['actions'][action_name] = a.schema
            action_refs.append(
                {'$ref': '#/definitions/resources/%s/actions/%s' % (
                    type_name, action_name)})

    # one word action shortcuts
    action_refs.append(
        {'enum': list(resource_type.action_registry.keys())})

    filter_refs = []
    for f in ElementSchema.elements(resource_type.filter_registry):
        filter_name = f.type
        if filter_name == 'value':
            filter_refs.append({'$ref': '#/definitions/filters/value'})
            filter_refs.append({'$ref': '#/definitions/filters/valuekv'})
        elif filter_name == 'event':
            filter_refs.append({'$ref': '#/definitions/filters/event'})
        elif f.schema_alias:
            filter_alias = "%s.%s" % (provider_name, filter_name)
            if filter_alias in definitions['filters']:
                # Raised explicitly rather than via ``assert`` so the check
                # still runs when Python is started with -O; the exception
                # type and message are unchanged.
                if definitions['filters'][filter_alias] != f.schema:
                    raise AssertionError(
                        "Schema mismatch on filter w/ schema alias")
            else:
                definitions['filters'][filter_alias] = f.schema
            filter_refs.append({'$ref': '#/definitions/filters/%s' % filter_alias})
        else:
            r['filters'][filter_name] = f.schema
            filter_refs.append(
                {'$ref': '#/definitions/resources/%s/filters/%s' % (
                    type_name, filter_name)})

    # one word filter shortcuts
    filter_refs.append(
        {'enum': list(resource_type.filter_registry.keys())})

    # and/or/not blocks recurse into this resource's own filters schema
    block_fref = '#/definitions/resources/%s/policy/allOf/1/properties/filters' % (
        type_name)
    filter_refs.extend([
        {'type': 'object', 'additionalProperties': False,
         'properties': {'or': {'$ref': block_fref}}},
        {'type': 'object', 'additionalProperties': False,
         'properties': {'and': {'$ref': block_fref}}},
        {'type': 'object', 'additionalProperties': False,
         'properties': {'not': {'$ref': block_fref}}}])

    resource_policy = {
        'allOf': [
            {'$ref': '#/definitions/policy'},
            {'properties': {
                'resource': {'enum': [type_name]},
                'filters': {
                    'type': 'array',
                    'items': {'anyOf': filter_refs}},
                'actions': {
                    'type': 'array',
                    'items': {'anyOf': action_refs}}}},
        ]
    }

    if aliases:
        resource_policy['allOf'][1]['properties'][
            'resource']['enum'].extend(aliases)

    # NOTE(review): generate() passes provider-qualified names built as
    # "<cloud>.<type>", so this only matches callers passing the bare
    # name 'ec2' -- confirm whether it is still needed.
    if type_name == 'ec2':
        resource_policy['allOf'][1]['properties']['query'] = {}

    r['policy'] = resource_policy
    return {'$ref': '#/definitions/resources/%s/policy' % type_name}
def resource_outline(provider=None):
    """Summarise registered filter and action names per resource.

    Returns ``{cloud: {"cloud.resource": {'filters': [...], 'actions': [...]}}}``
    with clouds and resources visited in sorted order and the name lists
    sorted. When ``provider`` is truthy only that cloud is included.
    """
    outline = {}
    for cloud_name, cloud in sorted(clouds.items()):
        if provider and cloud_name != provider:
            continue
        outline[cloud_name] = {
            '%s.%s' % (cloud_name, resource_name): {
                'filters': sorted(resource.filter_registry.keys()),
                'actions': sorted(resource.action_registry.keys()),
            }
            for resource_name, resource in sorted(cloud.resources.items())
        }
    return outline
def resource_vocabulary(cloud_name=None, qualify_name=True, aliases=True):
    """Build a lookup of resources with their filter and action classes.

    Each resource key (``"cloud.resource"`` when ``qualify_name`` is true,
    the bare resource name otherwise) maps to a dict with sorted
    ``filters`` and ``actions`` name lists and a ``classes`` dict holding
    the element classes by name plus the resource class itself.

    With ``aliases`` enabled an ``'aliases'`` entry maps each
    provider-qualified alias (and, for aws, the bare alias too) to the
    same dict as the canonical resource. A ``'mode'`` entry maps
    execution mode names to their classes. ``cloud_name`` restricts the
    result to a single provider.
    """
    vocabulary = {}
    if aliases:
        vocabulary['aliases'] = {}

    resources = {}
    for provider_name, provider_type in clouds.items():
        if cloud_name is not None and provider_name != cloud_name:
            continue
        for short_name, resource_type in provider_type.resources.items():
            key = ('%s.%s' % (provider_name, short_name)
                   if qualify_name else short_name)
            resources[key] = resource_type

    for type_name, resource_type in resources.items():
        action_classes = {
            ElementSchema.name(cls): cls
            for cls in ElementSchema.elements(resource_type.action_registry)}
        filter_classes = {
            ElementSchema.name(cls): cls
            for cls in ElementSchema.elements(resource_type.filter_registry)}

        entry = {
            'filters': sorted(filter_classes),
            'actions': sorted(action_classes),
            'classes': {
                'actions': action_classes,
                'filters': filter_classes,
                'resource': resource_type,
            },
        }
        vocabulary[type_name] = entry

        if aliases and resource_type.type_aliases:
            provider = type_name.split('.', 1)[0]
            for type_alias in resource_type.type_aliases:
                vocabulary['aliases'][
                    "{}.{}".format(provider, type_alias)] = entry
                if provider == 'aws':
                    vocabulary['aliases'][type_alias] = entry
            # NOTE(review): nesting reconstructed from a listing with
            # collapsed indentation; confirm this assignment belongs
            # inside the alias branch rather than at loop level.
            entry['resource_type'] = type_name

    vocabulary["mode"] = {
        mode_name: cls for mode_name, cls in execution.items()}

    return vocabulary
544class ElementSchema:
545 """Utility functions for working with resource's filters and actions.
546 """
548 @staticmethod
549 def elements(registry):
550 """Given a resource registry return sorted de-aliased values.
551 """
552 seen = {}
553 for k, v in registry.items():
554 if k in ('and', 'or', 'not'):
555 continue
556 if v in seen:
557 continue
558 else:
559 seen[ElementSchema.name(v)] = v
560 return [seen[k] for k in sorted(seen)]
562 @staticmethod
563 def resolve(vocabulary, schema_path):
564 """Given a resource vocabulary and a dotted path, resolve an element.
565 """
566 current = vocabulary
567 frag = None
568 if schema_path.startswith('.'):
569 # The preprended '.' is an odd artifact
570 schema_path = schema_path[1:]
571 parts = schema_path.split('.')
572 while parts:
573 k = parts.pop(0)
574 if frag:
575 k = "%s.%s" % (frag, k)
576 frag = None
577 parts.insert(0, 'classes')
578 elif k in clouds:
579 frag = k
580 if len(parts) == 1:
581 parts.append('resource')
582 continue
583 if k not in current:
584 raise ValueError("Invalid schema path %s" % schema_path)
585 current = current[k]
586 return current
588 @staticmethod
589 def name(cls):
590 """For a filter or action return its name."""
591 return cls.schema['properties']['type']['enum'][0]
593 @staticmethod
594 def doc(cls):
595 """Return 'best' formatted doc string for a given class.
597 Walks up class hierarchy, skipping known bad. Returns
598 empty string if no suitable doc string found.
599 """
600 # walk up class hierarchy for nearest
601 # good doc string, skip known
602 if cls.__doc__ is not None:
603 return inspect.cleandoc(cls.__doc__)
604 doc = None
605 for b in cls.__bases__:
606 if b in (ValueFilter, object):
607 continue
608 doc = b.__doc__ or ElementSchema.doc(b)
609 if doc is not None:
610 return inspect.cleandoc(doc)
611 return ""
613 @staticmethod
614 def schema(definitions, cls):
615 """Return a pretty'ified version of an element schema."""
616 schema = isinstance(cls, type) and dict(cls.schema) or dict(cls)
617 schema.pop('type', None)
618 schema.pop('additionalProperties', None)
619 return ElementSchema._expand_schema(schema, definitions)
621 @staticmethod
622 def _expand_schema(schema, definitions):
623 """Expand references in schema to their full schema"""
624 for k, v in list(schema.items()):
625 if k == '$ref':
626 # the value here is in the form of: '#/definitions/path/to/key'
627 parts = v.split('/')
628 if ['#', 'definitions'] != parts[0:2]:
629 raise ValueError("Invalid Ref %s" % v)
630 current = definitions
631 for p in parts[2:]:
632 if p not in current:
633 return None
634 current = current[p]
635 return ElementSchema._expand_schema(current, definitions)
636 elif isinstance(v, dict):
637 schema[k] = ElementSchema._expand_schema(v, definitions)
638 return schema
def pprint_schema_summary(vocabulary):
    """Print per-provider resource/action/filter counts for a vocabulary.

    Keys containing a ``.`` are treated as ``provider.resource`` entries
    and aggregated per provider (distinct action and filter names are
    counted). Every other key is reported with the length of its value.
    """
    provider_stats = {}
    other_counts = {}

    for type_name, entry in vocabulary.items():
        provider, dot, _ = type_name.partition('.')
        if not dot:
            other_counts[type_name] = len(entry)
            continue
        stats = provider_stats.setdefault(provider, {
            'resources': 0, 'actions': Counter(), 'filters': Counter()})
        stats['resources'] += 1
        for action in entry.get('actions'):
            stats['actions'][action] += 1
        for flt in entry.get('filters'):
            stats['filters'][flt] += 1

    for provider, stats in provider_stats.items():
        print("%s:" % provider)
        print(" resource count: %d" % stats['resources'])
        print(" actions: %d" % len(stats['actions']))
        print(" filters: %d" % len(stats['filters']))

    for key, count in other_counts.items():
        print("%s:" % key)
        print(" count: %d" % count)
def json_dump(resource=None):
    """Print the generated schema as JSON indented by two spaces.

    Calls ``load_available()`` first, then passes ``resource`` through to
    ``generate`` as its ``resource_types`` argument.
    """
    load_available()
    schema = generate(resource)
    rendered = json.dumps(schema, indent=2)
    print(rendered)
# Script entry point: when this module is executed directly, print the
# generated schema via json_dump() using its default argument.
if __name__ == '__main__':
    json_dump()