Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/c7n/schema.py: 13%

1# Copyright The Cloud Custodian Authors.

2# SPDX-License-Identifier: Apache-2.0

"""
Jsonschema validation of cloud custodian config.

We start with a walkthrough of the various class registries
of resource types and assemble and generate the schema.

We do some specialization to reduce overall schema size
via reference usage, although in some cases we prefer
copies, due to issues with inheritance via reference (
allowedProperties and enum extension).

All filters and actions are annotated with schema typically using
the utils.type_schema function.
"""

17from collections import Counter

18import json

19import inspect

20import logging

22from jsonschema import Draft7Validator as JsonSchemaValidator

23from jsonschema.exceptions import best_match

25from c7n.policy import execution

26from c7n.provider import clouds

27from c7n.query import sources

28from c7n.resources import load_available

29from c7n.resolver import ValuesFrom

30from c7n.filters.core import (

31 ValueFilter,

32 EventFilter,

33 ReduceFilter,

34 OPERATORS,

35 VALUE_TYPES,

36)

37from c7n.structure import StructureParser # noqa

def is_c7n_placeholder(instance):
    """Report whether a policy value looks like an unexpanded variable.

    Policy validation can happen before variable values are
    interpolated, so a field that expects a non-string type may still
    hold a placeholder. Any string that starts with ``{`` and ends with
    ``}`` is treated as such a placeholder so that callers can avoid
    failing type checks on it.

    Returns ``True`` only for strings of that shape, ``False`` for
    everything else (including non-string values).
    """
    if not isinstance(instance, str):
        return False
    return instance.startswith('{') and instance.endswith('}')

def validate(data, schema=None, resource_types=()):
    """Validate policy file data against the custodian JSON schema.

    When ``schema`` is omitted, one is generated for ``resource_types``
    and itself checked for well-formedness before use.

    Returns an empty list when nothing is wrong. Otherwise returns a
    short list describing the first problem found:

    * ``[error, policy_name]`` for the first schema error that is not
      a type mismatch against a variable placeholder;
    * the truthy members of ``[first_error, best_match(errors)]`` when
      refining an error raised an exception;
    * whatever ``check_unique`` reports when policy names collide.
    """
    if schema is None:
        schema = generate(resource_types)
        JsonSchemaValidator.check_schema(schema)

    fallback_errors = []
    for error in JsonSchemaValidator(schema).iter_errors(data):
        try:
            error = specific_error(error)

            # Type errors raised against variable references that have
            # not been expanded yet are not real failures.
            if error.validator == "type" and is_c7n_placeholder(error.instance):
                continue

            scoped = policy_error_scope(error, data)
            policy_name = 'unknown'
            if isinstance(error.instance, dict):
                # An empty/falsy name is reported as 'unknown' as well.
                policy_name = error.instance.get('name', 'unknown') or 'unknown'
            return [scoped, policy_name]
        except Exception:
            logging.exception(
                "specific_error failed, traceback, followed by fallback")
            fallback_errors.append(error)

    if not fallback_errors:
        return check_unique(data) or []

    candidates = (fallback_errors[0], best_match(fallback_errors))
    return [candidate for candidate in candidates if candidate]

def check_unique(data):
    """Detect policies that share a name.

    Counts the ``name`` of every entry under ``data['policies']`` (a
    missing ``policies`` key is treated as no policies).

    Returns ``None`` when every name occurs exactly once. Otherwise
    returns ``[ValueError, name]`` where the error message lists the
    duplicated names with their counts and ``name`` is the first
    duplicated name encountered.
    """
    occurrences = Counter(p['name'] for p in data.get('policies', []))
    duplicates = Counter(
        {name: count for name, count in occurrences.items() if count != 1})
    if not duplicates:
        return None
    message = "Only one policy with a given name allowed, duplicates: {}".format(duplicates)
    return [ValueError(message), next(iter(duplicates))]

100

101

def policy_error_scope(error, data):
    """Scope a schema error to its policy name and resource.

    When the error's absolute path points inside ``policies[<index>]``,
    the error message is prefixed (in place) with the name and resource
    of that policy, using ``'unknown'`` for whichever is missing.

    Errors that are not located inside a specific policy -- an empty
    path (document root), a path that stops at ``policies``, or a path
    under another top-level key -- are returned untouched, as are
    errors whose policy entry is not a mapping.

    Returns the same ``error`` object it was given.
    """
    err_path = list(error.absolute_path)
    # A policy can only be identified when the path carries both the
    # 'policies' key and the index into that list.
    if len(err_path) < 2 or err_path[0] != 'policies':
        return error

    pdata = data['policies'][err_path[1]]
    if not isinstance(pdata, dict):
        # e.g. a bare string where a policy mapping was expected; there
        # is no name/resource to report.
        return error

    error.message = "Error on policy:{} resource:{}\n".format(
        pdata.get('name', 'unknown'), pdata.get('resource', 'unknown')) + error.message
    return error

112

113

def specific_error(error):
    """Try to find the best error for humans to resolve.

    The jsonschema.exceptions.best_match error is based purely on a
    mix of a strong match (ie. not anyOf, oneOf) and schema depth,
    this often yields odd results that are semantically confusing,
    instead we can use a bit of structural knowledge of schema to
    provide better results.

    Only ``anyOf``/``oneOf`` errors are refined; any other error is
    returned as is. For those, the failing instance's ``resource`` and
    ``type`` values are used to pick the schema alternative the author
    most likely intended, and the sub-error belonging to that
    alternative is returned instead. When no better candidate can be
    identified the original ``error`` is returned.
    """
    if error.validator not in ('anyOf', 'oneOf'):
        return error

    r = t = None
    if isinstance(error.instance, dict):
        t = error.instance.get('type')
        r = error.instance.get('resource')

    # Only a single resource name given as a string can be matched
    # against the '$ref' of a resource policy alternative.
    if isinstance(r, str):
        found = None
        for idx, v in enumerate(error.validator_value):
            if '$ref' not in v:
                continue
            # '#/definitions/resources/<name>/policy' -> '<name>'
            ref_parts = v['$ref'].rsplit('/', 2)
            if len(ref_parts) > 1 and ref_parts[1].endswith(r):
                found = idx
                break
        if found is not None:
            # error context is a flat list of all validation
            # failures, we have to index back to the policy
            # of interest.
            for e in error.context:
                # resource policies have a fixed path from
                # the top of the schema
                path = e.absolute_schema_path
                if len(path) > 4 and path[4] == found:
                    return specific_error(e)
            # Positional fallback, guarded because the context may be
            # shorter than the list of alternatives (or empty).
            if found < len(error.context):
                return specific_error(error.context[found])

    if t is not None:
        found = None
        for idx, v in enumerate(error.validator_value):
            if ('$ref' in v and
                    v['$ref'].rsplit('/', 2)[-1].rsplit('.', 1)[-1] == t):
                found = idx
                break
            # Inline alternatives may declare 'type' without any
            # 'properties' (e.g. {'type': 'object'}), so look it up
            # defensively.
            elif 'type' in v and t in v.get(
                    'properties', {}).get('type', {}).get('enum', []):
                found = idx
                break

        if found is not None:
            for e in error.context:
                # The last integer in the schema path is the index of
                # the alternative this sub-error belongs to.
                for el in reversed(e.absolute_schema_path):
                    if isinstance(el, int):
                        if el == found:
                            return e
                        break
    return error

168

169

170def _get_attr_schema():

171 base_filters = [

172 {'$ref': '#/definitions/filters/value'},

173 {'$ref': '#/definitions/filters/valuekv'},

174 ]

175 any_of = []

176 any_of.extend(base_filters)

177

178 for op in ('and', 'or', 'not',):

179 any_of.append(

180 {

181 'additional_properties': False,

182 'properties': {

183 op: {

184 'type': 'array',

185 'items': {

186 'anyOf': base_filters

187 }

188 }

189 },

190 'type': 'object'

191 }

192 )

193

194 attr_schema = {

195 'items': {

196 'anyOf': any_of

197 },

198 'type': 'array',

199 }

200 return attr_schema

201

202

def get_default_definitions(resource_defs):
    """Return the shared ``definitions`` section of the policy schema.

    ``resource_defs`` is stored by reference under the ``resources``
    key, so per-resource definitions added to it later show up in the
    returned mapping. The remaining entries are the reusable building
    blocks that resource policies point at via ``$ref``: simple dict
    shapes, the IAM statement shape, the core filters, common filter
    fragments, the base ``policy`` object, policy modes and the
    ``max-resources`` object form.
    """
    def any_type_of(*type_names):
        # A fresh dict per call so no two schema nodes alias each other.
        return {'anyOf': [{'type': name} for name in type_names]}

    string_dict = {
        "type": "object",
        "patternProperties": {
            "": {"type": "string"},
        },
    }

    basic_dict = {
        "type": "object",
        "patternProperties": {
            "": {
                'oneOf': [
                    {"type": "string"},
                    {"type": "boolean"},
                    {"type": "number"},
                ],
            }
        },
    }

    iam_statement = {
        'additionalProperties': False,
        'type': 'object',
        'properties': {
            'Sid': {'type': 'string'},
            'Effect': {'type': 'string', 'enum': ['Allow', 'Deny']},
            'Principal': any_type_of('string', 'object', 'array'),
            'NotPrincipal': any_type_of('object', 'array'),
            'Action': any_type_of('string', 'array'),
            'NotAction': any_type_of('string', 'array'),
            'Resource': any_type_of('string', 'array'),
            'NotResource': any_type_of('string', 'array'),
            'Condition': {'type': 'object'}
        },
        'required': ['Sid', 'Effect'],
        # Exactly one of each positive/negated pair: every combination
        # of (Not)Principal, (Not)Action and (Not)Resource.
        'oneOf': [
            {'required': [principal, action, resource]}
            for resource in ('Resource', 'NotResource')
            for action in ('Action', 'NotAction')
            for principal in ('Principal', 'NotPrincipal')
        ]
    }

    filters = {
        'value': ValueFilter.schema,
        'event': EventFilter.schema,
        'reduce': ReduceFilter.schema,
        # Shortcut form of value filter as k=v
        'valuekv': {
            'type': 'object',
            'additionalProperties': {'oneOf': [
                {'type': 'number'},
                {'type': 'null'},
                {'type': 'array', 'maxItems': 0},
                {'type': 'string'},
                {'type': 'boolean'}
            ]},
            'minProperties': 1,
            'maxProperties': 1},
    }

    filters_common = {
        'list_item_attrs': _get_attr_schema(),
        'comparison_operators': {
            'enum': list(OPERATORS.keys())},
        'value_types': {'enum': VALUE_TYPES},
        'value_from': ValuesFrom.schema,
        'value': {'oneOf': [
            {'type': 'array'},
            {'type': 'string'},
            {'type': 'boolean'},
            {'type': 'number'},
            {'type': 'null'}]},
    }

    # Conditions may nest through or/not/and blocks that point back at
    # the conditions schema itself.
    conditions_ref = '#/definitions/policy/properties/conditions'
    condition_items = [
        {'type': 'object', 'additionalProperties': False,
         'properties': {op: {'$ref': conditions_ref}}}
        for op in ('or', 'not', 'and')
    ]
    condition_items.extend([
        {'$ref': '#/definitions/filters/value'},
        {'$ref': '#/definitions/filters/event'},
        {'$ref': '#/definitions/filters/valuekv'},
    ])

    policy_properties = {
        'name': {
            'type': 'string',
            # Note: the A-z range also admits the ASCII punctuation
            # that sits between 'Z' and 'a', which includes '_'.
            'pattern': "^[A-z][A-z0-9]*(-[A-z0-9]+)*$"},
        'conditions': {
            'type': 'array',
            'items': {'anyOf': condition_items}},
        # these should be deprecated for conditions
        'region': {'type': 'string'},
        'tz': {'type': 'string'},
        'start': {'format': 'date-time'},
        'end': {'format': 'date-time'},
        'resource': {'oneOf': [
            {'type': 'string'},
            {'type': 'array', 'items': {'type': 'string'}}]},
        'max-resources': {'anyOf': [
            {'type': 'integer', 'minimum': 1},
            {'$ref': '#/definitions/max-resources-properties'}
        ]},
        'max-resources-percent': {'type': 'number', 'minimum': 0, 'maximum': 100},
        'comment': {'type': 'string'},
        'comments': {'type': 'string'},
        'description': {'type': 'string'},
        'tags': {'type': 'array', 'items': {'type': 'string'}},
        'metadata': {'type': 'object'},
        'mode': {'$ref': '#/definitions/policy-mode'},
        'source': {'enum': list(sources.keys())},
        'actions': {
            'type': 'array',
        },
        'filters': {
            'type': 'array'
        },
        #
        # TODO: source queries should really move under
        # source. This was initially used for describe sources
        # to expose server side query mechanisms, however its
        # important to note it also prevents resource cache
        # utilization between policies that have different
        # queries.
        'query': {
            'type': 'array', 'items': {'type': 'object'}}
    }

    policy = {
        'type': 'object',
        'required': ['name', 'resource'],
        'additionalProperties': False,
        'properties': policy_properties,
    }

    policy_mode = {
        'anyOf': [e.schema for _, e in execution.items()],
    }

    max_resources_properties = {
        'type': 'object',
        'additionalProperties': False,
        'properties': {
            'amount': {"type": 'integer', 'minimum': 1},
            'op': {'enum': ['or', 'and']},
            'percent': {'type': 'number', 'minimum': 0, 'maximum': 100}
        }
    }

    return {
        'resources': resource_defs,
        'string_dict': string_dict,
        'basic_dict': basic_dict,
        'iam-statement': iam_statement,
        'actions': {},
        'filters': filters,
        'filters_common': filters_common,
        'policy': policy,
        'policy-mode': policy_mode,
        'max-resources-properties': max_resources_properties,
    }

357

358

def generate(resource_types=()):
    """Assemble the full JSON schema for a policy file.

    Walks every registered cloud provider and its resources, adds each
    resource's definitions via ``process_resource`` and allows the
    ``policies`` array to contain any of the resulting resource
    policies.

    ``resource_types``, when non-empty, restricts the schema to the
    listed ``<cloud>.<type>`` names; a resource is also kept when one
    of its cloud-qualified aliases is listed. When no resource ends up
    in the schema, policies are only required to be objects.
    """
    resource_defs = {}
    definitions = get_default_definitions(resource_defs)

    resource_refs = []
    for cloud_name, cloud_type in sorted(clouds.items()):
        for type_name, resource_type in sorted(cloud_type.resources.items()):
            r_type_name = "%s.%s" % (cloud_name, type_name)
            type_aliases = resource_type.type_aliases
            qualified_aliases = []
            if type_aliases:
                qualified_aliases = [
                    "%s.%s" % (cloud_name, alias) for alias in type_aliases]

            # Skip resources that were not asked for, neither by name
            # nor through one of their aliases.
            if resource_types and r_type_name not in resource_types:
                if not set(qualified_aliases).intersection(resource_types):
                    continue

            aliases = list(qualified_aliases)
            if cloud_name == 'aws':
                # aws gets legacy aliases with no cloud prefix
                if type_aliases:
                    aliases.extend(type_aliases)
                # aws gets additional alias for default name
                aliases.append(type_name)

            resource_refs.append(
                process_resource(
                    r_type_name,
                    resource_type,
                    resource_defs,
                    aliases,
                    definitions,
                    cloud_name
                ))

    # allow empty policies with lazy load
    if resource_refs:
        policy_items = {'anyOf': resource_refs}
    else:
        policy_items = {'type': 'object'}

    return {
        "$schema": "http://json-schema.org/draft-07/schema#",
        'id': 'http://schema.cloudcustodian.io/v0/custodian.json',
        'definitions': definitions,
        'type': 'object',
        'required': ['policies'],
        'additionalProperties': False,
        'properties': {
            'vars': {'type': 'object'},
            'policies': {
                'type': 'array',
                'additionalItems': False,
                'items': policy_items
            }
        }
    }

417

418

def process_resource(
        type_name, resource_type, resource_defs, aliases=None,
        definitions=None, provider_name=None):
    """Add one resource type's definitions and return a ref to its policy.

    An entry for ``type_name`` is created in ``resource_defs`` if it
    does not exist yet. If ``resource_type`` provides ``get_schema``,
    schema generation is delegated to it entirely. Otherwise every
    action and filter of the resource is registered:

    * elements flagged with ``schema_alias`` are stored once under the
      shared ``definitions['actions'|'filters']`` as
      ``<provider_name>.<element>``; a second registration with a
      different schema is an error;
    * all other elements are stored under the resource's own entry;
    * the core ``value``/``event`` filters always point at the shared
      definitions.

    The resource policy combines the base policy definition with the
    resource specific ``resource`` names (``type_name`` plus
    ``aliases``), filters and actions.

    Returns ``{'$ref': '#/definitions/resources/<type_name>/policy'}``.

    Raises ``SyntaxError`` for an action schema-alias mismatch and
    ``AssertionError`` for a filter schema-alias mismatch.
    """
    r = resource_defs.setdefault(type_name, {'actions': {}, 'filters': {}})

    # Resource types may take over schema generation themselves.
    if getattr(resource_type, "get_schema", None):
        resource_type.get_schema(
            type_name, resource_defs, definitions, provider_name
        )
        return {'$ref': '#/definitions/resources/%s/policy' % type_name}

    action_refs = []
    for a in ElementSchema.elements(resource_type.action_registry):
        action_name = a.type
        if a.schema_alias:
            action_alias = "%s.%s" % (provider_name, action_name)
            if action_alias in definitions['actions']:
                if definitions['actions'][action_alias] != a.schema:  # NOQA
                    msg = "Schema mismatch on type:{} action:{} w/ schema alias ".format(
                        type_name, action_name)
                    raise SyntaxError(msg)
            else:
                definitions['actions'][action_alias] = a.schema
            action_refs.append({'$ref': '#/definitions/actions/%s' % action_alias})
        else:
            r['actions'][action_name] = a.schema
            action_refs.append(
                {'$ref': '#/definitions/resources/%s/actions/%s' % (
                    type_name, action_name)})

    # one word action shortcuts
    action_refs.append(
        {'enum': list(resource_type.action_registry.keys())})

    filter_refs = []
    for f in ElementSchema.elements(resource_type.filter_registry):
        filter_name = f.type
        if filter_name == 'value':
            filter_refs.append({'$ref': '#/definitions/filters/value'})
            filter_refs.append({'$ref': '#/definitions/filters/valuekv'})
        elif filter_name == 'event':
            filter_refs.append({'$ref': '#/definitions/filters/event'})
        elif f.schema_alias:
            filter_alias = "%s.%s" % (provider_name, filter_name)
            if filter_alias in definitions['filters']:
                # Raised explicitly (rather than via an assert
                # statement) so the check still runs when Python is
                # started with -O.
                if definitions['filters'][filter_alias] != f.schema:
                    raise AssertionError(
                        "Schema mismatch on filter w/ schema alias")
            else:
                definitions['filters'][filter_alias] = f.schema
            filter_refs.append({'$ref': '#/definitions/filters/%s' % filter_alias})
        else:
            r['filters'][filter_name] = f.schema
            filter_refs.append(
                {'$ref': '#/definitions/resources/%s/filters/%s' % (
                    type_name, filter_name)})

    # one word filter shortcuts
    filter_refs.append(
        {'enum': list(resource_type.filter_registry.keys())})

    # or/and/not blocks nest by pointing back at this resource's own
    # filters schema.
    block_fref = '#/definitions/resources/%s/policy/allOf/1/properties/filters' % (
        type_name)
    filter_refs.extend([
        {'type': 'object', 'additionalProperties': False,
         'properties': {'or': {'$ref': block_fref}}},
        {'type': 'object', 'additionalProperties': False,
         'properties': {'and': {'$ref': block_fref}}},
        {'type': 'object', 'additionalProperties': False,
         'properties': {'not': {'$ref': block_fref}}}])

    resource_policy = {
        'allOf': [
            {'$ref': '#/definitions/policy'},
            {'properties': {
                'resource': {'enum': [type_name]},
                'filters': {
                    'type': 'array',
                    'items': {'anyOf': filter_refs}},
                'actions': {
                    'type': 'array',
                    'items': {'anyOf': action_refs}}}},
        ]
    }

    if aliases:
        resource_policy['allOf'][1]['properties'][
            'resource']['enum'].extend(aliases)

    if type_name == 'ec2':
        resource_policy['allOf'][1]['properties']['query'] = {}

    r['policy'] = resource_policy
    return {'$ref': '#/definitions/resources/%s/policy' % type_name}

514

515

def resource_outline(provider=None):
    """Summarise registered resources with their filter and action names.

    Returns ``{cloud: {"<cloud>.<resource>": {"filters": [...],
    "actions": [...]}}}`` with both name lists sorted. When
    ``provider`` is given (and truthy) only that cloud is included.
    """
    outline = {}
    for cname, ctype in sorted(clouds.items()):
        if provider and provider != cname:
            continue
        outline[cname] = {
            '%s.%s' % (cname, rname): {
                'filters': sorted(rtype.filter_registry.keys()),
                'actions': sorted(rtype.action_registry.keys()),
            }
            for rname, rtype in sorted(ctype.resources.items())
        }
    return outline

527

528

def resource_vocabulary(cloud_name=None, qualify_name=True, aliases=True):
    """Build a lookup of resources, their filters/actions and policy modes.

    For every resource (optionally limited to ``cloud_name``) the
    result maps the resource name -- ``<cloud>.<resource>`` when
    ``qualify_name`` is true, the bare name otherwise -- to a dict
    with the sorted ``filters`` and ``actions`` names and a
    ``classes`` mapping holding the implementing classes and the
    resource class itself.

    With ``aliases`` enabled an ``aliases`` entry maps each
    cloud-qualified alias (and, for aws, the bare alias) to the same
    dict as the resource it stands for. A ``mode`` entry maps
    execution mode names to their classes.
    """
    vocabulary = {}
    if aliases:
        vocabulary['aliases'] = {}

    resources = {}
    for cname, ctype in clouds.items():
        if cloud_name is not None and cloud_name != cname:
            continue
        for rname, rtype in ctype.resources.items():
            key = '%s.%s' % (cname, rname) if qualify_name else rname
            resources[key] = rtype

    for type_name, resource_type in resources.items():
        action_classes = {
            ElementSchema.name(cls): cls
            for cls in ElementSchema.elements(resource_type.action_registry)}
        filter_classes = {
            ElementSchema.name(cls): cls
            for cls in ElementSchema.elements(resource_type.filter_registry)}

        vocabulary[type_name] = {
            'filters': sorted(filter_classes),
            'actions': sorted(action_classes),
            'classes': {
                'actions': action_classes,
                'filters': filter_classes,
                'resource': resource_type,
            },
        }

        if aliases and resource_type.type_aliases:
            provider = type_name.split('.', 1)[0]
            for type_alias in resource_type.type_aliases:
                qualified_alias = "{}.{}".format(provider, type_alias)
                vocabulary['aliases'][qualified_alias] = vocabulary[type_name]
                if provider == 'aws':
                    vocabulary['aliases'][type_alias] = vocabulary[type_name]
            # NOTE(review): nesting reconstructed from a listing that
            # lost its indentation -- confirm this assignment belongs
            # inside the alias branch.
            vocabulary[type_name]['resource_type'] = type_name

    vocabulary["mode"] = {
        mode_name: cls for mode_name, cls in execution.items()}

    return vocabulary

579

580

581class ElementSchema:

582 """Utility functions for working with resource's filters and actions.

583 """

584

585 @staticmethod

586 def elements(registry):

587 """Given a resource registry return sorted de-aliased values.

588 """

589 seen = {}

590 for k, v in registry.items():

591 if k in ('and', 'or', 'not'):

592 continue

593 if v in seen:

594 continue

595 else:

596 seen[ElementSchema.name(v)] = v

597 return [seen[k] for k in sorted(seen)]

598

599 @staticmethod

600 def resolve(vocabulary, schema_path):

601 """Given a resource vocabulary and a dotted path, resolve an element.

602 """

603 current = vocabulary

604 frag = None

605 if schema_path.startswith('.'):

606 # The preprended '.' is an odd artifact

607 schema_path = schema_path[1:]

608 parts = schema_path.split('.')

609 while parts:

610 k = parts.pop(0)

611 if frag:

612 k = "%s.%s" % (frag, k)

613 frag = None

614 parts.insert(0, 'classes')

615 elif k in clouds:

616 frag = k

617 if len(parts) == 1:

618 parts.append('resource')

619 continue

620 if k not in current:

621 raise ValueError("Invalid schema path %s" % schema_path)

622 current = current[k]

623 return current

624

625 @staticmethod

626 def name(cls):

627 """For a filter or action return its name."""

628 return cls.schema['properties']['type']['enum'][0]

629

630 @staticmethod

631 def doc(cls):

632 """Return 'best' formatted doc string for a given class.

633

634 Walks up class hierarchy, skipping known bad. Returns

635 empty string if no suitable doc string found.

636 """

637 # walk up class hierarchy for nearest

638 # good doc string, skip known

639 if cls.__doc__ is not None:

640 return inspect.cleandoc(cls.__doc__)

641 doc = None

642 for b in cls.__bases__:

643 if b in (ValueFilter, object):

644 continue

645 doc = b.__doc__ or ElementSchema.doc(b)

646 if doc is not None:

647 return inspect.cleandoc(doc)

648 return ""

649

650 @staticmethod

651 def schema(definitions, cls):

652 """Return a pretty'ified version of an element schema."""

653 schema = isinstance(cls, type) and dict(cls.schema) or dict(cls)

654 schema.pop('type', None)

655 schema.pop('additionalProperties', None)

656 return ElementSchema._expand_schema(schema, definitions)

657

658 @staticmethod

659 def _expand_schema(schema, definitions):

660 """Expand references in schema to their full schema"""

661 for k, v in list(schema.items()):

662 if k == '$ref':

663 # the value here is in the form of: '#/definitions/path/to/key'

664 parts = v.split('/')

665 if ['#', 'definitions'] != parts[0:2]:

666 raise ValueError("Invalid Ref %s" % v)

667 current = definitions

668 for p in parts[2:]:

669 if p not in current:

670 return None

671 current = current[p]

672 return ElementSchema._expand_schema(current, definitions)

673 elif isinstance(v, dict):

674 schema[k] = ElementSchema._expand_schema(v, definitions)

675 return schema

676

677

def pprint_schema_summary(vocabulary):
    """Print per-provider counts for a resource vocabulary.

    Vocabulary keys containing a ``.`` are treated as
    ``<provider>.<resource>`` entries and aggregated per provider:
    number of resources plus the number of distinct action and filter
    names. All other keys are reported with the length of their value.
    Output goes to stdout; nothing is returned.
    """
    providers = {}
    non_providers = {}

    for type_name, rv in vocabulary.items():
        if '.' not in type_name:
            non_providers[type_name] = len(rv)
            continue
        provider = type_name.split('.', 1)[0]
        stats = providers.setdefault(provider, {
            'resources': 0, 'actions': Counter(), 'filters': Counter()})
        stats['resources'] += 1
        for action_name in rv.get('actions'):
            stats['actions'][action_name] += 1
        for filter_name in rv.get('filters'):
            stats['filters'][filter_name] += 1

    for provider, stats in providers.items():
        print("%s:" % provider)
        report = (
            (" resource count: %d", stats['resources']),
            (" actions: %d", len(stats['actions'])),
            (" filters: %d", len(stats['filters'])),
        )
        for template, value in report:
            print(template % value)

    for non_providers_type, length in non_providers.items():
        print("%s:" % non_providers_type)
        print(" count: %d" % length)

704

705

def json_dump(resource=None):
    """Print the generated schema to stdout as indented JSON.

    Loads the available resources first; ``resource`` is passed to
    ``generate`` as its resource type restriction.
    """
    load_available()
    schema = generate(resource)
    print(json.dumps(schema, indent=2))


if __name__ == '__main__':
    json_dump()