Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/c7n/schema.py: 13%

297 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:51 +0000

1# Copyright The Cloud Custodian Authors. 

2# SPDX-License-Identifier: Apache-2.0 

3""" 

4Jsonschema validation of cloud custodian config. 

5 

6We start with a walkthrough of the various class registries 

7of resource types and assemble and generate the schema. 

8 

9We do some specialization to reduce overall schema size 

10via reference usage, although in some cases we prefer 

11copies, due to issues with inheritance via reference ( 

12allowedProperties and enum extension). 

13 

14All filters and actions are annotated with schema typically using 

15the utils.type_schema function. 

16""" 

17from collections import Counter 

18import json 

19import inspect 

20import logging 

21 

22from jsonschema import Draft7Validator as JsonSchemaValidator 

23from jsonschema.exceptions import best_match 

24 

25from c7n.policy import execution 

26from c7n.provider import clouds 

27from c7n.query import sources 

28from c7n.resources import load_available 

29from c7n.resolver import ValuesFrom 

30from c7n.filters.core import ( 

31 ValueFilter, 

32 EventFilter, 

33 AgeFilter, 

34 ReduceFilter, 

35 OPERATORS, 

36 VALUE_TYPES, 

37) 

38from c7n.structure import StructureParser # noqa 

39 

40 

def validate(data, schema=None, resource_types=()):
    """Validate policy file data against the custodian JSON schema.

    When no ``schema`` is supplied one is generated for ``resource_types``
    and checked for well-formedness before use.

    Returns a list:

    * ``[]`` when the data is valid and policy names are unique,
    * the result of ``check_unique`` when names are duplicated,
    * ``[error, policy_name]`` for the first schema error after it has
      been narrowed and scoped to its policy,
    * otherwise, if narrowing itself fails, the first raw error plus the
      jsonschema ``best_match`` (falsy entries dropped).
    """
    if schema is None:
        schema = generate(resource_types)
        JsonSchemaValidator.check_schema(schema)

    validator = JsonSchemaValidator(schema)
    errors = list(validator.iter_errors(data))
    if not errors:
        duplicates = check_unique(data)
        return duplicates if duplicates else []

    first = errors[0]
    try:
        scoped = policy_error_scope(specific_error(first), data)
        instance = first.instance
        policy_name = 'unknown'
        if isinstance(instance, dict):
            # an empty or missing name still reports as 'unknown'
            policy_name = instance.get('name', 'unknown') or 'unknown'
        return [scoped, policy_name]
    except Exception:
        # best effort: log why narrowing failed, then fall through to
        # the generic error report below.
        logging.exception(
            "specific_error failed, traceback, followed by fallback")

    fallback = (first, best_match(validator.iter_errors(data)))
    return [err for err in fallback if err]

66 

67 

def check_unique(data):
    """Report policy names that occur more than once.

    Returns ``None`` when every name under ``data['policies']`` is unique,
    otherwise a two item list: a ``ValueError`` describing the duplicates
    and the first duplicated name.
    """
    name_counts = Counter(p['name'] for p in data.get('policies', []))
    # keep only the names seen more than once, preserving first-seen order
    duplicates = Counter(
        {name: count for name, count in name_counts.items() if count != 1})
    if not duplicates:
        return None
    message = "Only one policy with a given name allowed, duplicates: {}".format(
        duplicates)
    return [ValueError(message), next(iter(duplicates))]

77 

78 

def policy_error_scope(error, data):
    """Scope a schema error to its policy name and resource.

    ``error`` is a validation error exposing ``absolute_path`` and a
    mutable ``message``; ``data`` is the document that was validated.

    When the error path points inside ``policies[<index>]`` the error
    message is prefixed (in place) with the policy's name and resource.
    Errors located elsewhere -- the document root, a different top level
    key, or the ``policies`` container itself -- are returned untouched.

    Returns the same ``error`` object.
    """
    err_path = list(error.absolute_path)
    # Root level errors have an empty path and container level errors
    # have no policy index; neither can be tied to a single policy.
    if len(err_path) < 2 or err_path[0] != 'policies':
        return error
    pdata = data['policies'][err_path[1]]
    # A policy entry that is not a mapping has no name/resource to report.
    if not isinstance(pdata, dict):
        return error
    error.message = "Error on policy:{} resource:{}\n".format(
        pdata.get('name', 'unknown'), pdata.get('resource', 'unknown')) + error.message
    return error

89 

90 

def specific_error(error):
    """Try to find the best error for humans to resolve

    The jsonschema.exceptions.best_match error is based purely on a
    mix of a strong match (ie. not anyOf, oneOf) and schema depth,
    this often yields odd results that are semantically confusing,
    instead we can use a bit of structural knowledge of schema to
    provide better results.

    Only ``anyOf``/``oneOf`` errors are narrowed; any other error is
    returned as is. Narrowing uses the ``resource`` or ``type`` value of
    the failing instance to pick the matching schema alternative and
    then returns the sub-error recorded for that alternative. If no
    alternative or sub-error can be matched the original error is
    returned.
    """
    if error.validator not in ('anyOf', 'oneOf'):
        return error

    r = t = None

    if isinstance(error.instance, dict):
        t = error.instance.get('type')
        r = error.instance.get('resource')

    if r is not None:
        found = None
        for idx, v in enumerate(error.validator_value):
            # for a ref shaped like '#/definitions/resources/<type>/policy'
            # the second to last path segment is the resource type name;
            # endswith lets an unqualified name match a qualified one.
            if '$ref' in v and v['$ref'].rsplit('/', 2)[1].endswith(r):
                found = idx
                break
        if found is not None:
            # error context is a flat list of all validation
            # failures, we have to index back to the policy
            # of interest.
            for e in error.context:
                # resource policies have a fixed path from
                # the top of the schema
                if e.absolute_schema_path[4] == found:
                    return specific_error(e)
            # idx still holds the loop's break value here, i.e. idx == found.
            # NOTE(review): this indexes the flat context list by schema
            # alternative position -- confirm that is the intended fallback.
            return specific_error(error.context[idx])

    if t is not None:
        found = None
        for idx, v in enumerate(error.validator_value):
            # referenced element: compare the last ref segment, minus any
            # dotted prefix, with the instance's type value.
            if ('$ref' in v and
                    v['$ref'].rsplit('/', 2)[-1].rsplit('.', 1)[-1] == t):
                found = idx
                break
            # inline element schema: look in its 'type' property enum.
            elif 'type' in v and t in v['properties']['type']['enum']:
                found = idx
                break

        if found is not None:
            for e in error.context:
                # only the innermost integer in the schema path is
                # compared with the matched alternative's position.
                for el in reversed(e.absolute_schema_path):
                    if isinstance(el, int):
                        if el == found:
                            return e
                        break
    return error

145 

146 

147def _get_attr_schema(): 

148 base_filters = [ 

149 {'$ref': '#/definitions/filters/value'}, 

150 {'$ref': '#/definitions/filters/valuekv'}, 

151 ] 

152 any_of = [] 

153 any_of.extend(base_filters) 

154 

155 for op in ('and', 'or', 'not',): 

156 any_of.append( 

157 { 

158 'additional_properties': False, 

159 'properties': { 

160 op: { 

161 'type': 'array', 

162 'items': { 

163 'anyOf': base_filters 

164 } 

165 } 

166 }, 

167 'type': 'object' 

168 } 

169 ) 

170 

171 attr_schema = { 

172 'items': { 

173 'anyOf': any_of 

174 }, 

175 'type': 'array', 

176 } 

177 return attr_schema 

178 

179 

def generate(resource_types=()):
    """Assemble the complete JSON schema for a policy file.

    Walks every registered cloud provider and its resources, registering
    each resource's policy schema through ``process_resource``.

    ``resource_types`` optionally restricts the schema to the given
    provider qualified names (``"<cloud>.<resource>"``); a resource is
    also included when one of its qualified aliases is listed. When it is
    empty every registered resource is included.

    Returns the schema as a dict. If no resource ends up included, policy
    items are only required to be objects.
    """
    # filled in by process_resource, shared by reference with definitions
    resource_defs = {}
    definitions = {
        'resources': resource_defs,
        'string_dict': {
            "type": "object",
            "patternProperties": {
                "": {"type": "string"},
            },
        },
        'basic_dict': {
            "type": "object",
            "patternProperties": {
                "": {
                    'oneOf': [
                        {"type": "string"},
                        {"type": "boolean"},
                        {"type": "number"},
                    ],
                }
            },
        },
        'iam-statement': {
            'additionalProperties': False,
            'type': 'object',
            'properties': {
                'Sid': {'type': 'string'},
                'Effect': {'type': 'string', 'enum': ['Allow', 'Deny']},
                'Principal': {'anyOf': [
                    {'type': 'string'},
                    {'type': 'object'}, {'type': 'array'}]},
                'NotPrincipal': {'anyOf': [{'type': 'object'}, {'type': 'array'}]},
                'Action': {'anyOf': [{'type': 'string'}, {'type': 'array'}]},
                'NotAction': {'anyOf': [{'type': 'string'}, {'type': 'array'}]},
                'Resource': {'anyOf': [{'type': 'string'}, {'type': 'array'}]},
                'NotResource': {'anyOf': [{'type': 'string'}, {'type': 'array'}]},
                'Condition': {'type': 'object'}
            },
            'required': ['Sid', 'Effect'],
            # exactly one of each (Not)Principal/(Not)Action/(Not)Resource
            # pairing must be present
            'oneOf': [
                {'required': ['Principal', 'Action', 'Resource']},
                {'required': ['NotPrincipal', 'Action', 'Resource']},
                {'required': ['Principal', 'NotAction', 'Resource']},
                {'required': ['NotPrincipal', 'NotAction', 'Resource']},
                {'required': ['Principal', 'Action', 'NotResource']},
                {'required': ['NotPrincipal', 'Action', 'NotResource']},
                {'required': ['Principal', 'NotAction', 'NotResource']},
                {'required': ['NotPrincipal', 'NotAction', 'NotResource']}
            ]
        },
        # aliased action schemas are registered here by process_resource
        'actions': {},
        'filters': {
            'value': ValueFilter.schema,
            'event': EventFilter.schema,
            'age': AgeFilter.schema,
            'reduce': ReduceFilter.schema,
            # Shortcut form of value filter as k=v
            'valuekv': {
                'type': 'object',
                'additionalProperties': {'oneOf': [{'type': 'number'}, {'type': 'null'},
                    {'type': 'array', 'maxItems': 0}, {'type': 'string'}, {'type': 'boolean'}]},
                'minProperties': 1,
                'maxProperties': 1},
        },
        'filters_common': {
            'list_item_attrs': _get_attr_schema(),
            'comparison_operators': {
                'enum': list(OPERATORS.keys())},
            'value_types': {'enum': VALUE_TYPES},
            'value_from': ValuesFrom.schema,
            'value': {'oneOf': [
                {'type': 'array'},
                {'type': 'string'},
                {'type': 'boolean'},
                {'type': 'number'},
                {'type': 'null'}]},
        },
        'policy': {
            'type': 'object',
            'required': ['name', 'resource'],
            'additionalProperties': False,
            'properties': {
                'name': {
                    'type': 'string',
                    'pattern': "^[A-z][A-z0-9]*(-[A-z0-9]+)*$"},
                'conditions': {
                    'type': 'array',
                    'items': {'anyOf': [
                        {'type': 'object', 'additionalProperties': False,
                         'properties': {'or': {
                             '$ref': '#/definitions/policy/properties/conditions'}}},
                        {'type': 'object', 'additionalProperties': False,
                         'properties': {'not': {
                             '$ref': '#/definitions/policy/properties/conditions'}}},
                        {'type': 'object', 'additionalProperties': False,
                         'properties': {'and': {
                             '$ref': '#/definitions/policy/properties/conditions'}}},
                        {'$ref': '#/definitions/filters/value'},
                        {'$ref': '#/definitions/filters/event'},
                        {'$ref': '#/definitions/filters/valuekv'}]}},
                # these should be deprecated for conditions
                'region': {'type': 'string'},
                'tz': {'type': 'string'},
                'start': {'format': 'date-time'},
                'end': {'format': 'date-time'},
                'resource': {'oneOf': [
                    {'type': 'string'},
                    {'type': 'array', 'items': {'type': 'string'}}]},
                'max-resources': {'anyOf': [
                    {'type': 'integer', 'minimum': 1},
                    {'$ref': '#/definitions/max-resources-properties'}
                ]},
                'max-resources-percent': {'type': 'number', 'minimum': 0, 'maximum': 100},
                'comment': {'type': 'string'},
                'comments': {'type': 'string'},
                'description': {'type': 'string'},
                'tags': {'type': 'array', 'items': {'type': 'string'}},
                'metadata': {'type': 'object'},
                'mode': {'$ref': '#/definitions/policy-mode'},
                'source': {'enum': list(sources.keys())},
                # filters/actions are only typed as arrays here; each
                # resource policy built by process_resource constrains
                # the items.
                'actions': {
                    'type': 'array',
                },
                'filters': {
                    'type': 'array'
                },
                #
                # TODO: source queries should really move under
                # source. This was initially used for describe sources
                # to expose server side query mechanisms, however its
                # important to note it also prevents resource cache
                # utilization between policies that have different
                # queries.
                'query': {
                    'type': 'array', 'items': {'type': 'object'}}

            },
        },
        'policy-mode': {
            'anyOf': [e.schema for _, e in execution.items()],
        },
        'max-resources-properties': {
            'type': 'object',
            'additionalProperties': False,
            'properties': {
                'amount': {"type": 'integer', 'minimum': 1},
                'op': {'enum': ['or', 'and']},
                'percent': {'type': 'number', 'minimum': 0, 'maximum': 100}
            }
        }
    }

    resource_refs = []
    for cloud_name, cloud_type in sorted(clouds.items()):
        for type_name, resource_type in sorted(cloud_type.resources.items()):
            r_type_name = "%s.%s" % (cloud_name, type_name)
            # when a subset was requested, skip resources that match
            # neither by qualified name nor by any qualified alias
            if resource_types and r_type_name not in resource_types:
                if not resource_type.type_aliases:
                    continue
                elif not {"%s.%s" % (cloud_name, ralias) for ralias
                          in resource_type.type_aliases}.intersection(
                              resource_types):
                    continue

            aliases = []
            if resource_type.type_aliases:
                aliases.extend(["%s.%s" % (cloud_name, a) for a in resource_type.type_aliases])
                # aws gets legacy aliases with no cloud prefix
                if cloud_name == 'aws':
                    aliases.extend(resource_type.type_aliases)

            # aws gets additional alias for default name
            if cloud_name == 'aws':
                aliases.append(type_name)

            resource_refs.append(
                process_resource(
                    r_type_name,
                    resource_type,
                    resource_defs,
                    aliases,
                    definitions,
                    cloud_name
                ))

    schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        'id': 'http://schema.cloudcustodian.io/v0/custodian.json',
        'definitions': definitions,
        'type': 'object',
        'required': ['policies'],
        'additionalProperties': False,
        'properties': {
            'vars': {'type': 'object'},
            'policies': {
                'type': 'array',
                'additionalItems': False,
                'items': {'anyOf': resource_refs}
            }
        }
    }

    # allow empty policies with lazy load
    if not resource_refs:
        schema['properties']['policies']['items'] = {'type': 'object'}
    return schema

386 

387 

def process_resource(
        type_name, resource_type, resource_defs, aliases=None,
        definitions=None, provider_name=None):
    """Register one resource type's policy schema.

    Args:
        type_name: key under which the resource is stored in
            ``resource_defs`` and used in the generated ``$ref`` paths.
        resource_type: resource class exposing ``action_registry`` and
            ``filter_registry``.
        resource_defs: mapping of resource definitions, updated in place.
        aliases: extra names accepted for the policy ``resource`` value.
        definitions: top level schema definitions; its ``actions`` and
            ``filters`` entries receive the schemas of elements that set
            ``schema_alias``. Must be provided if any element does.
        provider_name: prefix used to build shared alias keys.

    Returns:
        A ``{'$ref': ...}`` dict pointing at the resource's policy schema.

    Raises:
        SyntaxError: an aliased action's schema differs from the schema
            already registered under the same alias.
        AssertionError: same condition for an aliased filter.
    """
    r = resource_defs.setdefault(type_name, {'actions': {}, 'filters': {}})

    action_refs = []
    for a in ElementSchema.elements(resource_type.action_registry):
        action_name = a.type
        if a.schema_alias:
            # aliased actions share one definition per provider
            action_alias = "%s.%s" % (provider_name, action_name)
            if action_alias in definitions['actions']:
                if definitions['actions'][action_alias] != a.schema:  # NOQA
                    msg = "Schema mismatch on type:{} action:{} w/ schema alias ".format(
                        type_name, action_name)
                    raise SyntaxError(msg)
            else:
                definitions['actions'][action_alias] = a.schema
            action_refs.append({'$ref': '#/definitions/actions/%s' % action_alias})
        else:
            r['actions'][action_name] = a.schema
            action_refs.append(
                {'$ref': '#/definitions/resources/%s/actions/%s' % (
                    type_name, action_name)})

    # one word action shortcuts
    action_refs.append(
        {'enum': list(resource_type.action_registry.keys())})

    filter_refs = []
    for f in ElementSchema.elements(resource_type.filter_registry):
        filter_name = f.type
        if filter_name == 'value':
            filter_refs.append({'$ref': '#/definitions/filters/value'})
            filter_refs.append({'$ref': '#/definitions/filters/valuekv'})
        elif filter_name == 'event':
            filter_refs.append({'$ref': '#/definitions/filters/event'})
        elif f.schema_alias:
            filter_alias = "%s.%s" % (provider_name, filter_name)
            if filter_alias in definitions['filters']:
                # Raised explicitly rather than via `assert` so the check
                # is still enforced when Python runs with -O; the
                # exception type and message are unchanged.
                if definitions['filters'][filter_alias] != f.schema:
                    raise AssertionError(
                        "Schema mismatch on filter w/ schema alias")
            else:
                definitions['filters'][filter_alias] = f.schema
            filter_refs.append({'$ref': '#/definitions/filters/%s' % filter_alias})
        else:
            r['filters'][filter_name] = f.schema
            filter_refs.append(
                {'$ref': '#/definitions/resources/%s/filters/%s' % (
                    type_name, filter_name)})

    # one word filter shortcuts
    filter_refs.append(
        {'enum': list(resource_type.filter_registry.keys())})

    # and/or/not blocks refer back to this resource's own filters array,
    # which is what allows them to nest.
    block_fref = '#/definitions/resources/%s/policy/allOf/1/properties/filters' % (
        type_name)
    filter_refs.extend([
        {'type': 'object', 'additionalProperties': False,
         'properties': {'or': {'$ref': block_fref}}},
        {'type': 'object', 'additionalProperties': False,
         'properties': {'and': {'$ref': block_fref}}},
        {'type': 'object', 'additionalProperties': False,
         'properties': {'not': {'$ref': block_fref}}}])

    resource_policy = {
        'allOf': [
            {'$ref': '#/definitions/policy'},
            {'properties': {
                'resource': {'enum': [type_name]},
                'filters': {
                    'type': 'array',
                    'items': {'anyOf': filter_refs}},
                'actions': {
                    'type': 'array',
                    'items': {'anyOf': action_refs}}}},
        ]
    }

    if aliases:
        resource_policy['allOf'][1]['properties'][
            'resource']['enum'].extend(aliases)

    # Only matches an unqualified 'ec2' type name.
    if type_name == 'ec2':
        resource_policy['allOf'][1]['properties']['query'] = {}

    r['policy'] = resource_policy
    return {'$ref': '#/definitions/resources/%s/policy' % type_name}

477 

478 

def resource_outline(provider=None):
    """Summarise registered resources as nested plain dicts.

    Returns ``{cloud: {"cloud.resource": {'filters': [...],
    'actions': [...]}}}`` with the filter and action names sorted. When
    ``provider`` is truthy only that cloud is included.
    """
    outline = {}
    for cloud_name, cloud in sorted(clouds.items()):
        if not provider or provider == cloud_name:
            outline[cloud_name] = {
                '%s.%s' % (cloud_name, res_name): {
                    'filters': sorted(res_type.filter_registry.keys()),
                    'actions': sorted(res_type.action_registry.keys()),
                }
                for res_name, res_type in sorted(cloud.resources.items())
            }
    return outline

490 

491 

def resource_vocabulary(cloud_name=None, qualify_name=True, aliases=True):
    """Build a lookup of resources with their filter and action classes.

    Args:
        cloud_name: restrict the vocabulary to this provider when given.
        qualify_name: key resources as ``"<cloud>.<resource>"`` instead
            of the bare resource name.
        aliases: also populate ``vocabulary['aliases']`` for resources
            that declare ``type_aliases``.

    Returns:
        A dict with one entry per resource holding sorted ``filters`` and
        ``actions`` name lists plus a ``classes`` mapping, an ``aliases``
        entry (when enabled) and a ``mode`` entry mapping execution mode
        names to their classes.
    """
    vocabulary = {}
    resources = {}

    if aliases:
        vocabulary['aliases'] = {}

    for cname, ctype in clouds.items():
        if cloud_name is not None and cloud_name != cname:
            continue
        for rname, rtype in ctype.resources.items():
            if qualify_name:
                resources['%s.%s' % (cname, rname)] = rtype
            else:
                resources[rname] = rtype

    for type_name, resource_type in resources.items():
        classes = {'actions': {}, 'filters': {}, 'resource': resource_type}
        actions = []
        for cls in ElementSchema.elements(resource_type.action_registry):
            action_name = ElementSchema.name(cls)
            actions.append(action_name)
            classes['actions'][action_name] = cls

        filters = []
        for cls in ElementSchema.elements(resource_type.filter_registry):
            filter_name = ElementSchema.name(cls)
            filters.append(filter_name)
            classes['filters'][filter_name] = cls

        vocabulary[type_name] = {
            'filters': sorted(filters),
            'actions': sorted(actions),
            'classes': classes,
        }

        if aliases and resource_type.type_aliases:
            # NOTE(review): with qualify_name=False the type name has no
            # provider prefix, so this yields the resource name itself --
            # confirm callers only request aliases with qualified names.
            provider = type_name.split('.', 1)[0]
            for type_alias in resource_type.type_aliases:
                # alias entries share the same dict object as the
                # canonical entry
                vocabulary['aliases'][
                    "{}.{}".format(provider, type_alias)] = vocabulary[type_name]
                if provider == 'aws':
                    vocabulary['aliases'][type_alias] = vocabulary[type_name]
                vocabulary[type_name]['resource_type'] = type_name

    vocabulary["mode"] = {}
    for mode_name, cls in execution.items():
        vocabulary["mode"][mode_name] = cls

    return vocabulary

542 

543 

class ElementSchema:
    """Utility functions for working with resource's filters and actions.
    """

    @staticmethod
    def elements(registry):
        """Given a resource registry return sorted de-aliased values.

        The boolean block keys (``and``/``or``/``not``) are skipped.
        Entries are keyed by their schema type name, so several registry
        keys pointing at the same element collapse into one result.
        """
        seen = {}
        for k, v in registry.items():
            if k in ('and', 'or', 'not'):
                continue
            seen[ElementSchema.name(v)] = v
        return [seen[k] for k in sorted(seen)]

    @staticmethod
    def resolve(vocabulary, schema_path):
        """Given a resource vocabulary and a dotted path, resolve an element.

        Raises ValueError when a path component is not present.
        """
        current = vocabulary
        frag = None
        if schema_path.startswith('.'):
            # The prepended '.' is an odd artifact
            schema_path = schema_path[1:]
        parts = schema_path.split('.')
        while parts:
            k = parts.pop(0)
            if frag:
                # rejoin "<cloud>.<resource>" into a single vocabulary key
                # and descend into that resource's classes next
                k = "%s.%s" % (frag, k)
                frag = None
                parts.insert(0, 'classes')
            elif k in clouds:
                frag = k
                # a bare "<cloud>.<resource>" path resolves to the
                # resource class itself
                if len(parts) == 1:
                    parts.append('resource')
                continue
            if k not in current:
                raise ValueError("Invalid schema path %s" % schema_path)
            current = current[k]
        return current

    @staticmethod
    def name(cls):
        """For a filter or action return its name."""
        return cls.schema['properties']['type']['enum'][0]

    @staticmethod
    def doc(cls):
        """Return 'best' formatted doc string for a given class.

        Walks up class hierarchy, skipping known bad. Returns
        empty string if no suitable doc string found.
        """
        # walk up class hierarchy for nearest
        # good doc string, skip known
        if cls.__doc__ is not None:
            return inspect.cleandoc(cls.__doc__)
        for b in cls.__bases__:
            if b in (ValueFilter, object):
                continue
            doc = b.__doc__ or ElementSchema.doc(b)
            # The recursive call reports "nothing found" as an empty
            # string, so test truthiness; otherwise an undocumented first
            # base would hide documentation on later bases.
            if doc:
                return inspect.cleandoc(doc)
        return ""

    @staticmethod
    def schema(definitions, cls):
        """Return a pretty'ified version of an element schema.

        ``cls`` is either an element class (its ``schema`` attribute is
        used) or a schema mapping. The top level ``type`` and
        ``additionalProperties`` keys are dropped and ``$ref`` entries are
        expanded from ``definitions``. The input schema is not modified.
        """
        schema = dict(cls.schema) if isinstance(cls, type) else dict(cls)
        schema.pop('type', None)
        schema.pop('additionalProperties', None)
        return ElementSchema._expand_schema(schema, definitions)

    @staticmethod
    def _expand_schema(schema, definitions):
        """Expand references in schema to their full schema

        Returns a new dict; neither ``schema`` nor ``definitions`` is
        mutated, so class level schemas shared with the generated policy
        schema keep their ``$ref`` entries. Only nested dicts are
        traversed. Returns None for a ref whose path is missing from
        ``definitions`` and raises ValueError for a ref that does not
        start with ``#/definitions``.
        """
        expanded = {}
        for k, v in schema.items():
            if k == '$ref':
                # the value here is in the form of: '#/definitions/path/to/key'
                parts = v.split('/')
                if ['#', 'definitions'] != parts[0:2]:
                    raise ValueError("Invalid Ref %s" % v)
                current = definitions
                for p in parts[2:]:
                    if p not in current:
                        return None
                    current = current[p]
                # a ref replaces the whole object it appears in
                return ElementSchema._expand_schema(current, definitions)
            elif isinstance(v, dict):
                expanded[k] = ElementSchema._expand_schema(v, definitions)
            else:
                expanded[k] = v
        return expanded

639 

640 

def pprint_schema_summary(vocabulary):
    """Print per provider resource, action and filter counts.

    Vocabulary keys containing a ``.`` are treated as
    ``"<provider>.<resource>"`` entries and tallied per provider; the
    action and filter counts are numbers of distinct names. Any other key
    is reported afterwards with the length of its value.
    """
    provider_stats = {}
    other_sizes = {}

    for type_name, entry in vocabulary.items():
        provider, dot, _ = type_name.partition('.')
        if not dot:
            other_sizes[type_name] = len(entry)
            continue
        if provider not in provider_stats:
            provider_stats[provider] = {
                'resources': 0, 'actions': Counter(), 'filters': Counter()}
        tally = provider_stats[provider]
        tally['resources'] += 1
        for action in entry.get('actions'):
            tally['actions'][action] += 1
        for flt in entry.get('filters'):
            tally['filters'][flt] += 1

    for provider, tally in provider_stats.items():
        print("%s:" % provider)
        print(" resource count: %d" % tally['resources'])
        print(" actions: %d" % len(tally['actions']))
        print(" filters: %d" % len(tally['filters']))

    for key, size in other_sizes.items():
        print("%s:" % key)
        print(" count: %d" % size)

667 

668 

def json_dump(resource=None):
    """Load the available resources and print the schema as JSON.

    ``resource`` is passed to ``generate`` as its resource type filter.
    """
    load_available()
    schema = generate(resource)
    rendered = json.dumps(schema, indent=2)
    print(rendered)

672 

673 

# Running this module as a script prints the generated schema as JSON.
if __name__ == '__main__':
    json_dump()