Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/c7n/filters/core.py: 25%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

670 statements  

1# Copyright The Cloud Custodian Authors. 

2# SPDX-License-Identifier: Apache-2.0 

3""" 

4Resource Filtering Logic 

5""" 

6import copy 

7import datetime 

8from datetime import timedelta 

9import fnmatch 

10import ipaddress 

11import logging 

12import operator 

13import re 

14 

15from dateutil.tz import tzutc 

16from dateutil.parser import parse 

17from c7n.vendored.distutils import version 

18from random import sample 

19 

20from c7n.element import Element 

21from c7n.exceptions import PolicyValidationError, PolicyExecutionError 

22from c7n.manager import ResourceManager 

23from c7n.registry import PluginRegistry 

24from c7n.resolver import ValuesFrom 

25from c7n.utils import ( 

26 set_annotation, 

27 type_schema, 

28 parse_cidr, 

29 parse_date, 

30 jmespath_search, 

31 jmespath_compile 

32) 

33from c7n.manager import iter_filters 

34 

35 

class FilterValidationError(Exception):
    """Exception type named for filter validation problems.

    Adds nothing on top of :class:`Exception`.
    """

38 

39 

# Resource key under which a matching value filter records its filter key.
ANNOTATION_KEY = "c7n:MatchedFilters"

42 

43 

def glob_match(value, pattern):
    """Return whether the string ``value`` matches the glob ``pattern``.

    Anything that is not a ``str`` never matches.
    """
    return isinstance(value, str) and fnmatch.fnmatch(value, pattern)

48 

49 

def regex_match(value, regex):
    """Case-insensitively match ``regex`` at the start of the string ``value``.

    Non-string values never match.
    """
    if isinstance(value, str):
        # The re module keeps its own cache of compiled patterns.
        return re.match(regex, value, flags=re.IGNORECASE) is not None
    return False

56 

57 

def regex_case_sensitive_match(value, regex):
    """Match ``regex`` at the start of the string ``value``, honouring case.

    Non-string values never match.
    """
    if isinstance(value, str):
        # The re module keeps its own cache of compiled patterns.
        return re.match(regex, value) is not None
    return False

64 

65 

def operator_in(x, y):
    """Return whether ``x`` is a member of ``y``."""
    return operator.contains(y, x)

68 

69 

def operator_ni(x, y):
    """Return whether ``x`` is absent from ``y``."""
    return not operator.contains(y, x)

72 

73 

def difference(x, y):
    """Return True when ``x`` holds at least one element not found in ``y``."""
    leftover = set(x).difference(y)
    return len(leftover) > 0

76 

77 

def intersect(x, y):
    """Return True when ``x`` and ``y`` share at least one element."""
    shared = set(x).intersection(y)
    return len(shared) > 0

80 

81 

def mod(x, y):
    """Return True when ``x % y`` is truthy (i.e. a non-zero remainder)."""
    remainder = x % y
    return bool(remainder)

84 

85 

# Maps each operator name usable in a filter's `op` to a two-argument callable.
# Several names are aliases that point at the same callable.
OPERATORS = {
    # equality
    'eq': operator.eq,
    'equal': operator.eq,
    'ne': operator.ne,
    'not-equal': operator.ne,
    # ordering
    'gt': operator.gt,
    'greater-than': operator.gt,
    'ge': operator.ge,
    'gte': operator.ge,
    'le': operator.le,
    'lte': operator.le,
    'lt': operator.lt,
    'less-than': operator.lt,
    # string pattern matching
    'glob': glob_match,
    'regex': regex_match,
    'regex-case': regex_case_sensitive_match,
    # membership and collection comparisons
    'in': operator_in,
    'ni': operator_ni,
    'not-in': operator_ni,
    'contains': operator.contains,
    'difference': difference,
    'intersect': intersect,
    # arithmetic
    'mod': mod,
}

109 

110 

# Names accepted for a value filter's `value_type`.
VALUE_TYPES = [
    'age',
    'integer',
    'expiration',
    'normalize',
    'size',
    'cidr',
    'cidr_size',
    'swap',
    'resource_count',
    'expr',
    'unique_size',
    'date',
    'version',
    'float',
]

115 

116 

class FilterRegistry(PluginRegistry):
    """Plugin registry that comes pre-loaded with the core filter types."""

    # Class used for the single-key shorthand (`{key: value}`); it is
    # assigned after ValueFilter has been defined.
    value_filter_class = None

    def __init__(self, *args, **kw):
        super().__init__(*args, **kw)
        core_filters = (
            ('value', ValueFilter),
            ('or', Or),
            ('and', And),
            ('not', Not),
            ('event', EventFilter),
            ('reduce', ReduceFilter),
            ('list-item', ListItemFilter),
        )
        for name, klass in core_filters:
            self.register(name, klass)

    def parse(self, data, manager):
        """Build one filter per entry of ``data`` and return them as a list."""
        return [self.factory(d, manager) for d in data]

    def factory(self, data, manager=None):
        """Factory func for filters.

        data - policy config for filters
        manager - resource type manager (ec2, s3, etc)

        Raises PolicyValidationError when no filter type is given or the
        type is not registered.
        """
        # Shorthand: a one-key mapping without 'type' is either a boolean
        # block (or/and/not) or a plain value filter.
        is_shorthand = (
            isinstance(data, dict) and len(data) == 1 and 'type' not in data)
        if is_shorthand:
            op = next(iter(data))
            if op in ('or', 'and', 'not'):
                return self[op](data, self, manager)
            return self.value_filter_class(data, manager)

        # A bare string names the filter type directly.
        if isinstance(data, str):
            filter_type, data = data, {'type': data}
        else:
            filter_type = data.get('type')

        if not filter_type:
            raise PolicyValidationError(
                "%s Invalid Filter %s" % (self.plugin_type, data))

        filter_class = self.get(filter_type)
        if filter_class is None:
            raise PolicyValidationError(
                "%s Invalid filter type %s" % (self.plugin_type, data))
        return filter_class(data, manager)

170 

171 

def trim_runtime(filters):
    """Strip filters that can only be evaluated at execution time.

    Event filters need a runtime event, so they are removed when
    conditions are evaluated for dryrun or provisioning stages.
    """
    def detach(node):
        # Remove `node` from its parent block, then keep walking upwards
        # for as long as the removal leaves a boolean block empty.
        while True:
            parent = node.get_block_parent()
            parent.filters.remove(node)
            if not isinstance(parent, BooleanGroupFilter) or len(parent):
                return
            node = parent

    for candidate in iter_filters(filters):
        if isinstance(candidate, EventFilter):
            detach(candidate)

190 

191 

192# Really should be an abstract base class (abc) or 

193# zope.interface 

194 

class Filter(Element):
    """Base class for filters: holds the filter config and its manager."""

    log = logging.getLogger('custodian.filters')

    def __init__(self, data, manager=None):
        self.data = data
        self.manager = manager

    def process(self, resources, event=None):
        """Bulk process resources and return the ones this filter accepts."""
        return [r for r in resources if self(r)]

    def get_block_operator(self):
        """Return the boolean operator of the block directly containing us.

        When the parent is not an and/or/not block the implicit top level
        operator, ``'and'``, is returned.
        """
        parent = self.get_block_parent()
        return parent.type if parent.type in ('and', 'or', 'not') else 'and'

    def get_block_parent(self):
        """Return the block (boolean filter or manager) that contains us.

        Returns None when this filter is not found while walking the
        manager's filters.
        """
        stack = [self.manager]
        # With block_end=True the iteration yields None at the end of a
        # block, which is our cue to pop back to the enclosing block.
        for f in self.manager.iter_filters(block_end=True):
            if f is None:
                stack.pop()
                continue
            if f == self:
                return stack[-1]
            if f.type in ('and', 'or', 'not'):
                stack.append(f)
        return None

    def merge_annotation(self, r, annotation_key, values):
        """Combine ``values`` into the resource's matched annotation.

        Inside and/not blocks the existing annotation is intersected with
        ``values``; inside or blocks the two are unioned. The annotation is
        stored under ``self.matched_annotation_key``; the ``annotation_key``
        argument is not consulted.
        """
        block_op = self.get_block_operator()
        if block_op in ('and', 'not'):
            combine = intersect_list
        elif block_op == 'or':
            combine = union_list
        else:
            return
        key = self.matched_annotation_key
        r[key] = combine(values, r.get(key))

236 

237 

class BaseValueFilter(Filter):
    """Shared value extraction and `value_regex` validation for filters."""

    # Replaced per instance by a dict caching compiled jmespath expressions.
    expr = None

    def __init__(self, data, manager=None):
        super().__init__(data, manager)
        self.expr = {}

    def get_resource_value(self, k, i, regex=None):
        """Extract the value for key ``k`` from resource ``i``.

        ``tag:<name>`` keys are looked up in ``Tags`` (list of Key/Value
        dicts), ``labels`` or ``tags`` (mappings), in that order. Other keys
        are read directly when present on the resource, otherwise evaluated
        as a jmespath expression. When ``regex`` is given the result is
        passed through :class:`ValueRegex`.
        """
        if k.startswith('tag:'):
            tag_name = k.split(':', 1)[1]
            value = None
            if 'Tags' in i:
                value = next(
                    (t.get('Value') for t in i.get("Tags", [])
                     if t.get('Key') == tag_name),
                    None)
            # GCP schema: 'labels': {'key': 'value'}
            elif 'labels' in i:
                value = i.get('labels', {}).get(tag_name, None)
            # GCP has a secondary form of labels called tags
            # as labels without values.
            # Azure schema: 'tags': {'key': 'value'}
            elif 'tags' in i:
                value = (i.get('tags', {}) or {}).get(tag_name, None)
        elif k in i:
            value = i.get(k)
        else:
            # Compile each jmespath expression once and reuse it.
            if k not in self.expr:
                self.expr[k] = jmespath_compile(k)
            value = self.expr[k].search(i)

        if regex:
            value = ValueRegex(regex).get_resource_value(value)
        return value

    def _validate_value_regex(self, regex):
        """Specific validation for `value_regex` type

        The `value_regex` type works a little differently. In
        particular it doesn't support OPERATORS that perform
        operations on a list of values, specifically 'intersect',
        'contains', 'difference', 'in' and 'not-in'

        Raises PolicyValidationError when the pattern does not compile or
        does not contain exactly one capturing group.
        """
        # Sanity check that we can compile
        try:
            pattern = re.compile(regex)
        except re.error as e:
            raise PolicyValidationError(
                "Invalid value_regex: %s %s" % (e, self.data))
        if pattern.groups != 1:
            raise PolicyValidationError(
                "value_regex must have a single capturing group: %s" %
                self.data)
        return self

293 

294 

def intersect_list(a, b):
    """Return the items of ``a`` that also occur in ``b``, in ``a``'s order.

    If either argument is None the other one is returned unchanged.
    """
    if b is None:
        return a
    if a is None:
        return b
    return [item for item in a if item in b]

305 

306 

def union_list(a, b):
    """Return the items of ``a`` followed by items of ``b`` not yet present.

    If either argument is empty or None the other one is returned as is.
    Otherwise a new list is built, so neither input is modified.
    """
    if not b:
        return a
    if not a:
        return b
    # Copy first: extending `a` directly would silently change the
    # caller's list.
    merged = list(a)
    for item in b:
        if item not in merged:
            merged.append(item)
    return merged

315 

316 

class BooleanGroupFilter(Filter):
    """Base for the and/or/not blocks, which wrap a list of child filters."""

    def __init__(self, data, registry, manager):
        super().__init__(data)
        self.registry = registry
        # `data` is a one-key mapping; its value is the list of children.
        child_specs = list(self.data.values())[0]
        self.filters = registry.parse(child_specs, manager)
        self.manager = manager

    def validate(self):
        """Validate every child filter and return self."""
        for child in self.filters:
            child.validate()
        return self

    def get_resource_type_id(self):
        """Return the ``id`` attribute of the manager's resource model."""
        return self.manager.get_model().id

    def __len__(self):
        return len(self.filters)

    def __bool__(self):
        # Without this an empty block would be falsy because of __len__.
        return True

    def get_deprecations(self):
        """Return any matching deprecations for the nested filters."""
        return [
            deprecation
            for child in self.filters
            for deprecation in child.get_deprecations()
        ]

346 

347 

class Or(BooleanGroupFilter):
    """Boolean block matching resources accepted by any child filter."""

    def process(self, resources, event=None):
        if not self.manager:
            return super().process(resources, event)
        return self.process_set(resources, event)

    def __call__(self, r):
        """Fallback for older unit tests that don't utilize a query manager"""
        return any(child(r) for child in self.filters)

    def process_set(self, resources, event):
        """Run every child over all resources and union the matches by id."""
        rtype_id = self.get_resource_type_id()
        # An id containing '.' is evaluated as a jmespath expression.
        if '.' in rtype_id:
            get_id = jmespath_compile(rtype_id).search
        else:
            def get_id(resource):
                return resource[rtype_id]

        resource_map = {get_id(r): r for r in resources}
        matched_ids = set()
        for child in self.filters:
            matched_ids = matched_ids.union(
                get_id(r) for r in child.process(resources, event))
        return [resource_map[r_id] for r_id in matched_ids]

379 

380 

class And(BooleanGroupFilter):
    """Boolean block matching resources accepted by every child filter."""

    def process(self, resources, events=None):
        """Feed resources through each child in turn, narrowing the set."""
        sweeper = None
        if self.manager:
            sweeper = AnnotationSweeper(self.get_resource_type_id(), resources)

        for child in self.filters:
            resources = child.process(resources, events)
            if not resources:
                # Nothing left to filter.
                break

        if sweeper is not None:
            # Reset annotations on resources that did not survive the block.
            sweeper.sweep(resources)

        return resources

396 

397 

class Not(BooleanGroupFilter):
    """Boolean block matching resources rejected by the and-ed children."""

    def process(self, resources, event=None):
        if not self.manager:
            return super().process(resources, event)
        return self.process_set(resources, event)

    def __call__(self, r):
        """Fallback for older unit tests that don't utilize a query manager"""
        # There is an implicit 'and' for self.filters
        # ~(A ^ B ^ ... ^ Z) = ~A v ~B v ... v ~Z
        return any(not child(r) for child in self.filters)

    def process_set(self, resources, event):
        """Return the resources that do not pass all child filters."""
        rtype_id = self.get_resource_type_id()
        # An id containing '.' is evaluated as a jmespath expression.
        if '.' in rtype_id:
            get_id = jmespath_compile(rtype_id).search
        else:
            def get_id(resource):
                return resource[rtype_id]

        resource_map = {get_id(r): r for r in resources}
        sweeper = AnnotationSweeper(rtype_id, resources)

        for child in self.filters:
            resources = child.process(resources, event)
            if not resources:
                break

        before = set(resource_map.keys())
        after = {get_id(r) for r in resources}
        results = before - after
        # Sweeping with an empty list resets the annotations of every
        # resource seen by the sweeper.
        sweeper.sweep([])

        return [resource_map[r_id] for r_id in results]

439 

440 

class AnnotationSweeper:
    """Support clearing annotations set within a block filter.

    See https://github.com/cloud-custodian/cloud-custodian/issues/2116
    """

    def __init__(self, id_key, resources):
        """Snapshot the ``c7n``-prefixed keys of every resource by id."""
        self.id_key = id_key
        # An id key containing '.' is evaluated as a jmespath expression.
        if '.' in id_key:
            get_id = jmespath_compile(self.id_key).search
        else:
            def get_id(resource):
                return resource[self.id_key]

        snapshots = {}
        by_id = {}
        for r in resources:
            rid = get_id(r)
            snapshots[rid] = {
                k: v for k, v in r.items() if k.startswith('c7n')}
            by_id[rid] = r
        # We keep a full copy of the annotation keys to allow restore.
        self.ra_map = copy.deepcopy(snapshots)
        self.resource_map = by_id

    def sweep(self, resources):
        """Reset annotations on every tracked resource not in ``resources``."""
        if '.' in self.id_key:
            get_id = jmespath_compile(self.id_key).search
        else:
            def get_id(resource):
                return resource[self.id_key]

        kept_ids = [get_id(r) for r in resources]
        for rid in set(self.ra_map).difference(kept_ids):
            resource = self.resource_map[rid]
            # Clear annotations if the block filter didn't match
            for key in [k for k in resource if k.startswith('c7n')]:
                del resource[key]
            # Restore annotations that may have existed prior to the block filter.
            resource.update(self.ra_map[rid])

478 

479 

480# The default LooseVersion will fail on comparing present strings, used 

481# in the value as shorthand for certain options. 

class ComparableVersion(version.LooseVersion):
    """LooseVersion whose equality check reports False instead of raising."""

    def __eq__(self, other):
        try:
            return super().__eq__(other)
        except TypeError:
            # Treat an incomparable operand as simply not equal.
            return False

488 

489 

class ValueFilter(BaseValueFilter):
    """Generic value filter using jmespath

    Compares a value extracted from each resource (``key``) against a
    configured value (``value``, ``value_from`` or ``value_path``) using an
    optional operator (``op``) and optional conversion (``value_type``).
    """
    # Operator name, comparison value and value type; filled in lazily by
    # match() on first use.
    op = v = vtype = None

    schema = {
        'type': 'object',
        # Doesn't mix well with inherits that extend
        'additionalProperties': False,
        'required': ['type'],
        'properties': {
            # Doesn't mix well as enum with inherits that extend
            'type': {'enum': ['value']},
            'key': {'type': 'string'},
            'value_type': {'$ref': '#/definitions/filters_common/value_types'},
            'default': {'type': 'object'},
            'value_regex': {'type': 'string'},
            'value_from': {'$ref': '#/definitions/filters_common/value_from'},
            'value': {'$ref': '#/definitions/filters_common/value'},
            'op': {'$ref': '#/definitions/filters_common/comparison_operators'},
            'value_path': {'type': 'string'}
        }
    }
    schema_alias = True
    # When true, a matching resource is annotated with the filter key.
    annotate = True
    # Keys that validate() insists on (subclasses may relax this).
    required_keys = {'value', 'key'}

    def _validate_resource_count(self):
        """ Specific validation for `resource_count` type

        The `resource_count` type works a little differently because it operates
        on the entire set of resources.  It:
          - does not require `key`
          - `value` must be a number
          - supports a subset of the OPERATORS list
        """
        for field in ('op', 'value'):
            if field not in self.data:
                raise PolicyValidationError(
                    "Missing '%s' in value filter %s" % (field, self.data))

        if not (isinstance(self.data['value'], int) or
                isinstance(self.data['value'], list)):
            raise PolicyValidationError(
                "`value` must be an integer in resource_count filter %s" % self.data)

        # I don't see how to support regex for this?
        if (self.data['op'] not in OPERATORS or
                self.data['op'] in {'regex', 'regex-case'} or
                'value_regex' in self.data):
            raise PolicyValidationError(
                "Invalid operator in value filter %s" % self.data)

        return self

    def validate(self):
        """Check the filter configuration and return self.

        Raises PolicyValidationError for a missing key/value, an unknown
        operator, an uncompilable regex or an unparsable date value.
        """
        # The single-key shorthand form needs no further checks.
        if len(self.data) == 1:
            return self

        # `resource_count` requires a slightly different schema than the rest of
        # the value filters because it operates on the full resource list
        if self.data.get('value_type') == 'resource_count':
            return self._validate_resource_count()
        elif self.data.get('value_type') == 'date':
            if not parse_date(self.data.get('value')):
                # Interpolate the value into the message; passing it as a
                # second exception argument would leave the raw '%s' behind.
                raise PolicyValidationError(
                    "value_type: date with invalid date value:%s" % (
                        self.data.get('value', ''),))
        if 'key' not in self.data and 'key' in self.required_keys:
            raise PolicyValidationError(
                "Missing 'key' in value filter %s" % self.data)
        if ('value' not in self.data and
                'value_from' not in self.data and
                'value_path' not in self.data and
                'value' in self.required_keys):
            raise PolicyValidationError(
                "Missing 'value' in value filter %s" % self.data)
        if 'op' in self.data:
            if self.data['op'] not in OPERATORS:
                raise PolicyValidationError(
                    "Invalid operator in value filter %s" % self.data)
            if self.data['op'] in {'regex', 'regex-case'}:
                # Sanity check that we can compile
                try:
                    re.compile(self.data['value'])
                except re.error as e:
                    raise PolicyValidationError(
                        "Invalid regex: %s %s" % (e, self.data))
        if 'value_regex' in self.data:
            return self._validate_value_regex(self.data['value_regex'])

        return self

    def __call__(self, i):
        """Evaluate the filter for one resource, annotating it on a match."""
        if self.data.get('value_type') == 'resource_count':
            return self.process(i)

        matched = self.match(i)
        if matched and self.annotate:
            set_annotation(i, ANNOTATION_KEY, self.k)
        return matched

    def process(self, resources, event=None):
        """Filter ``resources``; `resource_count` tests the whole set at once."""
        # For the resource_count filter we operate on the full set of resources.
        if self.data.get('value_type') == 'resource_count':
            op = OPERATORS[self.data.get('op')]
            if op(len(resources), self.data.get('value')):
                return resources
            return []

        return super(ValueFilter, self).process(resources, event)

    def get_resource_value(self, k, i):
        """Extract key ``k`` from ``i``, applying `value_regex` if configured."""
        return super(ValueFilter, self).get_resource_value(k, i, self.data.get('value_regex'))

    def get_path_value(self, i):
        """Retrieve values using JMESPath.

        When using a Value Filter, a ``value_path`` can be specified.
        This means the value(s) the filter will compare against are
        calculated during the initialization of the filter.

        Note that this option only pulls properties of the resource
        currently being filtered.

        .. code-block:: yaml

            - name: find-admins-with-user-roles
              resource: gcp.project
              filters:
                - type: iam-policy
                  doc:
                    key: bindings[?(role=='roles/admin')].members[]
                    op: intersect
                    value_path: bindings[?(role=='roles/user_access')].members[]

        The iam-policy use the implementation of the generic Value Filter.
        This implementation allows for the comparison of two separate lists of values
        within the same resource.
        """
        return jmespath_search(self.data.get('value_path'), i)

    def match(self, i):
        """Return whether resource ``i`` satisfies the filter."""
        # Lazily unpack the configuration on the first call.
        if self.v is None and len(self.data) == 1:
            [(self.k, self.v)] = self.data.items()
        elif self.v is None and not hasattr(self, 'content_initialized'):
            self.k = self.data.get('key')
            self.op = self.data.get('op')
            if 'value_from' in self.data:
                values = ValuesFrom(self.data['value_from'], self.manager)
                self.v = values.get_values()
            elif 'value_path' in self.data:
                self.v = self.get_path_value(i)
            else:
                self.v = self.data.get('value')
            # Marks initialization as done even when the value is None.
            self.content_initialized = True
            self.vtype = self.data.get('value_type')

        if i is None:
            return False

        # value extract
        r = self.get_resource_value(self.k, i)
        if self.op in ('in', 'not-in') and r is None:
            r = ()

        # value type conversion
        if self.vtype is not None:
            v, r = self.process_value_type(self.v, r, i)
        else:
            v = self.v

        # Value match
        if r is None and v == 'absent':
            return True
        elif r is not None and v == 'present':
            return True
        elif v == 'not-null' and r:
            return True
        elif v == 'empty' and not r:
            return True
        elif self.op:
            op = OPERATORS[self.op]
            try:
                return op(r, v)
            except TypeError:
                # Incomparable operands count as a non-match.
                return False
        elif r == v:
            return True

        return False

    def process_value_type(self, sentinel, value, resource):
        """Convert the pair according to ``self.vtype``.

        ``sentinel`` is the configured value and ``value`` the one read
        from the resource. Returns a 2-tuple that match() unpacks as
        (comparison value, resource value); some types deliberately swap
        the two.
        """
        if self.vtype == 'normalize' and isinstance(value, str):
            return sentinel, value.strip().lower()

        elif self.vtype == 'expr':
            sentinel = self.get_resource_value(sentinel, resource)
            return sentinel, value

        elif self.vtype == 'integer':
            try:
                value = int(str(value).strip())
            except ValueError:
                value = 0
        elif self.vtype == 'float':
            try:
                value = float(str(value).strip())
            except ValueError:
                value = 0.0
        elif self.vtype == 'size':
            try:
                return sentinel, len(value)
            except TypeError:
                return sentinel, 0
        elif self.vtype == 'unique_size':
            try:
                return sentinel, len(set(value))
            except TypeError:
                return sentinel, 0
        elif self.vtype == 'swap':
            return value, sentinel
        elif self.vtype == 'date':
            return parse_date(sentinel), parse_date(value)
        elif self.vtype == 'age':
            if not isinstance(sentinel, datetime.datetime):
                sentinel = datetime.datetime.now(tz=tzutc()) - timedelta(sentinel)
            value = parse_date(value)
            if value is None:
                # compatibility
                value = 0
            # Reverse the age comparison, we want to compare the value being
            # greater than the sentinel typically. Else the syntax for age
            # comparisons is intuitively wrong.
            return value, sentinel
        elif self.vtype == 'cidr':
            s = parse_cidr(sentinel)
            v = parse_cidr(value)
            if (isinstance(s, ipaddress._BaseAddress) and isinstance(v, ipaddress._BaseNetwork)):
                return v, s
            return s, v
        elif self.vtype == 'cidr_size':
            cidr = parse_cidr(value)
            if cidr:
                return sentinel, cidr.prefixlen
            return sentinel, 0

        # Allows for expiration filtering, for events in the future as opposed
        # to events in the past which age filtering allows for.
        elif self.vtype == 'expiration':
            if not isinstance(sentinel, datetime.datetime):
                sentinel = datetime.datetime.now(tz=tzutc()) + timedelta(sentinel)
            value = parse_date(value)
            if value is None:
                value = 0
            return sentinel, value

        # Allows for comparing version numbers, for things that you expect a minimum version number.
        elif self.vtype == 'version':
            s = ComparableVersion(sentinel)
            v = ComparableVersion(value)
            return s, v

        return sentinel, value


FilterRegistry.value_filter_class = ValueFilter

756 

757 

class AgeFilter(Filter):
    """Automatically filter resources older than a given date.

    **Deprecated** use a value filter with `value_type: age` which can be
    done on any attribute.
    """
    # Computed on the first call and then reused for later resources.
    threshold_date = None

    # The name of attribute to compare to threshold; must override in subclass
    date_attribute = None

    schema = None

    def validate(self):
        if self.date_attribute:
            return self
        raise NotImplementedError(
            "date_attribute must be overriden in subclass")

    def get_resource_date(self, i):
        """Return the resource's date attribute as an aware datetime.

        Non-datetime values are parsed; values without tzinfo get UTC.
        """
        stamp = i[self.date_attribute]
        if not isinstance(stamp, datetime.datetime):
            stamp = parse(stamp)
        if not stamp.tzinfo:
            stamp = stamp.replace(tzinfo=tzutc())
        return stamp

    def __call__(self, i):
        """Compare the threshold date with the resource date using `op`."""
        resource_date = self.get_resource_date(i)
        if resource_date is None:
            return False
        compare = OPERATORS[self.data.get('op', 'greater-than')]

        if not self.threshold_date:
            offset = timedelta(
                days=self.data.get('days', 0),
                hours=self.data.get('hours', 0),
                minutes=self.data.get('minutes', 0))
            # Work around placebo issues with tz
            if resource_date.tzinfo:
                now = datetime.datetime.now(tz=tzutc())
            else:
                now = datetime.datetime.now()
            self.threshold_date = now - offset

        return compare(self.threshold_date, resource_date)

804 

805 

class EventFilter(ValueFilter):
    """Filter a resource based on an event."""

    schema = type_schema('event', rinherit=ValueFilter.schema)
    schema_alias = True

    def validate(self):
        """Require a 'mode' entry in the manager's data; return self."""
        if 'mode' in self.manager.data:
            return self
        raise PolicyValidationError(
            "Event filters can only be used with lambda policies in %s" % (
                self.manager.data,))

    def process(self, resources, event=None):
        """Keep all resources when there is no event or the event matches."""
        if event is None or self(event):
            return resources
        return []

825 

826 

class ValueRegex:
    """Allows filtering based on the output of a regex capture.
    This is useful for parsing data that has a weird format.

    Instead of comparing the contents of the 'resource value' with the 'value',
    it will instead apply the regex to contents of the 'resource value', and compare
    the result of the capture group defined in that regex with the 'value'.
    Therefore you must have a single capture group defined in the regex.

    If the regex doesn't find a match it will return 'None'

    Example of getting a datetime object to make an 'expiration' comparison::

        type: value
        value_regex: ".*delete_after=([0-9]{4}-[0-9]{2}-[0-9]{2}).*"
        key: "tag:company_mandated_metadata"
        value_type: expiration
        op: lte
        value: 0
    """

    def __init__(self, expr):
        self.expr = expr

    def get_resource_value(self, resource):
        """Return the first capture group of the regex applied to ``resource``.

        Returns None for a None input, a value the regex cannot be applied
        to, or no match.
        """
        if resource is None:
            return resource
        try:
            found = re.match(self.expr, resource)
        except (ValueError, TypeError):
            return None
        # No match means there is nothing to capture.
        return found.group(1) if found is not None else None

861 

862 

863class ReduceFilter(BaseValueFilter): 

864 """Generic reduce filter to group, sort, and limit your resources. 

865 

866 This example will select the longest running instance from each ASG, 

867 then randomly choose 10% of those, maxing at 15 total instances. 

868 

869 :example: 

870 

871 .. code-block:: yaml 

872 

873 - name: oldest-instance-by-asg 

874 resource: ec2 

875 filters: 

876 - "tag:aws:autoscaling:groupName": present 

877 - type: reduce 

878 group-by: "tag:aws:autoscaling:groupName" 

879 sort-by: "LaunchTime" 

880 order: asc 

881 limit: 1 

882 

883 Or you might want to randomly select a 10 percent of your resources, 

884 but no more than 15. 

885 

886 :example: 

887 

888 .. code-block:: yaml 

889 

890 - name: random-selection 

891 resource: ec2 

892 filters: 

893 - type: reduce 

894 order: randomize 

895 limit: 15 

896 limit-percent: 10 

897 

898 """ 

899 annotate = False 

900 

901 schema = { 

902 'type': 'object', 

903 # Doesn't mix well with inherits that extend 

904 'additionalProperties': False, 

905 'required': ['type'], 

906 'properties': { 

907 # Doesn't mix well as enum with inherits that extend 

908 'type': {'enum': ['reduce']}, 

909 'group-by': { 

910 'oneOf': [ 

911 {'type': 'string'}, 

912 { 

913 'type': 'object', 

914 'key': {'type': 'string'}, 

915 'value_type': {'enum': ['string', 'number', 'date']}, 

916 'value_regex': 'string', 

917 }, 

918 ] 

919 }, 

920 'sort-by': { 

921 'oneOf': [ 

922 {'type': 'string'}, 

923 { 

924 'type': 'object', 

925 'key': {'type': 'string'}, 

926 'value_type': {'enum': ['string', 'number', 'date']}, 

927 'value_regex': 'string', 

928 }, 

929 ] 

930 }, 

931 'order': {'enum': ['asc', 'desc', 'reverse', 'randomize']}, 

932 'null-order': {'enum': ['first', 'last']}, 

933 'limit': {'type': 'number', 'minimum': 0}, 

934 'limit-percent': {'type': 'number', 'minimum': 0, 'maximum': 100}, 

935 'discard': {'type': 'number', 'minimum': 0}, 

936 'discard-percent': {'type': 'number', 'minimum': 0, 'maximum': 100}, 

937 }, 

938 } 

939 schema_alias = True 

940 

941 def __init__(self, data, manager): 

942 super(ReduceFilter, self).__init__(data, manager) 

943 self.order = self.data.get('order', 'asc') 

944 self.group_by = self.get_sort_config('group-by') 

945 self.sort_by = self.get_sort_config('sort-by') 

946 

947 def validate(self): 

948 # make sure the regexes compile 

949 if 'value_regex' in self.group_by: 

950 self._validate_value_regex(self.group_by['value_regex']) 

951 if 'value_regex' in self.sort_by: 

952 self._validate_value_regex(self.sort_by['value_regex']) 

953 return self 

954 

955 def process(self, resources, event=None): 

956 groups = self.group(resources) 

957 

958 # specified either of the sorting options, so sort 

959 if 'sort-by' in self.data or 'order' in self.data: 

960 groups = self.sort_groups(groups) 

961 

962 # now apply any limits to the groups and concatenate 

963 return list(filter(None, self.limit(groups))) 

964 

965 def group(self, resources): 

966 groups = {} 

967 for r in resources: 

968 v = self._value_to_sort(self.group_by, r) 

969 vstr = str(v) 

970 if vstr not in groups: 

971 groups[vstr] = {'sortkey': v, 'resources': []} 

972 groups[vstr]['resources'].append(r) 

973 return groups 

974 

975 def get_sort_config(self, key): 

976 # allow `foo: bar` but convert to 

977 # `foo: {'key': bar}` 

978 d = self.data.get(key, {}) 

979 if isinstance(d, str): 

980 d = {'key': d} 

981 d['null_sort_value'] = self.null_sort_value(d) 

982 return d 

983 

984 def sort_groups(self, groups): 

985 for g in groups: 

986 groups[g]['resources'] = self.reorder( 

987 groups[g]['resources'], 

988 key=lambda r: self._value_to_sort(self.sort_by, r), 

989 ) 

990 return groups 

991 

992 def _value_to_sort(self, config, r): 

993 expr = config.get('key') 

994 vtype = config.get('value_type', 'string') 

995 vregex = config.get('value_regex') 

996 v = None 

997 

998 try: 

999 # extract value based on jmespath 

1000 if expr: 

1001 v = self.get_resource_value(expr, r, vregex) 

1002 

1003 if v is not None: 

1004 # now convert to expected type 

1005 if vtype == 'number': 

1006 v = float(v) 

1007 elif vtype == 'date': 

1008 v = parse_date(v) 

1009 else: 

1010 v = str(v) 

1011 except (AttributeError, ValueError): 

1012 v = None 

1013 

1014 if v is None: 

1015 v = config.get('null_sort_value') 

1016 return v 

1017 

def null_sort_value(self, config):
    """Return the stand-in sort value for resources lacking a value.

    The stand-in is chosen so that, given ``self.order`` and the
    ``null-order`` policy setting (default ``'last'``), such resources
    land at the requested end of the sorted output.  Its type follows
    ``config['value_type']`` (default ``'string'``).

    :param config: sort configuration mapping
    :returns: a float, a timezone-aware datetime or a string
    """
    vtype = config.get('value_type', 'string')
    nulls_last = self.data.get('null-order', 'last') == 'last'
    descending = self.order == 'desc'

    # nulls-last under a descending sort, or nulls-first under any other
    # order, both require a value that compares lowest
    sorts_first = nulls_last == descending

    if vtype == 'number':
        return float('-inf') if sorts_first else float('inf')
    if vtype == 'date':
        extreme = datetime.datetime.min if sorts_first else datetime.datetime.max
        return extreme.replace(tzinfo=tzutc())
    return '' if sorts_first else '\uffff'

1038 

def limit(self, groups):
    """Apply the discard and limit settings to each group and flatten.

    Settings read from ``self.data`` (all default to 0, meaning "off"):

    * ``discard`` / ``discard-percent`` - number / percentage of leading
      resources to drop from each group; when both apply, the larger
      resulting count is used.
    * ``limit`` / ``limit-percent`` - number / percentage of the
      remaining resources to keep; when both apply, the smaller
      resulting count is used.

    Groups are visited in ``self.reorder`` order of their ``sortkey``
    when ``group-by`` or ``order`` is configured, otherwise in the
    mapping's own order.  Each group's ``resources`` entry is replaced by
    its post-discard list.

    The discard count is computed independently for every group, so a
    percentage-derived count from a large group does not carry over
    into the groups processed after it.

    :param groups: mapping of group name to a dict with ``sortkey`` and
        ``resources`` entries
    :returns: flat list of the resources kept from every group
    """
    results = []

    cap = self.data.get('limit', 0)
    pct = self.data.get('limit-percent', 0)
    drop = self.data.get('discard', 0)
    droppct = self.data.get('discard-percent', 0)

    ordered = list(groups)
    if 'group-by' in self.data or 'order' in self.data:
        ordered = self.reorder(ordered, key=lambda g: groups[g]['sortkey'])

    for g in ordered:
        resources = groups[g]['resources']

        # discard X first; the count is local to this group
        group_drop = drop
        if droppct > 0:
            group_drop = max(drop, int(droppct / 100 * len(resources)))
        if group_drop > 0:
            resources = resources[group_drop:]
            groups[g]['resources'] = resources

        # then limit the remaining
        count = len(resources)
        if pct > 0:
            count = int(pct / 100 * len(resources))
        if 0 < cap < count:
            count = cap
        results.extend(resources[:count])
    return results

1066 

def reorder(self, items, key=None):
    """Return ``items`` rearranged according to ``self.order``.

    * ``'randomize'`` - a random permutation of ``items``
    * ``'reverse'`` - ``items`` in reversed position order (``key`` unused)
    * anything else - ``items`` sorted by ``key``, descending when the
      order is ``'desc'``

    :param items: sequence to rearrange; it is not modified
    :param key: optional one-argument function giving each item's sort value
    :returns: a new sequence
    """
    mode = self.order
    if mode == 'randomize':
        return sample(items, k=len(items))
    if mode == 'reverse':
        return items[::-1]
    descending = mode == 'desc'
    return sorted(items, key=key, reverse=descending)

1074 

1075 

class ListItemModel:
    """Resource model returned by ``ListItemResourceManager.get_model``."""

    # name of the key under which each list item carries its identifier
    id = 'c7n:_id'

1078 

1079 

class ListItemRegistry(FilterRegistry):
    """Filter registry offering only the filters usable on list items."""

    def __init__(self, *args, **kw):
        # super(FilterRegistry, self) starts the lookup *after*
        # FilterRegistry, so FilterRegistry.__init__ itself does not run
        # and only the filters listed below get registered here.
        # NOTE(review): presumably done to skip FilterRegistry's own
        # default registrations - confirm against its __init__.
        super(FilterRegistry, self).__init__(*args, **kw)
        for name, filter_class in (
            ('value', ValueFilter),
            ('or', Or),
            ('and', And),
            ('not', Not),
            ('reduce', ReduceFilter),
        ):
            self.register(name, filter_class)

1089 

1090 

class ListItemResourceManager(ResourceManager):
    """Resource manager used to run filters over the items of a list."""

    # filters available to this manager
    filter_registry = ListItemRegistry('filters')

    def get_model(self):
        """Return the model class describing a list item."""
        return ListItemModel

1096 

1097 

class ListItemFilter(Filter):
    """
    Perform multi attribute filtering on items within a list,
    for example looking for security groups that have rules which
    include 0.0.0.0/0 and port 22 open.

    :example:

    .. code-block:: yaml

      policies:
        - name: security-group-with-22-open-to-world
          resource: aws.security-group
          filters:
            - type: list-item
              key: IpPermissions
              attrs:
                - type: value
                  key: IpRanges[].CidrIp
                  value: '0.0.0.0/0'
                  op: in
                  value_type: swap
                - type: value
                  key: FromPort
                  value: 22
                - type: value
                  key: ToPort
                  value: 22
        - name: find-task-def-not-using-registry
          resource: aws.ecs-task-definition
          filters:
            - not:
              - type: list-item
                key: containerDefinitions
                attrs:
                  - not:
                    - type: value
                      key: image
                      value: "${account_id}.dkr.ecr.us-east-2.amazonaws.com.*"
                      op: regex
    """

    schema = type_schema(
        'list-item',
        **{
            'key': {'type': 'string'},
            'attrs': {'$ref': '#/definitions/filters_common/list_item_attrs'},
            'count': {'type': 'number'},
            'count_op': {'$ref': '#/definitions/filters_common/comparison_operators'},
        },
    )

    schema_alias = True
    # False: annotate resources with "<key>[<index>]" strings;
    # True: annotate them with the matching list items themselves
    annotate_items = False
    # resource key under which the annotations are accumulated
    item_annotation_key = "c7n:ListItemMatches"
    # compiled form of data['key'], built on first access of ``expr``
    _expr = None

    @property
    def expr(self):
        """Compiled expression for ``data['key']``, compiled on first use."""
        if not self._expr:
            self._expr = jmespath_compile(self.data['key'])
        return self._expr

    def check_count(self, rcount):
        """Compare ``rcount`` against the configured ``count``.

        The comparison operator is looked up in ``OPERATORS`` by
        ``count_op`` (default ``'eq'``).

        :returns: ``True`` when the comparison holds, ``False`` when no
            ``count`` is configured, ``None`` otherwise
        """
        if 'count' not in self.data:
            return False
        expected = self.data['count']
        compare = OPERATORS[self.data.get('count_op', 'eq')]
        if compare(rcount, expected):
            return True
        return None

    def process(self, resources, event=None):
        """Return the resources whose list items satisfy the filter.

        For each resource the list found by ``get_item_values`` is run
        through the ``attrs`` filters.  With ``count`` configured, the
        resource is kept when the number of matching items passes
        ``check_count``; otherwise it is kept when at least one item
        matches, and the matches are recorded on the resource under
        ``item_annotation_key``.

        :raises PolicyExecutionError: when the extracted value is truthy
            but not a list
        """
        matched = []
        item_manager = ListItemResourceManager(
            self.manager.ctx, data={'filters': self.data.get('attrs', [])})

        for resource in resources:
            items = self.get_item_values(resource)

            # nothing to inspect: only a count comparison against 0 can match
            if not items:
                if self.check_count(0):
                    matched.append(resource)
                continue

            if not isinstance(items, list):
                item_type = type(items)
                raise PolicyExecutionError(
                    f"list-item filter value for {self.data['key']} is a {item_type} not a list"
                )

            # tag every item with its position so matches can be mapped
            # back to indexes, then strip the tags once filtering is done
            for position, item in enumerate(items):
                item['c7n:_id'] = position
            hits = item_manager.filter_resources(items, event)
            hit_positions = [hit['c7n:_id'] for hit in hits]
            for item in items:
                item.pop('c7n:_id')

            if 'count' in self.data:
                if self.check_count(len(hits)):
                    matched.append(resource)
                continue

            if not hits:
                continue

            if self.annotate_items:
                annotations = hits
            else:
                prefix = self.data.get("key", self.type)
                annotations = [f'{prefix}[{str(i)}]' for i in hit_positions]
            resource.setdefault(self.item_annotation_key, []).extend(annotations)
            matched.append(resource)
        return matched

    def get_item_values(self, resource):
        """Return the result of searching ``resource`` with ``expr``."""
        return self.expr.search(resource)

    def __call__(self, resource):
        """Return whether the single ``resource`` passes the filter."""
        return bool(self.process((resource,)))