Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/c7n/filters/core.py: 25%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Copyright The Cloud Custodian Authors.
2# SPDX-License-Identifier: Apache-2.0
3"""
4Resource Filtering Logic
5"""
6import copy
7import datetime
8from datetime import timedelta
9import fnmatch
10import ipaddress
11import logging
12import operator
13import re
15from dateutil.tz import tzutc
16from dateutil.parser import parse
17from c7n.vendored.distutils import version
18from random import sample
20from c7n.element import Element
21from c7n.exceptions import PolicyValidationError, PolicyExecutionError
22from c7n.manager import ResourceManager
23from c7n.registry import PluginRegistry
24from c7n.resolver import ValuesFrom
25from c7n.utils import (
26 set_annotation,
27 type_schema,
28 parse_cidr,
29 parse_date,
30 jmespath_search,
31 jmespath_compile
32)
33from c7n.manager import iter_filters
class FilterValidationError(Exception):
    """Exception type named for filter validation problems.

    It is not raised anywhere in the visible part of this module.
    """
# Matching filters annotate their key onto objects: ValueFilter.__call__
# records the key of a matching filter on the resource under this
# annotation name (via set_annotation).
ANNOTATION_KEY = "c7n:MatchedFilters"
def glob_match(value, pattern):
    """Return True when the string ``value`` matches the glob ``pattern``.

    Matching is delegated to ``fnmatch.fnmatch``; any non-string ``value``
    is reported as a non-match.
    """
    return isinstance(value, str) and fnmatch.fnmatch(value, pattern)
def regex_match(value, regex):
    """Case-insensitively match ``regex`` against the start of ``value``.

    Returns False for any non-string ``value``.
    """
    if isinstance(value, str):
        # The re module caches compiled patterns internally, so passing the
        # raw pattern on every call is acceptable here.
        return re.match(regex, value, flags=re.IGNORECASE) is not None
    return False
def regex_case_sensitive_match(value, regex):
    """Case-sensitively match ``regex`` against the start of ``value``.

    Returns False for any non-string ``value``.
    """
    if isinstance(value, str):
        # The re module caches compiled patterns internally, so passing the
        # raw pattern on every call is acceptable here.
        return re.match(regex, value) is not None
    return False
def operator_in(x, y):
    """Return True when ``x`` is a member of the container ``y``."""
    return operator.contains(y, x)
def operator_ni(x, y):
    """Return True when ``x`` is not a member of the container ``y``."""
    return not operator.contains(y, x)
def difference(x, y):
    """Return True when ``x`` holds at least one element absent from ``y``."""
    leftover = set(x)
    leftover.difference_update(y)
    return len(leftover) > 0
def intersect(x, y):
    """Return True when ``x`` and ``y`` share at least one element."""
    shared = set(x).intersection(y)
    return len(shared) > 0
def mod(x, y):
    """Return the truthiness of ``x % y``.

    For numbers this is True when ``x`` is not evenly divisible by ``y``.
    """
    remainder = x % y
    return bool(remainder)
# Maps the operator names usable as a filter's `op` to two-argument callables.
# ValueFilter.match calls the selected entry as op(r, v), where r is the value
# extracted from the resource and v is the configured value (both after any
# value_type conversion). Several names are aliases for the same callable,
# e.g. 'eq'/'equal' and 'ni'/'not-in'.
OPERATORS = {
    'eq': operator.eq,
    'equal': operator.eq,
    'ne': operator.ne,
    'not-equal': operator.ne,
    'gt': operator.gt,
    'greater-than': operator.gt,
    'ge': operator.ge,
    'gte': operator.ge,
    'le': operator.le,
    'lte': operator.le,
    'lt': operator.lt,
    'less-than': operator.lt,
    'glob': glob_match,
    'regex': regex_match,
    'regex-case': regex_case_sensitive_match,
    'in': operator_in,
    'ni': operator_ni,
    'not-in': operator_ni,
    'contains': operator.contains,
    'difference': difference,
    'intersect': intersect,
    'mod': mod}
# Names of the `value_type` conversions. 'resource_count' is handled by
# ValueFilter.process / ValueFilter._validate_resource_count; the remaining
# names each have a branch in ValueFilter.process_value_type.
VALUE_TYPES = [
    'age', 'integer', 'expiration', 'normalize', 'size',
    'cidr', 'cidr_size', 'swap', 'resource_count', 'expr',
    'unique_size', 'date', 'version', 'float']
class FilterRegistry(PluginRegistry):
    """Plugin registry that builds filter instances from policy filter data.

    Every instance registers the built-in filter types on construction.
    """

    # Assigned to ValueFilter at module level once that class is defined.
    value_filter_class = None

    def __init__(self, *args, **kw):
        super().__init__(*args, **kw)
        builtin_filters = (
            ('value', ValueFilter),
            ('or', Or),
            ('and', And),
            ('not', Not),
            ('event', EventFilter),
            ('reduce', ReduceFilter),
            ('list-item', ListItemFilter),
        )
        for name, klass in builtin_filters:
            self.register(name, klass)

    def parse(self, data, manager):
        """Build one filter per entry of ``data`` and return them as a list."""
        return [self.factory(entry, manager) for entry in data]

    def factory(self, data, manager=None):
        """Factory func for filters.

        data - policy config for filters
        manager - resource type manager (ec2, s3, etc)

        Raises PolicyValidationError when no filter type is given or the
        type is not registered.
        """
        # Make the syntax a little nicer for common cases: a single-key
        # mapping without 'type' is either a boolean block or a value filter.
        if isinstance(data, dict) and len(data) == 1 and 'type' not in data:
            op = next(iter(data))
            if op in ('or', 'and', 'not'):
                return self[op](data, self, manager)
            return self.value_filter_class(data, manager)

        # A bare string is shorthand for {'type': <string>}.
        if isinstance(data, str):
            filter_type = data
            data = {'type': data}
        else:
            filter_type = data.get('type')

        if not filter_type:
            raise PolicyValidationError(
                "%s Invalid Filter %s" % (
                    self.plugin_type, data))

        filter_class = self.get(filter_type)
        if filter_class is None:
            raise PolicyValidationError(
                "%s Invalid filter type %s" % (
                    self.plugin_type, data))
        return filter_class(data, manager)
def trim_runtime(filters):
    """Remove runtime filters.

    Some filters can only be effectively evaluated at policy
    execution, ie. event filters.

    When evaluating conditions for dryrun or provisioning stages we
    remove them.
    """
    def remove_filter(f):
        # Detach f from its parent block, then keep walking upwards while
        # the parent is a boolean group that has just become empty.
        while True:
            block = f.get_block_parent()
            block.filters.remove(f)
            # len() is used deliberately: BooleanGroupFilter.__bool__ is
            # always True, so truthiness cannot detect an empty group.
            if not (isinstance(block, BooleanGroupFilter) and len(block) == 0):
                return
            f = block

    for candidate in iter_filters(filters):
        if isinstance(candidate, EventFilter):
            remove_filter(candidate)
# Really should be an abstract base class (abc) or
# zope.interface
class Filter(Element):
    """Base class for filters: holds the filter data and its manager.

    ``process`` calls the instance on each resource, so subclasses that
    rely on it must be callable.
    """

    log = logging.getLogger('custodian.filters')

    def __init__(self, data, manager=None):
        self.data = data
        self.manager = manager

    def process(self, resources, event=None):
        """ Bulk process resources and return filtered set."""
        return [r for r in resources if self(r)]

    def get_block_operator(self):
        """Determine the immediate parent boolean operator for a filter"""
        # Top level operator is `and`
        parent_type = self.get_block_parent().type
        return parent_type if parent_type in ('and', 'or', 'not') else 'and'

    def get_block_parent(self):
        """Get the block parent for a filter"""
        # The manager is the outermost block. iter_filters(block_end=True)
        # yields None to mark the end of a nested block.
        stack = [self.manager]
        for f in self.manager.iter_filters(block_end=True):
            if f is None:
                stack.pop()
                continue
            if f == self:
                return stack[-1]
            if f.type in ('and', 'or', 'not'):
                stack.append(f)
        # Falls through (returning None) when this filter is never yielded.
        return None

    def merge_annotation(self, r, annotation_key, values):
        """Merge ``values`` into the matched annotation on resource ``r``.

        Inside an 'and'/'not' block the existing annotation is intersected
        with ``values``; inside an 'or' block the two are unioned.

        Note: ``annotation_key`` is accepted but not used here; the key is
        read from ``self.matched_annotation_key``, which this class does not
        define (presumably subclasses provide it).
        """
        block_op = self.get_block_operator()
        if block_op in ('and', 'not'):
            merge = intersect_list
        elif block_op == 'or':
            merge = union_list
        else:
            return
        r[self.matched_annotation_key] = merge(
            values, r.get(self.matched_annotation_key))
class BaseValueFilter(Filter):
    """Base for filters that extract a value from a resource by key.

    Compiled JMESPath expressions are cached per instance in ``self.expr``.
    """
    expr = None

    def __init__(self, data, manager=None):
        super(BaseValueFilter, self).__init__(data, manager)
        # Cache of key -> compiled jmespath expression.
        self.expr = {}

    def get_resource_value(self, k, i, regex=None):
        """Return the value for key ``k`` from resource ``i``.

        ``k`` is resolved, in order, as:

        - ``tag:<name>``: looked up in ``Tags`` (list of Key/Value dicts),
          else ``labels`` (mapping), else ``tags`` (mapping);
        - a literal top-level key of ``i``;
        - a JMESPath expression evaluated against ``i``.

        When ``regex`` is given, the extracted value is passed through
        ``ValueRegex(regex)`` and the captured group is returned instead.
        Returns None when nothing is found.
        """
        r = None
        if k.startswith('tag:'):
            tk = k.split(':', 1)[1]
            # Each container may be present but set to None; treat that the
            # same as empty, as the 'tags' branch always has.
            if 'Tags' in i:
                for t in i.get('Tags') or []:
                    if t.get('Key') == tk:
                        r = t.get('Value')
                        break
            # GCP schema: 'labels': {'key': 'value'}
            elif 'labels' in i:
                r = (i.get('labels') or {}).get(tk)
            # GCP has a secondary form of labels called tags
            # as labels without values.
            # Azure schema: 'tags': {'key': 'value'}
            elif 'tags' in i:
                r = (i.get('tags') or {}).get(tk)
        elif k in i:
            r = i.get(k)
        else:
            compiled = self.expr.get(k)
            if compiled is None:
                compiled = self.expr[k] = jmespath_compile(k)
            r = compiled.search(i)

        if regex:
            r = ValueRegex(regex).get_resource_value(r)
        return r

    def _validate_value_regex(self, regex):
        """Specific validation for `value_regex` type

        The `value_regex` type works a little differently. In
        particular it doesn't support OPERATORS that perform
        operations on a list of values, specifically 'intersect',
        'contains', 'difference', 'in' and 'not-in'

        Raises PolicyValidationError when ``regex`` does not compile or does
        not contain exactly one capturing group. Returns ``self`` otherwise.
        """
        # Sanity check that we can compile
        try:
            pattern = re.compile(regex)
        except re.error as e:
            raise PolicyValidationError(
                "Invalid value_regex: %s %s" % (e, self.data)) from e
        if pattern.groups != 1:
            raise PolicyValidationError(
                "value_regex must have a single capturing group: %s" %
                self.data)
        return self
def intersect_list(a, b):
    """Return the items of ``a`` that also occur in ``b``, in ``a``'s order.

    If either argument is None the other one is returned unchanged.
    """
    if b is None:
        return a
    if a is None:
        return b
    return [item for item in a if item in b]
def union_list(a, b):
    """Return the items of ``a`` followed by items of ``b`` not already present.

    If either argument is empty or None, the other is returned as-is.
    Otherwise a new list is built, so neither input is modified.
    """
    if not b:
        return a
    if not a:
        return b
    # Copy first: extending ``a`` in place would silently modify the
    # caller's list.
    res = list(a)
    for x in b:
        if x not in res:
            res.append(x)
    return res
class BooleanGroupFilter(Filter):
    """Base class for the and/or/not filters, which wrap a list of filters."""

    def __init__(self, data, registry, manager):
        super(BooleanGroupFilter, self).__init__(data)
        self.registry = registry
        # The nested filter definitions are the first value of the mapping,
        # e.g. {'or': [...]}.
        nested = tuple(self.data.values())[0]
        self.filters = registry.parse(nested, manager)
        self.manager = manager

    def validate(self):
        """Validate every nested filter and return ``self``."""
        for child in self.filters:
            child.validate()
        return self

    def get_resource_type_id(self):
        """Return the ``id`` attribute of the manager's resource model."""
        return self.manager.get_model().id

    def __len__(self):
        return len(self.filters)

    def __bool__(self):
        # Always truthy, so a group with no nested filters is not falsy
        # merely because __len__ returns 0.
        return True

    def get_deprecations(self):
        """Return any matching deprecations for the nested filters."""
        return [d for child in self.filters for d in child.get_deprecations()]
class Or(BooleanGroupFilter):
    """Boolean 'or' block: a resource matches if any nested filter matches."""

    def process(self, resources, event=None):
        if self.manager:
            return self.process_set(resources, event)
        return super(Or, self).process(resources, event)

    def __call__(self, r):
        """Fallback for older unit tests that don't utilize a query manager"""
        return any(f(r) for f in self.filters)

    def process_set(self, resources, event):
        """Return the resources matched by at least one nested filter.

        Resources are identified by the resource model's id, which is read
        either as a plain key or, when it contains a '.', as a JMESPath
        expression. The result follows the order of ``resources`` rather
        than set iteration order, so it is stable between runs.
        """
        rtype_id = self.get_resource_type_id()
        if '.' in rtype_id:
            get_id = jmespath_compile(rtype_id).search
        else:
            get_id = operator.itemgetter(rtype_id)

        resource_map = {get_id(r): r for r in resources}
        matched_ids = set()
        for f in self.filters:
            matched_ids.update(get_id(r) for r in f.process(resources, event))
        # dicts preserve insertion order, so this yields input order.
        return [r for r_id, r in resource_map.items() if r_id in matched_ids]
class And(BooleanGroupFilter):
    """Boolean 'and' block: nested filters are applied one after another."""

    def process(self, resources, events=None):
        """Run each nested filter on the survivors of the previous one.

        When a manager is set, annotations on resources that did not survive
        the whole block are reset through an AnnotationSweeper.
        """
        sweeper = None
        if self.manager:
            sweeper = AnnotationSweeper(self.get_resource_type_id(), resources)

        for f in self.filters:
            resources = f.process(resources, events)
            # Nothing left to narrow down.
            if not resources:
                break

        if sweeper is not None:
            sweeper.sweep(resources)

        return resources
class Not(BooleanGroupFilter):
    """Boolean 'not' block: negates the implicit 'and' of its nested filters."""

    def process(self, resources, event=None):
        if self.manager:
            return self.process_set(resources, event)
        return super(Not, self).process(resources, event)

    def __call__(self, r):
        """Fallback for older unit tests that don't utilize a query manager"""

        # There is an implicit 'and' for self.filters, so by De Morgan
        # not (A and B and ... and Z) == (not A) or (not B) or ... or (not Z)
        return any(not f(r) for f in self.filters)

    def process_set(self, resources, event):
        """Return the resources that do NOT pass every nested filter.

        Resources are identified by the resource model's id, which is read
        either as a plain key or, when it contains a '.', as a JMESPath
        expression. The result follows the order of ``resources`` rather
        than set iteration order, so it is stable between runs.
        """
        rtype_id = self.get_resource_type_id()
        if '.' in rtype_id:
            get_id = jmespath_compile(rtype_id).search
        else:
            get_id = operator.itemgetter(rtype_id)

        resource_map = {get_id(r): r for r in resources}
        sweeper = AnnotationSweeper(rtype_id, resources)

        for f in self.filters:
            resources = f.process(resources, event)
            if not resources:
                break

        passed_ids = {get_id(r) for r in resources}
        # Sweeping against an empty list restores the pre-block annotations
        # on every input resource.
        sweeper.sweep([])

        # dicts preserve insertion order, so this yields input order.
        return [r for r_id, r in resource_map.items() if r_id not in passed_ids]
class AnnotationSweeper:
    """Support clearing annotations set within a block filter.

    See https://github.com/cloud-custodian/cloud-custodian/issues/2116
    """
    def __init__(self, id_key, resources):
        self.id_key = id_key
        # An id key containing '.' is treated as a JMESPath expression.
        compiled = jmespath_compile(id_key) if '.' in id_key else None

        annotations = {}
        by_id = {}
        for r in resources:
            id_ = compiled.search(r) if compiled else r[id_key]
            annotations[id_] = {
                k: v for k, v in r.items() if k.startswith('c7n')}
            by_id[id_] = r

        # We keep a full copy of the annotation keys to allow restore.
        self.ra_map = copy.deepcopy(annotations)
        self.resource_map = by_id

    def sweep(self, resources):
        """Reset annotations on every tracked resource missing from ``resources``.

        For each such resource all keys starting with 'c7n' are removed and
        the annotations captured at construction time are put back.
        """
        if '.' in self.id_key:
            search = jmespath_compile(self.id_key).search
            kept_ids = [search(r) for r in resources]
        else:
            kept_ids = [r[self.id_key] for r in resources]

        for rid in set(self.ra_map).difference(kept_ids):
            resource = self.resource_map[rid]
            # Clear annotations if the block filter didn't match
            for k in [k for k in resource if k.startswith('c7n')]:
                del resource[k]
            # Restore annotations that may have existed prior to the block filter.
            resource.update(self.ra_map[rid])
# The default LooseVersion will fail on comparing present strings, used
# in the value as shorthand for certain options.
class ComparableVersion(version.LooseVersion):
    """LooseVersion whose equality check returns False instead of raising TypeError."""

    def __eq__(self, other):
        try:
            equal = super().__eq__(other)
        except TypeError:
            equal = False
        return equal
class ValueFilter(BaseValueFilter):
    """Generic value filter using jmespath

    Extracts a value from each resource by ``key`` and compares it with the
    configured ``value`` (or ``value_from`` / ``value_path``) using ``op``,
    optionally converting both sides according to ``value_type``.
    """
    op = v = vtype = None

    schema = {
        'type': 'object',
        # Doesn't mix well with inherits that extend
        'additionalProperties': False,
        'required': ['type'],
        'properties': {
            # Doesn't mix well as enum with inherits that extend
            'type': {'enum': ['value']},
            'key': {'type': 'string'},
            'value_type': {'$ref': '#/definitions/filters_common/value_types'},
            'default': {'type': 'object'},
            'value_regex': {'type': 'string'},
            'value_from': {'$ref': '#/definitions/filters_common/value_from'},
            'value': {'$ref': '#/definitions/filters_common/value'},
            'op': {'$ref': '#/definitions/filters_common/comparison_operators'},
            'value_path': {'type': 'string'}
        }
    }
    schema_alias = True
    annotate = True
    required_keys = {'value', 'key'}

    def _validate_resource_count(self):
        """ Specific validation for `resource_count` type

        The `resource_count` type works a little differently because it operates
        on the entire set of resources.  It:
          - does not require `key`
          - `value` must be a number
          - supports a subset of the OPERATORS list
        """
        for field in ('op', 'value'):
            if field not in self.data:
                raise PolicyValidationError(
                    "Missing '%s' in value filter %s" % (field, self.data))

        # Note: a list is accepted here as well as an int.
        if not (isinstance(self.data['value'], int) or
                isinstance(self.data['value'], list)):
            raise PolicyValidationError(
                "`value` must be an integer in resource_count filter %s" % self.data)

        # I don't see how to support regex for this?
        if (self.data['op'] not in OPERATORS or
                self.data['op'] in {'regex', 'regex-case'} or
                'value_regex' in self.data):
            raise PolicyValidationError(
                "Invalid operator in value filter %s" % self.data)

        return self

    def validate(self):
        """Validate the filter data and return ``self``.

        Single-key shorthand filters (``{key: value}``) are accepted as-is.
        Raises PolicyValidationError for missing key/value, an unknown
        operator, an uncompilable regex or an invalid date value.
        """
        if len(self.data) == 1:
            return self

        # `resource_count` requires a slightly different schema than the rest of
        # the value filters because it operates on the full resource list
        if self.data.get('value_type') == 'resource_count':
            return self._validate_resource_count()
        elif self.data.get('value_type') == 'date':
            if not parse_date(self.data.get('value')):
                # Interpolate the value into the message; a 1-tuple keeps
                # this safe even if the value is itself a tuple.
                raise PolicyValidationError(
                    "value_type: date with invalid date value:%s" % (
                        self.data.get('value', ''),))
        if 'key' not in self.data and 'key' in self.required_keys:
            raise PolicyValidationError(
                "Missing 'key' in value filter %s" % self.data)
        if ('value' not in self.data and
                'value_from' not in self.data and
                'value_path' not in self.data and
                'value' in self.required_keys):
            raise PolicyValidationError(
                "Missing 'value' in value filter %s" % self.data)
        if 'op' in self.data:
            if self.data['op'] not in OPERATORS:
                raise PolicyValidationError(
                    "Invalid operator in value filter %s" % self.data)
            if self.data['op'] in {'regex', 'regex-case'}:
                # Sanity check that we can compile
                try:
                    re.compile(self.data['value'])
                except re.error as e:
                    raise PolicyValidationError(
                        "Invalid regex: %s %s" % (e, self.data)) from e
        if 'value_regex' in self.data:
            return self._validate_value_regex(self.data['value_regex'])

        return self

    def __call__(self, i):
        """Return whether ``i`` matches, annotating it with the key on a match."""
        if self.data.get('value_type') == 'resource_count':
            return self.process(i)

        matched = self.match(i)
        if matched and self.annotate:
            set_annotation(i, ANNOTATION_KEY, self.k)
        return matched

    def process(self, resources, event=None):
        # For the resource_count filter we operate on the full set of resources.
        if self.data.get('value_type') == 'resource_count':
            op = OPERATORS[self.data.get('op')]
            if op(len(resources), self.data.get('value')):
                return resources
            return []

        return super(ValueFilter, self).process(resources, event)

    def get_resource_value(self, k, i):
        """Extract key ``k`` from ``i``, applying the filter's ``value_regex`` if set."""
        return super(ValueFilter, self).get_resource_value(k, i, self.data.get('value_regex'))

    def get_path_value(self, i):
        """Retrieve values using JMESPath.

        When using a Value Filter, a ``value_path`` can be specified.
        This means the value(s) the filter will compare against are
        calculated during the initialization of the filter.

        Note that this option only pulls properties of the resource
        currently being filtered.

        .. code-block:: yaml

            - name: find-admins-with-user-roles
              resource: gcp.project
              filters:
                - type: iam-policy
                  doc:
                    key: bindings[?(role=='roles/admin')].members[]
                    op: intersect
                    value_path: bindings[?(role=='roles/user_access')].members[]

        The iam-policy use the implementation of the generic Value Filter.
        This implementation allows for the comparison of two separate lists of values
        within the same resource.
        """
        return jmespath_search(self.data.get('value_path'), i)

    def match(self, i):
        """Return True when resource ``i`` satisfies this filter.

        The filter's key, operator, comparison value and value type are
        resolved from ``self.data`` on first use and kept on the instance.
        """
        if self.v is None and len(self.data) == 1:
            [(self.k, self.v)] = self.data.items()
        elif self.v is None and not hasattr(self, 'content_initialized'):
            self.k = self.data.get('key')
            self.op = self.data.get('op')
            if 'value_from' in self.data:
                values = ValuesFrom(self.data['value_from'], self.manager)
                self.v = values.get_values()
            elif 'value_path' in self.data:
                # NOTE(review): this runs only during first-use initialization,
                # so the value_path result from the first resource appears to
                # be reused for later resources - confirm this is intended.
                self.v = self.get_path_value(i)
            else:
                self.v = self.data.get('value')
            self.content_initialized = True
            self.vtype = self.data.get('value_type')

        if i is None:
            return False

        # value extract
        r = self.get_resource_value(self.k, i)
        if self.op in ('in', 'not-in') and r is None:
            r = ()

        # value type conversion
        if self.vtype is not None:
            v, r = self.process_value_type(self.v, r, i)
        else:
            v = self.v

        # Value match: the special values 'absent', 'present', 'not-null'
        # and 'empty' are checked before any operator is applied.
        if r is None and v == 'absent':
            return True
        elif r is not None and v == 'present':
            return True
        elif v == 'not-null' and r:
            return True
        elif v == 'empty' and not r:
            return True
        elif self.op:
            op = OPERATORS[self.op]
            try:
                return op(r, v)
            except TypeError:
                return False
        elif r == v:
            return True

        return False

    def process_value_type(self, sentinel, value, resource):
        """Convert the pair according to ``self.vtype``.

        ``sentinel`` is the configured comparison value and ``value`` the
        value extracted from ``resource``. Returns a 2-tuple that ``match``
        unpacks as ``(v, r)``; some value types deliberately return the two
        in swapped order.
        """
        if self.vtype == 'normalize' and isinstance(value, str):
            return sentinel, value.strip().lower()

        elif self.vtype == 'expr':
            sentinel = self.get_resource_value(sentinel, resource)
            return sentinel, value

        elif self.vtype == 'integer':
            try:
                value = int(str(value).strip())
            except ValueError:
                value = 0
        elif self.vtype == 'float':
            try:
                value = float(str(value).strip())
            except ValueError:
                value = 0.0
        elif self.vtype == 'size':
            try:
                return sentinel, len(value)
            except TypeError:
                return sentinel, 0
        elif self.vtype == 'unique_size':
            try:
                return sentinel, len(set(value))
            except TypeError:
                return sentinel, 0
        elif self.vtype == 'swap':
            return value, sentinel
        elif self.vtype == 'date':
            return parse_date(sentinel), parse_date(value)
        elif self.vtype == 'age':
            if not isinstance(sentinel, datetime.datetime):
                sentinel = datetime.datetime.now(tz=tzutc()) - timedelta(sentinel)
            value = parse_date(value)
            if value is None:
                # compatibility
                value = 0
            # Reverse the age comparison, we want to compare the value being
            # greater than the sentinel typically. Else the syntax for age
            # comparisons is intuitively wrong.
            return value, sentinel
        elif self.vtype == 'cidr':
            s = parse_cidr(sentinel)
            v = parse_cidr(value)
            if (isinstance(s, ipaddress._BaseAddress) and isinstance(v, ipaddress._BaseNetwork)):
                return v, s
            return s, v
        elif self.vtype == 'cidr_size':
            cidr = parse_cidr(value)
            if cidr:
                return sentinel, cidr.prefixlen
            return sentinel, 0

        # Allows for expiration filtering, for events in the future as opposed
        # to events in the past which age filtering allows for.
        elif self.vtype == 'expiration':
            if not isinstance(sentinel, datetime.datetime):
                sentinel = datetime.datetime.now(tz=tzutc()) + timedelta(sentinel)
            value = parse_date(value)
            if value is None:
                value = 0
            return sentinel, value

        # Allows for comparing version numbers, for things that you expect a minimum version number.
        elif self.vtype == 'version':
            s = ComparableVersion(sentinel)
            v = ComparableVersion(value)
            return s, v

        return sentinel, value
# Bound here, after ValueFilter is defined. FilterRegistry.factory uses this
# class for single-key shorthand filters that are not and/or/not blocks.
FilterRegistry.value_filter_class = ValueFilter
class AgeFilter(Filter):
    """Automatically filter resources older than a given date.

    **Deprecated** use a value filter with `value_type: age` which can be
    done on any attribute.
    """
    # Computed on the first call and then reused by later calls.
    threshold_date = None

    # The name of attribute to compare to threshold; must override in subclass
    date_attribute = None

    schema = None

    def validate(self):
        if self.date_attribute:
            return self
        raise NotImplementedError(
            "date_attribute must be overriden in subclass")

    def get_resource_date(self, i):
        """Return the resource's date attribute as a timezone-aware datetime.

        Non-datetime values are parsed with dateutil; values without tzinfo
        are tagged as UTC.
        """
        v = i[self.date_attribute]
        if not isinstance(v, datetime.datetime):
            v = parse(v)
        if not v.tzinfo:
            v = v.replace(tzinfo=tzutc())
        return v

    def __call__(self, i):
        """Compare the threshold date with the resource date using ``op``.

        The threshold is "now" minus the configured days/hours/minutes and
        the default operator is 'greater-than'.
        """
        v = self.get_resource_date(i)
        if v is None:
            return False
        op = OPERATORS[self.data.get('op', 'greater-than')]

        if not self.threshold_date:
            offset = timedelta(
                days=self.data.get('days', 0),
                hours=self.data.get('hours', 0),
                minutes=self.data.get('minutes', 0))
            # Work around placebo issues with tz
            now = datetime.datetime.now(tz=tzutc()) if v.tzinfo else datetime.datetime.now()
            self.threshold_date = now - offset

        return op(self.threshold_date, v)
class EventFilter(ValueFilter):
    """Filter a resource based on an event."""

    schema = type_schema('event', rinherit=ValueFilter.schema)
    schema_alias = True

    def validate(self):
        """Require that the manager's policy data defines a 'mode'."""
        if 'mode' in self.manager.data:
            return self
        raise PolicyValidationError(
            "Event filters can only be used with lambda policies in %s" % (
                self.manager.data,))

    def process(self, resources, event=None):
        """Return all resources if there is no event or the event matches, else none."""
        if event is None or self(event):
            return resources
        return []
class ValueRegex:
    """Allows filtering based on the output of a regex capture.
    This is useful for parsing data that has a weird format.

    Instead of comparing the contents of the 'resource value' with the 'value',
    it will instead apply the regex to contents of the 'resource value', and compare
    the result of the capture group defined in that regex with the 'value'.
    Therefore you must have a single capture group defined in the regex.

    If the regex doesn't find a match it will return 'None'

    Example of getting a datetime object to make an 'expiration' comparison::

        type: value
        value_regex: ".*delete_after=([0-9]{4}-[0-9]{2}-[0-9]{2}).*"
        key: "tag:company_mandated_metadata"
        value_type: expiration
        op: lte
        value: 0
    """

    def __init__(self, expr):
        self.expr = expr

    def get_resource_value(self, resource):
        """Return the first capture group of ``expr`` matched against ``resource``.

        Returns None when ``resource`` is None, is of a type the regex cannot
        be applied to, or does not match.
        """
        if resource is None:
            return None
        try:
            capture = re.match(self.expr, resource)
        except (ValueError, TypeError):
            return None
        # A None capture means the regex did not match at all.
        return None if capture is None else capture.group(1)
class ReduceFilter(BaseValueFilter):
    """Generic reduce filter to group, sort, and limit your resources.

    This example will select the longest running instance from each ASG,
    then randomly choose 10% of those, maxing at 15 total instances.

    :example:

    .. code-block:: yaml

      - name: oldest-instance-by-asg
        resource: ec2
        filters:
          - "tag:aws:autoscaling:groupName": present
          - type: reduce
            group-by: "tag:aws:autoscaling:groupName"
            sort-by: "LaunchTime"
            order: asc
            limit: 1

    Or you might want to randomly select a 10 percent of your resources,
    but no more than 15.

    :example:

    .. code-block:: yaml

      - name: random-selection
        resource: ec2
        filters:
          - type: reduce
            order: randomize
            limit: 15
            limit-percent: 10

    """
    annotate = False

    # NOTE(review): in the object form of 'group-by'/'sort-by' below, 'key',
    # 'value_type' and 'value_regex' sit directly in the schema object rather
    # than under 'properties', so they are probably not enforced by schema
    # validation - confirm before tightening.
    schema = {
        'type': 'object',
        # Doesn't mix well with inherits that extend
        'additionalProperties': False,
        'required': ['type'],
        'properties': {
            # Doesn't mix well as enum with inherits that extend
            'type': {'enum': ['reduce']},
            'group-by': {
                'oneOf': [
                    {'type': 'string'},
                    {
                        'type': 'object',
                        'key': {'type': 'string'},
                        'value_type': {'enum': ['string', 'number', 'date']},
                        'value_regex': 'string',
                    },
                ]
            },
            'sort-by': {
                'oneOf': [
                    {'type': 'string'},
                    {
                        'type': 'object',
                        'key': {'type': 'string'},
                        'value_type': {'enum': ['string', 'number', 'date']},
                        'value_regex': 'string',
                    },
                ]
            },
            'order': {'enum': ['asc', 'desc', 'reverse', 'randomize']},
            'null-order': {'enum': ['first', 'last']},
            'limit': {'type': 'number', 'minimum': 0},
            'limit-percent': {'type': 'number', 'minimum': 0, 'maximum': 100},
            'discard': {'type': 'number', 'minimum': 0},
            'discard-percent': {'type': 'number', 'minimum': 0, 'maximum': 100},
        },
    }
    schema_alias = True

    def __init__(self, data, manager):
        super(ReduceFilter, self).__init__(data, manager)
        # `order` must be set first: get_sort_config -> null_sort_value reads it.
        self.order = self.data.get('order', 'asc')
        self.group_by = self.get_sort_config('group-by')
        self.sort_by = self.get_sort_config('sort-by')

    def validate(self):
        # make sure the regexes compile
        if 'value_regex' in self.group_by:
            self._validate_value_regex(self.group_by['value_regex'])
        if 'value_regex' in self.sort_by:
            self._validate_value_regex(self.sort_by['value_regex'])
        return self

    def process(self, resources, event=None):
        """Group, optionally sort, then discard/limit the resources."""
        groups = self.group(resources)

        # specified either of the sorting options, so sort
        if 'sort-by' in self.data or 'order' in self.data:
            groups = self.sort_groups(groups)

        # now apply any limits to the groups and concatenate
        return list(filter(None, self.limit(groups)))

    def group(self, resources):
        """Bucket resources by their group-by value.

        Returns a dict keyed by ``str(value)``; each entry holds the raw
        ``sortkey`` and the list of ``resources`` in that group.
        """
        groups = {}
        for r in resources:
            v = self._value_to_sort(self.group_by, r)
            vstr = str(v)
            if vstr not in groups:
                groups[vstr] = {'sortkey': v, 'resources': []}
            groups[vstr]['resources'].append(r)
        return groups

    def get_sort_config(self, key):
        """Return the normalized config dict for 'group-by' or 'sort-by'.

        The returned dict is always a fresh object, so the computed
        'null_sort_value' entry is never written into the policy data.
        """
        # allow `foo: bar` but convert to
        # `foo: {'key': bar}`
        d = self.data.get(key, {})
        if isinstance(d, str):
            d = {'key': d}
        else:
            d = dict(d)
        d['null_sort_value'] = self.null_sort_value(d)
        return d

    def sort_groups(self, groups):
        """Reorder the resources inside every group according to sort-by/order."""
        for g in groups:
            groups[g]['resources'] = self.reorder(
                groups[g]['resources'],
                key=lambda r: self._value_to_sort(self.sort_by, r),
            )
        return groups

    def _value_to_sort(self, config, r):
        """Extract and type-convert the sort value of resource ``r``.

        Falls back to the config's 'null_sort_value' when the value is
        missing or cannot be converted.
        """
        expr = config.get('key')
        vtype = config.get('value_type', 'string')
        vregex = config.get('value_regex')
        v = None

        try:
            # extract value based on jmespath
            if expr:
                v = self.get_resource_value(expr, r, vregex)

            if v is not None:
                # now convert to expected type
                if vtype == 'number':
                    v = float(v)
                elif vtype == 'date':
                    v = parse_date(v)
                else:
                    v = str(v)
        except (AttributeError, ValueError):
            v = None

        if v is None:
            v = config.get('null_sort_value')
        return v

    def null_sort_value(self, config):
        """Return a stand-in value that sorts missing values first or last.

        The choice depends on 'null-order' (default 'last') combined with
        the sort direction, and on the config's value type.
        """
        vtype = config.get('value_type', 'string')
        placement = self.data.get('null-order', 'last')

        if (placement == 'last' and self.order == 'desc') or (
            placement != 'last' and self.order != 'desc'
        ):
            # return a value that will sort first
            if vtype == 'number':
                return float('-inf')
            elif vtype == 'date':
                return datetime.datetime.min.replace(tzinfo=tzutc())
            return ''
        else:
            # return a value that will sort last
            if vtype == 'number':
                return float('inf')
            elif vtype == 'date':
                return datetime.datetime.max.replace(tzinfo=tzutc())
            return '\uffff'

    def limit(self, groups):
        """Apply discard and limit settings per group and concatenate the rest.

        For every group the number discarded is the larger of 'discard' and
        'discard-percent' of that group; the number kept is 'limit-percent'
        of what remains, capped by 'limit'.
        """
        results = []

        max_count = self.data.get('limit', 0)
        pct = self.data.get('limit-percent', 0)
        drop = self.data.get('discard', 0)
        droppct = self.data.get('discard-percent', 0)
        ordered = list(groups)
        if 'group-by' in self.data or 'order' in self.data:
            ordered = self.reorder(ordered, key=lambda r: groups[r]['sortkey'])
        for g in ordered:
            members = groups[g]['resources']

            # discard X first; computed per group so that a large group's
            # percentage does not carry over into the groups that follow.
            group_drop = drop
            if droppct > 0:
                group_drop = max(drop, int(droppct / 100 * len(members)))
            if group_drop > 0:
                members = groups[g]['resources'] = members[group_drop:]

            # then limit the remaining
            count = len(members)
            if pct > 0:
                count = int(pct / 100 * len(members))
            if max_count > 0 and max_count < count:
                count = max_count
            results.extend(members[0:count])
        return results

    def reorder(self, items, key=None):
        """Return ``items`` shuffled, reversed or sorted, depending on ``self.order``."""
        if self.order == 'randomize':
            return sample(items, k=len(items))
        elif self.order == 'reverse':
            return items[::-1]
        else:
            return sorted(items, key=key, reverse=(self.order == 'desc'))
class ListItemModel:
    """Resource model returned by ListItemResourceManager.get_model."""

    # Key under which ListItemFilter.process stores each item's list index.
    id = 'c7n:_id'
class ListItemRegistry(FilterRegistry):
    """Filter registry used for the ``attrs`` filters of a list-item filter.

    Registers value/or/and/not/reduce only; 'event' and 'list-item' are
    left out.
    """

    def __init__(self, *args, **kw):
        # Deliberately start the super() lookup *after* FilterRegistry so
        # its __init__ (which registers the full built-in set) is skipped.
        super(FilterRegistry, self).__init__(*args, **kw)
        for name, klass in (
                ('value', ValueFilter),
                ('or', Or),
                ('and', And),
                ('not', Not),
                ('reduce', ReduceFilter)):
            self.register(name, klass)
class ListItemResourceManager(ResourceManager):
    """Resource manager that ListItemFilter uses to filter the items of a list."""

    filter_registry = ListItemRegistry('filters')

    def get_model(self):
        """Return the model class whose ``id`` names the per-item index key."""
        return ListItemModel
class ListItemFilter(Filter):
    """
    Perform multi attribute filtering on items within a list,
    for example looking for security groups that have rules which
    include 0.0.0.0/0 and port 22 open.

    :example:

    .. code-block:: yaml

      policies:
        - name: security-group-with-22-open-to-world
          resource: aws.security-group
          filters:
            - type: list-item
              key: IpPermissions
              attrs:
                - type: value
                  key: IpRanges[].CidrIp
                  value: '0.0.0.0/0'
                  op: in
                  value_type: swap
                - type: value
                  key: FromPort
                  value: 22
                - type: value
                  key: ToPort
                  value: 22
        - name: find-task-def-not-using-registry
          resource: aws.ecs-task-definition
          filters:
            - not:
              - type: list-item
                key: containerDefinitions
                attrs:
                  - not:
                    - type: value
                      key: image
                      value: "${account_id}.dkr.ecr.us-east-2.amazonaws.com.*"
                      op: regex
    """

    schema = type_schema(
        'list-item',
        **{
            'key': {'type': 'string'},
            'attrs': {'$ref': '#/definitions/filters_common/list_item_attrs'},
            'count': {'type': 'number'},
            'count_op': {'$ref': '#/definitions/filters_common/comparison_operators'},
        },
    )

    schema_alias = True
    # When False, matches are annotated as "<key>[<index>]" strings; when
    # True, the matching items themselves are stored.
    annotate_items = False
    item_annotation_key = "c7n:ListItemMatches"
    _expr = None

    @property
    def expr(self):
        """Compiled JMESPath expression for ``data['key']``, built on first use."""
        if self._expr is None:
            self._expr = jmespath_compile(self.data['key'])
        return self._expr

    def check_count(self, rcount):
        """Return True when ``rcount`` satisfies the configured count check.

        Returns False when no 'count' is configured. The operator defaults
        to 'eq'.
        """
        if 'count' not in self.data:
            return False
        op = OPERATORS[self.data.get('count_op', 'eq')]
        return bool(op(rcount, self.data['count']))

    def process(self, resources, event=None):
        """Return the resources whose list items satisfy the ``attrs`` filters.

        With 'count' configured, a resource matches when the number of
        matching items passes the count check; otherwise it matches when at
        least one item matches, and the matches are recorded under
        ``item_annotation_key``.

        Raises PolicyExecutionError when the value found at ``key`` is
        non-empty but not a list.
        """
        result = []
        frm = ListItemResourceManager(
            self.manager.ctx, data={'filters': self.data.get('attrs', [])})
        for r in resources:
            list_values = self.get_item_values(r)
            if not list_values:
                if self.check_count(0):
                    result.append(r)
                continue
            if not isinstance(list_values, list):
                item_type = type(list_values)
                raise PolicyExecutionError(
                    f"list-item filter value for {self.data['key']} is a {item_type} not a list"
                )
            # Tag every item with its index so matches can be mapped back.
            for idx, list_value in enumerate(list_values):
                list_value['c7n:_id'] = idx
            try:
                list_resources = frm.filter_resources(list_values, event)
                matched_indices = [item['c7n:_id'] for item in list_resources]
            finally:
                # Always strip the temporary index, even if filtering fails,
                # so it never remains on the caller's data. The default also
                # tolerates the same dict object appearing twice in the list.
                for list_value in list_values:
                    list_value.pop('c7n:_id', None)
            if 'count' in self.data:
                if self.check_count(len(list_resources)):
                    result.append(r)
            elif list_resources:
                if not self.annotate_items:
                    annotations = [
                        f'{self.data.get("key", self.type)}[{str(i)}]'
                        for i in matched_indices
                    ]
                else:
                    annotations = list_resources
                r.setdefault(self.item_annotation_key, [])
                r[self.item_annotation_key].extend(annotations)
                result.append(r)
        return result

    def get_item_values(self, resource):
        """Return whatever the ``key`` expression selects from ``resource``."""
        return self.expr.search(resource)

    def __call__(self, resource):
        if self.process((resource,)):
            return True
        return False