1# Copyright 2022 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15################################################################################
16"""Sanitizer for regular expression dos."""
17
18# pylint: disable=protected-access
19
20import time
21import os
22from pysecsan import sanlib
23
# Timestamp (from time.time()) written by hook_pre_exec_re_pattern_findall and
# read by hook_post_exec_re_pattern_findall. It stays None until the pre hook
# has run at least once.
START_RE_TIME = None
25
26
27# Hooks for regular expressions.
# The main problem is to identify ReDOS attempts. This is a non-trivial task
# - https://arxiv.org/pdf/1701.04045.pdf
# - https://dl.acm.org/doi/pdf/10.1145/3236024.3236027
# and the current approach we use is simply to check for excessive computing
# time. In essence, this is more of a refinement of the traditional timeout
# checker from the fuzzer, which will effectively detect these types of
# attacks by way of timeouts.
35#
36# Perhaps the smartest would be to use something like e.g.
37# https://github.com/doyensec/regexploit to scan the regex patterns.
38# Other heuristics without going too technical on identifying super-linear
39# regexes:
# - check
#   - if 'taint' exists in re.compile(xx)
# - check
#   - for backtracking possibility in PATTERN within re.compile(PATTERN)
#   - and
#   - 'taint' in findall(XX) calls.
46# pylint: disable=global-statement
def hook_post_exec_re_pattern_findall(self, re_str):
    """Hook post execution of re.compile().findall().

    Computes the time elapsed since START_RE_TIME was recorded and reports a
    potential ReDOS through sanlib.abort_with_issue when it exceeds four
    seconds.

    Args:
      self: The hooked object. Unused.
      re_str: Value embedded verbatim in the issue report (presumably the
        string handed to findall(); the pre hook names it `string`).

    If START_RE_TIME has never been set, this is treated as an internal
    PySecSan error: a message is logged and the process is terminated via
    os._exit(1).
    """
    _ = self  # Satisfy lint

    # START_RE_TIME is only read here, so no `global` declaration is needed.
    start_time = START_RE_TIME
    if start_time is None:
        # The module initialises START_RE_TIME to None, so a missing pre hook
        # shows up as None (subtracting it would raise TypeError), never as a
        # NameError. Check for it explicitly so the diagnostic is reachable.
        sanlib.sanitizer_log(
            'starttime is not set, which it should have. Error in PySecSan',
            sanlib.LOG_INFO)
        os._exit(1)

    elapsed = time.time() - start_time
    if elapsed > 4:
        sanlib.abort_with_issue(f'Potential ReDOS attack.\n {re_str}', 'ReDOS')
60
61
def hook_pre_exec_re_pattern_findall(self, string):
    """Hook run before a hooked pattern's findall().

    Records the current time.time() value in the module-level START_RE_TIME
    so that the matching post-execution hook can measure how long the call
    took.

    Args:
      self: The hooked object. Unused.
      string: The argument passed to findall(). Unused.
    """
    global START_RE_TIME
    del self, string  # Neither argument is needed; keeps lint quiet.
    START_RE_TIME = time.time()
67
68
def hook_post_exec_re_compile(retval, pattern, flags=None):
    """Hook for re.compile post execution to hook returned objects functions.

    Wraps the object returned by re.compile() so that calls to its findall()
    method are timed by the findall pre/post hooks in this module.

    Args:
      retval: The object returned by the hooked re.compile() call.
      pattern: The pattern passed to re.compile(). Unused.
      flags: The flags passed to re.compile(). Unused.

    Returns:
      The result of applying the sanlib-generated wrapper to `retval`.
    """
    _ = (pattern, flags)  # Satisfy lint
    sanlib.sanitizer_log('Inside of post compile hook', sanlib.LOG_DEBUG)
    # Register the pre hook (starts the timer) together with the post hook
    # (checks the elapsed time). Registering the pre hook in both slots would
    # leave the ReDOS check uninstalled.
    # NOTE(review): assumes create_object_wrapper takes (pre_hook, post_hook)
    # tuples -- confirm against sanlib.
    wrapper_object = sanlib.create_object_wrapper(
        findall=(hook_pre_exec_re_pattern_findall,
                 hook_post_exec_re_pattern_findall))
    return wrapper_object(retval)
78
79
def hook_pre_exec_re_compile(pattern, flags=None):
    """Hook run before re.compile().

    Currently this only emits a debug log entry. Inspecting `pattern` for
    tainted input (which would make ReDOS likely) is not implemented here.

    Args:
      pattern: The pattern passed to re.compile(). Unused.
      flags: The flags passed to re.compile(). Unused.
    """
    del pattern, flags  # Unused for now; keeps lint quiet.
    message = 'Inside re compile hook'
    sanlib.sanitizer_log(message, sanlib.LOG_DEBUG)