Coverage Report

Created: 2026-03-31 07:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fuzz_regex.cpp
Line
Count
Source
1
/* Copyright 2025 Google LLC
2
Licensed under the Apache License, Version 2.0 (the "License");
3
you may not use this file except in compliance with the License.
4
You may obtain a copy of the License at
5
      http://www.apache.org/licenses/LICENSE-2.0
6
Unless required by applicable law or agreed to in writing, software
7
distributed under the License is distributed on an "AS IS" BASIS,
8
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
See the License for the specific language governing permissions and
10
limitations under the License.
11
*/
12
13
/*
14
 * Fuzzer for Ruby's Regex implementation (re.c, regcomp.c, regexec.c, regparse.c)
15
 * 
16
 * Purpose: Test regex compilation from potentially malformed patterns and matching
17
 * against various strings. Tests parser edge cases, compilation bugs, and matching
18
 * correctness with complex patterns.
19
 * 
20
 * Coverage:
21
 * - Regex compilation: Pattern parsing, syntax validation, optimization
22
 * - Regex matching: match, =~, scan, gsub operations
23
 * - Edge cases: Invalid patterns, backtracking, captures, Unicode, lookahead/lookbehind
24
 * - Memory: Backtracking stack overflow, catastrophic backtracking
25
 */
26
27
#include <stdint.h>
28
#include <stddef.h>
29
#include <stdlib.h>
30
#include <string.h>
31
#include <unistd.h>
32
#include <fcntl.h>
33
#include <fuzzer/FuzzedDataProvider.h>
34
#include "ruby.h"
35
#include "ruby/encoding.h"
36
#include "ruby/re.h"
37
38
// Set to 1 once ruby_init() has run; LLVMFuzzerTestOneInput checks it so the
// interpreter is initialized only on the first fuzz iteration.
static int ruby_initialized = 0;
39
40
// Ruby's verbosity flag; the fuzz entry point assigns Qfalse to it once,
// right after ruby_init(), to keep warnings out of the fuzzer log.
// NOTE(review): ruby.h may already declare this symbol - confirm this extra
// declaration is still needed for the Ruby version being fuzzed.
extern "C" VALUE ruby_verbose;
41
42
// Wrapper functions for rb_protect - necessary to catch exceptions
43
// Regex operations can raise (e.g., syntax errors, invalid patterns)
44
5.36k
// rb_protect trampoline for Regexp#match.
// `args` is a VALUE-sized pointer to an array laid out as
// { regexp, subject string }; the result of the call is returned unchanged.
static VALUE call_regex_match(VALUE args)
{
    VALUE *const argv = reinterpret_cast<VALUE *>(args);
    VALUE regexp = argv[0];
    VALUE subject = argv[1];

    return rb_funcall(regexp, rb_intern("match"), 1, subject);
}
48
49
// Wrapper for regex =~ operator
50
5.36k
// rb_protect trampoline for Regexp#=~.
// `args` is a VALUE-sized pointer to an array laid out as
// { regexp, subject string }; the result of the call is returned unchanged.
static VALUE call_regex_match_op(VALUE args)
{
    VALUE *const argv = reinterpret_cast<VALUE *>(args);
    VALUE regexp = argv[0];
    VALUE subject = argv[1];

    return rb_funcall(regexp, rb_intern("=~"), 1, subject);
}
54
55
// Wrapper for regex scan
56
4.32k
// rb_protect trampoline for String#scan.
// `args` is a VALUE-sized pointer to an array laid out as
// { regexp, subject string }; note the receiver is the string (slot 1) and
// the regexp (slot 0) is passed as the argument.
static VALUE call_regex_scan(VALUE args)
{
    VALUE *const argv = reinterpret_cast<VALUE *>(args);
    VALUE regexp = argv[0];
    VALUE subject = argv[1];

    return rb_funcall(subject, rb_intern("scan"), 1, regexp);
}
60
61
// Wrapper for regex gsub
62
5.36k
// rb_protect trampoline for String#gsub.
// `args` is a VALUE-sized pointer to an array laid out as
// { regexp, subject string, replacement string }; the string (slot 1) is the
// receiver and the regexp and replacement are passed as its two arguments.
static VALUE call_regex_gsub(VALUE args)
{
    VALUE *const argv = reinterpret_cast<VALUE *>(args);
    VALUE regexp = argv[0];
    VALUE subject = argv[1];
    VALUE replacement = argv[2];

    return rb_funcall(subject, rb_intern("gsub"), 2, regexp, replacement);
}
66
67
6.54k
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
68
    // Initialize Ruby once on first call
69
    // Sets up VM, object system, and Regexp class
70
6.54k
    if (!ruby_initialized) {
71
1
        ruby_init();
72
1
        ruby_initialized = 1;
73
        
74
        // Suppress Ruby warnings to avoid log noise
75
1
        ruby_verbose = Qfalse;
76
1
    }
77
    
78
6.54k
    if (size < 2) return 0;
79
    
80
    // Use FuzzedDataProvider to split input into pattern and test string
81
6.54k
    FuzzedDataProvider fdp(data, size);
82
    
83
    // Consume pattern string with limited length to avoid pathological patterns
84
6.54k
    size_t pattern_len = fdp.ConsumeIntegralInRange<size_t>(1, 1000);  // Reduced from 10000
85
6.54k
    std::string pattern = fdp.ConsumeBytesAsString(pattern_len);
86
    
87
    // Consume test string from remaining data with size limit
88
6.54k
    std::string test = fdp.ConsumeRemainingBytesAsString();
89
6.54k
    if (test.size() > 10000) {
90
82
        test.resize(10000);  // Limit test string size to prevent memory issues
91
82
    }
92
    
93
    // Create Ruby strings - these can fail if data is invalid
94
6.54k
    VALUE pattern_str = rb_str_new(pattern.data(), pattern.size());
95
6.54k
    VALUE test_str = rb_str_new(test.data(), test.size());
96
    
97
6.54k
    int state = 0;
98
6.54k
    VALUE args[3];
99
    
100
    // Temporarily redirect stderr file descriptor to suppress regex compilation warnings
101
    // Duplicate stderr, redirect to /dev/null, then restore after compilation
102
6.54k
    int saved_stderr = dup(STDERR_FILENO);
103
6.54k
    int dev_null = open("/dev/null", O_WRONLY);
104
6.54k
    if (dev_null >= 0) {
105
6.54k
        dup2(dev_null, STDERR_FILENO);
106
6.54k
        close(dev_null);
107
6.54k
    }
108
    
109
    // Compile the regex with default options (0)
110
    // This exercises the regex parser (regparse.c)
111
    // Tests pattern syntax validation, AST building, and optimization
112
6.54k
    VALUE regexp = rb_protect((VALUE (*)(VALUE))rb_reg_regcomp, pattern_str, &state);
113
    
114
    // Restore stderr file descriptor
115
6.54k
    if (saved_stderr >= 0) {
116
6.54k
        dup2(saved_stderr, STDERR_FILENO);
117
6.54k
        close(saved_stderr);
118
6.54k
    }
119
    
120
6.54k
    if (state) {
121
        // Pattern compilation failed (syntax error, invalid escape, etc.)
122
1.17k
        rb_set_errinfo(Qnil);
123
1.17k
        rb_gc_start();
124
1.17k
        return 0;
125
1.17k
    }
126
    
127
5.36k
    if (NIL_P(regexp)) {
128
0
        rb_gc_start();
129
0
        return 0;
130
0
    }
131
    
132
    // Test 1: Regexp#match - exercises regex matching engine (regexec.c)
133
    // Returns MatchData object with capture groups
134
5.36k
    args[0] = regexp;
135
5.36k
    args[1] = test_str;
136
5.36k
    rb_protect(call_regex_match, (VALUE)args, &state);
137
5.36k
    if (state) {
138
166
        rb_set_errinfo(Qnil);
139
166
        state = 0;
140
166
    }
141
    
142
    // Test 2: Regexp#=~ - exercises match position finding
143
    // Returns integer position or nil
144
5.36k
    rb_protect(call_regex_match_op, (VALUE)args, &state);
145
5.36k
    if (state) {
146
166
        rb_set_errinfo(Qnil);
147
166
        state = 0;
148
166
    }
149
    
150
    // Test 3: String#scan - find all matches
151
    // Tests repeated matching and capture handling
152
    // Skip scan if test string is too large to avoid memory issues
153
5.36k
    if (test.size() <= 5000) {
154
4.32k
        rb_protect(call_regex_scan, (VALUE)args, &state);
155
4.32k
        if (state) {
156
202
            rb_set_errinfo(Qnil);
157
202
            state = 0;
158
202
        }
159
4.32k
    }
160
    
161
    // Test 4: String#gsub - replace matches
162
    // Tests matching combined with string building
163
5.36k
    VALUE replacement = rb_str_new("X", 1);
164
5.36k
    args[0] = regexp;
165
5.36k
    args[1] = test_str;
166
5.36k
    args[2] = replacement;
167
5.36k
    rb_protect(call_regex_gsub, (VALUE)args, &state);
168
5.36k
    if (state) {
169
256
        rb_set_errinfo(Qnil);
170
256
        state = 0;
171
256
    }
172
    
173
    // Clean up - force GC to release memory
174
    // Necessary to prevent memory growth from regex compilation artifacts
175
5.36k
    rb_gc_start();
176
    
177
5.36k
    return 0;
178
5.36k
}