Coverage Report

Created: 2026-04-12 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ada-url/fuzz/url_pattern.cc
Line
Count
Source
1
#include <fuzzer/FuzzedDataProvider.h>
2
3
#include <memory>
4
#include <string>
5
6
#include "ada.cpp"
7
#include "ada.h"
8
9
using regex_provider = ada::url_pattern_regex::std_regex_provider;
10
11
92.1k
// Calls every read-only accessor of a parsed pattern and discards the values.
//
// The pattern is taken by const reference: the accessors only read, and taking
// it by value would copy the whole pattern object on every call.
//
// Pattern is whatever `*parse_url_pattern<...>(...)` yields at the call sites;
// it must provide the ten accessors used below as const member functions.
template <typename Pattern>
void exercise_result(const Pattern& result) {
  // String components of the pattern.
  (void)result.get_protocol();
  (void)result.get_username();
  (void)result.get_password();
  (void)result.get_hostname();
  (void)result.get_port();
  (void)result.get_pathname();
  (void)result.get_search();
  (void)result.get_hash();
  // Flags.
  (void)result.ignore_case();
  (void)result.has_regexp_groups();
}
23
24
// Shared helper: walk every field of a url_pattern_result.
25
36.6k
static void exercise_match_result(const ada::url_pattern_result& match) {
26
36.6k
  volatile size_t len = 0;
27
293k
  auto exercise_component = [&len](const ada::url_pattern_component_result& c) {
28
293k
    len += c.input.size();
29
293k
    for (const auto& [k, v] : c.groups) {
30
290k
      len += k.size();
31
290k
      if (v.has_value()) len += v->size();
32
290k
    }
33
293k
  };
34
36.6k
  exercise_component(match.protocol);
35
36.6k
  exercise_component(match.username);
36
36.6k
  exercise_component(match.password);
37
36.6k
  exercise_component(match.hostname);
38
36.6k
  exercise_component(match.port);
39
36.6k
  exercise_component(match.pathname);
40
36.6k
  exercise_component(match.search);
41
36.6k
  exercise_component(match.hash);
42
  // Exercise the 'inputs' vector (each element is a url_pattern_input
43
  // variant holding either a string_view or url_pattern_init).
44
45.0k
  for (const auto& inp : match.inputs) {
45
45.0k
    if (std::holds_alternative<std::string_view>(inp)) {
46
24.9k
      len += std::get<std::string_view>(inp).size();
47
24.9k
    }
48
45.0k
  }
49
36.6k
  (void)len;
50
36.6k
}
51
52
// Exercise exec() and test() on a parsed url_pattern with an ASCII input.
53
// We restrict inputs to ASCII to avoid catastrophic regex backtracking.
54
static void exercise_exec_and_test(ada::url_pattern<regex_provider>& pattern,
55
                                   const std::string& test_input,
56
92.1k
                                   const std::string& test_base) {
57
92.1k
  std::string_view test_view(test_input.data(), test_input.size());
58
59
  // exec() and test() must agree: exec finds a match iff test returns true.
60
  // Both operate on the same input so their answers must be consistent.
61
92.1k
  auto exec_result = pattern.exec(test_view, nullptr);
62
92.1k
  auto test_result = pattern.test(test_view, nullptr);
63
64
92.1k
  bool exec_matched = exec_result && exec_result->has_value();
65
92.1k
  bool test_matched = test_result && *test_result;
66
67
92.1k
  if (exec_matched != test_matched) {
68
0
    printf(
69
0
        "exec/test inconsistency on input '%s': exec_matched=%d "
70
0
        "test_matched=%d\n",
71
0
        test_input.c_str(), exec_matched, test_matched);
72
0
    abort();
73
0
  }
74
75
92.1k
  if (exec_result && exec_result->has_value()) {
76
8.33k
    exercise_match_result(**exec_result);
77
8.33k
  }
78
79
  // test() with base URL
80
92.1k
  if (!test_base.empty()) {
81
92.1k
    std::string_view base_view(test_base.data(), test_base.size());
82
92.1k
    auto test_result_with_base = pattern.test(test_view, &base_view);
83
92.1k
    auto exec_with_base = pattern.exec(test_view, &base_view);
84
85
92.1k
    bool exec_base_matched = exec_with_base && exec_with_base->has_value();
86
92.1k
    bool test_base_matched = test_result_with_base && *test_result_with_base;
87
88
92.1k
    if (exec_base_matched != test_base_matched) {
89
0
      printf(
90
0
          "exec/test inconsistency with base on input '%s': "
91
0
          "exec_matched=%d test_matched=%d\n",
92
0
          test_input.c_str(), exec_base_matched, test_base_matched);
93
0
      abort();
94
0
    }
95
96
92.1k
    if (exec_with_base && exec_with_base->has_value()) {
97
8.33k
      exercise_match_result(**exec_with_base);
98
8.33k
    }
99
92.1k
  }
100
101
  // test() with url_pattern_init input (sets only the pathname component)
102
92.1k
  ada::url_pattern_init init_input{};
103
92.1k
  init_input.pathname = test_input;
104
92.1k
  auto test_with_init = pattern.test(init_input, nullptr);
105
92.1k
  auto exec_with_init = pattern.exec(init_input, nullptr);
106
  // exec and test must agree on the init-based input too.
107
92.1k
  if ((test_with_init && *test_with_init) !=
108
92.1k
      (exec_with_init && exec_with_init->has_value())) {
109
0
    printf("exec/test inconsistency on url_pattern_init input\n");
110
0
    abort();
111
0
  }
112
92.1k
  if (exec_with_init && exec_with_init->has_value()) {
113
20.0k
    exercise_match_result(**exec_with_init);
114
20.0k
  }
115
116
  // test_components() — tests each URL component individually
117
92.1k
  {
118
92.1k
    std::string_view sv(test_input.data(), test_input.size());
119
92.1k
    auto parsed = ada::parse<ada::url_aggregator>(sv);
120
92.1k
    if (parsed) {
121
36.0k
      volatile bool tc = pattern.test_components(
122
36.0k
          std::string(parsed->get_protocol()),
123
36.0k
          std::string(parsed->get_username()),
124
36.0k
          std::string(parsed->get_password()),
125
36.0k
          std::string(parsed->get_hostname()), std::string(parsed->get_port()),
126
36.0k
          std::string(parsed->get_pathname()),
127
36.0k
          std::string(parsed->get_search()), std::string(parsed->get_hash()));
128
36.0k
      (void)tc;
129
36.0k
    }
130
92.1k
  }
131
132
  // match() — the internal method underlying exec(); must not crash.
133
92.1k
  auto match_result = pattern.match(test_view, nullptr);
134
92.1k
  (void)match_result;
135
92.1k
}
136
137
10.6k
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
138
325k
  auto to_ascii = [](const std::string& source) -> std::string {
139
325k
    std::string result;
140
325k
    result.reserve(source.size());
141
325k
    for (char c : source) {
142
217k
      result.push_back(static_cast<unsigned char>(c) % 128);
143
217k
    }
144
325k
    return result;
145
325k
  };
146
10.6k
  FuzzedDataProvider fdp(data, size);
147
  // We do not want to trigger arbitrary regex matching.
148
10.6k
  std::string source_1 = "/" + to_ascii(fdp.ConsumeRandomLengthString(50)) +
149
10.6k
                         "/" + to_ascii(fdp.ConsumeRandomLengthString(50));
150
10.6k
  std::string base_source_1 = "/" +
151
10.6k
                              to_ascii(fdp.ConsumeRandomLengthString(50)) +
152
10.6k
                              "/" + to_ascii(fdp.ConsumeRandomLengthString(50));
153
154
10.6k
  std::string source_2 = "https://ada-url.com/*";
155
10.6k
  std::string base_source_2 = "https://ada-url.com";
156
157
  // Additional test input for exec/test calls (also ASCII-only)
158
10.6k
  std::string test_input = "https://" +
159
10.6k
                           to_ascii(fdp.ConsumeRandomLengthString(30)) + "/" +
160
10.6k
                           to_ascii(fdp.ConsumeRandomLengthString(20));
161
10.6k
  std::string test_base = "https://ada-url.com";
162
163
10.6k
  std::array<std::pair<std::string, std::string>, 2> sources = {{
164
10.6k
      {source_1, base_source_1},
165
10.6k
      {source_2, base_source_2},
166
10.6k
  }};
167
168
21.3k
  for (const auto& [source, base_source] : sources) {
169
    // Without base or options
170
21.3k
    auto result =
171
21.3k
        ada::parse_url_pattern<regex_provider>(source, nullptr, nullptr);
172
21.3k
    if (result) {
173
10.6k
      exercise_result(*result);
174
10.6k
      exercise_exec_and_test(*result, test_input, test_base);
175
10.6k
    }
176
177
    // Testing with base_url
178
21.3k
    std::string_view base_source_view(base_source.data(), base_source.length());
179
21.3k
    auto result_with_base = ada::parse_url_pattern<regex_provider>(
180
21.3k
        source, &base_source_view, nullptr);
181
21.3k
    if (result_with_base) {
182
10.6k
      exercise_result(*result_with_base);
183
10.6k
      exercise_exec_and_test(*result_with_base, test_input, test_base);
184
10.6k
    }
185
186
    // Testing with base_url and options
187
21.3k
    ada::url_pattern_options options{.ignore_case = fdp.ConsumeBool()};
188
21.3k
    auto result_with_base_and_options = ada::parse_url_pattern<regex_provider>(
189
21.3k
        source, &base_source_view, &options);
190
21.3k
    if (result_with_base_and_options) {
191
10.6k
      exercise_result(*result_with_base_and_options);
192
10.6k
      exercise_exec_and_test(*result_with_base_and_options, test_input,
193
10.6k
                             test_base);
194
10.6k
    }
195
196
    // Testing with url_pattern_init and base url.
197
21.3k
    int field_index = fdp.ConsumeIntegralInRange(0, 7);
198
21.3k
    std::string random_value = to_ascii(fdp.ConsumeRandomLengthString(50));
199
21.3k
    ada::url_pattern_init init{};
200
21.3k
    switch (field_index) {
201
15.1k
      case 0:
202
15.1k
        init.protocol = random_value;
203
15.1k
        break;
204
548
      case 1:
205
548
        init.username = random_value;
206
548
        break;
207
1.37k
      case 2:
208
1.37k
        init.password = random_value;
209
1.37k
        break;
210
566
      case 3:
211
566
        init.hostname = random_value;
212
566
        break;
213
1.63k
      case 4:
214
1.63k
        init.port = random_value;
215
1.63k
        break;
216
568
      case 5:
217
568
        init.pathname = random_value;
218
568
        break;
219
542
      case 6:
220
542
        init.search = random_value;
221
542
        break;
222
982
      case 7:
223
982
        init.hash = random_value;
224
982
        break;
225
21.3k
    }
226
21.3k
    auto result_with_init = ada::parse_url_pattern<regex_provider>(
227
21.3k
        init, &base_source_view, nullptr);
228
21.3k
    if (result_with_init) {
229
0
      exercise_result(*result_with_init);
230
0
      exercise_exec_and_test(*result_with_init, test_input, test_base);
231
0
    }
232
233
    // Testing url_pattern_init with ALL fields populated simultaneously
234
21.3k
    ada::url_pattern_init init_all{};
235
21.3k
    init_all.protocol = to_ascii(fdp.ConsumeRandomLengthString(10));
236
21.3k
    init_all.username = to_ascii(fdp.ConsumeRandomLengthString(10));
237
21.3k
    init_all.password = to_ascii(fdp.ConsumeRandomLengthString(10));
238
21.3k
    init_all.hostname = to_ascii(fdp.ConsumeRandomLengthString(20));
239
21.3k
    init_all.port = to_ascii(fdp.ConsumeRandomLengthString(5));
240
21.3k
    init_all.pathname = "/" + to_ascii(fdp.ConsumeRandomLengthString(20));
241
21.3k
    init_all.search = to_ascii(fdp.ConsumeRandomLengthString(10));
242
21.3k
    init_all.hash = to_ascii(fdp.ConsumeRandomLengthString(10));
243
21.3k
    auto result_with_init_all =
244
21.3k
        ada::parse_url_pattern<regex_provider>(init_all, nullptr, nullptr);
245
21.3k
    if (result_with_init_all) {
246
18.1k
      exercise_result(*result_with_init_all);
247
18.1k
      exercise_exec_and_test(*result_with_init_all, test_input, test_base);
248
18.1k
    }
249
250
    // Testing url_pattern_init with the base_url field set.
251
    //
252
    // url_pattern_init::base_url is a completely separate code path from the
253
    // base_url *parameter* of parse_url_pattern. When base_url is embedded
254
    // inside the init struct the spec processes it differently. This field
255
    // was previously never exercised by any fuzzer.
256
21.3k
    {
257
21.3k
      ada::url_pattern_init init_base_url{};
258
21.3k
      init_base_url.pathname =
259
21.3k
          "/" + to_ascii(fdp.ConsumeRandomLengthString(20));
260
21.3k
      init_base_url.base_url = "https://example.com";
261
21.3k
      auto result_base_in_init = ada::parse_url_pattern<regex_provider>(
262
21.3k
          init_base_url, nullptr, nullptr);
263
21.3k
      if (result_base_in_init) {
264
20.6k
        exercise_result(*result_base_in_init);
265
20.6k
        exercise_exec_and_test(*result_base_in_init, test_input, test_base);
266
20.6k
      }
267
268
      // Also fuzz the base_url field itself.
269
21.3k
      ada::url_pattern_init init_fuzz_base{};
270
21.3k
      init_fuzz_base.pathname =
271
21.3k
          "/" + to_ascii(fdp.ConsumeRandomLengthString(15));
272
21.3k
      init_fuzz_base.base_url =
273
21.3k
          "https://" + to_ascii(fdp.ConsumeRandomLengthString(20));
274
21.3k
      auto result_fuzz_base = ada::parse_url_pattern<regex_provider>(
275
21.3k
          init_fuzz_base, nullptr, nullptr);
276
21.3k
      if (result_fuzz_base) {
277
717
        exercise_result(*result_fuzz_base);
278
717
        exercise_exec_and_test(*result_fuzz_base, test_input, test_base);
279
717
      }
280
21.3k
    }
281
282
    // Testing url_pattern_init with a random subset (2–4) of fields set.
283
    //
284
    // The single-field case (switch above) and the all-fields case are covered
285
    // above. Here we pick a random bitmask of fields so the parser sees every
286
    // combination of present/absent components.
287
21.3k
    {
288
21.3k
      uint8_t field_mask = fdp.ConsumeIntegral<uint8_t>();
289
21.3k
      ada::url_pattern_init init_subset{};
290
21.3k
      if (field_mask & 0x01)
291
521
        init_subset.protocol = to_ascii(fdp.ConsumeRandomLengthString(8));
292
21.3k
      if (field_mask & 0x02)
293
689
        init_subset.hostname = to_ascii(fdp.ConsumeRandomLengthString(20));
294
21.3k
      if (field_mask & 0x04)
295
719
        init_subset.port = to_ascii(fdp.ConsumeRandomLengthString(5));
296
21.3k
      if (field_mask & 0x08)
297
903
        init_subset.pathname =
298
903
            "/" + to_ascii(fdp.ConsumeRandomLengthString(20));
299
21.3k
      if (field_mask & 0x10)
300
715
        init_subset.search = to_ascii(fdp.ConsumeRandomLengthString(10));
301
21.3k
      if (field_mask & 0x20)
302
858
        init_subset.hash = to_ascii(fdp.ConsumeRandomLengthString(10));
303
21.3k
      if (field_mask & 0x40)
304
647
        init_subset.username = to_ascii(fdp.ConsumeRandomLengthString(10));
305
21.3k
      if (field_mask & 0x80)
306
374
        init_subset.password = to_ascii(fdp.ConsumeRandomLengthString(10));
307
21.3k
      auto result_subset =
308
21.3k
          ada::parse_url_pattern<regex_provider>(init_subset, nullptr, nullptr);
309
21.3k
      if (result_subset) {
310
20.5k
        exercise_result(*result_subset);
311
20.5k
        exercise_exec_and_test(*result_subset, test_input, test_base);
312
20.5k
      }
313
21.3k
    }
314
21.3k
  }
315
316
10.6k
  return 0;
317
10.6k
}