Coverage Report

Created: 2025-08-08 06:27

/src/ada-url/fuzz/parse.cc
Line
Count
Source (jump to first uncovered line)
1
#include <fuzzer/FuzzedDataProvider.h>
2
3
#include <cstdio>
4
#include <memory>
5
#include <string>
6
7
#include "ada.cpp"
8
#include "ada.h"
9
10
8.79k
/**
 * Reports whether the `len` bytes starting at `buf` form well-formed UTF-8.
 *
 * A sequence is rejected when it is truncated, has a malformed lead or
 * continuation byte, is an overlong encoding, encodes a surrogate
 * (U+D800..U+DFFF), or encodes a value above U+10FFFF.
 *
 * @param buf pointer to the first byte to check; only dereferenced for
 *            offsets below `len`.
 * @param len number of bytes to check.
 * @return true if the whole range is valid UTF-8 (an empty range is valid),
 *         false otherwise.
 */
bool is_valid_utf8_string(const char *buf, size_t len) {
  const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
  size_t pos = 0;

  while (pos < len) {
    // Fast path: when at least 16 bytes remain, load them as two 64-bit words
    // and skip the whole chunk if no byte has its high bit set (pure ASCII).
    if (len - pos >= 16) {
      uint64_t v1;
      std::memcpy(&v1, data + pos, sizeof(uint64_t));
      uint64_t v2;
      std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
      if (((v1 | v2) & 0x8080808080808080ULL) == 0) {
        pos += 16;
        continue;
      }
    }

    // Consume ASCII bytes one at a time until a non-ASCII lead byte shows up
    // or the input ends.
    uint8_t byte = data[pos];
    while (byte < 0b10000000) {
      if (++pos == len) {
        return true;
      }
      byte = data[pos];
    }

    if ((byte & 0b11100000) == 0b11000000) {
      // Two-byte sequence: 110xxxxx 10xxxxxx.
      if (len - pos < 2) {
        return false;
      }
      if ((data[pos + 1] & 0b11000000) != 0b10000000) {
        return false;
      }
      const uint32_t code_point =
          (static_cast<uint32_t>(byte & 0b00011111) << 6) |
          static_cast<uint32_t>(data[pos + 1] & 0b00111111);
      // Only the overlong check is needed: 11 payload bits cannot exceed
      // 0x7ff.
      if (code_point < 0x80) {
        return false;
      }
      pos += 2;
    } else if ((byte & 0b11110000) == 0b11100000) {
      // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
      if (len - pos < 3) {
        return false;
      }
      if ((data[pos + 1] & 0b11000000) != 0b10000000) {
        return false;
      }
      if ((data[pos + 2] & 0b11000000) != 0b10000000) {
        return false;
      }
      const uint32_t code_point =
          (static_cast<uint32_t>(byte & 0b00001111) << 12) |
          (static_cast<uint32_t>(data[pos + 1] & 0b00111111) << 6) |
          static_cast<uint32_t>(data[pos + 2] & 0b00111111);
      // Reject overlong encodings and surrogates; 16 payload bits cannot
      // exceed 0xffff, so no upper-bound check is required.
      if (code_point < 0x800 || (0xd7ff < code_point && code_point < 0xe000)) {
        return false;
      }
      pos += 3;
    } else if ((byte & 0b11111000) == 0b11110000) {
      // Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
      if (len - pos < 4) {
        return false;
      }
      if ((data[pos + 1] & 0b11000000) != 0b10000000) {
        return false;
      }
      if ((data[pos + 2] & 0b11000000) != 0b10000000) {
        return false;
      }
      if ((data[pos + 3] & 0b11000000) != 0b10000000) {
        return false;
      }
      const uint32_t code_point =
          (static_cast<uint32_t>(byte & 0b00000111) << 18) |
          (static_cast<uint32_t>(data[pos + 1] & 0b00111111) << 12) |
          (static_cast<uint32_t>(data[pos + 2] & 0b00111111) << 6) |
          static_cast<uint32_t>(data[pos + 3] & 0b00111111);
      // Reject overlong encodings and values beyond the Unicode range.
      if (code_point <= 0xffff || 0x10ffff < code_point) {
        return false;
      }
      pos += 4;
    } else {
      // Stray continuation byte (10xxxxxx) or an invalid lead byte (>= 0xf8).
      return false;
    }
  }
  return true;
}
93
94
8.79k
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
95
8.79k
  FuzzedDataProvider fdp(data, size);
96
8.79k
  std::string source = fdp.ConsumeRandomLengthString(256);
97
98
  // volatile forces the compiler to store the results without undue
99
  // optimizations
100
8.79k
  volatile size_t length = 0;
101
102
8.79k
  auto parse_url = ada::parse<ada::url>(source);
103
8.79k
  auto parse_url_aggregator = ada::parse<ada::url_aggregator>(source);
104
105
8.79k
  if (is_valid_utf8_string(source.data(), source.length())) {
106
6.56k
    if (parse_url.has_value() ^ parse_url_aggregator.has_value()) {
107
0
      printf("Source used to parse: %s", source.c_str());
108
0
      abort();
109
0
    }
110
6.56k
  }
111
112
8.79k
  if (parse_url) {
113
3.05k
    length += parse_url->get_href().size();
114
3.05k
    length += parse_url->get_origin().size();
115
3.05k
  }
116
117
8.79k
  if (parse_url_aggregator) {
118
2.90k
    length += parse_url_aggregator->get_href().size();
119
2.90k
    length += parse_url_aggregator->get_origin().size();
120
121
2.90k
    volatile bool is_parse_url_aggregator_output_valid = false;
122
2.90k
    is_parse_url_aggregator_output_valid = parse_url_aggregator->validate();
123
124
2.90k
    assert(parse_url->get_protocol() == parse_url_aggregator->get_protocol());
125
2.90k
    assert(parse_url->get_href() == parse_url_aggregator->get_href());
126
127
2.90k
    parse_url->set_href(source);
128
2.90k
    parse_url_aggregator->set_href(source);
129
2.90k
    assert(parse_url->get_href() == parse_url_aggregator->get_href());
130
2.90k
  }
131
132
  /**
133
   * ada::parse<ada::url>
134
   */
135
8.79k
  auto out_url = ada::parse<ada::url>("https://www.ada-url.com");
136
137
8.79k
  if (out_url) {
138
8.79k
    out_url->set_protocol(source);
139
8.79k
    out_url->set_username(source);
140
8.79k
    out_url->set_password(source);
141
8.79k
    out_url->set_hostname(source);
142
8.79k
    out_url->set_host(source);
143
8.79k
    out_url->set_pathname(source);
144
8.79k
    out_url->set_search(source);
145
8.79k
    out_url->set_hash(source);
146
8.79k
    out_url->set_port(source);
147
148
    // getters
149
8.79k
    length += out_url->get_protocol().size();
150
8.79k
    length += out_url->get_username().size();
151
8.79k
    length += out_url->get_password().size();
152
8.79k
    length += out_url->get_hostname().size();
153
8.79k
    length += out_url->get_host().size();
154
8.79k
    length += out_url->get_pathname().size();
155
8.79k
    length += out_url->get_search().size();
156
8.79k
    length += out_url->get_hash().size();
157
8.79k
    length += out_url->get_origin().size();
158
8.79k
    length += out_url->get_port().size();
159
160
8.79k
    length += out_url->to_string().size();
161
8.79k
  }
162
163
  /**
164
   * ada::parse<ada::url_aggregator>
165
   */
166
8.79k
  auto out_aggregator =
167
8.79k
      ada::parse<ada::url_aggregator>("https://www.ada-url.com");
168
169
8.79k
  if (out_aggregator) {
170
8.79k
    out_aggregator->set_protocol(source);
171
8.79k
    out_aggregator->set_username(source);
172
8.79k
    out_aggregator->set_password(source);
173
8.79k
    out_aggregator->set_hostname(source);
174
8.79k
    out_aggregator->set_host(source);
175
8.79k
    out_aggregator->set_pathname(source);
176
8.79k
    out_aggregator->set_search(source);
177
8.79k
    out_aggregator->set_hash(source);
178
8.79k
    out_aggregator->set_port(source);
179
180
    // getters
181
8.79k
    length += out_aggregator->get_protocol().size();
182
8.79k
    length += out_aggregator->get_username().size();
183
8.79k
    length += out_aggregator->get_password().size();
184
8.79k
    length += out_aggregator->get_hostname().size();
185
8.79k
    length += out_aggregator->get_host().size();
186
8.79k
    length += out_aggregator->get_pathname().size();
187
8.79k
    length += out_aggregator->get_search().size();
188
8.79k
    length += out_aggregator->get_hash().size();
189
8.79k
    length += out_aggregator->get_origin().size();
190
8.79k
    length += out_aggregator->get_port().size();
191
192
8.79k
    length += out_aggregator->to_string().size();
193
194
8.79k
    volatile bool is_output_valid = false;
195
8.79k
    is_output_valid = out_aggregator->validate();
196
197
    // Printing due to dead-code elimination
198
8.79k
    printf("diagram %s\n", out_aggregator->to_diagram().c_str());
199
200
    // clear methods
201
8.79k
    out_aggregator->clear_port();
202
8.79k
    out_aggregator->clear_search();
203
8.79k
    out_aggregator->clear_hash();
204
8.79k
  }
205
206
  /**
207
   * Node.js specific
208
   */
209
8.79k
  length += ada::href_from_file(source).size();
210
211
  /**
212
   * Others
213
   */
214
8.79k
  bool is_valid = ada::checkers::verify_dns_length(source);
215
216
  // Only used for avoiding dead-code elimination
217
8.79k
  if (is_valid) {
218
6.85k
    printf("dns length is valid\n");
219
6.85k
  }
220
221
  // Only used for avoiding dead-code elimination
222
8.79k
  printf("length of url is %zu\n", length);
223
224
8.79k
  return 0;
225
8.79k
}  // extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {