Coverage Report

Created: 2025-07-11 06:48

/src/ada-url/fuzz/parse.cc
Line
Count
Source (jump to first uncovered line)
#include <fuzzer/FuzzedDataProvider.h>

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>

#include "ada.cpp"
#include "ada.h"

/**
 * Checks whether the `len` bytes starting at `buf` are well-formed UTF-8.
 *
 * A sequence is rejected when it is truncated, when a continuation byte does
 * not have the 10xxxxxx pattern, when the lead byte is not a valid 1- to
 * 4-byte lead, when the encoding is overlong, when it decodes to a value in
 * 0xD800..0xDFFF, or when it decodes to a value above 0x10FFFF.
 *
 * @param buf pointer to the first byte to examine; not dereferenced when
 *            `len` is 0
 * @param len number of bytes to examine
 * @return true if every byte belongs to a well-formed sequence (an empty
 *         range is valid), false otherwise
 */
bool is_valid_utf8_string(const char *buf, size_t len) {
  const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);

  // A continuation byte always has the bit pattern 10xxxxxx.
  const auto is_continuation = [](uint8_t b) {
    return (b & 0b11000000) == 0b10000000;
  };

  size_t pos = 0;
  while (pos < len) {
    // Fast path: if 16 more bytes are readable, test them for ASCII in one
    // go. `len - pos` cannot wrap here because pos < len.
    if (len - pos >= 16) {
      uint64_t v1;
      std::memcpy(&v1, data + pos, sizeof(uint64_t));
      uint64_t v2;
      std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
      // Any byte with its top bit set is non-ASCII.
      if (((v1 | v2) & 0x8080808080808080ULL) == 0) {
        pos += 16;
        continue;
      }
    }

    // Skip ASCII one byte at a time until a non-ASCII byte or the end.
    uint8_t byte = data[pos];
    while (byte < 0b10000000) {
      if (++pos == len) {
        return true;
      }
      byte = data[pos];
    }

    size_t sequence_length = 0;
    uint32_t code_point = 0;

    if ((byte & 0b11100000) == 0b11000000) {
      // Two-byte sequence: 110xxxxx 10xxxxxx
      sequence_length = 2;
      if (len - pos < sequence_length) {
        return false;
      }
      if (!is_continuation(data[pos + 1])) {
        return false;
      }
      code_point = static_cast<uint32_t>(byte & 0b00011111) << 6 |
                   static_cast<uint32_t>(data[pos + 1] & 0b00111111);
      // Values below 0x80 are overlong encodings of ASCII.
      if ((code_point < 0x80) || (0x7ff < code_point)) {
        return false;
      }
    } else if ((byte & 0b11110000) == 0b11100000) {
      // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
      sequence_length = 3;
      if (len - pos < sequence_length) {
        return false;
      }
      if (!is_continuation(data[pos + 1])) {
        return false;
      }
      if (!is_continuation(data[pos + 2])) {
        return false;
      }
      code_point = static_cast<uint32_t>(byte & 0b00001111) << 12 |
                   static_cast<uint32_t>(data[pos + 1] & 0b00111111) << 6 |
                   static_cast<uint32_t>(data[pos + 2] & 0b00111111);
      // Reject overlong forms and the 0xD800..0xDFFF range.
      if ((code_point < 0x800) || (0xffff < code_point) ||
          (0xd7ff < code_point && code_point < 0xe000)) {
        return false;
      }
    } else if ((byte & 0b11111000) == 0b11110000) {
      // Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      sequence_length = 4;
      if (len - pos < sequence_length) {
        return false;
      }
      if (!is_continuation(data[pos + 1])) {
        return false;
      }
      if (!is_continuation(data[pos + 2])) {
        return false;
      }
      if (!is_continuation(data[pos + 3])) {
        return false;
      }
      code_point = static_cast<uint32_t>(byte & 0b00000111) << 18 |
                   static_cast<uint32_t>(data[pos + 1] & 0b00111111) << 12 |
                   static_cast<uint32_t>(data[pos + 2] & 0b00111111) << 6 |
                   static_cast<uint32_t>(data[pos + 3] & 0b00111111);
      // Reject overlong forms and values above 0x10FFFF.
      if (code_point <= 0xffff || 0x10ffff < code_point) {
        return false;
      }
    } else {
      // Stray continuation byte or a lead byte of the form 11111xxx.
      return false;
    }

    pos += sequence_length;
  }
  return true;
}
93
94
8.21k
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
95
8.21k
  FuzzedDataProvider fdp(data, size);
96
8.21k
  std::string source = fdp.ConsumeRandomLengthString(256);
97
98
  // volatile forces the compiler to store the results without undue
99
  // optimizations
100
8.21k
  volatile size_t length = 0;
101
102
8.21k
  auto parse_url = ada::parse<ada::url>(source);
103
8.21k
  auto parse_url_aggregator = ada::parse<ada::url_aggregator>(source);
104
105
8.21k
  if (is_valid_utf8_string(source.data(), source.length())) {
106
6.11k
    if (parse_url.has_value() ^ parse_url_aggregator.has_value()) {
107
0
      printf("Source used to parse: %s", source.c_str());
108
0
      abort();
109
0
    }
110
6.11k
  }
111
112
8.21k
  if (parse_url) {
113
2.86k
    length += parse_url->get_href().size();
114
2.86k
    length += parse_url->get_origin().size();
115
2.86k
  }
116
117
8.21k
  if (parse_url_aggregator) {
118
2.72k
    length += parse_url_aggregator->get_href().size();
119
2.72k
    length += parse_url_aggregator->get_origin().size();
120
121
2.72k
    volatile bool is_parse_url_aggregator_output_valid = false;
122
2.72k
    is_parse_url_aggregator_output_valid = parse_url_aggregator->validate();
123
124
2.72k
    assert(parse_url->get_protocol() == parse_url_aggregator->get_protocol());
125
2.72k
    assert(parse_url->get_href() == parse_url_aggregator->get_href());
126
127
2.72k
    parse_url->set_href(source);
128
2.72k
    parse_url_aggregator->set_href(source);
129
2.72k
    assert(parse_url->get_href() == parse_url_aggregator->get_href());
130
2.72k
  }
131
132
  /**
133
   * ada::parse<ada::url>
134
   */
135
8.21k
  auto out_url = ada::parse<ada::url>("https://www.ada-url.com");
136
137
8.21k
  if (out_url) {
138
8.21k
    out_url->set_protocol(source);
139
8.21k
    out_url->set_username(source);
140
8.21k
    out_url->set_password(source);
141
8.21k
    out_url->set_hostname(source);
142
8.21k
    out_url->set_host(source);
143
8.21k
    out_url->set_pathname(source);
144
8.21k
    out_url->set_search(source);
145
8.21k
    out_url->set_hash(source);
146
8.21k
    out_url->set_port(source);
147
148
    // getters
149
8.21k
    length += out_url->get_protocol().size();
150
8.21k
    length += out_url->get_username().size();
151
8.21k
    length += out_url->get_password().size();
152
8.21k
    length += out_url->get_hostname().size();
153
8.21k
    length += out_url->get_host().size();
154
8.21k
    length += out_url->get_pathname().size();
155
8.21k
    length += out_url->get_search().size();
156
8.21k
    length += out_url->get_hash().size();
157
8.21k
    length += out_url->get_origin().size();
158
8.21k
    length += out_url->get_port().size();
159
160
8.21k
    length += out_url->to_string().size();
161
8.21k
  }
162
163
  /**
164
   * ada::parse<ada::url_aggregator>
165
   */
166
8.21k
  auto out_aggregator =
167
8.21k
      ada::parse<ada::url_aggregator>("https://www.ada-url.com");
168
169
8.21k
  if (out_aggregator) {
170
8.21k
    out_aggregator->set_protocol(source);
171
8.21k
    out_aggregator->set_username(source);
172
8.21k
    out_aggregator->set_password(source);
173
8.21k
    out_aggregator->set_hostname(source);
174
8.21k
    out_aggregator->set_host(source);
175
8.21k
    out_aggregator->set_pathname(source);
176
8.21k
    out_aggregator->set_search(source);
177
8.21k
    out_aggregator->set_hash(source);
178
8.21k
    out_aggregator->set_port(source);
179
180
    // getters
181
8.21k
    length += out_aggregator->get_protocol().size();
182
8.21k
    length += out_aggregator->get_username().size();
183
8.21k
    length += out_aggregator->get_password().size();
184
8.21k
    length += out_aggregator->get_hostname().size();
185
8.21k
    length += out_aggregator->get_host().size();
186
8.21k
    length += out_aggregator->get_pathname().size();
187
8.21k
    length += out_aggregator->get_search().size();
188
8.21k
    length += out_aggregator->get_hash().size();
189
8.21k
    length += out_aggregator->get_origin().size();
190
8.21k
    length += out_aggregator->get_port().size();
191
192
8.21k
    length += out_aggregator->to_string().size();
193
194
8.21k
    volatile bool is_output_valid = false;
195
8.21k
    is_output_valid = out_aggregator->validate();
196
197
    // Printing due to dead-code elimination
198
8.21k
    printf("diagram %s\n", out_aggregator->to_diagram().c_str());
199
200
    // clear methods
201
8.21k
    out_aggregator->clear_port();
202
8.21k
    out_aggregator->clear_search();
203
8.21k
    out_aggregator->clear_hash();
204
8.21k
  }
205
206
  /**
207
   * Node.js specific
208
   */
209
8.21k
  length += ada::href_from_file(source).size();
210
211
  /**
212
   * Others
213
   */
214
8.21k
  bool is_valid = ada::checkers::verify_dns_length(source);
215
216
  // Only used for avoiding dead-code elimination
217
8.21k
  if (is_valid) {
218
6.30k
    printf("dns length is valid\n");
219
6.30k
  }
220
221
  // Only used for avoiding dead-code elimination
222
8.21k
  printf("length of url is %zu\n", length);
223
224
8.21k
  return 0;
225
8.21k
}  // extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {