Coverage Report

Created: 2024-09-03 06:23

/src/brpc/src/brpc/uri.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
19
#include <ctype.h>                         // isalnum
20
21
#include <unordered_set>
22
23
#include "brpc/log.h"
24
#include "brpc/details/http_parser.h"      // http_parser_parse_url
25
#include "brpc/uri.h"                      // URI
26
27
28
namespace brpc {
29
30
// Constructs an empty URI: the port is -1 and both query-map bookkeeping
// flags start out false.
URI::URI()
    : _port(-1), _query_was_modified(false), _initialized_query_map(false) {
}
35
36
8.90k
// Nothing is released explicitly here; the members are destroyed by their
// own destructors.
URI::~URI() {}
38
39
11.2k
void URI::Clear() {
40
11.2k
    _st.reset();
41
11.2k
    _port = -1;
42
11.2k
    _query_was_modified = false;
43
11.2k
    _initialized_query_map = false;
44
11.2k
    _host.clear();
45
11.2k
    _path.clear();
46
11.2k
    _user_info.clear();
47
11.2k
    _fragment.clear();
48
11.2k
    _scheme.clear();
49
11.2k
    _query.clear();
50
11.2k
    _query_map.clear();
51
11.2k
}
52
53
0
// Exchanges every member of this URI with the corresponding member of |rhs|.
void URI::Swap(URI &rhs) {
    // Members that provide their own swap().
    _scheme.swap(rhs._scheme);
    _user_info.swap(rhs._user_info);
    _host.swap(rhs._host);
    _path.swap(rhs._path);
    _query.swap(rhs._query);
    _fragment.swap(rhs._fragment);
    _query_map.swap(rhs._query_map);
    _st.swap(rhs._st);

    // Plain scalars go through std::swap.
    std::swap(_port, rhs._port);
    std::swap(_initialized_query_map, rhs._initialized_query_map);
    std::swap(_query_was_modified, rhs._query_was_modified);
}
66
67
// Parse queries, which is case-sensitive
68
0
static void ParseQueries(URI::QueryMap& query_map, const std::string &query) {
69
0
    query_map.clear();
70
0
    if (query.empty()) {
71
0
        return;
72
0
    }
73
0
    for (QuerySplitter sp(query.c_str()); sp; ++sp) {
74
0
        if (!sp.key().empty()) {
75
0
            if (!query_map.initialized()) {
76
0
                query_map.init(URI::QUERY_MAP_INITIAL_BUCKET);
77
0
            }
78
0
            std::string key(sp.key().data(), sp.key().size());
79
0
            std::string value(sp.value().data(), sp.value().size());
80
0
            query_map[key] = value;
81
0
        }
82
0
    }
83
0
}
84
85
// Splits a trailing ":<port>" off the text in [host_begin, host_end).
//
// The range is scanned backwards over decimal digits. When the digits are
// preceded by a ':' that is not the very first character of the range and the
// number is a valid port (0..65535), the number is stored in *port and a
// pointer to that ':' is returned, i.e. the end of the host part.
// In every other case *port is set to -1 and host_end is returned, meaning
// the whole range is the host.
//
// Notes:
//  - A ':' with no digits after it yields port 0.
//  - Leading zeros are accepted ("h:00080" is port 80).
//  - Numbers above 65535 are not treated as a port. The previous code
//    accumulated them in a 64-bit integer and truncated to int, which could
//    produce arbitrary (even negative) port values.
inline const char* SplitHostAndPort(const char* host_begin,
                                    const char* host_end,
                                    int* port) {
    const int kMaxPort = 65535;
    const int kMaxPortDigits = 5;  // number of digits in "65535"

    *port = -1;
    if (host_begin == host_end) {
        // Empty range: do not form a pointer in front of host_begin.
        return host_end;
    }

    int value = 0;
    int scale = 1;
    int digits_seen = 0;
    bool out_of_range = false;
    for (const char* q = host_end - 1; q > host_begin; --q) {
        if (*q >= '0' && *q <= '9') {
            const int digit = *q - '0';
            if (digits_seen < kMaxPortDigits) {
                // At most 5 digits are accumulated, so value <= 99999 and
                // neither value nor scale can overflow.
                value += digit * scale;
                scale *= 10;
            } else if (digit != 0) {
                // A non-zero digit beyond the fifth position cannot fit.
                out_of_range = true;
            }
            ++digits_seen;
        } else if (*q == ':') {
            if (out_of_range || value > kMaxPort) {
                break;
            }
            *port = value;
            return q;
        } else {
            break;
        }
    }
    return host_end;
}
104
105
// valid characters in URL
// https://datatracker.ietf.org/doc/html/rfc3986#section-2.1
// https://datatracker.ietf.org/doc/html/rfc3986#section-2.3
// https://datatracker.ietf.org/doc/html/rfc3986#section-2.4
// space is not allowed by rfc3986, but allowed by brpc
//
// Returns true when |c| is an ASCII letter or digit, one of the punctuation
// characters listed in the switch below, or a space.
static bool is_valid_char(char c) {
    switch (c) {
    case ':': case '/': case '?': case '#': case '[': case ']':
    case '@': case '!': case '$': case '&': case '\'': case '(':
    case ')': case '*': case '+': case ',': case ';': case '=':
    case '-': case '.': case '_': case '~': case '%': case ' ':
        return true;
    default:
        // The <ctype.h> classifiers are only defined for values representable
        // as unsigned char (or EOF); a negative plain char must be converted
        // first, otherwise the call is undefined behavior.
        return isalnum(static_cast<unsigned char>(c)) != 0;
    }
}
119
120
84
// Returns true when the NUL-terminated string |p| consists of ' ' characters
// only. An empty string qualifies.
static bool is_all_spaces(const char* p) {
    while (*p == ' ') {
        ++p;
    }
    return *p == '\0';
}
124
125
// Actions stored in the lookup table below:
//   URI_PARSE_CONTINUE  ordinary character, the scanner just moves on
//   URI_PARSE_CHECK     character the scanner inspects further (' ', ':', '@')
//   URI_PARSE_BREAK     character that ends the scan ('\0', '#', '/', '?')
const char URI_PARSE_CONTINUE = 0;
const char URI_PARSE_CHECK = 1;
const char URI_PARSE_BREAK = 2;

// One entry per character code from -128 to 127, 16 entries per row. The
// comment at the start of each row is the code of the row's first entry.
static const char g_url_parsing_fast_action_map_raw[] = {
    /* -128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* -112 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  -96 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  -80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  -64 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  -48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  -32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  -16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*    0 */ URI_PARSE_BREAK /* NUL */,
               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*   16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*   32 */ URI_PARSE_CHECK /* space */, 0, 0, URI_PARSE_BREAK /* # */,
               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
               URI_PARSE_BREAK /* slash */,
    /*   48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
               URI_PARSE_CHECK /* : */, 0, 0, 0, 0, URI_PARSE_BREAK /* ? */,
    /*   64 */ URI_PARSE_CHECK /* @ */,
               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*   80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*   96 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  112 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
// Points at the entry for character code 0, so the table can be indexed
// directly with codes in the range -128..127.
static const char* const g_url_parsing_fast_action_map =
    g_url_parsing_fast_action_map_raw + 128;
158
159
// This implementation is faster than http_parser_parse_url() and allows
160
// ignoring of scheme("http://")
161
11.2k
int URI::SetHttpURL(const char* url) {
162
11.2k
    Clear();
163
    
164
11.2k
    const char* p = url;
165
    // skip heading blanks
166
11.2k
    if (*p == ' ') {
167
208
        for (++p; *p == ' '; ++p) {}
168
13
    }
169
11.2k
    const char* start = p;
170
    // Find end of host, locate scheme and user_info during the searching
171
11.2k
    bool need_scheme = true;
172
11.2k
    bool need_user_info = true;
173
43.3k
    for (; true; ++p) {
174
43.3k
        const char action = g_url_parsing_fast_action_map[(int)*p];
175
43.3k
        if (action == URI_PARSE_CONTINUE) {
176
28.8k
            continue;
177
28.8k
        }
178
14.4k
        if (action == URI_PARSE_BREAK) {
179
11.2k
            break;
180
11.2k
        }
181
3.25k
        if (!is_valid_char(*p)) {
182
0
            _st.set_error(EINVAL, "invalid character in url");
183
0
            return -1;
184
3.25k
        } else if (*p == ':') {
185
2.10k
            if (p[1] == '/' && p[2] == '/' && need_scheme) {
186
777
                need_scheme = false;
187
777
                _scheme.assign(start, p - start);
188
777
                p += 2;
189
777
                start = p + 1;
190
777
            }
191
2.10k
        } else if (*p == '@') {
192
1.12k
            if (need_user_info) {
193
286
                need_user_info = false;
194
286
                _user_info.assign(start, p - start);
195
286
                start = p + 1;
196
286
            }
197
1.12k
        } else if (*p == ' ') {
198
22
            if (!is_all_spaces(p + 1)) {
199
12
                _st.set_error(EINVAL, "Invalid space in url");
200
12
                return -1;
201
12
            }
202
10
            break;
203
22
        }
204
3.25k
    }
205
11.2k
    const char* host_end = SplitHostAndPort(start, p, &_port);
206
11.2k
    _host.assign(start, host_end - start);
207
11.2k
    if (*p == '/') {
208
8.97k
        start = p; //slash pointed by p is counted into _path
209
8.97k
        ++p;
210
116k
        for (; *p && *p != '?' && *p != '#'; ++p) {
211
107k
            if (*p == ' ') {
212
13
                if (!is_all_spaces(p + 1)) {
213
4
                    _st.set_error(EINVAL, "Invalid space in path");
214
4
                    return -1;
215
4
                }
216
9
                break;
217
13
            }
218
107k
        }
219
8.96k
        _path.assign(start, p - start);
220
8.96k
    }
221
11.2k
    if (*p == '?') {
222
1.10k
        start = ++p;
223
26.8k
        for (; *p && *p != '#'; ++p) {
224
25.7k
            if (*p == ' ') {
225
28
                if (!is_all_spaces(p + 1)) {
226
19
                    _st.set_error(EINVAL, "Invalid space in query");
227
19
                    return -1;
228
19
                }
229
9
                break;
230
28
            }
231
25.7k
        }
232
1.08k
        _query.assign(start, p - start);
233
1.08k
    }
234
11.2k
    if (*p == '#') {
235
834
        start = ++p;
236
18.0k
        for (; *p; ++p) {
237
17.2k
            if (*p == ' ') {
238
21
                if (!is_all_spaces(p + 1)) {
239
12
                    _st.set_error(EINVAL, "Invalid space in fragment");
240
12
                    return -1;
241
12
                }
242
9
                break;
243
21
            }
244
17.2k
        }
245
822
        _fragment.assign(start, p - start);
246
822
    }
247
11.2k
    return 0;
248
11.2k
}
249
250
int ParseURL(const char* url,
251
0
             std::string* scheme_out, std::string* host_out, int* port_out) {
252
0
    const char* p = url;
253
    // skip heading blanks
254
0
    if (*p == ' ') {
255
0
        for (++p; *p == ' '; ++p) {}
256
0
    }
257
0
    const char* start = p;
258
    // Find end of host, locate scheme and user_info during the searching
259
0
    bool need_scheme = true;
260
0
    bool need_user_info = true;
261
0
    for (; true; ++p) {
262
0
        const char action = g_url_parsing_fast_action_map[(int)*p];
263
0
        if (action == URI_PARSE_CONTINUE) {
264
0
            continue;
265
0
        }
266
0
        if (action == URI_PARSE_BREAK) {
267
0
            break;
268
0
        }
269
0
        if (*p == ':') {
270
0
            if (p[1] == '/' && p[2] == '/' && need_scheme) {
271
0
                need_scheme = false;
272
0
                if (scheme_out) {
273
0
                    scheme_out->assign(start, p - start);
274
0
                }
275
0
                p += 2;
276
0
                start = p + 1;
277
0
            }
278
0
        } else if (*p == '@') {
279
0
            if (need_user_info) {
280
0
                need_user_info = false;
281
0
                start = p + 1;
282
0
            }
283
0
        } else if (*p == ' ') {
284
0
            if (!is_all_spaces(p + 1)) {
285
0
                LOG(ERROR) << "Invalid space in url=`" << url << '\'';
286
0
                return -1;
287
0
            }
288
0
            break;
289
0
        }
290
0
    }
291
0
    int port = -1;
292
0
    const char* host_end = SplitHostAndPort(start, p, &port);
293
0
    if (host_out) {
294
0
        host_out->assign(start, host_end - start);
295
0
    }
296
0
    if (port_out) {
297
0
        *port_out = port;
298
0
    }
299
0
    return 0;
300
0
}
301
302
0
void URI::Print(std::ostream& os) const {
303
0
    if (!_host.empty()) {
304
0
        if (!_scheme.empty()) {
305
0
            os << _scheme << "://";
306
0
        } else {
307
0
            os << "http://";
308
0
        }
309
        // user_info is passed by Authorization
310
0
        os << _host;
311
0
        if (_port >= 0) {
312
0
            os << ':' << _port;
313
0
        }
314
0
    }
315
0
    PrintWithoutHost(os);
316
0
}
317
    
318
0
void URI::PrintWithoutHost(std::ostream& os) const {
319
0
    if (_path.empty()) {
320
        // According to rfc2616#section-5.1.2, the absolute path
321
        // cannot be empty; if none is present in the original URI, it MUST
322
        // be given as "/" (the server root).
323
0
        os << '/';
324
0
    } else {
325
0
        os << _path;
326
0
    }
327
0
    if (_initialized_query_map && _query_was_modified) {
328
0
        bool is_first = true;
329
0
        for (QueryIterator it = QueryBegin(); it != QueryEnd(); ++it) {
330
0
            if (is_first) {
331
0
                is_first = false;
332
0
                os << '?';
333
0
            } else {
334
0
                os << '&';
335
0
            }
336
0
            os << it->first;
337
0
            if (!it->second.empty()) {
338
0
                os << '=' << it->second;
339
0
            }
340
0
        }
341
0
    } else if (!_query.empty()) {
342
0
        os << '?' << _query;
343
0
    }
344
0
    if (!_fragment.empty()) {
345
0
        os << '#' << _fragment;
346
0
    }
347
0
}
348
349
0
void URI::InitializeQueryMap() const {
350
0
    if (!_query_map.initialized()) {
351
0
        CHECK_EQ(0, _query_map.init(QUERY_MAP_INITIAL_BUCKET));
352
0
    }
353
0
    ParseQueries(_query_map, _query);
354
0
    _query_was_modified = false;
355
0
    _initialized_query_map = true;
356
0
}
357
358
0
void URI::AppendQueryString(std::string* query, bool append_question_mark) const {
359
0
    if (_query_map.empty()) {
360
0
        return;
361
0
    }
362
0
    if (append_question_mark) {
363
0
        query->push_back('?');
364
0
    }
365
0
    QueryIterator it = QueryBegin();
366
0
    query->append(it->first);
367
0
    if (!it->second.empty()) {
368
0
        query->push_back('=');
369
0
        query->append(it->second);
370
0
    }
371
0
    ++it;
372
0
    for (; it != QueryEnd(); ++it) {
373
0
        query->push_back('&');
374
0
        query->append(it->first);
375
0
        if (!it->second.empty()) {
376
0
            query->push_back('=');
377
0
            query->append(it->second);
378
0
        }
379
0
    }
380
0
}
381
382
0
void URI::GenerateH2Path(std::string* h2_path) const {
383
0
    h2_path->reserve(_path.size() + _query.size() + _fragment.size() + 3);
384
0
    h2_path->clear();
385
0
    if (_path.empty()) {
386
0
        h2_path->push_back('/');
387
0
    } else {
388
0
        h2_path->append(_path);
389
0
    }
390
0
    if (_initialized_query_map && _query_was_modified) {
391
0
        AppendQueryString(h2_path, true);
392
0
    } else if (!_query.empty()) {
393
0
        h2_path->push_back('?');
394
0
        h2_path->append(_query);
395
0
    }
396
0
    if (!_fragment.empty()) {
397
0
        h2_path->push_back('#');
398
0
        h2_path->append(_fragment);
399
0
    }
400
0
}
401
402
1.19k
void URI::SetHostAndPort(const std::string& host) {
403
1.19k
    const char* const host_begin = host.c_str();
404
1.19k
    const char* host_end =
405
1.19k
        SplitHostAndPort(host_begin, host_begin + host.size(), &_port);
406
1.19k
    _host.assign(host_begin, host_end - host_begin);
407
1.19k
}
408
409
0
void URI::SetH2Path(const char* h2_path) {
410
0
    _path.clear();
411
0
    _query.clear();
412
0
    _fragment.clear();
413
0
    _query_was_modified = false;
414
0
    _initialized_query_map = false;
415
0
    _query_map.clear();
416
417
0
    const char* p = h2_path;
418
0
    const char* start = p;
419
0
    for (; *p && *p != '?' && *p != '#'; ++p) {}
420
0
    _path.assign(start, p - start);
421
0
    if (*p == '?') {
422
0
        start = ++p;
423
0
        for (; *p && *p != '#'; ++p) {}
424
0
        _query.assign(start, p - start);
425
0
    }
426
0
    if (*p == '#') {
427
0
        start = ++p;
428
0
        for (; *p; ++p) {}
429
0
        _fragment.assign(start, p - start);
430
0
    }
431
0
}
432
433
// Starts iterating over the query string |*str|. Only the pointer is stored,
// so the string has to stay alive while this object is used.
QueryRemover::QueryRemover(const std::string* str)
    : _query(str),
      _qs(str->data(), str->data() + str->size()),
      _iterated_len(0),
      _removed_current_key_value(false),
      _ever_removed(false) {}
440
441
0
// Moves on to the next key/value pair; does nothing once the end is reached.
// After the first removal, every pair that is stepped over without having
// been removed is appended to _modified_query, separated by '&'.
QueryRemover& QueryRemover::operator++() {
    if (!_qs) {
        return *this;
    }
    if (_ever_removed) {
        if (_removed_current_key_value) {
            // The current pair was removed: skip it and re-arm the flag for
            // the next pair.
            _removed_current_key_value = false;
        } else {
            // Cut off anything beyond what has been committed so far, then
            // commit the current pair.
            _modified_query.resize(_iterated_len);
            if (!_modified_query.empty()) {
                _modified_query.push_back('&');
                _iterated_len += 1;
            }
            _modified_query.append(key_and_value().data(),
                                   key_and_value().length());
            _iterated_len += key_and_value().length();
        }
    }
    ++_qs;
    return *this;
}
463
464
0
// Post-increment: advances this object and returns a copy of the state it had
// before advancing.
QueryRemover QueryRemover::operator++(int) {
    QueryRemover snapshot = *this;
    operator++();
    return snapshot;
}
469
470
0
void QueryRemover::remove_current_key_and_value() {
471
0
    _removed_current_key_value = true;
472
0
    if (!_ever_removed) {
473
0
        _ever_removed = true;
474
0
        size_t offset = key().data() - _query->data();
475
0
        size_t len = offset - ((offset > 0 && (*_query)[offset - 1] == '&')? 1: 0);
476
0
        _modified_query.append(_query->data(), len);
477
0
        _iterated_len += len;
478
0
    }
479
0
    return;
480
0
}
481
482
0
std::string QueryRemover::modified_query() {
483
0
    if (!_ever_removed) {
484
0
        return *_query;
485
0
    }
486
0
    size_t offset = key().data() - _query->data();
487
    // find out where the remaining string starts
488
0
    if (_removed_current_key_value) {
489
0
        size_t size = key_and_value().length();
490
0
        while (offset + size < _query->size() && (*_query)[offset + size] == '&') {
491
            // ingore unnecessary '&'
492
0
            size += 1;
493
0
        }
494
0
        offset += size;
495
0
    }
496
0
    _modified_query.resize(_iterated_len);
497
0
    if (offset < _query->size()) {
498
0
        if (!_modified_query.empty()) {
499
0
            _modified_query.push_back('&');
500
0
        }
501
0
        _modified_query.append(*_query, offset, std::string::npos);
502
0
    }
503
0
    return _modified_query;
504
0
}
505
506
void append_query(std::string *query_string,
507
                  const butil::StringPiece& key,
508
0
                  const butil::StringPiece& value) {
509
0
    if (!query_string->empty() && butil::back_char(*query_string) != '?') {
510
0
        query_string->push_back('&');
511
0
    }
512
0
    query_string->append(key.data(), key.size());
513
0
    query_string->push_back('=');
514
0
    query_string->append(value.data(), value.size());
515
0
}
516
517
} // namespace brpc