Grcov report - path_normalizer.cc

1

#include "source/extensions/http/header_validators/envoy_default/path_normalizer.h"

2

3

#include "envoy/http/header_validator_errors.h"

4

5

#include "source/common/http/header_utility.h"

6

#include "source/common/http/headers.h"

7

#include "source/common/runtime/runtime_features.h"

8

#include "source/extensions/http/header_validators/envoy_default/character_tables.h"

9

10

#include "absl/strings/match.h"

11

12

namespace Envoy {

13

namespace Extensions {

14

namespace Http {

15

namespace HeaderValidators {

16

namespace EnvoyDefault {

17

18

using ::envoy::extensions::http::header_validators::envoy_default::v3::HeaderValidatorConfig;

19

using ::envoy::extensions::http::header_validators::envoy_default::v3::

20

    HeaderValidatorConfig_UriPathNormalizationOptions;

21

using ::Envoy::Http::HeaderUtility;

22

using ::Envoy::Http::PathNormalizerResponseCodeDetail;

23

using ::Envoy::Http::RequestHeaderMap;

24

using ::Envoy::Http::testCharInTable;

25

using ::Envoy::Http::UhvResponseCodeDetail;

26

27

// Initializes the normalizer's `config_` and `config_overrides_` members from the supplied
// validator configuration and override flags. No other work is done at construction time.
PathNormalizer::PathNormalizer(const HeaderValidatorConfig& config,
                               const ConfigOverrides& config_overrides)
    : config_(config), config_overrides_(config_overrides) {}

30

31

// Normalizes (in place) and decodes the percent-encoded octet that starts at `iter`.
//
// `iter` must point at the '%' that starts the encoding and `end` is the end of the path.
// Returns:
//  - Invalid: `iter` is not at a '%', or the '%' is not followed by two hexadecimal digits.
//  - Decoded (with the octet): the octet is in the UNRESERVED set, or it is a slash and the
//    configured action is UNESCAPE_AND_FORWARD.
//  - DecodedRedirect (with the octet): the octet is a slash and the configured action is
//    UNESCAPE_AND_REDIRECT.
//  - Reject: the octet is a slash and the configured action is REJECT_REQUEST.
//  - Normalized: the encoding is valid but must stay encoded.
//
// Unless `preserve_url_encoded_case_` is set, each hexadecimal digit is rewritten to uppercase in
// the underlying string as soon as it has been validated.
PathNormalizer::DecodedOctet
PathNormalizer::normalizeAndDecodeOctet(std::string::iterator iter,
                                        std::string::iterator end) const {
  // From RFC 3986: https://datatracker.ietf.org/doc/html/rfc3986#section-2.1
  //
  // SPELLCHECKER(off)
  // pct-encoded = "%" HEXDIG HEXDIG
  //
  // The uppercase hexadecimal digits 'A' through 'F' are equivalent to
  // the lowercase digits 'a' through 'f', respectively. If two URIs
  // differ only in the case of hexadecimal digits used in percent-encoded
  // octets, they are equivalent. For consistency, URI producers and
  // normalizers should use uppercase hexadecimal digits for all percent-
  // encodings.
  //
  // Also from RFC 3986: https://datatracker.ietf.org/doc/html/rfc3986#section-2.4
  //
  // When a URI is dereferenced, the components and subcomponents significant
  // to the scheme-specific dereferencing process (if any) must be parsed and
  // separated before the percent-encoded octets within those components can
  // be safely decoded, as otherwise the data may be mistaken for component
  // delimiters. The only exception is for percent-encoded octets corresponding
  // to characters in the unreserved set, which can be decoded at any time.
  // SPELLCHECKER(on)

  if (iter == end || *iter != '%') {
    return {PercentDecodeResult::Invalid};
  }

  const bool preserve_case = config_overrides_.preserve_url_encoded_case_;

  // Accumulate in an unsigned byte so that octets >= 0x80 do not depend on signed char
  // arithmetic; the value is converted to char once, after both digits were consumed.
  unsigned char octet = 0;
  // Normalize and decode the octet
  for (int i = 0; i < 2; ++i) {
    ++iter;
    if (iter == end) {
      return {PercentDecodeResult::Invalid};
    }

    char nibble = *iter;
    // The <cctype> classifiers have undefined behavior for negative arguments, which is what a
    // path byte >= 0x80 is when char is signed. Convert to unsigned char before classifying.
    if (!isxdigit(static_cast<unsigned char>(nibble))) {
      return {PercentDecodeResult::Invalid};
    }

    // normalize: lowercase hexadecimal digits are mapped to uppercase by clearing bit 0x20.
    nibble = nibble >= 'a' ? nibble ^ 0x20 : nibble;
    if (!preserve_case) {
      *iter = nibble;
    }

    // decode: the first digit is the high nibble, the second digit is the low nibble.
    const int digit = nibble >= 'A' ? (nibble - 'A' + 10) : (nibble - '0');
    octet = static_cast<unsigned char>((octet << 4) | digit);
  }

  const char ch = static_cast<char>(octet);

  if (testCharInTable(kUnreservedCharTable, ch)) {
    // Based on RFC, only decode characters in the UNRESERVED set.
    return {PercentDecodeResult::Decoded, ch};
  }

  if (ch == '/' || ch == '\\') {
    // We decoded a slash character and how we handle it depends on the active configuration.
    switch (config_.uri_path_normalization_options().path_with_escaped_slashes_action()) {
    case HeaderValidatorConfig_UriPathNormalizationOptions::IMPLEMENTATION_SPECIFIC_DEFAULT:
      ABSL_FALLTHROUGH_INTENDED;
    case HeaderValidatorConfig_UriPathNormalizationOptions::KEEP_UNCHANGED:
      // default implementation: normalize the encoded octet and accept the path
      return {PercentDecodeResult::Normalized};

    case HeaderValidatorConfig_UriPathNormalizationOptions::REJECT_REQUEST:
      // Reject the entire request
      return {PercentDecodeResult::Reject};

    case HeaderValidatorConfig_UriPathNormalizationOptions::UNESCAPE_AND_FORWARD:
      // Decode the slash and accept the path.
      return {PercentDecodeResult::Decoded, ch};

    case HeaderValidatorConfig_UriPathNormalizationOptions::UNESCAPE_AND_REDIRECT:
      // Decode the slash and respond with a redirect to the normalized path.
      return {PercentDecodeResult::DecodedRedirect, ch};

    default:
      // This should never occur but it's here to make the compiler happy because of the extra
      // values added by protobuf.
      ENVOY_BUG(false, "Unexpected path_with_escaped_slashes_action");
      break;
    }
  }

  // The octet is a valid encoding but it wasn't decoded because it was outside the UNRESERVED
  // character set.
  return {PercentDecodeResult::Normalized};
}

124

125

/*
 * Find the start of the previous segment within the path. The start of the previous segment is the
 * first non-slash character that directly follows a slash. For example:
 *
 *   path = "/hello/world/..";
 *           ^      ^    ^-- current argument
 *           |      |-- start of previous segment (return value)
 *           |-- begin argument
 *
 * Duplicate slashes that are encountered are ignored. For example:
 *
 *   path = "/parent//child////..";
 *                    ^       ^-- current argument
 *                    |-- start of previous segment (return value)
 *
 * The ``current`` argument must point to a slash character. The ``begin`` iterator must be the
 * start of the path and it is returned on error, i.e. when no segment character is found between
 * ``begin`` and ``current``. The character at ``begin`` itself is never inspected: when a segment
 * character was seen but no slash precedes it after ``begin``, the position directly after
 * ``begin`` is returned.
 */
std::string::iterator findStartOfPreviousSegment(std::string::iterator current,
                                                 std::string::iterator begin) {
  bool seen_segment_char = false;
  // Walk backwards from the slash at ``current`` towards (but excluding) ``begin``.
  for (; current != begin; --current) {
    if (*current != '/') {
      // We are inside the previous segment.
      seen_segment_char = true;
      continue;
    }

    if (seen_segment_char) {
      // This slash precedes the previous segment, so the segment starts right after it.
      return std::next(current);
    }
    // Otherwise this is one of the (possibly duplicated) slashes that trail the segment.
  }

  // The scan reached ``begin``. If a segment was seen it starts directly after ``begin``,
  // otherwise there is no previous segment and ``begin`` signals the error.
  return seen_segment_char ? std::next(begin) : begin;
}

163

164

// Normalizes the :path header of `header_map` in place.
//
// The path component (everything before the first '?' or '#') is run through the decode,
// slash-merging (unless disabled by config) and dot-segment passes; the query/fragment suffix is
// re-attached unchanged. Returns a Reject result when the request target is malformed or a pass
// rejects the path, a Redirect result when any pass asks for a redirect, and success otherwise.
// The header is only rewritten when no pass rejected the path.
PathNormalizer::PathNormalizationResult
PathNormalizer::normalizePathUri(RequestHeaderMap& header_map) const {
  // Parse and normalize the :path header and update it in the map. From RFC 9112,
  // https://www.rfc-editor.org/rfc/rfc9112.html#section-3.2:
  //
  // request-target = origin-form
  //                / absolute-form
  //                / authority-form
  //                / asterisk-form
  //
  // origin-form    = absolute-path [ "?" query ]
  // absolute-form  = absolute-URI
  // authority-form = uri-host ":" port
  // asterisk-form  = "*"
  //
  // TODO(#23887) - potentially separate path normalization into multiple independent operations.
  const auto original_path = header_map.getPathValue();

  // asterisk-form, only valid for OPTIONS request
  const bool is_options_asterisk =
      original_path == "*" &&
      header_map.getMethodValue() == ::Envoy::Http::Headers::get().MethodValues.Options;
  if (is_options_asterisk) {
    return PathNormalizationResult::success();
  }

  if (HeaderUtility::isStandardConnectRequest(header_map)) {
    // The :path can only be empty for standard CONNECT methods, where the request-target is in
    // authority-form for HTTP/1 requests, or :path is empty for HTTP/2 requests.
    if (!original_path.empty()) {
      return {PathNormalizationResult::Action::Reject, UhvResponseCodeDetail::get().InvalidUrl};
    }
    return PathNormalizationResult::success();
  }

  // Everything else must be in origin-form, i.e. non-empty and starting with a slash.
  if (!absl::StartsWith(original_path, "/")) {
    return {PathNormalizationResult::Action::Reject, UhvResponseCodeDetail::get().InvalidUrl};
  }

  // Split the path and the query parameters / fragment component.
  auto [path_view, query] = splitPathAndQueryParams(original_path);
  // Work on a private copy of the path component; every pass below edits it in place.
  std::string path(path_view.data(), path_view.length());

  // Path normalization is based on RFC 3986:
  // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
  //
  // SPELLCHECKER(off)
  // path          = path-abempty    ; begins with "/" or is empty
  //               / path-absolute   ; begins with "/" but not "//"
  //               / path-noscheme   ; begins with a non-colon segment
  //               / path-rootless   ; begins with a segment
  //               / path-empty      ; zero characters
  //
  // path-abempty  = *( "/" segment )
  // path-absolute = "/" [ segment-nz *( "/" segment ) ]
  // path-noscheme = segment-nz-nc *( "/" segment )
  // path-rootless = segment-nz *( "/" segment )
  // path-empty    = 0<pchar>
  // segment       = *pchar
  // segment-nz    = 1*pchar
  // segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
  //               ; non-zero-length segment without any colon ":"
  //
  // pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
  // SPELLCHECKER(on)

  // pass 1: normalize and decode percent-encoded octets
  const auto decode_result = decodePass(path);
  if (decode_result.action() == PathNormalizationResult::Action::Reject) {
    return decode_result;
  }
  bool redirect = decode_result.action() == PathNormalizationResult::Action::Redirect;

  // The `envoy.uhv.allow_non_compliant_characters_in_path` flag allows the \ (back slash)
  // character, which legacy path normalization was changing to / (forward slash).
  if (config_overrides_.allow_non_compliant_characters_in_path_) {
    translateBackToForwardSlashes(path);
  }

  // pass 2: merge duplicate slashes (if configured to do so)
  if (!config_.uri_path_normalization_options().skip_merging_slashes()) {
    const auto merge_result = mergeSlashesPass(path);
    if (merge_result.action() == PathNormalizationResult::Action::Reject) {
      return merge_result;
    }
    redirect |= merge_result.action() == PathNormalizationResult::Action::Redirect;
  }

  // pass 3: collapse dot and dot-dot segments
  const auto collapse_result = collapseDotSegmentsPass(path);
  if (collapse_result.action() == PathNormalizationResult::Action::Reject) {
    return collapse_result;
  }
  redirect |= collapse_result.action() == PathNormalizationResult::Action::Redirect;

  // Update the :path header. We need to honor the normalized path and the original query/fragment
  // components.
  header_map.setPath(absl::StrCat(path, query));

  if (!redirect) {
    return PathNormalizationResult::success();
  }

  return {PathNormalizationResult::Action::Redirect,
          ::Envoy::Http::PathNormalizerResponseCodeDetail::get().RedirectNormalized};
}

279

280

67

// Rewrites every back slash in `path` to a forward slash, in place. The length of the string is
// unchanged and all other characters are left untouched.
void PathNormalizer::translateBackToForwardSlashes(std::string& path) const {
  for (auto it = path.begin(); it != path.end(); ++it) {
    if (*it != '\\') {
      continue;
    }
    *it = '/';
  }
}

287

288

74

// Pass 1 of path normalization: normalizes and decodes the percent-encoded octets of `path` in
// place, then shrinks the string to the rewritten length.
//
// The first character of the path is never inspected or rewritten; scanning starts at the second
// character, so `path` must not be empty. Returns Reject for an octet that must be rejected (or
// for a malformed encoding, unless the `uhv_allow_malformed_url_encoding` runtime feature is
// enabled), Redirect when at least one octet was decoded with a redirect request, and success
// otherwise. On Reject the string is left partially rewritten and is not resized.
PathNormalizer::PathNormalizationResult PathNormalizer::decodePass(std::string& path) const {
  auto begin = path.begin();
  auto end = path.end();
  // `src` is the scan position, `dst` the position of the next character to emit; `dst` never
  // gets ahead of `src`.
  auto src = std::next(begin);
  auto dst = std::next(begin);
  bool needs_redirect = false;
  const bool tolerate_malformed_encoding =
      Runtime::runtimeFeatureEnabled("envoy.reloadable_features.uhv_allow_malformed_url_encoding");

  while (src != end) {
    if (*src != '%') {
      // Ordinary character, copy it through.
      *dst++ = *src++;
      continue;
    }

    auto decode_result = normalizeAndDecodeOctet(src, end);
    // TODO(#23885) - add and honor config to not reject invalid percent-encoded octets.
    switch (decode_result.result()) {
    case PercentDecodeResult::Invalid:
      if (tolerate_malformed_encoding) {
        // Write the % character that starts invalid URL encoded sequence and then continue
        // scanning from the next character.
        *dst++ = *src++;
        break;
      }
      ABSL_FALLTHROUGH_INTENDED;
    case PercentDecodeResult::Reject:
      // Reject the request
      return {PathNormalizationResult::Action::Reject, UhvResponseCodeDetail::get().InvalidUrl};

    case PercentDecodeResult::Normalized:
      // Valid encoding that must stay encoded. Copy the three characters of the encoding, which
      // normalizeAndDecodeOctet may already have rewritten to uppercase in place.
      for (int copied = 0; copied < 3; ++copied) {
        *dst++ = *src++;
      }
      break;

    case PercentDecodeResult::DecodedRedirect:
      // The encoding was properly decoded but, based on the config, the request should be
      // redirected to the normalized path.
      needs_redirect = true;
      ABSL_FALLTHROUGH_INTENDED;
    case PercentDecodeResult::Decoded:
      // The encoding was decoded. Store the decoded octet in the last character of the percent
      // encoding (src[2]) so it will be processed in the next iteration. We can safely advance
      // 2 positions since we know that the value was correctly decoded.
      std::advance(src, 2);
      *src = decode_result.octet();
      break;
    }
  }

  path.resize(std::distance(begin, dst));

  if (!needs_redirect) {
    return PathNormalizationResult::success();
  }

  return {PathNormalizationResult::Action::Redirect,
          ::Envoy::Http::PathNormalizerResponseCodeDetail::get().RedirectNormalized};
}

347

348

66

// Pass 2 of path normalization: collapses every run of consecutive slashes in `path` into a
// single slash, in place, and shrinks the string accordingly. Scanning starts at the second
// character, so `path` must not be empty. This pass always reports success.
PathNormalizer::PathNormalizationResult PathNormalizer::mergeSlashesPass(std::string& path) const {
  auto begin = path.begin();
  auto end = path.end();
  // `src` is the scan position, `dst` the position of the next character to emit.
  auto src = std::next(begin);
  auto dst = std::next(begin);

  while (src != end) {
    // A slash is a duplicate when the character emitted just before it is also a slash.
    const bool duplicate_slash = *src == '/' && *std::prev(dst) == '/';
    if (duplicate_slash) {
      // Duplicate slash, merge it
      ++src;
      continue;
    }

    *dst++ = *src++;
  }

  path.resize(std::distance(begin, dst));
  return PathNormalizationResult::success();
}

372

373

// Pass 3 of path normalization: removes "." segments and resolves ".." segments of `path` in
// place, then shrinks the string to the rewritten length. Scanning starts at the second
// character, so `path` must not be empty. Returns Reject when a ".." segment has no previous
// segment to remove (for example the path "/.."), success otherwise.
PathNormalizer::PathNormalizationResult
PathNormalizer::collapseDotSegmentsPass(std::string& path) const {
  auto begin = path.begin();
  auto end = path.end();
  // `src` is the scan position, `dst` the position of the next character to emit.
  auto src = std::next(begin);
  auto dst = std::next(begin);
  // View over the same buffer that is being rewritten; it is only used to look ahead from `src`.
  absl::string_view path_view{path};

  while (src != end) {
    // Only a dot that directly follows an emitted slash can start a dot segment.
    if (*src != '.' || *std::prev(dst) != '/') {
      *dst++ = *src++;
      continue;
    }

    // attempt to read ahead 2 characters to see if we are in a "./" or "../" segment.
    const auto dot_segment = path_view.substr(std::distance(begin, src), 3);

    if (dot_segment == "." || absl::StartsWith(dot_segment, "./")) {
      // This is a "/./" segment or the path is terminated by "/.", ignore it.
      // Advance the read iterator by 1 if the path ends with "." or 2 if the segment is "./"
      const size_t skip = std::min<size_t>(dot_segment.size(), 2);
      std::advance(src, skip);
      continue;
    }

    if (dot_segment != "../" && dot_segment != "..") {
      // A segment that merely starts with a dot (for example ".hidden"), keep it.
      *dst++ = *src++;
      continue;
    }

    // This is a "/../" segment or the path is terminated by "/..", navigate one segment up.
    // Back up write 1 position to the previous slash to find the previous segment start.
    auto previous_segment = findStartOfPreviousSegment(std::prev(dst), begin);
    if (previous_segment == begin) {
      // This is an invalid ".." segment, most likely the full path is "/..", which attempts
      // to go above the root.
      return {PathNormalizationResult::Action::Reject, UhvResponseCodeDetail::get().InvalidUrl};
    }

    // Set the write position to overwrite the previous segment
    dst = previous_segment;
    // Advance the read iterator by 2 if the path ends with ".." or 3 if the segment is "../"
    const size_t skip = std::min<size_t>(dot_segment.size(), 3);
    std::advance(src, skip);
  }

  path.resize(std::distance(begin, dst));
  return PathNormalizationResult::success();
}

422

423

std::tuple<absl::string_view, absl::string_view>

424

74

PathNormalizer::splitPathAndQueryParams(absl::string_view path_and_query_params) const {

425

  // Split on the query (?) or fragment (#) delimiter, whichever one is first.

426

  // TODO(#23886) - add and honor config option for handling the path fragment component.

427

74

  auto delim = path_and_query_params.find_first_of("?#");

428

74

  if (delim == absl::string_view::npos) {

429

    // no query/fragment component

430

62

    return std::make_tuple(path_and_query_params, "");

431

62

432

433

12

  return std::make_tuple(path_and_query_params.substr(0, delim),

434

12

                         path_and_query_params.substr(delim));

435

74

436

437

} // namespace EnvoyDefault

438

} // namespace HeaderValidators

439

} // namespace Http

440

} // namespace Extensions

441

} // namespace Envoy