Coverage Report

Created: 2025-11-29 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/internal/utf8.h
Line
Count
Source
1
// Copyright 2021 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#ifndef THIRD_PARTY_CEL_CPP_INTERNAL_UTF8_H_
16
#define THIRD_PARTY_CEL_CPP_INTERNAL_UTF8_H_
17
18
#include <cstddef>
19
#include <string>
20
#include <utility>
21
22
#include "absl/base/attributes.h"
23
#include "absl/base/nullability.h"
24
#include "absl/strings/cord.h"
25
#include "absl/strings/string_view.h"
26
27
namespace cel::internal {
28
29
// Returns true if the given UTF-8 encoded string is not malformed, false
30
// otherwise.
31
bool Utf8IsValid(absl::string_view str);
32
bool Utf8IsValid(const absl::Cord& str);
33
34
// Returns the number of Unicode code points in the UTF-8 encoded string.
35
//
36
// If there are any invalid bytes, they will each be counted as an invalid code
37
// point.
38
size_t Utf8CodePointCount(absl::string_view str);
39
size_t Utf8CodePointCount(const absl::Cord& str);
40
41
// Validates the given UTF-8 encoded string. The first return value is the
42
// number of code points and its meaning depends on the second return value. If
43
// the second return value is true the entire string is not malformed and the
44
// first return value is the number of code points. If the second return value
45
// is false the string is malformed and the first return value is the number of
46
// code points up until the malformed sequence was encountered.
47
std::pair<size_t, bool> Utf8Validate(absl::string_view str);
48
std::pair<size_t, bool> Utf8Validate(const absl::Cord& str);
49
50
// Decodes the next code point, returning the decoded code point and the number
51
// of code units (a.k.a. bytes) consumed. In the event that an invalid code unit
52
// sequence is encountered the replacement character, U+FFFD, is returned with a
53
// code unit count of 1. As U+FFFD requires 3 code units when encoded, this can
54
// be used to differentiate valid input from malformed input.
55
size_t Utf8Decode(absl::string_view str, char32_t* absl_nullable code_point);
56
size_t Utf8Decode(const absl::Cord::CharIterator& it,
57
                  char32_t* absl_nullable code_point);
58
293M
// Convenience overload for callers that prefer a value result over an
// out-parameter. Forwards `str` to the `Utf8Decode(absl::string_view,
// char32_t*)` overload and returns `{code_point, code_units}`, where
// `code_units` is that overload's return value.
// NOTE(review): `code_point` is not initialized here; this relies on the
// out-parameter overload always writing it -- confirm against its definition.
inline std::pair<char32_t, size_t> Utf8Decode(absl::string_view str) {
59
293M
  char32_t code_point;
60
293M
  size_t code_units = Utf8Decode(str, &code_point);
61
293M
  return std::pair{code_point, code_units};
62
293M
}
63
// Convenience overload for `absl::Cord` iteration. Forwards `it` to the
// `Utf8Decode(const absl::Cord::CharIterator&, char32_t*)` overload and
// returns `{code_point, code_units}`, where `code_units` is that overload's
// return value. `it` is taken by const reference and is not advanced here.
// NOTE(review): `code_point` is not initialized here; this relies on the
// out-parameter overload always writing it -- confirm against its definition.
inline std::pair<char32_t, size_t> Utf8Decode(
64
0
    const absl::Cord::CharIterator& it) {
65
0
  char32_t code_point;
66
0
  size_t code_units = Utf8Decode(it, &code_point);
67
0
  return std::pair{code_point, code_units};
68
0
}
69
70
// Encodes the given code point and appends it to the buffer. If the code point
71
// is an unpaired surrogate or outside of the valid Unicode range it is replaced
72
// with the replacement character, U+FFFD.
73
size_t Utf8Encode(char32_t code_point, std::string* absl_nonnull buffer);
74
size_t Utf8Encode(char32_t code_point, char* absl_nonnull buffer);
75
// Deprecated overload with the legacy (buffer, code_point) argument order.
// Forwards to `Utf8Encode(code_point, &buffer)` and returns that call's
// result unchanged. New code should call the pointer-taking overload directly.
ABSL_DEPRECATED("Use other overload")
76
608M
inline size_t Utf8Encode(std::string& buffer, char32_t code_point) {
77
608M
  return Utf8Encode(code_point, &buffer);
78
608M
}
79
80
}  // namespace cel::internal
81
82
#endif  // THIRD_PARTY_CEL_CPP_INTERNAL_UTF8_H_