Coverage Report

Created: 2026-05-27 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/common/values/string_value.cc
Line
Count
Source
1
// Copyright 2023 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include <cstddef>
16
#include <cstdint>
17
#include <cstring>
18
#include <limits>
19
#include <string>
20
#include <tuple>
21
#include <utility>
22
#include <vector>
23
24
#include "google/protobuf/wrappers.pb.h"
25
#include "absl/base/nullability.h"
26
#include "absl/functional/overload.h"
27
#include "absl/log/absl_check.h"
28
#include "absl/status/status.h"
29
#include "absl/status/statusor.h"
30
#include "absl/strings/ascii.h"
31
#include "absl/strings/cord.h"
32
#include "absl/strings/cord_buffer.h"
33
#include "absl/strings/match.h"
34
#include "absl/strings/str_cat.h"
35
#include "absl/strings/string_view.h"
36
#include "absl/types/optional.h"
37
#include "common/internal/byte_string.h"
38
#include "common/internal/reference_count.h"
39
#include "common/value.h"
40
#include "internal/status_macros.h"
41
#include "internal/strings.h"
42
#include "internal/utf8.h"
43
#include "internal/well_known_types.h"
44
#include "runtime/internal/errors.h"
45
#include "google/protobuf/arena.h"
46
#include "google/protobuf/descriptor.h"
47
#include "google/protobuf/io/zero_copy_stream.h"
48
#include "google/protobuf/message.h"
49
50
namespace cel {
51
52
namespace {
53
54
using ::cel::well_known_types::ValueReflection;
55
56
template <typename Bytes>
57
2.30k
std::string StringDebugString(const Bytes& value) {
58
2.30k
  return value.NativeValue(absl::Overload(
59
2.30k
      [](absl::string_view string) -> std::string {
60
2.30k
        return internal::FormatStringLiteral(string);
61
2.30k
      },
62
2.30k
      [](const absl::Cord& cord) -> std::string {
63
0
        if (auto flat = cord.TryFlat(); flat.has_value()) {
64
0
          return internal::FormatStringLiteral(*flat);
65
0
        }
66
0
        return internal::FormatStringLiteral(static_cast<std::string>(cord));
67
0
      }));
68
2.30k
}
69
70
}  // namespace
71
72
StringValue StringValue::Concat(const StringValue& lhs, const StringValue& rhs,
73
2.17k
                                google::protobuf::Arena* absl_nonnull arena) {
74
2.17k
  return StringValue(
75
2.17k
      common_internal::ByteString::Concat(lhs.value_, rhs.value_, arena));
76
2.17k
}
77
78
2.30k
std::string StringValue::DebugString() const {
79
2.30k
  return StringDebugString(*this);
80
2.30k
}
81
82
absl::Status StringValue::SerializeTo(
83
    const google::protobuf::DescriptorPool* absl_nonnull descriptor_pool,
84
    google::protobuf::MessageFactory* absl_nonnull message_factory,
85
0
    google::protobuf::io::ZeroCopyOutputStream* absl_nonnull output) const {
86
0
  ABSL_DCHECK(descriptor_pool != nullptr);
87
0
  ABSL_DCHECK(message_factory != nullptr);
88
0
  ABSL_DCHECK(output != nullptr);
89
90
0
  google::protobuf::StringValue message;
91
0
  message.set_value(NativeString());
92
0
  if (!message.SerializePartialToZeroCopyStream(output)) {
93
0
    return absl::UnknownError(
94
0
        absl::StrCat("failed to serialize message: ", message.GetTypeName()));
95
0
  }
96
97
0
  return absl::OkStatus();
98
0
}
99
100
absl::Status StringValue::ConvertToJson(
101
    const google::protobuf::DescriptorPool* absl_nonnull descriptor_pool,
102
    google::protobuf::MessageFactory* absl_nonnull message_factory,
103
0
    google::protobuf::Message* absl_nonnull json) const {
104
0
  ABSL_DCHECK(descriptor_pool != nullptr);
105
0
  ABSL_DCHECK(message_factory != nullptr);
106
0
  ABSL_DCHECK(json != nullptr);
107
0
  ABSL_DCHECK_EQ(json->GetDescriptor()->well_known_type(),
108
0
                 google::protobuf::Descriptor::WELLKNOWNTYPE_VALUE);
109
110
0
  ValueReflection value_reflection;
111
0
  CEL_RETURN_IF_ERROR(value_reflection.Initialize(json->GetDescriptor()));
112
0
  NativeValue(
113
0
      [&](const auto& value) { value_reflection.SetStringValue(json, value); });
Unexecuted instantiation: string_value.cc:auto cel::StringValue::ConvertToJson(google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Message*) const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const
Unexecuted instantiation: string_value.cc:auto cel::StringValue::ConvertToJson(google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Message*) const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const
114
115
0
  return absl::OkStatus();
116
0
}
117
118
absl::Status StringValue::Equal(
119
    const Value& other,
120
    const google::protobuf::DescriptorPool* absl_nonnull descriptor_pool,
121
    google::protobuf::MessageFactory* absl_nonnull message_factory,
122
619
    google::protobuf::Arena* absl_nonnull arena, Value* absl_nonnull result) const {
123
619
  ABSL_DCHECK(descriptor_pool != nullptr);
124
619
  ABSL_DCHECK(message_factory != nullptr);
125
619
  ABSL_DCHECK(arena != nullptr);
126
619
  ABSL_DCHECK(result != nullptr);
127
128
619
  if (auto other_value = other.AsString(); other_value.has_value()) {
129
538
    *result = NativeValue([other_value](const auto& value) -> BoolValue {
130
538
      return other_value->NativeValue(
131
538
          [&value](const auto& other_value) -> BoolValue {
132
538
            return BoolValue{value == other_value};
133
538
          });
string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const::{lambda(auto:1 const&)#1}::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const
Line
Count
Source
131
538
          [&value](const auto& other_value) -> BoolValue {
132
538
            return BoolValue{value == other_value};
133
538
          });
Unexecuted instantiation: string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const::{lambda(auto:1 const&)#1}::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const
Unexecuted instantiation: string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const::{lambda(auto:1 const&)#1}::operator()<std::__1::basic_string_view<char, {lambda(auto:1 const&)#1}::operator()::char_traits<char> > >(std::__1::basic_string_view<char, {lambda(auto:1 const&)#1}::operator()::char_traits<char> > const&) const
Unexecuted instantiation: string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const::{lambda(auto:1 const&)#1}::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const
134
538
    });
string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const
Line
Count
Source
129
538
    *result = NativeValue([other_value](const auto& value) -> BoolValue {
130
538
      return other_value->NativeValue(
131
538
          [&value](const auto& other_value) -> BoolValue {
132
538
            return BoolValue{value == other_value};
133
538
          });
134
538
    });
Unexecuted instantiation: string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const
135
538
    return absl::OkStatus();
136
538
  }
137
81
  *result = FalseValue();
138
81
  return absl::OkStatus();
139
619
}
140
141
47
size_t StringValue::Size() const {
142
47
  return NativeValue([](const auto& alternative) -> size_t {
143
47
    return internal::Utf8CodePointCount(alternative);
144
47
  });
string_value.cc:unsigned long cel::StringValue::Size() const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const
Line
Count
Source
142
47
  return NativeValue([](const auto& alternative) -> size_t {
143
47
    return internal::Utf8CodePointCount(alternative);
144
47
  });
Unexecuted instantiation: string_value.cc:unsigned long cel::StringValue::Size() const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const
145
47
}
146
147
0
bool StringValue::IsEmpty() const {
148
0
  return NativeValue(
149
0
      [](const auto& alternative) -> bool { return alternative.empty(); });
Unexecuted instantiation: string_value.cc:bool cel::StringValue::IsEmpty() const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const
Unexecuted instantiation: string_value.cc:bool cel::StringValue::IsEmpty() const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const
150
0
}
151
152
3.12k
bool StringValue::Equals(absl::string_view string) const {
153
3.12k
  return value_.Equals(string);
154
3.12k
}
155
156
0
bool StringValue::Equals(const absl::Cord& string) const {
157
0
  return value_.Equals(string);
158
0
}
159
160
113
bool StringValue::Equals(const StringValue& string) const {
161
113
  return value_.Equals(string.value_);
162
113
}
163
164
0
StringValue StringValue::Clone(google::protobuf::Arena* absl_nonnull arena) const {
165
0
  return StringValue(value_.Clone(arena));
166
0
}
167
168
0
int StringValue::Compare(absl::string_view string) const {
169
0
  return value_.Compare(string);
170
0
}
171
172
0
int StringValue::Compare(const absl::Cord& string) const {
173
0
  return value_.Compare(string);
174
0
}
175
176
775
int StringValue::Compare(const StringValue& string) const {
177
775
  return value_.Compare(string.value_);
178
775
}
179
180
0
bool StringValue::StartsWith(absl::string_view string) const {
181
0
  return value_.StartsWith(string);
182
0
}
183
184
0
bool StringValue::StartsWith(const absl::Cord& string) const {
185
0
  return value_.StartsWith(string);
186
0
}
187
188
0
bool StringValue::StartsWith(const StringValue& string) const {
189
0
  return value_.StartsWith(string.value_);
190
0
}
191
192
0
bool StringValue::EndsWith(absl::string_view string) const {
193
0
  return value_.EndsWith(string);
194
0
}
195
196
0
bool StringValue::EndsWith(const absl::Cord& string) const {
197
0
  return value_.EndsWith(string);
198
0
}
199
200
0
bool StringValue::EndsWith(const StringValue& string) const {
201
0
  return value_.EndsWith(string.value_);
202
0
}
203
204
0
bool StringValue::Contains(absl::string_view string) const {
205
0
  return value_.Visit(absl::Overload(
206
0
      [&](absl::string_view lhs) -> bool {
207
0
        return absl::StrContains(lhs, string);
208
0
      },
209
0
      [&](const absl::Cord& lhs) -> bool { return lhs.Contains(string); }));
210
0
}
211
212
0
bool StringValue::Contains(const absl::Cord& string) const {
213
0
  return value_.Visit(absl::Overload(
214
0
      [&](absl::string_view lhs) -> bool {
215
0
        if (auto flat = string.TryFlat(); flat) {
216
0
          return absl::StrContains(lhs, *flat);
217
0
        }
218
        // There is no nice way to do this. We cannot use std::search due to
219
        // absl::Cord::CharIterator being an input iterator instead of a forward
220
        // iterator. So just make an external cord with a noop releaser. We know
221
        // the external cord will not outlive this function.
222
0
        return absl::MakeCordFromExternal(lhs, []() {}).Contains(string);
223
0
      },
224
0
      [&](const absl::Cord& lhs) -> bool { return lhs.Contains(string); }));
225
0
}
226
227
0
bool StringValue::Contains(const StringValue& string) const {
228
0
  return string.value_.Visit(absl::Overload(
229
0
      [&](absl::string_view rhs) -> bool { return Contains(rhs); },
230
0
      [&](const absl::Cord& rhs) -> bool { return Contains(rhs); }));
231
0
}
232
233
0
absl::optional<int64_t> StringValue::IndexOf(absl::string_view string) const {
234
0
  return value_.Visit(absl::Overload(
235
0
      [&](absl::string_view lhs) -> absl::optional<int64_t> {
236
0
        int64_t code_points = 0;
237
0
        while (lhs.size() >= string.size()) {
238
0
          if (absl::StartsWith(lhs, string)) {
239
0
            return code_points;
240
0
          }
241
0
          if (lhs.size() == string.size()) {
242
0
            break;
243
0
          }
244
0
          size_t code_units =
245
0
              cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr);
246
0
          lhs.remove_prefix(code_units);
247
0
          ++code_points;
248
0
        }
249
0
        return absl::nullopt;
250
0
      },
251
0
      [&](absl::Cord lhs) -> absl::optional<int64_t> {
252
0
        int64_t code_points = 0;
253
0
        while (lhs.size() >= string.size()) {
254
0
          if (lhs.StartsWith(string)) {
255
0
            return code_points;
256
0
          }
257
0
          if (lhs.size() == string.size()) {
258
0
            break;
259
0
          }
260
0
          size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(),
261
0
                                                        /*code_point=*/nullptr);
262
0
          lhs.RemovePrefix(code_units);
263
0
          ++code_points;
264
0
        }
265
0
        return absl::nullopt;
266
0
      }));
267
0
}
268
269
0
absl::optional<int64_t> StringValue::IndexOf(const absl::Cord& string) const {
270
0
  return value_.Visit(absl::Overload(
271
0
      [&](absl::string_view lhs) -> absl::optional<int64_t> {
272
0
        int64_t code_points = 0;
273
0
        while (lhs.size() >= string.size()) {
274
0
          if (lhs.substr(0, string.size()) == string) {
275
0
            return code_points;
276
0
          }
277
0
          if (lhs.size() == string.size()) {
278
0
            break;
279
0
          }
280
0
          size_t code_units =
281
0
              cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr);
282
0
          lhs.remove_prefix(code_units);
283
0
          ++code_points;
284
0
        }
285
0
        return absl::nullopt;
286
0
      },
287
0
      [&](absl::Cord lhs) -> absl::optional<int64_t> {
288
0
        int64_t code_points = 0;
289
0
        while (lhs.size() >= string.size()) {
290
0
          if (lhs.StartsWith(string)) {
291
0
            return code_points;
292
0
          }
293
0
          if (lhs.size() == string.size()) {
294
0
            break;
295
0
          }
296
0
          size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(),
297
0
                                                        /*code_point=*/nullptr);
298
0
          lhs.RemovePrefix(code_units);
299
0
          ++code_points;
300
0
        }
301
0
        return absl::nullopt;
302
0
      }));
303
0
}
304
305
0
absl::optional<int64_t> StringValue::IndexOf(const StringValue& string) const {
306
0
  return string.value_.Visit(absl::Overload(
307
0
      [this](absl::string_view rhs) -> absl::optional<int64_t> {
308
0
        return IndexOf(rhs);
309
0
      },
310
0
      [this](const absl::Cord& rhs) -> absl::optional<int64_t> {
311
0
        return IndexOf(rhs);
312
0
      }));
313
0
}
314
315
absl::optional<int64_t> StringValue::IndexOf(absl::string_view string,
316
0
                                             int64_t pos) const {
317
0
  if (pos < 0) {
318
0
    pos = 0;
319
0
  }
320
0
  return value_.Visit(absl::Overload(
321
0
      [&](absl::string_view lhs) -> absl::optional<int64_t> {
322
0
        int64_t code_points = 0;
323
0
        while (lhs.size() >= string.size()) {
324
0
          if (code_points >= pos && absl::StartsWith(lhs, string)) {
325
0
            return code_points;
326
0
          }
327
0
          if (lhs.size() == string.size()) {
328
0
            break;
329
0
          }
330
0
          size_t code_units =
331
0
              cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr);
332
0
          lhs.remove_prefix(code_units);
333
0
          ++code_points;
334
0
        }
335
0
        return absl::nullopt;
336
0
      },
337
0
      [&](absl::Cord lhs) -> absl::optional<int64_t> {
338
0
        int64_t code_points = 0;
339
0
        while (lhs.size() >= string.size()) {
340
0
          if (code_points >= pos && lhs.StartsWith(string)) {
341
0
            return code_points;
342
0
          }
343
0
          if (lhs.size() == string.size()) {
344
0
            break;
345
0
          }
346
0
          size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(),
347
0
                                                        /*code_point=*/nullptr);
348
0
          lhs.RemovePrefix(code_units);
349
0
          ++code_points;
350
0
        }
351
0
        return absl::nullopt;
352
0
      }));
353
0
}
354
355
absl::optional<int64_t> StringValue::IndexOf(const absl::Cord& string,
356
0
                                             int64_t pos) const {
357
0
  if (pos < 0) {
358
0
    pos = 0;
359
0
  }
360
0
  return value_.Visit(absl::Overload(
361
0
      [&](absl::string_view lhs) -> absl::optional<int64_t> {
362
0
        int64_t code_points = 0;
363
0
        while (lhs.size() >= string.size()) {
364
0
          if (code_points >= pos && lhs.substr(0, string.size()) == string) {
365
0
            return code_points;
366
0
          }
367
0
          if (lhs.size() == string.size()) {
368
0
            break;
369
0
          }
370
0
          size_t code_units =
371
0
              cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr);
372
0
          lhs.remove_prefix(code_units);
373
0
          ++code_points;
374
0
        }
375
0
        return absl::nullopt;
376
0
      },
377
0
      [&](absl::Cord lhs) -> absl::optional<int64_t> {
378
0
        int64_t code_points = 0;
379
0
        while (lhs.size() >= string.size()) {
380
0
          if (code_points >= pos && lhs.StartsWith(string)) {
381
0
            return code_points;
382
0
          }
383
0
          if (lhs.size() == string.size()) {
384
0
            break;
385
0
          }
386
0
          size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(),
387
0
                                                        /*code_point=*/nullptr);
388
0
          lhs.RemovePrefix(code_units);
389
0
          ++code_points;
390
0
        }
391
0
        return absl::nullopt;
392
0
      }));
393
0
}
394
395
absl::optional<int64_t> StringValue::IndexOf(const StringValue& string,
396
0
                                             int64_t pos) const {
397
0
  return string.value_.Visit(absl::Overload(
398
0
      [this, pos](absl::string_view rhs) -> absl::optional<int64_t> {
399
0
        return IndexOf(rhs, pos);
400
0
      },
401
0
      [this, pos](const absl::Cord& rhs) -> absl::optional<int64_t> {
402
0
        return IndexOf(rhs, pos);
403
0
      }));
404
0
}
405
406
absl::optional<int64_t> StringValue::LastIndexOf(
407
0
    absl::string_view string) const {
408
0
  return value_.Visit(absl::Overload(
409
0
      [&](absl::string_view lhs) -> absl::optional<int64_t> {
410
0
        int64_t last_index = -1;
411
0
        int64_t code_points = 0;
412
0
        while (lhs.size() >= string.size()) {
413
0
          if (absl::StartsWith(lhs, string)) {
414
0
            last_index = code_points;
415
0
          }
416
0
          if (lhs.size() == string.size()) {
417
0
            break;
418
0
          }
419
0
          size_t code_units =
420
0
              cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr);
421
0
          lhs.remove_prefix(code_units);
422
0
          ++code_points;
423
0
        }
424
0
        if (last_index < 0) return absl::nullopt;
425
0
        return last_index;
426
0
      },
427
0
      [&](absl::Cord lhs) -> absl::optional<int64_t> {
428
0
        int64_t last_index = -1;
429
0
        int64_t code_points = 0;
430
0
        while (lhs.size() >= string.size()) {
431
0
          if (lhs.StartsWith(string)) {
432
0
            last_index = code_points;
433
0
          }
434
0
          if (lhs.size() == string.size()) {
435
0
            break;
436
0
          }
437
0
          size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(),
438
0
                                                        /*code_point=*/nullptr);
439
0
          lhs.RemovePrefix(code_units);
440
0
          ++code_points;
441
0
        }
442
0
        if (last_index < 0) return absl::nullopt;
443
0
        return last_index;
444
0
      }));
445
0
}
446
447
absl::optional<int64_t> StringValue::LastIndexOf(
448
0
    const absl::Cord& string) const {
449
0
  return value_.Visit(absl::Overload(
450
0
      [&](absl::string_view lhs) -> absl::optional<int64_t> {
451
0
        int64_t last_index = -1;
452
0
        int64_t code_points = 0;
453
0
        while (lhs.size() >= string.size()) {
454
0
          if (lhs.substr(0, string.size()) == string) {
455
0
            last_index = code_points;
456
0
          }
457
0
          if (lhs.size() == string.size()) {
458
0
            break;
459
0
          }
460
0
          size_t code_units =
461
0
              cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr);
462
0
          lhs.remove_prefix(code_units);
463
0
          ++code_points;
464
0
        }
465
0
        if (last_index < 0) return absl::nullopt;
466
0
        return last_index;
467
0
      },
468
0
      [&](absl::Cord lhs) -> absl::optional<int64_t> {
469
0
        int64_t last_index = -1;
470
0
        int64_t code_points = 0;
471
0
        while (lhs.size() >= string.size()) {
472
0
          if (lhs.StartsWith(string)) {
473
0
            last_index = code_points;
474
0
          }
475
0
          if (lhs.size() == string.size()) {
476
0
            break;
477
0
          }
478
0
          size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(),
479
0
                                                        /*code_point=*/nullptr);
480
0
          lhs.RemovePrefix(code_units);
481
0
          ++code_points;
482
0
        }
483
0
        if (last_index < 0) return absl::nullopt;
484
0
        return last_index;
485
0
      }));
486
0
}
487
488
absl::optional<int64_t> StringValue::LastIndexOf(
489
0
    const StringValue& string) const {
490
0
  return string.value_.Visit(absl::Overload(
491
0
      [this](absl::string_view rhs) -> absl::optional<int64_t> {
492
0
        return LastIndexOf(rhs);
493
0
      },
494
0
      [this](const absl::Cord& rhs) -> absl::optional<int64_t> {
495
0
        return LastIndexOf(rhs);
496
0
      }));
497
0
}
498
499
absl::optional<int64_t> StringValue::LastIndexOf(absl::string_view string,
500
0
                                                 int64_t pos) const {
501
0
  if (pos < 0) {
502
0
    return absl::nullopt;
503
0
  }
504
0
  return value_.Visit(absl::Overload(
505
0
      [&](absl::string_view lhs) -> absl::optional<int64_t> {
506
0
        int64_t last_index = -1;
507
0
        int64_t code_points = 0;
508
0
        while (lhs.size() >= string.size()) {
509
0
          if (absl::StartsWith(lhs, string)) {
510
0
            last_index = code_points;
511
0
          }
512
0
          if (code_points >= pos || lhs.size() == string.size()) {
513
0
            break;
514
0
          }
515
0
          size_t code_units =
516
0
              cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr);
517
0
          lhs.remove_prefix(code_units);
518
0
          ++code_points;
519
0
        }
520
0
        if (last_index < 0) return absl::nullopt;
521
0
        return last_index;
522
0
      },
523
0
      [&](absl::Cord lhs) -> absl::optional<int64_t> {
524
0
        int64_t last_index = -1;
525
0
        int64_t code_points = 0;
526
0
        while (lhs.size() >= string.size()) {
527
0
          if (lhs.StartsWith(string)) {
528
0
            last_index = code_points;
529
0
          }
530
0
          if (code_points >= pos || lhs.size() == string.size()) {
531
0
            break;
532
0
          }
533
0
          size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(),
534
0
                                                        /*code_point=*/nullptr);
535
0
          lhs.RemovePrefix(code_units);
536
0
          ++code_points;
537
0
        }
538
0
        if (last_index < 0) return absl::nullopt;
539
0
        return last_index;
540
0
      }));
541
0
}
542
543
absl::optional<int64_t> StringValue::LastIndexOf(const absl::Cord& string,
544
0
                                                 int64_t pos) const {
545
0
  if (pos < 0) {
546
0
    return absl::nullopt;
547
0
  }
548
0
  return value_.Visit(absl::Overload(
549
0
      [&](absl::string_view lhs) -> absl::optional<int64_t> {
550
0
        int64_t last_index = -1;
551
0
        int64_t code_points = 0;
552
0
        while (lhs.size() >= string.size()) {
553
0
          if (lhs.substr(0, string.size()) == string) {
554
0
            last_index = code_points;
555
0
          }
556
0
          if (code_points >= pos || lhs.size() == string.size()) {
557
0
            break;
558
0
          }
559
0
          size_t code_units =
560
0
              cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr);
561
0
          lhs.remove_prefix(code_units);
562
0
          ++code_points;
563
0
        }
564
0
        if (last_index < 0) return absl::nullopt;
565
0
        return last_index;
566
0
      },
567
0
      [&](absl::Cord lhs) -> absl::optional<int64_t> {
568
0
        int64_t last_index = -1;
569
0
        int64_t code_points = 0;
570
0
        while (lhs.size() >= string.size()) {
571
0
          if (lhs.StartsWith(string)) {
572
0
            last_index = code_points;
573
0
          }
574
0
          if (code_points >= pos || lhs.size() == string.size()) {
575
0
            break;
576
0
          }
577
0
          size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(),
578
0
                                                        /*code_point=*/nullptr);
579
0
          lhs.RemovePrefix(code_units);
580
0
          ++code_points;
581
0
        }
582
0
        if (last_index < 0) return absl::nullopt;
583
0
        return last_index;
584
0
      }));
585
0
}
586
587
absl::optional<int64_t> StringValue::LastIndexOf(const StringValue& string,
588
0
                                                 int64_t pos) const {
589
0
  return string.value_.Visit(absl::Overload(
590
0
      [this, pos](absl::string_view rhs) -> absl::optional<int64_t> {
591
0
        return LastIndexOf(rhs, pos);
592
0
      },
593
0
      [this, pos](const absl::Cord& rhs) -> absl::optional<int64_t> {
594
0
        return LastIndexOf(rhs, pos);
595
0
      }));
596
0
}
597
598
namespace {
599
600
0
absl::StatusOr<size_t> SubstringImpl(absl::string_view string, uint64_t start) {
601
0
  size_t size_code_points = 0;
602
0
  size_t size_code_units = 0;
603
0
  while (!string.empty()) {
604
0
    char32_t code_point;
605
0
    size_t code_units;
606
0
    std::tie(code_point, code_units) = cel::internal::Utf8Decode(string);
607
0
    if (size_code_points == start) {
608
0
      return size_code_units;
609
0
    }
610
0
    string.remove_prefix(code_units);
611
0
    ++size_code_points;
612
0
    size_code_units += code_units;
613
0
  }
614
0
  if (size_code_points == start) {
615
0
    return size_code_units;
616
0
  }
617
0
  return absl::InvalidArgumentError(
618
0
      "<string>.substring(<start>): <start> is greater than <string>.size()");
619
0
}
620
621
absl::StatusOr<absl::Cord> SubstringImpl(const absl::Cord& cord,
622
0
                                         uint64_t start) {
623
0
  absl::Cord::CharIterator char_begin = cord.char_begin();
624
0
  absl::Cord::CharIterator char_end = cord.char_end();
625
0
  size_t size_code_points = 0;
626
0
  size_t size_code_units = 0;
627
0
  while (char_begin != char_end) {
628
0
    char32_t code_point;
629
0
    size_t code_units;
630
0
    std::tie(code_point, code_units) = cel::internal::Utf8Decode(char_begin);
631
0
    if (size_code_points == start) {
632
0
      return cord.Subcord(size_code_units, std::numeric_limits<size_t>::max());
633
0
    }
634
0
    absl::Cord::Advance(&char_begin, code_units);
635
0
    ++size_code_points;
636
0
    size_code_units += code_units;
637
0
  }
638
0
  if (size_code_points == start) {
639
0
    return cord;
640
0
  }
641
0
  return absl::InvalidArgumentError(
642
0
      "<string>.substring(<start>): <start> is greater than <string>.size()");
643
0
}
644
645
}  // namespace
646
647
0
Value StringValue::Substring(int64_t start) const {
648
0
  if (start < 0) {
649
0
    return ErrorValue(absl::InvalidArgumentError(
650
0
        "<string>.substring(<start>): <start> is less than 0"));
651
0
  }
652
0
  if (static_cast<uint64_t>(start) > value_.size()) {
653
0
    return ErrorValue(absl::InvalidArgumentError(
654
0
        "<string>.substring(<start>, <end>): <start> or <end> is greater than "
655
0
        "<string>.size()"));
656
0
  }
657
0
  if (start == 0) {
658
0
    return *this;
659
0
  }
660
0
  switch (value_.GetKind()) {
661
0
    case common_internal::ByteStringKind::kSmall: {
662
0
      absl::StatusOr<size_t> status_or_index =
663
0
          (SubstringImpl)(value_.GetSmall(), start);
664
0
      if (!status_or_index.ok()) {
665
0
        return ErrorValue(std::move(status_or_index).status());
666
0
      }
667
0
      StringValue result;
668
0
      result.value_.rep_.header.kind = common_internal::ByteStringKind::kSmall;
669
0
      result.value_.rep_.small.size = value_.rep_.small.size - *status_or_index;
670
0
      std::memcpy(result.value_.rep_.small.data,
671
0
                  value_.rep_.small.data + *status_or_index,
672
0
                  result.value_.rep_.small.size);
673
0
      result.value_.rep_.small.arena = value_.rep_.small.arena;
674
0
      return result;
675
0
    }
676
0
    case common_internal::ByteStringKind::kMedium: {
677
0
      absl::StatusOr<size_t> status_or_index =
678
0
          (SubstringImpl)(value_.GetMedium(), start);
679
0
      if (!status_or_index.ok()) {
680
0
        return ErrorValue(std::move(status_or_index).status());
681
0
      }
682
0
      StringValue result;
683
0
      result.value_.rep_.header.kind = common_internal::ByteStringKind::kMedium;
684
0
      result.value_.rep_.medium.size =
685
0
          value_.rep_.medium.size - *status_or_index;
686
0
      result.value_.rep_.medium.data =
687
0
          value_.rep_.medium.data + *status_or_index;
688
0
      result.value_.rep_.medium.owner = value_.rep_.medium.owner;
689
0
      common_internal::StrongRef(result.value_.GetMediumReferenceCount());
690
0
      return result;
691
0
    }
692
0
    case common_internal::ByteStringKind::kLarge: {
693
0
      absl::StatusOr<absl::Cord> status_or_cord =
694
0
          (SubstringImpl)(value_.GetLarge(), start);
695
0
      if (!status_or_cord.ok()) {
696
0
        return ErrorValue(std::move(status_or_cord).status());
697
0
      }
698
0
      return StringValue::Wrap(*std::move(status_or_cord));
699
0
    }
700
0
  }
701
0
}
702
703
namespace {
704
705
absl::StatusOr<std::pair<size_t, size_t>> SubstringImpl(
706
0
    absl::string_view string, uint64_t start, uint64_t end) {
707
0
  size_t size_code_points = 0;
708
0
  size_t size_code_units = 0;
709
0
  size_t start_code_units;
710
0
  while (!string.empty()) {
711
0
    if (size_code_points == start) {
712
0
      start_code_units = size_code_units;
713
0
    }
714
0
    if (size_code_points == end) {
715
0
      return std::pair{start_code_units, size_code_units};
716
0
    }
717
0
    char32_t code_point;
718
0
    size_t code_units;
719
0
    std::tie(code_point, code_units) = cel::internal::Utf8Decode(string);
720
0
    string.remove_prefix(code_units);
721
0
    ++size_code_points;
722
0
    size_code_units += code_units;
723
0
  }
724
0
  if (size_code_points == start && start == end) {
725
0
    return std::pair{size_code_units, size_code_units};
726
0
  }
727
0
  return absl::InvalidArgumentError(
728
0
      "<string>.substring(<start>, <end>): <start> or <end> is greater than "
729
0
      "<string>.size()");
730
0
}
731
732
absl::StatusOr<absl::Cord> SubstringImpl(const absl::Cord& cord, uint64_t start,
733
0
                                         uint64_t end) {
734
0
  absl::Cord::CharIterator char_begin = cord.char_begin();
735
0
  absl::Cord::CharIterator char_end = cord.char_end();
736
0
  size_t size_code_points = 0;
737
0
  size_t size_code_units = 0;
738
0
  size_t start_code_units;
739
0
  while (char_begin != char_end) {
740
0
    if (size_code_points == start) {
741
0
      start_code_units = size_code_units;
742
0
    }
743
0
    if (size_code_points == end) {
744
0
      return cord.Subcord(start_code_units,
745
0
                          size_code_points - start_code_units);
746
0
    }
747
0
    char32_t code_point;
748
0
    size_t code_units;
749
0
    std::tie(code_point, code_units) = cel::internal::Utf8Decode(char_begin);
750
0
    absl::Cord::Advance(&char_begin, code_units);
751
0
    ++size_code_points;
752
0
    size_code_units += code_units;
753
0
  }
754
0
  if (size_code_points == start && start == end) {
755
0
    return absl::Cord();
756
0
  }
757
0
  return absl::InvalidArgumentError(
758
0
      "<string>.substring(<start>, <end>): <start> or <end> is greater than "
759
0
      "<string>.size()");
760
0
}
761
762
}  // namespace
763
764
0
Value StringValue::Substring(int64_t start, int64_t end) const {
765
0
  if (start < 0) {
766
0
    return ErrorValue(absl::InvalidArgumentError(
767
0
        "<string>.substring(<start>, <end>): <start> is less than 0"));
768
0
  }
769
0
  if (end < start) {
770
0
    return ErrorValue(absl::InvalidArgumentError(
771
0
        "<string>.substring(<start>, <end>): <end> is less than <start>"));
772
0
  }
773
0
  if (static_cast<uint64_t>(start) > value_.size() ||
774
0
      static_cast<uint64_t>(end) > value_.size()) {
775
0
    return ErrorValue(absl::InvalidArgumentError(
776
0
        "<string>.substring(<start>, <end>): <start> or <end> is greater than "
777
0
        "<string>.size()"));
778
0
  }
779
0
  switch (value_.GetKind()) {
780
0
    case common_internal::ByteStringKind::kSmall: {
781
0
      absl::StatusOr<std::pair<size_t, size_t>> status_or_indices =
782
0
          (SubstringImpl)(value_.GetSmall(), start, end);
783
0
      if (!status_or_indices.ok()) {
784
0
        return ErrorValue(std::move(status_or_indices).status());
785
0
      }
786
0
      StringValue result;
787
0
      result.value_.rep_.header.kind = common_internal::ByteStringKind::kSmall;
788
0
      result.value_.rep_.small.size =
789
0
          (status_or_indices->second - status_or_indices->first);
790
0
      std::memcpy(result.value_.rep_.small.data,
791
0
                  value_.rep_.small.data + status_or_indices->first,
792
0
                  result.value_.rep_.small.size);
793
0
      result.value_.rep_.small.arena = value_.rep_.small.arena;
794
0
      return result;
795
0
    }
796
0
    case common_internal::ByteStringKind::kMedium: {
797
0
      absl::StatusOr<std::pair<size_t, size_t>> status_or_indices =
798
0
          (SubstringImpl)(value_.GetMedium(), start, end);
799
0
      if (!status_or_indices.ok()) {
800
0
        return ErrorValue(std::move(status_or_indices).status());
801
0
      }
802
0
      StringValue result;
803
0
      result.value_.rep_.header.kind = common_internal::ByteStringKind::kMedium;
804
0
      result.value_.rep_.medium.size =
805
0
          (status_or_indices->second - status_or_indices->first);
806
0
      result.value_.rep_.medium.data =
807
0
          value_.rep_.medium.data + status_or_indices->first;
808
0
      result.value_.rep_.medium.owner = value_.rep_.medium.owner;
809
0
      common_internal::StrongRef(result.value_.GetMediumReferenceCount());
810
0
      return result;
811
0
    }
812
0
    case common_internal::ByteStringKind::kLarge: {
813
0
      absl::StatusOr<absl::Cord> status_or_cord =
814
0
          (SubstringImpl)(value_.GetLarge(), start, end);
815
0
      if (!status_or_cord.ok()) {
816
0
        return ErrorValue(std::move(status_or_cord).status());
817
0
      }
818
0
      return StringValue::Wrap(*std::move(status_or_cord));
819
0
    }
820
0
  }
821
0
}
822
823
namespace {
824
825
0
bool LowerAsciiImpl(absl::string_view in, std::string* absl_nonnull out) {
826
0
  if (in.empty()) {
827
0
    return false;
828
0
  }
829
0
  bool needs_conversion = false;
830
0
  for (char c : in) {
831
0
    if (absl::ascii_isupper(c)) {
832
0
      needs_conversion = true;
833
0
      break;
834
0
    }
835
0
  }
836
837
0
  if (!needs_conversion) {
838
0
    return false;
839
0
  }
840
841
0
  *out = absl::AsciiStrToLower(in);
842
0
  return true;
843
0
}
844
845
0
absl::Cord LowerAsciiImpl(const absl::Cord& in) {
846
0
  if (in.empty()) {
847
0
    return in;
848
0
  }
849
0
  size_t pos = 0;
850
0
  bool needs_conversion = false;
851
0
  for (char c : in.Chars()) {
852
0
    if (absl::ascii_isupper(c)) {
853
0
      needs_conversion = true;
854
0
      break;
855
0
    }
856
0
    pos++;
857
0
  }
858
0
  if (!needs_conversion) {
859
0
    return in;
860
0
  }
861
0
  absl::Cord out = in.Subcord(0, pos);
862
0
  absl::Cord rest = in.Subcord(pos, in.size() - pos);
863
0
  std::string suffix;
864
0
  suffix.resize(rest.size());
865
0
  size_t current = 0;
866
0
  for (char c : rest.Chars()) {
867
0
    suffix[current++] = absl::ascii_tolower(c);
868
0
  }
869
0
  out.Append(std::move(suffix));
870
0
  return out;
871
0
}
872
873
}  // namespace
874
875
0
StringValue StringValue::LowerAscii(google::protobuf::Arena* absl_nonnull arena) const {
876
0
  ABSL_DCHECK(arena != nullptr);
877
878
0
  switch (value_.GetKind()) {
879
0
    case common_internal::ByteStringKind::kSmall: {
880
0
      std::string out;
881
0
      if (!(LowerAsciiImpl)(value_.GetSmall(), &out)) {
882
0
        return *this;
883
0
      }
884
0
      return StringValue::From(std::move(out), arena);
885
0
    }
886
0
    case common_internal::ByteStringKind::kMedium: {
887
0
      std::string out;
888
0
      if (!(LowerAsciiImpl)(value_.GetMedium(), &out)) {
889
0
        return *this;
890
0
      }
891
0
      return StringValue::From(std::move(out), arena);
892
0
    }
893
0
    case common_internal::ByteStringKind::kLarge:
894
0
      return StringValue::Wrap((LowerAsciiImpl)(value_.GetLarge()));
895
0
  }
896
0
}
897
898
namespace {
899
900
0
bool UpperAsciiImpl(absl::string_view in, std::string* absl_nonnull out) {
901
0
  if (in.empty()) {
902
0
    return false;
903
0
  }
904
0
  bool needs_conversion = false;
905
0
  for (char c : in) {
906
0
    if (absl::ascii_islower(c)) {
907
0
      needs_conversion = true;
908
0
      break;
909
0
    }
910
0
  }
911
912
0
  if (!needs_conversion) {
913
0
    return false;
914
0
  }
915
916
0
  *out = absl::AsciiStrToUpper(in);
917
0
  return true;
918
0
}
919
920
0
absl::Cord UpperAsciiImpl(const absl::Cord& in) {
921
0
  if (in.empty()) {
922
0
    return in;
923
0
  }
924
0
  size_t pos = 0;
925
0
  bool needs_conversion = false;
926
0
  for (char c : in.Chars()) {
927
0
    if (absl::ascii_islower(c)) {
928
0
      needs_conversion = true;
929
0
      break;
930
0
    }
931
0
    pos++;
932
0
  }
933
0
  if (!needs_conversion) {
934
0
    return in;
935
0
  }
936
0
  absl::Cord out = in.Subcord(0, pos);
937
0
  absl::Cord rest = in.Subcord(pos, in.size() - pos);
938
0
  std::string suffix;
939
0
  suffix.resize(rest.size());
940
0
  size_t current = 0;
941
0
  for (char c : rest.Chars()) {
942
0
    suffix[current++] = absl::ascii_toupper(c);
943
0
  }
944
0
  out.Append(std::move(suffix));
945
0
  return out;
946
0
}
947
948
}  // namespace
949
950
0
StringValue StringValue::UpperAscii(google::protobuf::Arena* absl_nonnull arena) const {
951
0
  ABSL_DCHECK(arena != nullptr);
952
953
0
  switch (value_.GetKind()) {
954
0
    case common_internal::ByteStringKind::kSmall: {
955
0
      std::string out;
956
0
      if (!(UpperAsciiImpl)(value_.GetSmall(), &out)) {
957
0
        return *this;
958
0
      }
959
0
      return StringValue::From(std::move(out), arena);
960
0
    }
961
0
    case common_internal::ByteStringKind::kMedium: {
962
0
      std::string out;
963
0
      if (!(UpperAsciiImpl)(value_.GetMedium(), &out)) {
964
0
        return *this;
965
0
      }
966
0
      return StringValue::From(std::move(out), arena);
967
0
    }
968
0
    case common_internal::ByteStringKind::kLarge:
969
0
      return StringValue::Wrap((UpperAsciiImpl)(value_.GetLarge()));
970
0
  }
971
0
}
972
973
namespace {
974
975
// Per CEL spec, checking for Unicode whitespace.
976
0
bool IsUnicodeWhitespace(char32_t c) {
977
0
  if (c <= 0x0020) {
978
0
    return c == 0x0020 || (c >= 0x0009 && c <= 0x000D);
979
0
  }
980
0
  if (c > 0x3000) return false;
981
0
  if (c == 0x0085 || c == 0x00a0 || c == 0x1680) return true;
982
0
  if (c >= 0x2000 && c <= 0x200a) return true;
983
0
  return c == 0x2028 || c == 0x2029 || c == 0x202f || c == 0x205f ||
984
0
         c == 0x3000;
985
0
}
986
987
0
std::pair<size_t, size_t> TrimImpl(absl::string_view string) {
988
0
  absl::string_view temp_string = string;
989
0
  size_t left_trim_bytes = 0;
990
0
  while (!temp_string.empty()) {
991
0
    char32_t c;
992
0
    size_t char_len = cel::internal::Utf8Decode(temp_string, &c);
993
0
    if (!IsUnicodeWhitespace(c)) {
994
0
      break;
995
0
    }
996
0
    temp_string.remove_prefix(char_len);
997
0
    left_trim_bytes += char_len;
998
0
  }
999
1000
0
  if (left_trim_bytes == string.size()) {
1001
0
    return {left_trim_bytes, 0};
1002
0
  }
1003
1004
0
  size_t last_non_ws_end_bytes = 0;
1005
0
  size_t current_pos_bytes = 0;
1006
0
  temp_string = string;
1007
0
  while (!temp_string.empty()) {
1008
0
    char32_t c;
1009
0
    size_t char_len = cel::internal::Utf8Decode(temp_string, &c);
1010
0
    if (!IsUnicodeWhitespace(c)) {
1011
0
      last_non_ws_end_bytes = current_pos_bytes + char_len;
1012
0
    }
1013
0
    current_pos_bytes += char_len;
1014
0
    temp_string.remove_prefix(char_len);
1015
0
  }
1016
1017
0
  return {left_trim_bytes, string.size() - last_non_ws_end_bytes};
1018
0
}
1019
1020
0
absl::Cord TrimImpl(const absl::Cord& cord) {
1021
0
  size_t left_trim_bytes = 0;
1022
0
  {
1023
0
    absl::Cord::CharIterator begin = cord.char_begin();
1024
0
    const absl::Cord::CharIterator end = cord.char_end();
1025
0
    while (begin != end) {
1026
0
      char32_t c;
1027
0
      size_t char_len;
1028
0
      std::tie(c, char_len) = cel::internal::Utf8Decode(begin);
1029
0
      if (!IsUnicodeWhitespace(c)) {
1030
0
        break;
1031
0
      }
1032
0
      absl::Cord::Advance(&begin, char_len);
1033
0
      left_trim_bytes += char_len;
1034
0
    }
1035
0
  }
1036
1037
0
  if (left_trim_bytes == cord.size()) {
1038
0
    return absl::Cord();
1039
0
  }
1040
1041
0
  absl::Cord ltrimmed =
1042
0
      cord.Subcord(left_trim_bytes, cord.size() - left_trim_bytes);
1043
1044
0
  size_t last_non_ws_end_bytes = 0;
1045
0
  size_t current_pos_bytes = 0;
1046
0
  {
1047
0
    absl::Cord::CharIterator begin = ltrimmed.char_begin();
1048
0
    const absl::Cord::CharIterator end = ltrimmed.char_end();
1049
0
    while (begin != end) {
1050
0
      char32_t c;
1051
0
      size_t char_len;
1052
0
      std::tie(c, char_len) = cel::internal::Utf8Decode(begin);
1053
0
      if (!IsUnicodeWhitespace(c)) {
1054
0
        last_non_ws_end_bytes = current_pos_bytes + char_len;
1055
0
      }
1056
0
      absl::Cord::Advance(&begin, char_len);
1057
0
      current_pos_bytes += char_len;
1058
0
    }
1059
0
  }
1060
0
  return ltrimmed.Subcord(0, last_non_ws_end_bytes);
1061
0
}
1062
1063
}  // namespace
1064
1065
0
StringValue StringValue::Trim() const {
1066
0
  switch (value_.GetKind()) {
1067
0
    case common_internal::ByteStringKind::kSmall: {
1068
0
      std::pair<size_t, size_t> trims = (TrimImpl)(value_.GetSmall());
1069
0
      StringValue result;
1070
0
      result.value_.rep_.header.kind = common_internal::ByteStringKind::kSmall;
1071
0
      result.value_.rep_.small.size =
1072
0
          value_.rep_.small.size - trims.first - trims.second;
1073
0
      std::memcpy(result.value_.rep_.small.data,
1074
0
                  value_.rep_.small.data + trims.first,
1075
0
                  result.value_.rep_.small.size);
1076
0
      result.value_.rep_.small.arena = value_.GetSmallArena();
1077
0
      return result;
1078
0
    }
1079
0
    case common_internal::ByteStringKind::kMedium: {
1080
0
      std::pair<size_t, size_t> trims = (TrimImpl)(value_.GetMedium());
1081
0
      StringValue result;
1082
0
      result.value_.rep_.header.kind = common_internal::ByteStringKind::kMedium;
1083
0
      result.value_.rep_.medium.size =
1084
0
          value_.rep_.medium.size - trims.first - trims.second;
1085
0
      result.value_.rep_.medium.data = value_.rep_.medium.data + trims.first;
1086
0
      result.value_.rep_.medium.owner = value_.rep_.medium.owner;
1087
0
      common_internal::StrongRef(result.value_.GetMediumReferenceCount());
1088
0
      return result;
1089
0
    }
1090
0
    case common_internal::ByteStringKind::kLarge: {
1091
0
      return StringValue::Wrap((TrimImpl)(value_.GetLarge()));
1092
0
    }
1093
0
  }
1094
0
}
1095
1096
namespace {
1097
1098
0
void AppendQuoteCodePoint(char32_t code_point, std::string& dst) {
1099
0
  switch (code_point) {
1100
0
    case '\a':
1101
0
      dst.append("\\a");
1102
0
      break;
1103
0
    case '\b':
1104
0
      dst.append("\\b");
1105
0
      break;
1106
0
    case '\f':
1107
0
      dst.append("\\f");
1108
0
      break;
1109
0
    case '\n':
1110
0
      dst.append("\\n");
1111
0
      break;
1112
0
    case '\r':
1113
0
      dst.append("\\r");
1114
0
      break;
1115
0
    case '\t':
1116
0
      dst.append("\\t");
1117
0
      break;
1118
0
    case '\v':
1119
0
      dst.append("\\v");
1120
0
      break;
1121
0
    case '\\':
1122
0
      dst.append("\\\\");
1123
0
      break;
1124
0
    case '\"':
1125
0
      dst.append("\\\"");
1126
0
      break;
1127
0
    default:
1128
0
      cel::internal::Utf8Encode(code_point, &dst);
1129
0
      break;
1130
0
  }
1131
0
}
1132
1133
}  // namespace
1134
1135
0
StringValue StringValue::Quote(google::protobuf::Arena* absl_nonnull arena) const {
1136
0
  return value_.Visit(absl::Overload(
1137
0
      [&](absl::string_view rep) -> StringValue {
1138
0
        std::string result;
1139
0
        result.push_back('\"');
1140
0
        while (!rep.empty()) {
1141
0
          char32_t code_point;
1142
0
          size_t code_units;
1143
0
          std::tie(code_point, code_units) = cel::internal::Utf8Decode(rep);
1144
0
          AppendQuoteCodePoint(code_point, result);
1145
0
          rep.remove_prefix(code_units);
1146
0
        }
1147
0
        result.push_back('\"');
1148
0
        return StringValue::From(std::move(result), arena);
1149
0
      },
1150
0
      [&](const absl::Cord& rep) -> StringValue {
1151
0
        absl::Cord::CharIterator begin = rep.char_begin();
1152
0
        absl::Cord::CharIterator end = rep.char_end();
1153
0
        std::string result;
1154
0
        result.push_back('\"');
1155
0
        while (begin != end) {
1156
0
          char32_t code_point;
1157
0
          size_t code_units;
1158
0
          std::tie(code_point, code_units) = cel::internal::Utf8Decode(begin);
1159
0
          AppendQuoteCodePoint(code_point, result);
1160
0
          absl::Cord::Advance(&begin, code_units);
1161
0
        }
1162
0
        result.push_back('\"');
1163
0
        return StringValue::From(std::move(result), arena);
1164
0
      }));
1165
0
}
1166
1167
0
StringValue StringValue::Reverse(google::protobuf::Arena* absl_nonnull arena) const {
1168
0
  return value_.Visit(absl::Overload(
1169
0
      [arena](absl::string_view string) -> StringValue {
1170
0
        if (string.empty()) {
1171
0
          return StringValue();
1172
0
        }
1173
0
        std::string reversed;
1174
0
        reversed.reserve(string.size());
1175
0
        const char* ptr = string.data() + string.size();
1176
0
        const char* begin = string.data();
1177
0
        while (ptr > begin) {
1178
0
          const char* char_end = ptr;
1179
0
          --ptr;
1180
          // Back up to beginning of encoded UTF-8 code point.
1181
0
          while (ptr > begin && (*ptr & 0xC0) == 0x80) {
1182
0
            --ptr;
1183
0
          }
1184
0
          reversed.append(ptr, char_end - ptr);
1185
0
        }
1186
0
        return StringValue::From(std::move(reversed), arena);
1187
0
      },
1188
0
      [arena](const absl::Cord& cord) -> StringValue {
1189
0
        if (cord.empty()) {
1190
0
          return StringValue();
1191
0
        }
1192
0
        std::vector<char32_t> code_points;
1193
0
        absl::Cord::CharIterator char_begin = cord.char_begin();
1194
0
        absl::Cord::CharIterator char_end = cord.char_end();
1195
0
        while (char_begin != char_end) {
1196
0
          char32_t code_point;
1197
0
          size_t code_units =
1198
0
              cel::internal::Utf8Decode(char_begin, &code_point);
1199
0
          code_points.push_back(code_point);
1200
0
          absl::Cord::Advance(&char_begin, code_units);
1201
0
        }
1202
0
        std::string reversed;
1203
0
        reversed.reserve(cord.size());
1204
0
        for (auto it = code_points.rbegin(); it != code_points.rend(); ++it) {
1205
0
          cel::internal::Utf8Encode(*it, &reversed);
1206
0
        }
1207
0
        return StringValue::From(std::move(reversed), arena);
1208
0
      }));
1209
0
}
1210
1211
absl::StatusOr<Value> StringValue::Join(
1212
    const ListValue& list,
1213
    const google::protobuf::DescriptorPool* absl_nonnull descriptor_pool,
1214
    google::protobuf::MessageFactory* absl_nonnull message_factory,
1215
0
    google::protobuf::Arena* absl_nonnull arena) const {
1216
0
  Value result;
1217
0
  CEL_RETURN_IF_ERROR(
1218
0
      Join(list, descriptor_pool, message_factory, arena, &result));
1219
0
  return result;
1220
0
}
1221
1222
absl::Status StringValue::Join(
1223
    const ListValue& list,
1224
    const google::protobuf::DescriptorPool* absl_nonnull descriptor_pool,
1225
    google::protobuf::MessageFactory* absl_nonnull message_factory,
1226
0
    google::protobuf::Arena* absl_nonnull arena, Value* absl_nonnull result) const {
1227
0
  ABSL_DCHECK(descriptor_pool != nullptr);
1228
0
  ABSL_DCHECK(message_factory != nullptr);
1229
0
  ABSL_DCHECK(arena != nullptr);
1230
0
  ABSL_DCHECK(result != nullptr);
1231
1232
0
  std::string joined;
1233
1234
0
  CEL_ASSIGN_OR_RETURN(auto iterator, list.NewIterator());
1235
1236
0
  CEL_ASSIGN_OR_RETURN(
1237
0
      absl::optional<Value> element,
1238
0
      iterator->Next1(descriptor_pool, message_factory, arena));
1239
0
  if (element) {
1240
0
    if (auto string_element = element->AsString(); string_element) {
1241
0
      string_element->AppendToString(&joined);
1242
0
    } else {
1243
0
      ABSL_DCHECK(!element->Is<ErrorValue>());
1244
0
      *result =
1245
0
          ErrorValue(runtime_internal::CreateNoMatchingOverloadError("join"));
1246
0
      return absl::OkStatus();
1247
0
    }
1248
0
    while (true) {
1249
0
      CEL_ASSIGN_OR_RETURN(
1250
0
          element, iterator->Next1(descriptor_pool, message_factory, arena));
1251
0
      if (!element) {
1252
0
        break;
1253
0
      }
1254
0
      AppendToString(&joined);
1255
0
      if (auto string_element = element->AsString(); string_element) {
1256
0
        string_element->AppendToString(&joined);
1257
0
      } else {
1258
0
        ABSL_DCHECK(!element->Is<ErrorValue>());
1259
0
        *result =
1260
0
            ErrorValue(runtime_internal::CreateNoMatchingOverloadError("join"));
1261
0
        return absl::OkStatus();
1262
0
      }
1263
0
    }
1264
0
  }
1265
1266
0
  if (joined.size() > common_internal::kSmallByteStringCapacity) {
1267
0
    joined.shrink_to_fit();
1268
0
  }
1269
1270
0
  *result = StringValue::From(std::move(joined), arena);
1271
0
  return absl::OkStatus();
1272
0
}
1273
1274
absl::StatusOr<Value> StringValue::Split(
1275
    const StringValue& delimiter, int64_t limit,
1276
0
    google::protobuf::Arena* absl_nonnull arena) const {
1277
0
  Value result;
1278
0
  CEL_RETURN_IF_ERROR(Split(delimiter, limit, arena, &result));
1279
0
  return result;
1280
0
}
1281
1282
absl::Status StringValue::Split(const StringValue& delimiter,
1283
                                google::protobuf::Arena* absl_nonnull arena,
1284
0
                                Value* absl_nonnull result) const {
1285
0
  return Split(delimiter, -1, arena, result);
1286
0
}
1287
1288
absl::StatusOr<Value> StringValue::Split(
1289
0
    const StringValue& delimiter, google::protobuf::Arena* absl_nonnull arena) const {
1290
0
  Value result;
1291
0
  CEL_RETURN_IF_ERROR(Split(delimiter, -1, arena, &result));
1292
0
  return result;
1293
0
}
1294
1295
absl::Status StringValue::Split(const StringValue& delimiter, int64_t limit,
1296
                                google::protobuf::Arena* absl_nonnull arena,
1297
0
                                Value* absl_nonnull result) const {
1298
0
  ABSL_DCHECK(arena != nullptr);
1299
0
  ABSL_DCHECK(result != nullptr);
1300
1301
0
  if (limit == 0) {
1302
    // Per spec, when limit is 0 return an empty list.
1303
0
    *result = ListValue();
1304
0
    return absl::OkStatus();
1305
0
  }
1306
0
  if (limit < 0) {
1307
    // Per spec, when limit is negative treat it as unlimited splits.
1308
0
    limit = std::numeric_limits<int64_t>::max();
1309
0
  }
1310
1311
0
  std::vector<std::pair<size_t, size_t>> splits;
1312
0
  size_t pos = 0;
1313
0
  const size_t len = value_.size();
1314
1315
0
  if (delimiter.IsEmpty()) {
1316
0
    value_.Visit(absl::Overload(
1317
0
        [&](absl::string_view s) {
1318
0
          while (pos < len && limit > 1) {
1319
0
            size_t char_len = cel::internal::Utf8Decode(s.substr(pos), nullptr);
1320
0
            splits.push_back({pos, pos + char_len});
1321
0
            pos += char_len;
1322
0
            --limit;
1323
0
          }
1324
0
        },
1325
0
        [&](const absl::Cord& s) {
1326
0
          while (pos < len && limit > 1) {
1327
0
            size_t char_len = cel::internal::Utf8Decode(
1328
0
                s.Subcord(pos, len - pos).char_begin(), nullptr);
1329
0
            splits.push_back({pos, pos + char_len});
1330
0
            pos += char_len;
1331
0
            --limit;
1332
0
          }
1333
0
        }));
1334
0
  } else {
1335
0
    while (pos < len && limit > 1) {
1336
0
      absl::optional<size_t> next = value_.Find(delimiter.value_, pos);
1337
0
      if (!next) {
1338
0
        break;
1339
0
      }
1340
0
      splits.push_back(std::pair{pos, *next});
1341
0
      pos = *next + delimiter.value_.size();
1342
0
      --limit;
1343
0
      ABSL_DCHECK_LE(pos, len);
1344
0
    }
1345
0
  }
1346
1347
0
  if (splits.empty() || !delimiter.IsEmpty() || pos < len) {
1348
0
    splits.push_back(std::pair{pos, len});
1349
0
  }
1350
1351
0
  auto builder = NewListValueBuilder(arena);
1352
0
  builder->Reserve(splits.size());
1353
0
  for (const std::pair<size_t, size_t>& split : splits) {
1354
0
    builder->UnsafeAdd(
1355
0
        StringValue(value_.Substring(split.first, split.second)));
1356
0
  }
1357
0
  *result = std::move(*builder).Build();
1358
0
  return absl::OkStatus();
1359
0
}
1360
1361
absl::StatusOr<Value> StringValue::Replace(
1362
    const StringValue& needle, const StringValue& replacement, int64_t limit,
1363
0
    google::protobuf::Arena* absl_nonnull arena) const {
1364
0
  Value result;
1365
0
  CEL_RETURN_IF_ERROR(Replace(needle, replacement, limit, arena, &result));
1366
0
  return result;
1367
0
}
1368
1369
absl::Status StringValue::Replace(const StringValue& needle,
1370
                                  const StringValue& replacement,
1371
                                  google::protobuf::Arena* absl_nonnull arena,
1372
0
                                  Value* absl_nonnull result) const {
1373
0
  return Replace(needle, replacement, -1, arena, result);
1374
0
}
1375
1376
absl::StatusOr<Value> StringValue::Replace(
1377
    const StringValue& needle, const StringValue& replacement,
1378
0
    google::protobuf::Arena* absl_nonnull arena) const {
1379
0
  Value result;
1380
0
  CEL_RETURN_IF_ERROR(Replace(needle, replacement, -1, arena, &result));
1381
0
  return result;
1382
0
}
1383
1384
absl::Status StringValue::Replace(const StringValue& needle,
1385
                                  const StringValue& replacement, int64_t limit,
1386
                                  google::protobuf::Arena* absl_nonnull arena,
1387
0
                                  Value* absl_nonnull result) const {
1388
0
  ABSL_DCHECK(arena != nullptr);
1389
0
  ABSL_DCHECK(result != nullptr);
1390
1391
0
  if (limit == 0) {
1392
    // Per spec, when limit is 0 return the original string.
1393
0
    *result = *this;
1394
0
    return absl::OkStatus();
1395
0
  }
1396
0
  if (limit < 0) {
1397
    // Per spec, when limit is negative treat it as unlimited replacements.
1398
0
    limit = std::numeric_limits<int64_t>::max();
1399
0
  }
1400
1401
0
  size_t pos = 0;
1402
0
  const size_t len = value_.size();
1403
0
  const size_t needle_len = needle.value_.size();
1404
0
  std::string res_str;
1405
1406
0
  if (needle.IsEmpty()) {
1407
0
    value_.Visit(absl::Overload(
1408
0
        [&](absl::string_view s) {
1409
0
          while (pos < len && limit > 0) {
1410
0
            replacement.AppendToString(&res_str);
1411
0
            size_t char_len = cel::internal::Utf8Decode(s.substr(pos), nullptr);
1412
0
            value_.Substring(pos, pos + char_len).AppendToString(&res_str);
1413
0
            pos += char_len;
1414
0
            --limit;
1415
0
          }
1416
0
        },
1417
0
        [&](const absl::Cord& s) {
1418
0
          while (pos < len && limit > 0) {
1419
0
            replacement.AppendToString(&res_str);
1420
0
            size_t char_len = cel::internal::Utf8Decode(
1421
0
                s.Subcord(pos, len - pos).char_begin(), nullptr);
1422
0
            value_.Substring(pos, pos + char_len).AppendToString(&res_str);
1423
0
            pos += char_len;
1424
0
            --limit;
1425
0
          }
1426
0
        }));
1427
0
    if (limit > 0) {
1428
0
      replacement.AppendToString(&res_str);
1429
0
    }
1430
0
  } else {
1431
0
    while (pos < len && limit > 0) {
1432
0
      absl::optional<size_t> next = value_.Find(needle.value_, pos);
1433
0
      if (!next) {
1434
0
        break;
1435
0
      }
1436
1437
0
      value_.Substring(pos, *next).AppendToString(&res_str);
1438
0
      replacement.AppendToString(&res_str);
1439
1440
0
      pos = *next + needle_len;
1441
0
      --limit;
1442
0
    }
1443
0
  }
1444
1445
0
  if (pos < len) {
1446
0
    value_.Substring(pos, len).AppendToString(&res_str);
1447
0
  }
1448
1449
0
  if (res_str.size() > common_internal::kSmallByteStringCapacity) {
1450
0
    res_str.shrink_to_fit();
1451
0
  }
1452
1453
0
  *result = StringValue::From(std::move(res_str), arena);
1454
0
  return absl::OkStatus();
1455
0
}
1456
1457
0
Value StringValue::CharAt(int64_t pos) const {
1458
0
  if (pos < 0) {
1459
0
    return ErrorValue(absl::InvalidArgumentError(
1460
0
        "<string>.charAt(<pos>): <pos> is less than 0"));
1461
0
  }
1462
0
  return value_.Visit(absl::Overload(
1463
0
      [this, pos](absl::string_view rep) mutable -> Value {
1464
0
        while (!rep.empty()) {
1465
0
          char32_t code_point;
1466
0
          size_t code_units;
1467
0
          std::tie(code_point, code_units) = cel::internal::Utf8Decode(rep);
1468
0
          if (pos == 0) {
1469
0
            StringValue result;
1470
0
            result.value_.rep_.header.kind =
1471
0
                common_internal::ByteStringKind::kSmall;
1472
0
            result.value_.rep_.small.size = cel::internal::Utf8Encode(
1473
0
                code_point, result.value_.rep_.small.data);
1474
0
            result.value_.rep_.small.arena = value_.GetArena();
1475
0
            return result;
1476
0
          }
1477
0
          rep.remove_prefix(code_units);
1478
0
          --pos;
1479
0
        }
1480
        // If we exit the loop, we iterated through all the code points in
1481
        // `rep`. `pos == 0` means we were looking for a character at index
1482
        // `size()`, which is defined to return an empty string.
1483
0
        if (pos == 0) {
1484
0
          return StringValue();
1485
0
        }
1486
0
        return ErrorValue(absl::InvalidArgumentError(
1487
0
            "<string>.charAt(<pos>): <pos> is greater than <string>.size()"));
1488
0
      },
1489
0
      [pos](const absl::Cord& rep) mutable -> Value {
1490
0
        absl::Cord::CharIterator begin = rep.char_begin();
1491
0
        absl::Cord::CharIterator end = rep.char_end();
1492
0
        while (begin != end) {
1493
0
          char32_t code_point;
1494
0
          size_t code_units;
1495
0
          std::tie(code_point, code_units) = cel::internal::Utf8Decode(begin);
1496
0
          if (pos == 0) {
1497
0
            StringValue result;
1498
0
            result.value_.rep_.header.kind =
1499
0
                common_internal::ByteStringKind::kSmall;
1500
0
            result.value_.rep_.small.size = cel::internal::Utf8Encode(
1501
0
                code_point, result.value_.rep_.small.data);
1502
0
            result.value_.rep_.small.arena = nullptr;
1503
0
            return result;
1504
0
          }
1505
0
          absl::Cord::Advance(&begin, code_units);
1506
0
          --pos;
1507
0
        }
1508
        // If we exit the loop, we iterated through all the code points in
1509
        // `rep`. `pos == 0` means we were looking for a character at index
1510
        // `size()`, which is defined to return an empty string.
1511
0
        if (pos == 0) {
1512
0
          return StringValue();
1513
0
        }
1514
0
        return ErrorValue(absl::InvalidArgumentError(
1515
0
            "<string>.charAt(<pos>): <pos> is greater than <string>.size()"));
1516
0
      }));
1517
0
}
1518
1519
}  // namespace cel