/proc/self/cwd/common/values/string_value.cc
Line | Count | Source |
1 | | // Copyright 2023 Google LLC |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include <cstddef> |
16 | | #include <cstdint> |
17 | | #include <cstring> |
18 | | #include <limits> |
19 | | #include <string> |
20 | | #include <tuple> |
21 | | #include <utility> |
22 | | #include <vector> |
23 | | |
24 | | #include "google/protobuf/wrappers.pb.h" |
25 | | #include "absl/base/nullability.h" |
26 | | #include "absl/functional/overload.h" |
27 | | #include "absl/log/absl_check.h" |
28 | | #include "absl/status/status.h" |
29 | | #include "absl/status/statusor.h" |
30 | | #include "absl/strings/ascii.h" |
31 | | #include "absl/strings/cord.h" |
32 | | #include "absl/strings/cord_buffer.h" |
33 | | #include "absl/strings/match.h" |
34 | | #include "absl/strings/str_cat.h" |
35 | | #include "absl/strings/string_view.h" |
36 | | #include "absl/types/optional.h" |
37 | | #include "common/internal/byte_string.h" |
38 | | #include "common/internal/reference_count.h" |
39 | | #include "common/value.h" |
40 | | #include "internal/status_macros.h" |
41 | | #include "internal/strings.h" |
42 | | #include "internal/utf8.h" |
43 | | #include "internal/well_known_types.h" |
44 | | #include "runtime/internal/errors.h" |
45 | | #include "google/protobuf/arena.h" |
46 | | #include "google/protobuf/descriptor.h" |
47 | | #include "google/protobuf/io/zero_copy_stream.h" |
48 | | #include "google/protobuf/message.h" |
49 | | |
50 | | namespace cel { |
51 | | |
52 | | namespace { |
53 | | |
54 | | using ::cel::well_known_types::ValueReflection; |
55 | | |
56 | | template <typename Bytes> |
57 | 2.30k | std::string StringDebugString(const Bytes& value) { |
58 | 2.30k | return value.NativeValue(absl::Overload( |
59 | 2.30k | [](absl::string_view string) -> std::string { |
60 | 2.30k | return internal::FormatStringLiteral(string); |
61 | 2.30k | }, |
62 | 2.30k | [](const absl::Cord& cord) -> std::string { |
63 | 0 | if (auto flat = cord.TryFlat(); flat.has_value()) { |
64 | 0 | return internal::FormatStringLiteral(*flat); |
65 | 0 | } |
66 | 0 | return internal::FormatStringLiteral(static_cast<std::string>(cord)); |
67 | 0 | })); |
68 | 2.30k | } |
69 | | |
70 | | } // namespace |
71 | | |
72 | | StringValue StringValue::Concat(const StringValue& lhs, const StringValue& rhs, |
73 | 2.17k | google::protobuf::Arena* absl_nonnull arena) { |
74 | 2.17k | return StringValue( |
75 | 2.17k | common_internal::ByteString::Concat(lhs.value_, rhs.value_, arena)); |
76 | 2.17k | } |
77 | | |
78 | 2.30k | std::string StringValue::DebugString() const { |
79 | 2.30k | return StringDebugString(*this); |
80 | 2.30k | } |
81 | | |
82 | | absl::Status StringValue::SerializeTo( |
83 | | const google::protobuf::DescriptorPool* absl_nonnull descriptor_pool, |
84 | | google::protobuf::MessageFactory* absl_nonnull message_factory, |
85 | 0 | google::protobuf::io::ZeroCopyOutputStream* absl_nonnull output) const { |
86 | 0 | ABSL_DCHECK(descriptor_pool != nullptr); |
87 | 0 | ABSL_DCHECK(message_factory != nullptr); |
88 | 0 | ABSL_DCHECK(output != nullptr); |
89 | |
|
90 | 0 | google::protobuf::StringValue message; |
91 | 0 | message.set_value(NativeString()); |
92 | 0 | if (!message.SerializePartialToZeroCopyStream(output)) { |
93 | 0 | return absl::UnknownError( |
94 | 0 | absl::StrCat("failed to serialize message: ", message.GetTypeName())); |
95 | 0 | } |
96 | | |
97 | 0 | return absl::OkStatus(); |
98 | 0 | } |
99 | | |
100 | | absl::Status StringValue::ConvertToJson( |
101 | | const google::protobuf::DescriptorPool* absl_nonnull descriptor_pool, |
102 | | google::protobuf::MessageFactory* absl_nonnull message_factory, |
103 | 0 | google::protobuf::Message* absl_nonnull json) const { |
104 | 0 | ABSL_DCHECK(descriptor_pool != nullptr); |
105 | 0 | ABSL_DCHECK(message_factory != nullptr); |
106 | 0 | ABSL_DCHECK(json != nullptr); |
107 | 0 | ABSL_DCHECK_EQ(json->GetDescriptor()->well_known_type(), |
108 | 0 | google::protobuf::Descriptor::WELLKNOWNTYPE_VALUE); |
109 | |
|
110 | 0 | ValueReflection value_reflection; |
111 | 0 | CEL_RETURN_IF_ERROR(value_reflection.Initialize(json->GetDescriptor())); |
112 | 0 | NativeValue( |
113 | 0 | [&](const auto& value) { value_reflection.SetStringValue(json, value); });Unexecuted instantiation: string_value.cc:auto cel::StringValue::ConvertToJson(google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Message*) const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Unexecuted instantiation: string_value.cc:auto cel::StringValue::ConvertToJson(google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Message*) const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const |
114 | |
|
115 | 0 | return absl::OkStatus(); |
116 | 0 | } |
117 | | |
118 | | absl::Status StringValue::Equal( |
119 | | const Value& other, |
120 | | const google::protobuf::DescriptorPool* absl_nonnull descriptor_pool, |
121 | | google::protobuf::MessageFactory* absl_nonnull message_factory, |
122 | 619 | google::protobuf::Arena* absl_nonnull arena, Value* absl_nonnull result) const { |
123 | 619 | ABSL_DCHECK(descriptor_pool != nullptr); |
124 | 619 | ABSL_DCHECK(message_factory != nullptr); |
125 | 619 | ABSL_DCHECK(arena != nullptr); |
126 | 619 | ABSL_DCHECK(result != nullptr); |
127 | | |
128 | 619 | if (auto other_value = other.AsString(); other_value.has_value()) { |
129 | 538 | *result = NativeValue([other_value](const auto& value) -> BoolValue { |
130 | 538 | return other_value->NativeValue( |
131 | 538 | [&value](const auto& other_value) -> BoolValue { |
132 | 538 | return BoolValue{value == other_value}; |
133 | 538 | }); string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const::{lambda(auto:1 const&)#1}::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) constLine | Count | Source | 131 | 538 | [&value](const auto& other_value) -> BoolValue { | 132 | 538 | return BoolValue{value == other_value}; | 133 | 538 | }); |
Unexecuted instantiation: string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const::{lambda(auto:1 const&)#1}::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) constUnexecuted instantiation: string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const::{lambda(auto:1 const&)#1}::operator()<std::__1::basic_string_view<char, {lambda(auto:1 const&)#1}::operator()::char_traits<char> > >(std::__1::basic_string_view<char, {lambda(auto:1 const&)#1}::operator()::char_traits<char> > const&) constUnexecuted instantiation: string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const::{lambda(auto:1 const&)#1}::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const |
134 | 538 | }); string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 129 | 538 | *result = NativeValue([other_value](const auto& value) -> BoolValue { | 130 | 538 | return other_value->NativeValue( | 131 | 538 | [&value](const auto& other_value) -> BoolValue { | 132 | 538 | return BoolValue{value == other_value}; | 133 | 538 | }); | 134 | 538 | }); |
Unexecuted instantiation: string_value.cc:cel::BoolValue cel::StringValue::Equal(cel::Value const&, google::protobuf::DescriptorPool const*, google::protobuf::MessageFactory*, google::protobuf::Arena*, cel::Value*) const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const |
135 | 538 | return absl::OkStatus(); |
136 | 538 | } |
137 | 81 | *result = FalseValue(); |
138 | 81 | return absl::OkStatus(); |
139 | 619 | } |
140 | | |
141 | 47 | size_t StringValue::Size() const { |
142 | 47 | return NativeValue([](const auto& alternative) -> size_t { |
143 | 47 | return internal::Utf8CodePointCount(alternative); |
144 | 47 | }); string_value.cc:unsigned long cel::StringValue::Size() const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Line | Count | Source | 142 | 47 | return NativeValue([](const auto& alternative) -> size_t { | 143 | 47 | return internal::Utf8CodePointCount(alternative); | 144 | 47 | }); |
Unexecuted instantiation: string_value.cc:unsigned long cel::StringValue::Size() const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const |
145 | 47 | } |
146 | | |
147 | 0 | bool StringValue::IsEmpty() const { |
148 | 0 | return NativeValue( |
149 | 0 | [](const auto& alternative) -> bool { return alternative.empty(); });Unexecuted instantiation: string_value.cc:bool cel::StringValue::IsEmpty() const::$_0::operator()<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) const Unexecuted instantiation: string_value.cc:bool cel::StringValue::IsEmpty() const::$_0::operator()<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&) const |
150 | 0 | } |
151 | | |
152 | 3.12k | bool StringValue::Equals(absl::string_view string) const { |
153 | 3.12k | return value_.Equals(string); |
154 | 3.12k | } |
155 | | |
156 | 0 | bool StringValue::Equals(const absl::Cord& string) const { |
157 | 0 | return value_.Equals(string); |
158 | 0 | } |
159 | | |
160 | 113 | bool StringValue::Equals(const StringValue& string) const { |
161 | 113 | return value_.Equals(string.value_); |
162 | 113 | } |
163 | | |
164 | 0 | StringValue StringValue::Clone(google::protobuf::Arena* absl_nonnull arena) const { |
165 | 0 | return StringValue(value_.Clone(arena)); |
166 | 0 | } |
167 | | |
168 | 0 | int StringValue::Compare(absl::string_view string) const { |
169 | 0 | return value_.Compare(string); |
170 | 0 | } |
171 | | |
172 | 0 | int StringValue::Compare(const absl::Cord& string) const { |
173 | 0 | return value_.Compare(string); |
174 | 0 | } |
175 | | |
176 | 775 | int StringValue::Compare(const StringValue& string) const { |
177 | 775 | return value_.Compare(string.value_); |
178 | 775 | } |
179 | | |
180 | 0 | bool StringValue::StartsWith(absl::string_view string) const { |
181 | 0 | return value_.StartsWith(string); |
182 | 0 | } |
183 | | |
184 | 0 | bool StringValue::StartsWith(const absl::Cord& string) const { |
185 | 0 | return value_.StartsWith(string); |
186 | 0 | } |
187 | | |
188 | 0 | bool StringValue::StartsWith(const StringValue& string) const { |
189 | 0 | return value_.StartsWith(string.value_); |
190 | 0 | } |
191 | | |
192 | 0 | bool StringValue::EndsWith(absl::string_view string) const { |
193 | 0 | return value_.EndsWith(string); |
194 | 0 | } |
195 | | |
196 | 0 | bool StringValue::EndsWith(const absl::Cord& string) const { |
197 | 0 | return value_.EndsWith(string); |
198 | 0 | } |
199 | | |
200 | 0 | bool StringValue::EndsWith(const StringValue& string) const { |
201 | 0 | return value_.EndsWith(string.value_); |
202 | 0 | } |
203 | | |
204 | 0 | bool StringValue::Contains(absl::string_view string) const { |
205 | 0 | return value_.Visit(absl::Overload( |
206 | 0 | [&](absl::string_view lhs) -> bool { |
207 | 0 | return absl::StrContains(lhs, string); |
208 | 0 | }, |
209 | 0 | [&](const absl::Cord& lhs) -> bool { return lhs.Contains(string); })); |
210 | 0 | } |
211 | | |
212 | 0 | bool StringValue::Contains(const absl::Cord& string) const { |
213 | 0 | return value_.Visit(absl::Overload( |
214 | 0 | [&](absl::string_view lhs) -> bool { |
215 | 0 | if (auto flat = string.TryFlat(); flat) { |
216 | 0 | return absl::StrContains(lhs, *flat); |
217 | 0 | } |
218 | | // There is no nice way to do this. We cannot use std::search due to |
219 | | // absl::Cord::CharIterator being an input iterator instead of a forward |
220 | | // iterator. So just make an external cord with a noop releaser. We know |
221 | | // the external cord will not outlive this function. |
222 | 0 | return absl::MakeCordFromExternal(lhs, []() {}).Contains(string); |
223 | 0 | }, |
224 | 0 | [&](const absl::Cord& lhs) -> bool { return lhs.Contains(string); })); |
225 | 0 | } |
226 | | |
227 | 0 | bool StringValue::Contains(const StringValue& string) const { |
228 | 0 | return string.value_.Visit(absl::Overload( |
229 | 0 | [&](absl::string_view rhs) -> bool { return Contains(rhs); }, |
230 | 0 | [&](const absl::Cord& rhs) -> bool { return Contains(rhs); })); |
231 | 0 | } |
232 | | |
233 | 0 | absl::optional<int64_t> StringValue::IndexOf(absl::string_view string) const { |
234 | 0 | return value_.Visit(absl::Overload( |
235 | 0 | [&](absl::string_view lhs) -> absl::optional<int64_t> { |
236 | 0 | int64_t code_points = 0; |
237 | 0 | while (lhs.size() >= string.size()) { |
238 | 0 | if (absl::StartsWith(lhs, string)) { |
239 | 0 | return code_points; |
240 | 0 | } |
241 | 0 | if (lhs.size() == string.size()) { |
242 | 0 | break; |
243 | 0 | } |
244 | 0 | size_t code_units = |
245 | 0 | cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr); |
246 | 0 | lhs.remove_prefix(code_units); |
247 | 0 | ++code_points; |
248 | 0 | } |
249 | 0 | return absl::nullopt; |
250 | 0 | }, |
251 | 0 | [&](absl::Cord lhs) -> absl::optional<int64_t> { |
252 | 0 | int64_t code_points = 0; |
253 | 0 | while (lhs.size() >= string.size()) { |
254 | 0 | if (lhs.StartsWith(string)) { |
255 | 0 | return code_points; |
256 | 0 | } |
257 | 0 | if (lhs.size() == string.size()) { |
258 | 0 | break; |
259 | 0 | } |
260 | 0 | size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(), |
261 | 0 | /*code_point=*/nullptr); |
262 | 0 | lhs.RemovePrefix(code_units); |
263 | 0 | ++code_points; |
264 | 0 | } |
265 | 0 | return absl::nullopt; |
266 | 0 | })); |
267 | 0 | } |
268 | | |
269 | 0 | absl::optional<int64_t> StringValue::IndexOf(const absl::Cord& string) const { |
270 | 0 | return value_.Visit(absl::Overload( |
271 | 0 | [&](absl::string_view lhs) -> absl::optional<int64_t> { |
272 | 0 | int64_t code_points = 0; |
273 | 0 | while (lhs.size() >= string.size()) { |
274 | 0 | if (lhs.substr(0, string.size()) == string) { |
275 | 0 | return code_points; |
276 | 0 | } |
277 | 0 | if (lhs.size() == string.size()) { |
278 | 0 | break; |
279 | 0 | } |
280 | 0 | size_t code_units = |
281 | 0 | cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr); |
282 | 0 | lhs.remove_prefix(code_units); |
283 | 0 | ++code_points; |
284 | 0 | } |
285 | 0 | return absl::nullopt; |
286 | 0 | }, |
287 | 0 | [&](absl::Cord lhs) -> absl::optional<int64_t> { |
288 | 0 | int64_t code_points = 0; |
289 | 0 | while (lhs.size() >= string.size()) { |
290 | 0 | if (lhs.StartsWith(string)) { |
291 | 0 | return code_points; |
292 | 0 | } |
293 | 0 | if (lhs.size() == string.size()) { |
294 | 0 | break; |
295 | 0 | } |
296 | 0 | size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(), |
297 | 0 | /*code_point=*/nullptr); |
298 | 0 | lhs.RemovePrefix(code_units); |
299 | 0 | ++code_points; |
300 | 0 | } |
301 | 0 | return absl::nullopt; |
302 | 0 | })); |
303 | 0 | } |
304 | | |
305 | 0 | absl::optional<int64_t> StringValue::IndexOf(const StringValue& string) const { |
306 | 0 | return string.value_.Visit(absl::Overload( |
307 | 0 | [this](absl::string_view rhs) -> absl::optional<int64_t> { |
308 | 0 | return IndexOf(rhs); |
309 | 0 | }, |
310 | 0 | [this](const absl::Cord& rhs) -> absl::optional<int64_t> { |
311 | 0 | return IndexOf(rhs); |
312 | 0 | })); |
313 | 0 | } |
314 | | |
315 | | absl::optional<int64_t> StringValue::IndexOf(absl::string_view string, |
316 | 0 | int64_t pos) const { |
317 | 0 | if (pos < 0) { |
318 | 0 | pos = 0; |
319 | 0 | } |
320 | 0 | return value_.Visit(absl::Overload( |
321 | 0 | [&](absl::string_view lhs) -> absl::optional<int64_t> { |
322 | 0 | int64_t code_points = 0; |
323 | 0 | while (lhs.size() >= string.size()) { |
324 | 0 | if (code_points >= pos && absl::StartsWith(lhs, string)) { |
325 | 0 | return code_points; |
326 | 0 | } |
327 | 0 | if (lhs.size() == string.size()) { |
328 | 0 | break; |
329 | 0 | } |
330 | 0 | size_t code_units = |
331 | 0 | cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr); |
332 | 0 | lhs.remove_prefix(code_units); |
333 | 0 | ++code_points; |
334 | 0 | } |
335 | 0 | return absl::nullopt; |
336 | 0 | }, |
337 | 0 | [&](absl::Cord lhs) -> absl::optional<int64_t> { |
338 | 0 | int64_t code_points = 0; |
339 | 0 | while (lhs.size() >= string.size()) { |
340 | 0 | if (code_points >= pos && lhs.StartsWith(string)) { |
341 | 0 | return code_points; |
342 | 0 | } |
343 | 0 | if (lhs.size() == string.size()) { |
344 | 0 | break; |
345 | 0 | } |
346 | 0 | size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(), |
347 | 0 | /*code_point=*/nullptr); |
348 | 0 | lhs.RemovePrefix(code_units); |
349 | 0 | ++code_points; |
350 | 0 | } |
351 | 0 | return absl::nullopt; |
352 | 0 | })); |
353 | 0 | } |
354 | | |
355 | | absl::optional<int64_t> StringValue::IndexOf(const absl::Cord& string, |
356 | 0 | int64_t pos) const { |
357 | 0 | if (pos < 0) { |
358 | 0 | pos = 0; |
359 | 0 | } |
360 | 0 | return value_.Visit(absl::Overload( |
361 | 0 | [&](absl::string_view lhs) -> absl::optional<int64_t> { |
362 | 0 | int64_t code_points = 0; |
363 | 0 | while (lhs.size() >= string.size()) { |
364 | 0 | if (code_points >= pos && lhs.substr(0, string.size()) == string) { |
365 | 0 | return code_points; |
366 | 0 | } |
367 | 0 | if (lhs.size() == string.size()) { |
368 | 0 | break; |
369 | 0 | } |
370 | 0 | size_t code_units = |
371 | 0 | cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr); |
372 | 0 | lhs.remove_prefix(code_units); |
373 | 0 | ++code_points; |
374 | 0 | } |
375 | 0 | return absl::nullopt; |
376 | 0 | }, |
377 | 0 | [&](absl::Cord lhs) -> absl::optional<int64_t> { |
378 | 0 | int64_t code_points = 0; |
379 | 0 | while (lhs.size() >= string.size()) { |
380 | 0 | if (code_points >= pos && lhs.StartsWith(string)) { |
381 | 0 | return code_points; |
382 | 0 | } |
383 | 0 | if (lhs.size() == string.size()) { |
384 | 0 | break; |
385 | 0 | } |
386 | 0 | size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(), |
387 | 0 | /*code_point=*/nullptr); |
388 | 0 | lhs.RemovePrefix(code_units); |
389 | 0 | ++code_points; |
390 | 0 | } |
391 | 0 | return absl::nullopt; |
392 | 0 | })); |
393 | 0 | } |
394 | | |
395 | | absl::optional<int64_t> StringValue::IndexOf(const StringValue& string, |
396 | 0 | int64_t pos) const { |
397 | 0 | return string.value_.Visit(absl::Overload( |
398 | 0 | [this, pos](absl::string_view rhs) -> absl::optional<int64_t> { |
399 | 0 | return IndexOf(rhs, pos); |
400 | 0 | }, |
401 | 0 | [this, pos](const absl::Cord& rhs) -> absl::optional<int64_t> { |
402 | 0 | return IndexOf(rhs, pos); |
403 | 0 | })); |
404 | 0 | } |
405 | | |
406 | | absl::optional<int64_t> StringValue::LastIndexOf( |
407 | 0 | absl::string_view string) const { |
408 | 0 | return value_.Visit(absl::Overload( |
409 | 0 | [&](absl::string_view lhs) -> absl::optional<int64_t> { |
410 | 0 | int64_t last_index = -1; |
411 | 0 | int64_t code_points = 0; |
412 | 0 | while (lhs.size() >= string.size()) { |
413 | 0 | if (absl::StartsWith(lhs, string)) { |
414 | 0 | last_index = code_points; |
415 | 0 | } |
416 | 0 | if (lhs.size() == string.size()) { |
417 | 0 | break; |
418 | 0 | } |
419 | 0 | size_t code_units = |
420 | 0 | cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr); |
421 | 0 | lhs.remove_prefix(code_units); |
422 | 0 | ++code_points; |
423 | 0 | } |
424 | 0 | if (last_index < 0) return absl::nullopt; |
425 | 0 | return last_index; |
426 | 0 | }, |
427 | 0 | [&](absl::Cord lhs) -> absl::optional<int64_t> { |
428 | 0 | int64_t last_index = -1; |
429 | 0 | int64_t code_points = 0; |
430 | 0 | while (lhs.size() >= string.size()) { |
431 | 0 | if (lhs.StartsWith(string)) { |
432 | 0 | last_index = code_points; |
433 | 0 | } |
434 | 0 | if (lhs.size() == string.size()) { |
435 | 0 | break; |
436 | 0 | } |
437 | 0 | size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(), |
438 | 0 | /*code_point=*/nullptr); |
439 | 0 | lhs.RemovePrefix(code_units); |
440 | 0 | ++code_points; |
441 | 0 | } |
442 | 0 | if (last_index < 0) return absl::nullopt; |
443 | 0 | return last_index; |
444 | 0 | })); |
445 | 0 | } |
446 | | |
447 | | absl::optional<int64_t> StringValue::LastIndexOf( |
448 | 0 | const absl::Cord& string) const { |
449 | 0 | return value_.Visit(absl::Overload( |
450 | 0 | [&](absl::string_view lhs) -> absl::optional<int64_t> { |
451 | 0 | int64_t last_index = -1; |
452 | 0 | int64_t code_points = 0; |
453 | 0 | while (lhs.size() >= string.size()) { |
454 | 0 | if (lhs.substr(0, string.size()) == string) { |
455 | 0 | last_index = code_points; |
456 | 0 | } |
457 | 0 | if (lhs.size() == string.size()) { |
458 | 0 | break; |
459 | 0 | } |
460 | 0 | size_t code_units = |
461 | 0 | cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr); |
462 | 0 | lhs.remove_prefix(code_units); |
463 | 0 | ++code_points; |
464 | 0 | } |
465 | 0 | if (last_index < 0) return absl::nullopt; |
466 | 0 | return last_index; |
467 | 0 | }, |
468 | 0 | [&](absl::Cord lhs) -> absl::optional<int64_t> { |
469 | 0 | int64_t last_index = -1; |
470 | 0 | int64_t code_points = 0; |
471 | 0 | while (lhs.size() >= string.size()) { |
472 | 0 | if (lhs.StartsWith(string)) { |
473 | 0 | last_index = code_points; |
474 | 0 | } |
475 | 0 | if (lhs.size() == string.size()) { |
476 | 0 | break; |
477 | 0 | } |
478 | 0 | size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(), |
479 | 0 | /*code_point=*/nullptr); |
480 | 0 | lhs.RemovePrefix(code_units); |
481 | 0 | ++code_points; |
482 | 0 | } |
483 | 0 | if (last_index < 0) return absl::nullopt; |
484 | 0 | return last_index; |
485 | 0 | })); |
486 | 0 | } |
487 | | |
488 | | absl::optional<int64_t> StringValue::LastIndexOf( |
489 | 0 | const StringValue& string) const { |
490 | 0 | return string.value_.Visit(absl::Overload( |
491 | 0 | [this](absl::string_view rhs) -> absl::optional<int64_t> { |
492 | 0 | return LastIndexOf(rhs); |
493 | 0 | }, |
494 | 0 | [this](const absl::Cord& rhs) -> absl::optional<int64_t> { |
495 | 0 | return LastIndexOf(rhs); |
496 | 0 | })); |
497 | 0 | } |
498 | | |
499 | | absl::optional<int64_t> StringValue::LastIndexOf(absl::string_view string, |
500 | 0 | int64_t pos) const { |
501 | 0 | if (pos < 0) { |
502 | 0 | return absl::nullopt; |
503 | 0 | } |
504 | 0 | return value_.Visit(absl::Overload( |
505 | 0 | [&](absl::string_view lhs) -> absl::optional<int64_t> { |
506 | 0 | int64_t last_index = -1; |
507 | 0 | int64_t code_points = 0; |
508 | 0 | while (lhs.size() >= string.size()) { |
509 | 0 | if (absl::StartsWith(lhs, string)) { |
510 | 0 | last_index = code_points; |
511 | 0 | } |
512 | 0 | if (code_points >= pos || lhs.size() == string.size()) { |
513 | 0 | break; |
514 | 0 | } |
515 | 0 | size_t code_units = |
516 | 0 | cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr); |
517 | 0 | lhs.remove_prefix(code_units); |
518 | 0 | ++code_points; |
519 | 0 | } |
520 | 0 | if (last_index < 0) return absl::nullopt; |
521 | 0 | return last_index; |
522 | 0 | }, |
523 | 0 | [&](absl::Cord lhs) -> absl::optional<int64_t> { |
524 | 0 | int64_t last_index = -1; |
525 | 0 | int64_t code_points = 0; |
526 | 0 | while (lhs.size() >= string.size()) { |
527 | 0 | if (lhs.StartsWith(string)) { |
528 | 0 | last_index = code_points; |
529 | 0 | } |
530 | 0 | if (code_points >= pos || lhs.size() == string.size()) { |
531 | 0 | break; |
532 | 0 | } |
533 | 0 | size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(), |
534 | 0 | /*code_point=*/nullptr); |
535 | 0 | lhs.RemovePrefix(code_units); |
536 | 0 | ++code_points; |
537 | 0 | } |
538 | 0 | if (last_index < 0) return absl::nullopt; |
539 | 0 | return last_index; |
540 | 0 | })); |
541 | 0 | } |
542 | | |
543 | | absl::optional<int64_t> StringValue::LastIndexOf(const absl::Cord& string, |
544 | 0 | int64_t pos) const { |
545 | 0 | if (pos < 0) { |
546 | 0 | return absl::nullopt; |
547 | 0 | } |
548 | 0 | return value_.Visit(absl::Overload( |
549 | 0 | [&](absl::string_view lhs) -> absl::optional<int64_t> { |
550 | 0 | int64_t last_index = -1; |
551 | 0 | int64_t code_points = 0; |
552 | 0 | while (lhs.size() >= string.size()) { |
553 | 0 | if (lhs.substr(0, string.size()) == string) { |
554 | 0 | last_index = code_points; |
555 | 0 | } |
556 | 0 | if (code_points >= pos || lhs.size() == string.size()) { |
557 | 0 | break; |
558 | 0 | } |
559 | 0 | size_t code_units = |
560 | 0 | cel::internal::Utf8Decode(lhs, /*code_point=*/nullptr); |
561 | 0 | lhs.remove_prefix(code_units); |
562 | 0 | ++code_points; |
563 | 0 | } |
564 | 0 | if (last_index < 0) return absl::nullopt; |
565 | 0 | return last_index; |
566 | 0 | }, |
567 | 0 | [&](absl::Cord lhs) -> absl::optional<int64_t> { |
568 | 0 | int64_t last_index = -1; |
569 | 0 | int64_t code_points = 0; |
570 | 0 | while (lhs.size() >= string.size()) { |
571 | 0 | if (lhs.StartsWith(string)) { |
572 | 0 | last_index = code_points; |
573 | 0 | } |
574 | 0 | if (code_points >= pos || lhs.size() == string.size()) { |
575 | 0 | break; |
576 | 0 | } |
577 | 0 | size_t code_units = cel::internal::Utf8Decode(lhs.char_begin(), |
578 | 0 | /*code_point=*/nullptr); |
579 | 0 | lhs.RemovePrefix(code_units); |
580 | 0 | ++code_points; |
581 | 0 | } |
582 | 0 | if (last_index < 0) return absl::nullopt; |
583 | 0 | return last_index; |
584 | 0 | })); |
585 | 0 | } |
586 | | |
587 | | absl::optional<int64_t> StringValue::LastIndexOf(const StringValue& string, |
588 | 0 | int64_t pos) const { |
589 | 0 | return string.value_.Visit(absl::Overload( |
590 | 0 | [this, pos](absl::string_view rhs) -> absl::optional<int64_t> { |
591 | 0 | return LastIndexOf(rhs, pos); |
592 | 0 | }, |
593 | 0 | [this, pos](const absl::Cord& rhs) -> absl::optional<int64_t> { |
594 | 0 | return LastIndexOf(rhs, pos); |
595 | 0 | })); |
596 | 0 | } |
597 | | |
598 | | namespace { |
599 | | |
600 | 0 | absl::StatusOr<size_t> SubstringImpl(absl::string_view string, uint64_t start) { |
601 | 0 | size_t size_code_points = 0; |
602 | 0 | size_t size_code_units = 0; |
603 | 0 | while (!string.empty()) { |
604 | 0 | char32_t code_point; |
605 | 0 | size_t code_units; |
606 | 0 | std::tie(code_point, code_units) = cel::internal::Utf8Decode(string); |
607 | 0 | if (size_code_points == start) { |
608 | 0 | return size_code_units; |
609 | 0 | } |
610 | 0 | string.remove_prefix(code_units); |
611 | 0 | ++size_code_points; |
612 | 0 | size_code_units += code_units; |
613 | 0 | } |
614 | 0 | if (size_code_points == start) { |
615 | 0 | return size_code_units; |
616 | 0 | } |
617 | 0 | return absl::InvalidArgumentError( |
618 | 0 | "<string>.substring(<start>): <start> is greater than <string>.size()"); |
619 | 0 | } |
620 | | |
621 | | absl::StatusOr<absl::Cord> SubstringImpl(const absl::Cord& cord, |
622 | 0 | uint64_t start) { |
623 | 0 | absl::Cord::CharIterator char_begin = cord.char_begin(); |
624 | 0 | absl::Cord::CharIterator char_end = cord.char_end(); |
625 | 0 | size_t size_code_points = 0; |
626 | 0 | size_t size_code_units = 0; |
627 | 0 | while (char_begin != char_end) { |
628 | 0 | char32_t code_point; |
629 | 0 | size_t code_units; |
630 | 0 | std::tie(code_point, code_units) = cel::internal::Utf8Decode(char_begin); |
631 | 0 | if (size_code_points == start) { |
632 | 0 | return cord.Subcord(size_code_units, std::numeric_limits<size_t>::max()); |
633 | 0 | } |
634 | 0 | absl::Cord::Advance(&char_begin, code_units); |
635 | 0 | ++size_code_points; |
636 | 0 | size_code_units += code_units; |
637 | 0 | } |
638 | 0 | if (size_code_points == start) { |
639 | 0 | return cord; |
640 | 0 | } |
641 | 0 | return absl::InvalidArgumentError( |
642 | 0 | "<string>.substring(<start>): <start> is greater than <string>.size()"); |
643 | 0 | } |
644 | | |
645 | | } // namespace |
646 | | |
647 | 0 | Value StringValue::Substring(int64_t start) const { |
648 | 0 | if (start < 0) { |
649 | 0 | return ErrorValue(absl::InvalidArgumentError( |
650 | 0 | "<string>.substring(<start>): <start> is less than 0")); |
651 | 0 | } |
652 | 0 | if (static_cast<uint64_t>(start) > value_.size()) { |
653 | 0 | return ErrorValue(absl::InvalidArgumentError( |
654 | 0 | "<string>.substring(<start>, <end>): <start> or <end> is greater than " |
655 | 0 | "<string>.size()")); |
656 | 0 | } |
657 | 0 | if (start == 0) { |
658 | 0 | return *this; |
659 | 0 | } |
660 | 0 | switch (value_.GetKind()) { |
661 | 0 | case common_internal::ByteStringKind::kSmall: { |
662 | 0 | absl::StatusOr<size_t> status_or_index = |
663 | 0 | (SubstringImpl)(value_.GetSmall(), start); |
664 | 0 | if (!status_or_index.ok()) { |
665 | 0 | return ErrorValue(std::move(status_or_index).status()); |
666 | 0 | } |
667 | 0 | StringValue result; |
668 | 0 | result.value_.rep_.header.kind = common_internal::ByteStringKind::kSmall; |
669 | 0 | result.value_.rep_.small.size = value_.rep_.small.size - *status_or_index; |
670 | 0 | std::memcpy(result.value_.rep_.small.data, |
671 | 0 | value_.rep_.small.data + *status_or_index, |
672 | 0 | result.value_.rep_.small.size); |
673 | 0 | result.value_.rep_.small.arena = value_.rep_.small.arena; |
674 | 0 | return result; |
675 | 0 | } |
676 | 0 | case common_internal::ByteStringKind::kMedium: { |
677 | 0 | absl::StatusOr<size_t> status_or_index = |
678 | 0 | (SubstringImpl)(value_.GetMedium(), start); |
679 | 0 | if (!status_or_index.ok()) { |
680 | 0 | return ErrorValue(std::move(status_or_index).status()); |
681 | 0 | } |
682 | 0 | StringValue result; |
683 | 0 | result.value_.rep_.header.kind = common_internal::ByteStringKind::kMedium; |
684 | 0 | result.value_.rep_.medium.size = |
685 | 0 | value_.rep_.medium.size - *status_or_index; |
686 | 0 | result.value_.rep_.medium.data = |
687 | 0 | value_.rep_.medium.data + *status_or_index; |
688 | 0 | result.value_.rep_.medium.owner = value_.rep_.medium.owner; |
689 | 0 | common_internal::StrongRef(result.value_.GetMediumReferenceCount()); |
690 | 0 | return result; |
691 | 0 | } |
692 | 0 | case common_internal::ByteStringKind::kLarge: { |
693 | 0 | absl::StatusOr<absl::Cord> status_or_cord = |
694 | 0 | (SubstringImpl)(value_.GetLarge(), start); |
695 | 0 | if (!status_or_cord.ok()) { |
696 | 0 | return ErrorValue(std::move(status_or_cord).status()); |
697 | 0 | } |
698 | 0 | return StringValue::Wrap(*std::move(status_or_cord)); |
699 | 0 | } |
700 | 0 | } |
701 | 0 | } |
702 | | |
703 | | namespace { |
704 | | |
705 | | absl::StatusOr<std::pair<size_t, size_t>> SubstringImpl( |
706 | 0 | absl::string_view string, uint64_t start, uint64_t end) { |
707 | 0 | size_t size_code_points = 0; |
708 | 0 | size_t size_code_units = 0; |
709 | 0 | size_t start_code_units; |
710 | 0 | while (!string.empty()) { |
711 | 0 | if (size_code_points == start) { |
712 | 0 | start_code_units = size_code_units; |
713 | 0 | } |
714 | 0 | if (size_code_points == end) { |
715 | 0 | return std::pair{start_code_units, size_code_units}; |
716 | 0 | } |
717 | 0 | char32_t code_point; |
718 | 0 | size_t code_units; |
719 | 0 | std::tie(code_point, code_units) = cel::internal::Utf8Decode(string); |
720 | 0 | string.remove_prefix(code_units); |
721 | 0 | ++size_code_points; |
722 | 0 | size_code_units += code_units; |
723 | 0 | } |
724 | 0 | if (size_code_points == start && start == end) { |
725 | 0 | return std::pair{size_code_units, size_code_units}; |
726 | 0 | } |
727 | 0 | return absl::InvalidArgumentError( |
728 | 0 | "<string>.substring(<start>, <end>): <start> or <end> is greater than " |
729 | 0 | "<string>.size()"); |
730 | 0 | } |
731 | | |
732 | | absl::StatusOr<absl::Cord> SubstringImpl(const absl::Cord& cord, uint64_t start, |
733 | 0 | uint64_t end) { |
734 | 0 | absl::Cord::CharIterator char_begin = cord.char_begin(); |
735 | 0 | absl::Cord::CharIterator char_end = cord.char_end(); |
736 | 0 | size_t size_code_points = 0; |
737 | 0 | size_t size_code_units = 0; |
738 | 0 | size_t start_code_units; |
739 | 0 | while (char_begin != char_end) { |
740 | 0 | if (size_code_points == start) { |
741 | 0 | start_code_units = size_code_units; |
742 | 0 | } |
743 | 0 | if (size_code_points == end) { |
744 | 0 | return cord.Subcord(start_code_units, |
745 | 0 | size_code_points - start_code_units); |
746 | 0 | } |
747 | 0 | char32_t code_point; |
748 | 0 | size_t code_units; |
749 | 0 | std::tie(code_point, code_units) = cel::internal::Utf8Decode(char_begin); |
750 | 0 | absl::Cord::Advance(&char_begin, code_units); |
751 | 0 | ++size_code_points; |
752 | 0 | size_code_units += code_units; |
753 | 0 | } |
754 | 0 | if (size_code_points == start && start == end) { |
755 | 0 | return absl::Cord(); |
756 | 0 | } |
757 | 0 | return absl::InvalidArgumentError( |
758 | 0 | "<string>.substring(<start>, <end>): <start> or <end> is greater than " |
759 | 0 | "<string>.size()"); |
760 | 0 | } |
761 | | |
762 | | } // namespace |
763 | | |
764 | 0 | Value StringValue::Substring(int64_t start, int64_t end) const { |
765 | 0 | if (start < 0) { |
766 | 0 | return ErrorValue(absl::InvalidArgumentError( |
767 | 0 | "<string>.substring(<start>, <end>): <start> is less than 0")); |
768 | 0 | } |
769 | 0 | if (end < start) { |
770 | 0 | return ErrorValue(absl::InvalidArgumentError( |
771 | 0 | "<string>.substring(<start>, <end>): <end> is less than <start>")); |
772 | 0 | } |
773 | 0 | if (static_cast<uint64_t>(start) > value_.size() || |
774 | 0 | static_cast<uint64_t>(end) > value_.size()) { |
775 | 0 | return ErrorValue(absl::InvalidArgumentError( |
776 | 0 | "<string>.substring(<start>, <end>): <start> or <end> is greater than " |
777 | 0 | "<string>.size()")); |
778 | 0 | } |
779 | 0 | switch (value_.GetKind()) { |
780 | 0 | case common_internal::ByteStringKind::kSmall: { |
781 | 0 | absl::StatusOr<std::pair<size_t, size_t>> status_or_indices = |
782 | 0 | (SubstringImpl)(value_.GetSmall(), start, end); |
783 | 0 | if (!status_or_indices.ok()) { |
784 | 0 | return ErrorValue(std::move(status_or_indices).status()); |
785 | 0 | } |
786 | 0 | StringValue result; |
787 | 0 | result.value_.rep_.header.kind = common_internal::ByteStringKind::kSmall; |
788 | 0 | result.value_.rep_.small.size = |
789 | 0 | (status_or_indices->second - status_or_indices->first); |
790 | 0 | std::memcpy(result.value_.rep_.small.data, |
791 | 0 | value_.rep_.small.data + status_or_indices->first, |
792 | 0 | result.value_.rep_.small.size); |
793 | 0 | result.value_.rep_.small.arena = value_.rep_.small.arena; |
794 | 0 | return result; |
795 | 0 | } |
796 | 0 | case common_internal::ByteStringKind::kMedium: { |
797 | 0 | absl::StatusOr<std::pair<size_t, size_t>> status_or_indices = |
798 | 0 | (SubstringImpl)(value_.GetMedium(), start, end); |
799 | 0 | if (!status_or_indices.ok()) { |
800 | 0 | return ErrorValue(std::move(status_or_indices).status()); |
801 | 0 | } |
802 | 0 | StringValue result; |
803 | 0 | result.value_.rep_.header.kind = common_internal::ByteStringKind::kMedium; |
804 | 0 | result.value_.rep_.medium.size = |
805 | 0 | (status_or_indices->second - status_or_indices->first); |
806 | 0 | result.value_.rep_.medium.data = |
807 | 0 | value_.rep_.medium.data + status_or_indices->first; |
808 | 0 | result.value_.rep_.medium.owner = value_.rep_.medium.owner; |
809 | 0 | common_internal::StrongRef(result.value_.GetMediumReferenceCount()); |
810 | 0 | return result; |
811 | 0 | } |
812 | 0 | case common_internal::ByteStringKind::kLarge: { |
813 | 0 | absl::StatusOr<absl::Cord> status_or_cord = |
814 | 0 | (SubstringImpl)(value_.GetLarge(), start, end); |
815 | 0 | if (!status_or_cord.ok()) { |
816 | 0 | return ErrorValue(std::move(status_or_cord).status()); |
817 | 0 | } |
818 | 0 | return StringValue::Wrap(*std::move(status_or_cord)); |
819 | 0 | } |
820 | 0 | } |
821 | 0 | } |
822 | | |
823 | | namespace { |
824 | | |
825 | 0 | bool LowerAsciiImpl(absl::string_view in, std::string* absl_nonnull out) { |
826 | 0 | if (in.empty()) { |
827 | 0 | return false; |
828 | 0 | } |
829 | 0 | bool needs_conversion = false; |
830 | 0 | for (char c : in) { |
831 | 0 | if (absl::ascii_isupper(c)) { |
832 | 0 | needs_conversion = true; |
833 | 0 | break; |
834 | 0 | } |
835 | 0 | } |
836 | |
|
837 | 0 | if (!needs_conversion) { |
838 | 0 | return false; |
839 | 0 | } |
840 | | |
841 | 0 | *out = absl::AsciiStrToLower(in); |
842 | 0 | return true; |
843 | 0 | } |
844 | | |
845 | 0 | absl::Cord LowerAsciiImpl(const absl::Cord& in) { |
846 | 0 | if (in.empty()) { |
847 | 0 | return in; |
848 | 0 | } |
849 | 0 | size_t pos = 0; |
850 | 0 | bool needs_conversion = false; |
851 | 0 | for (char c : in.Chars()) { |
852 | 0 | if (absl::ascii_isupper(c)) { |
853 | 0 | needs_conversion = true; |
854 | 0 | break; |
855 | 0 | } |
856 | 0 | pos++; |
857 | 0 | } |
858 | 0 | if (!needs_conversion) { |
859 | 0 | return in; |
860 | 0 | } |
861 | 0 | absl::Cord out = in.Subcord(0, pos); |
862 | 0 | absl::Cord rest = in.Subcord(pos, in.size() - pos); |
863 | 0 | std::string suffix; |
864 | 0 | suffix.resize(rest.size()); |
865 | 0 | size_t current = 0; |
866 | 0 | for (char c : rest.Chars()) { |
867 | 0 | suffix[current++] = absl::ascii_tolower(c); |
868 | 0 | } |
869 | 0 | out.Append(std::move(suffix)); |
870 | 0 | return out; |
871 | 0 | } |
872 | | |
873 | | } // namespace |
874 | | |
875 | 0 | StringValue StringValue::LowerAscii(google::protobuf::Arena* absl_nonnull arena) const { |
876 | 0 | ABSL_DCHECK(arena != nullptr); |
877 | |
|
878 | 0 | switch (value_.GetKind()) { |
879 | 0 | case common_internal::ByteStringKind::kSmall: { |
880 | 0 | std::string out; |
881 | 0 | if (!(LowerAsciiImpl)(value_.GetSmall(), &out)) { |
882 | 0 | return *this; |
883 | 0 | } |
884 | 0 | return StringValue::From(std::move(out), arena); |
885 | 0 | } |
886 | 0 | case common_internal::ByteStringKind::kMedium: { |
887 | 0 | std::string out; |
888 | 0 | if (!(LowerAsciiImpl)(value_.GetMedium(), &out)) { |
889 | 0 | return *this; |
890 | 0 | } |
891 | 0 | return StringValue::From(std::move(out), arena); |
892 | 0 | } |
893 | 0 | case common_internal::ByteStringKind::kLarge: |
894 | 0 | return StringValue::Wrap((LowerAsciiImpl)(value_.GetLarge())); |
895 | 0 | } |
896 | 0 | } |
897 | | |
898 | | namespace { |
899 | | |
900 | 0 | bool UpperAsciiImpl(absl::string_view in, std::string* absl_nonnull out) { |
901 | 0 | if (in.empty()) { |
902 | 0 | return false; |
903 | 0 | } |
904 | 0 | bool needs_conversion = false; |
905 | 0 | for (char c : in) { |
906 | 0 | if (absl::ascii_islower(c)) { |
907 | 0 | needs_conversion = true; |
908 | 0 | break; |
909 | 0 | } |
910 | 0 | } |
911 | |
|
912 | 0 | if (!needs_conversion) { |
913 | 0 | return false; |
914 | 0 | } |
915 | | |
916 | 0 | *out = absl::AsciiStrToUpper(in); |
917 | 0 | return true; |
918 | 0 | } |
919 | | |
920 | 0 | absl::Cord UpperAsciiImpl(const absl::Cord& in) { |
921 | 0 | if (in.empty()) { |
922 | 0 | return in; |
923 | 0 | } |
924 | 0 | size_t pos = 0; |
925 | 0 | bool needs_conversion = false; |
926 | 0 | for (char c : in.Chars()) { |
927 | 0 | if (absl::ascii_islower(c)) { |
928 | 0 | needs_conversion = true; |
929 | 0 | break; |
930 | 0 | } |
931 | 0 | pos++; |
932 | 0 | } |
933 | 0 | if (!needs_conversion) { |
934 | 0 | return in; |
935 | 0 | } |
936 | 0 | absl::Cord out = in.Subcord(0, pos); |
937 | 0 | absl::Cord rest = in.Subcord(pos, in.size() - pos); |
938 | 0 | std::string suffix; |
939 | 0 | suffix.resize(rest.size()); |
940 | 0 | size_t current = 0; |
941 | 0 | for (char c : rest.Chars()) { |
942 | 0 | suffix[current++] = absl::ascii_toupper(c); |
943 | 0 | } |
944 | 0 | out.Append(std::move(suffix)); |
945 | 0 | return out; |
946 | 0 | } |
947 | | |
948 | | } // namespace |
949 | | |
950 | 0 | StringValue StringValue::UpperAscii(google::protobuf::Arena* absl_nonnull arena) const { |
951 | 0 | ABSL_DCHECK(arena != nullptr); |
952 | |
|
953 | 0 | switch (value_.GetKind()) { |
954 | 0 | case common_internal::ByteStringKind::kSmall: { |
955 | 0 | std::string out; |
956 | 0 | if (!(UpperAsciiImpl)(value_.GetSmall(), &out)) { |
957 | 0 | return *this; |
958 | 0 | } |
959 | 0 | return StringValue::From(std::move(out), arena); |
960 | 0 | } |
961 | 0 | case common_internal::ByteStringKind::kMedium: { |
962 | 0 | std::string out; |
963 | 0 | if (!(UpperAsciiImpl)(value_.GetMedium(), &out)) { |
964 | 0 | return *this; |
965 | 0 | } |
966 | 0 | return StringValue::From(std::move(out), arena); |
967 | 0 | } |
968 | 0 | case common_internal::ByteStringKind::kLarge: |
969 | 0 | return StringValue::Wrap((UpperAsciiImpl)(value_.GetLarge())); |
970 | 0 | } |
971 | 0 | } |
972 | | |
973 | | namespace { |
974 | | |
// Per CEL spec, checking for Unicode whitespace.
//
// Returns true for U+0009..U+000D, U+0020, U+0085, U+00A0, U+1680,
// U+2000..U+200A, U+2028, U+2029, U+202F, U+205F and U+3000; false otherwise.
bool IsUnicodeWhitespace(char32_t c) {
  switch (c) {
    case 0x0020:
    case 0x0085:
    case 0x00a0:
    case 0x1680:
    case 0x2028:
    case 0x2029:
    case 0x202f:
    case 0x205f:
    case 0x3000:
      return true;
    default:
      // The two contiguous ranges: ASCII control whitespace and the
      // general-punctuation spaces.
      return (c >= 0x0009 && c <= 0x000D) || (c >= 0x2000 && c <= 0x200a);
  }
}
986 | | |
987 | 0 | std::pair<size_t, size_t> TrimImpl(absl::string_view string) { |
988 | 0 | absl::string_view temp_string = string; |
989 | 0 | size_t left_trim_bytes = 0; |
990 | 0 | while (!temp_string.empty()) { |
991 | 0 | char32_t c; |
992 | 0 | size_t char_len = cel::internal::Utf8Decode(temp_string, &c); |
993 | 0 | if (!IsUnicodeWhitespace(c)) { |
994 | 0 | break; |
995 | 0 | } |
996 | 0 | temp_string.remove_prefix(char_len); |
997 | 0 | left_trim_bytes += char_len; |
998 | 0 | } |
999 | |
|
1000 | 0 | if (left_trim_bytes == string.size()) { |
1001 | 0 | return {left_trim_bytes, 0}; |
1002 | 0 | } |
1003 | | |
1004 | 0 | size_t last_non_ws_end_bytes = 0; |
1005 | 0 | size_t current_pos_bytes = 0; |
1006 | 0 | temp_string = string; |
1007 | 0 | while (!temp_string.empty()) { |
1008 | 0 | char32_t c; |
1009 | 0 | size_t char_len = cel::internal::Utf8Decode(temp_string, &c); |
1010 | 0 | if (!IsUnicodeWhitespace(c)) { |
1011 | 0 | last_non_ws_end_bytes = current_pos_bytes + char_len; |
1012 | 0 | } |
1013 | 0 | current_pos_bytes += char_len; |
1014 | 0 | temp_string.remove_prefix(char_len); |
1015 | 0 | } |
1016 | |
|
1017 | 0 | return {left_trim_bytes, string.size() - last_non_ws_end_bytes}; |
1018 | 0 | } |
1019 | | |
1020 | 0 | absl::Cord TrimImpl(const absl::Cord& cord) { |
1021 | 0 | size_t left_trim_bytes = 0; |
1022 | 0 | { |
1023 | 0 | absl::Cord::CharIterator begin = cord.char_begin(); |
1024 | 0 | const absl::Cord::CharIterator end = cord.char_end(); |
1025 | 0 | while (begin != end) { |
1026 | 0 | char32_t c; |
1027 | 0 | size_t char_len; |
1028 | 0 | std::tie(c, char_len) = cel::internal::Utf8Decode(begin); |
1029 | 0 | if (!IsUnicodeWhitespace(c)) { |
1030 | 0 | break; |
1031 | 0 | } |
1032 | 0 | absl::Cord::Advance(&begin, char_len); |
1033 | 0 | left_trim_bytes += char_len; |
1034 | 0 | } |
1035 | 0 | } |
1036 | |
|
1037 | 0 | if (left_trim_bytes == cord.size()) { |
1038 | 0 | return absl::Cord(); |
1039 | 0 | } |
1040 | | |
1041 | 0 | absl::Cord ltrimmed = |
1042 | 0 | cord.Subcord(left_trim_bytes, cord.size() - left_trim_bytes); |
1043 | |
|
1044 | 0 | size_t last_non_ws_end_bytes = 0; |
1045 | 0 | size_t current_pos_bytes = 0; |
1046 | 0 | { |
1047 | 0 | absl::Cord::CharIterator begin = ltrimmed.char_begin(); |
1048 | 0 | const absl::Cord::CharIterator end = ltrimmed.char_end(); |
1049 | 0 | while (begin != end) { |
1050 | 0 | char32_t c; |
1051 | 0 | size_t char_len; |
1052 | 0 | std::tie(c, char_len) = cel::internal::Utf8Decode(begin); |
1053 | 0 | if (!IsUnicodeWhitespace(c)) { |
1054 | 0 | last_non_ws_end_bytes = current_pos_bytes + char_len; |
1055 | 0 | } |
1056 | 0 | absl::Cord::Advance(&begin, char_len); |
1057 | 0 | current_pos_bytes += char_len; |
1058 | 0 | } |
1059 | 0 | } |
1060 | 0 | return ltrimmed.Subcord(0, last_non_ws_end_bytes); |
1061 | 0 | } |
1062 | | |
1063 | | } // namespace |
1064 | | |
1065 | 0 | StringValue StringValue::Trim() const { |
1066 | 0 | switch (value_.GetKind()) { |
1067 | 0 | case common_internal::ByteStringKind::kSmall: { |
1068 | 0 | std::pair<size_t, size_t> trims = (TrimImpl)(value_.GetSmall()); |
1069 | 0 | StringValue result; |
1070 | 0 | result.value_.rep_.header.kind = common_internal::ByteStringKind::kSmall; |
1071 | 0 | result.value_.rep_.small.size = |
1072 | 0 | value_.rep_.small.size - trims.first - trims.second; |
1073 | 0 | std::memcpy(result.value_.rep_.small.data, |
1074 | 0 | value_.rep_.small.data + trims.first, |
1075 | 0 | result.value_.rep_.small.size); |
1076 | 0 | result.value_.rep_.small.arena = value_.GetSmallArena(); |
1077 | 0 | return result; |
1078 | 0 | } |
1079 | 0 | case common_internal::ByteStringKind::kMedium: { |
1080 | 0 | std::pair<size_t, size_t> trims = (TrimImpl)(value_.GetMedium()); |
1081 | 0 | StringValue result; |
1082 | 0 | result.value_.rep_.header.kind = common_internal::ByteStringKind::kMedium; |
1083 | 0 | result.value_.rep_.medium.size = |
1084 | 0 | value_.rep_.medium.size - trims.first - trims.second; |
1085 | 0 | result.value_.rep_.medium.data = value_.rep_.medium.data + trims.first; |
1086 | 0 | result.value_.rep_.medium.owner = value_.rep_.medium.owner; |
1087 | 0 | common_internal::StrongRef(result.value_.GetMediumReferenceCount()); |
1088 | 0 | return result; |
1089 | 0 | } |
1090 | 0 | case common_internal::ByteStringKind::kLarge: { |
1091 | 0 | return StringValue::Wrap((TrimImpl)(value_.GetLarge())); |
1092 | 0 | } |
1093 | 0 | } |
1094 | 0 | } |
1095 | | |
1096 | | namespace { |
1097 | | |
1098 | 0 | void AppendQuoteCodePoint(char32_t code_point, std::string& dst) { |
1099 | 0 | switch (code_point) { |
1100 | 0 | case '\a': |
1101 | 0 | dst.append("\\a"); |
1102 | 0 | break; |
1103 | 0 | case '\b': |
1104 | 0 | dst.append("\\b"); |
1105 | 0 | break; |
1106 | 0 | case '\f': |
1107 | 0 | dst.append("\\f"); |
1108 | 0 | break; |
1109 | 0 | case '\n': |
1110 | 0 | dst.append("\\n"); |
1111 | 0 | break; |
1112 | 0 | case '\r': |
1113 | 0 | dst.append("\\r"); |
1114 | 0 | break; |
1115 | 0 | case '\t': |
1116 | 0 | dst.append("\\t"); |
1117 | 0 | break; |
1118 | 0 | case '\v': |
1119 | 0 | dst.append("\\v"); |
1120 | 0 | break; |
1121 | 0 | case '\\': |
1122 | 0 | dst.append("\\\\"); |
1123 | 0 | break; |
1124 | 0 | case '\"': |
1125 | 0 | dst.append("\\\""); |
1126 | 0 | break; |
1127 | 0 | default: |
1128 | 0 | cel::internal::Utf8Encode(code_point, &dst); |
1129 | 0 | break; |
1130 | 0 | } |
1131 | 0 | } |
1132 | | |
1133 | | } // namespace |
1134 | | |
1135 | 0 | StringValue StringValue::Quote(google::protobuf::Arena* absl_nonnull arena) const { |
1136 | 0 | return value_.Visit(absl::Overload( |
1137 | 0 | [&](absl::string_view rep) -> StringValue { |
1138 | 0 | std::string result; |
1139 | 0 | result.push_back('\"'); |
1140 | 0 | while (!rep.empty()) { |
1141 | 0 | char32_t code_point; |
1142 | 0 | size_t code_units; |
1143 | 0 | std::tie(code_point, code_units) = cel::internal::Utf8Decode(rep); |
1144 | 0 | AppendQuoteCodePoint(code_point, result); |
1145 | 0 | rep.remove_prefix(code_units); |
1146 | 0 | } |
1147 | 0 | result.push_back('\"'); |
1148 | 0 | return StringValue::From(std::move(result), arena); |
1149 | 0 | }, |
1150 | 0 | [&](const absl::Cord& rep) -> StringValue { |
1151 | 0 | absl::Cord::CharIterator begin = rep.char_begin(); |
1152 | 0 | absl::Cord::CharIterator end = rep.char_end(); |
1153 | 0 | std::string result; |
1154 | 0 | result.push_back('\"'); |
1155 | 0 | while (begin != end) { |
1156 | 0 | char32_t code_point; |
1157 | 0 | size_t code_units; |
1158 | 0 | std::tie(code_point, code_units) = cel::internal::Utf8Decode(begin); |
1159 | 0 | AppendQuoteCodePoint(code_point, result); |
1160 | 0 | absl::Cord::Advance(&begin, code_units); |
1161 | 0 | } |
1162 | 0 | result.push_back('\"'); |
1163 | 0 | return StringValue::From(std::move(result), arena); |
1164 | 0 | })); |
1165 | 0 | } |
1166 | | |
1167 | 0 | StringValue StringValue::Reverse(google::protobuf::Arena* absl_nonnull arena) const { |
1168 | 0 | return value_.Visit(absl::Overload( |
1169 | 0 | [arena](absl::string_view string) -> StringValue { |
1170 | 0 | if (string.empty()) { |
1171 | 0 | return StringValue(); |
1172 | 0 | } |
1173 | 0 | std::string reversed; |
1174 | 0 | reversed.reserve(string.size()); |
1175 | 0 | const char* ptr = string.data() + string.size(); |
1176 | 0 | const char* begin = string.data(); |
1177 | 0 | while (ptr > begin) { |
1178 | 0 | const char* char_end = ptr; |
1179 | 0 | --ptr; |
1180 | | // Back up to beginning of encoded UTF-8 code point. |
1181 | 0 | while (ptr > begin && (*ptr & 0xC0) == 0x80) { |
1182 | 0 | --ptr; |
1183 | 0 | } |
1184 | 0 | reversed.append(ptr, char_end - ptr); |
1185 | 0 | } |
1186 | 0 | return StringValue::From(std::move(reversed), arena); |
1187 | 0 | }, |
1188 | 0 | [arena](const absl::Cord& cord) -> StringValue { |
1189 | 0 | if (cord.empty()) { |
1190 | 0 | return StringValue(); |
1191 | 0 | } |
1192 | 0 | std::vector<char32_t> code_points; |
1193 | 0 | absl::Cord::CharIterator char_begin = cord.char_begin(); |
1194 | 0 | absl::Cord::CharIterator char_end = cord.char_end(); |
1195 | 0 | while (char_begin != char_end) { |
1196 | 0 | char32_t code_point; |
1197 | 0 | size_t code_units = |
1198 | 0 | cel::internal::Utf8Decode(char_begin, &code_point); |
1199 | 0 | code_points.push_back(code_point); |
1200 | 0 | absl::Cord::Advance(&char_begin, code_units); |
1201 | 0 | } |
1202 | 0 | std::string reversed; |
1203 | 0 | reversed.reserve(cord.size()); |
1204 | 0 | for (auto it = code_points.rbegin(); it != code_points.rend(); ++it) { |
1205 | 0 | cel::internal::Utf8Encode(*it, &reversed); |
1206 | 0 | } |
1207 | 0 | return StringValue::From(std::move(reversed), arena); |
1208 | 0 | })); |
1209 | 0 | } |
1210 | | |
1211 | | absl::StatusOr<Value> StringValue::Join( |
1212 | | const ListValue& list, |
1213 | | const google::protobuf::DescriptorPool* absl_nonnull descriptor_pool, |
1214 | | google::protobuf::MessageFactory* absl_nonnull message_factory, |
1215 | 0 | google::protobuf::Arena* absl_nonnull arena) const { |
1216 | 0 | Value result; |
1217 | 0 | CEL_RETURN_IF_ERROR( |
1218 | 0 | Join(list, descriptor_pool, message_factory, arena, &result)); |
1219 | 0 | return result; |
1220 | 0 | } |
1221 | | |
1222 | | absl::Status StringValue::Join( |
1223 | | const ListValue& list, |
1224 | | const google::protobuf::DescriptorPool* absl_nonnull descriptor_pool, |
1225 | | google::protobuf::MessageFactory* absl_nonnull message_factory, |
1226 | 0 | google::protobuf::Arena* absl_nonnull arena, Value* absl_nonnull result) const { |
1227 | 0 | ABSL_DCHECK(descriptor_pool != nullptr); |
1228 | 0 | ABSL_DCHECK(message_factory != nullptr); |
1229 | 0 | ABSL_DCHECK(arena != nullptr); |
1230 | 0 | ABSL_DCHECK(result != nullptr); |
1231 | |
|
1232 | 0 | std::string joined; |
1233 | |
|
1234 | 0 | CEL_ASSIGN_OR_RETURN(auto iterator, list.NewIterator()); |
1235 | |
|
1236 | 0 | CEL_ASSIGN_OR_RETURN( |
1237 | 0 | absl::optional<Value> element, |
1238 | 0 | iterator->Next1(descriptor_pool, message_factory, arena)); |
1239 | 0 | if (element) { |
1240 | 0 | if (auto string_element = element->AsString(); string_element) { |
1241 | 0 | string_element->AppendToString(&joined); |
1242 | 0 | } else { |
1243 | 0 | ABSL_DCHECK(!element->Is<ErrorValue>()); |
1244 | 0 | *result = |
1245 | 0 | ErrorValue(runtime_internal::CreateNoMatchingOverloadError("join")); |
1246 | 0 | return absl::OkStatus(); |
1247 | 0 | } |
1248 | 0 | while (true) { |
1249 | 0 | CEL_ASSIGN_OR_RETURN( |
1250 | 0 | element, iterator->Next1(descriptor_pool, message_factory, arena)); |
1251 | 0 | if (!element) { |
1252 | 0 | break; |
1253 | 0 | } |
1254 | 0 | AppendToString(&joined); |
1255 | 0 | if (auto string_element = element->AsString(); string_element) { |
1256 | 0 | string_element->AppendToString(&joined); |
1257 | 0 | } else { |
1258 | 0 | ABSL_DCHECK(!element->Is<ErrorValue>()); |
1259 | 0 | *result = |
1260 | 0 | ErrorValue(runtime_internal::CreateNoMatchingOverloadError("join")); |
1261 | 0 | return absl::OkStatus(); |
1262 | 0 | } |
1263 | 0 | } |
1264 | 0 | } |
1265 | | |
1266 | 0 | if (joined.size() > common_internal::kSmallByteStringCapacity) { |
1267 | 0 | joined.shrink_to_fit(); |
1268 | 0 | } |
1269 | |
|
1270 | 0 | *result = StringValue::From(std::move(joined), arena); |
1271 | 0 | return absl::OkStatus(); |
1272 | 0 | } |
1273 | | |
1274 | | absl::StatusOr<Value> StringValue::Split( |
1275 | | const StringValue& delimiter, int64_t limit, |
1276 | 0 | google::protobuf::Arena* absl_nonnull arena) const { |
1277 | 0 | Value result; |
1278 | 0 | CEL_RETURN_IF_ERROR(Split(delimiter, limit, arena, &result)); |
1279 | 0 | return result; |
1280 | 0 | } |
1281 | | |
1282 | | absl::Status StringValue::Split(const StringValue& delimiter, |
1283 | | google::protobuf::Arena* absl_nonnull arena, |
1284 | 0 | Value* absl_nonnull result) const { |
1285 | 0 | return Split(delimiter, -1, arena, result); |
1286 | 0 | } |
1287 | | |
1288 | | absl::StatusOr<Value> StringValue::Split( |
1289 | 0 | const StringValue& delimiter, google::protobuf::Arena* absl_nonnull arena) const { |
1290 | 0 | Value result; |
1291 | 0 | CEL_RETURN_IF_ERROR(Split(delimiter, -1, arena, &result)); |
1292 | 0 | return result; |
1293 | 0 | } |
1294 | | |
1295 | | absl::Status StringValue::Split(const StringValue& delimiter, int64_t limit, |
1296 | | google::protobuf::Arena* absl_nonnull arena, |
1297 | 0 | Value* absl_nonnull result) const { |
1298 | 0 | ABSL_DCHECK(arena != nullptr); |
1299 | 0 | ABSL_DCHECK(result != nullptr); |
1300 | |
|
1301 | 0 | if (limit == 0) { |
1302 | | // Per spec, when limit is 0 return an empty list. |
1303 | 0 | *result = ListValue(); |
1304 | 0 | return absl::OkStatus(); |
1305 | 0 | } |
1306 | 0 | if (limit < 0) { |
1307 | | // Per spec, when limit is negative treat it as unlimited splits. |
1308 | 0 | limit = std::numeric_limits<int64_t>::max(); |
1309 | 0 | } |
1310 | |
|
1311 | 0 | std::vector<std::pair<size_t, size_t>> splits; |
1312 | 0 | size_t pos = 0; |
1313 | 0 | const size_t len = value_.size(); |
1314 | |
|
1315 | 0 | if (delimiter.IsEmpty()) { |
1316 | 0 | value_.Visit(absl::Overload( |
1317 | 0 | [&](absl::string_view s) { |
1318 | 0 | while (pos < len && limit > 1) { |
1319 | 0 | size_t char_len = cel::internal::Utf8Decode(s.substr(pos), nullptr); |
1320 | 0 | splits.push_back({pos, pos + char_len}); |
1321 | 0 | pos += char_len; |
1322 | 0 | --limit; |
1323 | 0 | } |
1324 | 0 | }, |
1325 | 0 | [&](const absl::Cord& s) { |
1326 | 0 | while (pos < len && limit > 1) { |
1327 | 0 | size_t char_len = cel::internal::Utf8Decode( |
1328 | 0 | s.Subcord(pos, len - pos).char_begin(), nullptr); |
1329 | 0 | splits.push_back({pos, pos + char_len}); |
1330 | 0 | pos += char_len; |
1331 | 0 | --limit; |
1332 | 0 | } |
1333 | 0 | })); |
1334 | 0 | } else { |
1335 | 0 | while (pos < len && limit > 1) { |
1336 | 0 | absl::optional<size_t> next = value_.Find(delimiter.value_, pos); |
1337 | 0 | if (!next) { |
1338 | 0 | break; |
1339 | 0 | } |
1340 | 0 | splits.push_back(std::pair{pos, *next}); |
1341 | 0 | pos = *next + delimiter.value_.size(); |
1342 | 0 | --limit; |
1343 | 0 | ABSL_DCHECK_LE(pos, len); |
1344 | 0 | } |
1345 | 0 | } |
1346 | |
|
1347 | 0 | if (splits.empty() || !delimiter.IsEmpty() || pos < len) { |
1348 | 0 | splits.push_back(std::pair{pos, len}); |
1349 | 0 | } |
1350 | |
|
1351 | 0 | auto builder = NewListValueBuilder(arena); |
1352 | 0 | builder->Reserve(splits.size()); |
1353 | 0 | for (const std::pair<size_t, size_t>& split : splits) { |
1354 | 0 | builder->UnsafeAdd( |
1355 | 0 | StringValue(value_.Substring(split.first, split.second))); |
1356 | 0 | } |
1357 | 0 | *result = std::move(*builder).Build(); |
1358 | 0 | return absl::OkStatus(); |
1359 | 0 | } |
1360 | | |
1361 | | absl::StatusOr<Value> StringValue::Replace( |
1362 | | const StringValue& needle, const StringValue& replacement, int64_t limit, |
1363 | 0 | google::protobuf::Arena* absl_nonnull arena) const { |
1364 | 0 | Value result; |
1365 | 0 | CEL_RETURN_IF_ERROR(Replace(needle, replacement, limit, arena, &result)); |
1366 | 0 | return result; |
1367 | 0 | } |
1368 | | |
1369 | | absl::Status StringValue::Replace(const StringValue& needle, |
1370 | | const StringValue& replacement, |
1371 | | google::protobuf::Arena* absl_nonnull arena, |
1372 | 0 | Value* absl_nonnull result) const { |
1373 | 0 | return Replace(needle, replacement, -1, arena, result); |
1374 | 0 | } |
1375 | | |
1376 | | absl::StatusOr<Value> StringValue::Replace( |
1377 | | const StringValue& needle, const StringValue& replacement, |
1378 | 0 | google::protobuf::Arena* absl_nonnull arena) const { |
1379 | 0 | Value result; |
1380 | 0 | CEL_RETURN_IF_ERROR(Replace(needle, replacement, -1, arena, &result)); |
1381 | 0 | return result; |
1382 | 0 | } |
1383 | | |
1384 | | absl::Status StringValue::Replace(const StringValue& needle, |
1385 | | const StringValue& replacement, int64_t limit, |
1386 | | google::protobuf::Arena* absl_nonnull arena, |
1387 | 0 | Value* absl_nonnull result) const { |
1388 | 0 | ABSL_DCHECK(arena != nullptr); |
1389 | 0 | ABSL_DCHECK(result != nullptr); |
1390 | |
|
1391 | 0 | if (limit == 0) { |
1392 | | // Per spec, when limit is 0 return the original string. |
1393 | 0 | *result = *this; |
1394 | 0 | return absl::OkStatus(); |
1395 | 0 | } |
1396 | 0 | if (limit < 0) { |
1397 | | // Per spec, when limit is negative treat it as unlimited replacements. |
1398 | 0 | limit = std::numeric_limits<int64_t>::max(); |
1399 | 0 | } |
1400 | |
|
1401 | 0 | size_t pos = 0; |
1402 | 0 | const size_t len = value_.size(); |
1403 | 0 | const size_t needle_len = needle.value_.size(); |
1404 | 0 | std::string res_str; |
1405 | |
|
1406 | 0 | if (needle.IsEmpty()) { |
1407 | 0 | value_.Visit(absl::Overload( |
1408 | 0 | [&](absl::string_view s) { |
1409 | 0 | while (pos < len && limit > 0) { |
1410 | 0 | replacement.AppendToString(&res_str); |
1411 | 0 | size_t char_len = cel::internal::Utf8Decode(s.substr(pos), nullptr); |
1412 | 0 | value_.Substring(pos, pos + char_len).AppendToString(&res_str); |
1413 | 0 | pos += char_len; |
1414 | 0 | --limit; |
1415 | 0 | } |
1416 | 0 | }, |
1417 | 0 | [&](const absl::Cord& s) { |
1418 | 0 | while (pos < len && limit > 0) { |
1419 | 0 | replacement.AppendToString(&res_str); |
1420 | 0 | size_t char_len = cel::internal::Utf8Decode( |
1421 | 0 | s.Subcord(pos, len - pos).char_begin(), nullptr); |
1422 | 0 | value_.Substring(pos, pos + char_len).AppendToString(&res_str); |
1423 | 0 | pos += char_len; |
1424 | 0 | --limit; |
1425 | 0 | } |
1426 | 0 | })); |
1427 | 0 | if (limit > 0) { |
1428 | 0 | replacement.AppendToString(&res_str); |
1429 | 0 | } |
1430 | 0 | } else { |
1431 | 0 | while (pos < len && limit > 0) { |
1432 | 0 | absl::optional<size_t> next = value_.Find(needle.value_, pos); |
1433 | 0 | if (!next) { |
1434 | 0 | break; |
1435 | 0 | } |
1436 | | |
1437 | 0 | value_.Substring(pos, *next).AppendToString(&res_str); |
1438 | 0 | replacement.AppendToString(&res_str); |
1439 | |
|
1440 | 0 | pos = *next + needle_len; |
1441 | 0 | --limit; |
1442 | 0 | } |
1443 | 0 | } |
1444 | |
|
1445 | 0 | if (pos < len) { |
1446 | 0 | value_.Substring(pos, len).AppendToString(&res_str); |
1447 | 0 | } |
1448 | |
|
1449 | 0 | if (res_str.size() > common_internal::kSmallByteStringCapacity) { |
1450 | 0 | res_str.shrink_to_fit(); |
1451 | 0 | } |
1452 | |
|
1453 | 0 | *result = StringValue::From(std::move(res_str), arena); |
1454 | 0 | return absl::OkStatus(); |
1455 | 0 | } |
1456 | | |
1457 | 0 | Value StringValue::CharAt(int64_t pos) const { |
1458 | 0 | if (pos < 0) { |
1459 | 0 | return ErrorValue(absl::InvalidArgumentError( |
1460 | 0 | "<string>.charAt(<pos>): <pos> is less than 0")); |
1461 | 0 | } |
1462 | 0 | return value_.Visit(absl::Overload( |
1463 | 0 | [this, pos](absl::string_view rep) mutable -> Value { |
1464 | 0 | while (!rep.empty()) { |
1465 | 0 | char32_t code_point; |
1466 | 0 | size_t code_units; |
1467 | 0 | std::tie(code_point, code_units) = cel::internal::Utf8Decode(rep); |
1468 | 0 | if (pos == 0) { |
1469 | 0 | StringValue result; |
1470 | 0 | result.value_.rep_.header.kind = |
1471 | 0 | common_internal::ByteStringKind::kSmall; |
1472 | 0 | result.value_.rep_.small.size = cel::internal::Utf8Encode( |
1473 | 0 | code_point, result.value_.rep_.small.data); |
1474 | 0 | result.value_.rep_.small.arena = value_.GetArena(); |
1475 | 0 | return result; |
1476 | 0 | } |
1477 | 0 | rep.remove_prefix(code_units); |
1478 | 0 | --pos; |
1479 | 0 | } |
1480 | | // If we exit the loop, we iterated through all the code points in |
1481 | | // `rep`. `pos == 0` means we were looking for a character at index |
1482 | | // `size()`, which is defined to return an empty string. |
1483 | 0 | if (pos == 0) { |
1484 | 0 | return StringValue(); |
1485 | 0 | } |
1486 | 0 | return ErrorValue(absl::InvalidArgumentError( |
1487 | 0 | "<string>.charAt(<pos>): <pos> is greater than <string>.size()")); |
1488 | 0 | }, |
1489 | 0 | [pos](const absl::Cord& rep) mutable -> Value { |
1490 | 0 | absl::Cord::CharIterator begin = rep.char_begin(); |
1491 | 0 | absl::Cord::CharIterator end = rep.char_end(); |
1492 | 0 | while (begin != end) { |
1493 | 0 | char32_t code_point; |
1494 | 0 | size_t code_units; |
1495 | 0 | std::tie(code_point, code_units) = cel::internal::Utf8Decode(begin); |
1496 | 0 | if (pos == 0) { |
1497 | 0 | StringValue result; |
1498 | 0 | result.value_.rep_.header.kind = |
1499 | 0 | common_internal::ByteStringKind::kSmall; |
1500 | 0 | result.value_.rep_.small.size = cel::internal::Utf8Encode( |
1501 | 0 | code_point, result.value_.rep_.small.data); |
1502 | 0 | result.value_.rep_.small.arena = nullptr; |
1503 | 0 | return result; |
1504 | 0 | } |
1505 | 0 | absl::Cord::Advance(&begin, code_units); |
1506 | 0 | --pos; |
1507 | 0 | } |
1508 | | // If we exit the loop, we iterated through all the code points in |
1509 | | // `rep`. `pos == 0` means we were looking for a character at index |
1510 | | // `size()`, which is defined to return an empty string. |
1511 | 0 | if (pos == 0) { |
1512 | 0 | return StringValue(); |
1513 | 0 | } |
1514 | 0 | return ErrorValue(absl::InvalidArgumentError( |
1515 | 0 | "<string>.charAt(<pos>): <pos> is greater than <string>.size()")); |
1516 | 0 | })); |
1517 | 0 | } |
1518 | | |
1519 | | } // namespace cel |