/src/abseil-cpp/absl/strings/cord.cc
Line | Count | Source |
1 | | // Copyright 2020 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "absl/strings/cord.h" |
16 | | |
17 | | #include <algorithm> |
18 | | #include <cassert> |
19 | | #include <cstddef> |
20 | | #include <cstdint> |
21 | | #include <cstdio> |
22 | | #include <cstdlib> |
23 | | #include <cstring> |
24 | | #include <iomanip> |
25 | | #include <ios> |
26 | | #include <iostream> |
27 | | #include <limits> |
28 | | #include <memory> |
29 | | #include <ostream> |
30 | | #include <sstream> |
31 | | #include <string> |
32 | | #include <utility> |
33 | | |
34 | | #include "absl/base/attributes.h" |
35 | | #include "absl/base/config.h" |
36 | | #include "absl/base/internal/endian.h" |
37 | | #include "absl/base/internal/raw_logging.h" |
38 | | #include "absl/base/macros.h" |
39 | | #include "absl/base/optimization.h" |
40 | | #include "absl/base/nullability.h" |
41 | | #include "absl/container/inlined_vector.h" |
42 | | #include "absl/crc/crc32c.h" |
43 | | #include "absl/crc/internal/crc_cord_state.h" |
44 | | #include "absl/functional/function_ref.h" |
45 | | #include "absl/strings/cord_buffer.h" |
46 | | #include "absl/strings/escaping.h" |
47 | | #include "absl/strings/internal/append_and_overwrite.h" |
48 | | #include "absl/strings/internal/cord_data_edge.h" |
49 | | #include "absl/strings/internal/cord_internal.h" |
50 | | #include "absl/strings/internal/cord_rep_btree.h" |
51 | | #include "absl/strings/internal/cord_rep_crc.h" |
52 | | #include "absl/strings/internal/cord_rep_flat.h" |
53 | | #include "absl/strings/internal/cordz_update_tracker.h" |
54 | | #include "absl/strings/match.h" |
55 | | #include "absl/strings/resize_and_overwrite.h" |
56 | | #include "absl/strings/str_cat.h" |
57 | | #include "absl/strings/string_view.h" |
58 | | #include "absl/strings/strip.h" |
59 | | #include "absl/types/optional.h" |
60 | | #include "absl/types/span.h" |
61 | | |
62 | | namespace absl { |
63 | | ABSL_NAMESPACE_BEGIN |
64 | | |
65 | | using ::absl::cord_internal::CordRep; |
66 | | using ::absl::cord_internal::CordRepBtree; |
67 | | using ::absl::cord_internal::CordRepCrc; |
68 | | using ::absl::cord_internal::CordRepExternal; |
69 | | using ::absl::cord_internal::CordRepFlat; |
70 | | using ::absl::cord_internal::CordRepSubstring; |
71 | | using ::absl::cord_internal::CordzUpdateTracker; |
72 | | using ::absl::cord_internal::InlineData; |
73 | | using ::absl::cord_internal::kMaxFlatLength; |
74 | | using ::absl::cord_internal::kMinFlatLength; |
75 | | |
76 | | using ::absl::cord_internal::kInlinedVectorSize; |
77 | | using ::absl::cord_internal::kMaxBytesToCopy; |
78 | | |
79 | | static void DumpNode(CordRep* absl_nonnull nonnull_rep, bool include_data, |
80 | | std::ostream* absl_nonnull os, int indent = 0); |
81 | | static bool VerifyNode(CordRep* absl_nonnull root, |
82 | | CordRep* absl_nonnull start_node); |
83 | | |
84 | 0 | static inline CordRep* absl_nullable VerifyTree(CordRep* absl_nullable node) { |
85 | 0 | assert(node == nullptr || VerifyNode(node, node)); |
86 | 0 | static_cast<void>(&VerifyNode); |
87 | 0 | return node; |
88 | 0 | } |
89 | | |
90 | | static CordRepFlat* absl_nonnull CreateFlat(const char* absl_nonnull data, |
91 | 0 | size_t length, size_t alloc_hint) { |
92 | 0 | CordRepFlat* flat = CordRepFlat::New(length + alloc_hint); |
93 | 0 | flat->length = length; |
94 | 0 | memcpy(flat->Data(), data, length); |
95 | 0 | return flat; |
96 | 0 | } |
97 | | |
98 | | // Creates a new flat or Btree out of the specified array. |
99 | | // The returned node has a refcount of 1. |
100 | | static CordRep* absl_nonnull NewBtree(const char* absl_nonnull data, |
101 | 0 | size_t length, size_t alloc_hint) { |
102 | 0 | if (length <= kMaxFlatLength) { |
103 | 0 | return CreateFlat(data, length, alloc_hint); |
104 | 0 | } |
105 | 0 | CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0); |
106 | 0 | data += kMaxFlatLength; |
107 | 0 | length -= kMaxFlatLength; |
108 | 0 | auto* root = CordRepBtree::Create(flat); |
109 | 0 | return CordRepBtree::Append(root, {data, length}, alloc_hint); |
110 | 0 | } |
111 | | |
112 | | // Create a new tree out of the specified array. |
113 | | // The returned node has a refcount of 1. |
114 | | static CordRep* absl_nullable NewTree(const char* absl_nullable data, |
115 | 0 | size_t length, size_t alloc_hint) { |
116 | 0 | if (length == 0) return nullptr; |
117 | 0 | return NewBtree(data, length, alloc_hint); |
118 | 0 | } |
119 | | |
120 | | namespace cord_internal { |
121 | | |
122 | | void InitializeCordRepExternal(absl::string_view data, |
123 | 0 | CordRepExternal* absl_nonnull rep) { |
124 | 0 | assert(!data.empty()); |
125 | 0 | rep->length = data.size(); |
126 | 0 | rep->tag = EXTERNAL; |
127 | 0 | rep->base = data.data(); |
128 | 0 | VerifyTree(rep); |
129 | 0 | } |
130 | | |
131 | | } // namespace cord_internal |
132 | | |
133 | | // Creates a CordRep from the provided string. If the string is large enough, |
134 | | // and not wasteful, we move the string into an external cord rep, preserving |
135 | | // the already allocated string contents. |
136 | | // Requires the provided string length to be larger than `kMaxInline`. |
137 | 0 | static CordRep* absl_nonnull CordRepFromString(std::string&& src) { |
138 | 0 | assert(src.length() > cord_internal::kMaxInline); |
139 | 0 | if ( |
140 | | // String is short: copy data to avoid external block overhead. |
141 | 0 | src.size() <= kMaxBytesToCopy || |
142 | | // String is wasteful: copy data to avoid pinning too much unused memory. |
143 | 0 | src.size() < src.capacity() / 2 |
144 | 0 | ) { |
145 | 0 | return NewTree(src.data(), src.size(), 0); |
146 | 0 | } |
147 | | |
148 | 0 | struct StringReleaser { |
149 | 0 | void operator()(absl::string_view /* data */) {} |
150 | 0 | std::string data; |
151 | 0 | }; |
152 | 0 | const absl::string_view original_data = src; |
153 | 0 | auto* rep = |
154 | 0 | static_cast<::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>( |
155 | 0 | absl::cord_internal::NewExternalRep(original_data, |
156 | 0 | StringReleaser{std::move(src)})); |
157 | | // Moving src may have invalidated its data pointer, so adjust it. |
158 | 0 | rep->base = rep->template get<0>().data.data(); |
159 | 0 | return rep; |
160 | 0 | } |
161 | | |
162 | | // -------------------------------------------------------------------- |
163 | | // Cord::InlineRep functions |
164 | | |
165 | | inline void Cord::InlineRep::set_data(const char* absl_nullable data, |
166 | 0 | size_t n) { |
167 | 0 | static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15"); |
168 | 0 | assert(data != nullptr || n == 0); |
169 | 0 | data_.set_inline_data(data, n); |
170 | 0 | } |
171 | | |
172 | 0 | inline char* absl_nonnull Cord::InlineRep::set_data(size_t n) { |
173 | 0 | assert(n <= kMaxInline); |
174 | 0 | ResetToEmpty(); |
175 | 0 | set_inline_size(n); |
176 | 0 | return data_.as_chars(); |
177 | 0 | } |
178 | | |
179 | 0 | inline void Cord::InlineRep::reduce_size(size_t n) { |
180 | 0 | size_t tag = inline_size(); |
181 | 0 | assert(tag <= kMaxInline); |
182 | 0 | assert(tag >= n); |
183 | 0 | tag -= n; |
184 | 0 | memset(data_.as_chars() + tag, 0, n); |
185 | 0 | set_inline_size(tag); |
186 | 0 | } |
187 | | |
188 | 0 | inline void Cord::InlineRep::remove_prefix(size_t n) { |
189 | 0 | cord_internal::SmallMemmove(data_.as_chars(), data_.as_chars() + n, |
190 | 0 | inline_size() - n); |
191 | 0 | reduce_size(n); |
192 | 0 | } |
193 | | |
194 | | // Returns `rep` converted into a CordRepBtree. |
195 | | // Directly returns `rep` if `rep` is already a CordRepBtree. |
196 | 0 | static CordRepBtree* absl_nonnull ForceBtree(CordRep* rep) { |
197 | 0 | return rep->IsBtree() |
198 | 0 | ? rep->btree() |
199 | 0 | : CordRepBtree::Create(cord_internal::RemoveCrcNode(rep)); |
200 | 0 | } |
201 | | |
202 | | void Cord::InlineRep::AppendTreeToInlined(CordRep* absl_nonnull tree, |
203 | 0 | MethodIdentifier method) { |
204 | 0 | assert(!is_tree()); |
205 | 0 | if (!data_.is_empty()) { |
206 | 0 | CordRepFlat* flat = MakeFlatWithExtraCapacity(0); |
207 | 0 | tree = CordRepBtree::Append(CordRepBtree::Create(flat), tree); |
208 | 0 | } |
209 | 0 | EmplaceTree(tree, method); |
210 | 0 | } |
211 | | |
212 | | void Cord::InlineRep::AppendTreeToTree(CordRep* absl_nonnull tree, |
213 | 0 | MethodIdentifier method) { |
214 | 0 | assert(is_tree()); |
215 | 0 | const CordzUpdateScope scope(data_.cordz_info(), method); |
216 | 0 | tree = CordRepBtree::Append(ForceBtree(data_.as_tree()), tree); |
217 | 0 | SetTree(tree, scope); |
218 | 0 | } |
219 | | |
220 | | void Cord::InlineRep::AppendTree(CordRep* absl_nonnull tree, |
221 | 0 | MethodIdentifier method) { |
222 | 0 | assert(tree != nullptr); |
223 | 0 | assert(tree->length != 0); |
224 | 0 | assert(!tree->IsCrc()); |
225 | 0 | if (data_.is_tree()) { |
226 | 0 | AppendTreeToTree(tree, method); |
227 | 0 | } else { |
228 | 0 | AppendTreeToInlined(tree, method); |
229 | 0 | } |
230 | 0 | } |
231 | | |
232 | | void Cord::InlineRep::PrependTreeToInlined(CordRep* absl_nonnull tree, |
233 | 0 | MethodIdentifier method) { |
234 | 0 | assert(!is_tree()); |
235 | 0 | if (!data_.is_empty()) { |
236 | 0 | CordRepFlat* flat = MakeFlatWithExtraCapacity(0); |
237 | 0 | tree = CordRepBtree::Prepend(CordRepBtree::Create(flat), tree); |
238 | 0 | } |
239 | 0 | EmplaceTree(tree, method); |
240 | 0 | } |
241 | | |
242 | | void Cord::InlineRep::PrependTreeToTree(CordRep* absl_nonnull tree, |
243 | 0 | MethodIdentifier method) { |
244 | 0 | assert(is_tree()); |
245 | 0 | const CordzUpdateScope scope(data_.cordz_info(), method); |
246 | 0 | tree = CordRepBtree::Prepend(ForceBtree(data_.as_tree()), tree); |
247 | 0 | SetTree(tree, scope); |
248 | 0 | } |
249 | | |
250 | | void Cord::InlineRep::PrependTree(CordRep* absl_nonnull tree, |
251 | 0 | MethodIdentifier method) { |
252 | 0 | assert(tree != nullptr); |
253 | 0 | assert(tree->length != 0); |
254 | 0 | assert(!tree->IsCrc()); |
255 | 0 | if (data_.is_tree()) { |
256 | 0 | PrependTreeToTree(tree, method); |
257 | 0 | } else { |
258 | 0 | PrependTreeToInlined(tree, method); |
259 | 0 | } |
260 | 0 | } |
261 | | |
262 | | // Searches for a non-full flat node at the rightmost leaf of the tree. If a |
263 | | // suitable leaf is found, the function will update the length field for all |
264 | | // nodes to account for the size increase. The append region address will be |
265 | | // written to region and the actual size increase will be written to size. |
266 | | static inline bool PrepareAppendRegion(CordRep* absl_nonnull root, |
267 | | char* absl_nullable* absl_nonnull region, |
268 | | size_t* absl_nonnull size, |
269 | 0 | size_t max_length) { |
270 | 0 | if (root->IsBtree() && root->refcount.IsOne()) { |
271 | 0 | Span<char> span = root->btree()->GetAppendBuffer(max_length); |
272 | 0 | if (!span.empty()) { |
273 | 0 | *region = span.data(); |
274 | 0 | *size = span.size(); |
275 | 0 | return true; |
276 | 0 | } |
277 | 0 | } |
278 | | |
279 | 0 | CordRep* dst = root; |
280 | 0 | if (!dst->IsFlat() || !dst->refcount.IsOne()) { |
281 | 0 | *region = nullptr; |
282 | 0 | *size = 0; |
283 | 0 | return false; |
284 | 0 | } |
285 | | |
286 | 0 | const size_t in_use = dst->length; |
287 | 0 | const size_t capacity = dst->flat()->Capacity(); |
288 | 0 | if (in_use == capacity) { |
289 | 0 | *region = nullptr; |
290 | 0 | *size = 0; |
291 | 0 | return false; |
292 | 0 | } |
293 | | |
294 | 0 | const size_t size_increase = std::min(capacity - in_use, max_length); |
295 | 0 | dst->length += size_increase; |
296 | |
|
297 | 0 | *region = dst->flat()->Data() + in_use; |
298 | 0 | *size = size_increase; |
299 | 0 | return true; |
300 | 0 | } |
301 | | |
302 | 0 | void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) { |
303 | 0 | assert(&src != this); |
304 | 0 | assert(is_tree() || src.is_tree()); |
305 | 0 | auto constexpr method = CordzUpdateTracker::kAssignCord; |
306 | 0 | if (ABSL_PREDICT_TRUE(!is_tree())) { |
307 | 0 | EmplaceTree(CordRep::Ref(src.as_tree()), src.data_, method); |
308 | 0 | return; |
309 | 0 | } |
310 | | |
311 | 0 | CordRep* tree = as_tree(); |
312 | 0 | if (CordRep* src_tree = src.tree()) { |
313 | | // Leave any existing `cordz_info` in place, and let MaybeTrackCord() |
314 | | // decide if this cord should be (or remains to be) sampled or not. |
315 | 0 | data_.set_tree(CordRep::Ref(src_tree)); |
316 | 0 | CordzInfo::MaybeTrackCord(data_, src.data_, method); |
317 | 0 | } else { |
318 | 0 | CordzInfo::MaybeUntrackCord(data_.cordz_info()); |
319 | 0 | data_ = src.data_; |
320 | 0 | } |
321 | 0 | CordRep::Unref(tree); |
322 | 0 | } |
323 | | |
324 | 0 | void Cord::InlineRep::UnrefTree() { |
325 | 0 | if (is_tree()) { |
326 | 0 | CordzInfo::MaybeUntrackCord(data_.cordz_info()); |
327 | 0 | CordRep::Unref(tree()); |
328 | 0 | } |
329 | 0 | } |
330 | | |
331 | | // -------------------------------------------------------------------- |
332 | | // Constructors and destructors |
333 | | |
334 | | Cord::Cord(absl::string_view src, MethodIdentifier method) |
335 | 0 | : contents_(InlineData::kDefaultInit) { |
336 | 0 | const size_t n = src.size(); |
337 | 0 | if (n <= InlineRep::kMaxInline) { |
338 | 0 | contents_.set_data(src.data(), n); |
339 | 0 | } else { |
340 | 0 | CordRep* rep = NewTree(src.data(), n, 0); |
341 | 0 | contents_.EmplaceTree(rep, method); |
342 | 0 | } |
343 | 0 | } |
344 | | |
345 | | template <typename T, Cord::EnableIfString<T>> |
346 | 0 | Cord::Cord(T&& src) : contents_(InlineData::kDefaultInit) { |
347 | 0 | if (src.size() <= InlineRep::kMaxInline) { |
348 | 0 | contents_.set_data(src.data(), src.size()); |
349 | 0 | } else { |
350 | 0 | CordRep* rep = CordRepFromString(std::forward<T>(src)); |
351 | 0 | contents_.EmplaceTree(rep, CordzUpdateTracker::kConstructorString); |
352 | 0 | } |
353 | 0 | } |
354 | | |
355 | | template Cord::Cord(std::string&& src); |
356 | | |
357 | | // The destruction code is separate so that the compiler can determine |
358 | | // that it does not need to call the destructor on a moved-from Cord. |
359 | 0 | void Cord::DestroyCordSlow() { |
360 | 0 | assert(contents_.is_tree()); |
361 | 0 | CordzInfo::MaybeUntrackCord(contents_.cordz_info()); |
362 | 0 | CordRep::Unref(VerifyTree(contents_.as_tree())); |
363 | 0 | } |
364 | | |
365 | | // -------------------------------------------------------------------- |
366 | | // Mutators |
367 | | |
368 | 0 | void Cord::Clear() { |
369 | 0 | if (CordRep* tree = contents_.clear()) { |
370 | 0 | CordRep::Unref(tree); |
371 | 0 | } |
372 | 0 | } |
373 | | |
374 | 0 | Cord& Cord::AssignLargeString(std::string&& src) { |
375 | 0 | auto constexpr method = CordzUpdateTracker::kAssignString; |
376 | 0 | assert(src.size() > kMaxBytesToCopy); |
377 | 0 | CordRep* rep = CordRepFromString(std::move(src)); |
378 | 0 | if (CordRep* tree = contents_.tree()) { |
379 | 0 | CordzUpdateScope scope(contents_.cordz_info(), method); |
380 | 0 | contents_.SetTree(rep, scope); |
381 | 0 | CordRep::Unref(tree); |
382 | 0 | } else { |
383 | 0 | contents_.EmplaceTree(rep, method); |
384 | 0 | } |
385 | 0 | return *this; |
386 | 0 | } |
387 | | |
388 | 0 | Cord& Cord::operator=(absl::string_view src) { |
389 | 0 | auto constexpr method = CordzUpdateTracker::kAssignString; |
390 | 0 | const char* data = src.data(); |
391 | 0 | size_t length = src.size(); |
392 | 0 | CordRep* tree = contents_.tree(); |
393 | 0 | if (length <= InlineRep::kMaxInline) { |
394 | | // Embed into this->contents_, which is somewhat subtle: |
395 | | // - MaybeUntrackCord must be called before Unref(tree). |
396 | | // - MaybeUntrackCord must be called before set_data() clobbers cordz_info. |
397 | | // - set_data() must be called before Unref(tree) as it may reference tree. |
398 | 0 | if (tree != nullptr) CordzInfo::MaybeUntrackCord(contents_.cordz_info()); |
399 | 0 | contents_.set_data(data, length); |
400 | 0 | if (tree != nullptr) CordRep::Unref(tree); |
401 | 0 | return *this; |
402 | 0 | } |
403 | 0 | if (tree != nullptr) { |
404 | 0 | CordzUpdateScope scope(contents_.cordz_info(), method); |
405 | 0 | if (tree->IsFlat() && tree->flat()->Capacity() >= length && |
406 | 0 | tree->refcount.IsOne()) { |
407 | | // Copy in place if the existing FLAT node is reusable. |
408 | 0 | memmove(tree->flat()->Data(), data, length); |
409 | 0 | tree->length = length; |
410 | 0 | VerifyTree(tree); |
411 | 0 | return *this; |
412 | 0 | } |
413 | 0 | contents_.SetTree(NewTree(data, length, 0), scope); |
414 | 0 | CordRep::Unref(tree); |
415 | 0 | } else { |
416 | 0 | contents_.EmplaceTree(NewTree(data, length, 0), method); |
417 | 0 | } |
418 | 0 | return *this; |
419 | 0 | } |
420 | | |
421 | | // TODO(sanjay): Move to Cord::InlineRep section of file. For now, |
422 | | // we keep it here to make diffs easier. |
423 | | void Cord::InlineRep::AppendArray(absl::string_view src, |
424 | 0 | MethodIdentifier method) { |
425 | 0 | if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. |
426 | 0 | MaybeRemoveEmptyCrcNode(); |
427 | |
|
428 | 0 | size_t appended = 0; |
429 | 0 | CordRep* rep = tree(); |
430 | 0 | const CordRep* const root = rep; |
431 | 0 | CordzUpdateScope scope(root ? cordz_info() : nullptr, method); |
432 | 0 | if (root != nullptr) { |
433 | 0 | rep = cord_internal::RemoveCrcNode(rep); |
434 | 0 | char* region; |
435 | 0 | if (PrepareAppendRegion(rep, ®ion, &appended, src.size())) { |
436 | 0 | memcpy(region, src.data(), appended); |
437 | 0 | } |
438 | 0 | } else { |
439 | | // Try to fit in the inline buffer if possible. |
440 | 0 | size_t inline_length = inline_size(); |
441 | 0 | if (src.size() <= kMaxInline - inline_length) { |
442 | | // Append new data to embedded array |
443 | 0 | set_inline_size(inline_length + src.size()); |
444 | 0 | memcpy(data_.as_chars() + inline_length, src.data(), src.size()); |
445 | 0 | return; |
446 | 0 | } |
447 | | |
448 | | // Allocate flat to be a perfect fit on first append exceeding inlined size. |
449 | | // Subsequent growth will use amortized growth until we reach maximum flat |
450 | | // size. |
451 | 0 | rep = CordRepFlat::New(inline_length + src.size()); |
452 | 0 | appended = std::min(src.size(), rep->flat()->Capacity() - inline_length); |
453 | 0 | memcpy(rep->flat()->Data(), data_.as_chars(), inline_length); |
454 | 0 | memcpy(rep->flat()->Data() + inline_length, src.data(), appended); |
455 | 0 | rep->length = inline_length + appended; |
456 | 0 | } |
457 | | |
458 | 0 | src.remove_prefix(appended); |
459 | 0 | if (src.empty()) { |
460 | 0 | CommitTree(root, rep, scope, method); |
461 | 0 | return; |
462 | 0 | } |
463 | | |
464 | | // TODO(b/192061034): keep legacy 10% growth rate: consider other rates. |
465 | 0 | rep = ForceBtree(rep); |
466 | 0 | const size_t min_growth = std::max<size_t>(rep->length / 10, src.size()); |
467 | 0 | rep = CordRepBtree::Append(rep->btree(), src, min_growth - src.size()); |
468 | |
|
469 | 0 | CommitTree(root, rep, scope, method); |
470 | 0 | } |
471 | | |
472 | 0 | inline CordRep* absl_nonnull Cord::TakeRep() const& { |
473 | 0 | return CordRep::Ref(contents_.tree()); |
474 | 0 | } |
475 | | |
476 | 0 | inline CordRep* absl_nonnull Cord::TakeRep() && { |
477 | 0 | CordRep* rep = contents_.tree(); |
478 | 0 | contents_.clear(); |
479 | 0 | return rep; |
480 | 0 | } |
481 | | |
482 | | template <typename C> |
483 | 0 | inline void Cord::AppendImpl(C&& src) { |
484 | 0 | auto constexpr method = CordzUpdateTracker::kAppendCord; |
485 | |
|
486 | 0 | contents_.MaybeRemoveEmptyCrcNode(); |
487 | 0 | if (src.empty()) return; |
488 | | |
489 | 0 | if (empty()) { |
490 | | // Since destination is empty, we can avoid allocating a node, |
491 | 0 | if (src.contents_.is_tree()) { |
492 | | // by taking the tree directly |
493 | 0 | CordRep* rep = |
494 | 0 | cord_internal::RemoveCrcNode(std::forward<C>(src).TakeRep()); |
495 | 0 | contents_.EmplaceTree(rep, method); |
496 | 0 | } else { |
497 | | // or copying over inline data |
498 | 0 | contents_.data_ = src.contents_.data_; |
499 | 0 | } |
500 | 0 | return; |
501 | 0 | } |
502 | | |
503 | | // For short cords, it is faster to copy data if there is room in dst. |
504 | 0 | const size_t src_size = src.contents_.size(); |
505 | 0 | if (src_size <= kMaxBytesToCopy) { |
506 | 0 | CordRep* src_tree = src.contents_.tree(); |
507 | 0 | if (src_tree == nullptr) { |
508 | | // src has embedded data. |
509 | 0 | contents_.AppendArray({src.contents_.data(), src_size}, method); |
510 | 0 | return; |
511 | 0 | } |
512 | 0 | if (src_tree->IsFlat()) { |
513 | | // src tree just has one flat node. |
514 | 0 | contents_.AppendArray({src_tree->flat()->Data(), src_size}, method); |
515 | 0 | return; |
516 | 0 | } |
517 | 0 | if (&src == this) { |
518 | | // ChunkIterator below assumes that src is not modified during traversal. |
519 | 0 | Append(Cord(src)); |
520 | 0 | return; |
521 | 0 | } |
522 | | // TODO(mec): Should we only do this if "dst" has space? |
523 | 0 | for (absl::string_view chunk : src.Chunks()) { |
524 | 0 | Append(chunk); |
525 | 0 | } |
526 | 0 | return; |
527 | 0 | } |
528 | | |
529 | | // Guaranteed to be a tree (kMaxBytesToCopy > kInlinedSize) |
530 | 0 | CordRep* rep = cord_internal::RemoveCrcNode(std::forward<C>(src).TakeRep()); |
531 | 0 | contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord); |
532 | 0 | } Unexecuted instantiation: void absl::lts_20260107::Cord::AppendImpl<absl::lts_20260107::Cord const&>(absl::lts_20260107::Cord const&) Unexecuted instantiation: void absl::lts_20260107::Cord::AppendImpl<absl::lts_20260107::Cord>(absl::lts_20260107::Cord&&) |
533 | | |
534 | | static CordRep::ExtractResult ExtractAppendBuffer(CordRep* absl_nonnull rep, |
535 | 0 | size_t min_capacity) { |
536 | 0 | switch (rep->tag) { |
537 | 0 | case cord_internal::BTREE: |
538 | 0 | return CordRepBtree::ExtractAppendBuffer(rep->btree(), min_capacity); |
539 | 0 | default: |
540 | 0 | if (rep->IsFlat() && rep->refcount.IsOne() && |
541 | 0 | rep->flat()->Capacity() - rep->length >= min_capacity) { |
542 | 0 | return {nullptr, rep}; |
543 | 0 | } |
544 | 0 | return {rep, nullptr}; |
545 | 0 | } |
546 | 0 | } |
547 | | |
548 | | static CordBuffer CreateAppendBuffer(InlineData& data, size_t block_size, |
549 | 0 | size_t capacity) { |
550 | | // Watch out for overflow, people can ask for size_t::max(). |
551 | 0 | const size_t size = data.inline_size(); |
552 | 0 | const size_t max_capacity = std::numeric_limits<size_t>::max() - size; |
553 | 0 | capacity = (std::min)(max_capacity, capacity) + size; |
554 | 0 | CordBuffer buffer = |
555 | 0 | block_size ? CordBuffer::CreateWithCustomLimit(block_size, capacity) |
556 | 0 | : CordBuffer::CreateWithDefaultLimit(capacity); |
557 | 0 | cord_internal::SmallMemmove(buffer.data(), data.as_chars(), size); |
558 | 0 | buffer.SetLength(size); |
559 | 0 | data = {}; |
560 | 0 | return buffer; |
561 | 0 | } |
562 | | |
563 | | CordBuffer Cord::GetAppendBufferSlowPath(size_t block_size, size_t capacity, |
564 | 0 | size_t min_capacity) { |
565 | 0 | auto constexpr method = CordzUpdateTracker::kGetAppendBuffer; |
566 | 0 | CordRep* tree = contents_.tree(); |
567 | 0 | if (tree != nullptr) { |
568 | 0 | CordzUpdateScope scope(contents_.cordz_info(), method); |
569 | 0 | CordRep::ExtractResult result = ExtractAppendBuffer(tree, min_capacity); |
570 | 0 | if (result.extracted != nullptr) { |
571 | 0 | contents_.SetTreeOrEmpty(result.tree, scope); |
572 | 0 | return CordBuffer(result.extracted->flat()); |
573 | 0 | } |
574 | 0 | return block_size ? CordBuffer::CreateWithCustomLimit(block_size, capacity) |
575 | 0 | : CordBuffer::CreateWithDefaultLimit(capacity); |
576 | 0 | } |
577 | 0 | return CreateAppendBuffer(contents_.data_, block_size, capacity); |
578 | 0 | } |
579 | | |
580 | 0 | void Cord::Append(const Cord& src) { AppendImpl(src); } |
581 | | |
582 | 0 | void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); } |
583 | | |
584 | | template <typename T, Cord::EnableIfString<T>> |
585 | 0 | void Cord::Append(T&& src) { |
586 | 0 | if (src.size() <= kMaxBytesToCopy) { |
587 | 0 | Append(absl::string_view(src)); |
588 | 0 | } else { |
589 | 0 | CordRep* rep = CordRepFromString(std::forward<T>(src)); |
590 | 0 | contents_.AppendTree(rep, CordzUpdateTracker::kAppendString); |
591 | 0 | } |
592 | 0 | } |
593 | | |
594 | | template void Cord::Append(std::string&& src); |
595 | | |
596 | 0 | void Cord::Prepend(const Cord& src) { |
597 | 0 | contents_.MaybeRemoveEmptyCrcNode(); |
598 | 0 | if (src.empty()) return; |
599 | | |
600 | 0 | CordRep* src_tree = src.contents_.tree(); |
601 | 0 | if (src_tree != nullptr) { |
602 | 0 | CordRep::Ref(src_tree); |
603 | 0 | contents_.PrependTree(cord_internal::RemoveCrcNode(src_tree), |
604 | 0 | CordzUpdateTracker::kPrependCord); |
605 | 0 | return; |
606 | 0 | } |
607 | | |
608 | | // `src` cord is inlined. |
609 | 0 | absl::string_view src_contents(src.contents_.data(), src.contents_.size()); |
610 | 0 | return Prepend(src_contents); |
611 | 0 | } |
612 | | |
613 | 0 | void Cord::PrependArray(absl::string_view src, MethodIdentifier method) { |
614 | 0 | contents_.MaybeRemoveEmptyCrcNode(); |
615 | 0 | if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. |
616 | | |
617 | 0 | if (!contents_.is_tree()) { |
618 | 0 | size_t cur_size = contents_.inline_size(); |
619 | 0 | if (cur_size + src.size() <= InlineRep::kMaxInline) { |
620 | | // Use embedded storage. |
621 | 0 | InlineData data; |
622 | 0 | data.set_inline_size(cur_size + src.size()); |
623 | 0 | memcpy(data.as_chars(), src.data(), src.size()); |
624 | 0 | memcpy(data.as_chars() + src.size(), contents_.data(), cur_size); |
625 | 0 | contents_.data_ = data; |
626 | 0 | return; |
627 | 0 | } |
628 | 0 | } |
629 | 0 | CordRep* rep = NewTree(src.data(), src.size(), 0); |
630 | 0 | contents_.PrependTree(rep, method); |
631 | 0 | } |
632 | | |
633 | 0 | void Cord::AppendPrecise(absl::string_view src, MethodIdentifier method) { |
634 | 0 | assert(!src.empty()); |
635 | 0 | assert(src.size() <= cord_internal::kMaxFlatLength); |
636 | 0 | if (contents_.remaining_inline_capacity() >= src.size()) { |
637 | 0 | const size_t inline_length = contents_.inline_size(); |
638 | 0 | contents_.set_inline_size(inline_length + src.size()); |
639 | 0 | memcpy(contents_.data_.as_chars() + inline_length, src.data(), src.size()); |
640 | 0 | } else { |
641 | 0 | contents_.AppendTree(CordRepFlat::Create(src), method); |
642 | 0 | } |
643 | 0 | } |
644 | | |
645 | 0 | void Cord::PrependPrecise(absl::string_view src, MethodIdentifier method) { |
646 | 0 | assert(!src.empty()); |
647 | 0 | assert(src.size() <= cord_internal::kMaxFlatLength); |
648 | 0 | if (contents_.remaining_inline_capacity() >= src.size()) { |
649 | 0 | const size_t cur_size = contents_.inline_size(); |
650 | 0 | InlineData data; |
651 | 0 | data.set_inline_size(cur_size + src.size()); |
652 | 0 | memcpy(data.as_chars(), src.data(), src.size()); |
653 | 0 | memcpy(data.as_chars() + src.size(), contents_.data(), cur_size); |
654 | 0 | contents_.data_ = data; |
655 | 0 | } else { |
656 | 0 | contents_.PrependTree(CordRepFlat::Create(src), method); |
657 | 0 | } |
658 | 0 | } |
659 | | |
660 | | template <typename T, Cord::EnableIfString<T>> |
661 | 0 | inline void Cord::Prepend(T&& src) { |
662 | 0 | if (src.size() <= kMaxBytesToCopy) { |
663 | 0 | Prepend(absl::string_view(src)); |
664 | 0 | } else { |
665 | 0 | CordRep* rep = CordRepFromString(std::forward<T>(src)); |
666 | 0 | contents_.PrependTree(rep, CordzUpdateTracker::kPrependString); |
667 | 0 | } |
668 | 0 | } |
669 | | |
670 | | template void Cord::Prepend(std::string&& src); |
671 | | |
672 | 0 | void Cord::RemovePrefix(size_t n) { |
673 | 0 | ABSL_INTERNAL_CHECK(n <= size(), |
674 | 0 | absl::StrCat("Requested prefix size ", n, |
675 | 0 | " exceeds Cord's size ", size())); |
676 | 0 | contents_.MaybeRemoveEmptyCrcNode(); |
677 | 0 | CordRep* tree = contents_.tree(); |
678 | 0 | if (tree == nullptr) { |
679 | 0 | contents_.remove_prefix(n); |
680 | 0 | } else { |
681 | 0 | auto constexpr method = CordzUpdateTracker::kRemovePrefix; |
682 | 0 | CordzUpdateScope scope(contents_.cordz_info(), method); |
683 | 0 | tree = cord_internal::RemoveCrcNode(tree); |
684 | 0 | if (n >= tree->length) { |
685 | 0 | CordRep::Unref(tree); |
686 | 0 | tree = nullptr; |
687 | 0 | } else if (tree->IsBtree()) { |
688 | 0 | CordRep* old = tree; |
689 | 0 | tree = tree->btree()->SubTree(n, tree->length - n); |
690 | 0 | CordRep::Unref(old); |
691 | 0 | } else if (tree->IsSubstring() && tree->refcount.IsOne()) { |
692 | 0 | tree->substring()->start += n; |
693 | 0 | tree->length -= n; |
694 | 0 | } else { |
695 | 0 | CordRep* rep = CordRepSubstring::Substring(tree, n, tree->length - n); |
696 | 0 | CordRep::Unref(tree); |
697 | 0 | tree = rep; |
698 | 0 | } |
699 | 0 | contents_.SetTreeOrEmpty(tree, scope); |
700 | 0 | } |
701 | 0 | } |
702 | | |
703 | 0 | void Cord::RemoveSuffix(size_t n) { |
704 | 0 | ABSL_INTERNAL_CHECK(n <= size(), |
705 | 0 | absl::StrCat("Requested suffix size ", n, |
706 | 0 | " exceeds Cord's size ", size())); |
707 | 0 | contents_.MaybeRemoveEmptyCrcNode(); |
708 | 0 | CordRep* tree = contents_.tree(); |
709 | 0 | if (tree == nullptr) { |
710 | 0 | contents_.reduce_size(n); |
711 | 0 | } else { |
712 | 0 | auto constexpr method = CordzUpdateTracker::kRemoveSuffix; |
713 | 0 | CordzUpdateScope scope(contents_.cordz_info(), method); |
714 | 0 | tree = cord_internal::RemoveCrcNode(tree); |
715 | 0 | if (n >= tree->length) { |
716 | 0 | CordRep::Unref(tree); |
717 | 0 | tree = nullptr; |
718 | 0 | } else if (tree->IsBtree()) { |
719 | 0 | tree = CordRepBtree::RemoveSuffix(tree->btree(), n); |
720 | 0 | } else if (!tree->IsExternal() && tree->refcount.IsOne()) { |
721 | 0 | assert(tree->IsFlat() || tree->IsSubstring()); |
722 | 0 | tree->length -= n; |
723 | 0 | } else { |
724 | 0 | CordRep* rep = CordRepSubstring::Substring(tree, 0, tree->length - n); |
725 | 0 | CordRep::Unref(tree); |
726 | 0 | tree = rep; |
727 | 0 | } |
728 | 0 | contents_.SetTreeOrEmpty(tree, scope); |
729 | 0 | } |
730 | 0 | } |
731 | | |
732 | 0 | Cord Cord::Subcord(size_t pos, size_t new_size) const { |
733 | 0 | Cord sub_cord; |
734 | 0 | size_t length = size(); |
735 | 0 | if (pos > length) pos = length; |
736 | 0 | if (new_size > length - pos) new_size = length - pos; |
737 | 0 | if (new_size == 0) return sub_cord; |
738 | | |
739 | 0 | CordRep* tree = contents_.tree(); |
740 | 0 | if (tree == nullptr) { |
741 | 0 | sub_cord.contents_.set_data(contents_.data() + pos, new_size); |
742 | 0 | return sub_cord; |
743 | 0 | } |
744 | | |
745 | 0 | if (new_size <= InlineRep::kMaxInline) { |
746 | 0 | sub_cord.contents_.set_inline_size(new_size); |
747 | 0 | char* dest = sub_cord.contents_.data_.as_chars(); |
748 | 0 | Cord::ChunkIterator it = chunk_begin(); |
749 | 0 | it.AdvanceBytes(pos); |
750 | 0 | size_t remaining_size = new_size; |
751 | 0 | while (remaining_size > it->size()) { |
752 | 0 | cord_internal::SmallMemmove(dest, it->data(), it->size()); |
753 | 0 | remaining_size -= it->size(); |
754 | 0 | dest += it->size(); |
755 | 0 | ++it; |
756 | 0 | } |
757 | 0 | cord_internal::SmallMemmove(dest, it->data(), remaining_size); |
758 | 0 | return sub_cord; |
759 | 0 | } |
760 | | |
761 | 0 | tree = cord_internal::SkipCrcNode(tree); |
762 | 0 | if (tree->IsBtree()) { |
763 | 0 | tree = tree->btree()->SubTree(pos, new_size); |
764 | 0 | } else { |
765 | 0 | tree = CordRepSubstring::Substring(tree, pos, new_size); |
766 | 0 | } |
767 | 0 | sub_cord.contents_.EmplaceTree(tree, contents_.data_, |
768 | 0 | CordzUpdateTracker::kSubCord); |
769 | 0 | return sub_cord; |
770 | 0 | } |
771 | | |
772 | | // -------------------------------------------------------------------- |
773 | | // Comparators |
774 | | |
775 | | namespace { |
776 | | |
// Maps an arbitrary memcmp-style result onto exactly -1, 0 or +1.
int ClampResult(int memcmp_res) {
  if (memcmp_res < 0) return -1;
  return memcmp_res > 0 ? 1 : 0;
}
780 | | |
781 | | int CompareChunks(absl::string_view* absl_nonnull lhs, |
782 | | absl::string_view* absl_nonnull rhs, |
783 | 0 | size_t* absl_nonnull size_to_compare) { |
784 | 0 | size_t compared_size = std::min(lhs->size(), rhs->size()); |
785 | 0 | assert(*size_to_compare >= compared_size); |
786 | 0 | *size_to_compare -= compared_size; |
787 | |
|
788 | 0 | int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size); |
789 | 0 | if (memcmp_res != 0) return memcmp_res; |
790 | | |
791 | 0 | lhs->remove_prefix(compared_size); |
792 | 0 | rhs->remove_prefix(compared_size); |
793 | |
|
794 | 0 | return 0; |
795 | 0 | } |
796 | | |
797 | | // This overload set computes comparison results from memcmp result. This |
798 | | // interface is used inside GenericCompare below. Different implementations |
799 | | // are specialized for int and bool. For int we clamp result to {-1, 0, 1} |
800 | | // set. For bool we just interested in "value == 0". |
801 | | template <typename ResultType> |
802 | 0 | ResultType ComputeCompareResult(int memcmp_res) { |
803 | 0 | return ClampResult(memcmp_res); |
804 | 0 | } |
805 | | template <> |
806 | 0 | bool ComputeCompareResult<bool>(int memcmp_res) { |
807 | 0 | return memcmp_res == 0; |
808 | 0 | } |
809 | | |
810 | | } // namespace |
811 | | |
812 | | // Helper routine. Locates the first flat or external chunk of the Cord without |
813 | | // initializing the iterator, and returns a string_view referencing the data. |
814 | 0 | inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { |
815 | 0 | if (!is_tree()) { |
816 | 0 | return absl::string_view(data_.as_chars(), data_.inline_size()); |
817 | 0 | } |
818 | | |
819 | 0 | CordRep* node = cord_internal::SkipCrcNode(tree()); |
820 | 0 | if (node->IsFlat()) { |
821 | 0 | return absl::string_view(node->flat()->Data(), node->length); |
822 | 0 | } |
823 | | |
824 | 0 | if (node->IsExternal()) { |
825 | 0 | return absl::string_view(node->external()->base, node->length); |
826 | 0 | } |
827 | | |
828 | 0 | if (node->IsBtree()) { |
829 | 0 | CordRepBtree* tree = node->btree(); |
830 | 0 | int height = tree->height(); |
831 | 0 | while (--height >= 0) { |
832 | 0 | tree = tree->Edge(CordRepBtree::kFront)->btree(); |
833 | 0 | } |
834 | 0 | return tree->Data(tree->begin()); |
835 | 0 | } |
836 | | |
837 | | // Get the child node if we encounter a SUBSTRING. |
838 | 0 | size_t offset = 0; |
839 | 0 | size_t length = node->length; |
840 | 0 | assert(length != 0); |
841 | | |
842 | 0 | if (node->IsSubstring()) { |
843 | 0 | offset = node->substring()->start; |
844 | 0 | node = node->substring()->child; |
845 | 0 | } |
846 | |
|
847 | 0 | if (node->IsFlat()) { |
848 | 0 | return absl::string_view(node->flat()->Data() + offset, length); |
849 | 0 | } |
850 | | |
851 | 0 | assert(node->IsExternal() && "Expect FLAT or EXTERNAL node here"); |
852 | | |
853 | 0 | return absl::string_view(node->external()->base + offset, length); |
854 | 0 | } |
855 | | |
856 | 0 | void Cord::SetCrcCordState(crc_internal::CrcCordState state) { |
857 | 0 | auto constexpr method = CordzUpdateTracker::kSetExpectedChecksum; |
858 | 0 | if (empty()) { |
859 | 0 | contents_.MaybeRemoveEmptyCrcNode(); |
860 | 0 | CordRep* rep = CordRepCrc::New(nullptr, std::move(state)); |
861 | 0 | contents_.EmplaceTree(rep, method); |
862 | 0 | } else if (!contents_.is_tree()) { |
863 | 0 | CordRep* rep = contents_.MakeFlatWithExtraCapacity(0); |
864 | 0 | rep = CordRepCrc::New(rep, std::move(state)); |
865 | 0 | contents_.EmplaceTree(rep, method); |
866 | 0 | } else { |
867 | 0 | const CordzUpdateScope scope(contents_.data_.cordz_info(), method); |
868 | 0 | CordRep* rep = CordRepCrc::New(contents_.data_.as_tree(), std::move(state)); |
869 | 0 | contents_.SetTree(rep, scope); |
870 | 0 | } |
871 | 0 | } |
872 | | |
873 | 0 | void Cord::SetExpectedChecksum(uint32_t crc) { |
874 | | // Construct a CrcCordState with a single chunk. |
875 | 0 | crc_internal::CrcCordState state; |
876 | 0 | state.mutable_rep()->prefix_crc.push_back( |
877 | 0 | crc_internal::CrcCordState::PrefixCrc(size(), absl::crc32c_t{crc})); |
878 | 0 | SetCrcCordState(std::move(state)); |
879 | 0 | } |
880 | | |
881 | | const crc_internal::CrcCordState* absl_nullable Cord::MaybeGetCrcCordState() |
882 | 0 | const { |
883 | 0 | if (!contents_.is_tree() || !contents_.tree()->IsCrc()) { |
884 | 0 | return nullptr; |
885 | 0 | } |
886 | 0 | return &contents_.tree()->crc()->crc_cord_state; |
887 | 0 | } |
888 | | |
889 | 0 | absl::optional<uint32_t> Cord::ExpectedChecksum() const { |
890 | 0 | if (!contents_.is_tree() || !contents_.tree()->IsCrc()) { |
891 | 0 | return absl::nullopt; |
892 | 0 | } |
893 | 0 | return static_cast<uint32_t>( |
894 | 0 | contents_.tree()->crc()->crc_cord_state.Checksum()); |
895 | 0 | } |
896 | | |
897 | | inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size, |
898 | 0 | size_t size_to_compare) const { |
899 | 0 | auto advance = [](Cord::ChunkIterator* absl_nonnull it, |
900 | 0 | absl::string_view* absl_nonnull chunk) { |
901 | 0 | if (!chunk->empty()) return true; |
902 | 0 | ++*it; |
903 | 0 | if (it->bytes_remaining_ == 0) return false; |
904 | 0 | *chunk = **it; |
905 | 0 | return true; |
906 | 0 | }; |
907 | |
|
908 | 0 | Cord::ChunkIterator lhs_it = chunk_begin(); |
909 | | |
910 | | // compared_size is inside first chunk. |
911 | 0 | absl::string_view lhs_chunk = |
912 | 0 | (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view(); |
913 | 0 | assert(compared_size <= lhs_chunk.size()); |
914 | 0 | assert(compared_size <= rhs.size()); |
915 | 0 | lhs_chunk.remove_prefix(compared_size); |
916 | 0 | rhs.remove_prefix(compared_size); |
917 | 0 | size_to_compare -= compared_size; // skip already compared size. |
918 | |
|
919 | 0 | while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) { |
920 | 0 | int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare); |
921 | 0 | if (comparison_result != 0) return comparison_result; |
922 | 0 | if (size_to_compare == 0) return 0; |
923 | 0 | } |
924 | | |
925 | 0 | return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty()); |
926 | 0 | } |
927 | | |
928 | | inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size, |
929 | 0 | size_t size_to_compare) const { |
930 | 0 | auto advance = [](Cord::ChunkIterator* absl_nonnull it, |
931 | 0 | absl::string_view* absl_nonnull chunk) { |
932 | 0 | if (!chunk->empty()) return true; |
933 | 0 | ++*it; |
934 | 0 | if (it->bytes_remaining_ == 0) return false; |
935 | 0 | *chunk = **it; |
936 | 0 | return true; |
937 | 0 | }; |
938 | |
|
939 | 0 | Cord::ChunkIterator lhs_it = chunk_begin(); |
940 | 0 | Cord::ChunkIterator rhs_it = rhs.chunk_begin(); |
941 | | |
942 | | // compared_size is inside both first chunks. |
943 | 0 | absl::string_view lhs_chunk = |
944 | 0 | (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view(); |
945 | 0 | absl::string_view rhs_chunk = |
946 | 0 | (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view(); |
947 | 0 | assert(compared_size <= lhs_chunk.size()); |
948 | 0 | assert(compared_size <= rhs_chunk.size()); |
949 | 0 | lhs_chunk.remove_prefix(compared_size); |
950 | 0 | rhs_chunk.remove_prefix(compared_size); |
951 | 0 | size_to_compare -= compared_size; // skip already compared size. |
952 | |
|
953 | 0 | while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) { |
954 | 0 | int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare); |
955 | 0 | if (memcmp_res != 0) return memcmp_res; |
956 | 0 | if (size_to_compare == 0) return 0; |
957 | 0 | } |
958 | | |
959 | 0 | return static_cast<int>(rhs_chunk.empty()) - |
960 | 0 | static_cast<int>(lhs_chunk.empty()); |
961 | 0 | } |
962 | | |
963 | 0 | inline absl::string_view Cord::GetFirstChunk(const Cord& c) { |
964 | 0 | if (c.empty()) return {}; |
965 | 0 | return c.contents_.FindFlatStartPiece(); |
966 | 0 | } |
967 | 0 | inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) { |
968 | 0 | return sv; |
969 | 0 | } |
970 | | |
971 | | // Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed |
972 | | // that 'size_to_compare' is greater that size of smallest of first chunks. |
973 | | template <typename ResultType, typename RHS> |
974 | | ResultType GenericCompare(const Cord& lhs, const RHS& rhs, |
975 | 0 | size_t size_to_compare) { |
976 | 0 | absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs); |
977 | 0 | absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs); |
978 | |
|
979 | 0 | size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size()); |
980 | 0 | assert(size_to_compare >= compared_size); |
981 | 0 | int memcmp_res = compared_size > 0 ? ::memcmp(lhs_chunk.data(), |
982 | 0 | rhs_chunk.data(), compared_size) |
983 | 0 | : 0; |
984 | 0 | if (compared_size == size_to_compare || memcmp_res != 0) { |
985 | 0 | return ComputeCompareResult<ResultType>(memcmp_res); |
986 | 0 | } |
987 | | |
988 | 0 | return ComputeCompareResult<ResultType>( |
989 | 0 | lhs.CompareSlowPath(rhs, compared_size, size_to_compare)); |
990 | 0 | } Unexecuted instantiation: bool absl::lts_20260107::GenericCompare<bool, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(absl::lts_20260107::Cord const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long) Unexecuted instantiation: bool absl::lts_20260107::GenericCompare<bool, absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&, absl::lts_20260107::Cord const&, unsigned long) Unexecuted instantiation: int absl::lts_20260107::GenericCompare<int, std::__1::basic_string_view<char, std::__1::char_traits<char> > >(absl::lts_20260107::Cord const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&, unsigned long) Unexecuted instantiation: int absl::lts_20260107::GenericCompare<int, absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&, absl::lts_20260107::Cord const&, unsigned long) |
991 | | |
992 | 0 | bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const { |
993 | 0 | return GenericCompare<bool>(*this, rhs, size_to_compare); |
994 | 0 | } |
995 | | |
996 | 0 | bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const { |
997 | 0 | return GenericCompare<bool>(*this, rhs, size_to_compare); |
998 | 0 | } |
999 | | |
1000 | | template <typename RHS> |
1001 | 0 | inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) { |
1002 | 0 | size_t lhs_size = lhs.size(); |
1003 | 0 | size_t rhs_size = rhs.size(); |
1004 | 0 | if (lhs_size == rhs_size) { |
1005 | 0 | return GenericCompare<int>(lhs, rhs, lhs_size); |
1006 | 0 | } |
1007 | 0 | if (lhs_size < rhs_size) { |
1008 | 0 | auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size); |
1009 | 0 | return data_comp_res == 0 ? -1 : data_comp_res; |
1010 | 0 | } |
1011 | | |
1012 | 0 | auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size); |
1013 | 0 | return data_comp_res == 0 ? +1 : data_comp_res; |
1014 | 0 | } Unexecuted instantiation: int absl::lts_20260107::SharedCompareImpl<std::__1::basic_string_view<char, std::__1::char_traits<char> > >(absl::lts_20260107::Cord const&, std::__1::basic_string_view<char, std::__1::char_traits<char> > const&) Unexecuted instantiation: int absl::lts_20260107::SharedCompareImpl<absl::lts_20260107::Cord>(absl::lts_20260107::Cord const&, absl::lts_20260107::Cord const&) |
1015 | | |
1016 | 0 | int Cord::Compare(absl::string_view rhs) const { |
1017 | 0 | return SharedCompareImpl(*this, rhs); |
1018 | 0 | } |
1019 | | |
1020 | 0 | int Cord::CompareImpl(const Cord& rhs) const { |
1021 | 0 | return SharedCompareImpl(*this, rhs); |
1022 | 0 | } |
1023 | | |
1024 | 0 | bool Cord::EndsWith(absl::string_view rhs) const { |
1025 | 0 | size_t my_size = size(); |
1026 | 0 | size_t rhs_size = rhs.size(); |
1027 | |
|
1028 | 0 | if (my_size < rhs_size) return false; |
1029 | | |
1030 | 0 | Cord tmp(*this); |
1031 | 0 | tmp.RemovePrefix(my_size - rhs_size); |
1032 | 0 | return tmp.EqualsImpl(rhs, rhs_size); |
1033 | 0 | } |
1034 | | |
1035 | 0 | bool Cord::EndsWith(const Cord& rhs) const { |
1036 | 0 | size_t my_size = size(); |
1037 | 0 | size_t rhs_size = rhs.size(); |
1038 | |
|
1039 | 0 | if (my_size < rhs_size) return false; |
1040 | | |
1041 | 0 | Cord tmp(*this); |
1042 | 0 | tmp.RemovePrefix(my_size - rhs_size); |
1043 | 0 | return tmp.EqualsImpl(rhs, rhs_size); |
1044 | 0 | } |
1045 | | |
1046 | | // -------------------------------------------------------------------- |
1047 | | // Misc. |
1048 | | |
1049 | 0 | Cord::operator std::string() const { |
1050 | 0 | std::string s; |
1051 | 0 | absl::CopyCordToString(*this, &s); |
1052 | 0 | return s; |
1053 | 0 | } |
1054 | | |
1055 | 0 | void CopyCordToString(const Cord& src, std::string* absl_nonnull dst) { |
1056 | 0 | if (!src.contents_.is_tree()) { |
1057 | 0 | src.contents_.CopyTo(dst); |
1058 | 0 | } else { |
1059 | 0 | StringResizeAndOverwrite(*dst, src.size(), |
1060 | 0 | [&src](char* buf, size_t buf_size) { |
1061 | 0 | src.CopyToArraySlowPath(buf); |
1062 | 0 | return buf_size; |
1063 | 0 | }); |
1064 | 0 | } |
1065 | 0 | } |
1066 | | |
1067 | 0 | void AppendCordToString(const Cord& src, std::string* absl_nonnull dst) { |
1068 | 0 | strings_internal::StringAppendAndOverwrite( |
1069 | 0 | *dst, src.size(), [&src](char* buf, size_t buf_size) { |
1070 | 0 | src.CopyToArrayImpl(buf); |
1071 | 0 | return buf_size; |
1072 | 0 | }); |
1073 | 0 | } |
1074 | | |
1075 | 0 | void Cord::CopyToArraySlowPath(char* absl_nonnull dst) const { |
1076 | 0 | assert(contents_.is_tree()); |
1077 | 0 | absl::string_view fragment; |
1078 | 0 | if (GetFlatAux(contents_.tree(), &fragment) && !fragment.empty()) { |
1079 | 0 | memcpy(dst, fragment.data(), fragment.size()); |
1080 | 0 | return; |
1081 | 0 | } |
1082 | 0 | for (absl::string_view chunk : Chunks()) { |
1083 | 0 | memcpy(dst, chunk.data(), chunk.size()); |
1084 | 0 | dst += chunk.size(); |
1085 | 0 | } |
1086 | 0 | } |
1087 | | |
1088 | 0 | Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { |
1089 | 0 | ABSL_HARDENING_ASSERT(bytes_remaining_ >= n && |
1090 | 0 | "Attempted to iterate past `end()`"); |
1091 | 0 | Cord subcord; |
1092 | 0 | auto constexpr method = CordzUpdateTracker::kCordReader; |
1093 | |
|
1094 | 0 | if (n <= InlineRep::kMaxInline) { |
1095 | | // Range to read fits in inline data. Flatten it. |
1096 | 0 | char* data = subcord.contents_.set_data(n); |
1097 | 0 | while (n > current_chunk_.size()) { |
1098 | 0 | memcpy(data, current_chunk_.data(), current_chunk_.size()); |
1099 | 0 | data += current_chunk_.size(); |
1100 | 0 | n -= current_chunk_.size(); |
1101 | 0 | ++*this; |
1102 | 0 | } |
1103 | 0 | memcpy(data, current_chunk_.data(), n); |
1104 | 0 | if (n < current_chunk_.size()) { |
1105 | 0 | RemoveChunkPrefix(n); |
1106 | 0 | } else if (n > 0) { |
1107 | 0 | ++*this; |
1108 | 0 | } |
1109 | 0 | return subcord; |
1110 | 0 | } |
1111 | | |
1112 | 0 | if (btree_reader_) { |
1113 | 0 | size_t chunk_size = current_chunk_.size(); |
1114 | 0 | if (n <= chunk_size && n <= kMaxBytesToCopy) { |
1115 | 0 | subcord = Cord(current_chunk_.substr(0, n), method); |
1116 | 0 | if (n < chunk_size) { |
1117 | 0 | current_chunk_.remove_prefix(n); |
1118 | 0 | } else { |
1119 | 0 | current_chunk_ = btree_reader_.Next(); |
1120 | 0 | } |
1121 | 0 | } else { |
1122 | 0 | CordRep* rep; |
1123 | 0 | current_chunk_ = btree_reader_.Read(n, chunk_size, rep); |
1124 | 0 | subcord.contents_.EmplaceTree(rep, method); |
1125 | 0 | } |
1126 | 0 | bytes_remaining_ -= n; |
1127 | 0 | return subcord; |
1128 | 0 | } |
1129 | | |
1130 | | // Short circuit if reading the entire data edge. |
1131 | 0 | assert(current_leaf_ != nullptr); |
1132 | 0 | if (n == current_leaf_->length) { |
1133 | 0 | bytes_remaining_ = 0; |
1134 | 0 | current_chunk_ = {}; |
1135 | 0 | CordRep* tree = CordRep::Ref(current_leaf_); |
1136 | 0 | subcord.contents_.EmplaceTree(VerifyTree(tree), method); |
1137 | 0 | return subcord; |
1138 | 0 | } |
1139 | | |
1140 | | // From this point on, we need a partial substring node. |
1141 | | // Get pointer to the underlying flat or external data payload and |
1142 | | // compute data pointer and offset into current flat or external. |
1143 | 0 | CordRep* payload = current_leaf_->IsSubstring() |
1144 | 0 | ? current_leaf_->substring()->child |
1145 | 0 | : current_leaf_; |
1146 | 0 | const char* data = payload->IsExternal() ? payload->external()->base |
1147 | 0 | : payload->flat()->Data(); |
1148 | 0 | const size_t offset = static_cast<size_t>(current_chunk_.data() - data); |
1149 | |
|
1150 | 0 | auto* tree = CordRepSubstring::Substring(payload, offset, n); |
1151 | 0 | subcord.contents_.EmplaceTree(VerifyTree(tree), method); |
1152 | 0 | bytes_remaining_ -= n; |
1153 | 0 | current_chunk_.remove_prefix(n); |
1154 | 0 | return subcord; |
1155 | 0 | } |
1156 | | |
1157 | 0 | char Cord::operator[](size_t i) const { |
1158 | 0 | ABSL_HARDENING_ASSERT(i < size()); |
1159 | 0 | size_t offset = i; |
1160 | 0 | const CordRep* rep = contents_.tree(); |
1161 | 0 | if (rep == nullptr) { |
1162 | 0 | return contents_.data()[i]; |
1163 | 0 | } |
1164 | 0 | rep = cord_internal::SkipCrcNode(rep); |
1165 | 0 | while (true) { |
1166 | 0 | assert(rep != nullptr); |
1167 | 0 | assert(offset < rep->length); |
1168 | 0 | if (rep->IsFlat()) { |
1169 | | // Get the "i"th character directly from the flat array. |
1170 | 0 | return rep->flat()->Data()[offset]; |
1171 | 0 | } else if (rep->IsBtree()) { |
1172 | 0 | return rep->btree()->GetCharacter(offset); |
1173 | 0 | } else if (rep->IsExternal()) { |
1174 | | // Get the "i"th character from the external array. |
1175 | 0 | return rep->external()->base[offset]; |
1176 | 0 | } else { |
1177 | | // This must be a substring a node, so bypass it to get to the child. |
1178 | 0 | assert(rep->IsSubstring()); |
1179 | 0 | offset += rep->substring()->start; |
1180 | 0 | rep = rep->substring()->child; |
1181 | 0 | } |
1182 | 0 | } |
1183 | 0 | } |
1184 | | |
1185 | | namespace { |
1186 | | |
1187 | | // Tests whether the sequence of chunks beginning at `position` starts with |
1188 | | // `needle`. |
1189 | | // |
1190 | | // REQUIRES: remaining `absl::Cord` starting at `position` is greater than or |
1191 | | // equal to `needle.size()`. |
1192 | | bool IsSubstringInCordAt(absl::Cord::CharIterator position, |
1193 | 0 | absl::string_view needle) { |
1194 | 0 | auto haystack_chunk = absl::Cord::ChunkRemaining(position); |
1195 | 0 | while (true) { |
1196 | | // Precondition is that `absl::Cord::ChunkRemaining(position)` is not |
1197 | | // empty. This assert will trigger if that is not true. |
1198 | 0 | assert(!haystack_chunk.empty()); |
1199 | 0 | auto min_length = std::min(haystack_chunk.size(), needle.size()); |
1200 | 0 | if (!absl::ConsumePrefix(&needle, haystack_chunk.substr(0, min_length))) { |
1201 | 0 | return false; |
1202 | 0 | } |
1203 | 0 | if (needle.empty()) { |
1204 | 0 | return true; |
1205 | 0 | } |
1206 | 0 | absl::Cord::Advance(&position, min_length); |
1207 | 0 | haystack_chunk = absl::Cord::ChunkRemaining(position); |
1208 | 0 | } |
1209 | 0 | } |
1210 | | |
1211 | | } // namespace |
1212 | | |
1213 | | // A few options how this could be implemented: |
1214 | | // (a) Flatten the Cord and find, i.e. |
1215 | | // haystack.Flatten().find(needle) |
1216 | | // For large 'haystack' (where Cord makes sense to be used), this copies |
1217 | | // the whole 'haystack' and can be slow. |
1218 | | // (b) Use std::search, i.e. |
1219 | | // std::search(haystack.char_begin(), haystack.char_end(), |
1220 | | // needle.begin(), needle.end()) |
1221 | | // This avoids the copy, but compares one byte at a time, and branches a |
1222 | | // lot every time it has to advance. It is also not possible to use |
1223 | | // std::search as is, because CharIterator is only an input iterator, not a |
1224 | | // forward iterator. |
1225 | | // (c) Use string_view::find in each fragment, and specifically handle fragment |
1226 | | // boundaries. |
1227 | | // |
1228 | | // This currently implements option (b). |
1229 | | absl::Cord::CharIterator absl::Cord::FindImpl(CharIterator it, |
1230 | 0 | absl::string_view needle) const { |
1231 | | // Ensure preconditions are met by callers first. |
1232 | | |
1233 | | // Needle must not be empty. |
1234 | 0 | assert(!needle.empty()); |
1235 | | // Haystack must be at least as large as needle. |
1236 | 0 | assert(it.chunk_iterator_.bytes_remaining_ >= needle.size()); |
1237 | | |
1238 | | // Cord is a sequence of chunks. To find `needle` we go chunk by chunk looking |
1239 | | // for the first char of needle, up until we have advanced `N` defined as |
1240 | | // `haystack.size() - needle.size()`. If we find the first char of needle at |
1241 | | // `P` and `P` is less than `N`, we then call `IsSubstringInCordAt` to |
1242 | | // see if this is the needle. If not, we advance to `P + 1` and try again. |
1243 | 0 | while (it.chunk_iterator_.bytes_remaining_ >= needle.size()) { |
1244 | 0 | auto haystack_chunk = Cord::ChunkRemaining(it); |
1245 | 0 | assert(!haystack_chunk.empty()); |
1246 | | // Look for the first char of `needle` in the current chunk. |
1247 | 0 | auto idx = haystack_chunk.find(needle.front()); |
1248 | 0 | if (idx == absl::string_view::npos) { |
1249 | | // No potential match in this chunk, advance past it. |
1250 | 0 | Cord::Advance(&it, haystack_chunk.size()); |
1251 | 0 | continue; |
1252 | 0 | } |
1253 | | // We found the start of a potential match in the chunk. Advance the |
1254 | | // iterator and haystack chunk to the match the position. |
1255 | 0 | Cord::Advance(&it, idx); |
1256 | | // Check if there is enough haystack remaining to actually have a match. |
1257 | 0 | if (it.chunk_iterator_.bytes_remaining_ < needle.size()) { |
1258 | 0 | break; |
1259 | 0 | } |
1260 | | // Check if this is `needle`. |
1261 | 0 | if (IsSubstringInCordAt(it, needle)) { |
1262 | 0 | return it; |
1263 | 0 | } |
1264 | | // No match, increment the iterator for the next attempt. |
1265 | 0 | Cord::Advance(&it, 1); |
1266 | 0 | } |
1267 | | // If we got here, we did not find `needle`. |
1268 | 0 | return char_end(); |
1269 | 0 | } |
1270 | | |
1271 | 0 | absl::Cord::CharIterator absl::Cord::Find(absl::string_view needle) const { |
1272 | 0 | if (needle.empty()) { |
1273 | 0 | return char_begin(); |
1274 | 0 | } |
1275 | 0 | if (needle.size() > size()) { |
1276 | 0 | return char_end(); |
1277 | 0 | } |
1278 | 0 | if (needle.size() == size()) { |
1279 | 0 | return *this == needle ? char_begin() : char_end(); |
1280 | 0 | } |
1281 | 0 | return FindImpl(char_begin(), needle); |
1282 | 0 | } |
1283 | | |
1284 | | namespace { |
1285 | | |
1286 | | // Tests whether the sequence of chunks beginning at `haystack` starts with the |
1287 | | // sequence of chunks beginning at `needle_begin` and extending to `needle_end`. |
1288 | | // |
1289 | | // REQUIRES: remaining `absl::Cord` starting at `position` is greater than or |
1290 | | // equal to `needle_end - needle_begin` and `advance`. |
1291 | | bool IsSubcordInCordAt(absl::Cord::CharIterator haystack, |
1292 | | absl::Cord::CharIterator needle_begin, |
1293 | 0 | absl::Cord::CharIterator needle_end) { |
1294 | 0 | while (needle_begin != needle_end) { |
1295 | 0 | auto haystack_chunk = absl::Cord::ChunkRemaining(haystack); |
1296 | 0 | assert(!haystack_chunk.empty()); |
1297 | 0 | auto needle_chunk = absl::Cord::ChunkRemaining(needle_begin); |
1298 | 0 | auto min_length = std::min(haystack_chunk.size(), needle_chunk.size()); |
1299 | 0 | if (haystack_chunk.substr(0, min_length) != |
1300 | 0 | needle_chunk.substr(0, min_length)) { |
1301 | 0 | return false; |
1302 | 0 | } |
1303 | 0 | absl::Cord::Advance(&haystack, min_length); |
1304 | 0 | absl::Cord::Advance(&needle_begin, min_length); |
1305 | 0 | } |
1306 | 0 | return true; |
1307 | 0 | } |
1308 | | |
1309 | | // Tests whether the sequence of chunks beginning at `position` starts with the |
1310 | | // cord `needle`. |
1311 | | // |
1312 | | // REQUIRES: remaining `absl::Cord` starting at `position` is greater than or |
1313 | | // equal to `needle.size()`. |
1314 | | bool IsSubcordInCordAt(absl::Cord::CharIterator position, |
1315 | 0 | const absl::Cord& needle) { |
1316 | 0 | return IsSubcordInCordAt(position, needle.char_begin(), needle.char_end()); |
1317 | 0 | } |
1318 | | |
1319 | | } // namespace |
1320 | | |
1321 | 0 | absl::Cord::CharIterator absl::Cord::Find(const absl::Cord& needle) const { |
1322 | 0 | if (needle.empty()) { |
1323 | 0 | return char_begin(); |
1324 | 0 | } |
1325 | 0 | const auto needle_size = needle.size(); |
1326 | 0 | if (needle_size > size()) { |
1327 | 0 | return char_end(); |
1328 | 0 | } |
1329 | 0 | if (needle_size == size()) { |
1330 | 0 | return *this == needle ? char_begin() : char_end(); |
1331 | 0 | } |
1332 | 0 | const auto needle_chunk = Cord::ChunkRemaining(needle.char_begin()); |
1333 | 0 | auto haystack_it = char_begin(); |
1334 | 0 | while (true) { |
1335 | 0 | haystack_it = FindImpl(haystack_it, needle_chunk); |
1336 | 0 | if (haystack_it == char_end() || |
1337 | 0 | haystack_it.chunk_iterator_.bytes_remaining_ < needle_size) { |
1338 | 0 | break; |
1339 | 0 | } |
1340 | | // We found the first chunk of `needle` at `haystack_it` but not the entire |
1341 | | // subcord. Advance past the first chunk and check for the remainder. |
1342 | 0 | auto haystack_advanced_it = haystack_it; |
1343 | 0 | auto needle_it = needle.char_begin(); |
1344 | 0 | Cord::Advance(&haystack_advanced_it, needle_chunk.size()); |
1345 | 0 | Cord::Advance(&needle_it, needle_chunk.size()); |
1346 | 0 | if (IsSubcordInCordAt(haystack_advanced_it, needle_it, needle.char_end())) { |
1347 | 0 | return haystack_it; |
1348 | 0 | } |
1349 | 0 | Cord::Advance(&haystack_it, 1); |
1350 | 0 | if (haystack_it.chunk_iterator_.bytes_remaining_ < needle_size) { |
1351 | 0 | break; |
1352 | 0 | } |
1353 | 0 | if (haystack_it.chunk_iterator_.bytes_remaining_ == needle_size) { |
1354 | | // Special case, if there is exactly `needle_size` bytes remaining, the |
1355 | | // subcord is either at `haystack_it` or not at all. |
1356 | 0 | if (IsSubcordInCordAt(haystack_it, needle)) { |
1357 | 0 | return haystack_it; |
1358 | 0 | } |
1359 | 0 | break; |
1360 | 0 | } |
1361 | 0 | } |
1362 | 0 | return char_end(); |
1363 | 0 | } |
1364 | | |
1365 | 0 | bool Cord::Contains(absl::string_view rhs) const { |
1366 | 0 | return rhs.empty() || Find(rhs) != char_end(); |
1367 | 0 | } |
1368 | | |
1369 | 0 | bool Cord::Contains(const absl::Cord& rhs) const { |
1370 | 0 | return rhs.empty() || Find(rhs) != char_end(); |
1371 | 0 | } |
1372 | | |
1373 | 0 | absl::string_view Cord::FlattenSlowPath() { |
1374 | 0 | assert(contents_.is_tree()); |
1375 | 0 | size_t total_size = size(); |
1376 | 0 | CordRep* new_rep; |
1377 | 0 | char* new_buffer; |
1378 | | |
1379 | | // Try to put the contents into a new flat rep. If they won't fit in the |
1380 | | // biggest possible flat node, use an external rep instead. |
1381 | 0 | if (total_size <= kMaxFlatLength) { |
1382 | 0 | new_rep = CordRepFlat::New(total_size); |
1383 | 0 | new_rep->length = total_size; |
1384 | 0 | new_buffer = new_rep->flat()->Data(); |
1385 | 0 | CopyToArraySlowPath(new_buffer); |
1386 | 0 | } else { |
1387 | 0 | new_buffer = std::allocator<char>().allocate(total_size); |
1388 | 0 | CopyToArraySlowPath(new_buffer); |
1389 | 0 | new_rep = absl::cord_internal::NewExternalRep( |
1390 | 0 | absl::string_view(new_buffer, total_size), [](absl::string_view s) { |
1391 | 0 | std::allocator<char>().deallocate(const_cast<char*>(s.data()), |
1392 | 0 | s.size()); |
1393 | 0 | }); |
1394 | 0 | } |
1395 | 0 | CordzUpdateScope scope(contents_.cordz_info(), CordzUpdateTracker::kFlatten); |
1396 | 0 | CordRep::Unref(contents_.as_tree()); |
1397 | 0 | contents_.SetTree(new_rep, scope); |
1398 | 0 | return absl::string_view(new_buffer, total_size); |
1399 | 0 | } |
1400 | | |
1401 | | /* static */ bool Cord::GetFlatAux(CordRep* absl_nonnull rep, |
1402 | 0 | absl::string_view* absl_nonnull fragment) { |
1403 | 0 | assert(rep != nullptr); |
1404 | 0 | if (rep->length == 0) { |
1405 | 0 | *fragment = absl::string_view(); |
1406 | 0 | return true; |
1407 | 0 | } |
1408 | 0 | rep = cord_internal::SkipCrcNode(rep); |
1409 | 0 | if (rep->IsFlat()) { |
1410 | 0 | *fragment = absl::string_view(rep->flat()->Data(), rep->length); |
1411 | 0 | return true; |
1412 | 0 | } else if (rep->IsExternal()) { |
1413 | 0 | *fragment = absl::string_view(rep->external()->base, rep->length); |
1414 | 0 | return true; |
1415 | 0 | } else if (rep->IsBtree()) { |
1416 | 0 | return rep->btree()->IsFlat(fragment); |
1417 | 0 | } else if (rep->IsSubstring()) { |
1418 | 0 | CordRep* child = rep->substring()->child; |
1419 | 0 | if (child->IsFlat()) { |
1420 | 0 | *fragment = absl::string_view( |
1421 | 0 | child->flat()->Data() + rep->substring()->start, rep->length); |
1422 | 0 | return true; |
1423 | 0 | } else if (child->IsExternal()) { |
1424 | 0 | *fragment = absl::string_view( |
1425 | 0 | child->external()->base + rep->substring()->start, rep->length); |
1426 | 0 | return true; |
1427 | 0 | } else if (child->IsBtree()) { |
1428 | 0 | return child->btree()->IsFlat(rep->substring()->start, rep->length, |
1429 | 0 | fragment); |
1430 | 0 | } |
1431 | 0 | } |
1432 | 0 | return false; |
1433 | 0 | } |
1434 | | |
1435 | | /* static */ void Cord::ForEachChunkAux( |
1436 | | absl::cord_internal::CordRep* absl_nonnull rep, |
1437 | 0 | absl::FunctionRef<void(absl::string_view)> callback) { |
1438 | 0 | assert(rep != nullptr); |
1439 | 0 | if (rep->length == 0) return; |
1440 | 0 | rep = cord_internal::SkipCrcNode(rep); |
1441 | |
|
1442 | 0 | if (rep->IsBtree()) { |
1443 | 0 | ChunkIterator it(rep), end; |
1444 | 0 | while (it != end) { |
1445 | 0 | callback(*it); |
1446 | 0 | ++it; |
1447 | 0 | } |
1448 | 0 | return; |
1449 | 0 | } |
1450 | | |
1451 | | // This is a leaf node, so invoke our callback. |
1452 | 0 | absl::cord_internal::CordRep* current_node = cord_internal::SkipCrcNode(rep); |
1453 | 0 | absl::string_view chunk; |
1454 | 0 | bool success = GetFlatAux(current_node, &chunk); |
1455 | 0 | assert(success); |
1456 | 0 | if (success) { |
1457 | 0 | callback(chunk); |
1458 | 0 | } |
1459 | 0 | } |
1460 | | |
1461 | | static void DumpNode(CordRep* absl_nonnull nonnull_rep, bool include_data, |
1462 | 0 | std::ostream* absl_nonnull os, int indent) { |
1463 | 0 | CordRep* rep = nonnull_rep; |
1464 | 0 | const int kIndentStep = 1; |
1465 | 0 | for (;;) { |
1466 | 0 | *os << std::setw(3) << (rep == nullptr ? 0 : rep->refcount.Get()); |
1467 | 0 | *os << " " << std::setw(7) << (rep == nullptr ? 0 : rep->length); |
1468 | 0 | *os << " ["; |
1469 | 0 | if (include_data) *os << static_cast<void*>(rep); |
1470 | 0 | *os << "]"; |
1471 | 0 | *os << " " << std::setw(indent) << ""; |
1472 | 0 | bool leaf = false; |
1473 | 0 | if (rep == nullptr) { |
1474 | 0 | *os << "NULL\n"; |
1475 | 0 | leaf = true; |
1476 | 0 | } else if (rep->IsCrc()) { |
1477 | 0 | *os << "CRC crc=" << rep->crc()->crc_cord_state.Checksum() << "\n"; |
1478 | 0 | indent += kIndentStep; |
1479 | 0 | rep = rep->crc()->child; |
1480 | 0 | } else if (rep->IsSubstring()) { |
1481 | 0 | *os << "SUBSTRING @ " << rep->substring()->start << "\n"; |
1482 | 0 | indent += kIndentStep; |
1483 | 0 | rep = rep->substring()->child; |
1484 | 0 | } else { // Leaf or ring |
1485 | 0 | leaf = true; |
1486 | 0 | if (rep->IsExternal()) { |
1487 | 0 | *os << "EXTERNAL ["; |
1488 | 0 | if (include_data) |
1489 | 0 | *os << absl::CEscape( |
1490 | 0 | absl::string_view(rep->external()->base, rep->length)); |
1491 | 0 | *os << "]\n"; |
1492 | 0 | } else if (rep->IsFlat()) { |
1493 | 0 | *os << "FLAT cap=" << rep->flat()->Capacity() << " ["; |
1494 | 0 | if (include_data) |
1495 | 0 | *os << absl::CEscape( |
1496 | 0 | absl::string_view(rep->flat()->Data(), rep->length)); |
1497 | 0 | *os << "]\n"; |
1498 | 0 | } else { |
1499 | 0 | CordRepBtree::Dump(rep, /*label=*/"", include_data, *os); |
1500 | 0 | } |
1501 | 0 | } |
1502 | 0 | if (leaf) { |
1503 | 0 | break; |
1504 | 0 | } |
1505 | 0 | } |
1506 | 0 | } |
1507 | | |
1508 | | static std::string ReportError(CordRep* absl_nonnull root, |
1509 | 0 | CordRep* absl_nonnull node) { |
1510 | 0 | std::ostringstream buf; |
1511 | 0 | buf << "Error at node " << node << " in:"; |
1512 | 0 | DumpNode(root, true, &buf); |
1513 | 0 | return buf.str(); |
1514 | 0 | } |
1515 | | |
1516 | | static bool VerifyNode(CordRep* absl_nonnull root, |
1517 | 0 | CordRep* absl_nonnull start_node) { |
1518 | 0 | absl::InlinedVector<CordRep* absl_nonnull, 2> worklist; |
1519 | 0 | worklist.push_back(start_node); |
1520 | 0 | do { |
1521 | 0 | CordRep* node = worklist.back(); |
1522 | 0 | worklist.pop_back(); |
1523 | |
|
1524 | 0 | ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node)); |
1525 | 0 | if (node != root) { |
1526 | 0 | ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node)); |
1527 | 0 | ABSL_INTERNAL_CHECK(!node->IsCrc(), ReportError(root, node)); |
1528 | 0 | } |
1529 | | |
1530 | 0 | if (node->IsFlat()) { |
1531 | 0 | ABSL_INTERNAL_CHECK(node->length <= node->flat()->Capacity(), |
1532 | 0 | ReportError(root, node)); |
1533 | 0 | } else if (node->IsExternal()) { |
1534 | 0 | ABSL_INTERNAL_CHECK(node->external()->base != nullptr, |
1535 | 0 | ReportError(root, node)); |
1536 | 0 | } else if (node->IsSubstring()) { |
1537 | 0 | ABSL_INTERNAL_CHECK( |
1538 | 0 | node->substring()->start < node->substring()->child->length, |
1539 | 0 | ReportError(root, node)); |
1540 | 0 | ABSL_INTERNAL_CHECK(node->substring()->start + node->length <= |
1541 | 0 | node->substring()->child->length, |
1542 | 0 | ReportError(root, node)); |
1543 | 0 | } else if (node->IsCrc()) { |
1544 | 0 | ABSL_INTERNAL_CHECK( |
1545 | 0 | node->crc()->child != nullptr || node->crc()->length == 0, |
1546 | 0 | ReportError(root, node)); |
1547 | 0 | if (node->crc()->child != nullptr) { |
1548 | 0 | ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length, |
1549 | 0 | ReportError(root, node)); |
1550 | 0 | worklist.push_back(node->crc()->child); |
1551 | 0 | } |
1552 | 0 | } |
1553 | 0 | } while (!worklist.empty()); |
1554 | 0 | return true; |
1555 | 0 | } |
1556 | | |
1557 | 0 | std::ostream& operator<<(std::ostream& out, const Cord& cord) { |
1558 | 0 | for (absl::string_view chunk : cord.Chunks()) { |
1559 | 0 | out.write(chunk.data(), static_cast<std::streamsize>(chunk.size())); |
1560 | 0 | } |
1561 | 0 | return out; |
1562 | 0 | } |
1563 | | |
1564 | | namespace strings_internal { |
1565 | 0 | size_t CordTestAccess::FlatOverhead() { return cord_internal::kFlatOverhead; } |
1566 | 0 | size_t CordTestAccess::MaxFlatLength() { return cord_internal::kMaxFlatLength; } |
1567 | 0 | size_t CordTestAccess::FlatTagToLength(uint8_t tag) { |
1568 | 0 | return cord_internal::TagToLength(tag); |
1569 | 0 | } |
1570 | 0 | uint8_t CordTestAccess::LengthToTag(size_t s) { |
1571 | 0 | ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s)); |
1572 | 0 | return cord_internal::AllocatedSizeToTag(s + cord_internal::kFlatOverhead); |
1573 | 0 | } |
1574 | 0 | size_t CordTestAccess::SizeofCordRepExternal() { |
1575 | 0 | return sizeof(CordRepExternal); |
1576 | 0 | } |
1577 | 0 | size_t CordTestAccess::SizeofCordRepSubstring() { |
1578 | 0 | return sizeof(CordRepSubstring); |
1579 | 0 | } |
1580 | | } // namespace strings_internal |
1581 | | ABSL_NAMESPACE_END |
1582 | | } // namespace absl |