Line data Source code
1 : // Copyright 2014 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 :
6 : #include "src/unicode-inl.h"
7 : #include "src/unicode-decoder.h"
8 : #include <stdio.h>
9 : #include <stdlib.h>
10 :
11 : namespace unibrow {
12 :
13 207699304 : uint16_t Utf8Iterator::operator*() {
14 212967528 : if (V8_UNLIKELY(char_ > Utf16::kMaxNonSurrogateCharCode)) {
15 : return trailing_ ? Utf16::TrailSurrogate(char_)
16 201308 : : Utf16::LeadSurrogate(char_);
17 : }
18 :
19 : DCHECK_EQ(trailing_, false);
20 212766268 : return char_;
21 : }
22 :
23 246117448 : Utf8Iterator& Utf8Iterator::operator++() {
24 246117448 : if (V8_UNLIKELY(this->Done())) {
25 593 : char_ = Utf8::kBufferEmpty;
26 593 : return *this;
27 : }
28 :
29 246116855 : if (V8_UNLIKELY(char_ > Utf16::kMaxNonSurrogateCharCode && !trailing_)) {
30 197592 : trailing_ = true;
31 197592 : return *this;
32 : }
33 :
34 245919263 : trailing_ = false;
35 245919263 : offset_ = cursor_;
36 :
37 : char_ =
38 : Utf8::ValueOf(reinterpret_cast<const uint8_t*>(stream_.begin()) + cursor_,
39 491838526 : stream_.length() - cursor_, &cursor_);
40 245919374 : return *this;
41 : }
42 :
43 0 : Utf8Iterator Utf8Iterator::operator++(int) {
44 0 : Utf8Iterator old(*this);
45 0 : ++*this;
46 0 : return old;
47 : }
48 :
49 250511505 : bool Utf8Iterator::Done() {
50 1013544464 : return offset_ == static_cast<size_t>(stream_.length());
51 : }
52 :
53 5956 : void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length,
54 : const v8::internal::Vector<const char>& stream) {
55 : size_t utf16_length = 0;
56 :
57 : Utf8Iterator it = Utf8Iterator(stream);
58 : // Loop until stream is read, writing to buffer as long as buffer has space.
59 821447 : while (utf16_length < buffer_length && !it.Done()) {
60 810162 : *buffer++ = *it;
61 405081 : ++it;
62 405081 : utf16_length++;
63 : }
64 5956 : bytes_read_ = it.Offset();
65 5956 : trailing_ = it.Trailing();
66 5956 : chars_written_ = utf16_length;
67 :
68 : // Now that writing to buffer is done, we just need to calculate utf16_length
69 4875055 : while (!it.Done()) {
70 4863143 : ++it;
71 4863143 : utf16_length++;
72 : }
73 5956 : utf16_length_ = utf16_length;
74 5956 : }
75 :
76 627 : void Utf8DecoderBase::WriteUtf16Slow(
77 : uint16_t* data, size_t length,
78 : const v8::internal::Vector<const char>& stream, size_t offset,
79 : bool trailing) {
80 : Utf8Iterator it = Utf8Iterator(stream, offset, trailing);
81 4863770 : while (!it.Done()) {
82 : DCHECK_GT(length--, 0);
83 9726286 : *data++ = *it;
84 4863143 : ++it;
85 : }
86 627 : }
87 :
88 183867 : } // namespace unibrow
|