/src/sentencepiece/src/char_model.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2016 Google Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "char_model.h" |
16 | | #include "util.h" |
17 | | |
18 | | namespace sentencepiece { |
19 | | namespace character { |
20 | | // Constructs a character Model over model_proto: keeps its address in model_proto_ (no copy is made here), then calls InitializePieces(). NOTE(review): since only the address is stored, the proto presumably must outlive this Model -- confirm. |
21 | 0 | Model::Model(const ModelProto &model_proto) { |
22 | 0 | model_proto_ = &model_proto; |
23 | 0 | InitializePieces(); |
24 | 0 | } |
25 | | // Destructor with an empty body; nothing is released explicitly here. |
26 | 0 | Model::~Model() {} |
27 | | // Encodes `normalized` into an EncodeResult whose entries are built from (piece, PieceToId(piece)). Returns an empty result when status() is not ok or when the input is empty. NOTE(review): each piece is created as a string_view into the caller's buffer (see `w`) -- confirm the lifetime callers must guarantee. |
28 | 0 | EncodeResult Model::Encode(absl::string_view normalized) const { |
29 | 0 | if (!status().ok() || normalized.empty()) { |
30 | 0 | return {}; |
31 | 0 | } |
32 | |
33 | | // Splits the input into a character sequence: each pass takes the leading mblen bytes reported by matcher_->PrefixMatch(), appends them together with their PieceToId() value, and removes them from the front of the input. NOTE(review): termination relies on PrefixMatch() returning a positive length no larger than the remaining input -- confirm. |
34 | 0 | EncodeResult output; |
35 | 0 | while (!normalized.empty()) { |
36 | 0 | const int mblen = matcher_->PrefixMatch(normalized); |
37 | 0 | absl::string_view w(normalized.data(), mblen); |
38 | 0 | output.emplace_back(w, PieceToId(w)); |
39 | 0 | normalized.remove_prefix(mblen); |
40 | 0 | } |
41 |
|
42 | 0 | return output; |
43 | 0 | } |
44 | | |
45 | | } // namespace character |
46 | | } // namespace sentencepiece |