/src/sentencepiece/src/word_model.cc
Line | Count | Source |
1 | | // Copyright 2016 Google Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "word_model.h" |
16 | | |
17 | | #include "util.h" |
18 | | |
19 | | namespace sentencepiece { |
20 | | namespace word { |
21 | | |
22 | 286 | Model::Model(const ModelProto &model_proto) { |
23 | 286 | model_proto_ = &model_proto; |
24 | 286 | InitializePieces(); |
25 | 286 | } |
26 | | |
27 | 286 | Model::~Model() {} |
28 | | |
29 | 777 | EncodeResult Model::Encode(absl::string_view normalized) const { |
30 | 777 | if (!status().ok() || normalized.empty()) { |
31 | 16 | return {}; |
32 | 16 | } |
33 | | |
34 | 761 | EncodeResult output; |
35 | 107k | for (const auto &w : SplitIntoWords(normalized)) { |
36 | 107k | output.emplace_back(w, PieceToId(w)); |
37 | 107k | } |
38 | | |
39 | 761 | return output; |
40 | 777 | } |
41 | | |
42 | | } // namespace word |
43 | | } // namespace sentencepiece |