/src/tesseract/src/dict/hyphen.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * File: hyphen.cpp (Formerly hyphen.c) |
3 | | * Description: Functions for maintaining information about hyphenated words. |
4 | | * Author: Mark Seaman, OCR Technology |
5 | | * Status: Reusable Software Component |
6 | | * |
7 | | * (c) Copyright 1987, Hewlett-Packard Company. |
8 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | ** you may not use this file except in compliance with the License. |
10 | | ** You may obtain a copy of the License at |
11 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
12 | | ** Unless required by applicable law or agreed to in writing, software |
13 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
14 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | ** See the License for the specific language governing permissions and |
16 | | ** limitations under the License. |
17 | | * |
18 | | *****************************************************************************/ |
19 | | |
20 | | #include "dict.h" |
21 | | |
22 | | namespace tesseract { |
23 | | |
24 | | // Unless the previous word was the last one on the line, and the current |
25 | | // one is not (thus it is the first one on the line), erase hyphen_word_, |
26 | | // clear hyphen_active_dawgs_, hyphen_constraints_ update last_word_on_line_. |
27 | 265k | void Dict::reset_hyphen_vars(bool last_word_on_line) { |
28 | 265k | if (!(last_word_on_line_ == true && last_word_on_line == false)) { |
29 | 258k | if (hyphen_word_ != nullptr) { |
30 | 0 | delete hyphen_word_; |
31 | 0 | hyphen_word_ = nullptr; |
32 | 0 | hyphen_active_dawgs_.clear(); |
33 | 0 | } |
34 | 258k | } |
35 | 265k | if (hyphen_debug_level) { |
36 | 0 | tprintf("reset_hyphen_vars: last_word_on_line %d -> %d\n", last_word_on_line_, |
37 | 0 | last_word_on_line); |
38 | 0 | } |
39 | 265k | last_word_on_line_ = last_word_on_line; |
40 | 265k | } |
41 | | |
42 | | // Update hyphen_word_, and copy the given DawgPositionVectors into |
43 | | // hyphen_active_dawgs_. |
44 | 0 | void Dict::set_hyphen_word(const WERD_CHOICE &word, const DawgPositionVector &active_dawgs) { |
45 | 0 | if (hyphen_word_ == nullptr) { |
46 | 0 | hyphen_word_ = new WERD_CHOICE(word.unicharset()); |
47 | 0 | hyphen_word_->make_bad(); |
48 | 0 | } |
49 | 0 | if (hyphen_word_->rating() > word.rating()) { |
50 | 0 | *hyphen_word_ = word; |
51 | | // Remove the last unichar id as it is a hyphen, and remove |
52 | | // any unichar_string/lengths that are present. |
53 | 0 | hyphen_word_->remove_last_unichar_id(); |
54 | 0 | hyphen_active_dawgs_ = active_dawgs; |
55 | 0 | } |
56 | 0 | if (hyphen_debug_level) { |
57 | 0 | hyphen_word_->print("set_hyphen_word: "); |
58 | 0 | } |
59 | 0 | } |
60 | | } // namespace tesseract |