/src/tesseract/src/ccmain/adaptions.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | * File: adaptions.cpp (Formerly adaptions.c) |
3 | | * Description: Functions used to adapt to blobs already confidently |
4 | | * identified |
5 | | * Author: Chris Newton |
6 | | * |
7 | | * (C) Copyright 1992, Hewlett-Packard Ltd. |
8 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | ** you may not use this file except in compliance with the License. |
10 | | ** You may obtain a copy of the License at |
11 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
12 | | ** Unless required by applicable law or agreed to in writing, software |
13 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
14 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | ** See the License for the specific language governing permissions and |
16 | | ** limitations under the License. |
17 | | * |
18 | | **********************************************************************/ |
19 | | |
20 | | #include <cctype> |
21 | | #include <cstring> |
22 | | #include "control.h" |
23 | | #include "reject.h" |
24 | | #include "stopper.h" |
25 | | #include "tesseractclass.h" |
26 | | #include "tessvars.h" |
27 | | |
28 | | // Include automatically generated configuration file if running autoconf. |
29 | | #ifdef HAVE_CONFIG_H |
30 | | # include "config_auto.h" |
31 | | #endif |
32 | | |
33 | | namespace tesseract { |
34 | | bool Tesseract::word_adaptable( // should we adapt? |
35 | 46.2k | WERD_RES *word, uint16_t mode) { |
36 | 46.2k | if (tessedit_adaption_debug) { |
37 | 0 | tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n", |
38 | 0 | word->best_choice->unichar_string().c_str(), word->best_choice->rating(), |
39 | 0 | word->best_choice->certainty()); |
40 | 0 | } |
41 | | |
42 | 46.2k | bool status = false; |
43 | 46.2k | std::bitset<16> flags(mode); |
44 | | |
45 | 46.2k | enum MODES { |
46 | 46.2k | ADAPTABLE_WERD, |
47 | 46.2k | ACCEPTABLE_WERD, |
48 | 46.2k | CHECK_DAWGS, |
49 | 46.2k | CHECK_SPACES, |
50 | 46.2k | CHECK_ONE_ELL_CONFLICT, |
51 | 46.2k | CHECK_AMBIG_WERD |
52 | 46.2k | }; |
53 | | |
54 | | /* |
55 | | 0: NO adaption |
56 | | */ |
57 | 46.2k | if (mode == 0) { |
58 | 0 | if (tessedit_adaption_debug) { |
59 | 0 | tprintf("adaption disabled\n"); |
60 | 0 | } |
61 | 0 | return false; |
62 | 0 | } |
63 | | |
64 | 46.2k | if (flags[ADAPTABLE_WERD]) { |
65 | 46.2k | status |= word->tess_would_adapt; // result of Classify::AdaptableWord() |
66 | 46.2k | if (tessedit_adaption_debug && !status) { |
67 | 0 | tprintf("tess_would_adapt bit is false\n"); |
68 | 0 | } |
69 | 46.2k | } |
70 | | |
71 | 46.2k | if (flags[ACCEPTABLE_WERD]) { |
72 | 46.2k | status |= word->tess_accepted; |
73 | 46.2k | if (tessedit_adaption_debug && !status) { |
74 | 0 | tprintf("tess_accepted bit is false\n"); |
75 | 0 | } |
76 | 46.2k | } |
77 | | |
78 | 46.2k | if (!status) { // If not set then |
79 | 43.2k | return false; // ignore other checks |
80 | 43.2k | } |
81 | | |
82 | 3.03k | if (flags[CHECK_DAWGS] && (word->best_choice->permuter() != SYSTEM_DAWG_PERM) && |
83 | 3.03k | (word->best_choice->permuter() != FREQ_DAWG_PERM) && |
84 | 3.03k | (word->best_choice->permuter() != USER_DAWG_PERM) && |
85 | 3.03k | (word->best_choice->permuter() != NUMBER_PERM)) { |
86 | 417 | if (tessedit_adaption_debug) { |
87 | 0 | tprintf("word not in dawgs\n"); |
88 | 0 | } |
89 | 417 | return false; |
90 | 417 | } |
91 | | |
92 | 2.61k | if (flags[CHECK_ONE_ELL_CONFLICT] && one_ell_conflict(word, false)) { |
93 | 0 | if (tessedit_adaption_debug) { |
94 | 0 | tprintf("word has ell conflict\n"); |
95 | 0 | } |
96 | 0 | return false; |
97 | 0 | } |
98 | | |
99 | 2.61k | if (flags[CHECK_SPACES] && |
100 | 2.61k | (strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) { |
101 | 0 | if (tessedit_adaption_debug) { |
102 | 0 | tprintf("word contains spaces\n"); |
103 | 0 | } |
104 | 0 | return false; |
105 | 0 | } |
106 | | |
107 | 2.61k | if (flags[CHECK_AMBIG_WERD] && word->best_choice->dangerous_ambig_found()) { |
108 | 0 | if (tessedit_adaption_debug) { |
109 | 0 | tprintf("word is ambiguous\n"); |
110 | 0 | } |
111 | 0 | return false; |
112 | 0 | } |
113 | | |
114 | 2.61k | if (tessedit_adaption_debug) { |
115 | 0 | tprintf("returning status %d\n", status); |
116 | 0 | } |
117 | 2.61k | return status; |
118 | 2.61k | } |
119 | | |
120 | | } // namespace tesseract |