/src/tesseract/src/ccmain/adaptions.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /**********************************************************************  | 
2  |  |  * File:        adaptions.cpp  (Formerly adaptions.c)  | 
3  |  |  * Description: Functions used to adapt to blobs already confidently  | 
4  |  |  *              identified  | 
5  |  |  * Author:      Chris Newton  | 
6  |  |  *  | 
7  |  |  * (C) Copyright 1992, Hewlett-Packard Ltd.  | 
8  |  |  ** Licensed under the Apache License, Version 2.0 (the "License");  | 
9  |  |  ** you may not use this file except in compliance with the License.  | 
10  |  |  ** You may obtain a copy of the License at  | 
11  |  |  ** http://www.apache.org/licenses/LICENSE-2.0  | 
12  |  |  ** Unless required by applicable law or agreed to in writing, software  | 
13  |  |  ** distributed under the License is distributed on an "AS IS" BASIS,  | 
14  |  |  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
15  |  |  ** See the License for the specific language governing permissions and  | 
16  |  |  ** limitations under the License.  | 
17  |  |  *  | 
18  |  |  **********************************************************************/  | 
19  |  |  | 
20  |  | #include <cctype>  | 
21  |  | #include <cstring>  | 
22  |  | #include "control.h"  | 
23  |  | #include "reject.h"  | 
24  |  | #include "stopper.h"  | 
25  |  | #include "tesseractclass.h"  | 
26  |  | #include "tessvars.h"  | 
27  |  |  | 
28  |  | // Include automatically generated configuration file if running autoconf.  | 
29  |  | #ifdef HAVE_CONFIG_H  | 
30  |  | #  include "config_auto.h"  | 
31  |  | #endif  | 
32  |  |  | 
33  |  | namespace tesseract { | 
34  |  | bool Tesseract::word_adaptable( // should we adapt?  | 
35  | 46.2k  |     WERD_RES *word, uint16_t mode) { | 
36  | 46.2k  |   if (tessedit_adaption_debug) { | 
37  | 0  |     tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n", | 
38  | 0  |             word->best_choice->unichar_string().c_str(), word->best_choice->rating(),  | 
39  | 0  |             word->best_choice->certainty());  | 
40  | 0  |   }  | 
41  |  |  | 
42  | 46.2k  |   bool status = false;  | 
43  | 46.2k  |   std::bitset<16> flags(mode);  | 
44  |  |  | 
45  | 46.2k  |   enum MODES { | 
46  | 46.2k  |     ADAPTABLE_WERD,  | 
47  | 46.2k  |     ACCEPTABLE_WERD,  | 
48  | 46.2k  |     CHECK_DAWGS,  | 
49  | 46.2k  |     CHECK_SPACES,  | 
50  | 46.2k  |     CHECK_ONE_ELL_CONFLICT,  | 
51  | 46.2k  |     CHECK_AMBIG_WERD  | 
52  | 46.2k  |   };  | 
53  |  |  | 
54  |  |   /*  | 
55  |  | 0: NO adaption  | 
56  |  | */  | 
57  | 46.2k  |   if (mode == 0) { | 
58  | 0  |     if (tessedit_adaption_debug) { | 
59  | 0  |       tprintf("adaption disabled\n"); | 
60  | 0  |     }  | 
61  | 0  |     return false;  | 
62  | 0  |   }  | 
63  |  |  | 
64  | 46.2k  |   if (flags[ADAPTABLE_WERD]) { | 
65  | 46.2k  |     status |= word->tess_would_adapt; // result of Classify::AdaptableWord()  | 
66  | 46.2k  |     if (tessedit_adaption_debug && !status) { | 
67  | 0  |       tprintf("tess_would_adapt bit is false\n"); | 
68  | 0  |     }  | 
69  | 46.2k  |   }  | 
70  |  |  | 
71  | 46.2k  |   if (flags[ACCEPTABLE_WERD]) { | 
72  | 46.2k  |     status |= word->tess_accepted;  | 
73  | 46.2k  |     if (tessedit_adaption_debug && !status) { | 
74  | 0  |       tprintf("tess_accepted bit is false\n"); | 
75  | 0  |     }  | 
76  | 46.2k  |   }  | 
77  |  |  | 
78  | 46.2k  |   if (!status) {  // If not set then | 
79  | 43.2k  |     return false; // ignore other checks  | 
80  | 43.2k  |   }  | 
81  |  |  | 
82  | 3.03k  |   if (flags[CHECK_DAWGS] && (word->best_choice->permuter() != SYSTEM_DAWG_PERM) &&  | 
83  | 3.03k  |       (word->best_choice->permuter() != FREQ_DAWG_PERM) &&  | 
84  | 3.03k  |       (word->best_choice->permuter() != USER_DAWG_PERM) &&  | 
85  | 3.03k  |       (word->best_choice->permuter() != NUMBER_PERM)) { | 
86  | 417  |     if (tessedit_adaption_debug) { | 
87  | 0  |       tprintf("word not in dawgs\n"); | 
88  | 0  |     }  | 
89  | 417  |     return false;  | 
90  | 417  |   }  | 
91  |  |  | 
92  | 2.61k  |   if (flags[CHECK_ONE_ELL_CONFLICT] && one_ell_conflict(word, false)) { | 
93  | 0  |     if (tessedit_adaption_debug) { | 
94  | 0  |       tprintf("word has ell conflict\n"); | 
95  | 0  |     }  | 
96  | 0  |     return false;  | 
97  | 0  |   }  | 
98  |  |  | 
99  | 2.61k  |   if (flags[CHECK_SPACES] &&  | 
100  | 2.61k  |       (strchr(word->best_choice->unichar_string().c_str(), ' ') != nullptr)) { | 
101  | 0  |     if (tessedit_adaption_debug) { | 
102  | 0  |       tprintf("word contains spaces\n"); | 
103  | 0  |     }  | 
104  | 0  |     return false;  | 
105  | 0  |   }  | 
106  |  |  | 
107  | 2.61k  |   if (flags[CHECK_AMBIG_WERD] && word->best_choice->dangerous_ambig_found()) { | 
108  | 0  |     if (tessedit_adaption_debug) { | 
109  | 0  |       tprintf("word is ambiguous\n"); | 
110  | 0  |     }  | 
111  | 0  |     return false;  | 
112  | 0  |   }  | 
113  |  |  | 
114  | 2.61k  |   if (tessedit_adaption_debug) { | 
115  | 0  |     tprintf("returning status %d\n", status); | 
116  | 0  |   }  | 
117  | 2.61k  |   return status;  | 
118  | 2.61k  | }  | 
119  |  |  | 
120  |  | } // namespace tesseract  |