/src/tesseract/src/ccmain/ltrresultiterator.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: ltrresultiterator.cpp |
3 | | // Description: Iterator for tesseract results in strict left-to-right |
4 | | // order that avoids using tesseract internal data structures. |
5 | | // Author: Ray Smith |
6 | | // |
7 | | // (C) Copyright 2010, Google Inc. |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | // you may not use this file except in compliance with the License. |
10 | | // You may obtain a copy of the License at |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // Unless required by applicable law or agreed to in writing, software |
13 | | // distributed under the License is distributed on an "AS IS" BASIS, |
14 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | // See the License for the specific language governing permissions and |
16 | | // limitations under the License. |
17 | | // |
18 | | /////////////////////////////////////////////////////////////////////// |
19 | | |
20 | | #include <tesseract/ltrresultiterator.h> |
21 | | |
22 | | #include "pageres.h" |
23 | | #include "tesseractclass.h" |
24 | | |
25 | | #include <allheaders.h> |
26 | | |
27 | | namespace tesseract { |
28 | | |
// Constructs an iterator over page_res. Every argument is passed through
// unchanged to the PageIterator base class; the line separator and the
// paragraph separator both start out as "\n".
LTRResultIterator::LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
                                     int scaled_yres, int rect_left, int rect_top, int rect_width,
                                     int rect_height)
    : PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top, rect_width,
                   rect_height)
    , line_separator_("\n")
    , paragraph_separator_("\n") {}
36 | | |
// Destructor.
// Defaulted out of line in this file so that the compiler can emit a single
// vtable here instead of weak vtables in every compilation unit.
LTRResultIterator::~LTRResultIterator() = default;
41 | | |
42 | | // Returns the null terminated UTF-8 encoded text string for the current |
43 | | // object at the given level. Use delete [] to free after use. |
44 | 0 | char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const { |
45 | 0 | if (it_->word() == nullptr) { |
46 | 0 | return nullptr; // Already at the end! |
47 | 0 | } |
48 | 0 | std::string text; |
49 | 0 | PAGE_RES_IT res_it(*it_); |
50 | 0 | WERD_CHOICE *best_choice = res_it.word()->best_choice; |
51 | 0 | ASSERT_HOST(best_choice != nullptr); |
52 | 0 | if (level == RIL_SYMBOL) { |
53 | 0 | text = res_it.word()->BestUTF8(blob_index_, false); |
54 | 0 | } else if (level == RIL_WORD) { |
55 | 0 | text = best_choice->unichar_string(); |
56 | 0 | } else { |
57 | 0 | bool eol = false; // end of line? |
58 | 0 | bool eop = false; // end of paragraph? |
59 | 0 | do { // for each paragraph in a block |
60 | 0 | do { // for each text line in a paragraph |
61 | 0 | do { // for each word in a text line |
62 | 0 | best_choice = res_it.word()->best_choice; |
63 | 0 | ASSERT_HOST(best_choice != nullptr); |
64 | 0 | text += best_choice->unichar_string(); |
65 | 0 | text += " "; |
66 | 0 | res_it.forward(); |
67 | 0 | eol = res_it.row() != res_it.prev_row(); |
68 | 0 | } while (!eol); |
69 | 0 | text.resize(text.length() - 1); |
70 | 0 | text += line_separator_; |
71 | 0 | eop = res_it.block() != res_it.prev_block() || |
72 | 0 | res_it.row()->row->para() != res_it.prev_row()->row->para(); |
73 | 0 | } while (level != RIL_TEXTLINE && !eop); |
74 | 0 | if (eop) { |
75 | 0 | text += paragraph_separator_; |
76 | 0 | } |
77 | 0 | } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block()); |
78 | 0 | } |
79 | 0 | int length = text.length() + 1; |
80 | 0 | char *result = new char[length]; |
81 | 0 | strncpy(result, text.c_str(), length); |
82 | 0 | return result; |
83 | 0 | } |
84 | | |
85 | | // Set the string inserted at the end of each text line. "\n" by default. |
86 | 0 | void LTRResultIterator::SetLineSeparator(const char *new_line) { |
87 | 0 | line_separator_ = new_line; |
88 | 0 | } |
89 | | |
90 | | // Set the string inserted at the end of each paragraph. "\n" by default. |
91 | 0 | void LTRResultIterator::SetParagraphSeparator(const char *new_para) { |
92 | 0 | paragraph_separator_ = new_para; |
93 | 0 | } |
94 | | |
95 | | // Returns the mean confidence of the current object at the given level. |
96 | | // The number should be interpreted as a percent probability. (0.0f-100.0f) |
97 | 0 | float LTRResultIterator::Confidence(PageIteratorLevel level) const { |
98 | 0 | if (it_->word() == nullptr) { |
99 | 0 | return 0.0f; // Already at the end! |
100 | 0 | } |
101 | 0 | float mean_certainty = 0.0f; |
102 | 0 | int certainty_count = 0; |
103 | 0 | PAGE_RES_IT res_it(*it_); |
104 | 0 | WERD_CHOICE *best_choice = res_it.word()->best_choice; |
105 | 0 | ASSERT_HOST(best_choice != nullptr); |
106 | 0 | switch (level) { |
107 | 0 | case RIL_BLOCK: |
108 | 0 | do { |
109 | 0 | best_choice = res_it.word()->best_choice; |
110 | 0 | ASSERT_HOST(best_choice != nullptr); |
111 | 0 | mean_certainty += best_choice->certainty(); |
112 | 0 | ++certainty_count; |
113 | 0 | res_it.forward(); |
114 | 0 | } while (res_it.block() == res_it.prev_block()); |
115 | 0 | break; |
116 | 0 | case RIL_PARA: |
117 | 0 | do { |
118 | 0 | best_choice = res_it.word()->best_choice; |
119 | 0 | ASSERT_HOST(best_choice != nullptr); |
120 | 0 | mean_certainty += best_choice->certainty(); |
121 | 0 | ++certainty_count; |
122 | 0 | res_it.forward(); |
123 | 0 | } while (res_it.block() == res_it.prev_block() && |
124 | 0 | res_it.row()->row->para() == res_it.prev_row()->row->para()); |
125 | 0 | break; |
126 | 0 | case RIL_TEXTLINE: |
127 | 0 | do { |
128 | 0 | best_choice = res_it.word()->best_choice; |
129 | 0 | ASSERT_HOST(best_choice != nullptr); |
130 | 0 | mean_certainty += best_choice->certainty(); |
131 | 0 | ++certainty_count; |
132 | 0 | res_it.forward(); |
133 | 0 | } while (res_it.row() == res_it.prev_row()); |
134 | 0 | break; |
135 | 0 | case RIL_WORD: |
136 | 0 | mean_certainty += best_choice->certainty(); |
137 | 0 | ++certainty_count; |
138 | 0 | break; |
139 | 0 | case RIL_SYMBOL: |
140 | 0 | mean_certainty += best_choice->certainty(blob_index_); |
141 | 0 | ++certainty_count; |
142 | 0 | } |
143 | 0 | if (certainty_count > 0) { |
144 | 0 | mean_certainty /= certainty_count; |
145 | 0 | return ClipToRange(100 + 5 * mean_certainty, 0.0f, 100.0f); |
146 | 0 | } |
147 | 0 | return 0.0f; |
148 | 0 | } |
149 | | |
150 | | // Returns the font attributes of the current word. If iterating at a higher |
151 | | // level object than words, eg textlines, then this will return the |
152 | | // attributes of the first word in that textline. |
153 | | // The actual return value is a string representing a font name. It points |
154 | | // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as |
155 | | // the iterator itself, ie rendered invalid by various members of |
156 | | // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. |
157 | | // Pointsize is returned in printers points (1/72 inch.) |
158 | | const char *LTRResultIterator::WordFontAttributes(bool *is_bold, bool *is_italic, |
159 | | bool *is_underlined, bool *is_monospace, |
160 | | bool *is_serif, bool *is_smallcaps, |
161 | 0 | int *pointsize, int *font_id) const { |
162 | 0 | const char *result = nullptr; |
163 | |
|
164 | 0 | if (it_->word() == nullptr) { |
165 | | // Already at the end! |
166 | 0 | *pointsize = 0; |
167 | 0 | } else { |
168 | 0 | float row_height = |
169 | 0 | it_->row()->row->x_height() + it_->row()->row->ascenders() - it_->row()->row->descenders(); |
170 | | // Convert from pixels to printers points. |
171 | 0 | *pointsize = |
172 | 0 | scaled_yres_ > 0 ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5) : 0; |
173 | |
|
174 | 0 | #ifndef DISABLED_LEGACY_ENGINE |
175 | 0 | const FontInfo *font_info = it_->word()->fontinfo; |
176 | 0 | if (font_info) { |
177 | | // Font information available. |
178 | 0 | *font_id = font_info->universal_id; |
179 | 0 | *is_bold = font_info->is_bold(); |
180 | 0 | *is_italic = font_info->is_italic(); |
181 | 0 | *is_underlined = false; // TODO(rays) fix this! |
182 | 0 | *is_monospace = font_info->is_fixed_pitch(); |
183 | 0 | *is_serif = font_info->is_serif(); |
184 | 0 | result = font_info->name; |
185 | 0 | } |
186 | 0 | #endif // ndef DISABLED_LEGACY_ENGINE |
187 | |
|
188 | 0 | *is_smallcaps = it_->word()->small_caps; |
189 | 0 | } |
190 | |
|
191 | 0 | if (!result) { |
192 | 0 | *is_bold = false; |
193 | 0 | *is_italic = false; |
194 | 0 | *is_underlined = false; |
195 | 0 | *is_monospace = false; |
196 | 0 | *is_serif = false; |
197 | 0 | *is_smallcaps = false; |
198 | 0 | *font_id = -1; |
199 | 0 | } |
200 | |
|
201 | 0 | return result; |
202 | 0 | } |
203 | | |
204 | | // Returns the name of the language used to recognize this word. |
205 | 0 | const char *LTRResultIterator::WordRecognitionLanguage() const { |
206 | 0 | if (it_->word() == nullptr || it_->word()->tesseract == nullptr) { |
207 | 0 | return nullptr; |
208 | 0 | } |
209 | 0 | return it_->word()->tesseract->lang.c_str(); |
210 | 0 | } |
211 | | |
212 | | // Return the overall directionality of this word. |
213 | 6.61M | StrongScriptDirection LTRResultIterator::WordDirection() const { |
214 | 6.61M | if (it_->word() == nullptr) { |
215 | 0 | return DIR_NEUTRAL; |
216 | 0 | } |
217 | 6.61M | bool has_rtl = it_->word()->AnyRtlCharsInWord(); |
218 | 6.61M | bool has_ltr = it_->word()->AnyLtrCharsInWord(); |
219 | 6.61M | if (has_rtl && !has_ltr) { |
220 | 0 | return DIR_RIGHT_TO_LEFT; |
221 | 0 | } |
222 | 6.61M | if (has_ltr && !has_rtl) { |
223 | 4.09M | return DIR_LEFT_TO_RIGHT; |
224 | 4.09M | } |
225 | 2.52M | if (!has_ltr && !has_rtl) { |
226 | 2.52M | return DIR_NEUTRAL; |
227 | 2.52M | } |
228 | 0 | return DIR_MIX; |
229 | 2.52M | } |
230 | | |
231 | | // Returns true if the current word was found in a dictionary. |
232 | 0 | bool LTRResultIterator::WordIsFromDictionary() const { |
233 | 0 | if (it_->word() == nullptr) { |
234 | 0 | return false; // Already at the end! |
235 | 0 | } |
236 | 0 | int permuter = it_->word()->best_choice->permuter(); |
237 | 0 | return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM || permuter == USER_DAWG_PERM; |
238 | 0 | } |
239 | | |
240 | | // Returns the number of blanks before the current word. |
241 | 0 | int LTRResultIterator::BlanksBeforeWord() const { |
242 | 0 | if (it_->word() == nullptr) { |
243 | 0 | return 1; |
244 | 0 | } |
245 | 0 | return it_->word()->word->space(); |
246 | 0 | } |
247 | | |
248 | | // Returns true if the current word is numeric. |
249 | 0 | bool LTRResultIterator::WordIsNumeric() const { |
250 | 0 | if (it_->word() == nullptr) { |
251 | 0 | return false; // Already at the end! |
252 | 0 | } |
253 | 0 | int permuter = it_->word()->best_choice->permuter(); |
254 | 0 | return permuter == NUMBER_PERM; |
255 | 0 | } |
256 | | |
257 | | // Returns true if the word contains blamer information. |
258 | 0 | bool LTRResultIterator::HasBlamerInfo() const { |
259 | 0 | return it_->word() != nullptr && it_->word()->blamer_bundle != nullptr && |
260 | 0 | it_->word()->blamer_bundle->HasDebugInfo(); |
261 | 0 | } |
262 | | |
263 | | #ifndef DISABLED_LEGACY_ENGINE |
264 | | // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle |
265 | | // of the current word. |
266 | 0 | const void *LTRResultIterator::GetParamsTrainingBundle() const { |
267 | 0 | return (it_->word() != nullptr && it_->word()->blamer_bundle != nullptr) |
268 | 0 | ? &(it_->word()->blamer_bundle->params_training_bundle()) |
269 | 0 | : nullptr; |
270 | 0 | } |
271 | | #endif // ndef DISABLED_LEGACY_ENGINE |
272 | | |
273 | | // Returns the pointer to the string with blamer information for this word. |
274 | | // Assumes that the word's blamer_bundle is not nullptr. |
275 | 0 | const char *LTRResultIterator::GetBlamerDebug() const { |
276 | 0 | return it_->word()->blamer_bundle->debug().c_str(); |
277 | 0 | } |
278 | | |
279 | | // Returns the pointer to the string with misadaption information for this word. |
280 | | // Assumes that the word's blamer_bundle is not nullptr. |
281 | 0 | const char *LTRResultIterator::GetBlamerMisadaptionDebug() const { |
282 | 0 | return it_->word()->blamer_bundle->misadaption_debug().c_str(); |
283 | 0 | } |
284 | | |
285 | | // Returns true if a truth string was recorded for the current word. |
286 | 0 | bool LTRResultIterator::HasTruthString() const { |
287 | 0 | if (it_->word() == nullptr) { |
288 | 0 | return false; // Already at the end! |
289 | 0 | } |
290 | 0 | if (it_->word()->blamer_bundle == nullptr || it_->word()->blamer_bundle->NoTruth()) { |
291 | 0 | return false; // no truth information for this word |
292 | 0 | } |
293 | 0 | return true; |
294 | 0 | } |
295 | | |
296 | | // Returns true if the given string is equivalent to the truth string for |
297 | | // the current word. |
298 | 0 | bool LTRResultIterator::EquivalentToTruth(const char *str) const { |
299 | 0 | if (!HasTruthString()) { |
300 | 0 | return false; |
301 | 0 | } |
302 | 0 | ASSERT_HOST(it_->word()->uch_set != nullptr); |
303 | 0 | WERD_CHOICE str_wd(str, *(it_->word()->uch_set)); |
304 | 0 | return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd); |
305 | 0 | } |
306 | | |
307 | | // Returns the null terminated UTF-8 encoded truth string for the current word. |
308 | | // Use delete [] to free after use. |
309 | 0 | char *LTRResultIterator::WordTruthUTF8Text() const { |
310 | 0 | if (!HasTruthString()) { |
311 | 0 | return nullptr; |
312 | 0 | } |
313 | 0 | std::string truth_text = it_->word()->blamer_bundle->TruthString(); |
314 | 0 | int length = truth_text.length() + 1; |
315 | 0 | char *result = new char[length]; |
316 | 0 | strncpy(result, truth_text.c_str(), length); |
317 | 0 | return result; |
318 | 0 | } |
319 | | |
320 | | // Returns the null terminated UTF-8 encoded normalized OCR string for the |
321 | | // current word. Use delete [] to free after use. |
322 | 0 | char *LTRResultIterator::WordNormedUTF8Text() const { |
323 | 0 | if (it_->word() == nullptr) { |
324 | 0 | return nullptr; // Already at the end! |
325 | 0 | } |
326 | 0 | std::string ocr_text; |
327 | 0 | WERD_CHOICE *best_choice = it_->word()->best_choice; |
328 | 0 | const UNICHARSET *unicharset = it_->word()->uch_set; |
329 | 0 | ASSERT_HOST(best_choice != nullptr); |
330 | 0 | for (unsigned i = 0; i < best_choice->length(); ++i) { |
331 | 0 | ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i)); |
332 | 0 | } |
333 | 0 | auto length = ocr_text.length() + 1; |
334 | 0 | char *result = new char[length]; |
335 | 0 | strncpy(result, ocr_text.c_str(), length); |
336 | 0 | return result; |
337 | 0 | } |
338 | | |
339 | | // Returns a pointer to serialized choice lattice. |
340 | | // Fills lattice_size with the number of bytes in lattice data. |
341 | 0 | const char *LTRResultIterator::WordLattice(int *lattice_size) const { |
342 | 0 | if (it_->word() == nullptr) { |
343 | 0 | return nullptr; // Already at the end! |
344 | 0 | } |
345 | 0 | if (it_->word()->blamer_bundle == nullptr) { |
346 | 0 | return nullptr; |
347 | 0 | } |
348 | 0 | *lattice_size = it_->word()->blamer_bundle->lattice_size(); |
349 | 0 | return it_->word()->blamer_bundle->lattice_data(); |
350 | 0 | } |
351 | | |
352 | | // Returns true if the current symbol is a superscript. |
353 | | // If iterating at a higher level object than symbols, eg words, then |
354 | | // this will return the attributes of the first symbol in that word. |
355 | 0 | bool LTRResultIterator::SymbolIsSuperscript() const { |
356 | 0 | if (cblob_it_ == nullptr && it_->word() != nullptr) { |
357 | 0 | return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUPERSCRIPT; |
358 | 0 | } |
359 | 0 | return false; |
360 | 0 | } |
361 | | |
362 | | // Returns true if the current symbol is a subscript. |
363 | | // If iterating at a higher level object than symbols, eg words, then |
364 | | // this will return the attributes of the first symbol in that word. |
365 | 0 | bool LTRResultIterator::SymbolIsSubscript() const { |
366 | 0 | if (cblob_it_ == nullptr && it_->word() != nullptr) { |
367 | 0 | return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT; |
368 | 0 | } |
369 | 0 | return false; |
370 | 0 | } |
371 | | |
372 | | // Returns true if the current symbol is a dropcap. |
373 | | // If iterating at a higher level object than symbols, eg words, then |
374 | | // this will return the attributes of the first symbol in that word. |
375 | 0 | bool LTRResultIterator::SymbolIsDropcap() const { |
376 | 0 | if (cblob_it_ == nullptr && it_->word() != nullptr) { |
377 | 0 | return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP; |
378 | 0 | } |
379 | 0 | return false; |
380 | 0 | } |
381 | | |
382 | 0 | ChoiceIterator::ChoiceIterator(const LTRResultIterator &result_it) { |
383 | 0 | ASSERT_HOST(result_it.it_->word() != nullptr); |
384 | 0 | word_res_ = result_it.it_->word(); |
385 | 0 | oemLSTM_ = word_res_->tesseract->AnyLSTMLang(); |
386 | | // Is there legacy engine related trained data? |
387 | 0 | bool oemLegacy = word_res_->tesseract->AnyTessLang(); |
388 | | // Is lstm_choice_mode activated? |
389 | 0 | bool lstm_choice_mode = word_res_->tesseract->lstm_choice_mode; |
390 | 0 | rating_coefficient_ = word_res_->tesseract->lstm_rating_coefficient; |
391 | 0 | blanks_before_word_ = result_it.BlanksBeforeWord(); |
392 | 0 | BLOB_CHOICE_LIST *choices = nullptr; |
393 | 0 | tstep_index_ = &result_it.blob_index_; |
394 | 0 | if (oemLSTM_ && !word_res_->CTC_symbol_choices.empty()) { |
395 | 0 | if (!word_res_->CTC_symbol_choices[0].empty() && |
396 | 0 | strcmp(word_res_->CTC_symbol_choices[0][0].first, " ")) { |
397 | 0 | blanks_before_word_ = 0; |
398 | 0 | } |
399 | 0 | unsigned index = *tstep_index_; |
400 | 0 | index += blanks_before_word_; |
401 | 0 | if (index < word_res_->CTC_symbol_choices.size()) { |
402 | 0 | LSTM_choices_ = &word_res_->CTC_symbol_choices[index]; |
403 | 0 | filterSpaces(); |
404 | 0 | } |
405 | 0 | } |
406 | 0 | if ((oemLegacy || !lstm_choice_mode) && word_res_->ratings != nullptr) { |
407 | 0 | choices = word_res_->GetBlobChoices(result_it.blob_index_); |
408 | 0 | } |
409 | 0 | if (choices != nullptr && !choices->empty()) { |
410 | 0 | choice_it_ = new BLOB_CHOICE_IT(choices); |
411 | 0 | choice_it_->mark_cycle_pt(); |
412 | 0 | } else { |
413 | 0 | choice_it_ = nullptr; |
414 | 0 | } |
415 | 0 | if (LSTM_choices_ != nullptr && !LSTM_choices_->empty()) { |
416 | 0 | LSTM_choice_it_ = LSTM_choices_->begin(); |
417 | 0 | } |
418 | 0 | } |
// Releases the blob choice iterator allocated by the constructor.
// choice_it_ is nullptr when no legacy choices were available, in which
// case the delete is a no-op.
ChoiceIterator::~ChoiceIterator() {
  delete choice_it_;
}
422 | | |
423 | | // Moves to the next choice for the symbol and returns false if there |
424 | | // are none left. |
425 | 0 | bool ChoiceIterator::Next() { |
426 | 0 | if (oemLSTM_ && LSTM_choices_ != nullptr && !LSTM_choices_->empty()) { |
427 | 0 | if (LSTM_choice_it_ == LSTM_choices_->end() || |
428 | 0 | next(LSTM_choice_it_) == LSTM_choices_->end()) { |
429 | 0 | return false; |
430 | 0 | } else { |
431 | 0 | ++LSTM_choice_it_; |
432 | 0 | return true; |
433 | 0 | } |
434 | 0 | } else { |
435 | 0 | if (choice_it_ == nullptr) { |
436 | 0 | return false; |
437 | 0 | } |
438 | 0 | choice_it_->forward(); |
439 | 0 | return !choice_it_->cycled_list(); |
440 | 0 | } |
441 | 0 | } |
442 | | |
443 | | // Returns the null terminated UTF-8 encoded text string for the current |
444 | | // choice. Do NOT use delete [] to free after use. |
445 | 0 | const char *ChoiceIterator::GetUTF8Text() const { |
446 | 0 | if (oemLSTM_ && LSTM_choices_ != nullptr && !LSTM_choices_->empty()) { |
447 | 0 | std::pair<const char *, float> choice = *LSTM_choice_it_; |
448 | 0 | return choice.first; |
449 | 0 | } else { |
450 | 0 | if (choice_it_ == nullptr) { |
451 | 0 | return nullptr; |
452 | 0 | } |
453 | 0 | UNICHAR_ID id = choice_it_->data()->unichar_id(); |
454 | 0 | return word_res_->uch_set->id_to_unichar_ext(id); |
455 | 0 | } |
456 | 0 | } |
457 | | |
458 | | // Returns the confidence of the current choice depending on the used language |
459 | | // data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All |
460 | | // choices for one symbol should roughly add up to 1.0f. |
461 | | // If only traineddata of the legacy engine is used, the number should be |
462 | | // interpreted as a percent probability. (0.0f-100.0f) In this case |
463 | | // probabilities won't add up to 100. Each one stands on its own. |
464 | 0 | float ChoiceIterator::Confidence() const { |
465 | 0 | float confidence; |
466 | 0 | if (oemLSTM_ && LSTM_choices_ != nullptr && !LSTM_choices_->empty()) { |
467 | 0 | std::pair<const char *, float> choice = *LSTM_choice_it_; |
468 | 0 | confidence = 100 - rating_coefficient_ * choice.second; |
469 | 0 | } else { |
470 | 0 | if (choice_it_ == nullptr) { |
471 | 0 | return 0.0f; |
472 | 0 | } |
473 | 0 | confidence = 100 + 5 * choice_it_->data()->certainty(); |
474 | 0 | } |
475 | 0 | return ClipToRange(confidence, 0.0f, 100.0f); |
476 | 0 | } |
477 | | |
478 | | // Returns the set of timesteps which belong to the current symbol |
479 | 0 | std::vector<std::vector<std::pair<const char *, float>>> *ChoiceIterator::Timesteps() const { |
480 | 0 | unsigned offset = *tstep_index_ + blanks_before_word_; |
481 | 0 | if (offset >= word_res_->segmented_timesteps.size() || !oemLSTM_) { |
482 | 0 | return nullptr; |
483 | 0 | } |
484 | 0 | return &word_res_->segmented_timesteps[offset]; |
485 | 0 | } |
486 | | |
487 | 0 | void ChoiceIterator::filterSpaces() { |
488 | 0 | if (LSTM_choices_->empty()) { |
489 | 0 | return; |
490 | 0 | } |
491 | 0 | std::vector<std::pair<const char *, float>>::iterator it; |
492 | 0 | for (it = LSTM_choices_->begin(); it != LSTM_choices_->end();) { |
493 | 0 | if (!strcmp(it->first, " ")) { |
494 | 0 | it = LSTM_choices_->erase(it); |
495 | 0 | } else { |
496 | 0 | ++it; |
497 | 0 | } |
498 | 0 | } |
499 | 0 | } |
500 | | } // namespace tesseract. |