/src/tesseract/src/ccmain/osdetect.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | ///////////////////////////////////////////////////////////////////////  | 
2  |  | // File:        osdetect.cpp  | 
3  |  | // Description: Orientation and script detection.  | 
4  |  | // Author:      Samuel Charron  | 
5  |  | //              Ranjith Unnikrishnan  | 
6  |  | //  | 
7  |  | // (C) Copyright 2008, Google Inc.  | 
8  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
9  |  | // you may not use this file except in compliance with the License.  | 
10  |  | // You may obtain a copy of the License at  | 
11  |  | // http://www.apache.org/licenses/LICENSE-2.0  | 
12  |  | // Unless required by applicable law or agreed to in writing, software  | 
13  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
14  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
15  |  | // See the License for the specific language governing permissions and  | 
16  |  | // limitations under the License.  | 
17  |  | //  | 
18  |  | ///////////////////////////////////////////////////////////////////////  | 
19  |  |  | 
20  |  | #include <tesseract/osdetect.h>  | 
21  |  |  | 
22  |  | #include "blobbox.h"  | 
23  |  | #include "blread.h"  | 
24  |  | #include "colfind.h"  | 
25  |  | #include "fontinfo.h"  | 
26  |  | #include "imagefind.h"  | 
27  |  | #include "linefind.h"  | 
28  |  | #include "oldlist.h"  | 
29  |  | #include "qrsequence.h"  | 
30  |  | #include "ratngs.h"  | 
31  |  | #include "tabvector.h"  | 
32  |  | #include "tesseractclass.h"  | 
33  |  | #include "textord.h"  | 
34  |  |  | 
35  |  | #include <algorithm>  | 
36  |  | #include <cmath> // for std::fabs  | 
37  |  | #include <memory>  | 
38  |  |  | 
39  |  | namespace tesseract { | 
40  |  |  | 
41  |  | const float kSizeRatioToReject = 2.0;  | 
42  |  | const int kMinAcceptableBlobHeight = 10;  | 
43  |  |  | 
44  |  | const float kScriptAcceptRatio = 1.3;  | 
45  |  |  | 
46  |  | const float kHanRatioInKorean = 0.7;  | 
47  |  | const float kHanRatioInJapanese = 0.3;  | 
48  |  |  | 
49  |  | const float kNonAmbiguousMargin = 1.0;  | 
50  |  |  | 
51  |  | // General scripts  | 
52  |  | static const char *han_script = "Han";  | 
53  |  | static const char *latin_script = "Latin";  | 
54  |  | static const char *katakana_script = "Katakana";  | 
55  |  | static const char *hiragana_script = "Hiragana";  | 
56  |  | static const char *hangul_script = "Hangul";  | 
57  |  |  | 
58  |  | // Pseudo-scripts Name  | 
59  |  | const char *ScriptDetector::korean_script_ = "Korean";  | 
60  |  | const char *ScriptDetector::japanese_script_ = "Japanese";  | 
61  |  | const char *ScriptDetector::fraktur_script_ = "Fraktur";  | 
62  |  |  | 
63  | 0  | void OSResults::update_best_orientation() { | 
64  | 0  |   float first = orientations[0];  | 
65  | 0  |   float second = orientations[1];  | 
66  | 0  |   best_result.orientation_id = 0;  | 
67  | 0  |   if (orientations[0] < orientations[1]) { | 
68  | 0  |     first = orientations[1];  | 
69  | 0  |     second = orientations[0];  | 
70  | 0  |     best_result.orientation_id = 1;  | 
71  | 0  |   }  | 
72  | 0  |   for (int i = 2; i < 4; ++i) { | 
73  | 0  |     if (orientations[i] > first) { | 
74  | 0  |       second = first;  | 
75  | 0  |       first = orientations[i];  | 
76  | 0  |       best_result.orientation_id = i;  | 
77  | 0  |     } else if (orientations[i] > second) { | 
78  | 0  |       second = orientations[i];  | 
79  | 0  |     }  | 
80  | 0  |   }  | 
81  |  |   // Store difference of top two orientation scores.  | 
82  | 0  |   best_result.oconfidence = first - second;  | 
83  | 0  | }  | 
84  |  |  | 
85  | 0  | void OSResults::set_best_orientation(int orientation_id) { | 
86  | 0  |   best_result.orientation_id = orientation_id;  | 
87  | 0  |   best_result.oconfidence = 0;  | 
88  | 0  | }  | 
89  |  |  | 
90  | 0  | void OSResults::update_best_script(int orientation) { | 
91  |  |   // We skip index 0 to ignore the "Common" script.  | 
92  | 0  |   float first = scripts_na[orientation][1];  | 
93  | 0  |   float second = scripts_na[orientation][2];  | 
94  | 0  |   best_result.script_id = 1;  | 
95  | 0  |   if (scripts_na[orientation][1] < scripts_na[orientation][2]) { | 
96  | 0  |     first = scripts_na[orientation][2];  | 
97  | 0  |     second = scripts_na[orientation][1];  | 
98  | 0  |     best_result.script_id = 2;  | 
99  | 0  |   }  | 
100  | 0  |   for (int i = 3; i < kMaxNumberOfScripts; ++i) { | 
101  | 0  |     if (scripts_na[orientation][i] > first) { | 
102  | 0  |       best_result.script_id = i;  | 
103  | 0  |       second = first;  | 
104  | 0  |       first = scripts_na[orientation][i];  | 
105  | 0  |     } else if (scripts_na[orientation][i] > second) { | 
106  | 0  |       second = scripts_na[orientation][i];  | 
107  | 0  |     }  | 
108  | 0  |   }  | 
109  | 0  |   best_result.sconfidence =  | 
110  | 0  |       (second == 0.0f) ? 2.0f : (first / second - 1.0) / (kScriptAcceptRatio - 1.0);  | 
111  | 0  | }  | 
112  |  |  | 
113  | 0  | int OSResults::get_best_script(int orientation_id) const { | 
114  | 0  |   int max_id = -1;  | 
115  | 0  |   for (int j = 0; j < kMaxNumberOfScripts; ++j) { | 
116  | 0  |     const char *script = unicharset->get_script_from_script_id(j);  | 
117  | 0  |     if (strcmp(script, "Common") && strcmp(script, "NULL")) { | 
118  | 0  |       if (max_id == -1 || scripts_na[orientation_id][j] > scripts_na[orientation_id][max_id]) { | 
119  | 0  |         max_id = j;  | 
120  | 0  |       }  | 
121  | 0  |     }  | 
122  | 0  |   }  | 
123  | 0  |   return max_id;  | 
124  | 0  | }  | 
125  |  |  | 
126  |  | // Print the script scores for all possible orientations.  | 
127  | 0  | void OSResults::print_scores(void) const { | 
128  | 0  |   for (int i = 0; i < 4; ++i) { | 
129  | 0  |     tprintf("Orientation id #%d", i); | 
130  | 0  |     print_scores(i);  | 
131  | 0  |   }  | 
132  | 0  | }  | 
133  |  |  | 
134  |  | // Print the script scores for the given candidate orientation.  | 
135  | 0  | void OSResults::print_scores(int orientation_id) const { | 
136  | 0  |   for (int j = 0; j < kMaxNumberOfScripts; ++j) { | 
137  | 0  |     if (scripts_na[orientation_id][j]) { | 
138  | 0  |       tprintf("%12s\t: %f\n", unicharset->get_script_from_script_id(j), | 
139  | 0  |               scripts_na[orientation_id][j]);  | 
140  | 0  |     }  | 
141  | 0  |   }  | 
142  | 0  | }  | 
143  |  |  | 
144  |  | // Accumulate scores with given OSResults instance and update the best script.  | 
145  | 0  | void OSResults::accumulate(const OSResults &osr) { | 
146  | 0  |   for (int i = 0; i < 4; ++i) { | 
147  | 0  |     orientations[i] += osr.orientations[i];  | 
148  | 0  |     for (int j = 0; j < kMaxNumberOfScripts; ++j) { | 
149  | 0  |       scripts_na[i][j] += osr.scripts_na[i][j];  | 
150  | 0  |     }  | 
151  | 0  |   }  | 
152  | 0  |   unicharset = osr.unicharset;  | 
153  | 0  |   update_best_orientation();  | 
154  | 0  |   update_best_script(best_result.orientation_id);  | 
155  | 0  | }  | 
156  |  |  | 
157  |  | // Detect and erase horizontal/vertical lines and picture regions from the  | 
158  |  | // image, so that non-text blobs are removed from consideration.  | 
159  |  | static void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,  | 
160  | 0  |                                    TO_BLOCK_LIST *to_blocks) { | 
161  | 0  |   Image pix = tess->pix_binary();  | 
162  | 0  |   ASSERT_HOST(pix != nullptr);  | 
163  | 0  |   int vertical_x = 0;  | 
164  | 0  |   int vertical_y = 1;  | 
165  | 0  |   tesseract::TabVector_LIST v_lines;  | 
166  | 0  |   tesseract::TabVector_LIST h_lines;  | 
167  | 0  |   int resolution;  | 
168  | 0  |   if (kMinCredibleResolution > pixGetXRes(pix)) { | 
169  | 0  |     resolution = kMinCredibleResolution;  | 
170  | 0  |     tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", pixGetXRes(pix), resolution); | 
171  | 0  |   } else { | 
172  | 0  |     resolution = pixGetXRes(pix);  | 
173  | 0  |   }  | 
174  |  | 
  | 
175  | 0  |   tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, &vertical_x, &vertical_y,  | 
176  | 0  |                                             nullptr, &v_lines, &h_lines);  | 
177  | 0  |   Image im_pix = tesseract::ImageFind::FindImages(pix, nullptr);  | 
178  | 0  |   if (im_pix != nullptr) { | 
179  | 0  |     pixSubtract(pix, pix, im_pix);  | 
180  | 0  |     im_pix.destroy();  | 
181  | 0  |   }  | 
182  | 0  |   tess->mutable_textord()->find_components(tess->pix_binary(), blocks, to_blocks);  | 
183  | 0  | }  | 
184  |  |  | 
185  |  | // Find connected components in the page and process a subset until finished or  | 
186  |  | // a stopping criterion is met.  | 
187  |  | // Returns the number of blobs used in making the estimate. 0 implies failure.  | 
188  |  | int orientation_and_script_detection(const char *filename, OSResults *osr,  | 
189  | 0  |                                      tesseract::Tesseract *tess) { | 
190  | 0  |   std::string name = filename; // truncated name  | 
191  |  | 
  | 
192  | 0  |   const char *lastdot = strrchr(name.c_str(), '.');  | 
193  | 0  |   if (lastdot != nullptr) { | 
194  | 0  |     name[lastdot - name.c_str()] = '\0';  | 
195  | 0  |   }  | 
196  |  | 
  | 
197  | 0  |   ASSERT_HOST(tess->pix_binary() != nullptr);  | 
198  | 0  |   int width = pixGetWidth(tess->pix_binary());  | 
199  | 0  |   int height = pixGetHeight(tess->pix_binary());  | 
200  |  | 
  | 
201  | 0  |   BLOCK_LIST blocks;  | 
202  | 0  |   if (!read_unlv_file(name, width, height, &blocks)) { | 
203  | 0  |     FullPageBlock(width, height, &blocks);  | 
204  | 0  |   }  | 
205  |  |  | 
206  |  |   // Try to remove non-text regions from consideration.  | 
207  | 0  |   TO_BLOCK_LIST land_blocks, port_blocks;  | 
208  | 0  |   remove_nontext_regions(tess, &blocks, &port_blocks);  | 
209  |  | 
  | 
210  | 0  |   if (port_blocks.empty()) { | 
211  |  |     // page segmentation did not succeed, so we need to find_components first.  | 
212  | 0  |     tess->mutable_textord()->find_components(tess->pix_binary(), &blocks, &port_blocks);  | 
213  | 0  |   } else { | 
214  | 0  |     TBOX page_box(0, 0, width, height);  | 
215  |  |     // Filter_blobs sets up the TO_BLOCKs the same as find_components does.  | 
216  | 0  |     tess->mutable_textord()->filter_blobs(page_box.topright(), &port_blocks, true);  | 
217  | 0  |   }  | 
218  |  | 
  | 
219  | 0  |   return os_detect(&port_blocks, osr, tess);  | 
220  | 0  | }  | 
221  |  |  | 
222  |  | // Filter and sample the blobs.  | 
223  |  | // Returns a non-zero number of blobs if the page was successfully processed, or  | 
224  |  | // zero if the page had too few characters to be reliable  | 
225  | 0  | int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr, tesseract::Tesseract *tess) { | 
226  |  | #if !defined(NDEBUG)  | 
227  |  |   int blobs_total = 0;  | 
228  |  | #endif  | 
229  | 0  |   TO_BLOCK_IT block_it;  | 
230  | 0  |   block_it.set_to_list(port_blocks);  | 
231  |  | 
  | 
232  | 0  |   BLOBNBOX_CLIST filtered_list;  | 
233  | 0  |   BLOBNBOX_C_IT filtered_it(&filtered_list);  | 
234  |  | 
  | 
235  | 0  |   for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { | 
236  | 0  |     TO_BLOCK *to_block = block_it.data();  | 
237  | 0  |     if (to_block->block->pdblk.poly_block() && !to_block->block->pdblk.poly_block()->IsText()) { | 
238  | 0  |       continue;  | 
239  | 0  |     }  | 
240  | 0  |     BLOBNBOX_IT bbox_it;  | 
241  | 0  |     bbox_it.set_to_list(&to_block->blobs);  | 
242  | 0  |     for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { | 
243  | 0  |       BLOBNBOX *bbox = bbox_it.data();  | 
244  | 0  |       C_BLOB *blob = bbox->cblob();  | 
245  | 0  |       TBOX box = blob->bounding_box();  | 
246  |  | #if !defined(NDEBUG)  | 
247  |  |       ++blobs_total;  | 
248  |  | #endif  | 
249  |  |  | 
250  |  |       // Catch illegal value of box width and avoid division by zero.  | 
251  | 0  |       if (box.width() == 0) { | 
252  | 0  |         continue;  | 
253  | 0  |       }  | 
254  |  |       // TODO: Can height and width be negative? If not, remove fabs.  | 
255  | 0  |       float y_x = std::fabs((box.height() * 1.0f) / box.width());  | 
256  | 0  |       float x_y = 1.0f / y_x;  | 
257  |  |       // Select a >= 1.0 ratio  | 
258  | 0  |       float ratio = x_y > y_x ? x_y : y_x;  | 
259  |  |       // Blob is ambiguous  | 
260  | 0  |       if (ratio > kSizeRatioToReject) { | 
261  | 0  |         continue;  | 
262  | 0  |       }  | 
263  | 0  |       if (box.height() < kMinAcceptableBlobHeight) { | 
264  | 0  |         continue;  | 
265  | 0  |       }  | 
266  | 0  |       filtered_it.add_to_end(bbox);  | 
267  | 0  |     }  | 
268  | 0  |   }  | 
269  | 0  |   return os_detect_blobs(nullptr, &filtered_list, osr, tess);  | 
270  | 0  | }  | 
271  |  |  | 
272  |  | // Detect orientation and script from a list of blobs.  | 
273  |  | // Returns a non-zero number of blobs if the list was successfully processed, or  | 
274  |  | // zero if the list had too few characters to be reliable.  | 
275  |  | // If allowed_scripts is non-null and non-empty, it is a list of scripts that  | 
276  |  | // constrains both orientation and script detection to consider only scripts  | 
277  |  | // from the list.  | 
278  |  | int os_detect_blobs(const std::vector<int> *allowed_scripts, BLOBNBOX_CLIST *blob_list,  | 
279  | 0  |                     OSResults *osr, tesseract::Tesseract *tess) { | 
280  | 0  |   OSResults osr_;  | 
281  | 0  |   int minCharactersToTry = tess->min_characters_to_try;  | 
282  | 0  |   int maxCharactersToTry = 5 * minCharactersToTry;  | 
283  | 0  |   if (osr == nullptr) { | 
284  | 0  |     osr = &osr_;  | 
285  | 0  |   }  | 
286  |  | 
  | 
287  | 0  |   osr->unicharset = &tess->unicharset;  | 
288  | 0  |   OrientationDetector o(allowed_scripts, osr);  | 
289  | 0  |   ScriptDetector s(allowed_scripts, osr, tess);  | 
290  |  | 
  | 
291  | 0  |   BLOBNBOX_C_IT filtered_it(blob_list);  | 
292  | 0  |   int real_max = std::min(filtered_it.length(), maxCharactersToTry);  | 
293  |  |   // tprintf("Total blobs found = %d\n", blobs_total); | 
294  |  |   // tprintf("Number of blobs post-filtering = %d\n", filtered_it.length()); | 
295  |  |   // tprintf("Number of blobs to try = %d\n", real_max); | 
296  |  |  | 
297  |  |   // If there are too few characters, skip this page entirely.  | 
298  | 0  |   if (real_max < minCharactersToTry / 2) { | 
299  | 0  |     tprintf("Too few characters. Skipping this page\n"); | 
300  | 0  |     return 0;  | 
301  | 0  |   }  | 
302  |  |  | 
303  | 0  |   auto **blobs = new BLOBNBOX *[filtered_it.length()];  | 
304  | 0  |   int number_of_blobs = 0;  | 
305  | 0  |   for (filtered_it.mark_cycle_pt(); !filtered_it.cycled_list(); filtered_it.forward()) { | 
306  | 0  |     blobs[number_of_blobs++] = filtered_it.data();  | 
307  | 0  |   }  | 
308  | 0  |   QRSequenceGenerator sequence(number_of_blobs);  | 
309  | 0  |   int num_blobs_evaluated = 0;  | 
310  | 0  |   for (int i = 0; i < real_max; ++i) { | 
311  | 0  |     if (os_detect_blob(blobs[sequence.GetVal()], &o, &s, osr, tess) && i > minCharactersToTry) { | 
312  | 0  |       break;  | 
313  | 0  |     }  | 
314  | 0  |     ++num_blobs_evaluated;  | 
315  | 0  |   }  | 
316  | 0  |   delete[] blobs;  | 
317  |  |  | 
318  |  |   // Make sure the best_result is up-to-date  | 
319  | 0  |   int orientation = o.get_orientation();  | 
320  | 0  |   osr->update_best_script(orientation);  | 
321  | 0  |   return num_blobs_evaluated;  | 
322  | 0  | }  | 
323  |  |  | 
324  |  | // Processes a single blob to estimate script and orientation.  | 
325  |  | // Return true if estimate of orientation and script satisfies stopping  | 
326  |  | // criteria.  | 
327  |  | bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s, OSResults *osr,  | 
328  | 0  |                     tesseract::Tesseract *tess) { | 
329  | 0  |   tess->tess_cn_matching.set_value(true); // turn it on  | 
330  | 0  |   tess->tess_bn_matching.set_value(false);  | 
331  | 0  |   C_BLOB *blob = bbox->cblob();  | 
332  | 0  |   TBLOB *tblob = TBLOB::PolygonalCopy(tess->poly_allow_detailed_fx, blob);  | 
333  | 0  |   TBOX box = tblob->bounding_box();  | 
334  | 0  |   FCOORD current_rotation(1.0f, 0.0f);  | 
335  | 0  |   FCOORD rotation90(0.0f, 1.0f);  | 
336  | 0  |   BLOB_CHOICE_LIST ratings[4];  | 
337  |  |   // Test the 4 orientations  | 
338  | 0  |   for (int i = 0; i < 4; ++i) { | 
339  |  |     // Normalize the blob. Set the origin to the place we want to be the  | 
340  |  |     // bottom-middle after rotation.  | 
341  |  |     // Scaling is to make the rotated height the x-height.  | 
342  | 0  |     float scaling = static_cast<float>(kBlnXHeight) / box.height();  | 
343  | 0  |     float x_origin = (box.left() + box.right()) / 2.0f;  | 
344  | 0  |     float y_origin = (box.bottom() + box.top()) / 2.0f;  | 
345  | 0  |     if (i == 0 || i == 2) { | 
346  |  |       // Rotation is 0 or 180.  | 
347  | 0  |       y_origin = i == 0 ? box.bottom() : box.top();  | 
348  | 0  |     } else { | 
349  |  |       // Rotation is 90 or 270.  | 
350  | 0  |       scaling = static_cast<float>(kBlnXHeight) / box.width();  | 
351  | 0  |       x_origin = i == 1 ? box.left() : box.right();  | 
352  | 0  |     }  | 
353  | 0  |     std::unique_ptr<TBLOB> rotated_blob(new TBLOB(*tblob));  | 
354  | 0  |     rotated_blob->Normalize(nullptr, ¤t_rotation, nullptr, x_origin, y_origin, scaling,  | 
355  | 0  |                             scaling, 0.0f, static_cast<float>(kBlnBaselineOffset), false, nullptr);  | 
356  | 0  |     tess->AdaptiveClassifier(rotated_blob.get(), ratings + i);  | 
357  | 0  |     current_rotation.rotate(rotation90);  | 
358  | 0  |   }  | 
359  | 0  |   delete tblob;  | 
360  |  | 
  | 
361  | 0  |   bool stop = o->detect_blob(ratings);  | 
362  | 0  |   s->detect_blob(ratings);  | 
363  | 0  |   int orientation = o->get_orientation();  | 
364  | 0  |   stop = s->must_stop(orientation) && stop;  | 
365  | 0  |   return stop;  | 
366  | 0  | }  | 
367  |  |  | 
368  | 0  | OrientationDetector::OrientationDetector(const std::vector<int> *allowed_scripts, OSResults *osr) { | 
369  | 0  |   osr_ = osr;  | 
370  | 0  |   allowed_scripts_ = allowed_scripts;  | 
371  | 0  | }  | 
372  |  |  | 
373  |  | // Score the given blob and return true if it is now sure of the orientation  | 
374  |  | // after adding this block.  | 
375  | 0  | bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST *scores) { | 
376  | 0  |   float blob_o_score[4] = {0.0f, 0.0f, 0.0f, 0.0f}; | 
377  | 0  |   float total_blob_o_score = 0.0f;  | 
378  |  | 
  | 
379  | 0  |   for (int i = 0; i < 4; ++i) { | 
380  | 0  |     BLOB_CHOICE_IT choice_it(scores + i);  | 
381  | 0  |     if (!choice_it.empty()) { | 
382  | 0  |       BLOB_CHOICE *choice = nullptr;  | 
383  | 0  |       if (allowed_scripts_ != nullptr && !allowed_scripts_->empty()) { | 
384  |  |         // Find the top choice in an allowed script.  | 
385  | 0  |         for (choice_it.mark_cycle_pt(); !choice_it.cycled_list() && choice == nullptr;  | 
386  | 0  |              choice_it.forward()) { | 
387  | 0  |           int choice_script = choice_it.data()->script_id();  | 
388  | 0  |           unsigned s = 0;  | 
389  | 0  |           for (s = 0; s < allowed_scripts_->size(); ++s) { | 
390  | 0  |             if ((*allowed_scripts_)[s] == choice_script) { | 
391  | 0  |               choice = choice_it.data();  | 
392  | 0  |               break;  | 
393  | 0  |             }  | 
394  | 0  |           }  | 
395  | 0  |         }  | 
396  | 0  |       } else { | 
397  | 0  |         choice = choice_it.data();  | 
398  | 0  |       }  | 
399  | 0  |       if (choice != nullptr) { | 
400  |  |         // The certainty score ranges between [-20,0]. This is converted here to  | 
401  |  |         // [0,1], with 1 indicating best match.  | 
402  | 0  |         blob_o_score[i] = 1 + 0.05 * choice->certainty();  | 
403  | 0  |         total_blob_o_score += blob_o_score[i];  | 
404  | 0  |       }  | 
405  | 0  |     }  | 
406  | 0  |   }  | 
407  | 0  |   if (total_blob_o_score == 0.0) { | 
408  | 0  |     return false;  | 
409  | 0  |   }  | 
410  |  |   // Fill in any blanks with the worst score of the others. This is better than  | 
411  |  |   // picking an arbitrary probability for it and way better than -inf.  | 
412  | 0  |   float worst_score = 0.0f;  | 
413  | 0  |   int num_good_scores = 0;  | 
414  | 0  |   for (float f : blob_o_score) { | 
415  | 0  |     if (f > 0.0f) { | 
416  | 0  |       ++num_good_scores;  | 
417  | 0  |       if (worst_score == 0.0f || f < worst_score) { | 
418  | 0  |         worst_score = f;  | 
419  | 0  |       }  | 
420  | 0  |     }  | 
421  | 0  |   }  | 
422  | 0  |   if (num_good_scores == 1) { | 
423  |  |     // Lower worst if there is only one.  | 
424  | 0  |     worst_score /= 2.0f;  | 
425  | 0  |   }  | 
426  | 0  |   for (float &f : blob_o_score) { | 
427  | 0  |     if (f == 0.0f) { | 
428  | 0  |       f = worst_score;  | 
429  | 0  |       total_blob_o_score += worst_score;  | 
430  | 0  |     }  | 
431  | 0  |   }  | 
432  |  |   // Normalize the orientation scores for the blob and use them to  | 
433  |  |   // update the aggregated orientation score.  | 
434  | 0  |   for (int i = 0; total_blob_o_score != 0 && i < 4; ++i) { | 
435  | 0  |     osr_->orientations[i] += std::log(blob_o_score[i] / total_blob_o_score);  | 
436  | 0  |   }  | 
437  |  |  | 
438  |  |   // TODO(ranjith) Add an early exit test, based on min_orientation_margin,  | 
439  |  |   // as used in pagesegmain.cpp.  | 
440  | 0  |   return false;  | 
441  | 0  | }  | 
442  |  |  | 
443  | 0  | int OrientationDetector::get_orientation() { | 
444  | 0  |   osr_->update_best_orientation();  | 
445  | 0  |   return osr_->best_result.orientation_id;  | 
446  | 0  | }  | 
447  |  |  | 
448  |  | ScriptDetector::ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,  | 
449  | 0  |                                tesseract::Tesseract *tess) { | 
450  | 0  |   osr_ = osr;  | 
451  | 0  |   tess_ = tess;  | 
452  | 0  |   allowed_scripts_ = allowed_scripts;  | 
453  | 0  |   katakana_id_ = tess_->unicharset.add_script(katakana_script);  | 
454  | 0  |   hiragana_id_ = tess_->unicharset.add_script(hiragana_script);  | 
455  | 0  |   han_id_ = tess_->unicharset.add_script(han_script);  | 
456  | 0  |   hangul_id_ = tess_->unicharset.add_script(hangul_script);  | 
457  | 0  |   japanese_id_ = tess_->unicharset.add_script(japanese_script_);  | 
458  | 0  |   korean_id_ = tess_->unicharset.add_script(korean_script_);  | 
459  | 0  |   latin_id_ = tess_->unicharset.add_script(latin_script);  | 
460  | 0  |   fraktur_id_ = tess_->unicharset.add_script(fraktur_script_);  | 
461  | 0  | }  | 
462  |  |  | 
463  |  | // Score the given blob and return true if it is now sure of the script after  | 
464  |  | // adding this blob.  | 
465  | 0  | void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) { | 
466  | 0  |   for (int i = 0; i < 4; ++i) { | 
467  | 0  |     std::vector<bool> done(kMaxNumberOfScripts);  | 
468  |  | 
  | 
469  | 0  |     BLOB_CHOICE_IT choice_it;  | 
470  | 0  |     choice_it.set_to_list(scores + i);  | 
471  |  | 
  | 
472  | 0  |     float prev_score = -1;  | 
473  | 0  |     int script_count = 0;  | 
474  | 0  |     int prev_id = -1;  | 
475  | 0  |     int prev_fontinfo_id = -1;  | 
476  | 0  |     const char *prev_unichar = "";  | 
477  | 0  |     const char *unichar = "";  | 
478  |  | 
  | 
479  | 0  |     for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) { | 
480  | 0  |       BLOB_CHOICE *choice = choice_it.data();  | 
481  | 0  |       int id = choice->script_id();  | 
482  | 0  |       if (allowed_scripts_ != nullptr && !allowed_scripts_->empty()) { | 
483  |  |         // Check that the choice is in an allowed script.  | 
484  | 0  |         size_t s = 0;  | 
485  | 0  |         for (s = 0; s < allowed_scripts_->size(); ++s) { | 
486  | 0  |           if ((*allowed_scripts_)[s] == id) { | 
487  | 0  |             break;  | 
488  | 0  |           }  | 
489  | 0  |         }  | 
490  | 0  |         if (s == allowed_scripts_->size()) { | 
491  | 0  |           continue; // Not found in list.  | 
492  | 0  |         }  | 
493  | 0  |       }  | 
494  |  |       // Script already processed before.  | 
495  | 0  |       if (done.at(id)) { | 
496  | 0  |         continue;  | 
497  | 0  |       }  | 
498  | 0  |       done[id] = true;  | 
499  |  | 
  | 
500  | 0  |       unichar = tess_->unicharset.id_to_unichar(choice->unichar_id());  | 
501  |  |       // Save data from the first match  | 
502  | 0  |       if (prev_score < 0) { | 
503  | 0  |         prev_score = -choice->certainty();  | 
504  | 0  |         script_count = 1;  | 
505  | 0  |         prev_id = id;  | 
506  | 0  |         prev_unichar = unichar;  | 
507  | 0  |         prev_fontinfo_id = choice->fontinfo_id();  | 
508  | 0  |       } else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) { | 
509  | 0  |         ++script_count;  | 
510  | 0  |       }  | 
511  |  | 
  | 
512  | 0  |       if (strlen(prev_unichar) == 1) { | 
513  | 0  |         if (unichar[0] >= '0' && unichar[0] <= '9') { | 
514  | 0  |           break;  | 
515  | 0  |         }  | 
516  | 0  |       }  | 
517  |  |  | 
518  |  |       // if script_count is >= 2, character is ambiguous, skip other matches  | 
519  |  |       // since they are useless.  | 
520  | 0  |       if (script_count >= 2) { | 
521  | 0  |         break;  | 
522  | 0  |       }  | 
523  | 0  |     }  | 
524  |  |     // Character is non ambiguous  | 
525  | 0  |     if (script_count == 1) { | 
526  |  |       // Update the score of the winning script  | 
527  | 0  |       osr_->scripts_na[i][prev_id] += 1.0;  | 
528  |  |  | 
529  |  |       // Workaround for Fraktur  | 
530  | 0  |       if (prev_id == latin_id_) { | 
531  | 0  |         if (prev_fontinfo_id >= 0) { | 
532  | 0  |           const tesseract::FontInfo &fi = tess_->get_fontinfo_table().at(prev_fontinfo_id);  | 
533  |  |           // printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name, | 
534  |  |           //       fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(),  | 
535  |  |           //       fi.is_serif(), fi.is_fraktur(),  | 
536  |  |           //       prev_unichar);  | 
537  | 0  |           if (fi.is_fraktur()) { | 
538  | 0  |             osr_->scripts_na[i][prev_id] -= 1.0;  | 
539  | 0  |             osr_->scripts_na[i][fraktur_id_] += 1.0;  | 
540  | 0  |           }  | 
541  | 0  |         }  | 
542  | 0  |       }  | 
543  |  |  | 
544  |  |       // Update Japanese / Korean pseudo-scripts  | 
545  | 0  |       if (prev_id == katakana_id_) { | 
546  | 0  |         osr_->scripts_na[i][japanese_id_] += 1.0;  | 
547  | 0  |       }  | 
548  | 0  |       if (prev_id == hiragana_id_) { | 
549  | 0  |         osr_->scripts_na[i][japanese_id_] += 1.0;  | 
550  | 0  |       }  | 
551  | 0  |       if (prev_id == hangul_id_) { | 
552  | 0  |         osr_->scripts_na[i][korean_id_] += 1.0;  | 
553  | 0  |       }  | 
554  | 0  |       if (prev_id == han_id_) { | 
555  | 0  |         osr_->scripts_na[i][korean_id_] += kHanRatioInKorean;  | 
556  | 0  |         osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;  | 
557  | 0  |       }  | 
558  | 0  |     }  | 
559  | 0  |   } // iterate over each orientation  | 
560  | 0  | }  | 
561  |  |  | 
562  | 0  | bool ScriptDetector::must_stop(int orientation) const { | 
563  | 0  |   osr_->update_best_script(orientation);  | 
564  | 0  |   return osr_->best_result.sconfidence > 1;  | 
565  | 0  | }  | 
566  |  |  | 
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
// Ids 0, 1, 2 and 3 map to 0, 270, 180 and 90; any other id yields -1.
int OrientationIdToValue(const int &id) {
  static const int kDegreesById[4] = {0, 270, 180, 90};
  if (id < 0 || id > 3) {
    return -1;
  }
  return kDegreesById[id];
}
584  |  |  | 
585  |  | } // namespace tesseract  |