/src/tesseract/src/ccstruct/matrix.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /******************************************************************************  | 
2  |  |  *  | 
3  |  |  * File:         matrix.cpp  (Formerly matrix.c)  | 
4  |  |  * Description:  Ratings matrix code. (Used by associator)  | 
5  |  |  * Author:       Mark Seaman, OCR Technology  | 
6  |  |  *  | 
7  |  |  * (c) Copyright 1990, Hewlett-Packard Company.  | 
8  |  |  ** Licensed under the Apache License, Version 2.0 (the "License");  | 
9  |  |  ** you may not use this file except in compliance with the License.  | 
10  |  |  ** You may obtain a copy of the License at  | 
11  |  |  ** http://www.apache.org/licenses/LICENSE-2.0  | 
12  |  |  ** Unless required by applicable law or agreed to in writing, software  | 
13  |  |  ** distributed under the License is distributed on an "AS IS" BASIS,  | 
14  |  |  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
15  |  |  ** See the License for the specific language governing permissions and  | 
16  |  |  ** limitations under the License.  | 
17  |  |  *  | 
18  |  |  *****************************************************************************/  | 
19  |  | /*----------------------------------------------------------------------  | 
20  |  |               I n c l u d e s  | 
21  |  | ----------------------------------------------------------------------*/  | 
22  |  | #include "matrix.h"  | 
23  |  |  | 
24  |  | #include "ratngs.h"  | 
25  |  | #include "tprintf.h"  | 
26  |  | #include "unicharset.h"  | 
27  |  |  | 
28  |  | namespace tesseract { | 
29  |  |  | 
30  |  | // Destructor.  | 
31  |  | // It is defined here, so the compiler can create a single vtable  | 
32  |  | // instead of weak vtables in every compilation unit.  | 
33  | 791k  | MATRIX::~MATRIX() = default;  | 
34  |  |  | 
35  |  | // Returns true if there are any real classification results.  | 
36  | 11.4M  | bool MATRIX::Classified(int col, int row, int wildcard_id) const { | 
37  | 11.4M  |   if (get(col, row) == NOT_CLASSIFIED) { | 
38  | 8.08M  |     return false;  | 
39  | 8.08M  |   }  | 
40  | 3.35M  |   BLOB_CHOICE_IT b_it(get(col, row));  | 
41  | 3.67M  |   for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { | 
42  | 3.36M  |     BLOB_CHOICE *choice = b_it.data();  | 
43  | 3.36M  |     if (choice->IsClassified()) { | 
44  | 3.04M  |       return true;  | 
45  | 3.04M  |     }  | 
46  | 3.36M  |   }  | 
47  | 313k  |   return false;  | 
48  | 3.35M  | }  | 
49  |  |  | 
50  |  | // Expands the existing matrix in-place to make the band wider, without  | 
51  |  | // losing any existing data.  | 
52  | 64.2k  | void MATRIX::IncreaseBandSize(int bandwidth) { | 
53  | 64.2k  |   ResizeWithCopy(dimension(), bandwidth);  | 
54  | 64.2k  | }  | 
55  |  |  | 
56  |  | // Returns a bigger MATRIX with a new column and row in the matrix in order  | 
57  |  | // to split the blob at the given (ind,ind) diagonal location.  | 
58  |  | // Entries are relocated to the new MATRIX using the transformation defined  | 
59  |  | // by MATRIX_COORD::MapForSplit.  | 
60  |  | // Transfers the pointer data to the new MATRIX and deletes *this.  | 
61  | 188k  | MATRIX *MATRIX::ConsumeAndMakeBigger(int ind) { | 
62  | 188k  |   int dim = dimension();  | 
63  | 188k  |   int band_width = bandwidth();  | 
64  |  |   // Check to see if bandwidth needs expanding.  | 
65  | 894k  |   for (int col = ind; col >= 0 && col > ind - band_width; --col) { | 
66  | 742k  |     if (array_[col * band_width + band_width - 1] != empty_) { | 
67  | 37.5k  |       ++band_width;  | 
68  | 37.5k  |       break;  | 
69  | 37.5k  |     }  | 
70  | 742k  |   }  | 
71  | 188k  |   auto *result = new MATRIX(dim + 1, band_width);  | 
72  |  |  | 
73  | 2.87M  |   for (int col = 0; col < dim; ++col) { | 
74  | 17.8M  |     for (int row = col; row < dim && row < col + bandwidth(); ++row) { | 
75  | 15.1M  |       MATRIX_COORD coord(col, row);  | 
76  | 15.1M  |       coord.MapForSplit(ind);  | 
77  | 15.1M  |       BLOB_CHOICE_LIST *choices = get(col, row);  | 
78  | 15.1M  |       if (choices != nullptr) { | 
79  |  |         // Correct matrix location on each choice.  | 
80  | 3.84M  |         BLOB_CHOICE_IT bc_it(choices);  | 
81  | 21.3M  |         for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { | 
82  | 17.5M  |           BLOB_CHOICE *choice = bc_it.data();  | 
83  | 17.5M  |           choice->set_matrix_cell(coord.col, coord.row);  | 
84  | 17.5M  |         }  | 
85  | 3.84M  |         ASSERT_HOST(coord.Valid(*result));  | 
86  | 3.84M  |         result->put(coord.col, coord.row, choices);  | 
87  | 3.84M  |       }  | 
88  | 15.1M  |     }  | 
89  | 2.68M  |   }  | 
90  | 188k  |   delete this;  | 
91  | 188k  |   return result;  | 
92  | 188k  | }  | 
93  |  |  | 
94  |  | // Makes and returns a deep copy of *this, including all the BLOB_CHOICEs  | 
95  |  | // on the lists, but not any LanguageModelState that may be attached to the  | 
96  |  | // BLOB_CHOICEs.  | 
97  | 0  | MATRIX *MATRIX::DeepCopy() const { | 
98  | 0  |   int dim = dimension();  | 
99  | 0  |   int band_width = bandwidth();  | 
100  | 0  |   auto *result = new MATRIX(dim, band_width);  | 
101  | 0  |   for (int col = 0; col < dim; ++col) { | 
102  | 0  |     for (int row = col; row < dim && row < col + band_width; ++row) { | 
103  | 0  |       BLOB_CHOICE_LIST *choices = get(col, row);  | 
104  | 0  |       if (choices != nullptr) { | 
105  | 0  |         auto *copy_choices = new BLOB_CHOICE_LIST;  | 
106  | 0  |         copy_choices->deep_copy(choices, &BLOB_CHOICE::deep_copy);  | 
107  | 0  |         result->put(col, row, copy_choices);  | 
108  | 0  |       }  | 
109  | 0  |     }  | 
110  | 0  |   }  | 
111  | 0  |   return result;  | 
112  | 0  | }  | 
113  |  |  | 
114  |  | // Print the best guesses out of the match rating matrix.  | 
115  | 0  | void MATRIX::print(const UNICHARSET &unicharset) const { | 
116  | 0  |   tprintf("Ratings Matrix (top 3 choices)\n"); | 
117  | 0  |   int dim = dimension();  | 
118  | 0  |   int band_width = bandwidth();  | 
119  | 0  |   int row, col;  | 
120  | 0  |   for (col = 0; col < dim; ++col) { | 
121  | 0  |     for (row = col; row < dim && row < col + band_width; ++row) { | 
122  | 0  |       BLOB_CHOICE_LIST *rating = this->get(col, row);  | 
123  | 0  |       if (rating == NOT_CLASSIFIED) { | 
124  | 0  |         continue;  | 
125  | 0  |       }  | 
126  | 0  |       BLOB_CHOICE_IT b_it(rating);  | 
127  | 0  |       tprintf("col=%d row=%d ", col, row); | 
128  | 0  |       for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { | 
129  | 0  |         tprintf("%s rat=%g cert=%g ", unicharset.id_to_unichar(b_it.data()->unichar_id()), | 
130  | 0  |                 b_it.data()->rating(), b_it.data()->certainty());  | 
131  | 0  |       }  | 
132  | 0  |       tprintf("\n"); | 
133  | 0  |     }  | 
134  | 0  |     tprintf("\n"); | 
135  | 0  |   }  | 
136  | 0  |   tprintf("\n"); | 
137  | 0  |   for (col = 0; col < dim; ++col) { | 
138  | 0  |     tprintf("\t%d", col); | 
139  | 0  |   }  | 
140  | 0  |   tprintf("\n"); | 
141  | 0  |   for (row = 0; row < dim; ++row) { | 
142  | 0  |     for (col = 0; col <= row; ++col) { | 
143  | 0  |       if (col == 0) { | 
144  | 0  |         tprintf("%d\t", row); | 
145  | 0  |       }  | 
146  | 0  |       if (row >= col + band_width) { | 
147  | 0  |         tprintf(" \t"); | 
148  | 0  |         continue;  | 
149  | 0  |       }  | 
150  | 0  |       BLOB_CHOICE_LIST *rating = this->get(col, row);  | 
151  | 0  |       if (rating != NOT_CLASSIFIED) { | 
152  | 0  |         BLOB_CHOICE_IT b_it(rating);  | 
153  | 0  |         int counter = 0;  | 
154  | 0  |         for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { | 
155  | 0  |           tprintf("%s ", unicharset.id_to_unichar(b_it.data()->unichar_id())); | 
156  | 0  |           ++counter;  | 
157  | 0  |           if (counter == 3) { | 
158  | 0  |             break;  | 
159  | 0  |           }  | 
160  | 0  |         }  | 
161  | 0  |         tprintf("\t"); | 
162  | 0  |       } else { | 
163  | 0  |         tprintf(" \t"); | 
164  | 0  |       }  | 
165  | 0  |     }  | 
166  | 0  |     tprintf("\n"); | 
167  | 0  |   }  | 
168  | 0  | }  | 
169  |  |  | 
170  |  | } // namespace tesseract  |