/src/tesseract/src/ccstruct/matrix.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * File: matrix.cpp (Formerly matrix.c) |
4 | | * Description: Ratings matrix code. (Used by associator) |
5 | | * Author: Mark Seaman, OCR Technology |
6 | | * |
7 | | * (c) Copyright 1990, Hewlett-Packard Company. |
8 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | ** you may not use this file except in compliance with the License. |
10 | | ** You may obtain a copy of the License at |
11 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
12 | | ** Unless required by applicable law or agreed to in writing, software |
13 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
14 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | ** See the License for the specific language governing permissions and |
16 | | ** limitations under the License. |
17 | | * |
18 | | *****************************************************************************/ |
19 | | /*---------------------------------------------------------------------- |
20 | | I n c l u d e s |
21 | | ----------------------------------------------------------------------*/ |
22 | | #include "matrix.h" |
23 | | |
24 | | #include "ratngs.h" |
25 | | #include "tprintf.h" |
26 | | #include "unicharset.h" |
27 | | |
28 | | namespace tesseract { |
29 | | |
30 | | // Destructor. |
31 | | // It is defined here, so the compiler can create a single vtable |
32 | | // instead of weak vtables in every compilation unit. |
33 | 791k | MATRIX::~MATRIX() = default; |
34 | | |
35 | | // Returns true if there are any real classification results. |
36 | 11.4M | bool MATRIX::Classified(int col, int row, int wildcard_id) const { |
37 | 11.4M | if (get(col, row) == NOT_CLASSIFIED) { |
38 | 8.08M | return false; |
39 | 8.08M | } |
40 | 3.35M | BLOB_CHOICE_IT b_it(get(col, row)); |
41 | 3.67M | for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { |
42 | 3.36M | BLOB_CHOICE *choice = b_it.data(); |
43 | 3.36M | if (choice->IsClassified()) { |
44 | 3.04M | return true; |
45 | 3.04M | } |
46 | 3.36M | } |
47 | 313k | return false; |
48 | 3.35M | } |
49 | | |
50 | | // Expands the existing matrix in-place to make the band wider, without |
51 | | // losing any existing data. |
52 | 64.2k | void MATRIX::IncreaseBandSize(int bandwidth) { |
53 | 64.2k | ResizeWithCopy(dimension(), bandwidth); |
54 | 64.2k | } |
55 | | |
56 | | // Returns a bigger MATRIX with a new column and row in the matrix in order |
57 | | // to split the blob at the given (ind,ind) diagonal location. |
58 | | // Entries are relocated to the new MATRIX using the transformation defined |
59 | | // by MATRIX_COORD::MapForSplit. |
60 | | // Transfers the pointer data to the new MATRIX and deletes *this. |
61 | 188k | MATRIX *MATRIX::ConsumeAndMakeBigger(int ind) { |
62 | 188k | int dim = dimension(); |
63 | 188k | int band_width = bandwidth(); |
64 | | // Check to see if bandwidth needs expanding. |
65 | 894k | for (int col = ind; col >= 0 && col > ind - band_width; --col) { |
66 | 742k | if (array_[col * band_width + band_width - 1] != empty_) { |
67 | 37.5k | ++band_width; |
68 | 37.5k | break; |
69 | 37.5k | } |
70 | 742k | } |
71 | 188k | auto *result = new MATRIX(dim + 1, band_width); |
72 | | |
73 | 2.87M | for (int col = 0; col < dim; ++col) { |
74 | 17.8M | for (int row = col; row < dim && row < col + bandwidth(); ++row) { |
75 | 15.1M | MATRIX_COORD coord(col, row); |
76 | 15.1M | coord.MapForSplit(ind); |
77 | 15.1M | BLOB_CHOICE_LIST *choices = get(col, row); |
78 | 15.1M | if (choices != nullptr) { |
79 | | // Correct matrix location on each choice. |
80 | 3.84M | BLOB_CHOICE_IT bc_it(choices); |
81 | 21.3M | for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { |
82 | 17.5M | BLOB_CHOICE *choice = bc_it.data(); |
83 | 17.5M | choice->set_matrix_cell(coord.col, coord.row); |
84 | 17.5M | } |
85 | 3.84M | ASSERT_HOST(coord.Valid(*result)); |
86 | 3.84M | result->put(coord.col, coord.row, choices); |
87 | 3.84M | } |
88 | 15.1M | } |
89 | 2.68M | } |
90 | 188k | delete this; |
91 | 188k | return result; |
92 | 188k | } |
93 | | |
94 | | // Makes and returns a deep copy of *this, including all the BLOB_CHOICEs |
95 | | // on the lists, but not any LanguageModelState that may be attached to the |
96 | | // BLOB_CHOICEs. |
97 | 0 | MATRIX *MATRIX::DeepCopy() const { |
98 | 0 | int dim = dimension(); |
99 | 0 | int band_width = bandwidth(); |
100 | 0 | auto *result = new MATRIX(dim, band_width); |
101 | 0 | for (int col = 0; col < dim; ++col) { |
102 | 0 | for (int row = col; row < dim && row < col + band_width; ++row) { |
103 | 0 | BLOB_CHOICE_LIST *choices = get(col, row); |
104 | 0 | if (choices != nullptr) { |
105 | 0 | auto *copy_choices = new BLOB_CHOICE_LIST; |
106 | 0 | copy_choices->deep_copy(choices, &BLOB_CHOICE::deep_copy); |
107 | 0 | result->put(col, row, copy_choices); |
108 | 0 | } |
109 | 0 | } |
110 | 0 | } |
111 | 0 | return result; |
112 | 0 | } |
113 | | |
114 | | // Print the best guesses out of the match rating matrix. |
115 | 0 | void MATRIX::print(const UNICHARSET &unicharset) const { |
116 | 0 | tprintf("Ratings Matrix (top 3 choices)\n"); |
117 | 0 | int dim = dimension(); |
118 | 0 | int band_width = bandwidth(); |
119 | 0 | int row, col; |
120 | 0 | for (col = 0; col < dim; ++col) { |
121 | 0 | for (row = col; row < dim && row < col + band_width; ++row) { |
122 | 0 | BLOB_CHOICE_LIST *rating = this->get(col, row); |
123 | 0 | if (rating == NOT_CLASSIFIED) { |
124 | 0 | continue; |
125 | 0 | } |
126 | 0 | BLOB_CHOICE_IT b_it(rating); |
127 | 0 | tprintf("col=%d row=%d ", col, row); |
128 | 0 | for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { |
129 | 0 | tprintf("%s rat=%g cert=%g ", unicharset.id_to_unichar(b_it.data()->unichar_id()), |
130 | 0 | b_it.data()->rating(), b_it.data()->certainty()); |
131 | 0 | } |
132 | 0 | tprintf("\n"); |
133 | 0 | } |
134 | 0 | tprintf("\n"); |
135 | 0 | } |
136 | 0 | tprintf("\n"); |
137 | 0 | for (col = 0; col < dim; ++col) { |
138 | 0 | tprintf("\t%d", col); |
139 | 0 | } |
140 | 0 | tprintf("\n"); |
141 | 0 | for (row = 0; row < dim; ++row) { |
142 | 0 | for (col = 0; col <= row; ++col) { |
143 | 0 | if (col == 0) { |
144 | 0 | tprintf("%d\t", row); |
145 | 0 | } |
146 | 0 | if (row >= col + band_width) { |
147 | 0 | tprintf(" \t"); |
148 | 0 | continue; |
149 | 0 | } |
150 | 0 | BLOB_CHOICE_LIST *rating = this->get(col, row); |
151 | 0 | if (rating != NOT_CLASSIFIED) { |
152 | 0 | BLOB_CHOICE_IT b_it(rating); |
153 | 0 | int counter = 0; |
154 | 0 | for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { |
155 | 0 | tprintf("%s ", unicharset.id_to_unichar(b_it.data()->unichar_id())); |
156 | 0 | ++counter; |
157 | 0 | if (counter == 3) { |
158 | 0 | break; |
159 | 0 | } |
160 | 0 | } |
161 | 0 | tprintf("\t"); |
162 | 0 | } else { |
163 | 0 | tprintf(" \t"); |
164 | 0 | } |
165 | 0 | } |
166 | 0 | tprintf("\n"); |
167 | 0 | } |
168 | 0 | } |
169 | | |
170 | | } // namespace tesseract |