/src/tesseract/src/textord/linefind.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: linefind.cpp |
3 | | // Description: Class to find vertical lines in an image and create |
4 | | // a corresponding list of empty blobs. |
5 | | // Author: Ray Smith |
6 | | // |
7 | | // (C) Copyright 2008, Google Inc. |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | // you may not use this file except in compliance with the License. |
10 | | // You may obtain a copy of the License at |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // Unless required by applicable law or agreed to in writing, software |
13 | | // distributed under the License is distributed on an "AS IS" BASIS, |
14 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | // See the License for the specific language governing permissions and |
16 | | // limitations under the License. |
17 | | // |
18 | | /////////////////////////////////////////////////////////////////////// |
19 | | |
20 | | #ifdef HAVE_CONFIG_H |
21 | | # include "config_auto.h" |
22 | | #endif |
23 | | |
24 | | #include "alignedblob.h" |
25 | | #include "blobbox.h" |
26 | | #include "crakedge.h" // for CRACKEDGE |
27 | | #include "edgblob.h" |
28 | | #include "linefind.h" |
29 | | #include "tabvector.h" |
30 | | |
31 | | #include <algorithm> |
32 | | |
33 | | namespace tesseract { |
34 | | |
35 | | /// Denominator of resolution makes max pixel width to allow thin lines. |
36 | | const int kThinLineFraction = 20; |
37 | | /// Denominator of resolution makes min pixels to demand line lengths to be. |
38 | | const int kMinLineLengthFraction = 4; |
39 | | /// Spacing of cracks across the page to break up tall vertical lines. |
40 | | const int kCrackSpacing = 100; |
41 | | /// Grid size used by line finder. Not very critical. |
42 | | const int kLineFindGridSize = 50; |
43 | | // Min width of a line in pixels to be considered thick. |
44 | | const int kMinThickLineWidth = 12; |
45 | | // Max size of line residue. (The pixels that fail the long thin opening, and |
46 | | // therefore don't make it to the candidate line mask, but are nevertheless |
47 | | // part of the line.) |
48 | | const int kMaxLineResidue = 6; |
49 | | // Min length in inches of a line segment that exceeds kMinThickLineWidth in |
50 | | // thickness. (Such lines shouldn't break by simple image degradation.) |
51 | | const double kThickLengthMultiple = 0.75; |
52 | | // Max fraction of line box area that can be occupied by non-line pixels. |
53 | | const double kMaxNonLineDensity = 0.25; |
54 | | // Max height of a music stave in inches. |
55 | | const double kMaxStaveHeight = 1.0; |
56 | | // Minimum fraction of pixels in a music rectangle connected to the staves. |
57 | | const double kMinMusicPixelFraction = 0.75; |
58 | | |
59 | | // Erases the unused blobs from the line_pix image, taking into account |
60 | | // whether this was a horizontal or vertical line set. |
61 | | static void RemoveUnusedLineSegments(bool horizontal_lines, BLOBNBOX_LIST *line_bblobs, |
62 | 0 | Image line_pix) { |
63 | 0 | int height = pixGetHeight(line_pix); |
64 | 0 | BLOBNBOX_IT bbox_it(line_bblobs); |
65 | 0 | for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { |
66 | 0 | BLOBNBOX *blob = bbox_it.data(); |
67 | 0 | if (blob->left_tab_type() != TT_VLINE) { |
68 | 0 | const TBOX &box = blob->bounding_box(); |
69 | 0 | Box *pixbox = nullptr; |
70 | 0 | if (horizontal_lines) { |
71 | | // Horizontal lines are in tess format and also have x and y flipped |
72 | | // (to use FindVerticalAlignment) so we have to flip x and y and then |
73 | | // convert to Leptonica by height - flipped x (ie the right edge). |
74 | | // See GetLineBoxes for more explanation. |
75 | 0 | pixbox = boxCreate(box.bottom(), height - box.right(), box.height(), box.width()); |
76 | 0 | } else { |
77 | | // For vertical lines, just flip upside-down to convert to Leptonica. |
78 | | // The y position of the box in Leptonica terms is the distance from |
79 | | // the top of the image to the top of the box. |
80 | 0 | pixbox = boxCreate(box.left(), height - box.top(), box.width(), box.height()); |
81 | 0 | } |
82 | 0 | pixClearInRect(line_pix, pixbox); |
83 | 0 | boxDestroy(&pixbox); |
84 | 0 | } |
85 | 0 | } |
86 | 0 | } |
87 | | |
88 | | // Helper subtracts the line_pix image from the src_pix, and removes residue |
89 | | // as well by removing components that touch the line, but are not in the |
90 | | // non_line_pix mask. It is assumed that the non_line_pix mask has already |
91 | | // been prepared to required accuracy. |
92 | | static void SubtractLinesAndResidue(Image line_pix, Image non_line_pix, |
93 | 0 | Image src_pix) { |
94 | | // First remove the lines themselves. |
95 | 0 | pixSubtract(src_pix, src_pix, line_pix); |
96 | | // Subtract the non-lines from the image to get the residue. |
97 | 0 | Image residue_pix = pixSubtract(nullptr, src_pix, non_line_pix); |
98 | | // Dilate the lines so they touch the residue. |
99 | 0 | Image fat_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3); |
100 | | // Seed fill the fat lines to get all the residue. |
101 | 0 | pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8); |
102 | | // Subtract the residue from the original image. |
103 | 0 | pixSubtract(src_pix, src_pix, fat_line_pix); |
104 | 0 | fat_line_pix.destroy(); |
105 | 0 | residue_pix.destroy(); |
106 | 0 | } |
107 | | |
108 | | // Returns the maximum strokewidth in the given binary image by doubling |
109 | | // the maximum of the distance function. |
110 | 0 | static int MaxStrokeWidth(Image pix) { |
111 | 0 | Image dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG); |
112 | 0 | int width = pixGetWidth(dist_pix); |
113 | 0 | int height = pixGetHeight(dist_pix); |
114 | 0 | int wpl = pixGetWpl(dist_pix); |
115 | 0 | l_uint32 *data = pixGetData(dist_pix); |
116 | | // Find the maximum value in the distance image. |
117 | 0 | int max_dist = 0; |
118 | 0 | for (int y = 0; y < height; ++y) { |
119 | 0 | for (int x = 0; x < width; ++x) { |
120 | 0 | int pixel = GET_DATA_BYTE(data, x); |
121 | 0 | if (pixel > max_dist) { |
122 | 0 | max_dist = pixel; |
123 | 0 | } |
124 | 0 | } |
125 | 0 | data += wpl; |
126 | 0 | } |
127 | 0 | dist_pix.destroy(); |
128 | 0 | return max_dist * 2; |
129 | 0 | } |
130 | | |
131 | | // Returns the number of components in the intersection_pix touched by line_box. |
132 | 0 | static int NumTouchingIntersections(Box *line_box, Image intersection_pix) { |
133 | 0 | if (intersection_pix == nullptr) { |
134 | 0 | return 0; |
135 | 0 | } |
136 | 0 | Image rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr); |
137 | 0 | Boxa *boxa = pixConnComp(rect_pix, nullptr, 8); |
138 | 0 | rect_pix.destroy(); |
139 | 0 | if (boxa == nullptr) { |
140 | 0 | return false; |
141 | 0 | } |
142 | 0 | int result = boxaGetCount(boxa); |
143 | 0 | boxaDestroy(&boxa); |
144 | 0 | return result; |
145 | 0 | } |
146 | | |
147 | | // Returns the number of black pixels found in the box made by adding the line |
148 | | // width to both sides of the line bounding box. (Increasing the smallest |
149 | | // dimension of the bounding box.) |
150 | 0 | static int CountPixelsAdjacentToLine(int line_width, Box *line_box, Image nonline_pix) { |
151 | 0 | l_int32 x, y, box_width, box_height; |
152 | 0 | boxGetGeometry(line_box, &x, &y, &box_width, &box_height); |
153 | 0 | if (box_width > box_height) { |
154 | | // horizontal line. |
155 | 0 | int bottom = std::min(pixGetHeight(nonline_pix), y + box_height + line_width); |
156 | 0 | y = std::max(0, y - line_width); |
157 | 0 | box_height = bottom - y; |
158 | 0 | } else { |
159 | | // Vertical line. |
160 | 0 | int right = std::min(pixGetWidth(nonline_pix), x + box_width + line_width); |
161 | 0 | x = std::max(0, x - line_width); |
162 | 0 | box_width = right - x; |
163 | 0 | } |
164 | 0 | Box *box = boxCreate(x, y, box_width, box_height); |
165 | 0 | Image rect_pix = pixClipRectangle(nonline_pix, box, nullptr); |
166 | 0 | boxDestroy(&box); |
167 | 0 | l_int32 result; |
168 | 0 | pixCountPixels(rect_pix, &result, nullptr); |
169 | 0 | rect_pix.destroy(); |
170 | 0 | return result; |
171 | 0 | } |
172 | | |
173 | | // Helper erases false-positive line segments from the input/output line_pix. |
174 | | // 1. Since thick lines shouldn't really break up, we can eliminate some false |
175 | | // positives by marking segments that are at least kMinThickLineWidth |
176 | | // thickness, yet have a length less than min_thick_length. |
177 | | // 2. Lines that don't have at least 2 intersections with other lines and have |
178 | | // a lot of neighbouring non-lines are probably not lines (perhaps arabic |
179 | | // or Hindi words, or underlines.) |
180 | | // Bad line components are erased from line_pix. |
181 | | // Returns the number of remaining connected components. |
182 | | static int FilterFalsePositives(int resolution, Image nonline_pix, Image intersection_pix, |
183 | 0 | Image line_pix) { |
184 | 0 | int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple); |
185 | 0 | Pixa *pixa = nullptr; |
186 | 0 | Boxa *boxa = pixConnComp(line_pix, &pixa, 8); |
187 | | // Iterate over the boxes to remove false positives. |
188 | 0 | int nboxes = boxaGetCount(boxa); |
189 | 0 | int remaining_boxes = nboxes; |
190 | 0 | for (int i = 0; i < nboxes; ++i) { |
191 | 0 | Box *box = boxaGetBox(boxa, i, L_CLONE); |
192 | 0 | l_int32 x, y, box_width, box_height; |
193 | 0 | boxGetGeometry(box, &x, &y, &box_width, &box_height); |
194 | 0 | Image comp_pix = pixaGetPix(pixa, i, L_CLONE); |
195 | 0 | int max_width = MaxStrokeWidth(comp_pix); |
196 | 0 | comp_pix.destroy(); |
197 | 0 | bool bad_line = false; |
198 | | // If the length is too short to stand-alone as a line, and the box width |
199 | | // is thick enough, and the stroke width is thick enough it is bad. |
200 | 0 | if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth && |
201 | 0 | box_width < min_thick_length && box_height < min_thick_length && |
202 | 0 | max_width > kMinThickLineWidth) { |
203 | | // Too thick for the length. |
204 | 0 | bad_line = true; |
205 | 0 | } |
206 | 0 | if (!bad_line && (NumTouchingIntersections(box, intersection_pix) < 2)) { |
207 | | // Test non-line density near the line. |
208 | 0 | int nonline_count = CountPixelsAdjacentToLine(max_width, box, nonline_pix); |
209 | 0 | if (nonline_count > box_height * box_width * kMaxNonLineDensity) { |
210 | 0 | bad_line = true; |
211 | 0 | } |
212 | 0 | } |
213 | 0 | if (bad_line) { |
214 | | // Not a good line. |
215 | 0 | pixClearInRect(line_pix, box); |
216 | 0 | --remaining_boxes; |
217 | 0 | } |
218 | 0 | boxDestroy(&box); |
219 | 0 | } |
220 | 0 | pixaDestroy(&pixa); |
221 | 0 | boxaDestroy(&boxa); |
222 | 0 | return remaining_boxes; |
223 | 0 | } |
224 | | |
225 | | // Converts the Boxa array to a list of C_BLOB, getting rid of severely |
226 | | // overlapping outlines and those that are children of a bigger one. |
227 | | // The output is a list of C_BLOBs that are owned by the list. |
228 | | // The C_OUTLINEs in the C_BLOBs contain no outline data - just empty |
229 | | // bounding boxes. The Boxa is consumed and destroyed. |
230 | | static void ConvertBoxaToBlobs(int image_width, int image_height, Boxa **boxes, |
231 | 0 | C_BLOB_LIST *blobs) { |
232 | 0 | C_OUTLINE_LIST outlines; |
233 | 0 | C_OUTLINE_IT ol_it = &outlines; |
234 | | // Iterate the boxes to convert to outlines. |
235 | 0 | int nboxes = boxaGetCount(*boxes); |
236 | 0 | for (int i = 0; i < nboxes; ++i) { |
237 | 0 | l_int32 x, y, width, height; |
238 | 0 | boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height); |
239 | | // Make a C_OUTLINE from the leptonica box. This is a bit of a hack, |
240 | | // as there is no outline, just a bounding box, but with some very |
241 | | // small changes to coutln.cpp, it works nicely. |
242 | 0 | ICOORD top_left(x, y); |
243 | 0 | ICOORD bot_right(x + width, y + height); |
244 | 0 | CRACKEDGE startpt; |
245 | 0 | startpt.pos = top_left; |
246 | 0 | auto *outline = new C_OUTLINE(&startpt, top_left, bot_right, 0); |
247 | 0 | ol_it.add_after_then_move(outline); |
248 | 0 | } |
249 | | // Use outlines_to_blobs to convert the outlines to blobs and find |
250 | | // overlapping and contained objects. The output list of blobs in the block |
251 | | // has all the bad ones filtered out and deleted. |
252 | 0 | BLOCK block; |
253 | 0 | ICOORD page_tl(0, 0); |
254 | 0 | ICOORD page_br(image_width, image_height); |
255 | 0 | outlines_to_blobs(&block, page_tl, page_br, &outlines); |
256 | | // Transfer the created blobs to the output list. |
257 | 0 | C_BLOB_IT blob_it(blobs); |
258 | 0 | blob_it.add_list_after(block.blob_list()); |
259 | | // The boxes aren't needed any more. |
260 | 0 | boxaDestroy(boxes); |
261 | 0 | } |
262 | | |
263 | | // Returns a list of boxes corresponding to the candidate line segments. Sets |
264 | | // the line_crossings member of the boxes so we can later determine the number |
265 | | // of intersections touched by a full line. |
266 | | static void GetLineBoxes(bool horizontal_lines, Image pix_lines, Image pix_intersections, |
267 | 0 | C_BLOB_LIST *line_cblobs, BLOBNBOX_LIST *line_bblobs) { |
268 | | // Put a single pixel crack in every line at an arbitrary spacing, |
269 | | // so they break up and the bounding boxes can be used to get the |
270 | | // direction accurately enough without needing outlines. |
271 | 0 | int wpl = pixGetWpl(pix_lines); |
272 | 0 | int width = pixGetWidth(pix_lines); |
273 | 0 | int height = pixGetHeight(pix_lines); |
274 | 0 | l_uint32 *data = pixGetData(pix_lines); |
275 | 0 | if (horizontal_lines) { |
276 | 0 | for (int y = 0; y < height; ++y, data += wpl) { |
277 | 0 | for (int x = kCrackSpacing; x < width; x += kCrackSpacing) { |
278 | 0 | CLEAR_DATA_BIT(data, x); |
279 | 0 | } |
280 | 0 | } |
281 | 0 | } else { |
282 | 0 | for (int y = kCrackSpacing; y < height; y += kCrackSpacing) { |
283 | 0 | memset(data + wpl * y, 0, wpl * sizeof(*data)); |
284 | 0 | } |
285 | 0 | } |
286 | | // Get the individual connected components |
287 | 0 | Boxa *boxa = pixConnComp(pix_lines, nullptr, 8); |
288 | 0 | ConvertBoxaToBlobs(width, height, &boxa, line_cblobs); |
289 | | // Make the BLOBNBOXes from the C_BLOBs. |
290 | 0 | C_BLOB_IT blob_it(line_cblobs); |
291 | 0 | BLOBNBOX_IT bbox_it(line_bblobs); |
292 | 0 | for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { |
293 | 0 | C_BLOB *cblob = blob_it.data(); |
294 | 0 | auto *bblob = new BLOBNBOX(cblob); |
295 | 0 | bbox_it.add_to_end(bblob); |
296 | | // Determine whether the line segment touches two intersections. |
297 | 0 | const TBOX &bbox = bblob->bounding_box(); |
298 | 0 | Box *box = boxCreate(bbox.left(), bbox.bottom(), bbox.width(), bbox.height()); |
299 | 0 | bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections)); |
300 | 0 | boxDestroy(&box); |
301 | | // Transform the bounding box prior to finding lines. To save writing |
302 | | // two line finders, flip x and y for horizontal lines and re-use the |
303 | | // tab-stop detection code. For vertical lines we still have to flip the |
304 | | // y-coordinates to switch from leptonica coords to tesseract coords. |
305 | 0 | if (horizontal_lines) { |
306 | | // Note that we have Leptonica coords stored in a Tesseract box, so that |
307 | | // bbox.bottom(), being the MIN y coord, is actually the top, so to get |
308 | | // back to Leptonica coords in RemoveUnusedLineSegments, we have to |
309 | | // use height - box.right() as the top, which looks very odd. |
310 | 0 | TBOX new_box(height - bbox.top(), bbox.left(), height - bbox.bottom(), bbox.right()); |
311 | 0 | bblob->set_bounding_box(new_box); |
312 | 0 | } else { |
313 | 0 | TBOX new_box(bbox.left(), height - bbox.top(), bbox.right(), height - bbox.bottom()); |
314 | 0 | bblob->set_bounding_box(new_box); |
315 | 0 | } |
316 | 0 | } |
317 | 0 | } |
318 | | |
319 | | // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright |
320 | | // are the bounds of the image on which the input line_bblobs were found. |
321 | | // The input line_bblobs list is const really. |
322 | | // The output vertical_x and vertical_y are the total of all the vectors. |
323 | | // The output list of TabVector makes no reference to the input BLOBNBOXes. |
324 | | static void FindLineVectors(const ICOORD &bleft, const ICOORD &tright, |
325 | | BLOBNBOX_LIST *line_bblobs, int *vertical_x, int *vertical_y, |
326 | 0 | TabVector_LIST *vectors) { |
327 | 0 | BLOBNBOX_IT bbox_it(line_bblobs); |
328 | 0 | int b_count = 0; |
329 | | // Put all the blobs into the grid to find the lines, and move the blobs |
330 | | // to the output lists. |
331 | 0 | AlignedBlob blob_grid(kLineFindGridSize, bleft, tright); |
332 | 0 | for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { |
333 | 0 | BLOBNBOX *bblob = bbox_it.data(); |
334 | 0 | bblob->set_left_tab_type(TT_MAYBE_ALIGNED); |
335 | 0 | bblob->set_left_rule(bleft.x()); |
336 | 0 | bblob->set_right_rule(tright.x()); |
337 | 0 | bblob->set_left_crossing_rule(bleft.x()); |
338 | 0 | bblob->set_right_crossing_rule(tright.x()); |
339 | 0 | blob_grid.InsertBBox(false, true, bblob); |
340 | 0 | ++b_count; |
341 | 0 | } |
342 | 0 | if (b_count == 0) { |
343 | 0 | return; |
344 | 0 | } |
345 | | |
346 | | // Search the entire grid, looking for vertical line vectors. |
347 | 0 | BlobGridSearch lsearch(&blob_grid); |
348 | 0 | BLOBNBOX *bbox; |
349 | 0 | TabVector_IT vector_it(vectors); |
350 | 0 | *vertical_x = 0; |
351 | 0 | *vertical_y = 1; |
352 | 0 | lsearch.StartFullSearch(); |
353 | 0 | while ((bbox = lsearch.NextFullSearch()) != nullptr) { |
354 | 0 | if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) { |
355 | 0 | const TBOX &box = bbox->bounding_box(); |
356 | 0 | if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) { |
357 | 0 | tprintf("Finding line vector starting at bbox (%d,%d)\n", box.left(), box.bottom()); |
358 | 0 | } |
359 | 0 | AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width()); |
360 | 0 | TabVector *vector = |
361 | 0 | blob_grid.FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y); |
362 | 0 | if (vector != nullptr) { |
363 | 0 | vector->Freeze(); |
364 | 0 | vector_it.add_to_end(vector); |
365 | 0 | } |
366 | 0 | } |
367 | 0 | } |
368 | 0 | } |
369 | | |
370 | | // Returns a Pix music mask if music is detected. |
371 | | // Any vertical line that has at least 5 intersections in sufficient density |
372 | | // is taken to be a bar. Bars are used as a seed and the entire touching |
373 | | // component is added to the output music mask and subtracted from the lines. |
374 | | // Returns nullptr and does minimal work if no music is found. |
375 | | static Image FilterMusic(int resolution, Image pix_closed, Image pix_vline, Image pix_hline, |
376 | 0 | bool &v_empty, bool &h_empty) { |
377 | 0 | int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight); |
378 | 0 | Image intersection_pix = pix_vline & pix_hline; |
379 | 0 | Boxa *boxa = pixConnComp(pix_vline, nullptr, 8); |
380 | | // Iterate over the boxes to find music bars. |
381 | 0 | int nboxes = boxaGetCount(boxa); |
382 | 0 | Image music_mask = nullptr; |
383 | 0 | for (int i = 0; i < nboxes; ++i) { |
384 | 0 | Box *box = boxaGetBox(boxa, i, L_CLONE); |
385 | 0 | l_int32 x, y, box_width, box_height; |
386 | 0 | boxGetGeometry(box, &x, &y, &box_width, &box_height); |
387 | 0 | int joins = NumTouchingIntersections(box, intersection_pix); |
388 | | // Test for the join density being at least 5 per max_stave_height, |
389 | | // ie (joins-1)/box_height >= (5-1)/max_stave_height. |
390 | 0 | if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) { |
391 | | // This is a music bar. Add to the mask. |
392 | 0 | if (music_mask == nullptr) { |
393 | 0 | music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline), 1); |
394 | 0 | } |
395 | 0 | pixSetInRect(music_mask, box); |
396 | 0 | } |
397 | 0 | boxDestroy(&box); |
398 | 0 | } |
399 | 0 | boxaDestroy(&boxa); |
400 | 0 | intersection_pix.destroy(); |
401 | 0 | if (music_mask != nullptr) { |
402 | | // The mask currently contains just the bars. Use the mask as a seed |
403 | | // and the pix_closed as the mask for a seedfill to get all the |
404 | | // intersecting staves. |
405 | 0 | pixSeedfillBinary(music_mask, music_mask, pix_closed, 8); |
406 | | // Filter out false positives. CCs in the music_mask should be the vast |
407 | | // majority of the pixels in their bounding boxes, as we expect just a |
408 | | // tiny amount of text, a few phrase marks, and crescendo etc left. |
409 | 0 | Boxa *boxa = pixConnComp(music_mask, nullptr, 8); |
410 | | // Iterate over the boxes to find music components. |
411 | 0 | int nboxes = boxaGetCount(boxa); |
412 | 0 | for (int i = 0; i < nboxes; ++i) { |
413 | 0 | Box *box = boxaGetBox(boxa, i, L_CLONE); |
414 | 0 | Image rect_pix = pixClipRectangle(music_mask, box, nullptr); |
415 | 0 | l_int32 music_pixels; |
416 | 0 | pixCountPixels(rect_pix, &music_pixels, nullptr); |
417 | 0 | rect_pix.destroy(); |
418 | 0 | rect_pix = pixClipRectangle(pix_closed, box, nullptr); |
419 | 0 | l_int32 all_pixels; |
420 | 0 | pixCountPixels(rect_pix, &all_pixels, nullptr); |
421 | 0 | rect_pix.destroy(); |
422 | 0 | if (music_pixels < kMinMusicPixelFraction * all_pixels) { |
423 | | // False positive. Delete from the music mask. |
424 | 0 | pixClearInRect(music_mask, box); |
425 | 0 | } |
426 | 0 | boxDestroy(&box); |
427 | 0 | } |
428 | 0 | boxaDestroy(&boxa); |
429 | 0 | if (music_mask.isZero()) { |
430 | 0 | music_mask.destroy(); |
431 | 0 | } else { |
432 | 0 | pixSubtract(pix_vline, pix_vline, music_mask); |
433 | 0 | pixSubtract(pix_hline, pix_hline, music_mask); |
434 | | // We may have deleted all the lines |
435 | 0 | v_empty = pix_vline.isZero(); |
436 | 0 | h_empty = pix_hline.isZero(); |
437 | 0 | } |
438 | 0 | } |
439 | 0 | return music_mask; |
440 | 0 | } |
441 | | |
442 | | // Most of the heavy lifting of line finding. Given src_pix and its separate |
443 | | // resolution, returns image masks: |
444 | | // pix_vline candidate vertical lines. |
445 | | // pix_non_vline pixels that didn't look like vertical lines. |
446 | | // pix_hline candidate horizontal lines. |
447 | | // pix_non_hline pixels that didn't look like horizontal lines. |
448 | | // pix_intersections pixels where vertical and horizontal lines meet. |
449 | | // pix_music_mask candidate music staves. |
450 | | // This function promises to initialize all the output (2nd level) pointers, |
451 | | // but any of the returns that are empty will be nullptr on output. |
452 | | // None of the input (1st level) pointers may be nullptr except |
453 | | // pix_music_mask, which will disable music detection, and pixa_display, which |
454 | | // is for debug. |
455 | | static void GetLineMasks(int resolution, Image src_pix, Image *pix_vline, Image *pix_non_vline, |
456 | | Image *pix_hline, Image *pix_non_hline, Image *pix_intersections, |
457 | 0 | Image *pix_music_mask, Pixa *pixa_display) { |
458 | 0 | Image pix_closed = nullptr; |
459 | 0 | Image pix_hollow = nullptr; |
460 | |
|
461 | 0 | int max_line_width = resolution / kThinLineFraction; |
462 | 0 | int min_line_length = resolution / kMinLineLengthFraction; |
463 | 0 | if (pixa_display != nullptr) { |
464 | 0 | tprintf("Image resolution = %d, max line width = %d, min length=%d\n", resolution, |
465 | 0 | max_line_width, min_line_length); |
466 | 0 | } |
467 | 0 | int closing_brick = max_line_width / 3; |
468 | | |
469 | | // Close up small holes, making it less likely that false alarms are found |
470 | | // in thickened text (as it will become more solid) and also smoothing over |
471 | | // some line breaks and nicks in the edges of the lines. |
472 | 0 | pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); |
473 | 0 | if (pixa_display != nullptr) { |
474 | 0 | pixaAddPix(pixa_display, pix_closed, L_CLONE); |
475 | 0 | } |
476 | | // Open up with a big box to detect solid areas, which can then be |
477 | | // subtracted. This is very generous and will leave in even quite wide |
478 | | // lines. |
479 | 0 | Image pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, max_line_width); |
480 | 0 | if (pixa_display != nullptr) { |
481 | 0 | pixaAddPix(pixa_display, pix_solid, L_CLONE); |
482 | 0 | } |
483 | 0 | pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); |
484 | |
|
485 | 0 | pix_solid.destroy(); |
486 | | |
487 | | // Now open up in both directions independently to find lines of at least |
488 | | // 1 inch/kMinLineLengthFraction in length. |
489 | 0 | if (pixa_display != nullptr) { |
490 | 0 | pixaAddPix(pixa_display, pix_hollow, L_CLONE); |
491 | 0 | } |
492 | 0 | *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); |
493 | 0 | *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); |
494 | |
|
495 | 0 | pix_hollow.destroy(); |
496 | | |
497 | | // Lines are sufficiently rare, that it is worth checking for a zero image. |
498 | 0 | bool v_empty = pix_vline->isZero(); |
499 | 0 | bool h_empty = pix_hline->isZero(); |
500 | 0 | if (pix_music_mask != nullptr) { |
501 | 0 | if (!v_empty && !h_empty) { |
502 | 0 | *pix_music_mask = |
503 | 0 | FilterMusic(resolution, pix_closed, *pix_vline, *pix_hline, v_empty, h_empty); |
504 | 0 | } else { |
505 | 0 | *pix_music_mask = nullptr; |
506 | 0 | } |
507 | 0 | } |
508 | 0 | pix_closed.destroy(); |
509 | 0 | Image pix_nonlines = nullptr; |
510 | 0 | *pix_intersections = nullptr; |
511 | 0 | Image extra_non_hlines = nullptr; |
512 | 0 | if (!v_empty) { |
513 | | // Subtract both line candidates from the source to get definite non-lines. |
514 | 0 | pix_nonlines = pixSubtract(nullptr, src_pix, *pix_vline); |
515 | 0 | if (!h_empty) { |
516 | 0 | pixSubtract(pix_nonlines, pix_nonlines, *pix_hline); |
517 | | // Intersections are a useful indicator for likelihood of being a line. |
518 | 0 | *pix_intersections = *pix_vline & *pix_hline; |
519 | | // Candidate vlines are not hlines (apart from the intersections) |
520 | | // and vice versa. |
521 | 0 | extra_non_hlines = pixSubtract(nullptr, *pix_vline, *pix_intersections); |
522 | 0 | } |
523 | 0 | *pix_non_vline = pixErodeBrick(nullptr, pix_nonlines, kMaxLineResidue, 1); |
524 | 0 | pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8); |
525 | 0 | if (!h_empty) { |
526 | | // Candidate hlines are not vlines. |
527 | 0 | *pix_non_vline |= *pix_hline; |
528 | 0 | pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections); |
529 | 0 | } |
530 | 0 | if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections, *pix_vline)) { |
531 | 0 | pix_vline->destroy(); // No candidates left. |
532 | 0 | } |
533 | 0 | } else { |
534 | | // No vertical lines. |
535 | 0 | pix_vline->destroy(); |
536 | 0 | *pix_non_vline = nullptr; |
537 | 0 | if (!h_empty) { |
538 | 0 | pix_nonlines = pixSubtract(nullptr, src_pix, *pix_hline); |
539 | 0 | } |
540 | 0 | } |
541 | 0 | if (h_empty) { |
542 | 0 | pix_hline->destroy(); |
543 | 0 | *pix_non_hline = nullptr; |
544 | 0 | if (v_empty) { |
545 | 0 | return; |
546 | 0 | } |
547 | 0 | } else { |
548 | 0 | *pix_non_hline = pixErodeBrick(nullptr, pix_nonlines, 1, kMaxLineResidue); |
549 | 0 | pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8); |
550 | 0 | if (extra_non_hlines != nullptr) { |
551 | 0 | *pix_non_hline |= extra_non_hlines; |
552 | 0 | extra_non_hlines.destroy(); |
553 | 0 | } |
554 | 0 | if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections, *pix_hline)) { |
555 | 0 | pix_hline->destroy(); // No candidates left. |
556 | 0 | } |
557 | 0 | } |
558 | 0 | if (pixa_display != nullptr) { |
559 | 0 | if (*pix_vline != nullptr) { |
560 | 0 | pixaAddPix(pixa_display, *pix_vline, L_CLONE); |
561 | 0 | } |
562 | 0 | if (*pix_hline != nullptr) { |
563 | 0 | pixaAddPix(pixa_display, *pix_hline, L_CLONE); |
564 | 0 | } |
565 | 0 | if (pix_nonlines != nullptr) { |
566 | 0 | pixaAddPix(pixa_display, pix_nonlines, L_CLONE); |
567 | 0 | } |
568 | 0 | if (*pix_non_vline != nullptr) { |
569 | 0 | pixaAddPix(pixa_display, *pix_non_vline, L_CLONE); |
570 | 0 | } |
571 | 0 | if (*pix_non_hline != nullptr) { |
572 | 0 | pixaAddPix(pixa_display, *pix_non_hline, L_CLONE); |
573 | 0 | } |
574 | 0 | if (*pix_intersections != nullptr) { |
575 | 0 | pixaAddPix(pixa_display, *pix_intersections, L_CLONE); |
576 | 0 | } |
577 | 0 | if (pix_music_mask != nullptr && *pix_music_mask != nullptr) { |
578 | 0 | pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); |
579 | 0 | } |
580 | 0 | } |
581 | 0 | pix_nonlines.destroy(); |
582 | 0 | } |
583 | | |
584 | | // Finds vertical line objects in pix_vline and removes them from src_pix. |
585 | | // Uses the given resolution to determine size thresholds instead of any |
586 | | // that may be present in the pix. |
587 | | // The output vertical_x and vertical_y contain a sum of the output vectors, |
588 | | // thereby giving the mean vertical direction. |
589 | | // The output vectors are owned by the list and Frozen (cannot refit) by |
590 | | // having no boxes, as there is no need to refit or merge separator lines. |
591 | | // If no good lines are found, pix_vline is destroyed. |
592 | | // None of the input pointers may be nullptr, and if *pix_vline is nullptr then |
593 | | // the function does nothing. |
594 | | static void FindAndRemoveVLines(Image pix_intersections, int *vertical_x, |
595 | | int *vertical_y, Image *pix_vline, Image pix_non_vline, |
596 | 0 | Image src_pix, TabVector_LIST *vectors) { |
597 | 0 | if (pix_vline == nullptr || *pix_vline == nullptr) { |
598 | 0 | return; |
599 | 0 | } |
600 | 0 | C_BLOB_LIST line_cblobs; |
601 | 0 | BLOBNBOX_LIST line_bblobs; |
602 | 0 | GetLineBoxes(false, *pix_vline, pix_intersections, &line_cblobs, &line_bblobs); |
603 | 0 | int width = pixGetWidth(src_pix); |
604 | 0 | int height = pixGetHeight(src_pix); |
605 | 0 | ICOORD bleft(0, 0); |
606 | 0 | ICOORD tright(width, height); |
607 | 0 | FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors); |
608 | 0 | if (!vectors->empty()) { |
609 | 0 | RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline); |
610 | 0 | SubtractLinesAndResidue(*pix_vline, pix_non_vline, src_pix); |
611 | 0 | ICOORD vertical; |
612 | 0 | vertical.set_with_shrink(*vertical_x, *vertical_y); |
613 | 0 | TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr); |
614 | 0 | } else { |
615 | 0 | pix_vline->destroy(); |
616 | 0 | } |
617 | 0 | } |
618 | | |
619 | | // Finds horizontal line objects in pix_hline and removes them from src_pix. |
620 | | // Uses the given resolution to determine size thresholds instead of any |
621 | | // that may be present in the pix. |
622 | | // The output vertical_x and vertical_y contain a sum of the output vectors, |
623 | | // thereby giving the mean vertical direction. |
624 | | // The output vectors are owned by the list and Frozen (cannot refit) by |
625 | | // having no boxes, as there is no need to refit or merge separator lines. |
626 | | // If no good lines are found, pix_hline is destroyed. |
627 | | // None of the input pointers may be nullptr, and if *pix_hline is nullptr then |
628 | | // the function does nothing. |
629 | | static void FindAndRemoveHLines(Image pix_intersections, int vertical_x, |
630 | | int vertical_y, Image *pix_hline, Image pix_non_hline, |
631 | 0 | Image src_pix, TabVector_LIST *vectors) { |
632 | 0 | if (pix_hline == nullptr || *pix_hline == nullptr) { |
633 | 0 | return; |
634 | 0 | } |
635 | 0 | C_BLOB_LIST line_cblobs; |
636 | 0 | BLOBNBOX_LIST line_bblobs; |
637 | 0 | GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs); |
638 | 0 | int width = pixGetWidth(src_pix); |
639 | 0 | int height = pixGetHeight(src_pix); |
640 | 0 | ICOORD bleft(0, 0); |
641 | 0 | ICOORD tright(height, width); |
642 | 0 | FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y, vectors); |
643 | 0 | if (!vectors->empty()) { |
644 | 0 | RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline); |
645 | 0 | SubtractLinesAndResidue(*pix_hline, pix_non_hline, src_pix); |
646 | 0 | ICOORD vertical; |
647 | 0 | vertical.set_with_shrink(vertical_x, vertical_y); |
648 | 0 | TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr); |
649 | | // Iterate the vectors to flip them. x and y were flipped for horizontal |
650 | | // lines, so FindLineVectors can work just with the vertical case. |
651 | | // See GetLineBoxes for more on the flip. |
652 | 0 | TabVector_IT h_it(vectors); |
653 | 0 | for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { |
654 | 0 | h_it.data()->XYFlip(); |
655 | 0 | } |
656 | 0 | } else { |
657 | 0 | pix_hline->destroy(); |
658 | 0 | } |
659 | 0 | } |
660 | | |
661 | | // Finds vertical and horizontal line objects in the given pix. |
662 | | // Uses the given resolution to determine size thresholds instead of any |
663 | | // that may be present in the pix. |
664 | | // The output vertical_x and vertical_y contain a sum of the output vectors, |
665 | | // thereby giving the mean vertical direction. |
666 | | // If pix_music_mask != nullptr, and music is detected, a mask of the staves |
667 | | // and anything that is connected (bars, notes etc.) will be returned in |
668 | | // pix_music_mask, the mask subtracted from pix, and the lines will not |
669 | | // appear in v_lines or h_lines. |
670 | | // The output vectors are owned by the list and Frozen (cannot refit) by |
671 | | // having no boxes, as there is no need to refit or merge separator lines. |
672 | | // The detected lines are removed from the pix. |
673 | | void LineFinder::FindAndRemoveLines(int resolution, bool debug, Image pix, int *vertical_x, |
674 | | int *vertical_y, Image *pix_music_mask, TabVector_LIST *v_lines, |
675 | 0 | TabVector_LIST *h_lines) { |
676 | 0 | if (pix == nullptr || vertical_x == nullptr || vertical_y == nullptr) { |
677 | 0 | tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n"); |
678 | 0 | return; |
679 | 0 | } |
680 | 0 | Image pix_vline = nullptr; |
681 | 0 | Image pix_non_vline = nullptr; |
682 | 0 | Image pix_hline = nullptr; |
683 | 0 | Image pix_non_hline = nullptr; |
684 | 0 | Image pix_intersections = nullptr; |
685 | 0 | Pixa *pixa_display = debug ? pixaCreate(0) : nullptr; |
686 | 0 | GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, &pix_non_hline, |
687 | 0 | &pix_intersections, pix_music_mask, pixa_display); |
688 | | // Find lines, convert to TabVector_LIST and remove those that are used. |
689 | 0 | FindAndRemoveVLines(pix_intersections, vertical_x, vertical_y, &pix_vline, |
690 | 0 | pix_non_vline, pix, v_lines); |
691 | 0 | pix_intersections.destroy(); |
692 | 0 | if (pix_hline != nullptr) { |
693 | | // Recompute intersections and re-filter false positive h-lines. |
694 | 0 | if (pix_vline != nullptr) { |
695 | 0 | pix_intersections = pix_vline & pix_hline; |
696 | 0 | } |
697 | 0 | if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, pix_hline)) { |
698 | 0 | pix_hline.destroy(); |
699 | 0 | } |
700 | 0 | } |
701 | 0 | FindAndRemoveHLines(pix_intersections, *vertical_x, *vertical_y, &pix_hline, |
702 | 0 | pix_non_hline, pix, h_lines); |
703 | 0 | if (pixa_display != nullptr && pix_vline != nullptr) { |
704 | 0 | pixaAddPix(pixa_display, pix_vline, L_CLONE); |
705 | 0 | } |
706 | 0 | if (pixa_display != nullptr && pix_hline != nullptr) { |
707 | 0 | pixaAddPix(pixa_display, pix_hline, L_CLONE); |
708 | 0 | } |
709 | 0 | pix_intersections.destroy(); |
710 | 0 | if (pix_vline != nullptr && pix_hline != nullptr) { |
711 | | // Remove joins (intersections) where lines cross, and the residue. |
712 | | // Recalculate the intersections, since some lines have been deleted. |
713 | 0 | pix_intersections = pix_vline & pix_hline; |
714 | | // Fatten up the intersections and seed-fill to get the intersection |
715 | | // residue. |
716 | 0 | Image pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5); |
717 | 0 | pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8); |
718 | | // Now remove the intersection residue. |
719 | 0 | pixSubtract(pix, pix, pix_join_residue); |
720 | 0 | pix_join_residue.destroy(); |
721 | 0 | } |
722 | | // Remove any detected music. |
723 | 0 | if (pix_music_mask != nullptr && *pix_music_mask != nullptr) { |
724 | 0 | if (pixa_display != nullptr) { |
725 | 0 | pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); |
726 | 0 | } |
727 | 0 | pixSubtract(pix, pix, *pix_music_mask); |
728 | 0 | } |
729 | 0 | if (pixa_display != nullptr) { |
730 | 0 | pixaAddPix(pixa_display, pix, L_CLONE); |
731 | 0 | } |
732 | |
|
733 | 0 | pix_vline.destroy(); |
734 | 0 | pix_non_vline.destroy(); |
735 | 0 | pix_hline.destroy(); |
736 | 0 | pix_non_hline.destroy(); |
737 | 0 | pix_intersections.destroy(); |
738 | 0 | if (pixa_display != nullptr) { |
739 | 0 | pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", "vhlinefinding.pdf"); |
740 | 0 | pixaDestroy(&pixa_display); |
741 | 0 | } |
742 | 0 | } |
743 | | |
744 | | } // namespace tesseract. |