Coverage Report

Created: 2025-06-13 07:02

/src/tesseract/src/textord/tordmain.cpp
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
 * File:        tordmain.cpp  (Formerly textordp.c)
3
 * Description: C++ top level textord code.
4
 * Author:      Ray Smith
5
 *
6
 * (C) Copyright 1992, Hewlett-Packard Ltd.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *
17
 **********************************************************************/
18
19
#define _USE_MATH_DEFINES // for M_PI
20
21
#ifdef HAVE_CONFIG_H
22
#  include "config_auto.h"
23
#endif
24
25
#include "tordmain.h"
26
27
#include "arrayaccess.h" // for GET_DATA_BYTE
28
#include "blobbox.h"     // for BLOBNBOX_IT, BLOBNBOX, TO_BLOCK, TO_B...
29
#include "ccstruct.h"    // for CCStruct, CCStruct::kXHeightFraction
30
#include "clst.h"        // for CLISTIZE
31
#include "coutln.h"      // for C_OUTLINE_IT, C_OUTLINE_LIST, C_OUTLINE
32
#include "drawtord.h"    // for plot_box_list, to_win, create_to_win
33
#include "edgblob.h"     // for extract_edges
34
#include "errcode.h"     // for ASSERT_HOST, ...
35
#include "makerow.h"     // for textord_test_x, textord_test_y, texto...
36
#include "ocrblock.h"    // for BLOCK_IT, BLOCK, BLOCK_LIST (ptr only)
37
#include "ocrrow.h"      // for ROW, ROW_IT, ROW_LIST, tweak_row_base...
38
#include "params.h"      // for DoubleParam, BoolParam, IntParam
39
#include "pdblock.h"     // for PDBLK
40
#include "points.h"      // for FCOORD, ICOORD
41
#include "polyblk.h"     // for POLY_BLOCK
42
#include "quadratc.h"    // for QUAD_COEFFS
43
#include "quspline.h"    // for QSPLINE, tweak_row_baseline
44
#include "rect.h"        // for TBOX
45
#include "scrollview.h"  // for ScrollView, ScrollView::WHITE
46
#include "statistc.h"    // for STATS
47
#include "stepblob.h"    // for C_BLOB_IT, C_BLOB, C_BLOB_LIST
48
#include "textord.h"     // for Textord, WordWithBox, WordGrid, WordS...
49
#include "tprintf.h"     // for tprintf
50
#include "werd.h"        // for WERD_IT, WERD, WERD_LIST, W_DONT_CHOP
51
52
#include <allheaders.h> // for pixDestroy, pixGetHeight, boxCreate
53
54
#include <cfloat>  // for FLT_MAX
55
#include <cmath>   // for ceil, floor, M_PI
56
#include <cstdint> // for INT16_MAX, uint32_t, int32_t, int16_t
57
#include <memory>
58
59
namespace tesseract {
60
61
7.74k
#define MAX_NEAREST_DIST 600 // for block skew stats
62
63
/**********************************************************************
64
 * SetBlobStrokeWidth
65
 *
66
 * Set the horizontal and vertical stroke widths in the blob.
67
 **********************************************************************/
68
559k
void SetBlobStrokeWidth(Image pix, BLOBNBOX *blob) {
69
  // Cut the blob rectangle into a Pix.
70
559k
  int pix_height = pixGetHeight(pix);
71
559k
  const TBOX &box = blob->bounding_box();
72
559k
  int width = box.width();
73
559k
  int height = box.height();
74
559k
  Box *blob_pix_box = boxCreate(box.left(), pix_height - box.top(), width, height);
75
559k
  Image pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr);
76
559k
  boxDestroy(&blob_pix_box);
77
559k
  Image dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
78
559k
  pix_blob.destroy();
79
  // Compute the stroke widths.
80
559k
  uint32_t *data = pixGetData(dist_pix);
81
559k
  int wpl = pixGetWpl(dist_pix);
82
  // Horizontal width of stroke.
83
559k
  STATS h_stats(0, width);
84
3.87M
  for (int y = 0; y < height; ++y) {
85
3.31M
    uint32_t *pixels = data + y * wpl;
86
3.31M
    int prev_pixel = 0;
87
3.31M
    int pixel = GET_DATA_BYTE(pixels, 0);
88
18.0M
    for (int x = 1; x < width; ++x) {
89
14.7M
      int next_pixel = GET_DATA_BYTE(pixels, x);
90
      // We are looking for a pixel that is equal to its vertical neighbours,
91
      // yet greater than its left neighbour.
92
14.7M
      if (prev_pixel < pixel && (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
93
14.7M
          (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
94
1.65M
        if (pixel > next_pixel) {
95
          // Single local max, so an odd width.
96
1.19M
          h_stats.add(pixel * 2 - 1, 1);
97
1.19M
        } else if (pixel == next_pixel && x + 1 < width && pixel > GET_DATA_BYTE(pixels, x + 1)) {
98
          // Double local max, so an even width.
99
131k
          h_stats.add(pixel * 2, 1);
100
131k
        }
101
1.65M
      }
102
14.7M
      prev_pixel = pixel;
103
14.7M
      pixel = next_pixel;
104
14.7M
    }
105
3.31M
  }
106
  // Vertical width of stroke.
107
559k
  STATS v_stats(0, height);
108
2.24M
  for (int x = 0; x < width; ++x) {
109
1.68M
    int prev_pixel = 0;
110
1.68M
    int pixel = GET_DATA_BYTE(data, x);
111
18.0M
    for (int y = 1; y < height; ++y) {
112
16.3M
      uint32_t *pixels = data + y * wpl;
113
16.3M
      int next_pixel = GET_DATA_BYTE(pixels, x);
114
      // We are looking for a pixel that is equal to its horizontal neighbours,
115
      // yet greater than its upper neighbour.
116
16.3M
      if (prev_pixel < pixel && (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
117
16.3M
          (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
118
1.17M
        if (pixel > next_pixel) {
119
          // Single local max, so an odd width.
120
507k
          v_stats.add(pixel * 2 - 1, 1);
121
670k
        } else if (pixel == next_pixel && y + 1 < height &&
122
670k
                   pixel > GET_DATA_BYTE(pixels + wpl, x)) {
123
          // Double local max, so an even width.
124
173k
          v_stats.add(pixel * 2, 1);
125
173k
        }
126
1.17M
      }
127
16.3M
      prev_pixel = pixel;
128
16.3M
      pixel = next_pixel;
129
16.3M
    }
130
1.68M
  }
131
559k
  dist_pix.destroy();
132
  // Store the horizontal and vertical width in the blob, keeping both
133
  // widths if there is enough information, otherwise only the one with
134
  // the most samples.
135
  // If there are insufficient samples, store zero, rather than using
136
  // 2*area/perimeter, as the numbers that gives do not match the numbers
137
  // from the distance method.
138
559k
  if (h_stats.get_total() >= (width + height) / 4) {
139
159k
    blob->set_horz_stroke_width(h_stats.ile(0.5f));
140
159k
    if (v_stats.get_total() >= (width + height) / 4) {
141
33.7k
      blob->set_vert_stroke_width(v_stats.ile(0.5f));
142
125k
    } else {
143
125k
      blob->set_vert_stroke_width(0.0f);
144
125k
    }
145
400k
  } else {
146
400k
    if (v_stats.get_total() >= (width + height) / 4 || v_stats.get_total() > h_stats.get_total()) {
147
42.5k
      blob->set_horz_stroke_width(0.0f);
148
42.5k
      blob->set_vert_stroke_width(v_stats.ile(0.5f));
149
357k
    } else {
150
357k
      blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f) : 0.0f);
151
357k
      blob->set_vert_stroke_width(0.0f);
152
357k
    }
153
400k
  }
154
559k
}
155
156
/**********************************************************************
157
 * assign_blobs_to_blocks2
158
 *
159
 * Make a list of TO_BLOCKs for portrait and landscape orientation.
160
 **********************************************************************/
161
162
void assign_blobs_to_blocks2(Image pix,
163
                             BLOCK_LIST *blocks,           // blocks to process
164
7.74k
                             TO_BLOCK_LIST *port_blocks) { // output list
165
7.74k
  BLOCK_IT block_it = blocks;
166
7.74k
  C_BLOB_IT blob_it;       // iterator
167
7.74k
  BLOBNBOX_IT port_box_it; // iterator
168
                           // destination iterator
169
7.74k
  TO_BLOCK_IT port_block_it = port_blocks;
170
171
15.4k
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
172
7.74k
    auto block = block_it.data();
173
7.74k
    auto port_block = new TO_BLOCK(block);
174
175
    // Convert the good outlines to block->blob_list
176
7.74k
    port_box_it.set_to_list(&port_block->blobs);
177
7.74k
    blob_it.set_to_list(block->blob_list());
178
566k
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
179
558k
      auto blob = blob_it.extract();
180
558k
      auto newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
181
558k
      newblob->set_owns_cblob(true);
182
558k
      SetBlobStrokeWidth(pix, newblob);
183
558k
      port_box_it.add_after_then_move(newblob);
184
558k
    }
185
186
    // Put the rejected outlines in block->noise_blobs, which allows them to
187
    // be reconsidered and sorted back into rows and recover outlines mistakenly
188
    // rejected.
189
7.74k
    port_box_it.set_to_list(&port_block->noise_blobs);
190
7.74k
    blob_it.set_to_list(block->reject_blobs());
191
8.48k
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
192
741
      auto blob = blob_it.extract();
193
741
      auto newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
194
741
      newblob->set_owns_cblob(true);
195
741
      SetBlobStrokeWidth(pix, newblob);
196
741
      port_box_it.add_after_then_move(newblob);
197
741
    }
198
199
7.74k
    port_block_it.add_after_then_move(port_block);
200
7.74k
  }
201
7.74k
}
202
203
/**********************************************************************
204
 * find_components
205
 *
206
 * Find the C_OUTLINEs of the connected components in each block, put them
207
 * in C_BLOBs, and filter them by size, putting the different size
208
 * grades on different lists in the matching TO_BLOCK in to_blocks.
209
 **********************************************************************/
210
211
7.74k
void Textord::find_components(Image pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) {
212
7.74k
  int width = pixGetWidth(pix);
213
7.74k
  int height = pixGetHeight(pix);
214
7.74k
  if (width > INT16_MAX || height > INT16_MAX) {
215
0
    tprintf("Input image too large! (%d, %d)\n", width, height);
216
0
    return; // Can't handle it.
217
0
  }
218
219
7.74k
  BLOCK_IT block_it(blocks); // iterator
220
15.4k
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
221
7.74k
    BLOCK *block = block_it.data();
222
7.74k
    if (block->pdblk.poly_block() == nullptr || block->pdblk.poly_block()->IsText()) {
223
7.74k
      extract_edges(pix, block);
224
7.74k
    }
225
7.74k
  }
226
227
7.74k
  assign_blobs_to_blocks2(pix, blocks, to_blocks);
228
7.74k
  ICOORD page_tr(width, height);
229
7.74k
  filter_blobs(page_tr, to_blocks, !textord_test_landscape);
230
7.74k
}
231
232
/**********************************************************************
233
 * filter_blobs
234
 *
235
 * Sort the blobs into sizes in all the blocks for later work.
236
 **********************************************************************/
237
238
void Textord::filter_blobs(ICOORD page_tr,        // top right
239
                           TO_BLOCK_LIST *blocks, // output list
240
7.74k
                           bool testing_on) {     // for plotting
241
7.74k
  TO_BLOCK_IT block_it = blocks;                  // destination iterator
242
7.74k
  TO_BLOCK *block;                                // created block
243
244
#ifndef GRAPHICS_DISABLED
245
  if (to_win != nullptr) {
246
    to_win->Clear();
247
  }
248
#endif // !GRAPHICS_DISABLED
249
250
15.4k
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
251
7.74k
    block = block_it.data();
252
7.74k
    block->line_size = filter_noise_blobs(&block->blobs, &block->noise_blobs, &block->small_blobs,
253
7.74k
                                          &block->large_blobs);
254
7.74k
    if (block->line_size == 0) {
255
188
      block->line_size = 1;
256
188
    }
257
7.74k
    block->line_spacing =
258
7.74k
        block->line_size *
259
7.74k
        (tesseract::CCStruct::kDescenderFraction + tesseract::CCStruct::kXHeightFraction +
260
7.74k
         2 * tesseract::CCStruct::kAscenderFraction) /
261
7.74k
        tesseract::CCStruct::kXHeightFraction;
262
7.74k
    block->line_size *= textord_min_linesize;
263
7.74k
    block->max_blob_size = block->line_size * textord_excess_blobsize;
264
265
#ifndef GRAPHICS_DISABLED
266
    if (textord_show_blobs && testing_on) {
267
      if (to_win == nullptr) {
268
        create_to_win(page_tr);
269
      }
270
      block->plot_graded_blobs(to_win);
271
    }
272
    if (textord_show_boxes && testing_on) {
273
      if (to_win == nullptr) {
274
        create_to_win(page_tr);
275
      }
276
      plot_box_list(to_win, &block->noise_blobs, ScrollView::WHITE);
277
      plot_box_list(to_win, &block->small_blobs, ScrollView::WHITE);
278
      plot_box_list(to_win, &block->large_blobs, ScrollView::WHITE);
279
      plot_box_list(to_win, &block->blobs, ScrollView::WHITE);
280
    }
281
#endif // !GRAPHICS_DISABLED
282
7.74k
  }
283
7.74k
}
284
285
/**********************************************************************
286
 * filter_noise_blobs
287
 *
288
 * Move small blobs to a separate list.
289
 **********************************************************************/
290
291
float Textord::filter_noise_blobs(BLOBNBOX_LIST *src_list,     // original list
292
                                  BLOBNBOX_LIST *noise_list,   // noise list
293
                                  BLOBNBOX_LIST *small_list,   // small blobs
294
7.74k
                                  BLOBNBOX_LIST *large_list) { // large blobs
295
7.74k
  int16_t height;                                              // height of blob
296
7.74k
  int16_t width;                                               // of blob
297
7.74k
  BLOBNBOX *blob;                                              // current blob
298
7.74k
  float initial_x;                                             // first guess
299
7.74k
  BLOBNBOX_IT src_it = src_list;                               // iterators
300
7.74k
  BLOBNBOX_IT noise_it = noise_list;
301
7.74k
  BLOBNBOX_IT small_it = small_list;
302
7.74k
  BLOBNBOX_IT large_it = large_list;
303
7.74k
  STATS size_stats(0, MAX_NEAREST_DIST - 1);
304
  // blob heights
305
7.74k
  float min_y; // size limits
306
7.74k
  float max_y;
307
7.74k
  float max_x;
308
7.74k
  float max_height; // of good blobs
309
310
566k
  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
311
558k
    blob = src_it.data();
312
558k
    if (blob->bounding_box().height() < textord_max_noise_size) {
313
309k
      noise_it.add_after_then_move(src_it.extract());
314
309k
    } else if (blob->enclosed_area() >= blob->bounding_box().height() *
315
249k
                                            blob->bounding_box().width() *
316
249k
                                            textord_noise_area_ratio) {
317
93.4k
      small_it.add_after_then_move(src_it.extract());
318
93.4k
    }
319
558k
  }
320
163k
  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
321
155k
    size_stats.add(src_it.data()->bounding_box().height(), 1);
322
155k
  }
323
7.74k
  initial_x = size_stats.ile(textord_initialx_ile);
324
7.74k
  max_y = ceil(initial_x *
325
7.74k
               (tesseract::CCStruct::kDescenderFraction + tesseract::CCStruct::kXHeightFraction +
326
7.74k
                2 * tesseract::CCStruct::kAscenderFraction) /
327
7.74k
               tesseract::CCStruct::kXHeightFraction);
328
7.74k
  min_y = std::floor(initial_x / 2);
329
7.74k
  max_x = ceil(initial_x * textord_width_limit);
330
7.74k
  small_it.move_to_first();
331
101k
  for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
332
93.4k
    height = small_it.data()->bounding_box().height();
333
93.4k
    if (height > max_y) {
334
1.27k
      large_it.add_after_then_move(small_it.extract());
335
92.1k
    } else if (height >= min_y) {
336
87.5k
      src_it.add_after_then_move(small_it.extract());
337
87.5k
    }
338
93.4k
  }
339
7.74k
  size_stats.clear();
340
251k
  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
341
243k
    height = src_it.data()->bounding_box().height();
342
243k
    width = src_it.data()->bounding_box().width();
343
243k
    if (height < min_y) {
344
3.63k
      small_it.add_after_then_move(src_it.extract());
345
239k
    } else if (height > max_y || width > max_x) {
346
2.34k
      large_it.add_after_then_move(src_it.extract());
347
237k
    } else {
348
237k
      size_stats.add(height, 1);
349
237k
    }
350
243k
  }
351
7.74k
  max_height = size_stats.ile(textord_initialasc_ile);
352
  //      tprintf("max_y=%g, min_y=%g, initial_x=%g, max_height=%g,",
353
  //              max_y,min_y,initial_x,max_height);
354
7.74k
  max_height *= tesseract::CCStruct::kXHeightCapRatio;
355
7.74k
  if (max_height > initial_x) {
356
904
    initial_x = max_height;
357
904
  }
358
  //      tprintf(" ret=%g\n",initial_x);
359
7.74k
  return initial_x;
360
7.74k
}
361
362
// Fixes the block so it obeys all the rules:
363
// Must have at least one ROW.
364
// Must have at least one WERD.
365
// WERDs contain a fake blob.
366
0
void Textord::cleanup_nontext_block(BLOCK *block) {
367
  // Non-text blocks must contain at least one row.
368
0
  ROW_IT row_it(block->row_list());
369
0
  if (row_it.empty()) {
370
0
    const TBOX &box = block->pdblk.bounding_box();
371
0
    float height = box.height();
372
0
    int32_t xstarts[2] = {box.left(), box.right()};
373
0
    double coeffs[3] = {0.0, 0.0, static_cast<double>(box.bottom())};
374
0
    ROW *row = new ROW(1, xstarts, coeffs, height / 2.0f, height / 4.0f, height / 4.0f, 0, 1);
375
0
    row_it.add_after_then_move(row);
376
0
  }
377
  // Each row must contain at least one word.
378
0
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
379
0
    ROW *row = row_it.data();
380
0
    WERD_IT w_it(row->word_list());
381
0
    if (w_it.empty()) {
382
      // Make a fake blob to put in the word.
383
0
      TBOX box = block->row_list()->singleton() ? block->pdblk.bounding_box() : row->bounding_box();
384
0
      C_BLOB *blob = C_BLOB::FakeBlob(box);
385
0
      C_BLOB_LIST blobs;
386
0
      C_BLOB_IT blob_it(&blobs);
387
0
      blob_it.add_after_then_move(blob);
388
0
      WERD *word = new WERD(&blobs, 0, nullptr);
389
0
      w_it.add_after_then_move(word);
390
0
    }
391
    // Each word must contain a fake blob.
392
0
    for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
393
0
      WERD *word = w_it.data();
394
      // Just assert that this is true, as it would be useful to find
395
      // out why it isn't.
396
0
      ASSERT_HOST(!word->cblob_list()->empty());
397
0
    }
398
0
    row->recalc_bounding_box();
399
0
  }
400
0
}
401
402
/**********************************************************************
403
 * cleanup_blocks
404
 *
405
 * Delete empty blocks, rows from the page.
406
 **********************************************************************/
407
408
7.74k
void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks) {
409
7.74k
  BLOCK_IT block_it = blocks; // iterator
410
7.74k
  ROW_IT row_it;              // row iterator
411
412
7.74k
  int num_rows = 0;
413
7.74k
  int num_rows_all = 0;
414
7.74k
  int num_blocks = 0;
415
7.74k
  int num_blocks_all = 0;
416
15.4k
  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
417
7.74k
    BLOCK *block = block_it.data();
418
7.74k
    if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
419
0
      cleanup_nontext_block(block);
420
0
      continue;
421
0
    }
422
7.74k
    num_rows = 0;
423
7.74k
    num_rows_all = 0;
424
7.74k
    if (clean_noise) {
425
7.74k
      row_it.set_to_list(block->row_list());
426
64.0k
      for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
427
56.2k
        ROW *row = row_it.data();
428
56.2k
        ++num_rows_all;
429
56.2k
        clean_small_noise_from_words(row);
430
56.2k
        if ((textord_noise_rejrows && !row->word_list()->empty() && clean_noise_from_row(row)) ||
431
56.2k
            row->word_list()->empty()) {
432
9.10k
          delete row_it.extract(); // lose empty row.
433
47.1k
        } else {
434
47.1k
          if (textord_noise_rejwords) {
435
47.1k
            clean_noise_from_words(row_it.data());
436
47.1k
          }
437
47.1k
          if (textord_blshift_maxshift >= 0) {
438
47.1k
            tweak_row_baseline(row, textord_blshift_maxshift, textord_blshift_xfraction);
439
47.1k
          }
440
47.1k
          ++num_rows;
441
47.1k
        }
442
56.2k
      }
443
7.74k
    }
444
7.74k
    if (block->row_list()->empty()) {
445
863
      delete block_it.extract(); // Lose empty text blocks.
446
6.88k
    } else {
447
6.88k
      ++num_blocks;
448
6.88k
    }
449
7.74k
    ++num_blocks_all;
450
7.74k
    if (textord_noise_debug) {
451
0
      tprintf("cleanup_blocks: # rows = %d / %d\n", num_rows, num_rows_all);
452
0
    }
453
7.74k
  }
454
7.74k
  if (textord_noise_debug) {
455
0
    tprintf("cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all);
456
0
  }
457
7.74k
}
458
459
/**********************************************************************
460
 * clean_noise_from_row
461
 *
462
 * Move blobs of words from rows of garbage into the reject blobs list.
463
 **********************************************************************/
464
465
bool Textord::clean_noise_from_row( // remove empties
466
    ROW *row                        // row to clean
467
56.1k
) {
468
56.1k
  bool testing_on;
469
56.1k
  TBOX blob_box;            // bounding box
470
56.1k
  C_BLOB *blob;             // current blob
471
56.1k
  C_OUTLINE *outline;       // current outline
472
56.1k
  WERD *word;               // current word
473
56.1k
  int32_t blob_size;        // biggest size
474
56.1k
  int32_t trans_count = 0;  // no of transitions
475
56.1k
  int32_t trans_threshold;  // noise tolerance
476
56.1k
  int32_t dot_count;        // small objects
477
56.1k
  int32_t norm_count;       // normal objects
478
56.1k
  int32_t super_norm_count; // real char-like
479
                            // words of row
480
56.1k
  WERD_IT word_it = row->word_list();
481
56.1k
  C_BLOB_IT blob_it;   // blob iterator
482
56.1k
  C_OUTLINE_IT out_it; // outline iterator
483
484
56.1k
  testing_on = textord_test_y > row->base_line(textord_test_x) && textord_show_blobs &&
485
56.1k
               textord_test_y < row->base_line(textord_test_x) + row->x_height();
486
56.1k
  dot_count = 0;
487
56.1k
  norm_count = 0;
488
56.1k
  super_norm_count = 0;
489
131k
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
490
75.5k
    word = word_it.data(); // current word
491
                           // blobs in word
492
75.5k
    blob_it.set_to_list(word->cblob_list());
493
355k
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
494
279k
      blob = blob_it.data();
495
279k
      if (!word->flag(W_DONT_CHOP)) {
496
        // get outlines
497
271k
        out_it.set_to_list(blob->out_list());
498
738k
        for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
499
467k
          outline = out_it.data();
500
467k
          blob_box = outline->bounding_box();
501
467k
          blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
502
467k
          if (blob_size < textord_noise_sizelimit * row->x_height()) {
503
244k
            dot_count++; // count small outlines
504
244k
          }
505
467k
          if (!outline->child()->empty() &&
506
467k
              blob_box.height() < (1 + textord_noise_syfract) * row->x_height() &&
507
467k
              blob_box.height() > (1 - textord_noise_syfract) * row->x_height() &&
508
467k
              blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() &&
509
467k
              blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) {
510
1.42k
            super_norm_count++; // count small outlines
511
1.42k
          }
512
467k
        }
513
271k
      } else {
514
8.77k
        super_norm_count++;
515
8.77k
      }
516
279k
      blob_box = blob->bounding_box();
517
279k
      blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
518
279k
      if (blob_size >= textord_noise_sizelimit * row->x_height() &&
519
279k
          blob_size < row->x_height() * 2) {
520
213k
        trans_threshold = blob_size / textord_noise_sizefraction;
521
213k
        trans_count = blob->count_transitions(trans_threshold);
522
213k
        if (trans_count < textord_noise_translimit) {
523
201k
          norm_count++;
524
201k
        }
525
213k
      } else if (blob_box.height() > row->x_height() * 2 &&
526
66.2k
                 (!word_it.at_first() || !blob_it.at_first())) {
527
3.03k
        dot_count += 2;
528
3.03k
      }
529
279k
      if (testing_on) {
530
0
        tprintf("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n", blob_box.left(),
531
0
                blob_box.bottom(), blob_box.right(), blob_box.top(), blob->out_list()->length(),
532
0
                trans_count, blob_box.bottom() - row->base_line(blob_box.left()));
533
0
      }
534
279k
    }
535
75.5k
  }
536
  // TODO: check whether `&& super_norm_count < textord_noise_sncount`should always be added here.
537
56.1k
  bool rejected = dot_count > norm_count * textord_noise_normratio &&
538
56.1k
                  dot_count > 2;
539
56.1k
  if (textord_noise_debug) {
540
0
    tprintf("Row ending at (%d,%g):", blob_box.right(), row->base_line(blob_box.right()));
541
0
    tprintf(" R=%g, dc=%d, nc=%d, %s\n",
542
0
            norm_count > 0 ? static_cast<float>(dot_count) / norm_count : 9999, dot_count,
543
0
            norm_count,
544
0
            rejected? "REJECTED": "ACCEPTED");
545
0
  }
546
56.1k
  return super_norm_count < textord_noise_sncount && rejected;
547
56.1k
}
548
549
/**********************************************************************
550
 * clean_noise_from_words
551
 *
552
 * Move blobs of words from rows of garbage into the reject blobs list.
553
 **********************************************************************/
554
555
void Textord::clean_noise_from_words( // remove empties
556
    ROW *row                          // row to clean
557
47.1k
) {
558
47.1k
  TBOX blob_box;           // bounding box
559
47.1k
  C_BLOB *blob;            // current blob
560
47.1k
  C_OUTLINE *outline;      // current outline
561
47.1k
  WERD *word;              // current word
562
47.1k
  int32_t blob_size;       // biggest size
563
47.1k
  int32_t trans_count;     // no of transitions
564
47.1k
  int32_t trans_threshold; // noise tolerance
565
47.1k
  int32_t dot_count;       // small objects
566
47.1k
  int32_t norm_count;      // normal objects
567
47.1k
  int32_t dud_words;       // number discarded
568
47.1k
  int32_t ok_words;        // number remaining
569
47.1k
  int32_t word_index;      // current word
570
                           // words of row
571
47.1k
  WERD_IT word_it = row->word_list();
572
47.1k
  C_BLOB_IT blob_it;   // blob iterator
573
47.1k
  C_OUTLINE_IT out_it; // outline iterator
574
575
47.1k
  ok_words = word_it.length();
576
47.1k
  if (ok_words == 0 || textord_no_rejects) {
577
0
    return;
578
0
  }
579
  // was it chucked
580
47.1k
  std::vector<int8_t> word_dud(ok_words);
581
47.1k
  dud_words = 0;
582
47.1k
  ok_words = 0;
583
47.1k
  word_index = 0;
584
108k
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
585
61.3k
    word = word_it.data(); // current word
586
61.3k
    dot_count = 0;
587
61.3k
    norm_count = 0;
588
    // blobs in word
589
61.3k
    blob_it.set_to_list(word->cblob_list());
590
284k
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
591
223k
      blob = blob_it.data();
592
223k
      if (!word->flag(W_DONT_CHOP)) {
593
        // get outlines
594
214k
        out_it.set_to_list(blob->out_list());
595
518k
        for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
596
303k
          outline = out_it.data();
597
303k
          blob_box = outline->bounding_box();
598
303k
          blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
599
303k
          if (blob_size < textord_noise_sizelimit * row->x_height()) {
600
104k
            dot_count++; // count small outlines
601
104k
          }
602
303k
          if (!outline->child()->empty() &&
603
303k
              blob_box.height() < (1 + textord_noise_syfract) * row->x_height() &&
604
303k
              blob_box.height() > (1 - textord_noise_syfract) * row->x_height() &&
605
303k
              blob_box.width() < (1 + textord_noise_sxfract) * row->x_height() &&
606
303k
              blob_box.width() > (1 - textord_noise_sxfract) * row->x_height()) {
607
1.42k
            norm_count++; // count small outlines
608
1.42k
          }
609
303k
        }
610
214k
      } else {
611
8.77k
        norm_count++;
612
8.77k
      }
613
223k
      blob_box = blob->bounding_box();
614
223k
      blob_size = blob_box.width() > blob_box.height() ? blob_box.width() : blob_box.height();
615
223k
      if (blob_size >= textord_noise_sizelimit * row->x_height() &&
616
223k
          blob_size < row->x_height() * 2) {
617
187k
        trans_threshold = blob_size / textord_noise_sizefraction;
618
187k
        trans_count = blob->count_transitions(trans_threshold);
619
187k
        if (trans_count < textord_noise_translimit) {
620
180k
          norm_count++;
621
180k
        }
622
187k
      } else if (blob_box.height() > row->x_height() * 2 &&
623
35.6k
                 (!word_it.at_first() || !blob_it.at_first())) {
624
1.43k
        dot_count += 2;
625
1.43k
      }
626
223k
    }
627
61.3k
    if (dot_count > 2 && !word->flag(W_REP_CHAR)) {
628
9.27k
      if (dot_count > norm_count * textord_noise_normratio * 2) {
629
723
        word_dud[word_index] = 2;
630
8.55k
      } else if (dot_count > norm_count * textord_noise_normratio) {
631
617
        word_dud[word_index] = 1;
632
7.93k
      } else {
633
7.93k
        word_dud[word_index] = 0;
634
7.93k
      }
635
52.0k
    } else {
636
52.0k
      word_dud[word_index] = 0;
637
52.0k
    }
638
61.3k
    if (word_dud[word_index] == 2) {
639
723
      dud_words++;
640
60.6k
    } else {
641
60.6k
      ok_words++;
642
60.6k
    }
643
61.3k
    word_index++;
644
61.3k
  }
645
646
47.1k
  word_index = 0;
647
108k
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
648
61.3k
    if (word_dud[word_index] == 2 || (word_dud[word_index] == 1 && dud_words > ok_words)) {
649
734
      word = word_it.data(); // Current word.
650
      // Previously we threw away the entire word.
651
      // Now just aggressively throw all small blobs into the reject list, where
652
      // the classifier can decide whether they are actually needed.
653
734
      word->CleanNoise(textord_noise_sizelimit * row->x_height());
654
734
    }
655
61.3k
    word_index++;
656
61.3k
  }
657
47.1k
}
658
659
// Remove outlines that are a tiny fraction in either width or height
660
// of the word height.
661
56.2k
void Textord::clean_small_noise_from_words(ROW *row) {
662
56.2k
  WERD_IT word_it(row->word_list());
663
132k
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
664
75.7k
    WERD *word = word_it.data();
665
75.7k
    int min_size = static_cast<int>(textord_noise_hfract * word->bounding_box().height() + 0.5);
666
75.7k
    C_BLOB_IT blob_it(word->cblob_list());
667
356k
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
668
280k
      C_BLOB *blob = blob_it.data();
669
280k
      C_OUTLINE_IT out_it(blob->out_list());
670
779k
      for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
671
498k
        C_OUTLINE *outline = out_it.data();
672
498k
        outline->RemoveSmallRecursive(min_size, &out_it);
673
498k
      }
674
280k
      if (blob->out_list()->empty()) {
675
751
        delete blob_it.extract();
676
751
      }
677
280k
    }
678
75.7k
    if (word->cblob_list()->empty()) {
679
277
      if (!word_it.at_last()) {
680
        // The next word is no longer a fuzzy non space if it was before,
681
        // since the word before is about to be deleted.
682
102
        WERD *next_word = word_it.data_relative(1);
683
102
        if (next_word->flag(W_FUZZY_NON)) {
684
12
          next_word->set_flag(W_FUZZY_NON, false);
685
12
        }
686
102
      }
687
277
      delete word_it.extract();
688
277
    }
689
75.7k
  }
690
56.2k
}
691
692
// Local struct to hold a group of blocks.
693
struct BlockGroup {
694
0
  BlockGroup() : rotation(1.0f, 0.0f), angle(0.0f), min_xheight(1.0f) {}
695
  explicit BlockGroup(BLOCK *block)
696
6.88k
      : bounding_box(block->pdblk.bounding_box())
697
6.88k
      , rotation(block->re_rotation())
698
6.88k
      , angle(block->re_rotation().angle())
699
6.88k
      , min_xheight(block->x_height()) {
700
6.88k
    blocks.push_back(block);
701
6.88k
  }
702
  // Union of block bounding boxes.
703
  TBOX bounding_box;
704
  // Common rotation of the blocks.
705
  FCOORD rotation;
706
  // Angle of rotation.
707
  float angle;
708
  // Min xheight of the blocks.
709
  float min_xheight;
710
  // Collection of borrowed pointers to the blocks in the group.
711
  std::vector<BLOCK *> blocks;
712
};
713
714
// Groups blocks by rotation, then, for each group, makes a WordGrid and calls
715
// TransferDiacriticsToWords to copy the diacritic blobs to the most
716
// appropriate words in the group of blocks. Source blobs are not touched.
717
7.74k
void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks) {
718
  // Angle difference larger than this is too much to consider equal.
719
  // They should only be in multiples of M_PI/2 anyway.
720
7.74k
  const double kMaxAngleDiff = 0.01; // About 0.6 degrees.
721
7.74k
  std::vector<std::unique_ptr<BlockGroup>> groups;
722
7.74k
  BLOCK_IT bk_it(blocks);
723
14.6k
  for (bk_it.mark_cycle_pt(); !bk_it.cycled_list(); bk_it.forward()) {
724
6.88k
    BLOCK *block = bk_it.data();
725
6.88k
    if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
726
0
      continue;
727
0
    }
728
    // Linear search of the groups to find a matching rotation.
729
6.88k
    float block_angle = block->re_rotation().angle();
730
6.88k
    int best_g = 0;
731
6.88k
    float best_angle_diff = FLT_MAX;
732
6.88k
    for (const auto &group : groups) {
733
0
      double angle_diff = std::fabs(block_angle - group->angle);
734
0
      if (angle_diff > M_PI) {
735
0
        angle_diff = fabs(angle_diff - 2.0 * M_PI);
736
0
      }
737
0
      if (angle_diff < best_angle_diff) {
738
0
        best_angle_diff = angle_diff;
739
0
        best_g = &group - &groups[0];
740
0
      }
741
0
    }
742
6.88k
    if (best_angle_diff > kMaxAngleDiff) {
743
6.88k
      groups.push_back(std::make_unique<BlockGroup>(block));
744
6.88k
    } else {
745
0
      groups[best_g]->blocks.push_back(block);
746
0
      groups[best_g]->bounding_box += block->pdblk.bounding_box();
747
0
      float x_height = block->x_height();
748
0
      if (x_height < groups[best_g]->min_xheight) {
749
0
        groups[best_g]->min_xheight = x_height;
750
0
      }
751
0
    }
752
6.88k
  }
753
  // Now process each group of blocks.
754
7.74k
  std::vector<std::unique_ptr<WordWithBox>> word_ptrs;
755
7.74k
  for (const auto &group : groups) {
756
6.88k
    if (group->bounding_box.null_box()) {
757
0
      continue;
758
0
    }
759
6.88k
    WordGrid word_grid(group->min_xheight, group->bounding_box.botleft(),
760
6.88k
                       group->bounding_box.topright());
761
6.88k
    for (auto b : group->blocks) {
762
6.88k
      ROW_IT row_it(b->row_list());
763
54.0k
      for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
764
47.1k
        ROW *row = row_it.data();
765
        // Put the words of the row into the grid.
766
47.1k
        WERD_IT w_it(row->word_list());
767
108k
        for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
768
61.3k
          WERD *word = w_it.data();
769
61.3k
          auto box_word = std::make_unique<WordWithBox>(word);
770
61.3k
          word_grid.InsertBBox(true, true, box_word.get());
771
          // Save the pointer where it will be auto-deleted.
772
61.3k
          word_ptrs.emplace_back(std::move(box_word));
773
61.3k
        }
774
47.1k
      }
775
6.88k
    }
776
6.88k
    FCOORD rotation = group->rotation;
777
    // Make it a forward rotation that will transform blob coords to block.
778
6.88k
    rotation.set_y(-rotation.y());
779
6.88k
    TransferDiacriticsToWords(diacritic_blobs, rotation, &word_grid);
780
6.88k
  }
781
7.74k
}
782
783
// Places a copy of blobs that are near a word (after applying rotation to the
784
// blob) in the most appropriate word, unless there is doubt, in which case a
785
// blob can end up in two words. Source blobs are not touched.
786
void Textord::TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs, const FCOORD &rotation,
787
6.88k
                                        WordGrid *word_grid) {
788
6.88k
  WordSearch ws(word_grid);
789
6.88k
  BLOBNBOX_IT b_it(diacritic_blobs);
790
  // Apply rotation to each blob before finding the nearest words. The rotation
791
  // allows us to only consider above/below placement and not left/right on
792
  // vertical text, because all text is horizontal here.
793
6.88k
  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
794
0
    BLOBNBOX *blobnbox = b_it.data();
795
0
    TBOX blob_box = blobnbox->bounding_box();
796
0
    blob_box.rotate(rotation);
797
0
    ws.StartRectSearch(blob_box);
798
    // Above/below refer to word position relative to diacritic. Since some
799
    // scripts eg Kannada/Telugu habitually put diacritics below words, and
800
    // others eg Thai/Vietnamese/Latin put most diacritics above words, try
801
    // for both if there isn't much in it.
802
0
    WordWithBox *best_above_word = nullptr;
803
0
    WordWithBox *best_below_word = nullptr;
804
0
    int best_above_distance = 0;
805
0
    int best_below_distance = 0;
806
0
    for (WordWithBox *word = ws.NextRectSearch(); word != nullptr; word = ws.NextRectSearch()) {
807
0
      if (word->word()->flag(W_REP_CHAR)) {
808
0
        continue;
809
0
      }
810
0
      TBOX word_box = word->true_bounding_box();
811
0
      int x_distance = blob_box.x_gap(word_box);
812
0
      int y_distance = blob_box.y_gap(word_box);
813
0
      if (x_distance > 0) {
814
        // Arbitrarily divide x-distance by 2 if there is a major y overlap,
815
        // and the word is to the left of the diacritic. If the
816
        // diacritic is a dropped broken character between two words, this will
817
        // help send all the pieces to a single word, instead of splitting them
818
        // over the 2 words.
819
0
        if (word_box.major_y_overlap(blob_box) && blob_box.left() > word_box.right()) {
820
0
          x_distance /= 2;
821
0
        }
822
0
        y_distance += x_distance;
823
0
      }
824
0
      if (word_box.y_middle() > blob_box.y_middle() &&
825
0
          (best_above_word == nullptr || y_distance < best_above_distance)) {
826
0
        best_above_word = word;
827
0
        best_above_distance = y_distance;
828
0
      }
829
0
      if (word_box.y_middle() <= blob_box.y_middle() &&
830
0
          (best_below_word == nullptr || y_distance < best_below_distance)) {
831
0
        best_below_word = word;
832
0
        best_below_distance = y_distance;
833
0
      }
834
0
    }
835
0
    bool above_good = best_above_word != nullptr &&
836
0
                      (best_below_word == nullptr ||
837
0
                       best_above_distance < best_below_distance + blob_box.height());
838
0
    bool below_good = best_below_word != nullptr && best_below_word != best_above_word &&
839
0
                      (best_above_word == nullptr ||
840
0
                       best_below_distance < best_above_distance + blob_box.height());
841
0
    if (below_good) {
842
0
      C_BLOB *copied_blob = C_BLOB::deep_copy(blobnbox->cblob());
843
0
      copied_blob->rotate(rotation);
844
      // Put the blob into the word's reject blobs list.
845
0
      C_BLOB_IT blob_it(best_below_word->RejBlobs());
846
0
      blob_it.add_to_end(copied_blob);
847
0
    }
848
0
    if (above_good) {
849
0
      C_BLOB *copied_blob = C_BLOB::deep_copy(blobnbox->cblob());
850
0
      copied_blob->rotate(rotation);
851
      // Put the blob into the word's reject blobs list.
852
0
      C_BLOB_IT blob_it(best_above_word->RejBlobs());
853
0
      blob_it.add_to_end(copied_blob);
854
0
    }
855
0
  }
856
6.88k
}
857
858
/**********************************************************************
859
 * tweak_row_baseline
860
 *
861
 * Shift baseline to fit the blobs more accurately where they are
862
 * close enough.
863
 **********************************************************************/
864
865
47.1k
// Rebuilds row->baseline as a new QSPLINE that is assembled segment by
// segment while walking every blob of every word in the row.
//   row               - row whose baseline is read and then overwritten.
//   blshift_maxshift  - upper limit on |blob bottom - baseline|, in units of
//                       the row x-height, for a blob to get its own segment.
//   blshift_xfraction - lower limit on blob height / row x-height for a blob
//                       to get its own segment.
// Returns without modifying the baseline when the row contains no blobs.
void tweak_row_baseline(ROW *row, double blshift_maxshift, double blshift_xfraction) {
866
47.1k
  TBOX blob_box;      // bounding box
867
47.1k
  C_BLOB *blob;       // current blob
868
47.1k
  WERD *word;         // current word
869
47.1k
  int32_t blob_count; // no of blobs
870
47.1k
  int32_t src_index;  // source segment
871
47.1k
  int32_t dest_index; // destination segment
872
47.1k
  float ydiff;        // baseline error
873
47.1k
  float x_centre;     // centre of blob
874
                      // words of row
875
47.1k
  WERD_IT word_it = row->word_list();
876
47.1k
  C_BLOB_IT blob_it; // blob iterator
877
878
47.1k
  // First pass: total the blobs of all words; the total sizes the arrays
  // allocated below.
  blob_count = 0;
879
108k
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
880
61.3k
    word = word_it.data(); // current word
881
                           // get total blobs
882
61.3k
    blob_count += word->cblob_list()->length();
883
61.3k
  }
884
47.1k
  if (blob_count == 0) {
885
0
    return;
886
0
  }
887
  // spline segments
  // Room for one boundary per blob plus one per existing baseline segment,
  // plus the initial boundary.
888
47.1k
  std::vector<int32_t> xstarts(blob_count + row->baseline.segments + 1);
889
  // spline coeffs
  // Three coefficients (a, b, c) per possible destination segment.
890
47.1k
  std::vector<double> coeffs((blob_count + row->baseline.segments) * 3);
891
892
47.1k
  src_index = 0;
893
47.1k
  dest_index = 0;
894
47.1k
  // The new spline starts at the same x as the old one.
  xstarts[0] = row->baseline.xcoords[0];
895
108k
  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
896
61.3k
    word = word_it.data(); // current word
897
                           // blobs in word
898
61.3k
    blob_it.set_to_list(word->cblob_list());
899
283k
    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
900
222k
      blob = blob_it.data();
901
222k
      blob_box = blob->bounding_box();
902
222k
      x_centre = (blob_box.left() + blob_box.right()) / 2.0;
903
222k
      // Distance of the blob bottom from the current baseline at the blob
      // centre; made non-negative and divided by the row x-height below.
      ydiff = blob_box.bottom() - row->base_line(x_centre);
904
222k
      if (ydiff < 0) {
905
78.4k
        ydiff = -ydiff / row->x_height();
906
143k
      } else {
907
143k
        ydiff = ydiff / row->x_height();
908
143k
      }
909
222k
      // Blob bottom is within the allowed shift of the baseline and the blob
      // is tall enough relative to the x-height: give it its own segment.
      if (ydiff < blshift_maxshift && blob_box.height() / row->x_height() > blshift_xfraction) {
910
0
        // If the open segment starts at or beyond the blob centre, start it
        // at the blob's left edge instead.
        if (xstarts[dest_index] >= x_centre) {
911
0
          xstarts[dest_index] = blob_box.left();
912
0
        }
913
0
        // Only the third coefficient is non-zero, and it is set to the blob
        // bottom (presumably the constant term of the segment's quadratic,
        // i.e. a flat baseline at the blob bottom - confirm in QSPLINE).
        coeffs[dest_index * 3] = 0;
914
0
        coeffs[dest_index * 3 + 1] = 0;
915
0
        coeffs[dest_index * 3 + 2] = blob_box.bottom();
916
        // shift it
917
0
        dest_index++;
918
0
        // The next segment starts one past the blob's right edge.
        xstarts[dest_index] = blob_box.right() + 1;
919
222k
      } else {
920
222k
        // Blob does not qualify: keep the original baseline under it, but
        // only if the open segment does not already start beyond its centre.
        if (xstarts[dest_index] <= x_centre) {
921
131k
          // Advance through the source segments that end at or before the
          // blob centre, copying those that extend past the open start.
          // NOTE(review): xcoords[src_index + 1] is read before the bound on
          // src_index is tested; this looks safe because src_index only
          // grows inside this loop and so never exceeds segments - 1 here,
          // assuming xcoords holds segments + 1 entries - confirm.
          while (row->baseline.xcoords[src_index + 1] <= x_centre &&
922
131k
                 src_index < row->baseline.segments - 1) {
923
51.3k
            if (row->baseline.xcoords[src_index + 1] > xstarts[dest_index]) {
924
17.9k
              coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
925
17.9k
              coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
926
17.9k
              coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
927
17.9k
              dest_index++;
928
17.9k
              xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
929
17.9k
            }
930
51.3k
            src_index++;
931
51.3k
          }
932
80.5k
          // Copy the source segment the loop stopped on and close it at that
          // segment's end coordinate.
          coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
933
80.5k
          coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
934
80.5k
          coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
935
80.5k
          dest_index++;
936
80.5k
          xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
937
80.5k
        }
938
222k
      }
939
222k
    }
940
61.3k
  }
941
94.3k
  // Skip source segments that end at or before the start of the open
  // destination segment.
  while (src_index < row->baseline.segments &&
942
94.3k
         row->baseline.xcoords[src_index + 1] <= xstarts[dest_index]) {
943
47.1k
    src_index++;
944
47.1k
  }
945
83.8k
  // Copy whatever source segments remain unchanged.
  while (src_index < row->baseline.segments) {
946
36.6k
    coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
947
36.6k
    coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
948
36.6k
    coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
949
36.6k
    dest_index++;
950
36.6k
    src_index++;
951
36.6k
    xstarts[dest_index] = row->baseline.xcoords[src_index];
952
36.6k
  }
953
  // turn to spline
  // dest_index is now the number of segments written.
954
47.1k
  row->baseline = QSPLINE(dest_index, &xstarts[0], &coeffs[0]);
955
47.1k
}
956
957
} // namespace tesseract