Coverage Report

Created: 2025-11-16 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/ccmain/werdit.cpp
Line
Count
Source
1
/**********************************************************************
2
 * File:        werdit.cpp  (Formerly wordit.c)
3
 * Description: An iterator for passing over all the words in a document.
4
 * Author:      Ray Smith
5
 * Created:     Mon Apr 27 08:51:22 BST 1992
6
 *
7
 * (C) Copyright 1992, Hewlett-Packard Ltd.
8
 ** Licensed under the Apache License, Version 2.0 (the "License");
9
 ** you may not use this file except in compliance with the License.
10
 ** You may obtain a copy of the License at
11
 ** http://www.apache.org/licenses/LICENSE-2.0
12
 ** Unless required by applicable law or agreed to in writing, software
13
 ** distributed under the License is distributed on an "AS IS" BASIS,
14
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 ** See the License for the specific language governing permissions and
16
 ** limitations under the License.
17
 *
18
 **********************************************************************/
19
20
#include "werdit.h"
21
22
#include "errcode.h"  // for ASSERT_HOST
23
#include "pageres.h"  // for PAGE_RES_IT, PAGE_RES (ptr only), WERD_RES
24
#include "stepblob.h" // for C_BLOB_IT, C_BLOB, C_BLOB_LIST
25
#include "werd.h"     // for WERD
26
27
namespace tesseract {
28
29
/**********************************************************************
30
 * make_pseudo_word
31
 *
32
 * Make all the blobs inside a selection into a single word.
33
 * The returned PAGE_RES_IT* it points to the new word. After use, call
34
 * it->DeleteCurrentWord() to delete the fake word, and then
35
 * delete it to get rid of the iterator itself.
36
 **********************************************************************/
37
38
0
PAGE_RES_IT *make_pseudo_word(PAGE_RES *page_res, const TBOX &selection_box) {
39
0
  PAGE_RES_IT pr_it(page_res);
40
0
  C_BLOB_LIST new_blobs;              // list of gathered blobs
41
0
  C_BLOB_IT new_blob_it = &new_blobs; // iterator
42
43
0
  for (WERD_RES *word_res = pr_it.word(); word_res != nullptr; word_res = pr_it.forward()) {
44
0
    WERD *word = word_res->word;
45
0
    if (word->bounding_box().overlap(selection_box)) {
46
0
      C_BLOB_IT blob_it(word->cblob_list());
47
0
      for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
48
0
        C_BLOB *blob = blob_it.data();
49
0
        if (blob->bounding_box().overlap(selection_box)) {
50
0
          new_blob_it.add_after_then_move(C_BLOB::deep_copy(blob));
51
0
        }
52
0
      }
53
0
      if (!new_blobs.empty()) {
54
0
        WERD *pseudo_word = new WERD(&new_blobs, 1, nullptr);
55
0
        word_res = pr_it.InsertSimpleCloneWord(*word_res, pseudo_word);
56
0
        auto *it = new PAGE_RES_IT(page_res);
57
0
        while (it->word() != word_res && it->word() != nullptr) {
58
0
          it->forward();
59
0
        }
60
0
        ASSERT_HOST(it->word() == word_res);
61
0
        return it;
62
0
      }
63
0
    }
64
0
  }
65
0
  return nullptr;
66
0
}
67
68
} // namespace tesseract