Coverage Report

Created: 2025-12-31 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/classify/blobclass.cpp
Line
Count
Source
1
/******************************************************************************
2
 **      Filename:       blobclass.cpp
3
 **      Purpose:        High level blob classification and training routines.
4
 **      Author:         Dan Johnson
5
 **
6
 **      (c) Copyright Hewlett-Packard Company, 1988.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 ******************************************************************************/
17
18
#include <cstdio>
19
20
#include "classify.h"
21
#include "featdefs.h"
22
#include "mf.h"
23
#include "normfeat.h"
24
25
namespace tesseract {
26
27
/*---------------------------------------------------------------------------*/
28
29
// Extracts features from the given blob and saves them in the tr_file_data_
30
// member variable.
31
// fontname:  Name of font that this blob was printed in.
32
// cn_denorm: Character normalization transformation to apply to the blob.
33
// fx_info:   Character normalization parameters computed with cn_denorm.
34
// blob_text: Ground truth text for the blob.
35
void Classify::LearnBlob(const std::string &fontname, TBLOB *blob, const DENORM &cn_denorm,
36
0
                         const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text) {
37
0
  std::unique_ptr<CHAR_DESC_STRUCT> CharDesc(new CHAR_DESC_STRUCT(feature_defs_));
38
0
  CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
39
0
  CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
40
0
  CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
41
0
  CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);
42
43
0
  if (ValidCharDescription(feature_defs_, CharDesc.get())) {
44
    // Label the features with a class name and font name.
45
0
    tr_file_data_ += "\n";
46
0
    tr_file_data_ += fontname;
47
0
    tr_file_data_ += " ";
48
0
    tr_file_data_ += blob_text;
49
0
    tr_file_data_ += "\n";
50
51
    // write micro-features to file and clean up
52
0
    WriteCharDescription(feature_defs_, CharDesc.get(), tr_file_data_);
53
0
  } else {
54
0
    tprintf("Blob learned was invalid!\n");
55
0
  }
56
0
} // LearnBlob
57
58
// Writes stored training data to a .tr file based on the given filename.
59
// Returns false on error.
60
0
bool Classify::WriteTRFile(const char *filename) {
61
0
  bool result = false;
62
0
  std::string tr_filename = filename;
63
0
  tr_filename += ".tr";
64
0
  FILE *fp = fopen(tr_filename.c_str(), "wb");
65
0
  if (fp) {
66
0
    result = tesseract::Serialize(fp, &tr_file_data_[0], tr_file_data_.length());
67
0
    fclose(fp);
68
0
  }
69
0
  tr_file_data_.resize(0);
70
0
  return result;
71
0
}
72
73
} // namespace tesseract