/src/tesseract/src/classify/blobclass.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | ** Filename: blobclass.cpp |
3 | | ** Purpose: High level blob classification and training routines. |
4 | | ** Author: Dan Johnson |
5 | | ** |
6 | | ** (c) Copyright Hewlett-Packard Company, 1988. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | ******************************************************************************/ |
17 | | |
18 | | #include <cstdio> |
19 | | |
20 | | #include "classify.h" |
21 | | #include "featdefs.h" |
22 | | #include "mf.h" |
23 | | #include "normfeat.h" |
24 | | |
25 | | namespace tesseract { |
26 | | |
27 | | /*---------------------------------------------------------------------------*/ |
28 | | |
29 | | // Extracts features from the given blob and saves them in the tr_file_data_ |
30 | | // member variable. |
31 | | // fontname: Name of font that this blob was printed in. |
32 | | // cn_denorm: Character normalization transformation to apply to the blob. |
33 | | // fx_info: Character normalization parameters computed with cn_denorm. |
34 | | // blob_text: Ground truth text for the blob. |
35 | | void Classify::LearnBlob(const std::string &fontname, TBLOB *blob, const DENORM &cn_denorm, |
36 | 0 | const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text) { |
37 | 0 | std::unique_ptr<CHAR_DESC_STRUCT> CharDesc(new CHAR_DESC_STRUCT(feature_defs_)); |
38 | 0 | CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm); |
39 | 0 | CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info); |
40 | 0 | CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info); |
41 | 0 | CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info); |
42 | |
|
43 | 0 | if (ValidCharDescription(feature_defs_, CharDesc.get())) { |
44 | | // Label the features with a class name and font name. |
45 | 0 | tr_file_data_ += "\n"; |
46 | 0 | tr_file_data_ += fontname; |
47 | 0 | tr_file_data_ += " "; |
48 | 0 | tr_file_data_ += blob_text; |
49 | 0 | tr_file_data_ += "\n"; |
50 | | |
51 | | // write micro-features to file and clean up |
52 | 0 | WriteCharDescription(feature_defs_, CharDesc.get(), tr_file_data_); |
53 | 0 | } else { |
54 | 0 | tprintf("Blob learned was invalid!\n"); |
55 | 0 | } |
56 | 0 | } // LearnBlob |
57 | | |
58 | | // Writes stored training data to a .tr file based on the given filename. |
59 | | // Returns false on error. |
60 | 0 | bool Classify::WriteTRFile(const char *filename) { |
61 | 0 | bool result = false; |
62 | 0 | std::string tr_filename = filename; |
63 | 0 | tr_filename += ".tr"; |
64 | 0 | FILE *fp = fopen(tr_filename.c_str(), "wb"); |
65 | 0 | if (fp) { |
66 | 0 | result = tesseract::Serialize(fp, &tr_file_data_[0], tr_file_data_.length()); |
67 | 0 | fclose(fp); |
68 | 0 | } |
69 | 0 | tr_file_data_.resize(0); |
70 | 0 | return result; |
71 | 0 | } |
72 | | |
73 | | } // namespace tesseract |