Coverage Report

Created: 2025-06-13 07:15

/src/tesseract/include/tesseract/renderer.h
Line
Count
Source (jump to first uncovered line)
1
// SPDX-License-Identifier: Apache-2.0
2
// File:        renderer.h
3
// Description: Rendering interface to inject into TessBaseAPI
4
//
5
// (C) Copyright 2011, Google Inc.
6
// Licensed under the Apache License, Version 2.0 (the "License");
7
// you may not use this file except in compliance with the License.
8
// You may obtain a copy of the License at
9
// http://www.apache.org/licenses/LICENSE-2.0
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
16
#ifndef TESSERACT_API_RENDERER_H_
17
#define TESSERACT_API_RENDERER_H_
18
19
#include "export.h"
20
21
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
22
// complexity of includes here. Use forward declarations wherever possible
23
// and hide includes of complex types in baseapi.cpp.
24
#include <cstdint>
25
#include <string> // for std::string
26
#include <vector> // for std::vector
27
28
struct Pix;
29
30
namespace tesseract {
31
32
class TessBaseAPI;
33
34
/**
35
 * Interface for rendering tesseract results into a document, such as text,
36
 * HOCR or pdf. This class is abstract. Specific classes handle individual
37
 * formats. This interface is then used to inject the renderer class into
38
 * tesseract when processing images.
39
 *
40
 * For simplicity in implementing this with tesseract version 3.01,
41
 * the renderer contains document state that is cleared from document
42
 * to document just as the TessBaseAPI is. This way the base API can just
43
 * delegate its rendering functionality to injected renderers, and the
44
 * renderers can manage the associated state needed for the specific formats
45
 * in addition to the heuristics for producing it.
46
 */
47
class TESS_API TessResultRenderer {
48
public:
49
  virtual ~TessResultRenderer();
50
51
  // Takes ownership of pointer so must be new'd instance.
52
  // Renderers aren't ordered, but this appends the sequences of the next parameter
53
  // and existing next(). The renderers should be unique across both lists.
54
  void insert(TessResultRenderer *next);
55
56
  // Returns the next renderer or nullptr.
57
0
  TessResultRenderer *next() {
58
0
    return next_;
59
0
  }
60
61
  /**
62
   * Starts a new document with the given title.
63
   * This clears the contents of the output data.
64
   * Title should use UTF-8 encoding.
65
   */
66
  bool BeginDocument(const char *title);
67
68
  /**
69
   * Adds the recognized text from the source image to the current document.
70
   * Invalid if BeginDocument not yet called.
71
   *
72
   * Note that this API is a bit weird but is designed to fit into the
73
   * current TessBaseAPI implementation where the api has lots of state
74
   * information that we might want to add in.
75
   */
76
  bool AddImage(TessBaseAPI *api);
77
78
  /**
79
   * Finishes the document and finalizes the output data.
80
   * Invalid if BeginDocument not yet called.
81
   */
82
  bool EndDocument();
83
84
0
  const char *file_extension() const {
85
0
    return file_extension_;
86
0
  }
87
0
  const char *title() const {
88
0
    return title_.c_str();
89
0
  }
90
91
  // Is everything fine? Otherwise something went wrong.
92
0
  bool happy() const {
93
0
    return happy_;
94
0
  }
95
96
  /**
97
   * Returns the index of the last image given to AddImage
98
   * (i.e. the index is incremented whether the image succeeded or not)
99
   *
100
   * This is always defined. It means either the number of the
101
   * current image, the last image ended, or in the completed document
102
   * depending on when in the document lifecycle you are looking at it.
103
   * Will return -1 if a document was never started.
104
   */
105
0
  int imagenum() const {
106
0
    return imagenum_;
107
0
  }
108
109
protected:
110
  /**
111
   * Called by concrete classes.
112
   *
113
   * outputbase is the name of the output file excluding
114
   * extension. For example, "/path/to/chocolate-chip-cookie-recipe"
115
   *
116
   * extension indicates the file extension to be used for output
117
   * files. For example "pdf" will produce a .pdf file, and "hocr"
118
   * will produce .hocr files.
119
   */
120
  TessResultRenderer(const char *outputbase, const char *extension);
121
122
  // Hook for specialized handling in BeginDocument()
123
  virtual bool BeginDocumentHandler();
124
125
  // This must be overridden to render the OCR'd results
126
  virtual bool AddImageHandler(TessBaseAPI *api) = 0;
127
128
  // Hook for specialized handling in EndDocument()
129
  virtual bool EndDocumentHandler();
130
131
  // Renderers can call this to append '\0' terminated strings into
132
  // the output string returned by GetOutput.
133
  // This method will grow the output buffer if needed.
134
  void AppendString(const char *s);
135
136
  // Renderers can call this to append binary byte sequences into
137
  // the output string returned by GetOutput. Note that s is not necessarily
138
  // '\0' terminated (and can contain '\0' within it).
139
  // This method will grow the output buffer if needed.
140
  void AppendData(const char *s, int len);
141
142
  template <typename T>
143
  auto AppendData(T &&d) {
144
    AppendData(d.data(), d.size());
145
    return d.size();
146
  }
147
148
private:
149
  TessResultRenderer *next_;   // Can link multiple renderers together
150
  FILE *fout_;                 // output file pointer
151
  const char *file_extension_; // standard extension for generated output
152
  std::string title_;          // title of document being rendered
153
  int imagenum_;               // index of last image added
154
  bool happy_;                 // I get grumpy when the disk fills up, etc.
155
};
156
157
/**
158
 * Renders tesseract output into a plain UTF-8 text string
159
 */
160
class TESS_API TessTextRenderer : public TessResultRenderer {
161
public:
162
  explicit TessTextRenderer(const char *outputbase);
163
164
protected:
165
  bool AddImageHandler(TessBaseAPI *api) override;
166
};
167
168
/**
169
 * Renders tesseract output into an hOCR text string
170
 */
171
class TESS_API TessHOcrRenderer : public TessResultRenderer {
172
public:
173
  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
174
  explicit TessHOcrRenderer(const char *outputbase);
175
176
protected:
177
  bool BeginDocumentHandler() override;
178
  bool AddImageHandler(TessBaseAPI *api) override;
179
  bool EndDocumentHandler() override;
180
181
private:
182
  bool font_info_; // whether to print font information
183
};
184
185
/**
186
 * Renders tesseract output into an ALTO text string
187
 */
188
class TESS_API TessAltoRenderer : public TessResultRenderer {
189
public:
190
  explicit TessAltoRenderer(const char *outputbase);
191
192
protected:
193
  bool BeginDocumentHandler() override;
194
  bool AddImageHandler(TessBaseAPI *api) override;
195
  bool EndDocumentHandler() override;
196
197
private:
198
  bool begin_document;
199
};
200
201
/**
202
 * Renders Tesseract output into a PAGE XML text string
203
 */
204
class TESS_API TessPAGERenderer : public TessResultRenderer {
205
public:
206
  explicit TessPAGERenderer(const char *outputbase);
207
208
protected:
209
  bool BeginDocumentHandler() override;
210
  bool AddImageHandler(TessBaseAPI *api) override;
211
  bool EndDocumentHandler() override;
212
213
private:
214
  bool begin_document;
215
};
216
217
218
/**
219
 * Renders Tesseract output into a TSV string
220
 */
221
class TESS_API TessTsvRenderer : public TessResultRenderer {
222
public:
223
  explicit TessTsvRenderer(const char *outputbase, bool font_info);
224
  explicit TessTsvRenderer(const char *outputbase);
225
226
protected:
227
  bool BeginDocumentHandler() override;
228
  bool AddImageHandler(TessBaseAPI *api) override;
229
  bool EndDocumentHandler() override;
230
231
private:
232
  bool font_info_; // whether to print font information
233
};
234
235
/**
236
 * Renders tesseract output into searchable PDF
237
 */
238
class TESS_API TessPDFRenderer : public TessResultRenderer {
239
public:
240
  // datadir is the location of the TESSDATA. We need it because
241
  // we load a custom PDF font from this location.
242
  TessPDFRenderer(const char *outputbase, const char *datadir,
243
                  bool textonly = false);
244
245
protected:
246
  bool BeginDocumentHandler() override;
247
  bool AddImageHandler(TessBaseAPI *api) override;
248
  bool EndDocumentHandler() override;
249
250
private:
251
  // We don't want to have every image in memory at once,
252
  // so we store some metadata as we go along producing
253
  // PDFs one page at a time. At the end, that metadata is
254
  // used to make everything that isn't easily handled in a
255
  // streaming fashion.
256
  long int obj_;                  // counter for PDF objects
257
  std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
258
  std::vector<long int> pages_;   // object number for every /Page object
259
  std::string datadir_;           // where to find the custom font
260
  bool textonly_;                 // skip images if set
261
  // Bookkeeping only. DIY = Do It Yourself.
262
  void AppendPDFObjectDIY(size_t objectsize);
263
  // Bookkeeping + emit data.
264
  void AppendPDFObject(const char *data);
265
  // Create the /Contents object for an entire page.
266
  char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
267
  // Turn an image into a PDF object. Only transcode if we have to.
268
  static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
269
                            char **pdf_object, long int *pdf_object_size,
270
                            int jpg_quality);
271
};
272
273
/**
274
 * Renders tesseract output into a text string in UNLV format
275
 */
276
class TESS_API TessUnlvRenderer : public TessResultRenderer {
277
public:
278
  explicit TessUnlvRenderer(const char *outputbase);
279
280
protected:
281
  bool AddImageHandler(TessBaseAPI *api) override;
282
};
283
284
/**
285
 * Renders tesseract output into a plain UTF-8 text string for LSTMBox
286
 */
287
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
288
public:
289
  explicit TessLSTMBoxRenderer(const char *outputbase);
290
291
protected:
292
  bool AddImageHandler(TessBaseAPI *api) override;
293
};
294
295
/**
296
 * Renders tesseract output into a plain UTF-8 text string in box text format
297
 */
298
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
299
public:
300
  explicit TessBoxTextRenderer(const char *outputbase);
301
302
protected:
303
  bool AddImageHandler(TessBaseAPI *api) override;
304
};
305
306
/**
307
 * Renders tesseract output into a plain UTF-8 text string in WordStr format
308
 */
309
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
310
public:
311
  explicit TessWordStrBoxRenderer(const char *outputbase);
312
313
protected:
314
  bool AddImageHandler(TessBaseAPI *api) override;
315
};
316
317
#ifndef DISABLED_LEGACY_ENGINE
318
319
/**
320
 * Renders tesseract output into an OSD text string
321
 */
322
class TESS_API TessOsdRenderer : public TessResultRenderer {
323
public:
324
  explicit TessOsdRenderer(const char *outputbase);
325
326
protected:
327
  bool AddImageHandler(TessBaseAPI *api) override;
328
};
329
330
#endif // ndef DISABLED_LEGACY_ENGINE
331
332
} // namespace tesseract.
333
334
#endif // TESSERACT_API_RENDERER_H_