/src/tesseract/include/tesseract/renderer.h
Line | Count | Source (jump to first uncovered line) |
1 | | // SPDX-License-Identifier: Apache-2.0 |
2 | | // File: renderer.h |
3 | | // Description: Rendering interface to inject into TessBaseAPI |
4 | | // |
5 | | // (C) Copyright 2011, Google Inc. |
6 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
7 | | // you may not use this file except in compliance with the License. |
8 | | // You may obtain a copy of the License at |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // Unless required by applicable law or agreed to in writing, software |
11 | | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | // See the License for the specific language governing permissions and |
14 | | // limitations under the License. |
15 | | |
16 | | #ifndef TESSERACT_API_RENDERER_H_ |
17 | | #define TESSERACT_API_RENDERER_H_ |
18 | | |
19 | | #include "export.h" |
20 | | |
21 | | // To avoid collision with other typenames include the ABSOLUTE MINIMUM |
22 | | // complexity of includes here. Use forward declarations wherever possible |
23 | | // and hide includes of complex types in baseapi.cpp. |
24 | | #include <cstdint> |
25 | | #include <string> // for std::string |
26 | | #include <vector> // for std::vector |
27 | | |
28 | | struct Pix; |
29 | | |
30 | | namespace tesseract { |
31 | | |
32 | | class TessBaseAPI; |
33 | | |
34 | | /** |
35 | | * Interface for rendering tesseract results into a document, such as text, |
36 | | * HOCR or pdf. This class is abstract. Specific classes handle individual |
37 | | * formats. This interface is then used to inject the renderer class into |
38 | | * tesseract when processing images. |
39 | | * |
40 | | * For simplicity implementing this with tesseract version 3.01, |
41 | | * the renderer contains document state that is cleared from document |
42 | | * to document just as the TessBaseAPI is. This way the base API can just |
43 | | * delegate its rendering functionality to injected renderers, and the |
44 | | * renderers can manage the associated state needed for the specific formats |
45 | | * in addition to the heuristics for producing it. |
46 | | */ |
47 | | class TESS_API TessResultRenderer { |
48 | | public: |
49 | | virtual ~TessResultRenderer(); |
50 | | |
51 | | // Takes ownership of the pointer, so it must be a new'd instance. |
52 | | // Renderers aren't ordered, but this joins the sequence headed by the next |
53 | | // parameter with the existing next() sequence. Renderers should be unique across both lists. |
54 | | void insert(TessResultRenderer *next); |
55 | | |
56 | | // Returns the next renderer or nullptr. |
57 | 0 | TessResultRenderer *next() { |
58 | 0 | return next_; |
59 | 0 | } |
60 | | |
61 | | /** |
62 | | * Starts a new document with the given title. |
63 | | * This clears the contents of the output data. |
64 | | * Title should use UTF-8 encoding. |
65 | | */ |
66 | | bool BeginDocument(const char *title); |
67 | | |
68 | | /** |
69 | | * Adds the recognized text from the source image to the current document. |
70 | | * Invalid if BeginDocument not yet called. |
71 | | * |
72 | | * Note that this API is a bit weird but is designed to fit into the |
73 | | * current TessBaseAPI implementation where the api has lots of state |
74 | | * information that we might want to add in. |
75 | | */ |
76 | | bool AddImage(TessBaseAPI *api); |
77 | | |
78 | | /** |
79 | | * Finishes the document and finalizes the output data. |
80 | | * Invalid if BeginDocument not yet called. |
81 | | */ |
82 | | bool EndDocument(); |
83 | | |
84 | 0 | const char *file_extension() const { // standard extension for generated output |
85 | 0 | return file_extension_; |
86 | 0 | } |
87 | 0 | const char *title() const { // title of the document being rendered, as a C string |
88 | 0 | return title_.c_str(); |
89 | 0 | } |
90 | | |
91 | | // Is everything fine? Otherwise something went wrong. |
92 | 0 | bool happy() const { |
93 | 0 | return happy_; |
94 | 0 | } |
95 | | |
96 | | /** |
97 | | * Returns the index of the last image given to AddImage |
98 | | * (i.e. images are incremented whether the image succeeded or not) |
99 | | * |
100 | | * This is always defined. It means either the number of the |
101 | | * current image, the last image ended, or in the completed document |
102 | | * depending on when in the document lifecycle you are looking at it. |
103 | | * Will return -1 if a document was never started. |
104 | | */ |
105 | 0 | int imagenum() const { |
106 | 0 | return imagenum_; |
107 | 0 | } |
108 | | |
109 | | protected: |
110 | | /** |
111 | | * Called by concrete classes. |
112 | | * |
113 | | * outputbase is the name of the output file excluding |
114 | | * extension. For example, "/path/to/chocolate-chip-cookie-recipe" |
115 | | * |
116 | | * extension indicates the file extension to be used for output |
117 | | * files. For example "pdf" will produce a .pdf file, and "hocr" |
118 | | * will produce .hocr files. |
119 | | */ |
120 | | TessResultRenderer(const char *outputbase, const char *extension); |
121 | | |
122 | | // Hook for specialized handling in BeginDocument() |
123 | | virtual bool BeginDocumentHandler(); |
124 | | |
125 | | // This must be overridden to render the OCR'd results |
126 | | virtual bool AddImageHandler(TessBaseAPI *api) = 0; |
127 | | |
128 | | // Hook for specialized handling in EndDocument() |
129 | | virtual bool EndDocumentHandler(); |
130 | | |
131 | | // Renderers can call this to append '\0' terminated strings to |
132 | | // the renderer's output (no GetOutput accessor is declared in this class; the only output state here is fout_). |
133 | | // This method will grow the output buffer if needed. |
134 | | void AppendString(const char *s); |
135 | | |
136 | | // Renderers can call this to append binary byte sequences to |
137 | | // the renderer's output. Note that s is not necessarily |
138 | | // '\0' terminated (and can contain '\0' within it); len is the number of bytes. |
139 | | // This method will grow the output buffer if needed. |
140 | | void AppendData(const char *s, int len); |
141 | | |
142 | | template <typename T> |
143 | | auto AppendData(T &&d) { // convenience overload for objects exposing data() and size(); returns d.size() |
144 | | AppendData(d.data(), static_cast<int>(d.size())); // explicit cast: the overload above takes an int length |
145 | | return d.size(); |
146 | | } |
147 | | |
148 | | private: |
149 | | TessResultRenderer *next_; // Can link multiple renderers together |
150 | | FILE *fout_; // output file pointer (NOTE(review): FILE needs <cstdio>, which this header does not include directly) |
151 | | const char *file_extension_; // standard extension for generated output |
152 | | std::string title_; // title of document being rendered |
153 | | int imagenum_; // index of last image added |
154 | | bool happy_; // I get grumpy when the disk fills up, etc. |
155 | | }; |
156 | | |
157 | | /** |
158 | | * Renders tesseract output into a plain UTF-8 text string. Only the required AddImageHandler() hook is overridden. |
159 | | */ |
160 | | class TESS_API TessTextRenderer : public TessResultRenderer { |
161 | | public: |
162 | | explicit TessTextRenderer(const char *outputbase); |
163 | | |
164 | | protected: |
165 | | bool AddImageHandler(TessBaseAPI *api) override; |
166 | | }; |
167 | | |
168 | | /** |
169 | | * Renders tesseract output into an hOCR text string. The font_info constructor argument selects whether font information is printed. |
170 | | */ |
171 | | class TESS_API TessHOcrRenderer : public TessResultRenderer { |
172 | | public: |
173 | | explicit TessHOcrRenderer(const char *outputbase, bool font_info); |
174 | | explicit TessHOcrRenderer(const char *outputbase); |
175 | | |
176 | | protected: |
177 | | bool BeginDocumentHandler() override; |
178 | | bool AddImageHandler(TessBaseAPI *api) override; |
179 | | bool EndDocumentHandler() override; |
180 | | |
181 | | private: |
182 | | bool font_info_; // whether to print font information |
183 | | }; |
184 | | |
185 | | /** |
186 | | * Renders tesseract output into an ALTO text string. Overrides the begin-document, add-image and end-document hooks. |
187 | | */ |
188 | | class TESS_API TessAltoRenderer : public TessResultRenderer { |
189 | | public: |
190 | | explicit TessAltoRenderer(const char *outputbase); |
191 | | |
192 | | protected: |
193 | | bool BeginDocumentHandler() override; |
194 | | bool AddImageHandler(TessBaseAPI *api) override; |
195 | | bool EndDocumentHandler() override; |
196 | | |
197 | | private: |
198 | | bool begin_document; |
199 | | }; |
200 | | |
201 | | /** |
202 | | * Renders Tesseract output into a PAGE XML text string. Overrides the begin-document, add-image and end-document hooks. |
203 | | */ |
204 | | class TESS_API TessPAGERenderer : public TessResultRenderer { |
205 | | public: |
206 | | explicit TessPAGERenderer(const char *outputbase); |
207 | | |
208 | | protected: |
209 | | bool BeginDocumentHandler() override; |
210 | | bool AddImageHandler(TessBaseAPI *api) override; |
211 | | bool EndDocumentHandler() override; |
212 | | |
213 | | private: |
214 | | bool begin_document; |
215 | | }; |
216 | | |
217 | | |
218 | | /** |
219 | | * Renders Tesseract output into a TSV string. The font_info constructor argument selects whether font information is printed. |
220 | | */ |
221 | | class TESS_API TessTsvRenderer : public TessResultRenderer { |
222 | | public: |
223 | | explicit TessTsvRenderer(const char *outputbase, bool font_info); |
224 | | explicit TessTsvRenderer(const char *outputbase); |
225 | | |
226 | | protected: |
227 | | bool BeginDocumentHandler() override; |
228 | | bool AddImageHandler(TessBaseAPI *api) override; |
229 | | bool EndDocumentHandler() override; |
230 | | |
231 | | private: |
232 | | bool font_info_; // whether to print font information |
233 | | }; |
234 | | |
235 | | /** |
236 | | * Renders tesseract output into searchable PDF. Overrides the begin-document, add-image and end-document hooks. |
237 | | */ |
238 | | class TESS_API TessPDFRenderer : public TessResultRenderer { |
239 | | public: |
240 | | // datadir is the location of the TESSDATA. We need it because |
241 | | // we load a custom PDF font from this location. If textonly is set, images are skipped. |
242 | | TessPDFRenderer(const char *outputbase, const char *datadir, |
243 | | bool textonly = false); |
244 | | |
245 | | protected: |
246 | | bool BeginDocumentHandler() override; |
247 | | bool AddImageHandler(TessBaseAPI *api) override; |
248 | | bool EndDocumentHandler() override; |
249 | | |
250 | | private: |
251 | | // We don't want to have every image in memory at once, |
252 | | // so we store some metadata as we go along producing |
253 | | // PDFs one page at a time. At the end, that metadata is |
254 | | // used to make everything that isn't easily handled in a |
255 | | // streaming fashion. |
256 | | long int obj_; // counter for PDF objects |
257 | | std::vector<uint64_t> offsets_; // offset of every PDF object in bytes |
258 | | std::vector<long int> pages_; // object number for every /Page object |
259 | | std::string datadir_; // where to find the custom font |
260 | | bool textonly_; // skip images if set |
261 | | // Bookkeeping only; presumably the caller emits the object data itself (DIY = Do It Yourself). |
262 | | void AppendPDFObjectDIY(size_t objectsize); |
263 | | // Bookkeeping + emit data. |
264 | | void AppendPDFObject(const char *data); |
265 | | // Create the /Contents object for an entire page. |
266 | | char *GetPDFTextObjects(TessBaseAPI *api, double width, double height); |
267 | | // Turn an image into a PDF object. Only transcode if we have to. |
268 | | static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum, |
269 | | char **pdf_object, long int *pdf_object_size, |
270 | | int jpg_quality); |
271 | | }; |
272 | | |
273 | | /** |
274 | | * Renders tesseract output into a UNLV format text string |
275 | | */ |
276 | | class TESS_API TessUnlvRenderer : public TessResultRenderer { |
277 | | public: |
278 | | explicit TessUnlvRenderer(const char *outputbase); |
279 | | |
280 | | protected: |
281 | | bool AddImageHandler(TessBaseAPI *api) override; |
282 | | }; |
283 | | |
284 | | /** |
285 | | * Renders tesseract output into a plain UTF-8 text string in LSTMBox (box file) format |
286 | | */ |
287 | | class TESS_API TessLSTMBoxRenderer : public TessResultRenderer { |
288 | | public: |
289 | | explicit TessLSTMBoxRenderer(const char *outputbase); |
290 | | |
291 | | protected: |
292 | | bool AddImageHandler(TessBaseAPI *api) override; |
293 | | }; |
294 | | |
295 | | /** |
296 | | * Renders tesseract output into a plain UTF-8 text string in box file format |
297 | | */ |
298 | | class TESS_API TessBoxTextRenderer : public TessResultRenderer { |
299 | | public: |
300 | | explicit TessBoxTextRenderer(const char *outputbase); |
301 | | |
302 | | protected: |
303 | | bool AddImageHandler(TessBaseAPI *api) override; |
304 | | }; |
305 | | |
306 | | /** |
307 | | * Renders tesseract output into a plain UTF-8 text string in WordStr box file format |
308 | | */ |
309 | | class TESS_API TessWordStrBoxRenderer : public TessResultRenderer { |
310 | | public: |
311 | | explicit TessWordStrBoxRenderer(const char *outputbase); |
312 | | |
313 | | protected: |
314 | | bool AddImageHandler(TessBaseAPI *api) override; |
315 | | }; |
316 | | |
317 | | #ifndef DISABLED_LEGACY_ENGINE |
318 | | |
319 | | /** |
320 | | * Renders tesseract output into an OSD (orientation and script detection) text string. Only declared when DISABLED_LEGACY_ENGINE is not defined. |
321 | | */ |
322 | | class TESS_API TessOsdRenderer : public TessResultRenderer { |
323 | | public: |
324 | | explicit TessOsdRenderer(const char *outputbase); |
325 | | |
326 | | protected: |
327 | | bool AddImageHandler(TessBaseAPI *api) override; |
328 | | }; |
329 | | |
330 | | #endif // ndef DISABLED_LEGACY_ENGINE |
331 | | |
332 | | } // namespace tesseract. |
333 | | |
334 | | #endif // TESSERACT_API_RENDERER_H_ |