/src/tesseract/include/tesseract/renderer.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // SPDX-License-Identifier: Apache-2.0  | 
2  |  | // File:        renderer.h  | 
3  |  | // Description: Rendering interface to inject into TessBaseAPI  | 
4  |  | //  | 
5  |  | // (C) Copyright 2011, Google Inc.  | 
6  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
7  |  | // you may not use this file except in compliance with the License.  | 
8  |  | // You may obtain a copy of the License at  | 
9  |  | // http://www.apache.org/licenses/LICENSE-2.0  | 
10  |  | // Unless required by applicable law or agreed to in writing, software  | 
11  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
12  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
13  |  | // See the License for the specific language governing permissions and  | 
14  |  | // limitations under the License.  | 
15  |  |  | 
16  |  | #ifndef TESSERACT_API_RENDERER_H_  | 
17  |  | #define TESSERACT_API_RENDERER_H_  | 
18  |  |  | 
19  |  | #include "export.h"  | 
20  |  |  | 
21  |  | // To avoid collision with other typenames include the ABSOLUTE MINIMUM  | 
22  |  | // complexity of includes here. Use forward declarations wherever possible  | 
23  |  | // and hide includes of complex types in baseapi.cpp.  | 
24  |  | #include <cstdint>  | 
25  |  | #include <string> // for std::string  | 
26  |  | #include <vector> // for std::vector  | 
27  |  |  | 
28  |  | struct Pix;  | 
29  |  |  | 
30  |  | namespace tesseract { | 
31  |  |  | 
32  |  | class TessBaseAPI;  | 
33  |  |  | 
34  |  | /**  | 
35  |  |  * Interface for rendering tesseract results into a document, such as text,  | 
36  |  |  * HOCR or pdf. This class is abstract. Specific classes handle individual  | 
37  |  |  * formats. This interface is then used to inject the renderer class into  | 
38  |  |  * tesseract when processing images.  | 
39  |  |  *  | 
40  |  |  * For simplicity implementing this with tesseract version 3.01,  | 
41  |  |  * the renderer contains document state that is cleared from document  | 
42  |  |  * to document just as the TessBaseAPI is. This way the base API can just  | 
43  |  |  * delegate its rendering functionality to injected renderers, and the  | 
44  |  |  * renderers can manage the associated state needed for the specific formats  | 
45  |  |  * in addition to the heuristics for producing it. The public BeginDocument() / AddImage() / EndDocument() entry points are non-virtual; concrete renderers customize them through the protected virtual *Handler() hooks. NOTE(review): the class stores raw next_ and fout_ pointers and declares no copy operations, so copying an instance looks unsafe - confirm.  | 
46  |  |  */  | 
47  |  | class TESS_API TessResultRenderer { | 
48  |  | public:  | 
49  |  |   virtual ~TessResultRenderer();  | 
50  |  |  | 
51  |  |   // Takes ownership of pointer so must be new'd instance.  | 
52  |  |   // Renderers aren't ordered, but appends the sequences of next parameter  | 
53  |  |   // and existing next(). The renderers should be unique across both lists.  | 
54  |  |   void insert(TessResultRenderer *next);  | 
55  |  |  | 
56  |  |   // Returns the next renderer or nullptr.  | 
57  | 0  |   TessResultRenderer *next() { | 
58  | 0  |     return next_;  | 
59  | 0  |   }  | 
60  |  |  | 
61  |  |   /**  | 
62  |  |    * Starts a new document with the given title.  | 
63  |  |    * This clears the contents of the output data.  | 
64  |  |    * Title should use UTF-8 encoding.  | 
65  |  |    */  | 
66  |  |   bool BeginDocument(const char *title);  | 
67  |  |  | 
68  |  |   /**  | 
69  |  |    * Adds the recognized text from the source image to the current document.  | 
70  |  |    * Invalid if BeginDocument not yet called.  | 
71  |  |    *  | 
72  |  |    * Note that this API is a bit weird but is designed to fit into the  | 
73  |  |    * current TessBaseAPI implementation where the api has lots of state  | 
74  |  |    * information that we might want to add in.  | 
75  |  |    */  | 
76  |  |   bool AddImage(TessBaseAPI *api);  | 
77  |  |  | 
78  |  |   /**  | 
79  |  |    * Finishes the document and finalizes the output data.  | 
80  |  |    * Invalid if BeginDocument not yet called.  | 
81  |  |    */  | 
82  |  |   bool EndDocument();  | 
83  |  |  | 
84  | 0  |   const char *file_extension() const { // Returns file_extension_, the standard extension for generated output.  | 
85  | 0  |     return file_extension_;  | 
86  | 0  |   }  | 
87  | 0  |   const char *title() const { // Returns the document title as a C string that points into title_.  | 
88  | 0  |     return title_.c_str();  | 
89  | 0  |   }  | 
90  |  |  | 
91  |  |   // Is everything fine? Otherwise something went wrong.  | 
92  | 0  |   bool happy() const { | 
93  | 0  |     return happy_;  | 
94  | 0  |   }  | 
95  |  |  | 
96  |  |   /**  | 
97  |  |    * Returns the index of the last image given to AddImage  | 
98  |  |    * (i.e. images are incremented whether the image succeeded or not)  | 
99  |  |    *  | 
100  |  |    * This is always defined. It means either the number of the  | 
101  |  |    * current image, the last image ended, or in the completed document  | 
102  |  |    * depending on when in the document lifecycle you are looking at it.  | 
103  |  |    * Will return -1 if a document was never started.  | 
104  |  |    */  | 
105  | 0  |   int imagenum() const { | 
106  | 0  |     return imagenum_;  | 
107  | 0  |   }  | 
108  |  |  | 
109  |  | protected:  | 
110  |  |   /**  | 
111  |  |    * Called by concrete classes.  | 
112  |  |    *  | 
113  |  |    * outputbase is the name of the output file excluding  | 
114  |  |    * extension. For example, "/path/to/chocolate-chip-cookie-recipe"  | 
115  |  |    *  | 
116  |  |    * extension indicates the file extension to be used for output  | 
117  |  |    * files. For example "pdf" will produce a .pdf file, and "hocr"  | 
118  |  |    * will produce .hocr files.  | 
119  |  |    */  | 
120  |  |   TessResultRenderer(const char *outputbase, const char *extension);  | 
121  |  |  | 
122  |  |   // Hook for specialized handling in BeginDocument(). Virtual but not pure, so overriding it is optional.  | 
123  |  |   virtual bool BeginDocumentHandler();  | 
124  |  |  | 
125  |  |   // This must be overridden to render the OCR'd results (pure virtual).  | 
126  |  |   virtual bool AddImageHandler(TessBaseAPI *api) = 0;  | 
127  |  |  | 
128  |  |   // Hook for specialized handling in EndDocument(). Virtual but not pure, so overriding it is optional.  | 
129  |  |   virtual bool EndDocumentHandler();  | 
130  |  |  | 
131  |  |   // Renderers can call this to append '\0' terminated strings into  | 
132  |  |   // the output string returned by GetOutput.  | 
133  |  |   // This method will grow the output buffer if needed. NOTE(review): no GetOutput is declared in this class and the only sink visible here is fout_ - presumably the data goes to the output file; confirm in the implementation.  | 
134  |  |   void AppendString(const char *s);  | 
135  |  |  | 
136  |  |   // Renderers can call this to append binary byte sequences into  | 
137  |  |   // the output string returned by GetOutput. Note that s is not necessarily  | 
138  |  |   // '\0' terminated (and can contain '\0' within it).  | 
139  |  |   // This method will grow the output buffer if needed. len is the number of bytes to take from s.  | 
140  |  |   void AppendData(const char *s, int len);  | 
141  |  |  | 
142  |  |   template <typename T> // Convenience overload for any object that exposes data() and size().  | 
143  |  |   auto AppendData(T &&d) { | 
144  |  |     AppendData(d.data(), static_cast<int>(d.size())); // explicit cast: the length parameter is int, size() usually is not  | 
145  |  |     return d.size(); // returns size() in its own type, not the value narrowed to int  | 
146  |  |   }  | 
147  |  |  | 
148  |  | private:  | 
149  |  |   TessResultRenderer *next_;   // Can link multiple renderers together  | 
150  |  |   FILE *fout_;                 // output file pointer; NOTE(review): FILE is declared in <cstdio>, which this header does not include directly - confirm it always arrives transitively  | 
151  |  |   const char *file_extension_; // standard extension for generated output  | 
152  |  |   std::string title_;          // title of document being rendered  | 
153  |  |   int imagenum_;               // index of last image added  | 
154  |  |   bool happy_;                 // I get grumpy when the disk fills up, etc.  | 
155  |  | };  | 
156  |  |  | 
157  |  | /**  | 
158  |  |  * Renders tesseract output into a plain UTF-8 text string. Only the per-image AddImageHandler() hook is overridden here; the begin/end document hooks are not redeclared. The constructor takes only outputbase (for the base class, the output file name excluding extension).  | 
159  |  |  */  | 
160  |  | class TESS_API TessTextRenderer : public TessResultRenderer { | 
161  |  | public:  | 
162  |  |   explicit TessTextRenderer(const char *outputbase);  | 
163  |  |  | 
164  |  | protected:  | 
165  |  |   bool AddImageHandler(TessBaseAPI *api) override;  | 
166  |  | };  | 
167  |  |  | 
168  |  | /**  | 
169  |  |  * Renders tesseract output into an hocr text string. Overrides all three document hooks (begin, per-image, end). The two-argument constructor takes font_info explicitly; NOTE(review): the font_info value used by the one-argument constructor is chosen in the implementation and is not visible here.  | 
170  |  |  */  | 
171  |  | class TESS_API TessHOcrRenderer : public TessResultRenderer { | 
172  |  | public:  | 
173  |  |   explicit TessHOcrRenderer(const char *outputbase, bool font_info);  | 
174  |  |   explicit TessHOcrRenderer(const char *outputbase);  | 
175  |  |  | 
176  |  | protected:  | 
177  |  |   bool BeginDocumentHandler() override;  | 
178  |  |   bool AddImageHandler(TessBaseAPI *api) override;  | 
179  |  |   bool EndDocumentHandler() override;  | 
180  |  |  | 
181  |  | private:  | 
182  |  |   bool font_info_; // whether to print font information (presumably set from the constructor argument of the same name)  | 
183  |  | };  | 
184  |  |  | 
185  |  | /**  | 
186  |  |  * Renders tesseract output into an alto text string. Overrides all three document hooks (begin, per-image, end). NOTE(review): the private begin_document flag is undocumented and lacks the trailing underscore used by the other data members in this header - presumably it records whether the document header still has to be emitted; confirm in the implementation.  | 
187  |  |  */  | 
188  |  | class TESS_API TessAltoRenderer : public TessResultRenderer { | 
189  |  | public:  | 
190  |  |   explicit TessAltoRenderer(const char *outputbase);  | 
191  |  |  | 
192  |  | protected:  | 
193  |  |   bool BeginDocumentHandler() override;  | 
194  |  |   bool AddImageHandler(TessBaseAPI *api) override;  | 
195  |  |   bool EndDocumentHandler() override;  | 
196  |  |  | 
197  |  | private:  | 
198  |  |   bool begin_document;  | 
199  |  | };  | 
200  |  |  | 
201  |  | /**  | 
202  |  |  * Renders Tesseract output into a PAGE XML text string. Overrides all three document hooks (begin, per-image, end). NOTE(review): the private begin_document flag is undocumented and lacks the trailing underscore used by the other data members in this header - presumably it records whether the document header still has to be emitted; confirm in the implementation.  | 
203  |  |  */  | 
204  |  | class TESS_API TessPAGERenderer : public TessResultRenderer { | 
205  |  | public:  | 
206  |  |   explicit TessPAGERenderer(const char *outputbase);  | 
207  |  |  | 
208  |  | protected:  | 
209  |  |   bool BeginDocumentHandler() override;  | 
210  |  |   bool AddImageHandler(TessBaseAPI *api) override;  | 
211  |  |   bool EndDocumentHandler() override;  | 
212  |  |  | 
213  |  | private:  | 
214  |  |   bool begin_document;  | 
215  |  | };  | 
216  |  |  | 
217  |  |  | 
218  |  | /**  | 
219  |  |  * Renders Tesseract output into a TSV string. Overrides all three document hooks (begin, per-image, end). The two-argument constructor takes font_info explicitly; NOTE(review): the font_info value used by the one-argument constructor is chosen in the implementation and is not visible here.  | 
220  |  |  */  | 
221  |  | class TESS_API TessTsvRenderer : public TessResultRenderer { | 
222  |  | public:  | 
223  |  |   explicit TessTsvRenderer(const char *outputbase, bool font_info);  | 
224  |  |   explicit TessTsvRenderer(const char *outputbase);  | 
225  |  |  | 
226  |  | protected:  | 
227  |  |   bool BeginDocumentHandler() override;  | 
228  |  |   bool AddImageHandler(TessBaseAPI *api) override;  | 
229  |  |   bool EndDocumentHandler() override;  | 
230  |  |  | 
231  |  | private:  | 
232  |  |   bool font_info_; // whether to print font information (presumably set from the constructor argument of the same name)  | 
233  |  | };  | 
234  |  |  | 
235  |  | /**  | 
236  |  |  * Renders tesseract output into searchable PDF. Overrides all three document hooks (begin, per-image, end) and keeps per-document bookkeeping (object counter, object offsets, page object numbers) in private members.  | 
237  |  |  */  | 
238  |  | class TESS_API TessPDFRenderer : public TessResultRenderer { | 
239  |  | public:  | 
240  |  |   // datadir is the location of the TESSDATA. We need it because  | 
241  |  |   // we load a custom PDF font from this location. textonly defaults to false; see textonly_ below ("skip images if set").  | 
242  |  |   TessPDFRenderer(const char *outputbase, const char *datadir,  | 
243  |  |                   bool textonly = false);  | 
244  |  |  | 
245  |  | protected:  | 
246  |  |   bool BeginDocumentHandler() override;  | 
247  |  |   bool AddImageHandler(TessBaseAPI *api) override;  | 
248  |  |   bool EndDocumentHandler() override;  | 
249  |  |  | 
250  |  | private:  | 
251  |  |   // We don't want to have every image in memory at once,  | 
252  |  |   // so we store some metadata as we go along producing  | 
253  |  |   // PDFs one page at a time. At the end, that metadata is  | 
254  |  |   // used to make everything that isn't easily handled in a  | 
255  |  |   // streaming fashion.  | 
256  |  |   long int obj_;                  // counter for PDF objects  | 
257  |  |   std::vector<uint64_t> offsets_; // offset of every PDF object in bytes  | 
258  |  |   std::vector<long int> pages_;   // object number for every /Page object  | 
259  |  |   std::string datadir_;           // where to find the custom font  | 
260  |  |   bool textonly_;                 // skip images if set  | 
261  |  |   // Bookkeeping only. DIY = Do It Yourself. Takes the object's size; presumably the caller emits the object bytes itself - confirm.  | 
262  |  |   void AppendPDFObjectDIY(size_t objectsize);  | 
263  |  |   // Bookkeeping + emit data.  | 
264  |  |   void AppendPDFObject(const char *data);  | 
265  |  |   // Create the /Contents object for an entire page. NOTE(review): returns a raw char*; who owns and frees the buffer is not visible from this header - confirm.  | 
266  |  |   char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);  | 
267  |  |   // Turn an image into a PDF object. Only transcode if we have to. pdf_object and pdf_object_size look like out-parameters; NOTE(review): ownership of *pdf_object is not visible from this header - confirm.  | 
268  |  |   static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,  | 
269  |  |                             char **pdf_object, long int *pdf_object_size,  | 
270  |  |                             int jpg_quality);  | 
271  |  | };  | 
272  |  |  | 
273  |  | /**  | 
274  |  |  * Renders tesseract output into a text string; judging by the class name this is presumably the UNLV format. NOTE(review): the previous wording ("a plain UTF-8 text string") was identical to TessTextRenderer's description and looks copy-pasted - confirm the actual format and encoding. Only the per-image AddImageHandler() hook is overridden here.  | 
275  |  |  */  | 
276  |  | class TESS_API TessUnlvRenderer : public TessResultRenderer { | 
277  |  | public:  | 
278  |  |   explicit TessUnlvRenderer(const char *outputbase);  | 
279  |  |  | 
280  |  | protected:  | 
281  |  |   bool AddImageHandler(TessBaseAPI *api) override;  | 
282  |  | };  | 
283  |  |  | 
284  |  | /**  | 
285  |  |  * Renders tesseract output into a plain UTF-8 text string for LSTMBox. Only the per-image AddImageHandler() hook is overridden here; the begin/end document hooks are not redeclared.  | 
286  |  |  */  | 
287  |  | class TESS_API TessLSTMBoxRenderer : public TessResultRenderer { | 
288  |  | public:  | 
289  |  |   explicit TessLSTMBoxRenderer(const char *outputbase);  | 
290  |  |  | 
291  |  | protected:  | 
292  |  |   bool AddImageHandler(TessBaseAPI *api) override;  | 
293  |  | };  | 
294  |  |  | 
295  |  | /**  | 
296  |  |  * Renders tesseract output into a plain UTF-8 text string; judging by the class name this is presumably box-file text. NOTE(review): this description is identical to TessTextRenderer's and looks copy-pasted - confirm what distinguishes the output. Only the per-image AddImageHandler() hook is overridden here.  | 
297  |  |  */  | 
298  |  | class TESS_API TessBoxTextRenderer : public TessResultRenderer { | 
299  |  | public:  | 
300  |  |   explicit TessBoxTextRenderer(const char *outputbase);  | 
301  |  |  | 
302  |  | protected:  | 
303  |  |   bool AddImageHandler(TessBaseAPI *api) override;  | 
304  |  | };  | 
305  |  |  | 
306  |  | /**  | 
307  |  |  * Renders tesseract output into a plain UTF-8 text string in WordStr format. Only the per-image AddImageHandler() hook is overridden here; the begin/end document hooks are not redeclared.  | 
308  |  |  */  | 
309  |  | class TESS_API TessWordStrBoxRenderer : public TessResultRenderer { | 
310  |  | public:  | 
311  |  |   explicit TessWordStrBoxRenderer(const char *outputbase);  | 
312  |  |  | 
313  |  | protected:  | 
314  |  |   bool AddImageHandler(TessBaseAPI *api) override;  | 
315  |  | };  | 
316  |  |  | 
317  |  | #ifndef DISABLED_LEGACY_ENGINE  | 
318  |  |  | 
319  |  | /**  | 
320  |  |  * Renders tesseract output into an osd text string. This class is declared only when DISABLED_LEGACY_ENGINE is not defined. Only the per-image AddImageHandler() hook is overridden here.  | 
321  |  |  */  | 
322  |  | class TESS_API TessOsdRenderer : public TessResultRenderer { | 
323  |  | public:  | 
324  |  |   explicit TessOsdRenderer(const char *outputbase);  | 
325  |  |  | 
326  |  | protected:  | 
327  |  |   bool AddImageHandler(TessBaseAPI *api) override;  | 
328  |  | };  | 
329  |  |  | 
330  |  | #endif // ndef DISABLED_LEGACY_ENGINE  | 
331  |  |  | 
332  |  | } // namespace tesseract.  | 
333  |  |  | 
334  |  | #endif // TESSERACT_API_RENDERER_H_  |