/src/tesseract/src/api/renderer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: renderer.cpp |
3 | | // Description: Rendering interface to inject into TessBaseAPI |
4 | | // |
5 | | // (C) Copyright 2011, Google Inc. |
6 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
7 | | // you may not use this file except in compliance with the License. |
8 | | // You may obtain a copy of the License at |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // Unless required by applicable law or agreed to in writing, software |
11 | | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | // See the License for the specific language governing permissions and |
14 | | // limitations under the License. |
15 | | // |
16 | | /////////////////////////////////////////////////////////////////////// |
17 | | |
18 | | #ifdef HAVE_CONFIG_H |
19 | | # include "config_auto.h" |
20 | | #endif |
21 | | #include <tesseract/baseapi.h> |
22 | | #include <tesseract/renderer.h> |
23 | | #include <cstring> |
24 | | #include <memory> // std::unique_ptr |
25 | | #include <string> // std::string |
26 | | #include "serialis.h" // Serialize |
27 | | |
28 | | namespace tesseract { |
29 | | |
30 | | /********************************************************************** |
31 | | * Base Renderer interface implementation |
32 | | **********************************************************************/ |
33 | | TessResultRenderer::TessResultRenderer(const char *outputbase, const char *extension) |
34 | 0 | : next_(nullptr) |
35 | | , fout_(stdout) |
36 | 0 | , file_extension_(extension) |
37 | 0 | , title_("") |
38 | 0 | , imagenum_(-1) |
39 | 0 | , happy_(true) { |
40 | 0 | if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) { |
41 | 0 | std::string outfile = std::string(outputbase) + "." + extension; |
42 | 0 | fout_ = fopen(outfile.c_str(), "wb"); |
43 | 0 | if (fout_ == nullptr) { |
44 | 0 | happy_ = false; |
45 | 0 | } |
46 | 0 | } |
47 | 0 | } |
48 | | |
49 | 0 | TessResultRenderer::~TessResultRenderer() { |
50 | 0 | if (fout_ != nullptr) { |
51 | 0 | if (fout_ != stdout) { |
52 | 0 | fclose(fout_); |
53 | 0 | } else { |
54 | 0 | clearerr(fout_); |
55 | 0 | } |
56 | 0 | } |
57 | 0 | delete next_; |
58 | 0 | } |
59 | | |
60 | 0 | void TessResultRenderer::insert(TessResultRenderer *next) { |
61 | 0 | if (next == nullptr) { |
62 | 0 | return; |
63 | 0 | } |
64 | | |
65 | 0 | TessResultRenderer *remainder = next_; |
66 | 0 | next_ = next; |
67 | 0 | if (remainder) { |
68 | 0 | while (next->next_ != nullptr) { |
69 | 0 | next = next->next_; |
70 | 0 | } |
71 | 0 | next->next_ = remainder; |
72 | 0 | } |
73 | 0 | } |
74 | | |
75 | 0 | bool TessResultRenderer::BeginDocument(const char *title) { |
76 | 0 | if (!happy_) { |
77 | 0 | return false; |
78 | 0 | } |
79 | 0 | title_ = title; |
80 | 0 | imagenum_ = -1; |
81 | 0 | bool ok = BeginDocumentHandler(); |
82 | 0 | if (next_) { |
83 | 0 | ok = next_->BeginDocument(title) && ok; |
84 | 0 | } |
85 | 0 | return ok; |
86 | 0 | } |
87 | | |
88 | 0 | bool TessResultRenderer::AddImage(TessBaseAPI *api) { |
89 | 0 | if (!happy_) { |
90 | 0 | return false; |
91 | 0 | } |
92 | 0 | ++imagenum_; |
93 | 0 | bool ok = AddImageHandler(api); |
94 | 0 | if (next_) { |
95 | 0 | ok = next_->AddImage(api) && ok; |
96 | 0 | } |
97 | 0 | return ok; |
98 | 0 | } |
99 | | |
100 | 0 | bool TessResultRenderer::EndDocument() { |
101 | 0 | if (!happy_) { |
102 | 0 | return false; |
103 | 0 | } |
104 | 0 | bool ok = EndDocumentHandler(); |
105 | 0 | if (next_) { |
106 | 0 | ok = next_->EndDocument() && ok; |
107 | 0 | } |
108 | 0 | return ok; |
109 | 0 | } |
110 | | |
111 | 0 | void TessResultRenderer::AppendString(const char *s) { |
112 | 0 | if (s == nullptr) { |
113 | 0 | return; |
114 | 0 | } |
115 | 0 | AppendData(s, strlen(s)); |
116 | 0 | } |
117 | | |
118 | 0 | void TessResultRenderer::AppendData(const char *s, int len) { |
119 | 0 | if (!tesseract::Serialize(fout_, s, len)) { |
120 | 0 | happy_ = false; |
121 | 0 | } |
122 | 0 | fflush(fout_); |
123 | 0 | } |
124 | | |
125 | 0 | bool TessResultRenderer::BeginDocumentHandler() { |
126 | 0 | return happy_; |
127 | 0 | } |
128 | | |
129 | 0 | bool TessResultRenderer::EndDocumentHandler() { |
130 | 0 | return happy_; |
131 | 0 | } |
132 | | |
133 | | /********************************************************************** |
134 | | * UTF8 Text Renderer interface implementation |
135 | | **********************************************************************/ |
136 | | TessTextRenderer::TessTextRenderer(const char *outputbase) |
137 | 0 | : TessResultRenderer(outputbase, "txt") {} |
138 | | |
139 | 0 | bool TessTextRenderer::AddImageHandler(TessBaseAPI *api) { |
140 | 0 | const std::unique_ptr<const char[]> utf8(api->GetUTF8Text()); |
141 | 0 | if (utf8 == nullptr) { |
142 | 0 | return false; |
143 | 0 | } |
144 | | |
145 | 0 | const char *pageSeparator = api->GetStringVariable("page_separator"); |
146 | 0 | if (pageSeparator != nullptr && *pageSeparator != '\0' && imagenum() > 0) { |
147 | 0 | AppendString(pageSeparator); |
148 | 0 | } |
149 | |
|
150 | 0 | AppendString(utf8.get()); |
151 | |
|
152 | 0 | return true; |
153 | 0 | } |
154 | | |
155 | | /********************************************************************** |
156 | | * TSV Text Renderer interface implementation |
157 | | **********************************************************************/ |
158 | 0 | TessTsvRenderer::TessTsvRenderer(const char *outputbase) : TessResultRenderer(outputbase, "tsv") { |
159 | 0 | font_info_ = false; |
160 | 0 | } |
161 | | |
162 | | TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info) |
163 | 0 | : TessResultRenderer(outputbase, "tsv") { |
164 | 0 | font_info_ = font_info; |
165 | 0 | } |
166 | | |
167 | 0 | bool TessTsvRenderer::BeginDocumentHandler() { |
168 | | // Output TSV column headings |
169 | 0 | AppendString( |
170 | 0 | "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" |
171 | 0 | "num\tleft\ttop\twidth\theight\tconf\ttext\n"); |
172 | 0 | return true; |
173 | 0 | } |
174 | | |
175 | 0 | bool TessTsvRenderer::EndDocumentHandler() { |
176 | 0 | return true; |
177 | 0 | } |
178 | | |
179 | 0 | bool TessTsvRenderer::AddImageHandler(TessBaseAPI *api) { |
180 | 0 | const std::unique_ptr<const char[]> tsv(api->GetTSVText(imagenum())); |
181 | 0 | if (tsv == nullptr) { |
182 | 0 | return false; |
183 | 0 | } |
184 | | |
185 | 0 | AppendString(tsv.get()); |
186 | |
|
187 | 0 | return true; |
188 | 0 | } |
189 | | |
190 | | /********************************************************************** |
191 | | * UNLV Text Renderer interface implementation |
192 | | **********************************************************************/ |
193 | | TessUnlvRenderer::TessUnlvRenderer(const char *outputbase) |
194 | 0 | : TessResultRenderer(outputbase, "unlv") {} |
195 | | |
196 | 0 | bool TessUnlvRenderer::AddImageHandler(TessBaseAPI *api) { |
197 | 0 | const std::unique_ptr<const char[]> unlv(api->GetUNLVText()); |
198 | 0 | if (unlv == nullptr) { |
199 | 0 | return false; |
200 | 0 | } |
201 | | |
202 | 0 | AppendString(unlv.get()); |
203 | |
|
204 | 0 | return true; |
205 | 0 | } |
206 | | |
207 | | /********************************************************************** |
208 | | * BoxText Renderer interface implementation |
209 | | **********************************************************************/ |
210 | | TessBoxTextRenderer::TessBoxTextRenderer(const char *outputbase) |
211 | 0 | : TessResultRenderer(outputbase, "box") {} |
212 | | |
213 | 0 | bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI *api) { |
214 | 0 | const std::unique_ptr<const char[]> text(api->GetBoxText(imagenum())); |
215 | 0 | if (text == nullptr) { |
216 | 0 | return false; |
217 | 0 | } |
218 | | |
219 | 0 | AppendString(text.get()); |
220 | |
|
221 | 0 | return true; |
222 | 0 | } |
223 | | |
224 | | #ifndef DISABLED_LEGACY_ENGINE |
225 | | |
226 | | /********************************************************************** |
227 | | * Osd Text Renderer interface implementation |
228 | | **********************************************************************/ |
229 | 0 | TessOsdRenderer::TessOsdRenderer(const char *outputbase) : TessResultRenderer(outputbase, "osd") {} |
230 | | |
231 | 0 | bool TessOsdRenderer::AddImageHandler(TessBaseAPI *api) { |
232 | 0 | const std::unique_ptr<const char[]> osd(api->GetOsdText(imagenum())); |
233 | 0 | if (osd == nullptr) { |
234 | 0 | return false; |
235 | 0 | } |
236 | | |
237 | 0 | AppendString(osd.get()); |
238 | |
|
239 | 0 | return true; |
240 | 0 | } |
241 | | |
242 | | #endif // ndef DISABLED_LEGACY_ENGINE |
243 | | |
244 | | } // namespace tesseract |