Coverage Report

Created: 2026-02-14 06:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/include/tesseract/baseapi.h
Line
Count
Source
1
// SPDX-License-Identifier: Apache-2.0
2
// File:        baseapi.h
3
// Description: Simple API for calling tesseract.
4
// Author:      Ray Smith
5
//
6
// (C) Copyright 2006, Google Inc.
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
17
#ifndef TESSERACT_API_BASEAPI_H_
18
#define TESSERACT_API_BASEAPI_H_
19
20
#ifdef HAVE_CONFIG_H
21
#  include "config_auto.h" // DISABLED_LEGACY_ENGINE
22
#endif
23
24
#include "export.h"
25
#include "pageiterator.h"
26
#include "publictypes.h"
27
#include "resultiterator.h"
28
#include "unichar.h"
29
30
#include <tesseract/version.h>
31
32
#include <cstdio>
33
#include <vector> // for std::vector
34
35
struct Pix;
36
struct Pixa;
37
struct Boxa;
38
39
namespace tesseract {
40
41
class PAGE_RES;
42
class ParagraphModel;
43
class BLOCK_LIST;
44
class ETEXT_DESC;
45
struct OSResults;
46
class UNICHARSET;
47
48
class Dawg;
49
class Dict;
50
class EquationDetect;
51
class PageIterator;
52
class ImageThresholder;
53
class LTRResultIterator;
54
class ResultIterator;
55
class MutableIterator;
56
class TessResultRenderer;
57
class Tesseract;
58
59
// Function to read a std::vector<char> from a whole file.
60
// Returns false on failure.
61
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
62
63
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
64
                               bool) const;
65
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
66
                                                  int, const char *, int);
67
68
/**
69
 * Base class for all tesseract APIs.
70
 * Specific classes can add ability to work on different inputs or produce
71
 * different outputs.
72
 * This class is mostly an interface layer on top of the Tesseract instance
73
 * class to hide the data types so that users of this class don't have to
74
 * include any other Tesseract headers.
75
 */
76
class TESS_API TessBaseAPI {
77
public:
78
  TessBaseAPI();
79
  virtual ~TessBaseAPI();
80
  // Copy constructor and assignment operator are currently unsupported.
81
  TessBaseAPI(TessBaseAPI const &) = delete;
82
  TessBaseAPI &operator=(TessBaseAPI const &) = delete;
83
84
  /**
85
   * Returns the version identifier as a static string. Do not delete.
86
   */
87
  static const char *Version();
88
89
  /**
90
   * Set the name of the input file. Needed for training and
91
   * reading a UNLV zone file, and for searchable PDF output.
92
   */
93
  void SetInputName(const char *name);
94
  /**
95
   * These functions are required for searchable PDF output.
96
   * We need our hands on the input file so that we can include
97
   * it in the PDF without transcoding. If that is not possible,
98
   * we need the original image. Finally, resolution metadata
99
   * is stored in the PDF so we need that as well.
100
   */
101
  const char *GetInputName();
102
  // Takes ownership of the input pix.
103
  void SetInputImage(Pix *pix);
104
  Pix *GetInputImage();
105
  int GetSourceYResolution();
106
  const char *GetDatapath();
107
108
  /** Set the name of the bonus output files. Needed only for debugging. */
109
  void SetOutputName(const char *name);
110
111
  /**
112
   * Set the value of an internal "parameter."
113
   * Supply the name of the parameter and the value as a string, just as
114
   * you would in a config file.
115
   * Returns false if the name lookup failed.
116
   * Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
117
   * Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
118
   * SetVariable may be used before Init, but settings will revert to
119
   * defaults on End().
120
   *
121
   * Note: Must be called after Init(). Only works for non-init variables
122
   * (init variables should be passed to Init()).
123
   */
124
  bool SetVariable(const char *name, const char *value);
125
  bool SetDebugVariable(const char *name, const char *value);
126
127
  /**
128
   * Returns true if the parameter was found among Tesseract parameters.
129
   * Fills in value with the value of the parameter.
130
   */
131
  bool GetIntVariable(const char *name, int *value) const;
132
  bool GetBoolVariable(const char *name, bool *value) const;
133
  bool GetDoubleVariable(const char *name, double *value) const;
134
135
  /**
136
   * Returns the pointer to the string that represents the value of the
137
   * parameter if it was found among Tesseract parameters.
138
   */
139
  const char *GetStringVariable(const char *name) const;
140
141
#ifndef DISABLED_LEGACY_ENGINE
142
143
  /**
144
   * Print Tesseract fonts table to the given file.
145
   */
146
  void PrintFontsTable(FILE *fp) const;
147
148
#endif
149
150
  /**
151
   * Print Tesseract parameters to the given file.
152
   */
153
  void PrintVariables(FILE *fp) const;
154
155
  /**
156
   * Get value of named variable as a string, if it exists.
157
   */
158
  bool GetVariableAsString(const char *name, std::string *val) const;
159
160
  /**
161
   * Instances are now mostly thread-safe and totally independent,
162
   * but some global parameters remain. Basically it is safe to use multiple
163
   * TessBaseAPIs in different threads in parallel, UNLESS:
164
   * you use SetVariable on some of the Params in classify and textord.
165
   * If you do, then the effect will be to change it for all your instances.
166
   *
167
   * Start tesseract. Returns zero on success and -1 on failure.
168
   * NOTE that the only members that may be called before Init are those
169
   * listed above here in the class definition.
170
   *
171
   * The datapath must be the name of the tessdata directory.
172
   * The language is (usually) an ISO 639-3 string or nullptr will default to
173
   * eng. It is entirely safe (and eventually will be efficient too) to call
174
   * Init multiple times on the same instance to change language, or just
175
   * to reset the classifier.
176
   * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
177
   * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
178
   * English. Languages may specify internally that they want to be loaded
179
   * with one or more other languages, so the ~ sign is available to override
180
   * that. Eg if hin were set to load eng by default, then hin+~eng would force
181
   * loading only hin. The number of loaded languages is limited only by
182
   * memory, with the caveat that loading additional languages will impact
183
   * both speed and accuracy, as there is more work to do to decide on the
184
   * applicable language, and there is more chance of hallucinating incorrect
185
   * words.
186
   * WARNING: On changing languages, all Tesseract parameters are reset
187
   * back to their default values. (Which may vary between languages.)
188
   * If you have a rare need to set a Variable that controls
189
   * initialization for a second call to Init you should explicitly
190
   * call End() and then use SetVariable before Init. This is only a very
191
   * rare use case, since there are very few uses that require any parameters
192
   * to be set before Init.
193
   *
194
   * If set_only_non_debug_params is true, only params that do not contain
195
   * "debug" in the name will be set.
196
   */
197
  int Init(const char *datapath, const char *language, OcrEngineMode mode,
198
           char **configs, int configs_size,
199
           const std::vector<std::string> *vars_vec,
200
           const std::vector<std::string> *vars_values,
201
           bool set_only_non_debug_params);
202
0
  int Init(const char *datapath, const char *language, OcrEngineMode oem) {
203
0
    return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
204
0
  }
205
4
  int Init(const char *datapath, const char *language) {
206
4
    return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
207
4
                false);
208
4
  }
209
  // In-memory version reads the traineddata file directly from the given
210
  // data[data_size] array, and/or reads data via a FileReader.
211
  int Init(const char *data, int data_size, const char *language,
212
           OcrEngineMode mode, char **configs, int configs_size,
213
           const std::vector<std::string> *vars_vec,
214
           const std::vector<std::string> *vars_values,
215
           bool set_only_non_debug_params, FileReader reader);
216
217
  /**
218
   * Returns the languages string used in the last valid initialization.
219
   * If the last initialization specified "deu+hin" then that will be
220
   * returned. If hin loaded eng automatically as well, then that will
221
   * not be included in this list. To find the languages actually
222
   * loaded use GetLoadedLanguagesAsVector.
223
   * The returned string should NOT be deleted.
224
   */
225
  const char *GetInitLanguagesAsString() const;
226
227
  /**
228
   * Returns the loaded languages in the vector of std::string.
229
   * Includes all languages loaded by the last Init, including those loaded
230
   * as dependencies of other loaded languages.
231
   */
232
  void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
233
234
  /**
235
   * Returns the available languages in the sorted vector of std::string.
236
   */
237
  void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
238
239
  /**
240
   * Init only for page layout analysis. Use only for calls to SetImage and
241
   * AnalysePage. Calls that attempt recognition will generate an error.
242
   */
243
  void InitForAnalysePage();
244
245
  /**
246
   * Read a "config" file containing a set of param, value pairs.
247
   * Searches the standard places: tessdata/configs, tessdata/tessconfigs
248
   * and also accepts a relative or absolute path name.
249
   * Note: only non-init params will be set (init params are set by Init()).
250
   */
251
  void ReadConfigFile(const char *filename);
252
  /** Same as above, but only set debug params from the given config file. */
253
  void ReadDebugConfigFile(const char *filename);
254
255
  /**
256
   * Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
257
   * The mode is stored as an IntParam so it can also be modified by
258
   * ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
259
   */
260
  void SetPageSegMode(PageSegMode mode);
261
262
  /** Return the current page segmentation mode. */
263
  PageSegMode GetPageSegMode() const;
264
265
  /**
266
   * Recognize a rectangle from an image and return the result as a string.
267
   * May be called many times for a single Init.
268
   * Currently has no error checking.
269
   * Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
270
   * Palette color images will not work properly and must be converted to
271
   * 24 bit.
272
   * Binary images of 1 bit per pixel may also be given but they must be
273
   * byte packed with the MSB of the first byte being the first pixel, and a
274
   * 1 represents WHITE. For binary images set bytes_per_pixel=0.
275
   * The recognized text is returned as a char* which is coded
276
   * as UTF8 and must be freed with the delete [] operator.
277
   *
278
   * Note that TesseractRect is the simplified convenience interface.
279
   * For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
280
   * and one or more of the Get*Text functions below.
281
   */
282
  char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
283
                      int bytes_per_line, int left, int top, int width,
284
                      int height);
285
286
  /**
287
   * Call between pages or documents etc to free up memory and forget
288
   * adaptive data.
289
   */
290
  void ClearAdaptiveClassifier();
291
292
  /**
293
   * @defgroup AdvancedAPI Advanced API
294
   * The following methods break TesseractRect into pieces, so you can
295
   * get hold of the thresholded image, get the text in different formats,
296
   * get bounding boxes, confidences etc.
297
   */
298
  /* @{ */
299
300
  /**
301
   * Provide an image for Tesseract to recognize. Format is as
302
   * TesseractRect above. Copies the image buffer and converts to Pix.
303
   * SetImage clears all recognition results, and sets the rectangle to the
304
   * full image, so it may be followed immediately by a GetUTF8Text, and it
305
   * will automatically perform recognition.
306
   */
307
  void SetImage(const unsigned char *imagedata, int width, int height,
308
                int bytes_per_pixel, int bytes_per_line);
309
310
  /**
311
   * Provide an image for Tesseract to recognize. As with SetImage above,
312
   * Tesseract takes its own copy of the image, so it need not persist until
313
   * after Recognize.
314
   * Pix vs raw, which to use?
315
   * Use Pix where possible. Tesseract uses Pix as its internal representation
316
   * and it is therefore more efficient to provide a Pix directly.
317
   */
318
  void SetImage(Pix *pix);
319
320
  /**
321
   * Set the resolution of the source image in pixels per inch so font size
322
   * information can be calculated in results.  Call this after SetImage().
323
   */
324
  void SetSourceResolution(int ppi);
325
326
  /**
327
   * Restrict recognition to a sub-rectangle of the image. Call after SetImage.
328
   * Each SetRectangle clears the recognition results so multiple rectangles
329
   * can be recognized with the same image.
330
   */
331
  void SetRectangle(int left, int top, int width, int height);
332
333
  /**
334
   * Get a copy of the internal thresholded image from Tesseract.
335
   * Caller takes ownership of the Pix and must pixDestroy it.
336
   * May be called any time after SetImage, or after TesseractRect.
337
   */
338
  Pix *GetThresholdedImage();
339
340
  /**
341
   * Return average gradient of lines on page.
342
   */
343
  float GetGradient();
344
345
  /**
346
   * Get the result of page layout analysis as a leptonica-style
347
   * Boxa, Pixa pair, in reading order.
348
   * Can be called before or after Recognize.
349
   */
350
  Boxa *GetRegions(Pixa **pixa);
351
352
  /**
353
   * Get the textlines as a leptonica-style
354
   * Boxa, Pixa pair, in reading order.
355
   * Can be called before or after Recognize.
356
   * If raw_image is true, then extract from the original image instead of the
357
   * thresholded image and pad by raw_padding pixels.
358
   * If blockids is not nullptr, the block-id of each line is also returned as
359
   * an array of one element per line. delete [] after use. If paraids is not
360
   * nullptr, the paragraph-id of each line within its block is also returned as
361
   * an array of one element per line. delete [] after use.
362
   */
363
  Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
364
                     int **blockids, int **paraids);
365
  /*
366
   Helper method to extract from the thresholded image. (most common usage)
367
*/
368
0
  Boxa *GetTextlines(Pixa **pixa, int **blockids) {
369
0
    return GetTextlines(false, 0, pixa, blockids, nullptr);
370
0
  }
371
372
  /**
373
   * Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
374
   * pair, in reading order. Enables downstream handling of non-rectangular
375
   * regions.
376
   * Can be called before or after Recognize.
377
   * If blockids is not nullptr, the block-id of each line is also returned as
378
   * an array of one element per line. delete [] after use.
379
   */
380
  Boxa *GetStrips(Pixa **pixa, int **blockids);
381
382
  /**
383
   * Get the words as a leptonica-style
384
   * Boxa, Pixa pair, in reading order.
385
   * Can be called before or after Recognize.
386
   */
387
  Boxa *GetWords(Pixa **pixa);
388
389
  /**
390
   * Gets the individual connected (text) components (created
391
   * after the page segmentation step, but before recognition)
392
   * as a leptonica-style Boxa, Pixa pair, in reading order.
393
   * Can be called before or after Recognize.
394
   * Note: the caller is responsible for calling boxaDestroy()
395
   * on the returned Boxa array and pixaDestroy() on cc array.
396
   */
397
  Boxa *GetConnectedComponents(Pixa **cc);
398
399
  /**
400
   * Get the given level kind of components (block, textline, word etc.) as a
401
   * leptonica-style Boxa, Pixa pair, in reading order.
402
   * Can be called before or after Recognize.
403
   * If blockids is not nullptr, the block-id of each component is also returned
404
   * as an array of one element per component. delete [] after use.
405
   * If paraids is not nullptr, the paragraph-id of each component within its
406
   * block is also returned as an array of one element per component. delete []
407
   * after use. If raw_image is true, then portions of the original image are
408
   * extracted instead of the thresholded image and padded with raw_padding. If
409
   * text_only is true, then only text components are returned.
410
   */
411
  Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
412
                           bool raw_image, int raw_padding, Pixa **pixa,
413
                           int **blockids, int **paraids);
414
  // Helper function to get binary images with no padding (most common usage).
415
  Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
416
0
                           Pixa **pixa, int **blockids) {
417
0
    return GetComponentImages(level, text_only, false, 0, pixa, blockids,
418
0
                              nullptr);
419
0
  }
420
421
  /**
422
   * Returns the scale factor of the thresholded image that would be returned by
423
   * GetThresholdedImage() and the various GetX() methods that call
424
   * GetComponentImages().
425
   * Returns 0 if no thresholder has been set.
426
   */
427
  int GetThresholdedImageScaleFactor() const;
428
429
  /**
430
   * Runs page layout analysis in the mode set by SetPageSegMode.
431
   * May optionally be called prior to Recognize to get access to just
432
   * the page layout results. Returns an iterator to the results.
433
   * If merge_similar_words is true, words are combined where suitable for use
434
   * with a line recognizer. Use if you want to use AnalyseLayout to find the
435
   * textlines, and then want to process textline fragments with an external
436
   * line recognizer.
437
   * Returns nullptr on error or an empty page.
438
   * The returned iterator must be deleted after use.
439
   * WARNING! This class points to data held within the TessBaseAPI class, and
440
   * therefore can only be used while the TessBaseAPI class still exists and
441
   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
442
   * DetectOS, or anything else that changes the internal PAGE_RES.
443
   */
444
  PageIterator *AnalyseLayout();
445
  PageIterator *AnalyseLayout(bool merge_similar_words);
446
447
  /**
448
   * Recognize the image from SetImage, generating Tesseract
449
   * internal structures. Returns 0 on success.
450
   * Optional. The Get*Text functions below will call Recognize if needed.
451
   * After Recognize, the output is kept internally until the next SetImage.
452
   */
453
  int Recognize(ETEXT_DESC *monitor);
454
455
  /**
456
   * Methods to retrieve information after SetImage(),
457
   * Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
458
   */
459
460
  /**
461
   * Turns images into symbolic text.
462
   *
463
   * filename can point to a single image, a multi-page TIFF,
464
   * or a plain text list of image filenames.
465
   *
466
   * retry_config is useful for debugging. If not nullptr, you can fall
467
   * back to an alternate configuration if a page fails for some
468
   * reason.
469
   *
470
   * timeout_millisec terminates processing if any single page
471
   * takes too long. Set to 0 for unlimited time.
472
   *
473
   * renderer is responsible for creating the output. For example,
474
   * use the TessTextRenderer if you want plaintext output, or
475
   * the TessPDFRenderer to produce searchable PDF.
476
   *
477
   * If tessedit_page_number is non-negative, will only process that
478
   * single page. Works for multi-page tiff file, or filelist.
479
   *
480
   * Returns true if successful, false on error.
481
   */
482
  bool ProcessPages(const char *filename, const char *retry_config,
483
                    int timeout_millisec, TessResultRenderer *renderer);
484
  // Does the real work of ProcessPages.
485
  bool ProcessPagesInternal(const char *filename, const char *retry_config,
486
                            int timeout_millisec, TessResultRenderer *renderer);
487
488
  /**
489
   * Turn a single image into symbolic text.
490
   *
491
   * The pix is the image processed. filename and page_index are
492
   * metadata used by side-effect processes, such as reading a box
493
   * file or formatting as hOCR.
494
   *
495
   * See ProcessPages for descriptions of other parameters.
496
   */
497
  bool ProcessPage(Pix *pix, int page_index, const char *filename,
498
                   const char *retry_config, int timeout_millisec,
499
                   TessResultRenderer *renderer);
500
501
  /**
502
   * Get a reading-order iterator to the results of LayoutAnalysis and/or
503
   * Recognize. The returned iterator must be deleted after use.
504
   * WARNING! This class points to data held within the TessBaseAPI class, and
505
   * therefore can only be used while the TessBaseAPI class still exists and
506
   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
507
   * DetectOS, or anything else that changes the internal PAGE_RES.
508
   */
509
  ResultIterator *GetIterator();
510
511
  /**
512
   * Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
513
   * The returned iterator must be deleted after use.
514
   * WARNING! This class points to data held within the TessBaseAPI class, and
515
   * therefore can only be used while the TessBaseAPI class still exists and
516
   * has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
517
   * DetectOS, or anything else that changes the internal PAGE_RES.
518
   */
519
  MutableIterator *GetMutableIterator();
520
521
  /**
522
   * The recognized text is returned as a char* which is coded
523
   * as UTF8 and must be freed with the delete [] operator.
524
   */
525
  char *GetUTF8Text();
526
527
  /**
528
   * Make a HTML-formatted string with hOCR markup from the internal
529
   * data structures.
530
   * page_number is 0-based but will appear in the output as 1-based.
531
   * monitor can be used to
532
   *  cancel the recognition
533
   *  receive progress callbacks
534
   * Returned string must be freed with the delete [] operator.
535
   */
536
  char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
537
538
  /**
539
   * Make a HTML-formatted string with hOCR markup from the internal
540
   * data structures.
541
   * page_number is 0-based but will appear in the output as 1-based.
542
   * Returned string must be freed with the delete [] operator.
543
   */
544
  char *GetHOCRText(int page_number);
545
546
  /**
547
   * Make an XML-formatted string with Alto markup from the internal
548
   * data structures.
549
   */
550
  char *GetAltoText(ETEXT_DESC *monitor, int page_number);
551
552
  /**
553
   * Make an XML-formatted string with Alto markup from the internal
554
   * data structures.
555
   */
556
  char *GetAltoText(int page_number);
557
558
  /**
559
   * Make an XML-formatted string with PAGE markup from the internal
560
   * data structures.
561
   */
562
  char *GetPAGEText(ETEXT_DESC *monitor, int page_number);
563
564
  /**
565
   * Make an XML-formatted string with PAGE markup from the internal
566
   * data structures.
567
   */
568
  char *GetPAGEText(int page_number);
569
570
  /**
571
   * Make a TSV-formatted string from the internal data structures.
572
   * page_number is 0-based but will appear in the output as 1-based.
573
   * Returned string must be freed with the delete [] operator.
574
   */
575
  char *GetTSVText(int page_number);
576
577
  /**
578
   * Make a box file for LSTM training from the internal data structures.
579
   * Constructs coordinates in the original image - not just the rectangle.
580
   * page_number is a 0-based page index that will appear in the box file.
581
   * Returned string must be freed with the delete [] operator.
582
   */
583
  char *GetLSTMBoxText(int page_number);
584
585
  /**
586
   * The recognized text is returned as a char* which is coded in the same
587
   * format as a box file used in training.
588
   * Constructs coordinates in the original image - not just the rectangle.
589
   * page_number is a 0-based page index that will appear in the box file.
590
   * Returned string must be freed with the delete [] operator.
591
   */
592
  char *GetBoxText(int page_number);
593
594
  /**
595
   * The recognized text is returned as a char* which is coded in the same
596
   * format as a WordStr box file used in training.
597
   * page_number is a 0-based page index that will appear in the box file.
598
   * Returned string must be freed with the delete [] operator.
599
   */
600
  char *GetWordStrBoxText(int page_number);
601
602
  /**
603
   * The recognized text is returned as a char* which is coded
604
   * as UNLV format Latin-1 with specific reject and suspect codes.
605
   * Returned string must be freed with the delete [] operator.
606
   */
607
  char *GetUNLVText();
608
609
  /**
610
   * Detect the orientation of the input image and apparent script (alphabet).
611
   * orient_deg is the detected clockwise rotation of the input image in degrees
612
   * (0, 90, 180, 270)
613
   * orient_conf is the confidence (15.0 is reasonably confident)
614
   * script_name is an ASCII string, the name of the script, e.g. "Latin"
615
   * script_conf is confidence level in the script
616
   * Returns true on success and writes values to each parameter as an output
617
   */
618
  bool DetectOrientationScript(int *orient_deg, float *orient_conf,
619
                               const char **script_name, float *script_conf);
620
621
  /**
622
   * The recognized text is returned as a char* which is coded
623
   * as UTF8 and must be freed with the delete [] operator.
624
   * page_number is a 0-based page index that will appear in the osd file.
625
   */
626
  char *GetOsdText(int page_number);
627
628
  /** Returns the (average) confidence value between 0 and 100. */
629
  int MeanTextConf();
630
  /**
631
   * Returns all word confidences (between 0 and 100) in an array, terminated
632
   * by -1.  The calling function must delete [] after use.
633
   * The number of confidences should correspond to the number of space-
634
   * delimited words in GetUTF8Text.
635
   */
636
  int *AllWordConfidences();
637
638
#ifndef DISABLED_LEGACY_ENGINE
639
  /**
640
   * Applies the given word to the adaptive classifier if possible.
641
   * The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
642
   * tell the boundaries of the graphemes.
643
   * Assumes that SetImage/SetRectangle have been used to set the image
644
   * to the given word. The mode arg should be PSM_SINGLE_WORD or
645
   * PSM_CIRCLE_WORD, as that will be used to control layout analysis.
646
   * The currently set PageSegMode is preserved.
647
   * Returns false if adaption was not possible for some reason.
648
   */
649
  bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
650
#endif //  ndef DISABLED_LEGACY_ENGINE
651
652
  /**
653
   * Free up recognition results and any stored image data, without actually
654
   * freeing any recognition data that would be time-consuming to reload.
655
   * Afterwards, you must call SetImage or TesseractRect before doing
656
   * any Recognize or Get* operation.
657
   */
658
  void Clear();
659
660
  /**
661
   * Close down tesseract and free up all memory. End() is equivalent to
662
   * destructing and reconstructing your TessBaseAPI.
663
   * Once End() has been used, none of the other API functions may be used
664
   * other than Init and anything declared above it in the class definition.
665
   */
666
  void End();
667
668
  /**
669
   * Clear any library-level memory caches.
670
   * There are a variety of expensive-to-load constant data structures (mostly
671
   * language dictionaries) that are cached globally -- surviving the Init()
672
   * and End() of individual TessBaseAPI's.  This function allows the clearing
673
   * of these caches.
674
   **/
675
  static void ClearPersistentCache();
676
677
  /**
678
   * Check whether a word is valid according to Tesseract's language model
679
   * @return 0 if the word is invalid, non-zero if valid.
680
   * @warning temporary! This function will be removed from here and placed
681
   * in a separate API at some future time.
682
   */
683
  int IsValidWord(const char *word) const;
684
  // Returns true if utf8_character is defined in the UniCharset.
685
  bool IsValidCharacter(const char *utf8_character) const;
686
687
  bool GetTextDirection(int *out_offset, float *out_slope);
688
689
  /** Sets Dict::letter_is_okay_ function to point to the given function. */
690
  void SetDictFunc(DictFunc f);
691
692
  /** Sets Dict::probability_in_context_ function to point to the given
693
   * function.
694
   */
695
  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
696
697
  /**
698
   * Estimates the Orientation And Script of the image.
699
   * @return true if the image was processed successfully.
700
   */
701
  bool DetectOS(OSResults *);
702
703
  /**
704
   * Return text orientation of each block as determined by an earlier run
705
   * of layout analysis.
706
   */
707
  void GetBlockTextOrientations(int **block_orientation,
708
                                bool **vertical_writing);
709
710
  /** This method returns the string form of the specified unichar. */
711
  const char *GetUnichar(int unichar_id) const;
712
713
  /** Return the pointer to the i-th dawg loaded into tesseract_ object. */
714
  const Dawg *GetDawg(int i) const;
715
716
  /** Return the number of dawgs loaded into tesseract_ object. */
717
  int NumDawgs() const;
718
719
0
  Tesseract *tesseract() const {
720
0
    return tesseract_;
721
0
  }
722
723
0
  OcrEngineMode oem() const {
724
0
    return last_oem_requested_;
725
0
  }
726
727
  void set_min_orientation_margin(double margin);
728
  /* @} */
729
730
protected:
731
  /** Common code for setting the image. Returns true if Init has been called.
732
   */
733
  bool InternalSetImage();
734
735
  /**
736
   * Run the thresholder to make the thresholded image. If pix is not nullptr,
737
   * the source is thresholded to pix instead of the internal IMAGE.
738
   */
739
  virtual bool Threshold(Pix **pix);
740
741
  /**
742
   * Find lines from the image making the BLOCK_LIST.
743
   * @return 0 on success.
744
   */
745
  int FindLines();
746
747
  /** Delete the pageres and block list ready for a new page. */
748
  void ClearResults();
749
750
  /**
751
   * Return an LTR Result Iterator -- used only for training, as we really want
752
   * to ignore all BiDi smarts at that point.
753
   * delete once you're done with it.
754
   */
755
  LTRResultIterator *GetLTRIterator();
756
757
  /**
758
   * Return the length of the output text string, as UTF8, assuming
759
   * one newline per line and one per block, with a terminator,
760
   * and assuming a single character reject marker for each rejected character.
761
   * Also return the number of recognized blobs in blob_count.
762
   */
763
  int TextLength(int *blob_count) const;
764
765
  //// paragraphs.cpp ////////////////////////////////////////////////////
766
  void DetectParagraphs(bool after_text_recognition);
767
768
0
  const PAGE_RES *GetPageRes() const {
769
0
    return page_res_;
770
0
  }
771
772
protected:
773
  Tesseract *tesseract_;          ///< The underlying data object.
774
  Tesseract *osd_tesseract_;      ///< For orientation & script detection.
775
  EquationDetect *equ_detect_;    ///< The equation detector.
776
  FileReader reader_;             ///< Reads files from any filesystem.
777
  ImageThresholder *thresholder_; ///< Image thresholding module.
778
  std::vector<ParagraphModel *> *paragraph_models_;
779
  BLOCK_LIST *block_list_;           ///< The page layout.
780
  PAGE_RES *page_res_;               ///< The page-level data.
781
  std::string input_file_;           ///< Name used by training code.
782
  std::string output_file_;          ///< Name used by debug code.
783
  std::string datapath_;             ///< Current location of tessdata.
784
  std::string language_;             ///< Last initialized language.
785
  OcrEngineMode last_oem_requested_; ///< Last ocr engine mode requested.
786
  bool recognition_done_;            ///< page_res_ contains recognition data.
787
788
  /**
789
   * @defgroup ThresholderParams Thresholder Parameters
790
   * Parameters saved from the Thresholder. Needed to rebuild coordinates.
791
   */
792
  /* @{ */
793
  int rect_left_;
794
  int rect_top_;
795
  int rect_width_;
796
  int rect_height_;
797
  int image_width_;
798
  int image_height_;
799
  /* @} */
800
801
private:
802
  // A list of image filenames gets special consideration
803
  bool ProcessPagesFileList(FILE *fp, std::string *buf,
804
                            const char *retry_config, int timeout_millisec,
805
                            TessResultRenderer *renderer,
806
                            int tessedit_page_number);
807
  // TIFF supports multipage so gets special consideration.
808
  bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
809
                                 const char *filename, const char *retry_config,
810
                                 int timeout_millisec,
811
                                 TessResultRenderer *renderer,
812
                                 int tessedit_page_number);
813
}; // class TessBaseAPI.
814
815
/** Escape a char string - replace &<>"' with HTML codes. */
816
std::string HOcrEscape(const char *text);
817
818
} // namespace tesseract
819
820
#endif // TESSERACT_API_BASEAPI_H_