/src/tesseract/src/ccutil/ccutil.cpp
Line | Count | Source |
1 | | // Copyright 2008 Google Inc. All Rights Reserved. |
2 | | // Author: scharron@google.com (Samuel Charron) |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // Unless required by applicable law or agreed to in writing, software |
8 | | // distributed under the License is distributed on an "AS IS" BASIS, |
9 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
10 | | // See the License for the specific language governing permissions and |
11 | | // limitations under the License. |
12 | | |
13 | | #include "ccutil.h" |
14 | | #include "tprintf.h" // for tprintf |
15 | | |
16 | | #include <cstdlib> |
17 | | #include <cstring> // for std::strrchr, std::strlen |
18 | | #include <filesystem> // for std::filesystem |
19 | | |
20 | | namespace tesseract { |
21 | | |
22 | | CCUtil::CCUtil() |
23 | 8 | : params_() |
24 | 8 | , INT_INIT_MEMBER(ambigs_debug_level, 0, "Debug level for unichar ambiguities", ¶ms_) |
25 | 8 | , BOOL_MEMBER(use_ambigs_for_adaption, false, |
26 | | "Use ambigs for deciding" |
27 | | " whether to adapt to a character", |
28 | 8 | ¶ms_) {} |
29 | | |
30 | | // Destructor. |
31 | | // It is defined here, so the compiler can create a single vtable |
32 | | // instead of weak vtables in every compilation unit. |
33 | 0 | CCUtil::~CCUtil() = default; |
34 | | |
35 | | /** |
36 | | * @brief CCUtil::main_setup - set location of tessdata and name of image |
37 | | * |
38 | | * @param argv0 - paths to the directory with language files and config files. |
39 | | * An actual value of argv0 is used if not nullptr, otherwise TESSDATA_PREFIX is |
40 | | * used if not nullptr, next try to use compiled in -DTESSDATA_PREFIX. If |
41 | | * previous is not successful - use current directory. |
42 | | * @param basename - name of image |
43 | | */ |
44 | 4 | void CCUtil::main_setup(const std::string &argv0, const std::string &basename) { |
45 | 4 | imagebasename = basename; /**< name of image */ |
46 | | |
47 | 4 | const char *tessdata_prefix = getenv("TESSDATA_PREFIX"); |
48 | | |
49 | | // Ignore TESSDATA_PREFIX if there is no matching filesystem entry. |
50 | 4 | if (tessdata_prefix != nullptr && !std::filesystem::exists(tessdata_prefix)) { |
51 | 0 | tprintf("Warning: TESSDATA_PREFIX %s does not exist, ignore it\n", tessdata_prefix); |
52 | 0 | tessdata_prefix = nullptr; |
53 | 0 | } |
54 | | |
55 | 4 | if (!argv0.empty()) { |
56 | | /* Use tessdata prefix from the command line. */ |
57 | 0 | datadir = argv0; |
58 | 4 | } else if (tessdata_prefix) { |
59 | | /* Use tessdata prefix from the environment. */ |
60 | 4 | datadir = tessdata_prefix; |
61 | | #if defined(_WIN32) |
62 | | } else if (datadir.empty() || !std::filesystem::exists(datadir)) { |
63 | | /* Look for tessdata in directory of executable. */ |
64 | | char path[_MAX_PATH]; |
65 | | DWORD length = GetModuleFileName(nullptr, path, sizeof(path)); |
66 | | if (length > 0 && length < sizeof(path)) { |
67 | | char *separator = std::strrchr(path, '\\'); |
68 | | if (separator != nullptr) { |
69 | | *separator = '\0'; |
70 | | std::string subdir = path; |
71 | | subdir += "/tessdata"; |
72 | | if (std::filesystem::exists(subdir)) { |
73 | | datadir = subdir; |
74 | | } |
75 | | } |
76 | | } |
77 | | #endif /* _WIN32 */ |
78 | 4 | } |
79 | | |
80 | | // datadir may still be empty: |
81 | 4 | if (datadir.empty()) { |
82 | 0 | #if defined(TESSDATA_PREFIX) |
83 | | // Use tessdata prefix which was compiled in. |
84 | 0 | datadir = TESSDATA_PREFIX "/tessdata/"; |
85 | | // Note that some software (for example conda) patches TESSDATA_PREFIX |
86 | | // in the binary, so it might be shorter. Recalculate its length. |
87 | 0 | datadir.resize(std::strlen(datadir.c_str())); |
88 | | #else |
89 | | datadir = "./"; |
90 | | #endif /* TESSDATA_PREFIX */ |
91 | 0 | } |
92 | | |
93 | | // check for missing directory separator |
94 | 4 | const char lastchar = datadir.back(); |
95 | 4 | if (lastchar != '/' && lastchar != '\\') { |
96 | 4 | datadir += '/'; |
97 | 4 | } |
98 | 4 | } |
99 | | |
100 | | } // namespace tesseract |