/src/tesseract/src/ccutil/serialis.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | * File: serialis.cpp (Formerly serialmac.h) |
3 | | * Description: Inline routines and macros for serialisation functions |
4 | | * Author: Phil Cheatle |
5 | | * |
6 | | * (C) Copyright 1990, Hewlett-Packard Ltd. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | * |
17 | | **********************************************************************/ |
18 | | |
19 | | #include "serialis.h" |
20 | | |
21 | | #include "errcode.h" |
22 | | |
23 | | #include "helpers.h" // for ReverseN |
24 | | |
25 | | #include <climits> // for INT_MAX |
26 | | #include <cstdio> |
27 | | |
28 | | namespace tesseract { |
29 | | |
// The default FileReader loads the whole file into the vector of char,
// returning false on error.
// Only a strictly positive file size is accepted, so an empty file is also
// reported as failure. On success *data holds exactly the bytes of the file
// and has capacity for one more byte.
//   filename: path handed to fopen in binary read mode.
//   data:     destination vector; must not be null.
bool LoadDataFromFile(const char *filename, std::vector<char> *data) {
  FILE *fp = fopen(filename, "rb");
  if (fp == nullptr) {
    return false;
  }
  bool result = false;
  // The size is found by seeking to the end. If that seek fails, neither the
  // reported position nor the size derived from it can be trusted.
  if (fseek(fp, 0, SEEK_END) == 0) {
    const long size = std::ftell(fp);
    // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
    // A failed ftell returns -1 and is rejected by the size > 0 test.
    if (size > 0 && size < LONG_MAX && fseek(fp, 0, SEEK_SET) == 0) {
      const auto byte_count = static_cast<size_t>(size);
      // reserve an extra byte in case caller wants to append a '\0' character
      data->reserve(byte_count + 1);
      data->resize(byte_count); // TODO: optimize no init
      result = fread(data->data(), 1, byte_count, fp) == byte_count;
    }
  }
  fclose(fp);
  return result;
}
50 | | |
// The default FileWriter writes the vector of char to the filename file,
// returning false on error.
// The file is opened in binary write mode, so any existing content is
// replaced. An empty vector produces an empty file and counts as success.
//   data:     bytes to write.
//   filename: path handed to fopen.
bool SaveDataToFile(const std::vector<char> &data, const char *filename) {
  FILE *fp = fopen(filename, "wb");
  if (fp == nullptr) {
    return false;
  }
  // Nothing is written for an empty vector: there is no element 0 to take the
  // address of.
  const bool written =
      data.empty() || fwrite(data.data(), 1, data.size(), fp) == data.size();
  // fclose flushes buffered output, so a failure there means the data did not
  // reach the file even though fwrite accepted it.
  const bool closed = fclose(fp) == 0;
  return written && closed;
}
62 | | |
63 | 30 | TFile::TFile() { |
64 | 30 | } |
65 | | |
66 | 30 | TFile::~TFile() { |
67 | 30 | if (data_is_owned_) { |
68 | 28 | delete data_; |
69 | 28 | } |
70 | 30 | } |
71 | | |
72 | 0 | bool TFile::DeSerializeSize(int32_t *pSize) { |
73 | 0 | uint32_t size; |
74 | 0 | if (FReadEndian(&size, sizeof(size), 1) != 1) { |
75 | 0 | return false; |
76 | 0 | } |
77 | 0 | if (size > data_->size() / 4) { |
78 | | // Reverse endianness. |
79 | 0 | swap_ = !swap_; |
80 | 0 | ReverseN(&size, 4); |
81 | 0 | } |
82 | 0 | *pSize = size; |
83 | 0 | return true; |
84 | 0 | } |
85 | | |
86 | 0 | bool TFile::DeSerializeSkip(size_t size) { |
87 | 0 | uint32_t len; |
88 | 0 | if (!DeSerialize(&len)) { |
89 | 0 | return false; |
90 | 0 | } |
91 | 0 | return Skip(len * size); |
92 | 0 | } |
93 | | |
94 | 54 | bool TFile::DeSerialize(std::string &data) { |
95 | 54 | uint32_t size; |
96 | 54 | if (!DeSerialize(&size)) { |
97 | 0 | return false; |
98 | 54 | } else if (size > 0) { |
99 | | // TODO: optimize. |
100 | 54 | data.resize(size); |
101 | 54 | return DeSerialize(&data[0], size); |
102 | 54 | } |
103 | 0 | data.clear(); |
104 | 0 | return true; |
105 | 54 | } |
106 | | |
107 | 0 | bool TFile::Serialize(const std::string &data) { |
108 | 0 | uint32_t size = data.size(); |
109 | 0 | return Serialize(&size) && Serialize(data.c_str(), size); |
110 | 0 | } |
111 | | |
112 | 0 | bool TFile::DeSerialize(std::vector<char> &data) { |
113 | 0 | uint32_t size; |
114 | 0 | if (!DeSerialize(&size)) { |
115 | 0 | return false; |
116 | 0 | } else if (size > 0) { |
117 | | // TODO: optimize. |
118 | 0 | data.resize(size); |
119 | 0 | return DeSerialize(&data[0], data.size()); |
120 | 0 | } |
121 | 0 | data.clear(); |
122 | 0 | return true; |
123 | 0 | } |
124 | | |
125 | 0 | bool TFile::Serialize(const std::vector<char> &data) { |
126 | 0 | uint32_t size = data.size(); |
127 | 0 | if (!Serialize(&size)) { |
128 | 0 | return false; |
129 | 0 | } else if (size > 0) { |
130 | 0 | return Serialize(&data[0], size); |
131 | 0 | } |
132 | 0 | return true; |
133 | 0 | } |
134 | | |
135 | 0 | bool TFile::Skip(size_t count) { |
136 | 0 | offset_ += count; |
137 | 0 | return true; |
138 | 0 | } |
139 | | |
140 | 0 | bool TFile::Open(const char *filename, FileReader reader) { |
141 | 0 | if (!data_is_owned_) { |
142 | 0 | data_ = new std::vector<char>; |
143 | 0 | data_is_owned_ = true; |
144 | 0 | } |
145 | 0 | offset_ = 0; |
146 | 0 | is_writing_ = false; |
147 | 0 | swap_ = false; |
148 | 0 | if (reader == nullptr) { |
149 | 0 | return LoadDataFromFile(filename, data_); |
150 | 0 | } else { |
151 | 0 | return (*reader)(filename, data_); |
152 | 0 | } |
153 | 0 | } |
154 | | |
155 | 38 | bool TFile::Open(const char *data, size_t size) { |
156 | 38 | offset_ = 0; |
157 | 38 | if (!data_is_owned_) { |
158 | 28 | data_ = new std::vector<char>; |
159 | 28 | data_is_owned_ = true; |
160 | 28 | } |
161 | 38 | is_writing_ = false; |
162 | 38 | swap_ = false; |
163 | 38 | data_->resize(size); // TODO: optimize no init |
164 | 38 | memcpy(&(*data_)[0], data, size); |
165 | 38 | return true; |
166 | 38 | } |
167 | | |
168 | 0 | bool TFile::Open(FILE *fp, int64_t end_offset) { |
169 | 0 | offset_ = 0; |
170 | 0 | auto current_pos = std::ftell(fp); |
171 | 0 | if (current_pos < 0) { |
172 | | // ftell failed. |
173 | 0 | return false; |
174 | 0 | } |
175 | 0 | if (end_offset < 0) { |
176 | 0 | if (fseek(fp, 0, SEEK_END)) { |
177 | 0 | return false; |
178 | 0 | } |
179 | 0 | end_offset = ftell(fp); |
180 | 0 | if (fseek(fp, current_pos, SEEK_SET)) { |
181 | 0 | return false; |
182 | 0 | } |
183 | 0 | } |
184 | 0 | size_t size = end_offset - current_pos; |
185 | 0 | is_writing_ = false; |
186 | 0 | swap_ = false; |
187 | 0 | if (!data_is_owned_) { |
188 | 0 | data_ = new std::vector<char>; |
189 | 0 | data_is_owned_ = true; |
190 | 0 | } |
191 | 0 | data_->resize(size); // TODO: optimize no init |
192 | 0 | return fread(&(*data_)[0], 1, size, fp) == size; |
193 | 0 | } |
194 | | |
195 | 39.9k | char *TFile::FGets(char *buffer, int buffer_size) { |
196 | 39.9k | ASSERT_HOST(!is_writing_); |
197 | 39.9k | int size = 0; |
198 | 399k | while (size + 1 < buffer_size && offset_ < data_->size()) { |
199 | 399k | buffer[size++] = (*data_)[offset_++]; |
200 | 399k | if ((*data_)[offset_ - 1] == '\n') { |
201 | 39.9k | break; |
202 | 39.9k | } |
203 | 399k | } |
204 | 39.9k | if (size < buffer_size) { |
205 | 39.9k | buffer[size] = '\0'; |
206 | 39.9k | } |
207 | 39.9k | return size > 0 ? buffer : nullptr; |
208 | 39.9k | } |
209 | | |
210 | 102k | size_t TFile::FReadEndian(void *buffer, size_t size, size_t count) { |
211 | 102k | auto num_read = FRead(buffer, size, count); |
212 | 102k | if (swap_ && size != 1) { |
213 | 0 | char *char_buffer = static_cast<char *>(buffer); |
214 | 0 | for (size_t i = 0; i < num_read; ++i, char_buffer += size) { |
215 | 0 | ReverseN(char_buffer, size); |
216 | 0 | } |
217 | 0 | } |
218 | 102k | return num_read; |
219 | 102k | } |
220 | | |
221 | 204k | size_t TFile::FRead(void *buffer, size_t size, size_t count) { |
222 | 204k | ASSERT_HOST(!is_writing_); |
223 | 204k | ASSERT_HOST(size > 0); |
224 | 204k | size_t required_size; |
225 | 204k | if (SIZE_MAX / size <= count) { |
226 | | // Avoid integer overflow. |
227 | 0 | required_size = data_->size() - offset_; |
228 | 204k | } else { |
229 | 204k | required_size = size * count; |
230 | 204k | if (data_->size() - offset_ < required_size) { |
231 | 0 | required_size = data_->size() - offset_; |
232 | 0 | } |
233 | 204k | } |
234 | 204k | if (required_size > 0 && buffer != nullptr) { |
235 | 204k | memcpy(buffer, &(*data_)[offset_], required_size); |
236 | 204k | } |
237 | 204k | offset_ += required_size; |
238 | 204k | return required_size / size; |
239 | 204k | } |
240 | | |
241 | 0 | void TFile::Rewind() { |
242 | 0 | ASSERT_HOST(!is_writing_); |
243 | 0 | offset_ = 0; |
244 | 0 | } |
245 | | |
246 | 0 | void TFile::OpenWrite(std::vector<char> *data) { |
247 | 0 | offset_ = 0; |
248 | 0 | if (data != nullptr) { |
249 | 0 | if (data_is_owned_) { |
250 | 0 | delete data_; |
251 | 0 | } |
252 | 0 | data_ = data; |
253 | 0 | data_is_owned_ = false; |
254 | 0 | } else if (!data_is_owned_) { |
255 | 0 | data_ = new std::vector<char>; |
256 | 0 | data_is_owned_ = true; |
257 | 0 | } |
258 | 0 | is_writing_ = true; |
259 | 0 | swap_ = false; |
260 | 0 | data_->clear(); |
261 | 0 | } |
262 | | |
263 | 0 | bool TFile::CloseWrite(const char *filename, FileWriter writer) { |
264 | 0 | ASSERT_HOST(is_writing_); |
265 | 0 | if (writer == nullptr) { |
266 | 0 | return SaveDataToFile(*data_, filename); |
267 | 0 | } else { |
268 | 0 | return (*writer)(*data_, filename); |
269 | 0 | } |
270 | 0 | } |
271 | | |
272 | 0 | size_t TFile::FWrite(const void *buffer, size_t size, size_t count) { |
273 | 0 | ASSERT_HOST(is_writing_); |
274 | 0 | ASSERT_HOST(size > 0); |
275 | 0 | ASSERT_HOST(SIZE_MAX / size > count); |
276 | 0 | size_t total = size * count; |
277 | 0 | const char *buf = static_cast<const char *>(buffer); |
278 | | // This isn't very efficient, but memory is so fast compared to disk |
279 | | // that it is relatively unimportant, and very simple. |
280 | 0 | for (size_t i = 0; i < total; ++i) { |
281 | 0 | data_->push_back(buf[i]); |
282 | 0 | } |
283 | 0 | return count; |
284 | 0 | } |
285 | | |
286 | | } // namespace tesseract. |