Coverage Report

Created: 2025-06-13 07:15

/src/tesseract/src/ccutil/serialis.cpp
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
 * File:        serialis.cpp  (Formerly serialmac.h)
3
 * Description: Inline routines and macros for serialisation functions
4
 * Author:      Phil Cheatle
5
 *
6
 * (C) Copyright 1990, Hewlett-Packard Ltd.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 *
17
 **********************************************************************/
18
19
#include "serialis.h"
20
21
#include "errcode.h"
22
23
#include "helpers.h" // for ReverseN
24
25
#include <climits> // for LONG_MAX
26
#include <cstdio>
27
28
namespace tesseract {
29
30
// The default FileReader: loads the whole of the file named by filename into
// *data.
//
// Returns true only when the file could be opened, has a size strictly
// between 0 and LONG_MAX, and every byte was read. An empty file therefore
// yields false. On failure the contents of *data are unspecified.
bool LoadDataFromFile(const char *filename, std::vector<char> *data) {
  FILE *fp = fopen(filename, "rb");
  if (fp == nullptr) {
    return false;
  }
  bool result = false;
  // A failed seek would make the ftell result meaningless, so check it.
  if (fseek(fp, 0, SEEK_END) == 0) {
    const long size = std::ftell(fp);
    // ftell reports -1 on error. Trying to open a directory on Linux sets
    // size to LONG_MAX. Catch both here.
    if (size > 0 && size < LONG_MAX && fseek(fp, 0, SEEK_SET) == 0) {
      const auto byte_count = static_cast<size_t>(size);
      // Reserve an extra byte in case caller wants to append a '\0' character.
      data->reserve(byte_count + 1);
      data->resize(byte_count); // TODO: optimize no init
      result = fread(data->data(), 1, byte_count, fp) == byte_count;
    }
  }
  fclose(fp);
  return result;
}
50
51
// The default FileWriter: writes the bytes of data to the file named by
// filename, creating or truncating it.
//
// Returns false if the file cannot be opened, if fewer bytes than requested
// were written, or if closing the file fails (buffered output may only be
// flushed, and so only fail, at close time).
bool SaveDataToFile(const std::vector<char> &data, const char *filename) {
  FILE *fp = fopen(filename, "wb");
  if (fp == nullptr) {
    return false;
  }
  // An empty vector has no element to take the address of, so skip the write.
  const bool written =
      data.empty() || fwrite(data.data(), 1, data.size(), fp) == data.size();
  const bool closed = fclose(fp) == 0;
  return written && closed;
}
62
63
30
// Default constructor: performs no work of its own in this file.
TFile::TFile() = default;
65
66
30
// Releases the data buffer, but only when this object owns it.
TFile::~TFile() {
  if (!data_is_owned_) {
    return;
  }
  delete data_;
}
71
72
0
bool TFile::DeSerializeSize(int32_t *pSize) {
73
0
  uint32_t size;
74
0
  if (FReadEndian(&size, sizeof(size), 1) != 1) {
75
0
    return false;
76
0
  }
77
0
  if (size > data_->size() / 4) {
78
    // Reverse endianness.
79
0
    swap_ = !swap_;
80
0
    ReverseN(&size, 4);
81
0
  }
82
0
  *pSize = size;
83
0
  return true;
84
0
}
85
86
0
bool TFile::DeSerializeSkip(size_t size) {
87
0
  uint32_t len;
88
0
  if (!DeSerialize(&len)) {
89
0
    return false;
90
0
  }
91
0
  return Skip(len * size);
92
0
}
93
94
54
bool TFile::DeSerialize(std::string &data) {
95
54
  uint32_t size;
96
54
  if (!DeSerialize(&size)) {
97
0
    return false;
98
54
  } else if (size > 0) {
99
    // TODO: optimize.
100
54
    data.resize(size);
101
54
    return DeSerialize(&data[0], size);
102
54
  }
103
0
  data.clear();
104
0
  return true;
105
54
}
106
107
0
bool TFile::Serialize(const std::string &data) {
108
0
  uint32_t size = data.size();
109
0
  return Serialize(&size) && Serialize(data.c_str(), size);
110
0
}
111
112
0
bool TFile::DeSerialize(std::vector<char> &data) {
113
0
  uint32_t size;
114
0
  if (!DeSerialize(&size)) {
115
0
    return false;
116
0
  } else if (size > 0) {
117
    // TODO: optimize.
118
0
    data.resize(size);
119
0
    return DeSerialize(&data[0], data.size());
120
0
  }
121
0
  data.clear();
122
0
  return true;
123
0
}
124
125
0
bool TFile::Serialize(const std::vector<char> &data) {
126
0
  uint32_t size = data.size();
127
0
  if (!Serialize(&size)) {
128
0
    return false;
129
0
  } else if (size > 0) {
130
0
    return Serialize(&data[0], size);
131
0
  }
132
0
  return true;
133
0
}
134
135
0
bool TFile::Skip(size_t count) {
136
0
  offset_ += count;
137
0
  return true;
138
0
}
139
140
0
bool TFile::Open(const char *filename, FileReader reader) {
141
0
  if (!data_is_owned_) {
142
0
    data_ = new std::vector<char>;
143
0
    data_is_owned_ = true;
144
0
  }
145
0
  offset_ = 0;
146
0
  is_writing_ = false;
147
0
  swap_ = false;
148
0
  if (reader == nullptr) {
149
0
    return LoadDataFromFile(filename, data_);
150
0
  } else {
151
0
    return (*reader)(filename, data_);
152
0
  }
153
0
}
154
155
38
bool TFile::Open(const char *data, size_t size) {
156
38
  offset_ = 0;
157
38
  if (!data_is_owned_) {
158
28
    data_ = new std::vector<char>;
159
28
    data_is_owned_ = true;
160
28
  }
161
38
  is_writing_ = false;
162
38
  swap_ = false;
163
38
  data_->resize(size); // TODO: optimize no init
164
38
  memcpy(&(*data_)[0], data, size);
165
38
  return true;
166
38
}
167
168
0
bool TFile::Open(FILE *fp, int64_t end_offset) {
169
0
  offset_ = 0;
170
0
  auto current_pos = std::ftell(fp);
171
0
  if (current_pos < 0) {
172
    // ftell failed.
173
0
    return false;
174
0
  }
175
0
  if (end_offset < 0) {
176
0
    if (fseek(fp, 0, SEEK_END)) {
177
0
      return false;
178
0
    }
179
0
    end_offset = ftell(fp);
180
0
    if (fseek(fp, current_pos, SEEK_SET)) {
181
0
      return false;
182
0
    }
183
0
  }
184
0
  size_t size = end_offset - current_pos;
185
0
  is_writing_ = false;
186
0
  swap_ = false;
187
0
  if (!data_is_owned_) {
188
0
    data_ = new std::vector<char>;
189
0
    data_is_owned_ = true;
190
0
  }
191
0
  data_->resize(size); // TODO: optimize no init
192
0
  return fread(&(*data_)[0], 1, size, fp) == size;
193
0
}
194
195
39.9k
char *TFile::FGets(char *buffer, int buffer_size) {
196
39.9k
  ASSERT_HOST(!is_writing_);
197
39.9k
  int size = 0;
198
399k
  while (size + 1 < buffer_size && offset_ < data_->size()) {
199
399k
    buffer[size++] = (*data_)[offset_++];
200
399k
    if ((*data_)[offset_ - 1] == '\n') {
201
39.9k
      break;
202
39.9k
    }
203
399k
  }
204
39.9k
  if (size < buffer_size) {
205
39.9k
    buffer[size] = '\0';
206
39.9k
  }
207
39.9k
  return size > 0 ? buffer : nullptr;
208
39.9k
}
209
210
102k
size_t TFile::FReadEndian(void *buffer, size_t size, size_t count) {
211
102k
  auto num_read = FRead(buffer, size, count);
212
102k
  if (swap_ && size != 1) {
213
0
    char *char_buffer = static_cast<char *>(buffer);
214
0
    for (size_t i = 0; i < num_read; ++i, char_buffer += size) {
215
0
      ReverseN(char_buffer, size);
216
0
    }
217
0
  }
218
102k
  return num_read;
219
102k
}
220
221
204k
size_t TFile::FRead(void *buffer, size_t size, size_t count) {
222
204k
  ASSERT_HOST(!is_writing_);
223
204k
  ASSERT_HOST(size > 0);
224
204k
  size_t required_size;
225
204k
  if (SIZE_MAX / size <= count) {
226
    // Avoid integer overflow.
227
0
    required_size = data_->size() - offset_;
228
204k
  } else {
229
204k
    required_size = size * count;
230
204k
    if (data_->size() - offset_ < required_size) {
231
0
      required_size = data_->size() - offset_;
232
0
    }
233
204k
  }
234
204k
  if (required_size > 0 && buffer != nullptr) {
235
204k
    memcpy(buffer, &(*data_)[offset_], required_size);
236
204k
  }
237
204k
  offset_ += required_size;
238
204k
  return required_size / size;
239
204k
}
240
241
0
void TFile::Rewind() {
242
0
  ASSERT_HOST(!is_writing_);
243
0
  offset_ = 0;
244
0
}
245
246
0
void TFile::OpenWrite(std::vector<char> *data) {
247
0
  offset_ = 0;
248
0
  if (data != nullptr) {
249
0
    if (data_is_owned_) {
250
0
      delete data_;
251
0
    }
252
0
    data_ = data;
253
0
    data_is_owned_ = false;
254
0
  } else if (!data_is_owned_) {
255
0
    data_ = new std::vector<char>;
256
0
    data_is_owned_ = true;
257
0
  }
258
0
  is_writing_ = true;
259
0
  swap_ = false;
260
0
  data_->clear();
261
0
}
262
263
0
bool TFile::CloseWrite(const char *filename, FileWriter writer) {
264
0
  ASSERT_HOST(is_writing_);
265
0
  if (writer == nullptr) {
266
0
    return SaveDataToFile(*data_, filename);
267
0
  } else {
268
0
    return (*writer)(*data_, filename);
269
0
  }
270
0
}
271
272
0
size_t TFile::FWrite(const void *buffer, size_t size, size_t count) {
273
0
  ASSERT_HOST(is_writing_);
274
0
  ASSERT_HOST(size > 0);
275
0
  ASSERT_HOST(SIZE_MAX / size > count);
276
0
  size_t total = size * count;
277
0
  const char *buf = static_cast<const char *>(buffer);
278
  // This isn't very efficient, but memory is so fast compared to disk
279
  // that it is relatively unimportant, and very simple.
280
0
  for (size_t i = 0; i < total; ++i) {
281
0
    data_->push_back(buf[i]);
282
0
  }
283
0
  return count;
284
0
}
285
286
} // namespace tesseract.