Coverage Report

Created: 2026-01-16 06:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/piex/src/image_type_recognition/image_type_recognition_lite.cc
Line
Count
Source
1
// Copyright 2015 Google Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//      http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
//
15
////////////////////////////////////////////////////////////////////////////////
16
//
17
// This file implements the image type recognition algorithm. Functions, which
18
// will check each single image type, are implemented based on the comparisons
19
// of magic numbers or signature strings. Other checks (e.g. endianness, general
20
// tiff magic number "42", etc.) could also be used in some of those functions
21
// to make the type recognition more stable. Those checks are designed
22
// according to the format specifications and our own experiments. Notice that
23
// the magic numbers and signature strings may have different binary values
24
// according to different endiannesses.
25
#include "src/image_type_recognition/image_type_recognition_lite.h"
26
27
#include <algorithm>
28
#include <cassert>
29
#include <string>
30
#include <vector>
31
32
#include "src/binary_parse/range_checked_byte_ptr.h"
33
34
namespace piex {
35
namespace image_type_recognition {
36
namespace {
37
38
using std::string;
39
using binary_parse::MemoryStatus;
40
using binary_parse::RangeCheckedBytePtr;
41
42
// Base class for checking image type. For each image type, one should create an
43
// inherited class and do the implementation.
44
class TypeChecker {
45
 public:
46
  // Comparing function, whihc is used for sorting.
47
570k
  static bool Compare(const TypeChecker* a, const TypeChecker* b) {
48
570k
    assert(a);
49
570k
    assert(b);
50
570k
    return a->RequestedSize() < b->RequestedSize();
51
570k
  }
52
53
85.4k
  virtual ~TypeChecker() {}
54
55
  // Returns the type of current checker.
56
  virtual RawImageTypes Type() const = 0;
57
58
  // Returns the requested data size (in bytes) for current checker. The checker
59
  // guarantees that it will not read more than this size.
60
  virtual size_t RequestedSize() const = 0;
61
62
  // Checks if source data belongs to current checker type.
63
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const = 0;
64
65
 protected:
66
  // Limits the source length to the RequestedSize(), using it guarantees that
67
  // we will not read more than this size from the source.
68
17.7k
  RangeCheckedBytePtr LimitSource(const RangeCheckedBytePtr& source) const {
69
17.7k
    return source.pointerToSubArray(0 /* pos */, RequestedSize());
70
17.7k
  }
71
};
72
73
// Check if the uint16 value at (source + offset) is equal to the target value.
74
bool CheckUInt16Value(const RangeCheckedBytePtr& source,
75
                      const size_t source_offset, const bool use_big_endian,
76
5.28k
                      const unsigned short target_value) {  // NOLINT
77
5.28k
  MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
78
5.28k
  const unsigned short value = binary_parse::Get16u(  // NOLINT
79
5.28k
      source + source_offset, use_big_endian, &status);
80
5.28k
  if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
81
0
    return false;
82
0
  }
83
5.28k
  return (target_value == value);
84
5.28k
}
85
86
// Check if the uint32 value at (source + offset) is equal to the target value.
87
bool CheckUInt32Value(const RangeCheckedBytePtr& source,
88
                      const size_t source_offset, const bool use_big_endian,
89
1.05k
                      const unsigned int target_value) {
90
1.05k
  MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS;
91
1.05k
  const unsigned int value =
92
1.05k
      binary_parse::Get32u(source + source_offset, use_big_endian, &status);
93
1.05k
  if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) {
94
0
    return false;
95
0
  }
96
1.05k
  return (target_value == value);
97
1.05k
}
98
99
// Determine the endianness. The return value is NOT the endianness indicator,
100
// it's just that this function was successful.
101
bool DetermineEndianness(const RangeCheckedBytePtr& source,
102
8.50k
                         bool* is_big_endian) {
103
8.50k
  if (source.remainingLength() < 2) {
104
0
    return false;
105
0
  }
106
107
8.50k
  if (source[0] == 0x49 && source[1] == 0x49) {
108
6.77k
    *is_big_endian = false;
109
6.77k
  } else if (source[0] == 0x4D && source[1] == 0x4D) {
110
698
    *is_big_endian = true;
111
1.03k
  } else {
112
1.03k
    return false;
113
1.03k
  }
114
7.46k
  return true;
115
8.50k
}
116
117
// Check if signature string can match to the same length string start from
118
// (source + offset). The signature string will be used as longer magic number
119
// series.
120
bool IsSignatureMatched(const RangeCheckedBytePtr& source,
121
9.44M
                        const size_t source_offset, const string& signature) {
122
9.44M
  return source.substr(source_offset, signature.size()) == signature;
123
9.44M
}
124
125
// Check if signature is found in [source + offset, source + offset + range].
126
bool IsSignatureFound(const RangeCheckedBytePtr& source,
127
                      const size_t search_offset, const size_t search_range,
128
6.51k
                      const string& signature, size_t* first_matched) {
129
6.51k
  if (source.remainingLength() < search_offset + search_range) {
130
0
    return false;
131
0
  }
132
133
  // The index must be in range [offset, offset + range - sizeof(signature)], so
134
  // that it can guarantee that it will not read outside of range.
135
6.51k
  for (size_t i = search_offset;
136
9.43M
       i < search_offset + search_range - signature.size(); ++i) {
137
9.42M
    if (IsSignatureMatched(source, i, signature)) {
138
2.05k
      if (first_matched) {
139
0
        *first_matched = i;
140
0
      }
141
2.05k
      return true;
142
2.05k
    }
143
9.42M
  }
144
4.46k
  return false;
145
6.51k
}
146
147
// Sony RAW format.
148
class ArwTypeChecker : public TypeChecker {
149
 public:
150
32
  virtual RawImageTypes Type() const { return kArwImage; }
151
152
83.4k
  virtual size_t RequestedSize() const { return 10000; }
153
154
  // Check multiple points:
155
  // 1. valid endianness at the beginning of the file;
156
  // 2. correct tiff magic number at the (offset == 8) position of the file;
157
  // 3. signature "SONY" in first requested bytes;
158
  // 4. correct signature for (section + version) in first requested bytes.
159
93
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
160
93
    RangeCheckedBytePtr limited_source = LimitSource(source);
161
162
93
    bool use_big_endian;
163
93
    if (!DetermineEndianness(limited_source, &use_big_endian)) {
164
17
      return false;
165
17
    }
166
167
76
    const unsigned short kTiffMagic = 0x2A;  // NOLINT
168
76
    const unsigned int kTiffOffset = 8;
169
76
    if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
170
76
                          kTiffMagic) ||
171
72
        !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
172
72
                          kTiffOffset)) {
173
37
      return false;
174
37
    }
175
176
    // Search for kSignatureSony in first requested bytes
177
39
    const string kSignatureSony("SONY");
178
39
    if (!IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
179
39
                          kSignatureSony, NULL)) {
180
4
      return false;
181
4
    }
182
183
    // Search for (kSignatureFileTypeSection + kSignatureVersions[i]) in first
184
    // requested bytes
185
35
    const string kSignatureSection("\x00\xb0\x01\x00\x04\x00\x00\x00", 8);
186
35
    const int kSignatureVersionsSize = 6;
187
35
    const string kSignatureVersions[kSignatureVersionsSize] = {
188
35
        string("\x02\x00", 2),  // ARW 1.0
189
35
        string("\x03\x00", 2),  // ARW 2.0
190
35
        string("\x03\x01", 2),  // ARW 2.1
191
35
        string("\x03\x02", 2),  // ARW 2.2
192
35
        string("\x03\x03", 2),  // ARW 2.3
193
35
        string("\x04\x00", 2),  // ARW 4.0
194
35
    };
195
35
    bool matched = false;
196
245
    for (int i = 0; i < kSignatureVersionsSize; ++i) {
197
210
      matched = matched || IsSignatureFound(
198
98
                               limited_source, 0 /* offset */, RequestedSize(),
199
98
                               kSignatureSection + kSignatureVersions[i], NULL);
200
210
    }
201
35
    return matched;
202
39
  }
203
};
204
205
// Canon RAW (CR3 extension).
206
class Cr3TypeChecker : public TypeChecker {
207
 public:
208
  static constexpr size_t kSignatureOffset = 4;
209
  static constexpr const char* kSignature = "ftypcrx ";
210
211
697
  virtual RawImageTypes Type() const { return kCr3Image; }
212
213
37.9k
  virtual size_t RequestedSize() const {
214
37.9k
    return kSignatureOffset + strlen(kSignature);
215
37.9k
  }
216
217
  // Checks for the ftyp box w/ brand 'crx '.
218
1.97k
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
219
1.97k
    RangeCheckedBytePtr limited_source = LimitSource(source);
220
1.97k
    return IsSignatureMatched(limited_source, kSignatureOffset, kSignature);
221
1.97k
  }
222
};
223
224
// Canon RAW (CR2 extension).
225
class Cr2TypeChecker : public TypeChecker {
226
 public:
227
276
  virtual RawImageTypes Type() const { return kCr2Image; }
228
229
50.7k
  virtual size_t RequestedSize() const { return 16; }
230
231
  // Check multiple points:
232
  // 1. valid endianness at the beginning of the file;
233
  // 2. magic number "42" at the (offset == 2) position of the file;
234
  // 3. signature "CR2" at the (offset == 8) position of the file.
235
1.27k
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
236
1.27k
    RangeCheckedBytePtr limited_source = LimitSource(source);
237
238
1.27k
    bool use_big_endian;
239
1.27k
    if (!DetermineEndianness(limited_source, &use_big_endian)) {
240
20
      return false;
241
20
    }
242
243
1.25k
    const unsigned short kTag = 42;  // NOLINT
244
1.25k
    if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
245
1.25k
                          kTag)) {
246
469
      return false;
247
469
    }
248
249
789
    const string kSignature("CR\2\0", 4);
250
789
    return IsSignatureMatched(limited_source, 8 /* offset */, kSignature);
251
1.25k
  }
252
};
253
254
// Canon RAW (CRW extension).
255
class CrwTypeChecker : public TypeChecker {
256
 public:
257
2
  virtual RawImageTypes Type() const { return kCrwImage; }
258
259
37.2k
  virtual size_t RequestedSize() const { return 14; }
260
261
  // Check only the signature at the (offset == 6) position of the file.
262
1.28k
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
263
1.28k
    RangeCheckedBytePtr limited_source = LimitSource(source);
264
265
1.28k
    bool use_big_endian;
266
1.28k
    if (!DetermineEndianness(limited_source, &use_big_endian)) {
267
20
      return false;
268
20
    }
269
270
1.26k
    string signature;
271
1.26k
    if (use_big_endian) {
272
76
      signature = string("\x00\x10\xba\xb0\xac\xbb\x00\x02", 8);
273
1.18k
    } else {
274
1.18k
      signature = string("HEAPCCDR");
275
1.18k
    }
276
1.26k
    return IsSignatureMatched(limited_source, 6 /* offset */, signature);
277
1.28k
  }
278
};
279
280
// Kodak RAW.
281
class DcrTypeChecker : public TypeChecker {
282
 public:
283
15
  virtual RawImageTypes Type() const { return kDcrImage; }
284
285
67.7k
  virtual size_t RequestedSize() const { return 5000; }
286
287
  // Check two different cases, only need to fulfill one of the two:
288
  // 1. signature at the (offset == 16) position of the file;
289
  // 2. two tags (OriginalFileName and FirmwareVersion) can be found in the
290
  // first requested bytes of the file.
291
132
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
292
132
    RangeCheckedBytePtr limited_source = LimitSource(source);
293
294
132
    bool use_big_endian;
295
132
    if (!DetermineEndianness(limited_source, &use_big_endian)) {
296
17
      return false;
297
17
    }
298
299
    // Case 1: has signature
300
115
    const string kSignature(
301
115
        "\x4b\x4f\x44\x41\x4b\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20", 16);
302
115
    if (IsSignatureMatched(limited_source, 16 /* offset */, kSignature)) {
303
1
      return true;
304
1
    }
305
306
    // Case 2: search for tags in first requested bytes
307
114
    string kIfdTags[2];
308
114
    if (use_big_endian) {
309
29
      kIfdTags[0] = string("\x03\xe9\x00\x02", 4);  // OriginalFileName
310
29
      kIfdTags[1] = string("\x0c\xe5\x00\x02", 4);  // FirmwareVersion
311
85
    } else {
312
85
      kIfdTags[0] = string("\xe9\x03\x02\x00", 4);  // OriginalFileName
313
85
      kIfdTags[1] = string("\xe5\x0c\x02\x00", 4);  // FirmwareVersion
314
85
    }
315
114
    return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
316
114
                            kIfdTags[0], NULL) &&
317
26
           IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
318
26
                            kIfdTags[1], NULL);
319
115
  }
320
};
321
322
// Digital Negative RAW.
323
class DngTypeChecker : public TypeChecker {
324
 public:
325
552
  virtual RawImageTypes Type() const { return kDngImage; }
326
327
64.2k
  virtual size_t RequestedSize() const { return 1024; }
328
329
  // Check multiple points:
330
  // 1. valid endianness at the beginning of the file;
331
  // 2. at least two dng specific tags in the first requested bytes of the
332
  // file
333
987
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
334
987
    RangeCheckedBytePtr limited_source = LimitSource(source);
335
336
987
    bool use_big_endian;
337
987
    if (!DetermineEndianness(limited_source, &use_big_endian)) {
338
17
      return false;
339
17
    }
340
341
    // Search tags in first requested bytes and verify the order of them.
342
970
    const int kTagsCount = 5;
343
970
    string dng_tags[kTagsCount];
344
970
    if (use_big_endian) {
345
70
      dng_tags[0] =
346
70
          string("\xc6\x12\x00\x01\x00\x00\x00\x04", 8);  // tag: 50706
347
70
      dng_tags[1] =
348
70
          string("\xc6\x13\x00\x01\x00\x00\x00\x04", 8);  // tag: 50707
349
70
      dng_tags[2] = string("\xc6\x14\x00\x02", 4);        // tag: 50708
350
70
      dng_tags[3] = string("\xc6\x20", 2);                // tag: 50720
351
70
      dng_tags[4] =
352
70
          string("\xc6\x2d\x00\x04\x00\x00\x00\x01", 8);  // tag: 50733
353
900
    } else {
354
900
      dng_tags[0] =
355
900
          string("\x12\xc6\x01\x00\x04\x00\x00\x00", 8);  // tag: 50706
356
900
      dng_tags[1] =
357
900
          string("\x13\xc6\x01\x00\x04\x00\x00\x00", 8);  // tag: 50707
358
900
      dng_tags[2] = string("\x14\xc6\x02\x00", 4);        // tag: 50708
359
900
      dng_tags[3] = string("\x20\xc6", 2);                // tag: 50720
360
900
      dng_tags[4] =
361
900
          string("\x2d\xc6\x04\x00\x01\x00\x00\x00", 8);  // tag: 50733
362
900
    }
363
970
    int tags_found = 0;
364
5.82k
    for (int i = 0; i < kTagsCount; ++i) {
365
4.85k
      if (IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
366
4.85k
                           dng_tags[i], NULL)) {
367
1.30k
        tags_found++;
368
1.30k
      }
369
4.85k
    }
370
970
    return tags_found >= 2;
371
987
  }
372
};
373
374
// Kodak RAW.
375
class KdcTypeChecker : public TypeChecker {
376
 public:
377
10
  virtual RawImageTypes Type() const { return kKdcImage; }
378
379
63.1k
  virtual size_t RequestedSize() const { return 5000; }
380
381
  // Check two points:
382
  // 1. valid endianness at the beginning of the file;
383
  // 2. two tags (WhiteBalance and SerialNumber) in the first requested bytes.
384
117
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
385
117
    RangeCheckedBytePtr limited_source = LimitSource(source);
386
387
117
    bool use_big_endian;
388
117
    if (!DetermineEndianness(limited_source, &use_big_endian)) {
389
17
      return false;
390
17
    }
391
392
    // Search in first requested bytes
393
100
    const size_t kIfdTagsSize = 2;
394
100
    string kIfdTags[kIfdTagsSize];
395
100
    if (use_big_endian) {
396
29
      kIfdTags[0] = string("\xfa\x0d\x00\x01", 4);  // WhiteBalance
397
29
      kIfdTags[1] = string("\xfa\x00\x00\x02", 4);  // SerialNumber
398
71
    } else {
399
71
      kIfdTags[0] = string("\x0d\xfa\x01\x00", 4);  // WhiteBalance
400
71
      kIfdTags[1] = string("\x00\xfa\x02\x00", 4);  // SerialNumber
401
71
    }
402
403
100
    return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
404
100
                            kIfdTags[0], NULL) &&
405
25
           IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
406
25
                            kIfdTags[1], NULL);
407
117
  }
408
};
409
410
// Leaf RAW.
411
class MosTypeChecker : public TypeChecker {
412
 public:
413
14
  virtual RawImageTypes Type() const { return kMosImage; }
414
415
58.6k
  virtual size_t RequestedSize() const { return 5000; }
416
417
  // Check two points:
418
  // 1. valid endianness at the beginning of the file;
419
  // 2. signature "PKTS    " in the first requested bytes. Note the
420
  // "whitespace". It's important as they are special binary values.
421
107
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
422
107
    RangeCheckedBytePtr limited_source = LimitSource(source);
423
424
107
    bool use_big_endian;
425
107
    if (!DetermineEndianness(source, &use_big_endian)) {
426
17
      return false;
427
17
    }
428
429
    // Search kSignaturePKTS in first requested bytes
430
90
    const string kSignaturePKTS("PKTS\x00\x00\x00\x001", 8);
431
90
    return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
432
90
                            kSignaturePKTS, NULL);
433
107
  }
434
};
435
436
// Minolta RAW.
437
class MrwTypeChecker : public TypeChecker {
438
 public:
439
1
  virtual RawImageTypes Type() const { return kMrwImage; }
440
441
47.1k
  virtual size_t RequestedSize() const { return 4; }
442
443
  // Check only the signature at the beginning of the file.
444
2.23k
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
445
    // Limits the source length to the RequestedSize(), using it guarantees that
446
    // we will not read more than this size from the source.
447
2.23k
    RangeCheckedBytePtr limited_source =
448
2.23k
        source.pointerToSubArray(0 /* pos */, RequestedSize());
449
450
2.23k
    const string kSignature("\0MRM", 4);
451
2.23k
    return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
452
2.23k
  }
453
};
454
455
// Check if the file contains a NRW signature "NRW   " in the first requested
456
// bytes. Note the "whitespace". It's important as they are special binary
457
// values.
458
const size_t kRequestedSizeForNrwSignature = 4000;
459
226
bool ContainsNrwSignature(const RangeCheckedBytePtr& source) {
460
  // Search for kSignatureNrw.
461
226
  const string kSignatureNrw("NRW\x20\x20\x20", 6);
462
226
  return IsSignatureFound(source, 0 /* offset */, kRequestedSizeForNrwSignature,
463
226
                          kSignatureNrw, NULL);
464
226
}
465
466
// Checks if the file contains the signatures for Nikon formats:
467
// * the general Nikon singature "NIKON" string.
468
// * the ReferenceBlackWhite tag.
469
const size_t kRequestedSizeForNikonSignatures = 4000;
470
bool ContainsNikonSignatures(const RangeCheckedBytePtr& source,
471
351
                             const bool use_big_endian) {
472
351
  const string kSignatureNikon("NIKON");
473
351
  const string kReferenceBlackWhiteTag = use_big_endian
474
351
                                             ? string("\x02\x14\x00\x05", 4)
475
351
                                             : string("\x14\x02\x05\x00", 4);
476
351
  const std::vector<string> kSignatures = {kSignatureNikon,
477
351
                                           kReferenceBlackWhiteTag};
478
601
  for (auto const& signature : kSignatures) {
479
601
    if (!IsSignatureFound(source, 0, kRequestedSizeForNikonSignatures,
480
601
                          signature, NULL)) {
481
125
      return false;
482
125
    }
483
601
  }
484
226
  return true;
485
351
}
486
487
// Nikon RAW (NEF extension).
488
class NefTypeChecker : public TypeChecker {
489
 public:
490
209
  virtual RawImageTypes Type() const { return kNefImage; }
491
492
63.2k
  virtual size_t RequestedSize() const {
493
63.2k
    return std::max(kRequestedSizeForNikonSignatures,
494
63.2k
                    kRequestedSizeForNrwSignature);
495
63.2k
  }
496
497
  // Check multiple points:
498
  // 1. valid endianness at the beginning of the file;
499
  // 2. magic number at the (offset == 2) position of the file;
500
  // 3. the signature "NIKON" in the requested bytes of the file;
501
  // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
502
  // 5. does not contain the NRW signature. We may also check a special
503
  // signature "RAW   " similar to the NRW case, but we got issues in some
504
  // special images that the signature locates in the middle of the file, and it
505
  // costs too  long time to check;
506
346
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
507
346
    RangeCheckedBytePtr limited_source = LimitSource(source);
508
509
346
    bool use_big_endian;
510
346
    if (!DetermineEndianness(limited_source, &use_big_endian)) {
511
17
      return false;
512
17
    }
513
514
329
    const unsigned short kTiffMagic = 0x2A;  // NOLINT
515
329
    if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
516
329
                          kTiffMagic)) {
517
23
      return false;
518
23
    }
519
520
306
    return ContainsNikonSignatures(limited_source, use_big_endian) &&
521
221
           !ContainsNrwSignature(limited_source);  // not NRW
522
329
  }
523
};
524
525
// Nikon RAW (NRW extension).
526
class NrwTypeChecker : public TypeChecker {
527
 public:
528
5
  virtual RawImageTypes Type() const { return kNrwImage; }
529
530
58.5k
  virtual size_t RequestedSize() const {
531
58.5k
    return std::max(kRequestedSizeForNikonSignatures,
532
58.5k
                    kRequestedSizeForNrwSignature);
533
58.5k
  }
534
535
  // Check multiple points:
536
  // 1. valid endianness at the beginning of the file;
537
  // 2. magic numbers at the (offset == 2 and offset == 4) positions of the
538
  // file;
539
  // 3. the signature "NIKON" in the first requested bytes of the file;
540
  // 4. the ReferenceBlackWhite tag in the requested bytes of the file;
541
  // 5. contains the NRW signature;
542
137
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
543
137
    RangeCheckedBytePtr limited_source = LimitSource(source);
544
545
137
    bool use_big_endian;
546
137
    if (!DetermineEndianness(limited_source, &use_big_endian)) {
547
17
      return false;
548
17
    }
549
550
120
    const unsigned short kTiffMagic = 0x2A;  // NOLINT
551
120
    const unsigned int kTiffOffset = 8;
552
120
    if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
553
120
                          kTiffMagic) ||
554
97
        !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
555
97
                          kTiffOffset)) {
556
75
      return false;
557
75
    }
558
559
45
    return ContainsNikonSignatures(limited_source, use_big_endian) &&
560
5
           ContainsNrwSignature(limited_source);
561
120
  }
562
};
563
564
// Olympus RAW.
565
class OrfTypeChecker : public TypeChecker {
566
 public:
567
19
  virtual RawImageTypes Type() const { return kOrfImage; }
568
569
63.3k
  virtual size_t RequestedSize() const { return 3000; }
570
571
  // Check multiple points:
572
  // 1. valid endianness at the beginning of the file;
573
  // 2. tag at the (offset == 2) position of the file;
574
  // 3. signature "OLYMP" in the first requested bytes.
575
365
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
576
365
    RangeCheckedBytePtr limited_source = LimitSource(source);
577
578
365
    bool use_big_endian;
579
365
    if (!DetermineEndianness(limited_source, &use_big_endian)) {
580
17
      return false;
581
17
    }
582
583
348
    const size_t kTagSize = 2;
584
348
    const unsigned short kTag[kTagSize] = {0x4F52, 0x5352};  // NOLINT
585
348
    if (!(CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
586
348
                           kTag[0]) ||
587
347
          CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
588
347
                           kTag[1]))) {
589
328
      return false;
590
328
    }
591
592
    // Search for kSignatureOlymp in first requested bytes
593
20
    const string kSignatureOlymp("OLYMP");
594
20
    return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
595
20
                            kSignatureOlymp, NULL);
596
348
  }
597
};
598
599
// Pentax RAW.
600
class PefTypeChecker : public TypeChecker {
601
 public:
602
70
  virtual RawImageTypes Type() const { return kPefImage; }
603
604
63.5k
  virtual size_t RequestedSize() const { return 1280; }
605
606
  // Check multiple points:
607
  // 1. valid big endianness at the beginning of the file;
608
  // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
609
  // 3. signature "AOC   " or "PENTAX  " in first requested bytes.
610
435
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
611
435
    RangeCheckedBytePtr limited_source = LimitSource(source);
612
613
435
    bool use_big_endian;
614
435
    if (!DetermineEndianness(limited_source, &use_big_endian)) {
615
17
      return false;
616
17
    }
617
618
418
    const unsigned short kTiffMagic = 0x2A;  // NOLINT
619
418
    const unsigned int kTiffOffset = 8;
620
418
    if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
621
418
                          kTiffMagic) ||
622
376
        !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
623
376
                          kTiffOffset)) {
624
293
      return false;
625
293
    }
626
627
    // Search for kSignatureAOC or kSignaturePENTAX in first requested bytes
628
125
    const string kSignatureAOC("\x41\x4f\x43\x00\x4d\x4d", 6);
629
125
    const string kSignaturePENTAX("\x50\x45\x4e\x54\x41\x58\x20\x00", 8);
630
125
    return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
631
125
                            kSignatureAOC, NULL) ||
632
69
           IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(),
633
69
                            kSignaturePENTAX, NULL);
634
418
  }
635
};
636
637
// Apple format.
638
class QtkTypeChecker : public TypeChecker {
639
 public:
640
1
  virtual RawImageTypes Type() const { return kQtkImage; }
641
642
74.0k
  virtual size_t RequestedSize() const { return 8; }
643
644
  // Check only the signature at the beginning of the file.
645
2.08k
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
646
2.08k
    RangeCheckedBytePtr limited_source = LimitSource(source);
647
648
2.08k
    const size_t kSignatureSize = 2;
649
2.08k
    const string kSignature[kSignatureSize] = {
650
2.08k
        string("qktk\x00\x00\x00\x08", 8), string("qktn\x00\x00\x00\x08", 8),
651
2.08k
    };
652
2.08k
    return IsSignatureMatched(limited_source, 0 /* offset */, kSignature[0]) ||
653
2.08k
           IsSignatureMatched(limited_source, 0 /* offset */, kSignature[1]);
654
2.08k
  }
655
};
656
657
// Fuji RAW.
658
class RafTypeChecker : public TypeChecker {
659
 public:
660
110
  virtual RawImageTypes Type() const { return kRafImage; }
661
662
69.5k
  virtual size_t RequestedSize() const { return 8; }
663
664
  // Check only the signature at the beginning of the file.
665
2.08k
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
666
2.08k
    RangeCheckedBytePtr limited_source = LimitSource(source);
667
668
2.08k
    const string kSignature("FUJIFILM");
669
2.08k
    return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
670
2.08k
  }
671
};
672
673
// Contax N RAW.
674
class RawContaxNTypeChecker : public TypeChecker {
675
 public:
676
5
  virtual RawImageTypes Type() const { return kRawContaxNImage; }
677
678
59.4k
  virtual size_t RequestedSize() const { return 36; }
679
680
  // Check only the signature at the (offset == 25) position of the
681
  // file.
682
1.00k
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
683
1.00k
    RangeCheckedBytePtr limited_source = LimitSource(source);
684
685
1.00k
    const string kSignature("ARECOYK");
686
1.00k
    return IsSignatureMatched(limited_source, 25, kSignature);
687
1.00k
  }
688
};
689
690
// Panasonic RAW.
691
class Rw2TypeChecker : public TypeChecker {
692
 public:
693
144
  virtual RawImageTypes Type() const { return kRw2Image; }
694
695
78.6k
  virtual size_t RequestedSize() const { return 4; }
696
697
  // Check two points: 1. valid endianness at the beginning of the
698
  // file; 2. tag at the (offset == 2) position of the file.
699
2.23k
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
700
2.23k
    RangeCheckedBytePtr limited_source = LimitSource(source);
701
702
2.23k
    bool use_big_endian;
703
2.23k
    if (!DetermineEndianness(source, &use_big_endian)) {
704
828
      return false;
705
828
    }
706
707
1.40k
    const unsigned short kTag = 0x55;  // NOLINT
708
1.40k
    return CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
709
1.40k
                            kTag);
710
2.23k
  }
711
};
712
713
// Samsung RAW.
714
class SrwTypeChecker : public TypeChecker {
715
 public:
716
10
  virtual RawImageTypes Type() const { return kSrwImage; }
717
718
50.5k
  virtual size_t RequestedSize() const { return 256; }
719
720
  // Check multiple points:
721
  // 1. valid big endianness at the beginning of the file;
722
  // 2. magic numbers at the (offset == 2 and offset==4) positions of the file;
723
  // 3. the signature "SAMSUNG" in the requested bytes of the file;
724
997
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
725
997
    RangeCheckedBytePtr limited_source = LimitSource(source);
726
727
997
    bool use_big_endian;
728
997
    if (!DetermineEndianness(source, &use_big_endian)) {
729
17
      return false;
730
17
    }
731
732
980
    const unsigned short kTiffMagic = 0x2A;  // NOLINT
733
980
    const unsigned int kTiffOffset = 8;
734
980
    if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian,
735
980
                          kTiffMagic) ||
736
512
        !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian,
737
845
                          kTiffOffset)) {
738
845
      return false;
739
845
    }
740
741
135
    const string kSignature("SAMSUNG");
742
135
    if (!IsSignatureFound(source, 0, RequestedSize(), kSignature, NULL)) {
743
125
      return false;
744
125
    }
745
10
    return true;
746
135
  }
747
};
748
749
// Sigma / Polaroid RAW.
750
class X3fTypeChecker : public TypeChecker {
751
 public:
752
1
  virtual RawImageTypes Type() const { return kX3fImage; }
753
754
78.5k
  virtual size_t RequestedSize() const { return 4; }
755
756
  // Check only the signature at the beginning of the file.
757
2.08k
  virtual bool IsMyType(const RangeCheckedBytePtr& source) const {
758
2.08k
    RangeCheckedBytePtr limited_source = LimitSource(source);
759
760
2.08k
    const string kSignature("FOVb", 4);
761
2.08k
    return IsSignatureMatched(limited_source, 0 /* offset */, kSignature);
762
2.08k
  }
763
};
764
765
// This class contains the list of all type checkers. One should used this list
766
// as a whole to execute the image type recognition.
767
class TypeCheckerList {
768
 public:
769
4.49k
  TypeCheckerList() {
770
    // Add all supported RAW type checkers here.
771
4.49k
    checkers_.push_back(new ArwTypeChecker());
772
4.49k
    checkers_.push_back(new Cr3TypeChecker());
773
4.49k
    checkers_.push_back(new Cr2TypeChecker());
774
4.49k
    checkers_.push_back(new CrwTypeChecker());
775
4.49k
    checkers_.push_back(new DcrTypeChecker());
776
4.49k
    checkers_.push_back(new DngTypeChecker());
777
4.49k
    checkers_.push_back(new KdcTypeChecker());
778
4.49k
    checkers_.push_back(new MosTypeChecker());
779
4.49k
    checkers_.push_back(new MrwTypeChecker());
780
4.49k
    checkers_.push_back(new NefTypeChecker());
781
4.49k
    checkers_.push_back(new NrwTypeChecker());
782
4.49k
    checkers_.push_back(new OrfTypeChecker());
783
4.49k
    checkers_.push_back(new PefTypeChecker());
784
4.49k
    checkers_.push_back(new QtkTypeChecker());
785
4.49k
    checkers_.push_back(new RafTypeChecker());
786
4.49k
    checkers_.push_back(new RawContaxNTypeChecker());
787
4.49k
    checkers_.push_back(new Rw2TypeChecker());
788
4.49k
    checkers_.push_back(new SrwTypeChecker());
789
4.49k
    checkers_.push_back(new X3fTypeChecker());
790
791
    // Sort the checkers by the ascending RequestedSize() to get better
792
    // performance when checking type.
793
4.49k
    std::sort(checkers_.begin(), checkers_.end(), TypeChecker::Compare);
794
4.49k
  }
795
796
4.49k
  ~TypeCheckerList() {
797
89.9k
    for (size_t i = 0; i < checkers_.size(); ++i) {
798
85.4k
      delete checkers_[i];
799
85.4k
      checkers_[i] = NULL;
800
85.4k
    }
801
4.49k
  }
802
803
  // Returns the type of source data. If it can not be identified, returns
804
  // kNonRawImage.
805
2.23k
  RawImageTypes GetType(const RangeCheckedBytePtr& source) const {
806
20.0k
    for (size_t i = 0; i < checkers_.size(); ++i) {
807
19.9k
      if (checkers_[i]->IsMyType(source)) {
808
2.17k
        return checkers_[i]->Type();
809
2.17k
      }
810
19.9k
    }
811
61
    return kNonRawImage;
812
2.23k
  }
813
814
  // Returns the maximum size of requested size of data for identifying image
815
  // type using this class. The class guarantees that it will not read more than
816
  // this size.
817
2.26k
  size_t RequestedSize() const {
818
2.26k
    assert(!checkers_.empty());
819
    // The checkers_ is ascending sorted. The last element is the maximum.
820
2.26k
    return checkers_.back()->RequestedSize();
821
2.26k
  }
822
823
0
  bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
824
0
    const TypeChecker* type_checker = GetTypeCheckerForType(type);
825
0
    if (type_checker) {
826
0
      return type_checker->IsMyType(source);
827
0
    } else {
828
0
      return false;
829
0
    }
830
0
  }
831
832
0
  size_t RequestedSizeForType(const RawImageTypes type) {
833
0
    const TypeChecker* type_checker = GetTypeCheckerForType(type);
834
0
    if (type_checker) {
835
0
      return type_checker->RequestedSize();
836
0
    } else {
837
0
      return 0;
838
0
    }
839
0
  }
840
841
 private:
842
0
  const TypeChecker* GetTypeCheckerForType(const RawImageTypes type) {
843
0
    for (const auto* type_checker : checkers_) {
844
0
      if (type_checker->Type() == type) {
845
0
        return type_checker;
846
0
      }
847
0
    }
848
0
    return nullptr;
849
0
  }
850
851
  std::vector<TypeChecker*> checkers_;
852
};
853
854
}  // namespace
855
856
0
bool IsRaw(const RawImageTypes type) {
857
0
  switch (type) {
858
    // Non-RAW-image type
859
0
    case kNonRawImage: {
860
0
      return false;
861
0
    }
862
863
    // Raw image types
864
0
    case kArwImage:
865
0
    case kCr3Image:
866
0
    case kCr2Image:
867
0
    case kCrwImage:
868
0
    case kDcrImage:
869
0
    case kDngImage:
870
0
    case kKdcImage:
871
0
    case kMosImage:
872
0
    case kMrwImage:
873
0
    case kNefImage:
874
0
    case kNrwImage:
875
0
    case kOrfImage:
876
0
    case kPefImage:
877
0
    case kQtkImage:
878
0
    case kRafImage:
879
0
    case kRawContaxNImage:
880
0
    case kRw2Image:
881
0
    case kSrwImage:
882
0
    case kX3fImage: {
883
0
      return true;
884
0
    }
885
886
0
    default: {
887
      // Unsupported type!
888
0
      assert(false);
889
0
    }
890
0
  }
891
0
  return false;
892
0
}
893
894
0
bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) {
895
0
  return TypeCheckerList().IsOfType(source, type);
896
0
}
897
898
2.23k
// Builds a fresh TypeCheckerList and returns the type it recognizes for
// `source` (kNonRawImage when no checker accepts the data).
RawImageTypes RecognizeRawImageTypeLite(const RangeCheckedBytePtr& source) {
  const TypeCheckerList checker_list;
  const RawImageTypes recognized = checker_list.GetType(source);
  return recognized;
}
901
902
2.26k
size_t GetNumberOfBytesForIsRawLite() {
903
2.26k
  return TypeCheckerList().RequestedSize();
904
2.26k
}
905
906
0
size_t GetNumberOfBytesForIsOfType(const RawImageTypes type) {
907
0
  return TypeCheckerList().RequestedSizeForType(type);
908
0
}
909
910
0
bool IsRawLite(const RangeCheckedBytePtr& source) {
911
0
  return IsRaw(RecognizeRawImageTypeLite(source));
912
0
}
913
914
}  // namespace image_type_recognition
915
}  // namespace piex