/src/piex/src/image_type_recognition/image_type_recognition_lite.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2015 Google Inc. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | // |
15 | | //////////////////////////////////////////////////////////////////////////////// |
16 | | // |
17 | | // This file implements the image type recognition algorithm. Functions, which |
18 | | // will check each single image type, are implemented based on the comparisons |
19 | | // of magic numbers or signature strings. Other checks (e.g endianness, general |
20 | | // tiff magic number "42", etc.) could also be used in some of those functions |
21 | | // to make the type recognition more stable. Those checks are designed |
22 | | // according to the format specifications and our own experiments. Notice that |
23 | | // the magic numbers and signature strings may have different binary values |
24 | | // according to different endiannesses. |
25 | | #include "src/image_type_recognition/image_type_recognition_lite.h" |
26 | | |
27 | | #include <algorithm> |
28 | | #include <cassert> |
29 | | #include <string> |
30 | | #include <vector> |
31 | | |
32 | | #include "src/binary_parse/range_checked_byte_ptr.h" |
33 | | |
34 | | namespace piex { |
35 | | namespace image_type_recognition { |
36 | | namespace { |
37 | | |
38 | | using std::string; |
39 | | using binary_parse::MemoryStatus; |
40 | | using binary_parse::RangeCheckedBytePtr; |
41 | | |
42 | | // Base class for checking image type. For each image type, one should create an |
43 | | // inherited class and do the implementation. |
44 | | class TypeChecker { |
45 | | public: |
46 | | // Comparing function, whihc is used for sorting. |
47 | 448k | static bool Compare(const TypeChecker* a, const TypeChecker* b) { |
48 | 448k | assert(a); |
49 | 448k | assert(b); |
50 | 448k | return a->RequestedSize() < b->RequestedSize(); |
51 | 448k | } |
52 | | |
53 | 67.0k | virtual ~TypeChecker() {} |
54 | | |
55 | | // Returns the type of current checker. |
56 | | virtual RawImageTypes Type() const = 0; |
57 | | |
58 | | // Returns the requested data size (in bytes) for current checker. The checker |
59 | | // guarantees that it will not read more than this size. |
60 | | virtual size_t RequestedSize() const = 0; |
61 | | |
62 | | // Checks if source data belongs to current checker type. |
63 | | virtual bool IsMyType(const RangeCheckedBytePtr& source) const = 0; |
64 | | |
65 | | protected: |
66 | | // Limits the source length to the RequestedSize(), using it guarantees that |
67 | | // we will not read more than this size from the source. |
68 | 15.7k | RangeCheckedBytePtr LimitSource(const RangeCheckedBytePtr& source) const { |
69 | 15.7k | return source.pointerToSubArray(0 /* pos */, RequestedSize()); |
70 | 15.7k | } |
71 | | }; |
72 | | |
73 | | // Check if the uint16 value at (source + offset) is equal to the target value. |
74 | | bool CheckUInt16Value(const RangeCheckedBytePtr& source, |
75 | | const size_t source_offset, const bool use_big_endian, |
76 | 5.59k | const unsigned short target_value) { // NOLINT |
77 | 5.59k | MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS; |
78 | 5.59k | const unsigned short value = binary_parse::Get16u( // NOLINT |
79 | 5.59k | source + source_offset, use_big_endian, &status); |
80 | 5.59k | if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) { |
81 | 0 | return false; |
82 | 0 | } |
83 | 5.59k | return (target_value == value); |
84 | 5.59k | } |
85 | | |
86 | | // Check if the uint32 value at (source + offset) is equal to the target value. |
87 | | bool CheckUInt32Value(const RangeCheckedBytePtr& source, |
88 | | const size_t source_offset, const bool use_big_endian, |
89 | 928 | const unsigned int target_value) { |
90 | 928 | MemoryStatus status = binary_parse::RANGE_CHECKED_BYTE_SUCCESS; |
91 | 928 | const unsigned int value = |
92 | 928 | binary_parse::Get32u(source + source_offset, use_big_endian, &status); |
93 | 928 | if (status != binary_parse::RANGE_CHECKED_BYTE_SUCCESS) { |
94 | 0 | return false; |
95 | 0 | } |
96 | 928 | return (target_value == value); |
97 | 928 | } |
98 | | |
99 | | // Determine the endianness. The return value is NOT the endianness indicator, |
100 | | // it's just that this function was successful. |
101 | | bool DetermineEndianness(const RangeCheckedBytePtr& source, |
102 | 8.51k | bool* is_big_endian) { |
103 | 8.51k | if (source.remainingLength() < 2) { |
104 | 0 | return false; |
105 | 0 | } |
106 | | |
107 | 8.51k | if (source[0] == 0x49 && source[1] == 0x49) { |
108 | 7.36k | *is_big_endian = false; |
109 | 7.36k | } else if (source[0] == 0x4D && source[1] == 0x4D) { |
110 | 563 | *is_big_endian = true; |
111 | 583 | } else { |
112 | 583 | return false; |
113 | 583 | } |
114 | 7.93k | return true; |
115 | 8.51k | } |
116 | | |
117 | | // Check if signature string can match to the same length string start from |
118 | | // (source + offset). The signature string will be used as longer magic number |
119 | | // series. |
120 | | bool IsSignatureMatched(const RangeCheckedBytePtr& source, |
121 | 9.20M | const size_t source_offset, const string& signature) { |
122 | 9.20M | return source.substr(source_offset, signature.size()) == signature; |
123 | 9.20M | } |
124 | | |
125 | | // Check if signature is found in [source + offset, source + offset + range]. |
126 | | bool IsSignatureFound(const RangeCheckedBytePtr& source, |
127 | | const size_t search_offset, const size_t search_range, |
128 | 6.58k | const string& signature, size_t* first_matched) { |
129 | 6.58k | if (source.remainingLength() < search_offset + search_range) { |
130 | 0 | return false; |
131 | 0 | } |
132 | | |
133 | | // The index must be in range [offset, offset + range - sizeof(signature)], so |
134 | | // that it can guarantee that it will not read outside of range. |
135 | 6.58k | for (size_t i = search_offset; |
136 | 9.19M | i < search_offset + search_range - signature.size(); ++i) { |
137 | 9.19M | if (IsSignatureMatched(source, i, signature)) { |
138 | 2.07k | if (first_matched) { |
139 | 0 | *first_matched = i; |
140 | 0 | } |
141 | 2.07k | return true; |
142 | 2.07k | } |
143 | 9.19M | } |
144 | 4.50k | return false; |
145 | 6.58k | } |
146 | | |
147 | | // Sony RAW format. |
148 | | class ArwTypeChecker : public TypeChecker { |
149 | | public: |
150 | 27 | virtual RawImageTypes Type() const { return kArwImage; } |
151 | | |
152 | 65.5k | virtual size_t RequestedSize() const { return 10000; } |
153 | | |
154 | | // Check multiple points: |
155 | | // 1. valid endianness at the beginning of the file; |
156 | | // 2. correct tiff magic number at the (offset == 8) position of the file; |
157 | | // 3. signature "SONY" in first requested bytes; |
158 | | // 4. correct signature for (section + version) in first requested bytes. |
159 | 99 | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
160 | 99 | RangeCheckedBytePtr limited_source = LimitSource(source); |
161 | | |
162 | 99 | bool use_big_endian; |
163 | 99 | if (!DetermineEndianness(limited_source, &use_big_endian)) { |
164 | 33 | return false; |
165 | 33 | } |
166 | | |
167 | 66 | const unsigned short kTiffMagic = 0x2A; // NOLINT |
168 | 66 | const unsigned int kTiffOffset = 8; |
169 | 66 | if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, |
170 | 66 | kTiffMagic) || |
171 | 66 | !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian, |
172 | 57 | kTiffOffset)) { |
173 | 36 | return false; |
174 | 36 | } |
175 | | |
176 | | // Search for kSignatureSony in first requested bytes |
177 | 30 | const string kSignatureSony("SONY"); |
178 | 30 | if (!IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), |
179 | 30 | kSignatureSony, NULL)) { |
180 | 1 | return false; |
181 | 1 | } |
182 | | |
183 | | // Search for (kSignatureFileTypeSection + kSignatureVersions[i]) in first |
184 | | // requested bytes |
185 | 29 | const string kSignatureSection("\x00\xb0\x01\x00\x04\x00\x00\x00", 8); |
186 | 29 | const int kSignatureVersionsSize = 6; |
187 | 29 | const string kSignatureVersions[kSignatureVersionsSize] = { |
188 | 29 | string("\x02\x00", 2), // ARW 1.0 |
189 | 29 | string("\x03\x00", 2), // ARW 2.0 |
190 | 29 | string("\x03\x01", 2), // ARW 2.1 |
191 | 29 | string("\x03\x02", 2), // ARW 2.2 |
192 | 29 | string("\x03\x03", 2), // ARW 2.3 |
193 | 29 | string("\x04\x00", 2), // ARW 4.0 |
194 | 29 | }; |
195 | 29 | bool matched = false; |
196 | 203 | for (int i = 0; i < kSignatureVersionsSize; ++i) { |
197 | 174 | matched = matched || IsSignatureFound( |
198 | 76 | limited_source, 0 /* offset */, RequestedSize(), |
199 | 76 | kSignatureSection + kSignatureVersions[i], NULL); |
200 | 174 | } |
201 | 29 | return matched; |
202 | 30 | } |
203 | | }; |
204 | | |
205 | | // Canon RAW (CR3 extension). |
206 | | class Cr3TypeChecker : public TypeChecker { |
207 | | public: |
208 | | static constexpr size_t kSignatureOffset = 4; |
209 | | static constexpr const char* kSignature = "ftypcrx "; |
210 | | |
211 | 0 | virtual RawImageTypes Type() const { return kCr3Image; } |
212 | | |
213 | 29.6k | virtual size_t RequestedSize() const { |
214 | 29.6k | return kSignatureOffset + strlen(kSignature); |
215 | 29.6k | } |
216 | | |
217 | | // Checks for the ftyp box w/ brand 'crx '. |
218 | 1.44k | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
219 | 1.44k | RangeCheckedBytePtr limited_source = LimitSource(source); |
220 | 1.44k | return IsSignatureMatched(limited_source, kSignatureOffset, kSignature); |
221 | 1.44k | } |
222 | | }; |
223 | | |
224 | | // Canon RAW (CR2 extension). |
225 | | class Cr2TypeChecker : public TypeChecker { |
226 | | public: |
227 | 404 | virtual RawImageTypes Type() const { return kCr2Image; } |
228 | | |
229 | 40.2k | virtual size_t RequestedSize() const { return 16; } |
230 | | |
231 | | // Check multiple points: |
232 | | // 1. valid endianness at the beginning of the file; |
233 | | // 2. magic number "42" at the (offset == 2) position of the file; |
234 | | // 3. signature "CR2" at the (offset == 8) position of the file. |
235 | 1.44k | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
236 | 1.44k | RangeCheckedBytePtr limited_source = LimitSource(source); |
237 | | |
238 | 1.44k | bool use_big_endian; |
239 | 1.44k | if (!DetermineEndianness(limited_source, &use_big_endian)) { |
240 | 35 | return false; |
241 | 35 | } |
242 | | |
243 | 1.41k | const unsigned short kTag = 42; // NOLINT |
244 | 1.41k | if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, |
245 | 1.41k | kTag)) { |
246 | 589 | return false; |
247 | 589 | } |
248 | | |
249 | 822 | const string kSignature("CR\2\0", 4); |
250 | 822 | return IsSignatureMatched(limited_source, 8 /* offset */, kSignature); |
251 | 1.41k | } |
252 | | }; |
253 | | |
254 | | // Canon RAW (CRW extension). |
255 | | class CrwTypeChecker : public TypeChecker { |
256 | | public: |
257 | 2 | virtual RawImageTypes Type() const { return kCrwImage; } |
258 | | |
259 | 29.6k | virtual size_t RequestedSize() const { return 14; } |
260 | | |
261 | | // Check only the signature at the (offset == 6) position of the file. |
262 | 1.44k | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
263 | 1.44k | RangeCheckedBytePtr limited_source = LimitSource(source); |
264 | | |
265 | 1.44k | bool use_big_endian; |
266 | 1.44k | if (!DetermineEndianness(limited_source, &use_big_endian)) { |
267 | 35 | return false; |
268 | 35 | } |
269 | | |
270 | 1.41k | string signature; |
271 | 1.41k | if (use_big_endian) { |
272 | 69 | signature = string("\x00\x10\xba\xb0\xac\xbb\x00\x02", 8); |
273 | 1.34k | } else { |
274 | 1.34k | signature = string("HEAPCCDR"); |
275 | 1.34k | } |
276 | 1.41k | return IsSignatureMatched(limited_source, 6 /* offset */, signature); |
277 | 1.44k | } |
278 | | }; |
279 | | |
280 | | // Kodak RAW. |
281 | | class DcrTypeChecker : public TypeChecker { |
282 | | public: |
283 | 14 | virtual RawImageTypes Type() const { return kDcrImage; } |
284 | | |
285 | 53.2k | virtual size_t RequestedSize() const { return 5000; } |
286 | | |
287 | | // Check two different cases, only need to fulfill one of the two: |
288 | | // 1. signature at the (offset == 16) position of the file; |
289 | | // 2. two tags (OriginalFileName and FirmwareVersion) can be found in the |
290 | | // first requested bytes of the file. |
291 | 136 | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
292 | 136 | RangeCheckedBytePtr limited_source = LimitSource(source); |
293 | | |
294 | 136 | bool use_big_endian; |
295 | 136 | if (!DetermineEndianness(limited_source, &use_big_endian)) { |
296 | 33 | return false; |
297 | 33 | } |
298 | | |
299 | | // Case 1: has signature |
300 | 103 | const string kSignature( |
301 | 103 | "\x4b\x4f\x44\x41\x4b\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20", 16); |
302 | 103 | if (IsSignatureMatched(limited_source, 16 /* offset */, kSignature)) { |
303 | 1 | return true; |
304 | 1 | } |
305 | | |
306 | | // Case 2: search for tags in first requested bytes |
307 | 102 | string kIfdTags[2]; |
308 | 102 | if (use_big_endian) { |
309 | 22 | kIfdTags[0] = string("\x03\xe9\x00\x02", 4); // OriginalFileName |
310 | 22 | kIfdTags[1] = string("\x0c\xe5\x00\x02", 4); // FirmwareVersion |
311 | 80 | } else { |
312 | 80 | kIfdTags[0] = string("\xe9\x03\x02\x00", 4); // OriginalFileName |
313 | 80 | kIfdTags[1] = string("\xe5\x0c\x02\x00", 4); // FirmwareVersion |
314 | 80 | } |
315 | 102 | return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), |
316 | 102 | kIfdTags[0], NULL) && |
317 | 102 | IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), |
318 | 15 | kIfdTags[1], NULL); |
319 | 103 | } |
320 | | }; |
321 | | |
322 | | // Digital Negative RAW. |
323 | | class DngTypeChecker : public TypeChecker { |
324 | | public: |
325 | 589 | virtual RawImageTypes Type() const { return kDngImage; } |
326 | | |
327 | 51.9k | virtual size_t RequestedSize() const { return 1024; } |
328 | | |
329 | | // Check multiple points: |
330 | | // 1. valid endianness at the beginning of the file; |
331 | | // 2. at least two dng specific tags in the first requested bytes of the |
332 | | // file |
333 | 1.03k | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
334 | 1.03k | RangeCheckedBytePtr limited_source = LimitSource(source); |
335 | | |
336 | 1.03k | bool use_big_endian; |
337 | 1.03k | if (!DetermineEndianness(limited_source, &use_big_endian)) { |
338 | 33 | return false; |
339 | 33 | } |
340 | | |
341 | | // Search tags in first requested bytes and verify the order of them. |
342 | 999 | const int kTagsCount = 5; |
343 | 999 | string dng_tags[kTagsCount]; |
344 | 999 | if (use_big_endian) { |
345 | 55 | dng_tags[0] = |
346 | 55 | string("\xc6\x12\x00\x01\x00\x00\x00\x04", 8); // tag: 50706 |
347 | 55 | dng_tags[1] = |
348 | 55 | string("\xc6\x13\x00\x01\x00\x00\x00\x04", 8); // tag: 50707 |
349 | 55 | dng_tags[2] = string("\xc6\x14\x00\x02", 4); // tag: 50708 |
350 | 55 | dng_tags[3] = string("\xc6\x20", 2); // tag: 50720 |
351 | 55 | dng_tags[4] = |
352 | 55 | string("\xc6\x2d\x00\x04\x00\x00\x00\x01", 8); // tag: 50733 |
353 | 944 | } else { |
354 | 944 | dng_tags[0] = |
355 | 944 | string("\x12\xc6\x01\x00\x04\x00\x00\x00", 8); // tag: 50706 |
356 | 944 | dng_tags[1] = |
357 | 944 | string("\x13\xc6\x01\x00\x04\x00\x00\x00", 8); // tag: 50707 |
358 | 944 | dng_tags[2] = string("\x14\xc6\x02\x00", 4); // tag: 50708 |
359 | 944 | dng_tags[3] = string("\x20\xc6", 2); // tag: 50720 |
360 | 944 | dng_tags[4] = |
361 | 944 | string("\x2d\xc6\x04\x00\x01\x00\x00\x00", 8); // tag: 50733 |
362 | 944 | } |
363 | 999 | int tags_found = 0; |
364 | 5.99k | for (int i = 0; i < kTagsCount; ++i) { |
365 | 4.99k | if (IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), |
366 | 4.99k | dng_tags[i], NULL)) { |
367 | 1.32k | tags_found++; |
368 | 1.32k | } |
369 | 4.99k | } |
370 | 999 | return tags_found >= 2; |
371 | 1.03k | } |
372 | | }; |
373 | | |
374 | | // Kodak RAW. |
375 | | class KdcTypeChecker : public TypeChecker { |
376 | | public: |
377 | 12 | virtual RawImageTypes Type() const { return kKdcImage; } |
378 | | |
379 | 49.6k | virtual size_t RequestedSize() const { return 5000; } |
380 | | |
381 | | // Check two points: |
382 | | // 1. valid endianness at the beginning of the file; |
383 | | // 2. two tags (WhiteBalance and SerialNumber) in the first requested bytes. |
384 | 122 | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
385 | 122 | RangeCheckedBytePtr limited_source = LimitSource(source); |
386 | | |
387 | 122 | bool use_big_endian; |
388 | 122 | if (!DetermineEndianness(limited_source, &use_big_endian)) { |
389 | 33 | return false; |
390 | 33 | } |
391 | | |
392 | | // Search in first requested bytes |
393 | 89 | const size_t kIfdTagsSize = 2; |
394 | 89 | string kIfdTags[kIfdTagsSize]; |
395 | 89 | if (use_big_endian) { |
396 | 22 | kIfdTags[0] = string("\xfa\x0d\x00\x01", 4); // WhiteBalance |
397 | 22 | kIfdTags[1] = string("\xfa\x00\x00\x02", 4); // SerialNumber |
398 | 67 | } else { |
399 | 67 | kIfdTags[0] = string("\x0d\xfa\x01\x00", 4); // WhiteBalance |
400 | 67 | kIfdTags[1] = string("\x00\xfa\x02\x00", 4); // SerialNumber |
401 | 67 | } |
402 | | |
403 | 89 | return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), |
404 | 89 | kIfdTags[0], NULL) && |
405 | 89 | IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), |
406 | 21 | kIfdTags[1], NULL); |
407 | 122 | } |
408 | | }; |
409 | | |
410 | | // Leaf RAW. |
411 | | class MosTypeChecker : public TypeChecker { |
412 | | public: |
413 | 11 | virtual RawImageTypes Type() const { return kMosImage; } |
414 | | |
415 | 46.0k | virtual size_t RequestedSize() const { return 5000; } |
416 | | |
417 | | // Check two points: |
418 | | // 1. valid endianness at the beginning of the file; |
419 | | // 2. signature "PKTS " in the first requested bytes. Note the |
420 | | // "whitespace". It's important as they are special binary values. |
421 | 110 | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
422 | 110 | RangeCheckedBytePtr limited_source = LimitSource(source); |
423 | | |
424 | 110 | bool use_big_endian; |
425 | 110 | if (!DetermineEndianness(source, &use_big_endian)) { |
426 | 33 | return false; |
427 | 33 | } |
428 | | |
429 | | // Search kSignaturePKTS in first requested bytes |
430 | 77 | const string kSignaturePKTS("PKTS\x00\x00\x00\x001", 8); |
431 | 77 | return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), |
432 | 77 | kSignaturePKTS, NULL); |
433 | 110 | } |
434 | | }; |
435 | | |
436 | | // Minolta RAW. |
437 | | class MrwTypeChecker : public TypeChecker { |
438 | | public: |
439 | 1 | virtual RawImageTypes Type() const { return kMrwImage; } |
440 | | |
441 | 37.0k | virtual size_t RequestedSize() const { return 4; } |
442 | | |
443 | | // Check only the signature at the beginning of the file. |
444 | 1.75k | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
445 | | // Limits the source length to the RequestedSize(), using it guarantees that |
446 | | // we will not read more than this size from the source. |
447 | 1.75k | RangeCheckedBytePtr limited_source = |
448 | 1.75k | source.pointerToSubArray(0 /* pos */, RequestedSize()); |
449 | | |
450 | 1.75k | const string kSignature("\0MRM", 4); |
451 | 1.75k | return IsSignatureMatched(limited_source, 0 /* offset */, kSignature); |
452 | 1.75k | } |
453 | | }; |
454 | | |
455 | | // Check if the file contains a NRW signature "NRW " in the first requested |
456 | | // bytes. Note the "whitespace". It's important as they are special binary |
457 | | // values. |
458 | | const size_t kRequestedSizeForNrwSignature = 4000; |
459 | 244 | bool ContainsNrwSignature(const RangeCheckedBytePtr& source) { |
460 | | // Search for kSignatureNrw. |
461 | 244 | const string kSignatureNrw("NRW\x20\x20\x20", 6); |
462 | 244 | return IsSignatureFound(source, 0 /* offset */, kRequestedSizeForNrwSignature, |
463 | 244 | kSignatureNrw, NULL); |
464 | 244 | } |
465 | | |
466 | | // Checks if the file contains the signatures for Nikon formats: |
467 | | // * the general Nikon singature "NIKON" string. |
468 | | // * the ReferenceBlackWhite tag. |
469 | | const size_t kRequestedSizeForNikonSignatures = 4000; |
470 | | bool ContainsNikonSignatures(const RangeCheckedBytePtr& source, |
471 | 347 | const bool use_big_endian) { |
472 | 347 | const string kSignatureNikon("NIKON"); |
473 | 347 | const string kReferenceBlackWhiteTag = use_big_endian |
474 | 347 | ? string("\x02\x14\x00\x05", 4) |
475 | 347 | : string("\x14\x02\x05\x00", 4); |
476 | 347 | const std::vector<string> kSignatures = {kSignatureNikon, |
477 | 347 | kReferenceBlackWhiteTag}; |
478 | 624 | for (auto const& signature : kSignatures) { |
479 | 624 | if (!IsSignatureFound(source, 0, kRequestedSizeForNikonSignatures, |
480 | 624 | signature, NULL)) { |
481 | 103 | return false; |
482 | 103 | } |
483 | 624 | } |
484 | 244 | return true; |
485 | 347 | } |
486 | | |
487 | | // Nikon RAW (NEF extension). |
488 | | class NefTypeChecker : public TypeChecker { |
489 | | public: |
490 | 223 | virtual RawImageTypes Type() const { return kNefImage; } |
491 | | |
492 | 49.7k | virtual size_t RequestedSize() const { |
493 | 49.7k | return std::max(kRequestedSizeForNikonSignatures, |
494 | 49.7k | kRequestedSizeForNrwSignature); |
495 | 49.7k | } |
496 | | |
497 | | // Check multiple points: |
498 | | // 1. valid endianness at the beginning of the file; |
499 | | // 2. magic number at the (offset == 2) position of the file; |
500 | | // 3. the signature "NIKON" in the requested bytes of the file; |
501 | | // 4. the ReferenceBlackWhite tag in the requested bytes of the file; |
502 | | // 5. does not contain the NRW signature. We may also check a special |
503 | | // signature "RAW " similar to the NRW case, but we got issues in some |
504 | | // special images that the signature locates in the middle of the file, and it |
505 | | // costs too long time to check; |
506 | 365 | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
507 | 365 | RangeCheckedBytePtr limited_source = LimitSource(source); |
508 | | |
509 | 365 | bool use_big_endian; |
510 | 365 | if (!DetermineEndianness(limited_source, &use_big_endian)) { |
511 | 33 | return false; |
512 | 33 | } |
513 | | |
514 | 332 | const unsigned short kTiffMagic = 0x2A; // NOLINT |
515 | 332 | if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, |
516 | 332 | kTiffMagic)) { |
517 | 25 | return false; |
518 | 25 | } |
519 | | |
520 | 307 | return ContainsNikonSignatures(limited_source, use_big_endian) && |
521 | 307 | !ContainsNrwSignature(limited_source); // not NRW |
522 | 332 | } |
523 | | }; |
524 | | |
525 | | // Nikon RAW (NRW extension). |
526 | | class NrwTypeChecker : public TypeChecker { |
527 | | public: |
528 | 6 | virtual RawImageTypes Type() const { return kNrwImage; } |
529 | | |
530 | 46.0k | virtual size_t RequestedSize() const { |
531 | 46.0k | return std::max(kRequestedSizeForNikonSignatures, |
532 | 46.0k | kRequestedSizeForNrwSignature); |
533 | 46.0k | } |
534 | | |
535 | | // Check multiple points: |
536 | | // 1. valid endianness at the beginning of the file; |
537 | | // 2. magic numbers at the (offset == 2 and offset == 4) positions of the |
538 | | // file; |
539 | | // 3. the signature "NIKON" in the first requested bytes of the file; |
540 | | // 4. the ReferenceBlackWhite tag in the requested bytes of the file; |
541 | | // 5. contains the NRW signature; |
542 | 142 | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
543 | 142 | RangeCheckedBytePtr limited_source = LimitSource(source); |
544 | | |
545 | 142 | bool use_big_endian; |
546 | 142 | if (!DetermineEndianness(limited_source, &use_big_endian)) { |
547 | 33 | return false; |
548 | 33 | } |
549 | | |
550 | 109 | const unsigned short kTiffMagic = 0x2A; // NOLINT |
551 | 109 | const unsigned int kTiffOffset = 8; |
552 | 109 | if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, |
553 | 109 | kTiffMagic) || |
554 | 109 | !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian, |
555 | 84 | kTiffOffset)) { |
556 | 69 | return false; |
557 | 69 | } |
558 | | |
559 | 40 | return ContainsNikonSignatures(limited_source, use_big_endian) && |
560 | 40 | ContainsNrwSignature(limited_source); |
561 | 109 | } |
562 | | }; |
563 | | |
564 | | // Olympus RAW. |
565 | | class OrfTypeChecker : public TypeChecker { |
566 | | public: |
567 | 16 | virtual RawImageTypes Type() const { return kOrfImage; } |
568 | | |
569 | 49.8k | virtual size_t RequestedSize() const { return 3000; } |
570 | | |
571 | | // Check multiple points: |
572 | | // 1. valid endianness at the beginning of the file; |
573 | | // 2. tag at the (offset == 2) position of the file; |
574 | | // 3. signature "OLYMP" in the first requested bytes. |
575 | 381 | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
576 | 381 | RangeCheckedBytePtr limited_source = LimitSource(source); |
577 | | |
578 | 381 | bool use_big_endian; |
579 | 381 | if (!DetermineEndianness(limited_source, &use_big_endian)) { |
580 | 33 | return false; |
581 | 33 | } |
582 | | |
583 | 348 | const size_t kTagSize = 2; |
584 | 348 | const unsigned short kTag[kTagSize] = {0x4F52, 0x5352}; // NOLINT |
585 | 348 | if (!(CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, |
586 | 348 | kTag[0]) || |
587 | 348 | CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, |
588 | 346 | kTag[1]))) { |
589 | 331 | return false; |
590 | 331 | } |
591 | | |
592 | | // Search for kSignatureOlymp in first requested bytes |
593 | 17 | const string kSignatureOlymp("OLYMP"); |
594 | 17 | return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), |
595 | 17 | kSignatureOlymp, NULL); |
596 | 348 | } |
597 | | }; |
598 | | |
599 | | // Pentax RAW. |
600 | | class PefTypeChecker : public TypeChecker { |
601 | | public: |
602 | 62 | virtual RawImageTypes Type() const { return kPefImage; } |
603 | | |
604 | 50.0k | virtual size_t RequestedSize() const { return 1280; } |
605 | | |
606 | | // Check multiple points: |
607 | | // 1. valid big endianness at the beginning of the file; |
608 | | // 2. magic numbers at the (offset == 2 and offset==4) positions of the file; |
609 | | // 3. signature "AOC " or "PENTAX " in first requested bytes. |
610 | 443 | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
611 | 443 | RangeCheckedBytePtr limited_source = LimitSource(source); |
612 | | |
613 | 443 | bool use_big_endian; |
614 | 443 | if (!DetermineEndianness(limited_source, &use_big_endian)) { |
615 | 33 | return false; |
616 | 33 | } |
617 | | |
618 | 410 | const unsigned short kTiffMagic = 0x2A; // NOLINT |
619 | 410 | const unsigned int kTiffOffset = 8; |
620 | 410 | if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, |
621 | 410 | kTiffMagic) || |
622 | 410 | !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian, |
623 | 369 | kTiffOffset)) { |
624 | 297 | return false; |
625 | 297 | } |
626 | | |
627 | | // Search for kSignatureAOC or kSignaturePENTAX in first requested bytes |
628 | 113 | const string kSignatureAOC("\x41\x4f\x43\x00\x4d\x4d", 6); |
629 | 113 | const string kSignaturePENTAX("\x50\x45\x4e\x54\x41\x58\x20\x00", 8); |
630 | 113 | return IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), |
631 | 113 | kSignatureAOC, NULL) || |
632 | 113 | IsSignatureFound(limited_source, 0 /* offset */, RequestedSize(), |
633 | 61 | kSignaturePENTAX, NULL); |
634 | 410 | } |
635 | | }; |
636 | | |
637 | | // Apple format. |
638 | | class QtkTypeChecker : public TypeChecker { |
639 | | public: |
640 | 1 | virtual RawImageTypes Type() const { return kQtkImage; } |
641 | | |
642 | 58.0k | virtual size_t RequestedSize() const { return 8; } |
643 | | |
644 | | // Check only the signature at the beginning of the file. |
645 | 1.59k | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
646 | 1.59k | RangeCheckedBytePtr limited_source = LimitSource(source); |
647 | | |
648 | 1.59k | const size_t kSignatureSize = 2; |
649 | 1.59k | const string kSignature[kSignatureSize] = { |
650 | 1.59k | string("qktk\x00\x00\x00\x08", 8), string("qktn\x00\x00\x00\x08", 8), |
651 | 1.59k | }; |
652 | 1.59k | return IsSignatureMatched(limited_source, 0 /* offset */, kSignature[0]) || |
653 | 1.59k | IsSignatureMatched(limited_source, 0 /* offset */, kSignature[1]); |
654 | 1.59k | } |
655 | | }; |
656 | | |
657 | | // Fuji RAW. |
658 | | class RafTypeChecker : public TypeChecker { |
659 | | public: |
660 | 146 | virtual RawImageTypes Type() const { return kRafImage; } |
661 | | |
662 | 54.5k | virtual size_t RequestedSize() const { return 8; } |
663 | | |
664 | | // Check only the signature at the beginning of the file. |
665 | 1.59k | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
666 | 1.59k | RangeCheckedBytePtr limited_source = LimitSource(source); |
667 | | |
668 | 1.59k | const string kSignature("FUJIFILM"); |
669 | 1.59k | return IsSignatureMatched(limited_source, 0 /* offset */, kSignature); |
670 | 1.59k | } |
671 | | }; |
672 | | |
673 | | // Contax N RAW. |
674 | | class RawContaxNTypeChecker : public TypeChecker { |
675 | | public: |
676 | 2 | virtual RawImageTypes Type() const { return kRawContaxNImage; } |
677 | | |
678 | 46.9k | virtual size_t RequestedSize() const { return 36; } |
679 | | |
680 | | // Check only the signature at the (offset == 25) position of the |
681 | | // file. |
682 | 1.04k | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
683 | 1.04k | RangeCheckedBytePtr limited_source = LimitSource(source); |
684 | | |
685 | 1.04k | const string kSignature("ARECOYK"); |
686 | 1.04k | return IsSignatureMatched(limited_source, 25, kSignature); |
687 | 1.04k | } |
688 | | }; |
689 | | |
690 | | // Panasonic RAW. |
691 | | class Rw2TypeChecker : public TypeChecker { |
692 | | public: |
693 | 154 | virtual RawImageTypes Type() const { return kRw2Image; } |
694 | | |
695 | 61.7k | virtual size_t RequestedSize() const { return 4; } |
696 | | |
697 | | // Check two points: 1. valid endianness at the beginning of the |
698 | | // file; 2. tag at the (offset == 2) position of the file. |
699 | 1.75k | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
700 | 1.75k | RangeCheckedBytePtr limited_source = LimitSource(source); |
701 | | |
702 | 1.75k | bool use_big_endian; |
703 | 1.75k | if (!DetermineEndianness(source, &use_big_endian)) { |
704 | 183 | return false; |
705 | 183 | } |
706 | | |
707 | 1.56k | const unsigned short kTag = 0x55; // NOLINT |
708 | 1.56k | return CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, |
709 | 1.56k | kTag); |
710 | 1.75k | } |
711 | | }; |
712 | | |
713 | | // Samsung RAW. |
714 | | class SrwTypeChecker : public TypeChecker { |
715 | | public: |
716 | 8 | virtual RawImageTypes Type() const { return kSrwImage; } |
717 | | |
718 | 39.9k | virtual size_t RequestedSize() const { return 256; } |
719 | | |
720 | | // Check multiple points: |
721 | | // 1. valid big endianness at the beginning of the file; |
722 | | // 2. magic numbers at the (offset == 2 and offset==4) positions of the file; |
723 | | // 3. the signature "SAMSUNG" in the requested bytes of the file; |
724 | 1.04k | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
725 | 1.04k | RangeCheckedBytePtr limited_source = LimitSource(source); |
726 | | |
727 | 1.04k | bool use_big_endian; |
728 | 1.04k | if (!DetermineEndianness(source, &use_big_endian)) { |
729 | 33 | return false; |
730 | 33 | } |
731 | | |
732 | 1.00k | const unsigned short kTiffMagic = 0x2A; // NOLINT |
733 | 1.00k | const unsigned int kTiffOffset = 8; |
734 | 1.00k | if (!CheckUInt16Value(limited_source, 2 /* offset */, use_big_endian, |
735 | 1.00k | kTiffMagic) || |
736 | 1.00k | !CheckUInt32Value(limited_source, 4 /* offset */, use_big_endian, |
737 | 886 | kTiffOffset)) { |
738 | 886 | return false; |
739 | 886 | } |
740 | | |
741 | 121 | const string kSignature("SAMSUNG"); |
742 | 121 | if (!IsSignatureFound(source, 0, RequestedSize(), kSignature, NULL)) { |
743 | 113 | return false; |
744 | 113 | } |
745 | 8 | return true; |
746 | 121 | } |
747 | | }; |
748 | | |
749 | | // Sigma / Polaroid RAW. |
750 | | class X3fTypeChecker : public TypeChecker { |
751 | | public: |
752 | 1 | virtual RawImageTypes Type() const { return kX3fImage; } |
753 | | |
754 | 61.6k | virtual size_t RequestedSize() const { return 4; } |
755 | | |
756 | | // Check only the signature at the beginning of the file. |
757 | 1.59k | virtual bool IsMyType(const RangeCheckedBytePtr& source) const { |
758 | 1.59k | RangeCheckedBytePtr limited_source = LimitSource(source); |
759 | | |
760 | 1.59k | const string kSignature("FOVb", 4); |
761 | 1.59k | return IsSignatureMatched(limited_source, 0 /* offset */, kSignature); |
762 | 1.59k | } |
763 | | }; |
764 | | |
765 | | // This class contains the list of all type checkers. One should used this list |
766 | | // as a whole to execute the image type recognition. |
767 | | class TypeCheckerList { |
768 | | public: |
769 | 3.53k | TypeCheckerList() { |
770 | | // Add all supported RAW type checkers here. |
771 | 3.53k | checkers_.push_back(new ArwTypeChecker()); |
772 | 3.53k | checkers_.push_back(new Cr3TypeChecker()); |
773 | 3.53k | checkers_.push_back(new Cr2TypeChecker()); |
774 | 3.53k | checkers_.push_back(new CrwTypeChecker()); |
775 | 3.53k | checkers_.push_back(new DcrTypeChecker()); |
776 | 3.53k | checkers_.push_back(new DngTypeChecker()); |
777 | 3.53k | checkers_.push_back(new KdcTypeChecker()); |
778 | 3.53k | checkers_.push_back(new MosTypeChecker()); |
779 | 3.53k | checkers_.push_back(new MrwTypeChecker()); |
780 | 3.53k | checkers_.push_back(new NefTypeChecker()); |
781 | 3.53k | checkers_.push_back(new NrwTypeChecker()); |
782 | 3.53k | checkers_.push_back(new OrfTypeChecker()); |
783 | 3.53k | checkers_.push_back(new PefTypeChecker()); |
784 | 3.53k | checkers_.push_back(new QtkTypeChecker()); |
785 | 3.53k | checkers_.push_back(new RafTypeChecker()); |
786 | 3.53k | checkers_.push_back(new RawContaxNTypeChecker()); |
787 | 3.53k | checkers_.push_back(new Rw2TypeChecker()); |
788 | 3.53k | checkers_.push_back(new SrwTypeChecker()); |
789 | 3.53k | checkers_.push_back(new X3fTypeChecker()); |
790 | | |
791 | | // Sort the checkers by the ascending RequestedSize() to get better |
792 | | // performance when checking type. |
793 | 3.53k | std::sort(checkers_.begin(), checkers_.end(), TypeChecker::Compare); |
794 | 3.53k | } |
795 | | |
796 | 3.53k | ~TypeCheckerList() { |
797 | 70.6k | for (size_t i = 0; i < checkers_.size(); ++i) { |
798 | 67.0k | delete checkers_[i]; |
799 | 67.0k | checkers_[i] = NULL; |
800 | 67.0k | } |
801 | 3.53k | } |
802 | | |
803 | | // Returns the type of source data. If it can not be identified, returns |
804 | | // kNonRawImage. |
805 | 1.75k | RawImageTypes GetType(const RangeCheckedBytePtr& source) const { |
806 | 17.6k | for (size_t i = 0; i < checkers_.size(); ++i) { |
807 | 17.5k | if (checkers_[i]->IsMyType(source)) { |
808 | 1.67k | return checkers_[i]->Type(); |
809 | 1.67k | } |
810 | 17.5k | } |
811 | 72 | return kNonRawImage; |
812 | 1.75k | } |
813 | | |
814 | | // Returns the maximum size of requested size of data for identifying image |
815 | | // type using this class. The class guarantees that it will not read more than |
816 | | // this size. |
817 | 1.77k | size_t RequestedSize() const { |
818 | 1.77k | assert(!checkers_.empty()); |
819 | | // The checkers_ is ascending sorted. The last element is the maximum. |
820 | 1.77k | return checkers_.back()->RequestedSize(); |
821 | 1.77k | } |
822 | | |
823 | 0 | bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) { |
824 | 0 | const TypeChecker* type_checker = GetTypeCheckerForType(type); |
825 | 0 | if (type_checker) { |
826 | 0 | return type_checker->IsMyType(source); |
827 | 0 | } else { |
828 | 0 | return false; |
829 | 0 | } |
830 | 0 | } |
831 | | |
832 | 0 | size_t RequestedSizeForType(const RawImageTypes type) { |
833 | 0 | const TypeChecker* type_checker = GetTypeCheckerForType(type); |
834 | 0 | if (type_checker) { |
835 | 0 | return type_checker->RequestedSize(); |
836 | 0 | } else { |
837 | 0 | return 0; |
838 | 0 | } |
839 | 0 | } |
840 | | |
841 | | private: |
842 | 0 | const TypeChecker* GetTypeCheckerForType(const RawImageTypes type) { |
843 | 0 | for (const auto* type_checker : checkers_) { |
844 | 0 | if (type_checker->Type() == type) { |
845 | 0 | return type_checker; |
846 | 0 | } |
847 | 0 | } |
848 | 0 | return nullptr; |
849 | 0 | } |
850 | | |
851 | | std::vector<TypeChecker*> checkers_; |
852 | | }; |
853 | | |
854 | | } // namespace |
855 | | |
856 | 0 | bool IsRaw(const RawImageTypes type) { |
857 | 0 | switch (type) { |
858 | | // Non-RAW-image type |
859 | 0 | case kNonRawImage: { |
860 | 0 | return false; |
861 | 0 | } |
862 | | |
863 | | // Raw image types |
864 | 0 | case kArwImage: |
865 | 0 | case kCr3Image: |
866 | 0 | case kCr2Image: |
867 | 0 | case kCrwImage: |
868 | 0 | case kDcrImage: |
869 | 0 | case kDngImage: |
870 | 0 | case kKdcImage: |
871 | 0 | case kMosImage: |
872 | 0 | case kMrwImage: |
873 | 0 | case kNefImage: |
874 | 0 | case kNrwImage: |
875 | 0 | case kOrfImage: |
876 | 0 | case kPefImage: |
877 | 0 | case kQtkImage: |
878 | 0 | case kRafImage: |
879 | 0 | case kRawContaxNImage: |
880 | 0 | case kRw2Image: |
881 | 0 | case kSrwImage: |
882 | 0 | case kX3fImage: { |
883 | 0 | return true; |
884 | 0 | } |
885 | | |
886 | 0 | default: { |
887 | | // Unsupported type! |
888 | 0 | assert(false); |
889 | 0 | } |
890 | 0 | } |
891 | 0 | return false; |
892 | 0 | } |
893 | | |
894 | 0 | bool IsOfType(const RangeCheckedBytePtr& source, const RawImageTypes type) { |
895 | 0 | return TypeCheckerList().IsOfType(source, type); |
896 | 0 | } |
897 | | |
898 | 1.75k | RawImageTypes RecognizeRawImageTypeLite(const RangeCheckedBytePtr& source) { |
899 | 1.75k | return TypeCheckerList().GetType(source); |
900 | 1.75k | } |
901 | | |
902 | 1.77k | size_t GetNumberOfBytesForIsRawLite() { |
903 | 1.77k | return TypeCheckerList().RequestedSize(); |
904 | 1.77k | } |
905 | | |
906 | 0 | size_t GetNumberOfBytesForIsOfType(const RawImageTypes type) { |
907 | 0 | return TypeCheckerList().RequestedSizeForType(type); |
908 | 0 | } |
909 | | |
910 | 0 | bool IsRawLite(const RangeCheckedBytePtr& source) { |
911 | 0 | return IsRaw(RecognizeRawImageTypeLite(source)); |
912 | 0 | } |
913 | | |
914 | | } // namespace image_type_recognition |
915 | | } // namespace piex |