Coverage Report

Created: 2025-11-16 07:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibMedia/Containers/Matroska/Reader.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
3
 * Copyright (c) 2022-2023, Gregory Bertilson <Zaggy1024@gmail.com>
4
 *
5
 * SPDX-License-Identifier: BSD-2-Clause
6
 */
7
8
#include <AK/Debug.h>
9
#include <AK/Function.h>
10
#include <AK/Math.h>
11
#include <AK/Optional.h>
12
#include <AK/Time.h>
13
#include <AK/Utf8View.h>
14
#include <LibCore/MappedFile.h>
15
16
#include "Reader.h"
17
18
namespace Media::Matroska {
19
20
1.93M
// Shorthand for DECODER_TRY with the DecoderErrorCategory::Corrupted category; used in this file
// around Streamer reads (presumably so a failed read surfaces as a "corrupted" decoder error —
// DECODER_TRY itself is defined elsewhere).
#define TRY_READ(expression) DECODER_TRY(DecoderErrorCategory::Corrupted, expression)
21
22
// RFC 8794 - Extensible Binary Meta Language
23
// https://datatracker.ietf.org/doc/html/rfc8794
24
constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3;
25
constexpr u32 EBML_CRC32_ELEMENT_ID = 0xBF;
26
constexpr u32 EBML_VOID_ELEMENT_ID = 0xEC;
27
28
// Matroska elements' IDs and types are listed at this URL:
29
// https://www.matroska.org/technical/elements.html
30
constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067;
31
constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282;
32
constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287;
33
34
constexpr u32 SEEK_HEAD_ELEMENT_ID = 0x114D9B74;
35
constexpr u32 SEEK_ELEMENT_ID = 0x4DBB;
36
constexpr u32 SEEK_ID_ELEMENT_ID = 0x53AB;
37
constexpr u32 SEEK_POSITION_ELEMENT_ID = 0x53AC;
38
39
constexpr u32 SEGMENT_INFORMATION_ELEMENT_ID = 0x1549A966;
40
constexpr u32 TRACK_ELEMENT_ID = 0x1654AE6B;
41
constexpr u32 CLUSTER_ELEMENT_ID = 0x1F43B675;
42
constexpr u32 TIMESTAMP_SCALE_ID = 0x2AD7B1;
43
constexpr u32 MUXING_APP_ID = 0x4D80;
44
constexpr u32 WRITING_APP_ID = 0x5741;
45
constexpr u32 DURATION_ID = 0x4489;
46
47
// Tracks
48
constexpr u32 TRACK_ENTRY_ID = 0xAE;
49
constexpr u32 TRACK_NUMBER_ID = 0xD7;
50
constexpr u32 TRACK_UID_ID = 0x73C5;
51
constexpr u32 TRACK_TYPE_ID = 0x83;
52
constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C;
53
constexpr u32 TRACK_CODEC_ID = 0x86;
54
constexpr u32 TRACK_CODEC_PRIVATE = 0x63A2;
55
constexpr u32 TRACK_TIMESTAMP_SCALE_ID = 0x23314F;
56
constexpr u32 TRACK_OFFSET_ID = 0x537F;
57
constexpr u32 TRACK_VIDEO_ID = 0xE0;
58
constexpr u32 TRACK_AUDIO_ID = 0xE1;
59
60
// Video
61
constexpr u32 PIXEL_WIDTH_ID = 0xB0;
62
constexpr u32 PIXEL_HEIGHT_ID = 0xBA;
63
constexpr u32 COLOR_ENTRY_ID = 0x55B0;
64
constexpr u32 PRIMARIES_ID = 0x55BB;
65
constexpr u32 TRANSFER_CHARACTERISTICS_ID = 0x55BA;
66
constexpr u32 MATRIX_COEFFICIENTS_ID = 0x55B1;
67
constexpr u32 RANGE_ID = 0x55B9;
68
constexpr u32 BITS_PER_CHANNEL_ID = 0x55B2;
69
70
// Audio
71
constexpr u32 CHANNELS_ID = 0x9F;
72
constexpr u32 BIT_DEPTH_ID = 0x6264;
73
74
// Clusters
75
constexpr u32 SIMPLE_BLOCK_ID = 0xA3;
76
constexpr u32 TIMESTAMP_ID = 0xE7;
77
78
// Cues
79
constexpr u32 CUES_ID = 0x1C53BB6B;
80
constexpr u32 CUE_POINT_ID = 0xBB;
81
constexpr u32 CUE_TIME_ID = 0xB3;
82
constexpr u32 CUE_TRACK_POSITIONS_ID = 0xB7;
83
constexpr u32 CUE_TRACK_ID = 0xF7;
84
constexpr u32 CUE_CLUSTER_POSITION_ID = 0xF1;
85
constexpr u32 CUE_RELATIVE_POSITION_ID = 0xF0;
86
constexpr u32 CUE_CODEC_STATE_ID = 0xEA;
87
constexpr u32 CUE_REFERENCE_ID = 0xDB;
88
89
// Maps the file at `path` with Core::MappedFile::map() and builds a Reader from the mapping.
// A mapping failure is reported under DecoderErrorCategory::IO.
DecoderErrorOr<Reader> Reader::from_file(StringView path)
{
    return from_mapped_file(DECODER_TRY(DecoderErrorCategory::IO, Core::MappedFile::map(path)));
}
94
95
// Builds a Reader over the bytes of `mapped_file`, then stores the mapping in the Reader.
DecoderErrorOr<Reader> Reader::from_mapped_file(NonnullOwnPtr<Core::MappedFile> mapped_file)
{
    // Parse first: from_data() only receives a view of the mapped bytes.
    auto parsed_reader = TRY(from_data(mapped_file->bytes()));

    // Transfer ownership of the mapping into the reader (presumably so the viewed bytes stay
    // valid for as long as the reader and its copies exist).
    parsed_reader.m_mapped_file = make_ref_counted<Core::SharedMappedFile>(move(mapped_file));
    return parsed_reader;
}
101
102
// Constructs a Reader over `data` and runs parse_initial_data() on it.
// Returns the reader on success, or the error produced by the initial parse.
DecoderErrorOr<Reader> Reader::from_data(ReadonlyBytes data)
{
    Reader new_reader(data);
    if (auto initial_parse = new_reader.parse_initial_data(); initial_parse.is_error())
        return initial_parse.release_error();
    return new_reader;
}
108
109
// Returns the position of the first element that is read from this master element.
110
static DecoderErrorOr<size_t> parse_master_element(Streamer& streamer, [[maybe_unused]] StringView element_name, Function<DecoderErrorOr<IterationDecision>(u64)> element_consumer)
111
39.4k
{
112
39.4k
    auto element_data_size = TRY_READ(streamer.read_variable_size_integer());
113
39.3k
    dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size);
114
115
39.3k
    bool first_element = true;
116
39.3k
    auto first_element_position = streamer.position();
117
118
39.3k
    streamer.push_octets_read();
119
1.84M
    while (streamer.octets_read() < element_data_size) {
120
1.81M
        dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading  element ======");
121
1.81M
        auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
122
1.81M
        dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}", element_name, element_id);
123
124
1.81M
        if (element_id == EBML_CRC32_ELEMENT_ID) {
125
            // The CRC-32 Element contains a 32-bit Cyclic Redundancy Check value of all the
126
            // Element Data of the Parent Element as stored except for the CRC-32 Element itself.
127
            // When the CRC-32 Element is present, the CRC-32 Element MUST be the first ordered
128
            // EBML Element within its Parent Element for easier reading.
129
319
            if (!first_element)
130
14
                return DecoderError::corrupted("CRC32 element must be the first child"sv);
131
132
            // All Top-Level Elements of an EBML Document that are Master Elements SHOULD include a
133
            // CRC-32 Element as a Child Element. The CRC in use is the IEEE-CRC-32 algorithm as used
134
            // in the [ISO3309] standard and in Section 8.1.1.6.2 of [ITU.V42], with initial value of
135
            // 0xFFFFFFFF. The CRC value MUST be computed on a little-endian bytestream and MUST use
136
            // little-endian storage.
137
138
            // FIXME: Currently we skip the CRC-32 Element instead of checking it. It may be worth
139
            //        verifying the contents of the SeekHead, Segment Info, and Tracks Elements.
140
            //        Note that Cluster Elements tend to be quite large, so verifying their integrity
141
            //        will result in longer buffering times in streamed contexts, so it may not be
142
            //        worth the effort checking those. It would also prevent error correction in
143
            //        video codecs from taking effect.
144
305
            TRY_READ(streamer.read_unknown_element());
145
242
            continue;
146
305
        }
147
1.81M
        if (element_id == EBML_VOID_ELEMENT_ID) {
148
            // Used to void data or to avoid unexpected behaviors when using damaged data.
149
            // The content is discarded. Also used to reserve space in a subelement for later use.
150
360
            TRY_READ(streamer.read_unknown_element());
151
287
            continue;
152
360
        }
153
154
1.81M
        auto result = element_consumer(element_id);
155
1.81M
        if (result.is_error())
156
6.24k
            return DecoderError::format(result.error().category(), "{} -> {}", element_name, result.error().description());
157
1.80M
        if (result.release_value() == IterationDecision::Break)
158
0
            break;
159
160
1.80M
        dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", streamer.octets_read(), element_name);
161
1.80M
        first_element = false;
162
1.80M
    }
163
31.3k
    streamer.pop_octets_read();
164
165
31.3k
    return first_element_position;
166
39.3k
}
167
168
// Parses the children of the EBML header master element (its ID must already have been read).
// Only DocType and DocTypeVersion are stored; every other child is skipped.
static DecoderErrorOr<EBMLHeader> parse_ebml_header(Streamer& streamer)
{
    EBMLHeader parsed_header;

    auto consume_header_child = [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        if (child_id == DOCTYPE_ELEMENT_ID) {
            parsed_header.doc_type = TRY_READ(streamer.read_string());
            dbgln_if(MATROSKA_DEBUG, "Read DocType attribute: {}", parsed_header.doc_type);
        } else if (child_id == DOCTYPE_VERSION_ELEMENT_ID) {
            parsed_header.doc_type_version = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_DEBUG, "Read DocTypeVersion attribute: {}", parsed_header.doc_type_version);
        } else {
            TRY_READ(streamer.read_unknown_element());
        }

        return IterationDecision::Continue;
    };
    TRY(parse_master_element(streamer, "Header"sv, move(consume_header_child)));

    return parsed_header;
}
190
191
// Validates the start of the stream and records where the Segment's contents live.
//
// The data must begin with an EBML header element immediately followed by a Segment element;
// anything else is reported as corrupted. On success m_header, m_segment_contents_position and
// m_segment_contents_size are set, with the size limited to the bytes actually available.
DecoderErrorOr<void> Reader::parse_initial_data()
{
    Streamer streamer { m_data };

    auto const leading_element_id = TRY_READ(streamer.read_variable_size_integer(false));
    dbgln_if(MATROSKA_TRACE_DEBUG, "First element ID is {:#010x}\n", leading_element_id);
    if (leading_element_id != EBML_MASTER_ELEMENT_ID)
        return DecoderError::corrupted("First element was not an EBML header"sv);

    m_header = TRY(parse_ebml_header(streamer));
    dbgln_if(MATROSKA_DEBUG, "Parsed EBML header");

    auto const following_element_id = TRY_READ(streamer.read_variable_size_integer(false));
    if (following_element_id != SEGMENT_ELEMENT_ID)
        return DecoderError::corrupted("Second element was not a segment element"sv);

    m_segment_contents_size = TRY_READ(streamer.read_variable_size_integer());
    m_segment_contents_position = streamer.position();

    auto const available_size = m_data.size() - m_segment_contents_position;
    dbgln_if(MATROSKA_TRACE_DEBUG, "Segment is at {} with size {}, available size is {}", m_segment_contents_position, m_segment_contents_size, available_size);

    // Never trust a declared size that extends past the end of the data.
    m_segment_contents_size = min(m_segment_contents_size, available_size);
    return {};
}
212
213
// Parses a SeekHead master element (its ID must already have been read) into `table`.
//
// Each Seek child contributes one entry mapping its SeekID to `base_position` + SeekPosition.
// A Seek entry lacking either value, or with an ID wider than 32 bits, is reported as corrupted.
// When an ID is already present in `table`, the existing entry is kept and the duplicate ignored.
// Children of the SeekHead other than Seek are skipped.
static DecoderErrorOr<void> parse_seek_head(Streamer& streamer, size_t base_position, HashMap<u32, size_t>& table)
{
    TRY(parse_master_element(streamer, "SeekHead"sv, [&](u64 seek_head_child_id) -> DecoderErrorOr<IterationDecision> {
        if (seek_head_child_id != SEEK_ELEMENT_ID) {
            dbgln_if(MATROSKA_TRACE_DEBUG, "Unknown SeekHead child element ID {:#010x}", seek_head_child_id);
            // The child's size and payload must still be consumed; otherwise the next iteration
            // of parse_master_element() would interpret them as an element ID.
            TRY_READ(streamer.read_unknown_element());
            return IterationDecision::Continue;
        }

        Optional<u64> seek_id;
        Optional<u64> seek_position;
        TRY(parse_master_element(streamer, "Seek"sv, [&](u64 seek_entry_child_id) -> DecoderErrorOr<IterationDecision> {
            switch (seek_entry_child_id) {
            case SEEK_ID_ELEMENT_ID:
                seek_id = TRY_READ(streamer.read_u64());
                dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Element ID value {:#010x}", seek_id.value());
                break;
            case SEEK_POSITION_ELEMENT_ID:
                seek_position = TRY_READ(streamer.read_u64());
                dbgln_if(MATROSKA_TRACE_DEBUG, "Read Seek Position value {}", seek_position.value());
                break;
            default:
                TRY_READ(streamer.read_unknown_element());
            }

            return IterationDecision::Continue;
        }));

        if (!seek_id.has_value())
            return DecoderError::corrupted("Seek entry is missing the element ID"sv);
        if (!seek_position.has_value())
            return DecoderError::corrupted("Seek entry is missing the seeking position"sv);
        // The table is keyed by u32, so reject anything that would be truncated.
        if (seek_id.value() > NumericLimits<u32>::max())
            return DecoderError::corrupted("Seek entry's element ID is too large"sv);

        dbgln_if(MATROSKA_TRACE_DEBUG, "Seek entry found with ID {:#010x} and position {} offset from SeekHead at {}", seek_id.value(), seek_position.value(), base_position);
        // FIXME: SeekHead can reference another SeekHead, we should recursively parse all SeekHeads.

        if (table.contains(seek_id.value())) {
            dbgln_if(MATROSKA_DEBUG, "Warning: Duplicate seek entry with ID {:#010x} at position {}", seek_id.value(), seek_position.value());
            return IterationDecision::Continue;
        }

        DECODER_TRY_ALLOC(table.try_set(seek_id.release_value(), base_position + seek_position.release_value()));
        return IterationDecision::Continue;
    }));
    return {};
}
260
261
// Locates a top-level element of the Segment by ID.
//
// Returns the position just after the element's ID (i.e. where its size field starts), an empty
// Optional if the end of the Segment is reached without finding it, or an error if the data
// cannot be read. `element_name` is only used for debug and error messages.
//
// Lookups are served from m_seek_entries when possible. Otherwise the Segment is scanned,
// resuming at m_last_top_level_element_position when that is non-zero and at the start of the
// Segment contents otherwise. Every element passed during the scan is added to m_seek_entries,
// and an encountered SeekHead replaces the whole table with its own entries.
//
// NOTE(review): a later scanned element presumably replaces the cached position of an earlier
// one with the same ID (e.g. successive Clusters) — confirm this matches the "first" in the name.
DecoderErrorOr<Optional<size_t>> Reader::find_first_top_level_element_with_id([[maybe_unused]] StringView element_name, u32 element_id)
{
    dbgln_if(MATROSKA_DEBUG, "====== Finding element {} with ID {:#010x} ======", element_name, element_id);

    if (auto cached_position = m_seek_entries.get(element_id); cached_position.has_value()) {
        dbgln_if(MATROSKA_TRACE_DEBUG, "Cache hit!");
        return cached_position.release_value();
    }

    Streamer streamer { m_data };
    if (m_last_top_level_element_position != 0)
        TRY_READ(streamer.seek_to_position(m_last_top_level_element_position));
    else
        TRY_READ(streamer.seek_to_position(m_segment_contents_position));

    Optional<size_t> position;

    while (streamer.position() < m_segment_contents_position + m_segment_contents_size) {
        auto found_element_id = TRY_READ(streamer.read_variable_size_integer(false));
        auto found_element_position = streamer.position();
        dbgln_if(MATROSKA_TRACE_DEBUG, "Found element ID {:#010x} with position {}.", found_element_id, found_element_position);

        if (found_element_id == SEEK_HEAD_ELEMENT_ID) {
            dbgln_if(MATROSKA_TRACE_DEBUG, "Found SeekHead, parsing it into the lookup table.");
            m_seek_entries.clear();
            TRY(parse_seek_head(streamer, found_element_position, m_seek_entries));
            m_last_top_level_element_position = 0;
            if (auto seek_head_position = m_seek_entries.get(element_id); seek_head_position.has_value()) {
                dbgln_if(MATROSKA_TRACE_DEBUG, "SeekHead hit!");
                position = seek_head_position.release_value();
                break;
            }
            continue;
        }

        auto result = streamer.read_unknown_element();
        if (result.is_error())
            return DecoderError::format(DecoderErrorCategory::Corrupted, "While seeking to {}: {}", element_name, result.release_error().string_literal());

        m_last_top_level_element_position = streamer.position();

        // The table is keyed by u32. An ID that does not fit cannot be a match for `element_id`
        // and must not be cached, since truncating it could alias a genuine element ID.
        if (found_element_id <= NumericLimits<u32>::max()) {
            DECODER_TRY_ALLOC(m_seek_entries.try_set(static_cast<u32>(found_element_id), found_element_position));
        }

        if (found_element_id == element_id) {
            position = found_element_position;
            break;
        }

        dbgln_if(MATROSKA_TRACE_DEBUG, "Skipped to position {}.", m_last_top_level_element_position);
    }

    return position;
}
314
315
// Parses a Segment Information master element (its ID must already have been read).
// TimestampScale, MuxingApp, WritingApp and Duration are stored; other children are skipped.
static DecoderErrorOr<SegmentInformation> parse_information(Streamer& streamer)
{
    SegmentInformation info;

    auto consume_info_child = [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        if (child_id == TIMESTAMP_SCALE_ID) {
            info.set_timestamp_scale(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_DEBUG, "Read TimestampScale attribute: {}", info.timestamp_scale());
        } else if (child_id == MUXING_APP_ID) {
            info.set_muxing_app(TRY_READ(streamer.read_string()));
            dbgln_if(MATROSKA_DEBUG, "Read MuxingApp attribute: {}", info.muxing_app().as_string());
        } else if (child_id == WRITING_APP_ID) {
            info.set_writing_app(TRY_READ(streamer.read_string()));
            dbgln_if(MATROSKA_DEBUG, "Read WritingApp attribute: {}", info.writing_app().as_string());
        } else if (child_id == DURATION_ID) {
            info.set_duration_unscaled(TRY_READ(streamer.read_float()));
            dbgln_if(MATROSKA_DEBUG, "Read Duration attribute: {}", info.duration_unscaled().value());
        } else {
            TRY_READ(streamer.read_unknown_element());
        }

        return IterationDecision::Continue;
    };
    TRY(parse_master_element(streamer, "Segment Information"sv, move(consume_info_child)));

    return info;
}
345
346
// Returns the Segment Information, parsing it on first use and caching it in
// m_segment_information. Fails as corrupted if the Segment has no such element.
DecoderErrorOr<SegmentInformation> Reader::segment_information()
{
    if (!m_segment_information.has_value()) {
        auto info_position = TRY(find_first_top_level_element_with_id("Segment Information"sv, SEGMENT_INFORMATION_ELEMENT_ID));
        if (!info_position.has_value())
            return DecoderError::corrupted("No Segment Information element found"sv);

        Streamer streamer { m_data };
        TRY_READ(streamer.seek_to_position(info_position.release_value()));
        m_segment_information = TRY(parse_information(streamer));
    }

    return m_segment_information.value();
}
359
360
// Parses the Tracks element into m_tracks unless m_tracks already holds entries.
// Fails as corrupted if the Segment has no Tracks element.
DecoderErrorOr<void> Reader::ensure_tracks_are_parsed()
{
    // A non-empty track table means an earlier call already did the work.
    bool const already_parsed = !m_tracks.is_empty();
    if (already_parsed)
        return {};

    auto tracks_position = TRY(find_first_top_level_element_with_id("Tracks"sv, TRACK_ELEMENT_ID));
    if (!tracks_position.has_value())
        return DecoderError::corrupted("No Tracks element found"sv);

    Streamer streamer { m_data };
    TRY_READ(streamer.seek_to_position(tracks_position.release_value()));
    TRY(parse_tracks(streamer));
    return {};
}
372
373
// Parses a video track's Colour master element (its ID must already have been read).
// Primaries, TransferCharacteristics, MatrixCoefficients, Range and BitsPerChannel are stored;
// other children are skipped. Enum-typed fields are produced by casting the raw integer.
static DecoderErrorOr<TrackEntry::ColorFormat> parse_video_color_information(Streamer& streamer)
{
    TrackEntry::ColorFormat color {};

    auto consume_color_child = [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        switch (child_id) {
        case PRIMARIES_ID: {
            auto const raw_primaries = TRY_READ(streamer.read_u64());
            color.color_primaries = static_cast<ColorPrimaries>(raw_primaries);
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's Primaries attribute: {}", color_primaries_to_string(color.color_primaries));
            break;
        }
        case TRANSFER_CHARACTERISTICS_ID: {
            auto const raw_transfer = TRY_READ(streamer.read_u64());
            color.transfer_characteristics = static_cast<TransferCharacteristics>(raw_transfer);
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's TransferCharacteristics attribute: {}", transfer_characteristics_to_string(color.transfer_characteristics));
            break;
        }
        case MATRIX_COEFFICIENTS_ID: {
            auto const raw_matrix = TRY_READ(streamer.read_u64());
            color.matrix_coefficients = static_cast<MatrixCoefficients>(raw_matrix);
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's MatrixCoefficients attribute: {}", matrix_coefficients_to_string(color.matrix_coefficients));
            break;
        }
        case RANGE_ID: {
            auto const raw_range = TRY_READ(streamer.read_u64());
            color.range = static_cast<TrackEntry::ColorRange>(raw_range);
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's Range attribute: {}", to_underlying(color.range));
            break;
        }
        case BITS_PER_CHANNEL_ID:
            color.bits_per_channel = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Colour's BitsPerChannel attribute: {}", color.bits_per_channel);
            break;
        default:
            TRY_READ(streamer.read_unknown_element());
        }

        return IterationDecision::Continue;
    };
    TRY(parse_master_element(streamer, "Colour"sv, move(consume_color_child)));

    return color;
}
408
409
// Parses a track's Video master element (its ID must already have been read).
// PixelWidth, PixelHeight and the nested Colour element are stored; other children are skipped.
static DecoderErrorOr<TrackEntry::VideoTrack> parse_video_track_information(Streamer& streamer)
{
    TrackEntry::VideoTrack video {};

    auto consume_video_child = [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        if (child_id == PIXEL_WIDTH_ID) {
            video.pixel_width = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelWidth attribute: {}", video.pixel_width);
        } else if (child_id == PIXEL_HEIGHT_ID) {
            video.pixel_height = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelHeight attribute: {}", video.pixel_height);
        } else if (child_id == COLOR_ENTRY_ID) {
            video.color_format = TRY(parse_video_color_information(streamer));
        } else {
            TRY_READ(streamer.read_unknown_element());
        }

        return IterationDecision::Continue;
    };
    TRY(parse_master_element(streamer, "VideoTrack"sv, move(consume_video_child)));

    return video;
}
435
436
// Parses a track's Audio master element (its ID must already have been read).
// Channels and BitDepth are stored; other children are skipped.
static DecoderErrorOr<TrackEntry::AudioTrack> parse_audio_track_information(Streamer& streamer)
{
    TrackEntry::AudioTrack audio {};

    auto consume_audio_child = [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        switch (child_id) {
        case CHANNELS_ID:
            audio.channels = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's Channels attribute: {}", audio.channels);
            return IterationDecision::Continue;
        case BIT_DEPTH_ID:
            audio.bit_depth = TRY_READ(streamer.read_u64());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's BitDepth attribute: {}", audio.bit_depth);
            return IterationDecision::Continue;
        default:
            TRY_READ(streamer.read_unknown_element());
            return IterationDecision::Continue;
        }
    };
    TRY(parse_master_element(streamer, "AudioTrack"sv, move(consume_audio_child)));

    return audio;
}
459
460
// Parses one TrackEntry master element (its ID must already have been read) into a newly
// allocated TrackEntry.
//
// Stored children: TrackNumber, TrackUID, TrackType, Language, CodecID, CodecPrivate,
// TrackTimestampScale, TrackOffset, and the nested Video and Audio elements. Anything else is
// skipped. Allocation failures are reported through DECODER_TRY_ALLOC, read failures as
// corrupted data.
static DecoderErrorOr<NonnullRefPtr<TrackEntry>> parse_track_entry(Streamer& streamer)
{
    auto track_entry = DECODER_TRY_ALLOC(try_make_ref_counted<TrackEntry>());
    TRY(parse_master_element(streamer, "Track"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
        switch (element_id) {
        case TRACK_NUMBER_ID:
            track_entry->set_track_number(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackNumber attribute: {}", track_entry->track_number());
            break;
        case TRACK_UID_ID:
            track_entry->set_track_uid(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackUID attribute: {}", track_entry->track_uid());
            break;
        case TRACK_TYPE_ID:
            // The raw integer is cast without range checking.
            track_entry->set_track_type(static_cast<TrackEntry::TrackType>(TRY_READ(streamer.read_u64())));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackType attribute: {}", to_underlying(track_entry->track_type()));
            break;
        case TRACK_LANGUAGE_ID:
            track_entry->set_language(DECODER_TRY_ALLOC(String::from_byte_string(TRY_READ(streamer.read_string()))));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's Language attribute: {}", track_entry->language());
            break;
        case TRACK_CODEC_ID:
            track_entry->set_codec_id(DECODER_TRY_ALLOC(String::from_byte_string(TRY_READ(streamer.read_string()))));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", track_entry->codec_id());
            break;
        case TRACK_CODEC_PRIVATE: {
            // CodecPrivate is opaque binary: read its size, then that many raw octets.
            auto codec_private_data = TRY_READ(streamer.read_raw_octets(TRY_READ(streamer.read_variable_size_integer())));
            DECODER_TRY_ALLOC(track_entry->set_codec_private_data(codec_private_data));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecPrivate attribute: {} octets", codec_private_data.size());
            break;
        }
        case TRACK_TIMESTAMP_SCALE_ID:
            track_entry->set_timestamp_scale(TRY_READ(streamer.read_float()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackTimestampScale attribute: {}", track_entry->timestamp_scale());
            break;
        case TRACK_OFFSET_ID:
            track_entry->set_timestamp_offset(TRY_READ(streamer.read_variable_size_signed_integer()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackOffset attribute: {}", track_entry->timestamp_offset());
            break;
        case TRACK_VIDEO_ID:
            track_entry->set_video_track(TRY(parse_video_track_information(streamer)));
            break;
        case TRACK_AUDIO_ID:
            track_entry->set_audio_track(TRY(parse_audio_track_information(streamer)));
            break;
        default:
            TRY_READ(streamer.read_unknown_element());
        }

        return IterationDecision::Continue;
    }));

    return track_entry;
}
514
515
// Parses a Tracks master element (its ID must already have been read), storing every TrackEntry
// child in m_tracks keyed by its track number. Other children are skipped.
DecoderErrorOr<void> Reader::parse_tracks(Streamer& streamer)
{
    auto consume_tracks_child = [&](u64 child_id) -> DecoderErrorOr<IterationDecision> {
        if (child_id != TRACK_ENTRY_ID) {
            TRY_READ(streamer.read_unknown_element());
            return IterationDecision::Continue;
        }

        auto parsed_track = TRY(parse_track_entry(streamer));
        dbgln_if(MATROSKA_DEBUG, "Parsed track {}", parsed_track->track_number());
        DECODER_TRY_ALLOC(m_tracks.try_set(parsed_track->track_number(), parsed_track));
        return IterationDecision::Continue;
    };
    TRY(parse_master_element(streamer, "Tracks"sv, move(consume_tracks_child)));
    return {};
}
530
531
// Invokes `callback` for each parsed track, parsing the Tracks element first if necessary.
// Stops early when the callback returns IterationDecision::Break; callback errors are propagated.
DecoderErrorOr<void> Reader::for_each_track(TrackEntryCallback callback)
{
    TRY(ensure_tracks_are_parsed());

    for (auto const& number_and_track : m_tracks) {
        if (TRY(callback(number_and_track.value)) == IterationDecision::Break)
            break;
    }

    return {};
}
541
542
// Like for_each_track(), but `callback` only sees tracks whose track_type() equals `type`.
DecoderErrorOr<void> Reader::for_each_track_of_type(TrackEntry::TrackType type, TrackEntryCallback callback)
{
    auto forward_matching_tracks = [&](TrackEntry const& candidate) -> DecoderErrorOr<IterationDecision> {
        if (candidate.track_type() == type)
            return callback(candidate);
        // Not the requested type: move on to the next track.
        return IterationDecision::Continue;
    };
    return for_each_track(move(forward_matching_tracks));
}
550
551
// Returns the track registered under `track_number`, parsing the Tracks element first if
// necessary. Fails with DecoderErrorCategory::Invalid when no such track exists.
DecoderErrorOr<NonnullRefPtr<TrackEntry>> Reader::track_for_track_number(u64 track_number)
{
    TRY(ensure_tracks_are_parsed());

    auto found_track = m_tracks.get(track_number);
    if (found_track.has_value())
        return *found_track.release_value();

    return DecoderError::format(DecoderErrorCategory::Invalid, "No track found with number {}", track_number);
}
559
560
// Returns the number of entries in m_tracks, parsing the Tracks element first if necessary.
DecoderErrorOr<size_t> Reader::track_count()
{
    if (auto parse_result = ensure_tracks_are_parsed(); parse_result.is_error())
        return parse_result.release_error();
    return m_tracks.size();
}
565
566
// Returns the number of significant octets in `element_id`: sizeof(u32) minus the number of
// whole leading zero octets. A zero ID has no significant octets and yields 0.
constexpr size_t get_element_id_size(u32 element_id)
{
    // Handle zero explicitly rather than relying on count_leading_zeroes(0), whose result is
    // presumably not guaranteed (compiler clz builtins are undefined for 0) — TODO confirm.
    if (element_id == 0)
        return 0;
    return sizeof(element_id) - (count_leading_zeroes(element_id) / 8);
}
570
571
// Parses the start of a Cluster master element (its ID must already have been read).
//
// Children are read only until the Timestamp is found; the streamer is then moved back to the
// Cluster's first child so the caller can iterate the Cluster's contents from the beginning.
// The returned Cluster's timestamp is Timestamp * `timestamp_scale`, in nanoseconds, saturated
// to the largest value an i64 can hold.
// Fails as corrupted if the Cluster has no Timestamp.
static DecoderErrorOr<Cluster> parse_cluster(Streamer& streamer, u64 timestamp_scale)
{
    Optional<u64> timestamp;

    auto first_element_position = TRY(parse_master_element(streamer, "Cluster"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
        switch (element_id) {
        case TIMESTAMP_ID:
            timestamp = TRY_READ(streamer.read_u64());
            // Nothing else is needed from this pass over the Cluster.
            return IterationDecision::Break;
        default:
            TRY_READ(streamer.read_unknown_element());
        }

        return IterationDecision::Continue;
    }));

    if (!timestamp.has_value())
        return DecoderError::corrupted("Cluster was missing a timestamp"sv);
    if (first_element_position == 0)
        return DecoderError::corrupted("Cluster had no children"sv);

    dbgln_if(MATROSKA_TRACE_DEBUG, "Seeking back to position {}", first_element_position);
    TRY_READ(streamer.seek_to_position(first_element_position));

    // Both factors come from the file, so the u64 product can wrap and the result can exceed
    // the signed nanosecond range. Saturate instead of producing a wrapped or negative time.
    Checked<u64> timestamp_ns = timestamp.release_value();
    timestamp_ns *= timestamp_scale;
    auto const clamped_timestamp_ns = timestamp_ns.has_overflow()
        ? NumericLimits<i64>::max()
        : AK::clamp_to<i64>(timestamp_ns.value());

    Cluster cluster;
    cluster.set_timestamp(Duration::from_nanoseconds(clamped_timestamp_ns));
    return cluster;
}
599
600
// Parses the body of a SimpleBlock element (the element ID has already been consumed).
//
// Layout read here: element size, track number (VINT), signed 16-bit timestamp offset,
// one flags octet, an optional lacing header, and then the frame data.
//
// The returned Block holds its track number, absolute timestamp, flag bits, and one
// ReadonlyBytes view per frame. The views come straight from Streamer::read_raw_octets().
//
// Returns a Corrupted error if any read fails or if the declared sizes are inconsistent.
static DecoderErrorOr<Block> parse_simple_block(Streamer& streamer, Duration cluster_timestamp, u64 segment_timestamp_scale, TrackEntry const& track)
{
    Block block;

    auto content_size = TRY_READ(streamer.read_variable_size_integer());

    auto position_before_track_number = streamer.position();
    block.set_track_number(TRY_READ(streamer.read_variable_size_integer()));

    // https://www.matroska.org/technical/notes.html
    // Block Timestamps:
    //     The Block Element and SimpleBlock Element store their timestamps as signed integers,
    //     relative to the Cluster\Timestamp value of the Cluster they are stored in. To get the
    //     timestamp of a Block or SimpleBlock in nanoseconds you have to use the following formula:
    //         `( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale`
    //
    //     When a CodecDelay Element is set, its value MUST be subtracted from each Block timestamp
    //     of that track. To get the timestamp in nanoseconds of the first frame in a Block or
    //     SimpleBlock, the formula becomes:
    //         `( ( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale ) - CodecDelay`
    auto raw_timestamp_offset = TRY_READ(streamer.read_i16());
    Checked<i64> timestamp_offset_ns = AK::clamp_to<i64>(static_cast<double>(raw_timestamp_offset * AK::clamp_to<i64>(segment_timestamp_scale)) * track.timestamp_scale());
    timestamp_offset_ns.saturating_sub(AK::clamp_to<i64>(track.codec_delay()));
    // This is only mentioned in the elements specification under TrackOffset.
    // https://www.matroska.org/technical/elements.html
    timestamp_offset_ns.saturating_add(AK::clamp_to<i64>(track.timestamp_offset()));
    Duration timestamp_offset = Duration::from_nanoseconds(timestamp_offset_ns.value());
    block.set_timestamp(cluster_timestamp + timestamp_offset);

    auto flags = TRY_READ(streamer.read_octet());
    block.set_only_keyframes((flags & (1u << 7u)) != 0);
    block.set_invisible((flags & (1u << 3u)) != 0);
    block.set_lacing(static_cast<Block::Lacing>((flags & 0b110u) >> 1u));
    block.set_discardable((flags & 1u) != 0);

    // Everything after the track number, timestamp and flags is lacing header plus frame data.
    // Reject blocks whose declared size cannot even hold the header, rather than letting the
    // unsigned subtraction wrap around.
    auto header_size = streamer.position() - position_before_track_number;
    if (content_size < header_size)
        return DecoderError::corrupted("SimpleBlock size is smaller than its header"sv);
    auto total_frame_content_size = content_size - header_size;

    Vector<ReadonlyBytes> frames;

    // NOTE(review): only EBML and FixedSize lacing get dedicated handling; any other lacing
    //               value is read as a single unlaced frame. Confirm whether that is intended.
    if (block.lacing() == Block::Lacing::EBML) {
        auto octets_read_before_frame_sizes = streamer.octets_read();
        auto frame_count = TRY_READ(streamer.read_octet()) + 1;
        Vector<u64> frame_sizes;
        frame_sizes.ensure_capacity(frame_count);

        // The first frame size is stored as-is, each following size as a signed difference
        // from the previous one, and the last size is whatever remains of the block.
        u64 frame_size_sum = 0;
        auto first_frame_size = TRY_READ(streamer.read_variable_size_integer());
        frame_sizes.append(first_frame_size);
        frame_size_sum += first_frame_size;
        u64 previous_frame_size = first_frame_size;

        for (int i = 0; i < frame_count - 2; i++) {
            auto frame_size_difference = TRY_READ(streamer.read_variable_size_signed_integer());
            // Unsigned wrap-around makes adding a negative difference behave like a subtraction.
            u64 frame_size = previous_frame_size + static_cast<u64>(frame_size_difference);
            frame_sizes.append(frame_size);
            frame_size_sum += frame_size;
            previous_frame_size = frame_size;
        }

        auto lacing_header_size = streamer.octets_read() - octets_read_before_frame_sizes;
        if (frame_size_sum > total_frame_content_size || lacing_header_size > total_frame_content_size - frame_size_sum)
            return DecoderError::corrupted("EBML-laced frame sizes exceed the SimpleBlock size"sv);
        frame_sizes.append(total_frame_content_size - frame_size_sum - lacing_header_size);

        for (int i = 0; i < frame_count; i++) {
            auto current_frame_size = frame_sizes.at(i);
            frames.append(TRY_READ(streamer.read_raw_octets(current_frame_size)));
        }
    } else if (block.lacing() == Block::Lacing::FixedSize) {
        auto frame_count = TRY_READ(streamer.read_octet()) + 1;
        // The frame count octet that was just read is part of the block's payload, so it must
        // not be counted as frame data when splitting the payload into equal frames.
        if (total_frame_content_size < 1)
            return DecoderError::corrupted("Fixed-size laced SimpleBlock is too small for its frame count"sv);
        auto individual_frame_size = (total_frame_content_size - 1) / frame_count;
        for (int i = 0; i < frame_count; i++)
            frames.append(TRY_READ(streamer.read_raw_octets(individual_frame_size)));
    } else {
        frames.append(TRY_READ(streamer.read_raw_octets(total_frame_content_size)));
    }
    block.set_frames(move(frames));
    return block;
}
682
683
// Builds a SampleIterator for `track_number` positioned at the first top-level Cluster.
// The iterator works on a view of the segment contents, so its start position is
// expressed relative to m_segment_contents_position.
DecoderErrorOr<SampleIterator> Reader::create_sample_iterator(u64 track_number)
{
    auto first_cluster_position = TRY(find_first_top_level_element_with_id("Cluster"sv, CLUSTER_ELEMENT_ID));
    if (!first_cluster_position.has_value())
        return DecoderError::corrupted("No clusters are present in the segment"sv);

    ReadonlyBytes segment_view = m_data.slice(m_segment_contents_position, m_segment_contents_size);

    // We need to have the element ID included so that the iterator knows where it is.
    auto cluster_id_position = first_cluster_position.value() - get_element_id_size(CLUSTER_ELEMENT_ID);
    auto position = cluster_id_position - m_segment_contents_position;

    dbgln_if(MATROSKA_DEBUG, "Creating sample iterator starting at {} relative to segment at {}", position, m_segment_contents_position);

    auto track = TRY(track_for_track_number(track_number));
    auto timestamp_scale = TRY(segment_information()).timestamp_scale();
    return SampleIterator(this->m_mapped_file, segment_view, move(track), timestamp_scale, position);
}
696
697
// Parses one CueTrackPositions master element into a CueTrackPosition.
// A non-zero track number and a cluster position are required; a CueReference
// child is reported as not implemented, and unrecognised children are skipped.
static DecoderErrorOr<CueTrackPosition> parse_cue_track_position(Streamer& streamer)
{
    CueTrackPosition track_position;
    bool saw_cluster_position = false;

    TRY_READ(parse_master_element(streamer, "CueTrackPositions"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
        if (element_id == CUE_TRACK_ID) {
            track_position.set_track_number(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions track number {}", track_position.track_number());
        } else if (element_id == CUE_CLUSTER_POSITION_ID) {
            track_position.set_cluster_position(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions cluster position {}", track_position.cluster_position());
            saw_cluster_position = true;
        } else if (element_id == CUE_RELATIVE_POSITION_ID) {
            track_position.set_block_offset(TRY_READ(streamer.read_u64()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read CueTrackPositions relative position {}", track_position.block_offset());
        } else if (element_id == CUE_CODEC_STATE_ID) {
            // Mandatory in spec, but not present in files? 0 means use TrackEntry's codec state.
            // FIXME: Do something with this value.
            dbgln_if(MATROSKA_DEBUG, "Found CodecState, skipping");
            TRY_READ(streamer.read_unknown_element());
        } else if (element_id == CUE_REFERENCE_ID) {
            return DecoderError::not_implemented();
        } else {
            TRY_READ(streamer.read_unknown_element());
        }

        return IterationDecision::Continue;
    }));

    if (track_position.track_number() == 0)
        return DecoderError::corrupted("Track number was not present or 0"sv);

    if (!saw_cluster_position)
        return DecoderError::corrupted("Cluster was missing the cluster position"sv);

    return track_position;
}
742
743
// Parses one CuePoint master element: its CueTime (scaled by `timestamp_scale` into
// nanoseconds) and every CueTrackPositions child, keyed by track number.
// A cue point whose timestamp is still negative after parsing, or that has no track
// positions, is rejected as corrupted.
static DecoderErrorOr<CuePoint> parse_cue_point(Streamer& streamer, u64 timestamp_scale)
{
    CuePoint cue_point;

    TRY(parse_master_element(streamer, "CuePoint"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
        if (element_id == CUE_TIME_ID) {
            // On https://www.matroska.org/technical/elements.html, spec says of the CueTime element:
            // > Absolute timestamp of the seek point, expressed in Matroska Ticks -- ie in nanoseconds; see timestamp-ticks.
            // Matroska Ticks are specified in https://www.matroska.org/technical/notes.html:
            // > For such elements, the timestamp value is stored directly in nanoseconds.
            // However, my test files appear to use Segment Ticks, which uses the segment's timestamp scale, and Mozilla's nestegg parser agrees:
            // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1941
            // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L2411-L2416
            // https://github.com/mozilla/nestegg/tree/ec6adfbbf979678e3058cc4695257366f39e290b/src/nestegg.c#L1383-L1392
            // Other fields that specify Matroska Ticks may also use Segment Ticks instead, who knows :^(
            auto const cue_ticks = TRY_READ(streamer.read_u64());
            cue_point.set_timestamp(Duration::from_nanoseconds(static_cast<i64>(cue_ticks * timestamp_scale)));
            dbgln_if(MATROSKA_DEBUG, "Read CuePoint timestamp {}ms", cue_point.timestamp().to_milliseconds());
        } else if (element_id == CUE_TRACK_POSITIONS_ID) {
            auto track_position = TRY_READ(parse_cue_track_position(streamer));
            DECODER_TRY_ALLOC(cue_point.track_positions().try_set(track_position.track_number(), track_position));
        } else {
            TRY_READ(streamer.read_unknown_element());
        }

        return IterationDecision::Continue;
    }));

    if (cue_point.timestamp().is_negative())
        return DecoderError::corrupted("CuePoint was missing a timestamp"sv);

    if (cue_point.track_positions().is_empty())
        return DecoderError::corrupted("CuePoint was missing track positions"sv);

    return cue_point;
}
785
786
// Rebuilds m_cues from a Cues master element.
// Every parsed CuePoint is appended to the per-track list of each track it has a
// position for. Any child other than CuePoint is reported as corrupted.
DecoderErrorOr<void> Reader::parse_cues(Streamer& streamer)
{
    m_cues.clear();

    TRY(parse_master_element(streamer, "Cues"sv, [&](u64 element_id) -> DecoderErrorOr<IterationDecision> {
        switch (element_id) {
        case CUE_POINT_ID: {
            auto cue_point = TRY(parse_cue_point(streamer, TRY(segment_information()).timestamp_scale()));

            // FIXME: Verify that these are already in order of timestamp. If they are not, return a corrupted error for now,
            //        but if it turns out that Matroska files with out-of-order cue points are valid, sort them instead.

            // Iterate by reference: only the key is needed, so copying each map entry is wasted work.
            for (auto const& track_position_entry : cue_point.track_positions()) {
                if (!m_cues.contains(track_position_entry.key))
                    DECODER_TRY_ALLOC(m_cues.try_set(track_position_entry.key, Vector<CuePoint>()));
                Vector<CuePoint>& cue_points_for_track = m_cues.get(track_position_entry.key).release_value();
                cue_points_for_track.append(cue_point);
            }
            break;
        }
        default:
            return DecoderError::format(DecoderErrorCategory::Corrupted, "Unknown Cues child ID {:#010x}", element_id);
        }

        return IterationDecision::Continue;
    }));

    return {};
}
815
816
// Parses the segment's Cues element once; later calls return immediately.
// m_cues_have_been_parsed is only set after parse_cues() succeeds, so a failed
// attempt is retried on the next call.
DecoderErrorOr<void> Reader::ensure_cues_are_parsed()
{
    if (m_cues_have_been_parsed)
        return {};
    auto position = TRY(find_first_top_level_element_with_id("Cues"sv, CUES_ID));
    if (!position.has_value())
        return DecoderError::corrupted("No Cues element found"sv);
    Streamer streamer { m_data };
    TRY_READ(streamer.seek_to_position(position.release_value()));
    TRY(parse_cues(streamer));
    m_cues_have_been_parsed = true;
    return {};
}
829
830
// Moves `iterator` to the last cue point of its track whose timestamp is not after
// `timestamp`, or to the first cue point if all of them are later.
//
// The search starts from an index estimated from the segment duration and then walks
// backwards or forwards through the track's cue points to the right one.
// Returns an error if the cues cannot be parsed or the track has no cue points.
DecoderErrorOr<void> Reader::seek_to_cue_for_timestamp(SampleIterator& iterator, Duration const& timestamp)
{
    auto track_number = iterator.m_track->track_number();
    auto optional_cue_points = TRY(cue_points_for_track(track_number));
    if (!optional_cue_points.has_value() || optional_cue_points.value().is_empty())
        return DecoderError::format(DecoderErrorCategory::Invalid, "No cue points found for track {}", track_number);
    auto const& cue_points = optional_cue_points.value();
    auto const last_index = cue_points.size() - 1;

    // Take a guess at where in the cues the timestamp will be and correct from there.
    // The guess is skipped for a missing or non-positive duration (which would divide by zero)
    // and for non-positive timestamps; starting at index 0 is always corrected by the loops below.
    auto duration = TRY(segment_information()).duration();
    size_t index = 0;
    if (duration.has_value() && duration->to_nanoseconds() > 0 && timestamp.to_nanoseconds() > 0) {
        auto timestamp_ns = static_cast<u64>(timestamp.to_nanoseconds());
        auto duration_ns = static_cast<u64>(duration->to_nanoseconds());
        u64 guessed_index = (timestamp_ns * cue_points.size()) / duration_ns;
        index = guessed_index < last_index ? static_cast<size_t>(guessed_index) : last_index;
    }

    CuePoint const* prev_cue_point = &cue_points[index];
    dbgln_if(MATROSKA_DEBUG, "Finding Matroska cue points for timestamp {}ms starting from cue at {}ms", timestamp.to_milliseconds(), prev_cue_point->timestamp().to_milliseconds());

    if (prev_cue_point->timestamp() == timestamp) {
        TRY(iterator.seek_to_cue_point(*prev_cue_point));
        return {};
    }

    if (prev_cue_point->timestamp() > timestamp) {
        // The guess was too late: walk backwards until a cue at or before the timestamp is found.
        while (index > 0 && prev_cue_point->timestamp() > timestamp) {
            prev_cue_point = &cue_points[--index];
            dbgln_if(MATROSKA_DEBUG, "Checking previous cue point {}ms", prev_cue_point->timestamp().to_milliseconds());
        }
        TRY(iterator.seek_to_cue_point(*prev_cue_point));
        return {};
    }

    // The guess was too early: walk forwards while the next cue is still not past the timestamp.
    while (++index < cue_points.size()) {
        auto const& cue_point = cue_points[index];
        dbgln_if(MATROSKA_DEBUG, "Checking future cue point {}ms", cue_point.timestamp().to_milliseconds());
        if (cue_point.timestamp() > timestamp)
            break;
        prev_cue_point = &cue_point;
    }

    TRY(iterator.seek_to_cue_point(*prev_cue_point));
    return {};
}
868
869
// Reads blocks from `iterator` until one has a timestamp after `timestamp`, remembering
// the iterator state just before the most recent block flagged as keyframe-only.
// If such a block was seen, `iterator` is rewound to that state; otherwise it is left
// wherever the scan stopped. Errors from next_block() are propagated.
static DecoderErrorOr<void> search_clusters_for_keyframe_before_timestamp(SampleIterator& iterator, Duration const& timestamp)
{
#if MATROSKA_DEBUG
    // Initialized so that the increment below never reads an indeterminate value
    // when blocks precede the first keyframe.
    size_t inter_frames_count = 0;
#endif
    Optional<SampleIterator> last_keyframe;

    while (true) {
        // Snapshot the iterator before the block is consumed so a keyframe can be re-read later.
        SampleIterator rewind_iterator = iterator;
        auto block = TRY(iterator.next_block());

        if (block.only_keyframes()) {
            last_keyframe.emplace(rewind_iterator);
#if MATROSKA_DEBUG
            inter_frames_count = 0;
#endif
        }

        if (block.timestamp() > timestamp)
            break;

#if MATROSKA_DEBUG
        inter_frames_count++;
#endif
    }

    if (last_keyframe.has_value()) {
#if MATROSKA_DEBUG
        dbgln("Seeked to a keyframe with {} inter frames to skip", inter_frames_count);
#endif
        iterator = last_keyframe.release_value();
    }

    return {};
}
904
905
// Tells whether m_cues has an entry for `track_number`, after making sure the cues
// have been parsed. A failure of ensure_cues_are_parsed() is returned unchanged.
DecoderErrorOr<bool> Reader::has_cues_for_track(u64 track_number)
{
    if (auto parse_result = ensure_cues_are_parsed(); parse_result.is_error())
        return parse_result.release_error();

    return m_cues.contains(track_number);
}
910
911
// Returns a copy of `iterator` repositioned for decoding at `timestamp`.
// When the track has cue points, they are used to seek. Otherwise the blocks are
// scanned linearly for a keyframe, restarting from the first cluster when the
// iterator has no last timestamp or is already past the requested one.
DecoderErrorOr<SampleIterator> Reader::seek_to_random_access_point(SampleIterator iterator, Duration timestamp)
{
    auto const track_number = iterator.m_track->track_number();

    if (TRY(has_cues_for_track(track_number))) {
        TRY(seek_to_cue_for_timestamp(iterator, timestamp));
        VERIFY(iterator.last_timestamp().has_value());
        return iterator;
    }

    // If the timestamp is before the iterator's current position, then we need to start from the beginning of the Segment.
    bool const must_restart = !iterator.last_timestamp().has_value() || timestamp < iterator.last_timestamp().value();
    if (must_restart)
        iterator = TRY(create_sample_iterator(track_number));

    TRY(search_clusters_for_keyframe_before_timestamp(iterator, timestamp));
    return iterator;
}
929
930
// Looks up the cue point list stored in m_cues for `track_number`, after making sure
// the cues have been parsed. The Optional is empty when the map has no such key.
DecoderErrorOr<Optional<Vector<CuePoint> const&>> Reader::cue_points_for_track(u64 track_number)
{
    if (auto parse_result = ensure_cues_are_parsed(); parse_result.is_error())
        return parse_result.release_error();

    return m_cues.get(track_number);
}
935
936
// Advances through the segment data from m_position and returns the next SimpleBlock
// that belongs to this iterator's track.
// Cluster elements update m_current_cluster, blocks of other tracks are parsed and
// dropped, and every other element is skipped. m_position is stored after each element,
// and m_last_timestamp is updated when a block is returned.
// Yields an EndOfStream error once no more data is available.
DecoderErrorOr<Block> SampleIterator::next_block()
{
    if (m_position >= m_data.size())
        return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "Still at end of stream :^)"sv);

    Streamer streamer { m_data };
    TRY_READ(streamer.seek_to_position(m_position));

    while (streamer.has_octet()) {
#if MATROSKA_TRACE_DEBUG
        auto element_position = streamer.position();
#endif
        auto element_id = TRY_READ(streamer.read_variable_size_integer(false));
#if MATROSKA_TRACE_DEBUG
        dbgln("Iterator found element with ID {:#010x} at offset {} within the segment.", element_id, element_position);
#endif

        Optional<Block> matching_block;

        switch (element_id) {
        case CLUSTER_ELEMENT_ID:
            dbgln_if(MATROSKA_DEBUG, "  Iterator is parsing new cluster.");
            m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
            break;
        case SIMPLE_BLOCK_ID: {
            dbgln_if(MATROSKA_TRACE_DEBUG, "  Iterator is parsing new block.");
            auto candidate_block = TRY(parse_simple_block(streamer, m_current_cluster->timestamp(), m_segment_timestamp_scale, m_track));
            // Only blocks for the track this iterator was created for are handed out.
            if (candidate_block.track_number() == m_track->track_number())
                matching_block = move(candidate_block);
            break;
        }
        default:
            dbgln_if(MATROSKA_TRACE_DEBUG, "  Iterator is skipping unknown element with ID {:#010x}.", element_id);
            TRY_READ(streamer.read_unknown_element());
            break;
        }

        m_position = streamer.position();
        if (matching_block.has_value()) {
            m_last_timestamp = matching_block->timestamp();
            return matching_block.release_value();
        }
    }

    m_current_cluster.clear();
    return DecoderError::with_description(DecoderErrorCategory::EndOfStream, "End of stream"sv);
}
978
979
// Repositions the iterator using `cue_point`: parses the cluster the cue refers to,
// then sets m_position to the streamer position after parse_cluster() plus the cue's
// block offset, and m_last_timestamp to the cue's timestamp.
DecoderErrorOr<void> SampleIterator::seek_to_cue_point(CuePoint const& cue_point)
{
    // This is a private function. The position getter can return optional, but the caller should already know that this track has a position.
    auto const& track_cue_position = cue_point.position_for_track(m_track->track_number()).release_value();

    Streamer streamer { m_data };
    TRY_READ(streamer.seek_to_position(track_cue_position.cluster_position()));

    auto found_element_id = TRY_READ(streamer.read_variable_size_integer(false));
    bool const points_at_cluster = found_element_id == CLUSTER_ELEMENT_ID;
    if (!points_at_cluster)
        return DecoderError::corrupted("Cue point's cluster position didn't point to a cluster"sv);

    m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
    dbgln_if(MATROSKA_DEBUG, "SampleIterator set to cue point at timestamp {}ms", m_current_cluster->timestamp().to_milliseconds());

    m_last_timestamp = cue_point.timestamp();
    m_position = streamer.position() + track_cue_position.block_offset();
    return {};
}
997
998
// Reads a length-prefixed string element. The whole declared length is consumed from
// the stream, but the returned string ends at the first NUL octet within it, if any.
ErrorOr<ByteString> Streamer::read_string()
{
    auto declared_length = TRY(read_variable_size_integer());
    if (declared_length > remaining())
        return Error::from_string_literal("String length extends past the end of the stream");

    auto characters = data_as_chars();
    // strnlen() stops at the first NUL or at the declared length, whichever comes first.
    auto text_length = strnlen(characters, declared_length);
    ByteString string_value(characters, text_length);

    TRY(read_raw_octets(declared_length));
    return string_value;
}
1008
1009
// Consumes and returns a single octet, advancing both the position and the innermost
// octets-read counter. Fails when has_octet() reports no data left.
ErrorOr<u8> Streamer::read_octet()
{
    if (has_octet()) {
        u8 const octet = *data();
        ++m_octets_read.last();
        ++m_position;
        return octet;
    }

    dbgln_if(MATROSKA_TRACE_DEBUG, "Ran out of stream data");
    return Error::from_string_literal("Stream is out of data");
}
1020
1021
// Reads a big-endian signed 16-bit integer (first octet is the most significant).
ErrorOr<i16> Streamer::read_i16()
{
    // Each octet is read in its own statement. As two operands of a single `|`, the reads
    // would have no guaranteed order, so the bytes could legally be swapped.
    u16 const high_octet = TRY(read_octet());
    u16 const low_octet = TRY(read_octet());
    return static_cast<i16>(static_cast<u16>((high_octet << 8) | low_octet));
}
1025
1026
// Reads a variable-size integer (VINT). The position of the highest set bit in the
// first octet gives the total length in octets; the remaining octets are appended
// big-endian.
// When `mask_length` is true, the length marker bit is cleared from the result; when
// false, the first octet is kept verbatim.
// A first octet of zero is rejected.
ErrorOr<u64> Streamer::read_variable_size_integer(bool mask_length)
{
    dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT from offset {:p}", position());
    auto first_octet = TRY(read_octet());
    dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT, first byte is {:#02x}", first_octet);
    if (first_octet == 0)
        return Error::from_string_literal("read_variable_size_integer: Length descriptor has no terminating set bit");

    // Scan from the most significant bit downwards for the marker bit. The first iteration
    // shifts by 8 and therefore never matches, so the resulting length is at least 1.
    size_t length = 0;
    for (; length < 8; length++) {
        bool const marker_found = ((first_octet >> (8 - length)) & 1) == 1;
        if (marker_found)
            break;
    }
    dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT of total length {}", length);
    if (length > 8)
        return Error::from_string_literal("read_variable_size_integer: Length is too large");

    u64 result = first_octet;
    if (mask_length)
        result = first_octet & ~(1u << (8 - length));
    dbgln_if(MATROSKA_TRACE_DEBUG, "Beginning of VINT is {:#02x}", result);

    for (size_t octet_index = 1; octet_index < length; octet_index++) {
        u8 next_octet = TRY(read_octet());
        dbgln_if(MATROSKA_TRACE_DEBUG, "Read octet of {:#02x}", next_octet);
        result = (result << 8u) | next_octet;
        dbgln_if(MATROSKA_TRACE_DEBUG, "New result is {:#010x}", result);
    }
    return result;
}
1057
1058
// Reads a variable-size integer with its length marker removed, then subtracts a bias of
// 2^(7 * length - 1) - 1 so that the stored unsigned value maps onto a signed range.
// A first octet of zero is rejected.
ErrorOr<i64> Streamer::read_variable_size_signed_integer()
{
    auto first_octet = TRY(read_octet());
    if (first_octet == 0)
        return Error::from_string_literal("read_variable_sized_signed_integer: Length descriptor has no terminating set bit");

    // Find the marker bit from the top; the position it is found at is the length in octets.
    i64 length = 0;
    for (; length < 8; length++) {
        bool const marker_found = ((first_octet >> (8 - length)) & 1) == 1;
        if (marker_found)
            break;
    }
    if (length > 8)
        return Error::from_string_literal("read_variable_size_integer: Length is too large");

    i64 result = first_octet & ~(1u << (8 - length));
    for (i64 octet_index = 1; octet_index < length; octet_index++) {
        u8 next_octet = TRY(read_octet());
        result = (result << 8u) | next_octet;
    }

    auto const bias = AK::exp2<i64>(length * 7 - 1) - 1;
    result -= bias;
    return result;
}
1080
1081
// Returns a view of the next `num_octets` octets and advances past them, updating the
// innermost octets-read counter. Fails without consuming anything if fewer remain.
ErrorOr<ReadonlyBytes> Streamer::read_raw_octets(size_t num_octets)
{
    if (num_octets > remaining())
        return Error::from_string_literal("Tried to drop octets past the end of the stream");

    // Build the view before moving the position, since data() reflects the current position.
    ReadonlyBytes octets { data(), num_octets };
    m_octets_read.last() += num_octets;
    m_position += num_octets;
    return octets;
}
1090
1091
// Reads an unsigned integer element: a VINT length followed by that many big-endian
// octets. A length of zero yields 0.
// Lengths above eight octets are rejected. EBML limits unsigned integers to eight octets,
// and a longer value would silently lose its most significant bits in a u64.
ErrorOr<u64> Streamer::read_u64()
{
    auto integer_length = TRY(read_variable_size_integer());
    if (integer_length > sizeof(u64))
        return Error::from_string_literal("Unsigned integer is longer than 8 octets");

    u64 result = 0;
    for (size_t i = 0; i < integer_length; i++) {
        u64 const next_octet = TRY(read_octet());
        result = (result << 8u) | next_octet;
    }
    return result;
}
1100
1101
// Reads a floating-point element: a VINT length that must be 4 or 8, followed by that
// many big-endian octets holding the bit pattern of a float or a double respectively.
// The result is always returned as a double.
ErrorOr<double> Streamer::read_float()
{
    auto length = TRY(read_variable_size_integer());
    if (length != 4u && length != 8u)
        return Error::from_string_literal("Float size must be 4 or 8 bytes");

    u64 raw_bits = 0;
    for (size_t i = 0; i < length; i++) {
        u64 const next_octet = TRY(read_octet());
        raw_bits = (raw_bits << 8u) | next_octet;
    }

    // Reinterpret the bits with bit_cast rather than a union. This avoids reading an
    // inactive union member and picks the low 32 bits explicitly, so a 4-byte float does
    // not depend on host endianness.
    if (length == 4u)
        return static_cast<double>(bit_cast<float>(static_cast<u32>(raw_bits)));
    return bit_cast<double>(raw_bits);
}
1120
1121
// Skips over an element's contents: reads its VINT size and then consumes that many octets.
ErrorOr<void> Streamer::read_unknown_element()
{
    auto skipped_size = TRY(read_variable_size_integer());
    dbgln_if(MATROSKA_TRACE_DEBUG, "Skipping unknown element of size {}.", skipped_size);

    if (auto skip_result = read_raw_octets(skipped_size); skip_result.is_error())
        return skip_result.release_error();
    return {};
}
1128
1129
// Moves the read position to `position`, which must lie strictly inside the data;
// a position equal to or beyond the data size is rejected.
ErrorOr<void> Streamer::seek_to_position(size_t position)
{
    bool const is_inside_stream = position < m_data.size();
    if (!is_inside_stream)
        return Error::from_string_literal("Attempted to seek past the end of the stream");

    m_position = position;
    return {};
}
1136
1137
}