Coverage Report

Created: 2026-02-14 08:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/serenity/Userland/Libraries/LibWeb/FileAPI/Blob.cpp
Line
Count
Source
1
/*
2
 * Copyright (c) 2022-2024, Kenneth Myhra <kennethmyhra@serenityos.org>
3
 * Copyright (c) 2023, Shannon Booth <shannon@serenityos.org>
4
 *
5
 * SPDX-License-Identifier: BSD-2-Clause
6
 */
7
8
#include <AK/GenericLexer.h>
9
#include <LibJS/Runtime/ArrayBuffer.h>
10
#include <LibJS/Runtime/Completion.h>
11
#include <LibJS/Runtime/TypedArray.h>
12
#include <LibTextCodec/Decoder.h>
13
#include <LibWeb/Bindings/BlobPrototype.h>
14
#include <LibWeb/Bindings/ExceptionOrUtils.h>
15
#include <LibWeb/Bindings/Intrinsics.h>
16
#include <LibWeb/FileAPI/Blob.h>
17
#include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
18
#include <LibWeb/HTML/StructuredSerialize.h>
19
#include <LibWeb/Infra/Strings.h>
20
#include <LibWeb/MimeSniff/MimeType.h>
21
#include <LibWeb/Streams/AbstractOperations.h>
22
#include <LibWeb/Streams/ReadableStreamDefaultReader.h>
23
#include <LibWeb/WebIDL/AbstractOperations.h>
24
#include <LibWeb/WebIDL/Buffers.h>
25
26
namespace Web::FileAPI {
27
28
JS_DEFINE_ALLOCATOR(Blob);
29
30
// Allocates a Blob on the heap of the given realm, handing the byte buffer and
// the type string over to the Blob constructor.
JS::NonnullGCPtr<Blob> Blob::create(JS::Realm& realm, ByteBuffer byte_buffer, String type)
{
    auto& heap = realm.heap();
    return heap.allocate<Blob>(realm, realm, move(byte_buffer), move(type));
}
34
35
// https://w3c.github.io/FileAPI/#convert-line-endings-to-native
36
// Rewrites every CR, LF and CR LF sequence in the input as the native line ending
// (always LF here) and returns the rebuilt string. Append failures are propagated.
ErrorOr<String> convert_line_endings_to_native(StringView string)
{
    // 1. Let native line ending be the code point U+000A LF.
    auto native_line_ending = "\n"sv;

    // 2. If the underlying platform’s conventions are to represent newlines as a carriage return and line feed sequence,
    //    set native line ending to the code point U+000D CR followed by the code point U+000A LF.
    // NOTE: This step is a no-op since LibWeb does not compile on Windows, which is the only platform we know of that
    //       uses a carriage return and line feed sequence for line endings.

    // 3. Set result to the empty string.
    StringBuilder builder;

    // 4. Let position be a position variable for s, initially pointing at the start of s.
    auto lexer = GenericLexer { string };

    // Collects everything up to (not including) the next CR or LF and appends it to the result.
    auto append_run_without_line_breaks = [&]() -> ErrorOr<void> {
        return builder.try_append(lexer.consume_until(is_any_of("\n\r"sv)));
    };

    // 5. Let token be the result of collecting a sequence of code points that are not equal to U+000A LF or U+000D CR from s given position.
    // 6. Append token to result.
    TRY(append_run_without_line_breaks());

    // 7. While position is not past the end of s:
    while (!lexer.is_eof()) {
        auto const current = lexer.peek();

        if (current == '\r') {
            // 1. If the code point at position within s equals U+000D CR:
            //    1. Append native line ending to result.
            TRY(builder.try_append(native_line_ending));

            //    2. Advance position by 1.
            lexer.ignore(1);

            //    3. If position is not past the end of s and the code point at position within s equals U+000A LF
            //       advance position by 1, so that CR LF collapses into a single line ending.
            if (!lexer.is_eof() && lexer.peek() == '\n')
                lexer.ignore(1);
        } else if (current == '\n') {
            // 2. Otherwise if the code point at position within s equals U+000A LF, advance position by 1 and
            //    append native line ending to result.
            lexer.ignore(1);
            TRY(builder.try_append(native_line_ending));
        }

        // 3. Let token be the result of collecting a sequence of code points that are not equal to U+000A LF or U+000D CR from s given position.
        // 4. Append token to result.
        TRY(append_run_without_line_breaks());
    }

    // 8. Return result.
    return builder.to_string();
}
81
82
// https://w3c.github.io/FileAPI/#process-blob-parts
83
// Concatenates the bytes of every blob part, in order, into a single buffer.
// String parts optionally get their line endings converted first, depending on
// the `endings` member of `options`. Any append or copy failure is propagated.
ErrorOr<ByteBuffer> process_blob_parts(Vector<BlobPart> const& blob_parts, Optional<BlobPropertyBag> const& options)
{
    // 1. Let bytes be an empty sequence of bytes.
    ByteBuffer bytes {};

    // 2. For each element in parts:
    for (auto const& part : blob_parts) {
        // 1. If element is a USVString, run the following sub-steps:
        auto append_string = [&](String const& string) -> ErrorOr<void> {
            // 1. Let s be element.
            // 2. If the endings member of options is "native", set s to the result of converting line endings to native of element.
            bool const wants_native_endings = options.has_value() && options->endings == Bindings::EndingType::Native;
            if (!wants_native_endings) {
                // NOTE: The AK::String is always UTF-8.
                // 3. Append the result of UTF-8 encoding s to bytes.
                return bytes.try_append(string.bytes());
            }

            auto converted = TRY(convert_line_endings_to_native(string));

            // NOTE: The AK::String is always UTF-8.
            // 3. Append the result of UTF-8 encoding s to bytes.
            return bytes.try_append(converted.bytes());
        };

        // 2. If element is a BufferSource, get a copy of the bytes held by the buffer source, and append those bytes to bytes.
        auto append_buffer_source = [&](JS::Handle<WebIDL::BufferSource> const& buffer_source) -> ErrorOr<void> {
            auto copied = TRY(WebIDL::get_buffer_source_copy(*buffer_source->raw_object()));
            return bytes.try_append(copied.bytes());
        };

        // 3. If element is a Blob, append the bytes it represents to bytes.
        auto append_blob = [&](JS::Handle<Blob> const& blob) -> ErrorOr<void> {
            return bytes.try_append(blob->raw_bytes());
        };

        TRY(part.visit(append_string, append_buffer_source, append_blob));
    }

    // 3. Return bytes.
    return bytes;
}
117
118
// Returns true when every element yielded by iterating `view` lies in the
// inclusive range 0x20..0x7E; an empty view therefore yields true.
bool is_basic_latin(StringView view)
{
    for (auto element : view) {
        bool const in_range = element >= 0x0020 && element <= 0x007E;
        if (!in_range)
            return false;
    }
    return true;
}
126
127
// Constructs a Blob in the given realm without supplying bytes or a type.
Blob::Blob(JS::Realm& realm)
    : PlatformObject(realm)
{
}

// Constructs a Blob that takes ownership of both the given bytes and the given type string.
Blob::Blob(JS::Realm& realm, ByteBuffer byte_buffer, String type)
    : PlatformObject(realm)
    , m_byte_buffer(move(byte_buffer))
    , m_type(move(type))
{
}

// Constructs a Blob that takes ownership of the given bytes; no type is supplied.
Blob::Blob(JS::Realm& realm, ByteBuffer byte_buffer)
    : PlatformObject(realm)
    , m_byte_buffer(move(byte_buffer))
{
}

// Defaulted out of line.
Blob::~Blob() = default;
146
147
// Runs the base-class initialization and then installs the Blob interface prototype.
void Blob::initialize(JS::Realm& realm)
{
    // The base class must be initialized before the prototype is set.
    Base::initialize(realm);

    WEB_SET_PROTOTYPE_FOR_INTERFACE(Blob);
}
152
153
// Writes this Blob into `record`: first the type string, then the raw bytes.
// The order must mirror the reads performed by deserialization_steps().
WebIDL::ExceptionOr<void> Blob::serialization_steps(HTML::SerializationRecord& record, bool, HTML::SerializationMemory&)
{
    auto& vm = this->vm();

    // FIXME: 1. Set serialized.[[SnapshotState]] to value’s snapshot state.

    // NON-STANDARD: The FileAPI spec does not say that the type should be serialized, but other
    //               browsers do so, and we serialize it as well to stay compatible with them.
    TRY(HTML::serialize_string(vm, record, m_type));

    // 2. Set serialized.[[ByteSequence]] to value’s underlying byte sequence.
    auto const payload = m_byte_buffer.bytes();
    TRY(HTML::serialize_bytes(vm, record, payload));

    return {};
}
168
169
// Restores this Blob from `record`, reading from `position`: first the type
// string, then the raw bytes. `position` is passed by reference to both
// deserialization helpers.
WebIDL::ExceptionOr<void> Blob::deserialization_steps(ReadonlySpan<u32> const& record, size_t& position, HTML::DeserializationMemory&)
{
    auto& vm = this->vm();

    // FIXME: 1. Set value’s snapshot state to serialized.[[SnapshotState]].

    // NON-STANDARD: The FileAPI spec does not say that the type should be deserialized, but other
    //               browsers do so, and we deserialize it as well to stay compatible with them.
    auto restored_type = TRY(HTML::deserialize_string(vm, record, position));
    m_type = move(restored_type);

    // 2. Set value’s underlying byte sequence to serialized.[[ByteSequence]].
    auto restored_bytes = TRY(HTML::deserialize_bytes(vm, record, position));
    m_byte_buffer = move(restored_bytes);

    return {};
}
184
185
// https://w3c.github.io/FileAPI/#ref-for-dom-blob-blob
186
// Builds a Blob from optional blob parts and an optional property bag.
// The bytes come from process_blob_parts(); the type is the serialized form of
// options->type when it parses as a MIME type, and the empty string otherwise.
JS::NonnullGCPtr<Blob> Blob::create(JS::Realm& realm, Optional<Vector<BlobPart>> const& blob_parts, Optional<BlobPropertyBag> const& options)
{
    auto& heap = realm.heap();

    // 1. If invoked with zero parameters, return a new Blob object consisting of 0 bytes, with size set to 0,
    //    and with type set to the empty string.
    if (!blob_parts.has_value() && !options.has_value())
        return heap.allocate<Blob>(realm, realm);

    // 2. Let bytes be the result of processing blob parts given blobParts and options.
    // NOTE: MUST() means a failure while processing the parts is treated as fatal here.
    ByteBuffer bytes {};
    if (blob_parts.has_value())
        bytes = MUST(process_blob_parts(blob_parts.value(), options));

    // 3. If the type member of the options argument is not the empty string, run the following sub-steps:
    //    FIXME: 1. If the type member is provided and is not the empty string, let t be set to the type dictionary member.
    //              If t contains any characters outside the range U+0020 to U+007E, then set t to the empty string
    //              and return from these substeps.
    //    FIXME: 2. Convert every character in t to ASCII lowercase.
    // NOTE: The spec is out of date, and we are supposed to call into the MimeType parser here.
    String normalized_type {};
    if (options.has_value() && !options->type.is_empty()) {
        if (auto parsed_type = Web::MimeSniff::MimeType::parse(options->type); parsed_type.has_value())
            normalized_type = parsed_type->serialized();
    }

    // 4. Return a Blob object referring to bytes as its associated byte sequence, with its size set to the length
    //    of bytes, and its type set to the value of t from the substeps above.
    return heap.allocate<Blob>(realm, realm, move(bytes), move(normalized_type));
}
217
218
// Constructor entry point: forwards all arguments to Blob::create() and returns its result.
WebIDL::ExceptionOr<JS::NonnullGCPtr<Blob>> Blob::construct_impl(JS::Realm& realm, Optional<Vector<BlobPart>> const& blob_parts, Optional<BlobPropertyBag> const& options)
{
    auto blob = Blob::create(realm, blob_parts, options);
    return blob;
}
222
223
// https://w3c.github.io/FileAPI/#dfn-slice
224
// Public slice() entry point; all of the work is delegated to slice_blob().
WebIDL::ExceptionOr<JS::NonnullGCPtr<Blob>> Blob::slice(Optional<i64> start, Optional<i64> end, Optional<String> const& content_type)
{
    // 1. Let sliceStart, sliceEnd, and sliceContentType be null.
    // 2. If start is given, set sliceStart to start.
    // 3. If end is given, set sliceEnd to end.
    // 4. If contentType is given, set sliceContentType to contentType.
    // NOTE: The Optional parameters already model "null unless given", so they are passed through unchanged.

    // 5. Return the result of slice blob given this, sliceStart, sliceEnd, and sliceContentType.
    return slice_blob(start, end, content_type);
}
233
234
// https://w3c.github.io/FileAPI/#slice-blob
235
// Produces a new Blob holding the bytes between the normalized start and end
// positions of this Blob, tagged with the normalized content type.
// Negative positions count back from the end; all positions are clamped to [0, size].
WebIDL::ExceptionOr<JS::NonnullGCPtr<Blob>> Blob::slice_blob(Optional<i64> start, Optional<i64> end, Optional<String> const& content_type)
{
    auto& vm = realm().vm();

    // 1. Let originalSize be blob’s size.
    i64 const blob_size = static_cast<i64>(size());

    // Shared normalization for the start and end positions (steps 2 and 3):
    //   a. If the position is null, use the supplied default.
    //   b. If the position is negative, use max((originalSize + position), 0).
    //   c. Otherwise, use min(position, originalSize).
    auto normalize_position = [blob_size](Optional<i64> const& position, i64 value_if_null) -> i64 {
        if (!position.has_value())
            return value_if_null;

        auto const requested = position.value();
        if (requested < 0)
            return max((blob_size + requested), 0);
        return min(requested, blob_size);
    };

    // 2. The start parameter, if non-null, is a value for the start point of a slice blob call, and must be treated
    //    as a byte-order position, with the zeroth position representing the first byte.
    //    If start is null, relativeStart is 0.
    i64 const relative_start = normalize_position(start, 0);

    // 3. The end parameter, if non-null, is a value for the end point of a slice blob call.
    //    If end is null, relativeEnd is originalSize.
    i64 const relative_end = normalize_position(end, blob_size);

    // 4. The contentType parameter, if non-null, is used to set the ASCII-encoded string in lower case representing
    //    the media type of the Blob. User agents must normalize contentType according to the following:
    //    a. If contentType is null, let relativeContentType be set to the empty string.
    //    b. Otherwise, let relativeContentType be set to contentType and run the substeps below:
    //       1. If relativeContentType contains any characters outside the range of U+0020 to U+007E, then set
    //          relativeContentType to the empty string and return from these substeps.
    //       2. Convert every character in relativeContentType to ASCII lowercase.
    String relative_content_type;
    if (content_type.has_value() && is_basic_latin(content_type.value()))
        relative_content_type = content_type.value().to_ascii_lowercase();

    // 5. Let span be max((relativeEnd - relativeStart), 0).
    auto span = max((relative_end - relative_start), 0);

    // 6. Return a new Blob object S with the following characteristics:
    //    a. S refers to span consecutive bytes from blob’s associated byte sequence, beginning with the byte at
    //       byte-order position relativeStart.
    //    b. S.size = span.
    //    c. S.type = relativeContentType.
    auto sliced_bytes = TRY_OR_THROW_OOM(vm, m_byte_buffer.slice(relative_start, span));
    return heap().allocate<Blob>(realm(), realm(), move(sliced_bytes), move(relative_content_type));
}
309
310
// https://w3c.github.io/FileAPI/#dom-blob-stream
311
// Public stream() entry point; returns whatever get_stream() produces.
JS::NonnullGCPtr<Streams::ReadableStream> Blob::stream()
{
    // The stream() method, when invoked, must return the result of calling get stream on this.
    auto readable_stream = get_stream();
    return readable_stream;
}
316
317
// https://w3c.github.io/FileAPI/#blob-get-stream
318
// Creates a byte ReadableStream for this Blob and queues a task on the file
// reading task source that enqueues a copy of the Blob's bytes as one chunk
// and then closes the stream. The stream is returned immediately.
JS::NonnullGCPtr<Streams::ReadableStream> Blob::get_stream()
{
    auto& blob_realm = realm();

    // 1. Let stream be a new ReadableStream created in blob’s relevant Realm.
    auto stream = blob_realm.heap().allocate<Streams::ReadableStream>(blob_realm, blob_realm);

    // 2. Set up stream with byte reading support.
    set_up_readable_stream_controller_with_byte_reading_support(stream);

    // FIXME: 3. Run the following steps in parallel:
    //    1. While not all bytes of blob have been read:
    //       NOTE: For simplicity the chunk is the entire buffer for now.
    //       1. Let bytes be the byte sequence that results from reading a chunk from blob, or failure if a chunk
    //          cannot be read.
    //          (The task below owns its own copy of the buffer, taken in the lambda capture.)
    //       2. Queue a global task on the file reading task source given blob’s relevant global object to perform
    //          the following steps:
    auto deliver_chunk = JS::create_heap_function(heap(), [stream, bytes = m_byte_buffer]() {
        // NOTE: Using a TemporaryExecutionContext here results in a crash in the method HTML::incumbent_settings_object()
        //       since we end up in a state where we have no execution context + an event loop with an empty incumbent
        //       settings object stack. We still need an execution context, therefore we push the realm's execution
        //       context onto the realm's VM, and we need an incumbent settings object, which is pushed onto the
        //       incumbent settings object stack by EnvironmentSettings::prepare_to_run_callback().
        auto& stream_realm = stream->realm();
        auto& settings = Bindings::host_defined_environment_settings_object(stream_realm);
        stream_realm.vm().push_execution_context(settings.realm_execution_context());
        settings.prepare_to_run_callback();

        // Undo both of the above, in reverse order, on every exit path from this task.
        ScopeGuard const restore_state = [&settings, &stream_realm] {
            settings.clean_up_after_running_callback();
            stream_realm.vm().pop_execution_context();
        };

        // 1. If bytes is failure, then error stream with a failure reason and abort these steps.
        // 2. Let chunk be a new Uint8Array wrapping an ArrayBuffer containing bytes. If creating the ArrayBuffer
        //    throws an exception, then error stream with that exception and abort these steps.
        auto chunk_buffer = JS::ArrayBuffer::create(stream_realm, bytes);
        auto chunk = JS::Uint8Array::create(stream_realm, bytes.size(), *chunk_buffer);

        // 3. Enqueue chunk in stream.
        auto enqueue_result = Bindings::throw_dom_exception_if_needed(stream_realm.vm(), [&]() {
            return readable_stream_enqueue(*stream->controller(), chunk);
        });

        if (enqueue_result.is_error()) {
            readable_stream_error(*stream, enqueue_result.release_error().value().value());
            return;
        }

        // FIXME: Close the stream now that we have finished enqueuing all chunks to the stream. Without this,
        //        ReadableStream.read will never resolve the second time around with 'done' set.
        //        Nowhere in the spec seems to mention this - but testing against other implementations the stream
        //        does appear to be closed after reading all data (closed callback is fired).
        //        Probably there is a better way of doing this.
        readable_stream_close(*stream);
    });

    HTML::queue_global_task(HTML::Task::Source::FileReading, blob_realm.global_object(), deliver_chunk);

    // 4. Return stream.
    return stream;
}
378
379
// https://w3c.github.io/FileAPI/#dom-blob-text
380
// Returns a promise for the Blob's contents decoded as text. All bytes are read
// from a fresh stream and the resulting ArrayBuffer is run through the UTF-8 decoder.
JS::NonnullGCPtr<JS::Promise> Blob::text()
{
    auto& blob_realm = realm();
    auto& vm = blob_realm.vm();

    // 1. Let stream be the result of calling get stream on this.
    auto stream = get_stream();

    // 2. Let reader be the result of getting a reader from stream. If that threw an exception, return a new promise
    //    rejected with that exception.
    auto maybe_reader = acquire_readable_stream_default_reader(*stream);
    if (maybe_reader.is_exception())
        return WebIDL::create_rejected_promise_from_exception(blob_realm, maybe_reader.release_error());
    auto reader = maybe_reader.release_value();

    // 3. Let promise be the result of reading all bytes from stream with reader.
    auto all_bytes_promise = reader->read_all_bytes_deprecated();

    // 4. Return the result of transforming promise by a fulfillment handler that returns the result of running
    //    UTF-8 decode on its first argument.
    auto decode_as_utf8 = JS::create_heap_function(heap(), [&vm](JS::Value first_argument) -> WebIDL::ExceptionOr<JS::Value> {
        // The fulfillment value is required to be an ArrayBuffer.
        auto const& fulfilled_object = first_argument.as_object();
        VERIFY(is<JS::ArrayBuffer>(fulfilled_object));
        auto const& raw_bytes = static_cast<const JS::ArrayBuffer&>(fulfilled_object).buffer();

        auto decoder = TextCodec::decoder_for("UTF-8"sv);
        auto decoded = TRY_OR_THROW_OOM(vm, TextCodec::convert_input_to_utf8_using_given_decoder_unless_there_is_a_byte_order_mark(*decoder, raw_bytes));
        return JS::PrimitiveString::create(vm, move(decoded));
    });

    return WebIDL::upon_fulfillment(*all_bytes_promise, decode_as_utf8);
}
408
409
// https://w3c.github.io/FileAPI/#dom-blob-arraybuffer
410
// Returns a promise for a new ArrayBuffer created from the bytes read out of a
// fresh stream over this Blob.
JS::NonnullGCPtr<JS::Promise> Blob::array_buffer()
{
    auto& blob_realm = realm();

    // 1. Let stream be the result of calling get stream on this.
    auto stream = get_stream();

    // 2. Let reader be the result of getting a reader from stream. If that threw an exception, return a new promise
    //    rejected with that exception.
    auto maybe_reader = acquire_readable_stream_default_reader(*stream);
    if (maybe_reader.is_exception())
        return WebIDL::create_rejected_promise_from_exception(blob_realm, maybe_reader.release_error());
    auto reader = maybe_reader.release_value();

    // 3. Let promise be the result of reading all bytes from stream with reader.
    auto all_bytes_promise = reader->read_all_bytes_deprecated();

    // 4. Return the result of transforming promise by a fulfillment handler that returns a new ArrayBuffer whose
    //    contents are its first argument.
    auto wrap_in_array_buffer = JS::create_heap_function(heap(), [&blob_realm](JS::Value first_argument) -> WebIDL::ExceptionOr<JS::Value> {
        // The fulfillment value is required to be an ArrayBuffer.
        auto const& fulfilled_object = first_argument.as_object();
        VERIFY(is<JS::ArrayBuffer>(fulfilled_object));
        auto const& raw_bytes = static_cast<const JS::ArrayBuffer&>(fulfilled_object).buffer();

        return JS::ArrayBuffer::create(blob_realm, raw_bytes);
    });

    return WebIDL::upon_fulfillment(*all_bytes_promise, wrap_in_array_buffer);
}
435
436
// https://w3c.github.io/FileAPI/#dom-blob-bytes
437
// Returns a promise for a Uint8Array that wraps the ArrayBuffer produced by
// reading all bytes out of a fresh stream over this Blob.
JS::NonnullGCPtr<JS::Promise> Blob::bytes()
{
    auto& blob_realm = realm();

    // 1. Let stream be the result of calling get stream on this.
    auto stream = get_stream();

    // 2. Let reader be the result of getting a reader from stream. If that threw an exception, return a new promise
    //    rejected with that exception.
    auto maybe_reader = acquire_readable_stream_default_reader(*stream);
    if (maybe_reader.is_exception())
        return WebIDL::create_rejected_promise_from_exception(blob_realm, maybe_reader.release_error());
    auto reader = maybe_reader.release_value();

    // 3. Let promise be the result of reading all bytes from stream with reader.
    auto all_bytes_promise = reader->read_all_bytes_deprecated();

    // 4. Return the result of transforming promise by a fulfillment handler that returns a new Uint8Array wrapping
    //    an ArrayBuffer containing its first argument.
    auto wrap_in_uint8_array = JS::create_heap_function(heap(), [&blob_realm](JS::Value first_argument) -> WebIDL::ExceptionOr<JS::Value> {
        // The fulfillment value is required to be an ArrayBuffer; the view covers its full byte length.
        auto& fulfilled_object = first_argument.as_object();
        VERIFY(is<JS::ArrayBuffer>(fulfilled_object));
        auto& backing_buffer = static_cast<JS::ArrayBuffer&>(fulfilled_object);
        return JS::Uint8Array::create(blob_realm, backing_buffer.byte_length(), backing_buffer);
    });

    return WebIDL::upon_fulfillment(*all_bytes_promise, wrap_in_uint8_array);
}
461
462
}