Coverage Report

Created: 2025-09-05 06:37

/src/swift-protobuf/Sources/SwiftProtobuf/AnyMessageStorage.swift
Line
Count
Source (jump to first uncovered line)
1
// Sources/SwiftProtobuf/AnyMessageStorage.swift - Custom storage for Any WKT
2
//
3
// Copyright (c) 2014 - 2017 Apple Inc. and the project authors
4
// Licensed under Apache License v2.0 with Runtime Library Exception
5
//
6
// See LICENSE.txt for license information:
7
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
8
//
9
// -----------------------------------------------------------------------------
10
///
11
/// Hand written storage class for Google_Protobuf_Any to support on demand
12
/// transforms between the formats.
13
///
14
// -----------------------------------------------------------------------------
15
16
import Foundation
17
18
/// Renders `message` as the JSON object form of an `Any`.
///
/// The object always starts with an `"@type"` member holding `typeURL`.
/// A message that conforms to `_CustomJSONCodable` contributes its own JSON
/// text under a single `"value"` member; every other message has its fields
/// written directly into the object by a regular traversal.
///
/// - Parameters:
///   - message: The message to encode.
///   - typeURL: The URL emitted for the `"@type"` member.
///   - options: JSON encoding options used for the visitor and for the custom
///     encoding of the message.
/// - Returns: The JSON text accumulated by the visitor.
/// - Throws: Whatever creating the visitor, the custom encoder, or the
///   traversal throws.
private func serializeAnyJSON(
    for message: any Message,
    typeURL: String,
    options: JSONEncodingOptions
) throws -> String {
    var encoder = try JSONEncodingVisitor(type: type(of: message), options: options)
    encoder.startObject(message: message)
    encoder.encodeField(name: "@type", stringValue: typeURL)

    switch message as? (any _CustomJSONCodable) {
    case let custom?:
        // Custom JSON types are nested under "value" rather than flattened.
        let nestedJSON = try custom.encodedJSONString(options: options)
        encoder.encodeField(name: "value", jsonText: nestedJSON)
    case nil:
        try message.traverse(visitor: &encoder)
    }

    encoder.endObject()
    return encoder.stringResult
}
35
36
88.1k
/// Writes `message` through `visitor` using the verbose `Any` text form.
///
/// When `typeURL` is empty, a URL is built for `message` from
/// `defaultAnyTypeURLPrefix`; otherwise `typeURL` is used unchanged.
private func emitVerboseTextForm(visitor: inout TextFormatEncodingVisitor, message: any Message, typeURL: String) {
    let effectiveURL =
        typeURL.isEmpty
        ? buildTypeURL(forMessage: message, typePrefix: defaultAnyTypeURLPrefix)
        : typeURL
    visitor.visitAnyVerbose(value: message, typeURL: effectiveURL)
}
45
46
0
/// Wraps `body` in ASCII curly brackets and returns the bytes as `Data`.
///
/// - Parameter body: The bytes to place between `{` and `}`.
/// - Returns: `{`, followed by `body`, followed by `}`.
private func asJSONObject(body: [UInt8]) -> Data {
    // Two extra bytes for the surrounding brackets.
    var wrapped = Data(capacity: body.count + 2)
    wrapped.append(UInt8(ascii: "{"))
    wrapped.append(contentsOf: body)
    wrapped.append(UInt8(ascii: "}"))
    return wrapped
}
54
55
/// Decodes the captured JSON members of an `Any` into an instance of `messageType`.
///
/// `contentJSON` holds the inside of the original JSON object (everything
/// but the `{` and `}`) with the `"@type"` member already removed.
///
/// * If `messageType` does not conform to `_CustomJSONCodable`, the content is
///   wrapped back into an object and decoded with the type's JSON initializer.
/// * Otherwise the content is scanned for a `"value"` member and that member's
///   JSON text is what gets decoded. Any other member is an error unless
///   `options.ignoreUnknownFields` is set.
///
/// - Parameters:
///   - contentJSON: The object's member bytes, without the enclosing brackets.
///   - extensions: Extension map handed to the scanner and the decoder.
///   - options: JSON decoding options.
///   - messageType: The concrete message type to instantiate.
/// - Returns: The decoded message.
/// - Throws: `AnyUnpackError.malformedWellKnownTypeJSON` for unexpected
///   members, plus anything the scanner or the message decoder throws.
private func unpack(
    contentJSON: [UInt8],
    extensions: any ExtensionMap,
    options: JSONDecodingOptions,
    as messageType: any Message.Type
) throws -> any Message {
    guard messageType is any _CustomJSONCodable.Type else {
        let wholeObject = asJSONObject(body: contentJSON)
        return try messageType.init(jsonUTF8Bytes: wholeObject, extensions: extensions, options: options)
    }

    let valueJSON: String = try contentJSON.withUnsafeBytes { (raw: UnsafeRawBufferPointer) -> String in
        guard !raw.isEmpty else {
            return String()
        }
        // contentJSON will be the valid JSON for inside an object (everything but
        // the '{' and '}'), so minimal validation is needed.
        var found = String()
        var scanner = JSONScanner(source: raw, options: options, extensions: extensions)
        while !scanner.complete {
            let memberName = try scanner.nextQuotedString()
            try scanner.skipRequiredColon()
            if memberName == "value" {
                found = try scanner.skip()
                break
            }
            guard options.ignoreUnknownFields else {
                // The only thing within a WKT should be "value".
                throw AnyUnpackError.malformedWellKnownTypeJSON
            }
            _ = try scanner.skip()
            try scanner.skipRequiredComma()
        }
        // If that wasn't the end, then there was another key, and WKTs should
        // only have the one when not skipping unknowns.
        guard options.ignoreUnknownFields || scanner.complete else {
            throw AnyUnpackError.malformedWellKnownTypeJSON
        }
        return found
    }
    return try messageType.init(jsonString: valueJSON, extensions: extensions, options: options)
}
95
96
/// Storage class behind `Google_Protobuf_Any`.
///
/// The payload is held in whichever form it was last provided in (see
/// `InternalState`) and is converted to another form only when asked for.
internal class AnyMessageStorage {
    // The two properties generated Google_Protobuf_Any will reference.
    var _typeURL = String()

    /// The payload as binary data, remapped to the internal `state`.
    ///
    /// The getter has no way to fail: whenever the bytes cannot be produced
    /// (serialization throws, the type for `_typeURL` is not available, or the
    /// captured JSON does not decode) it returns an empty `Data`. For the JSON
    /// state, `preTraverse()` is where those failures are reported as errors.
    /// The setter switches the storage to the binary state.
    var _value: Data {
        get {
            switch state {
            case .binary(let value):
                return Data(value)
            case .message(let message):
                do {
                    return try message.serializedBytes(partial: true)
                } catch {
                    return Data()
                }
            case .contentJSON(let contentJSON, let options):
                guard let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL) else {
                    return Data()
                }
                do {
                    let m = try unpack(
                        contentJSON: contentJSON,
                        extensions: SimpleExtensionMap(),
                        options: options,
                        as: messageType
                    )
                    return try m.serializedBytes(partial: true)
                } catch {
                    return Data()
                }
            }
        }
        set {
            state = .binary(newValue)
        }
    }

    enum InternalState {
        // a serialized binary
        // Note: Unlike contentJSON below, binary does not bother to capture the
        // decoding options. This is because the actual binary format is the binary
        // blob, i.e. - when decoding from binary, the spec doesn't include decoding
        // the binary blob, it is pass through. Instead there is a public api for
        // unpacking that takes new options when a developer decides to decode it.
        case binary(Data)
        // a message
        case message(any Message)
        // parsed JSON with the @type removed and the decoding options.
        case contentJSON([UInt8], JSONDecodingOptions)
    }
    var state: InternalState = .binary(Data())

    // This property is used as the initial default value for new instances of the type.
    // The type itself is protecting the reference to its storage via CoW semantics.
    // This will force a copy to be made of this reference when the first mutation occurs;
    // hence, it is safe to mark this as `nonisolated(unsafe)`.
    static nonisolated(unsafe) let defaultInstance = AnyMessageStorage()

    private init() {}

    /// Creates a storage holding the same type URL and state as `source`.
    init(copying source: AnyMessageStorage) {
        _typeURL = source._typeURL
        state = source.state
    }

    /// Reports whether the type name encoded in `_typeURL` equals
    /// `M.protoMessageName`. An empty type URL never matches.
    func isA<M: Message>(_ type: M.Type) -> Bool {
        if _typeURL.isEmpty {
            return false
        }
        let encodedType = typeName(fromURL: _typeURL)
        return encodedType == M.protoMessageName
    }

    /// Replaces `target` with the payload decoded as an `M`.
    ///
    /// This is only ever called with the expectation that target will be fully
    /// replaced during the unpacking and never as a merge.
    ///
    /// - Parameters:
    ///   - target: Receives the decoded message.
    ///   - extensions: Extension map used while decoding, if any.
    ///   - options: Binary decoding options, applied whenever the payload is
    ///     parsed from binary. The JSON state decodes with the options that
    ///     were captured alongside the JSON instead.
    /// - Throws: `AnyUnpackError.typeMismatch` when the stored type URL does not
    ///   name `M` (or the decoded value is not an `M`), plus any decoding error.
    func unpackTo<M: Message>(
        target: inout M,
        extensions: (any ExtensionMap)?,
        options: BinaryDecodingOptions
    ) throws {
        guard isA(M.self) else {
            throw AnyUnpackError.typeMismatch
        }

        switch state {
        case .binary(let data):
            target = try M(serializedBytes: data, extensions: extensions, partial: true, options: options)

        case .message(let msg):
            if let message = msg as? M {
                // Already right type, copy it over.
                target = message
            } else {
                // Different type, serialize and parse. The caller's decoding
                // options are honored here exactly as in the binary case.
                let bytes: [UInt8] = try msg.serializedBytes(partial: true)
                target = try M(serializedBytes: bytes, extensions: extensions, partial: true, options: options)
            }

        case .contentJSON(let contentJSON, let jsonOptions):
            let decoded = try unpack(
                contentJSON: contentJSON,
                extensions: extensions ?? SimpleExtensionMap(),
                options: jsonOptions,
                as: M.self
            )
            // Checked cast: report a mismatch as an error instead of trapping.
            guard let typed = decoded as? M else {
                throw AnyUnpackError.typeMismatch
            }
            target = typed
        }
    }

    /// Called before the message is traversed to do any error preflights.
    /// Since traverse() will use _value, this is our chance to throw
    /// when _value can't.
    ///
    /// - Throws: `BinaryEncodingError.anyTranscodeFailure` when the storage
    ///   holds JSON content whose type is not available or which fails to decode.
    func preTraverse() throws {
        switch state {
        case .binary:
            // Nothing to be checked.
            break

        case .message:
            // When set from a developer provided message, partial support
            // is done. Any message that comes in from another format isn't
            // checked, and transcoding the isInitialized requirement is
            // never inserted.
            break

        case .contentJSON(let contentJSON, let options):
            // contentJSON requires we have the type available for decoding.
            guard let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL) else {
                throw BinaryEncodingError.anyTranscodeFailure
            }
            do {
                // Decodes the full JSON and then discard the result.
                // The regular traversal will decode this again by querying the
                // `value` field, but that has no way to fail.  As a result,
                // we need this to accurately handle decode errors.
                _ = try unpack(
                    contentJSON: contentJSON,
                    extensions: SimpleExtensionMap(),
                    options: options,
                    as: messageType
                )
            } catch {
                throw BinaryEncodingError.anyTranscodeFailure
            }
        }
    }
}
243
244
/// Custom handling for Text format.
245
extension AnyMessageStorage {
    /// Decodes the verbose text form of an `Any` whose type URL is `url`.
    ///
    /// The type URL is stored first. The nested message is then parsed with a
    /// sub-decoder created for the registered message type, and the result
    /// becomes the `.message` state.
    ///
    /// - Throws: `TextFormatDecodingError.malformedText` when no type is
    ///   registered for `url` or when another field follows the nested
    ///   message, plus anything the nested decoding throws.
    func decodeTextFormat(typeURL url: String, decoder: inout TextFormatDecoder) throws {
        _typeURL = url
        // Decoding the verbose form requires knowing the type; if it wasn't
        // registered, it can't be parsed.
        guard let messageType = Google_Protobuf_Any.messageType(forTypeURL: url) else {
            throw TextFormatDecodingError.malformedText
        }

        let terminator = try decoder.scanner.skipObjectStart()
        var subDecoder = try TextFormatDecoder(
            messageType: messageType,
            scanner: decoder.scanner,
            terminator: terminator
        )

        let decoded: any Message
        if messageType == Google_Protobuf_Any.self {
            var nestedAny = Google_Protobuf_Any()
            try nestedAny.decodeTextFormat(decoder: &subDecoder)
            decoded = nestedAny
        } else {
            var nestedMessage = messageType.init()
            try nestedMessage.decodeMessage(decoder: &subDecoder)
            decoded = nestedMessage
        }
        state = .message(decoded)

        // Continue the outer decode from where the sub-decoder stopped.
        decoder.scanner = subDecoder.scanner
        // Verbose any can never have additional keys.
        guard try decoder.nextFieldNumber() == nil else {
            throw TextFormatDecodingError.malformedText
        }
    }

    /// Specialized traverse for writing out a Text form of the Any.
    ///
    /// This prefers the more-legible "verbose" format if it can use it,
    /// otherwise it falls back to simpler forms: the type URL as field 1
    /// followed by the raw binary value as field 2, or by the captured JSON.
    internal func textTraverse(visitor: inout TextFormatEncodingVisitor) {
        switch state {
        case .binary(let valueData):
            // If we can decode it, we can write the readable verbose form.
            if let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL),
                let decoded = try? messageType.init(serializedBytes: valueData, partial: true)
            {
                emitVerboseTextForm(visitor: &visitor, message: decoded, typeURL: _typeURL)
                return
            }
            // Otherwise just print the type and raw binary data.
            if !_typeURL.isEmpty {
                try! visitor.visitSingularStringField(value: _typeURL, fieldNumber: 1)
            }
            if !valueData.isEmpty {
                try! visitor.visitSingularBytesField(value: valueData, fieldNumber: 2)
            }

        case .message(let msg):
            emitVerboseTextForm(visitor: &visitor, message: msg, typeURL: _typeURL)

        case .contentJSON(let contentJSON, let options):
            // If we can decode it, we can write the readable verbose form.
            if let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL),
                let decoded = try? unpack(
                    contentJSON: contentJSON,
                    extensions: SimpleExtensionMap(),
                    options: options,
                    as: messageType
                )
            {
                emitVerboseTextForm(visitor: &visitor, message: decoded, typeURL: _typeURL)
                return
            }
            // Otherwise just print the type and the raw JSON data.
            if !_typeURL.isEmpty {
                try! visitor.visitSingularStringField(value: _typeURL, fieldNumber: 1)
            }
            // Build a readable form of the JSON:
            visitor.visitAnyJSONBytesField(value: asJSONObject(body: contentJSON))
        }
    }
}
326
327
/// The obvious goal for Hashable/Equatable conformance would be for
328
/// hash and equality to behave as if we always decoded the inner
329
/// object and hashed or compared that.  Unfortunately, Any typically
330
/// stores serialized contents and we don't always have the ability to
331
/// deserialize it.  Since none of our supported serializations are
332
/// fully deterministic, we can't even ensure that equality will
333
/// behave this way when the Any contents are in the same
334
/// serialization.
335
///
336
/// As a result, we can only really perform a "best effort" equality
337
/// test.  Of course, regardless of the above, we must guarantee that
338
/// hashValue is compatible with equality.
339
extension AnyMessageStorage {
    // Can't use _valueData for a few reasons:
    // 1. Since decode is done on demand, two objects could be equal
    //    but created differently (one from JSON, one for Message, etc.),
    //    and the hash values have to be equal even if we don't have data
    //    yet.
    // 2. map<> serialization order is undefined. At the time of writing
    //    the Swift, Objective-C, and Go runtimes all tend to have random
    //    orders, so the messages could be identical, but in binary form
    //    they could differ.

    /// Feeds only the type URL into `hasher`, and nothing at all when the
    /// URL is empty.
    public func hash(into hasher: inout Hasher) {
        guard !_typeURL.isEmpty else {
            return
        }
        hasher.combine(_typeURL)
    }

    /// Best-effort equality against `other`.
    ///
    /// Since the library does lazy Any decode, equality is a very hard problem.
    /// If things exactly match, that's pretty easy; otherwise, one ends up
    /// having to err on the side of saying they aren't equal.
    ///
    /// The best option would be to have Message forms and compare those, as that
    /// removes issues like map<> serialization order, some other protocol buffer
    /// implementation details/bugs around serialized form order, etc.; but that
    /// would also greatly slow down equality tests. So this compares only what
    /// is already present.
    ///
    /// - Returns: `false` when the type URLs differ or the two storages are in
    ///   different states; otherwise the result of comparing the stored
    ///   messages, bytes, or JSON content as described on each case below.
    func isEqualTo(other: AnyMessageStorage) -> Bool {
        guard _typeURL == other._typeURL else {
            return false
        }

        switch (state, other.state) {
        case (.message(let mine), .message(let theirs)) where type(of: mine) == type(of: theirs):
            // Since the messages are known to be same type, we can claim both equal and
            // not equal based on the equality comparison.
            return mine.isEqualTo(message: theirs)

        case (.binary(let mine), .binary(let theirs)):
            // If both have serialized data, and they exactly match; the messages are equal.
            // Because there could be map in the message, the fact that the data isn't the
            // same doesn't always mean the messages aren't equal. Likewise, the binary could
            // have been created by a library that doesn't order the fields, or the binary was
            // created using the appending ability of the binary format.
            return mine == theirs

        case (.contentJSON(let mine, _), .contentJSON(let theirs, _)):
            // If both have contentJSON, and they exactly match; the messages are equal.
            // Because there could be map in the message (or the JSON could just be in a different
            // order), the fact that the JSON isn't the same doesn't always mean the messages
            // aren't equal.
            return mine == theirs

        default:
            // Out of options. To do more compares, the states conversions would have to be
            // done to do comparisons; and since equality can be used somewhat removed from
            // a developer (if they put protos in a Set, use them as keys to a Dictionary, etc),
            // the conversion cost might be too high for those uses.  Give up and say they aren't equal.
            return false
        }
    }
}
407
408
// _CustomJSONCodable support for Google_Protobuf_Any
409
extension AnyMessageStorage {
    // Spec for Any says this should contain at least one slash. Looking at upstream languages, most
    // actually look up the value in their runtime registries, but since we do deferred parsing
    // we can't assume the registry is complete, thus just do this minimal validation check.
    fileprivate func isTypeURLValid() -> Bool {
        _typeURL.contains("/")
    }

    /// Throws the JSON encoding error matching the current type URL when it
    /// fails `isTypeURLValid()`: one error for an empty URL, another for a
    /// non-empty URL that is invalid.
    fileprivate func requireEncodableTypeURL() throws {
        if isTypeURLValid() {
            return
        }
        if _typeURL.isEmpty {
            throw SwiftProtobufError.JSONEncoding.emptyAnyTypeURL()
        }
        throw SwiftProtobufError.JSONEncoding.invalidAnyTypeURL(type_url: _typeURL)
    }

    /// Builds a JSON object whose first member is `"@type"` set to `_typeURL`,
    /// followed, when `contentJSON` is not empty, by a comma and those bytes
    /// verbatim.
    fileprivate func typedJSONObject(withContent contentJSON: [UInt8]) -> String {
        var jsonEncoder = JSONEncoder()
        jsonEncoder.startObject()
        jsonEncoder.startField(name: "@type")
        jsonEncoder.putStringValue(value: _typeURL)
        if !contentJSON.isEmpty {
            jsonEncoder.append(staticText: ",")
            // NOTE: This doesn't really take `options` into account since it is
            // just reflecting out what was taken in originally.
            jsonEncoder.append(utf8Bytes: contentJSON)
        }
        jsonEncoder.endObject()
        return jsonEncoder.stringResult
    }

    /// Override the traversal-based JSON encoding.
    ///
    /// This builds an Any JSON representation from one of:
    ///  * The message we were initialized with,
    ///  * The JSON fields we last deserialized, or
    ///  * The protobuf field we were deserialized from.
    /// The last case requires locating the type, deserializing
    /// into an object, then reserializing back to JSON.
    ///
    /// - Throws: A `SwiftProtobufError.JSONEncoding` error for an empty or
    ///   invalid type URL, `JSONEncodingError.anyTranscodeFailure` when binary
    ///   data must be transcoded but its type is not available, plus anything
    ///   decoding or re-encoding the message throws.
    func encodedJSONString(options: JSONEncodingOptions) throws -> String {
        switch state {
        case .binary(let valueData):
            // Follow the C++ protostream_objectsource.cc's
            // ProtoStreamObjectSource::RenderAny() special casing of an empty value.
            if valueData.isEmpty && _typeURL.isEmpty {
                return "{}"
            }
            try requireEncodableTypeURL()
            guard !valueData.isEmpty else {
                // Nothing to transcode: the object carries only the type.
                return typedJSONObject(withContent: [])
            }
            // Transcode by decoding the binary data to a message object
            // and then recode back into JSON.
            guard let messageType = Google_Protobuf_Any.messageType(forTypeURL: _typeURL) else {
                // If we don't have the type available, we can't decode the
                // binary value, so we're stuck.  (The Google spec does not
                // provide a way to just package the binary value for someone
                // else to decode later.)
                throw JSONEncodingError.anyTranscodeFailure
            }
            let decoded = try messageType.init(serializedBytes: valueData, partial: true)
            return try serializeAnyJSON(for: decoded, typeURL: _typeURL, options: options)

        case .message(let msg):
            // We should have been initialized with a typeURL, make sure it is valid.
            let hasTypeURL = !_typeURL.isEmpty
            if hasTypeURL && !isTypeURLValid() {
                throw SwiftProtobufError.JSONEncoding.invalidAnyTypeURL(type_url: _typeURL)
            }
            // If it was cleared, default it.
            let url =
                hasTypeURL
                ? _typeURL
                : buildTypeURL(forMessage: msg, typePrefix: defaultAnyTypeURLPrefix)
            return try serializeAnyJSON(for: msg, typeURL: url, options: options)

        case .contentJSON(let contentJSON, _):
            try requireEncodableTypeURL()
            return typedJSONObject(withContent: contentJSON)
        }
    }

    // TODO: If the type is well-known or has already been registered,
    // we should consider decoding eagerly.  Eager decoding would
    // catch certain errors earlier (good) but would probably be
    // a performance hit if the Any contents were never accessed (bad).
    // Of course, we can't always decode eagerly (we don't always have the
    // message type available), so the deferred logic here is still needed.

    /// Reads one JSON object from `decoder` into this storage without
    /// decoding the payload.
    ///
    /// The `"@type"` member is stored as the type URL. Every other member is
    /// re-emitted verbatim into a byte buffer that becomes the `.contentJSON`
    /// state, together with the decoder's options. An empty object leaves the
    /// storage reset to an empty type URL and empty binary data.
    ///
    /// - Throws: A `SwiftProtobufError.JSONDecoding` error when the type URL is
    ///   invalid or missing, plus anything the scanner throws.
    func decodeJSON(from decoder: inout JSONDecoder) throws {
        try decoder.scanner.skipRequiredObjectStart()
        // Reset state
        _typeURL = String()
        state = .binary(Data())
        guard !decoder.scanner.skipOptionalObjectEnd() else {
            return
        }

        var passthrough = JSONEncoder()
        while true {
            let memberName = try decoder.scanner.nextQuotedString()
            try decoder.scanner.skipRequiredColon()
            switch memberName {
            case "@type":
                _typeURL = try decoder.scanner.nextQuotedString()
                guard isTypeURLValid() else {
                    throw SwiftProtobufError.JSONDecoding.invalidAnyTypeURL(type_url: _typeURL)
                }
            default:
                passthrough.startField(name: memberName)
                let memberJSON = try decoder.scanner.skip()
                passthrough.append(text: memberJSON)
            }

            guard decoder.scanner.skipOptionalObjectEnd() else {
                // More members follow.
                try decoder.scanner.skipRequiredComma()
                continue
            }

            if _typeURL.isEmpty {
                throw SwiftProtobufError.JSONDecoding.emptyAnyTypeURL()
            }
            // Capture the options, but set the messageDepthLimit to be what
            // was left right now, as that is the limit when the JSON is finally
            // parsed.
            var capturedOptions = decoder.options
            capturedOptions.messageDepthLimit = decoder.scanner.recursionBudget
            state = .contentJSON(Array(passthrough.dataResult), capturedOptions)
            return
        }
    }
}