Coverage Report

Created: 2025-07-04 06:57

/src/swift-protobuf/Sources/SwiftProtobuf/BytecodeReader.swift
Line
Count
Source (jump to first uncovered line)
1
// Sources/SwiftProtobuf/BytecodeReader.swift - Internal bytecode reader
2
//
3
// Copyright (c) 2014 - 2025 Apple Inc. and the project authors
4
// Licensed under Apache License v2.0 with Runtime Library Exception
5
//
6
// See LICENSE.txt for license information:
7
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
8
//
9
// -----------------------------------------------------------------------------
10
11
/// Reads values encoded in a SwiftProtobuf bytecode stream.
///
/// Every malformed-input condition detected by the reader (truncated stream, invalid byte,
/// unknown opcode, unsupported program format) traps rather than being reported as a
/// recoverable error.
package struct BytecodeReader<Instruction: RawRepresentable> where Instruction.RawValue == UInt64 {
    /// The remaining slice of the program that has not yet been read.
    private var remainingProgram: UnsafeBufferPointer<UInt8>.SubSequence

    /// Indicates whether or not there is still data that hasn't yet been read in the bytecode
    /// stream.
    package var hasData: Bool {
        !remainingProgram.isEmpty
    }

    /// Creates a new bytecode reader that reads the given bytecode stream.
    ///
    /// The first integer of the stream is consumed immediately and validated as the program
    /// format.
    ///
    /// - Precondition: The stream must not be empty, and its program format must not be greater
    ///   than `latestBytecodeProgramFormat`.
    package init(remainingProgram: UnsafeBufferPointer<UInt8>.SubSequence) {
        self.remainingProgram = remainingProgram

        // We reserve the first integer of the program text as a "format specifier". This
        // future-proofs us if we ever want to change the way programs themselves are encoded
        // (for example, compressing them).
        Self.checkProgramFormat(nextUInt64())
    }

    /// Checks that the given program format is valid (i.e., not greater than the runtime supports),
    /// trapping if it is invalid.
    static func checkProgramFormat(_ programFormat: UInt64) {
        guard programFormat <= latestBytecodeProgramFormat else {
            fatalError("Unexpected bytecode program format \(programFormat)")
        }
    }

    /// Reads and returns the next instruction from the bytecode stream.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, the next
    ///   opcode must not be zero, and the opcode must be a valid raw value of `Instruction`.
    ///
    /// - Returns: The instruction that was read from the bytecode stream.
    package mutating func nextInstruction() -> Instruction {
        precondition(hasData, "Unexpected end of bytecode stream")

        let opcode = nextUInt64()
        precondition(opcode != 0, "Opcode 0 is reserved; do not use it in your own instructions")
        guard let instruction = Instruction(rawValue: opcode) else {
            fatalError("Unexpected opcode \(opcode) for instruction set \(Instruction.self)")
        }
        return instruction
    }

    /// Reads and returns the next signed 32-bit integer from the bytecode stream.
    ///
    /// This is provided as its own primitive operation because 32-bit values are extremely common
    /// as field numbers (0 to 2^29-1) and enum cases (-2^31 to 2^31-1). In particular for enum
    /// cases, using this function specifically for those cases avoids making mistakes involving
    /// sign- vs. zero-extension between differently-sized integers.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream.
    ///
    /// - Returns: The signed 32-bit integer that was read from the bytecode stream.
    package mutating func nextInt32() -> Int32 {
        // `Int32`s are stored by converting them bit-wise to a `UInt32` and then zero-extended to
        // `UInt64`, since this representation is smaller than sign-extending them to 64 bits.
        let uint64Value = nextUInt64()
        // Debug-only check; release builds truncate to the low 32 bits.
        assert(uint64Value <= UInt64(UInt32.max), "nextInt32() read a value larger than 32 bits")
        return Int32(bitPattern: UInt32(truncatingIfNeeded: uint64Value))
    }

    /// Reads and returns the next unsigned 64-bit integer from the bytecode stream.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, the stream must
    ///   not end in the middle of the integer, every byte of the integer must have its most
    ///   significant bit clear, and the encoded value must fit into 64 bits.
    ///
    /// - Returns: The unsigned 64-bit integer that was read from the bytecode stream.
    package mutating func nextUInt64() -> UInt64 {
        // We store our programs as `StaticString`s, but those are still required to be UTF-8
        // encoded. This means we can't use a standard varint encoding for integers (because we
        // cannot arbitrarily use the most significant bit), but we can use a slightly modified
        // version that always keeps the MSB clear and uses the next-to-MSB as the continuation bit.
        guard let leadingByte = remainingProgram.first else {
            preconditionFailure("Unexpected end of bytecode stream")
        }
        remainingProgram = remainingProgram.dropFirst()
        precondition(leadingByte & 0x80 == 0, "Invalid integer leading byte \(leadingByte)")

        // Fast path: the overwhelmingly common case is a value that fits in a single byte.
        var value = UInt64(leadingByte & 0x3f)
        if leadingByte & 0x40 == 0 {
            return value
        }

        // Each continuation byte contributes six more bits, least-significant group first.
        var shift: UInt64 = 6
        while true {
            guard let byte = remainingProgram.first else {
                preconditionFailure("Unexpected end of bytecode stream in the middle of an integer")
            }
            remainingProgram = remainingProgram.dropFirst()
            precondition(byte & 0x80 == 0, "Invalid integer continuation byte \(byte)")

            // The group at shift 60 only has room for four bits; a round-trip through the shift
            // detects payload bits that would otherwise be silently discarded.
            let payload = UInt64(byte & 0x3f)
            guard (payload &<< shift) &>> shift == payload else {
                fatalError("Bytecode value too large to fit into UInt64")
            }
            value |= payload &<< shift

            if byte & 0x40 == 0 {
                return value
            }
            shift &+= 6
            guard shift < 64 else {
                fatalError("Bytecode value too large to fit into UInt64")
            }
        }
    }

    /// Reads and returns the next null-terminated string from the bytecode stream.
    ///
    /// The null terminator is consumed but is not part of the returned buffer.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, and the
    ///   remaining stream must contain a null byte.
    ///
    /// - Returns: An `UnsafeBufferPointer` containing the string that was read from the bytecode
    ///   stream. This pointer is rebased -- its base address is the start of the string that was
    ///   just read, not the start of the entire stream -- but its lifetime is still tied to that of
    ///   the original bytecode stream (which is immortal if it originated from a static string).
    package mutating func nextNullTerminatedString() -> UnsafeBufferPointer<UInt8> {
        precondition(hasData, "Unexpected end of bytecode stream")

        guard let nullIndex = remainingProgram.firstIndex(of: 0) else {
            preconditionFailure("Unexpected end of bytecode stream while looking for end of string")
        }
        let string = UnsafeBufferPointer(rebasing: remainingProgram[..<nullIndex])
        // Resume reading just past the terminator.
        remainingProgram = remainingProgram[remainingProgram.index(after: nullIndex)...]
        return string
    }

    /// Reads and returns the next array of null-terminated strings from the bytecode stream.
    ///
    /// The array is encoded as an integer element count followed by that many null-terminated
    /// strings.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, and the stream
    ///   must contain as many null-terminated strings as the encoded count claims.
    ///
    /// - Returns: An array of `UnsafeBufferPointer`s containing the strings that were read from the
    ///   bytecode stream. See the documentation of `nextNullTerminatedString()` for details on the
    ///   lifetimes of these pointers.
    package mutating func nextNullTerminatedStringArray() -> [UnsafeBufferPointer<UInt8>] {
        precondition(hasData, "Unexpected end of bytecode stream")

        // Every string occupies at least one byte (its terminator), so a count larger than the
        // number of remaining bytes can never be satisfied. Rejecting it up front avoids
        // allocating an enormous buffer for a corrupt count.
        let encodedCount = nextUInt64()
        guard let count = Int(exactly: encodedCount), count <= remainingProgram.count else {
            preconditionFailure(
                "String array count \(encodedCount) exceeds the remaining bytecode stream"
            )
        }
        return [UnsafeBufferPointer<UInt8>](unsafeUninitializedCapacity: count) {
            (buffer, initializedCount) in
            for index in 0..<count {
                buffer.initializeElement(at: index, to: nextNullTerminatedString())
            }
            initializedCount = count
        }
    }
}
150
151
/// The newest bytecode program format that `BytecodeReader` understands.
///
/// A `BytecodeWriter` (see protoc-gen-swift) should *only* ever emit programs in this format;
/// nothing is gained by generating a format older than the latest one the runtime supports.
/// Readers have the opposite obligation: they must keep accepting the latest format and every
/// earlier one (unless a breaking change is being made deliberately).
package let latestBytecodeProgramFormat: UInt64 = 0