/src/swift-protobuf/Sources/SwiftProtobuf/BytecodeReader.swift

Source (jump to first uncovered line)
// Sources/SwiftProtobuf/BytecodeReader.swift - Internal bytecode reader
//
// Copyright (c) 2014 - 2025 Apple Inc. and the project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See LICENSE.txt for license information:
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
//
// -----------------------------------------------------------------------------

/// Reads values encoded in a SwiftProtobuf bytecode stream.
///
/// A stream consists of variable-length integers and null-terminated strings. The first integer
/// in the stream is a program format specifier; it is consumed and validated when the reader is
/// created. Malformed streams are reported by trapping rather than by throwing.
package struct BytecodeReader<Instruction: RawRepresentable> where Instruction.RawValue == UInt64 {
    /// The remaining slice of the program that has not yet been read.
    private var remainingProgram: UnsafeBufferPointer<UInt8>.SubSequence

    /// Indicates whether or not there is still data that hasn't yet been read in the bytecode
    /// stream.
    package var hasData: Bool {
        !remainingProgram.isEmpty
    }

    /// Creates a new bytecode reader that reads the given bytecode stream.
    ///
    /// The leading program format specifier is read and checked immediately, so the first value
    /// returned by the reader afterwards is the one that follows the specifier.
    ///
    /// - Precondition: `remainingProgram` must not be empty and must start with a program format
    ///   that is not greater than `latestBytecodeProgramFormat`.
    package init(remainingProgram: UnsafeBufferPointer<UInt8>.SubSequence) {
        self.remainingProgram = remainingProgram

        // We reserve the first integer of the program text as a "format specifier". This
        // future-proofs us if we ever want to change the way programs themselves are encoded
        // (for example, compressing them).
        Self.checkProgramFormat(nextUInt64())
    }

    /// Checks that the given program format is valid (i.e., not greater than the runtime supports),
    /// trapping if it is invalid.
    static func checkProgramFormat(_ programFormat: UInt64) {
        if programFormat > latestBytecodeProgramFormat {
            fatalError("Unexpected bytecode program format \(programFormat)")
        }
    }

    /// Reads and returns the next instruction from the bytecode stream.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, the next opcode
    ///   must not be zero, and the opcode must be a valid raw value of `Instruction`.
    ///
    /// - Returns: The instruction that was read from the bytecode stream.
    package mutating func nextInstruction() -> Instruction {
        precondition(hasData, "Unexpected end of bytecode stream")

        let opcode = nextUInt64()
        precondition(opcode != 0, "Opcode 0 is reserved; do not use it in your own instructions")
        guard let instruction = Instruction(rawValue: opcode) else {
            fatalError("Unexpected opcode \(opcode) for instruction set \(Instruction.self)")
        }
        return instruction
    }

    /// Reads and returns the next signed 32-bit integer from the bytecode stream.
    ///
    /// This is provided as its own primitive operation because 32-bit values are extremely common
    /// as field numbers (0 to 2^29-1) and enum cases (-2^31 to 2^31-1). In particular for enum
    /// cases, using this function specifically for those cases avoids making mistakes involving
    /// sign- vs. zero-extension between differently-sized integers.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream. The encoded value
    ///   is expected to fit in 32 bits; this is only verified by an `assert`, and the value is
    ///   truncated to its low 32 bits otherwise.
    ///
    /// - Returns: The signed 32-bit integer that was read from the bytecode stream.
    package mutating func nextInt32() -> Int32 {
        // `Int32`s are stored by converting them bit-wise to a `UInt32` and then zero-extended to
        // `UInt64`, since this representation is smaller than sign-extending them to 64 bits.
        let uint64Value = nextUInt64()
        assert(uint64Value < 1 &<< 32, "nextInt32() read a value larger than 32 bits")
        return Int32(bitPattern: UInt32(truncatingIfNeeded: uint64Value))
    }

    /// Reads and returns the next unsigned 64-bit integer from the bytecode stream.
    ///
    /// Each encoded byte carries six payload bits (least significant group first). Bit 7 must
    /// always be clear and bit 6 signals that another byte follows.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, the stream must
    ///   not end in the middle of the integer, and the encoded value must fit in 64 bits.
    ///
    /// - Returns: The unsigned 64-bit integer that was read from the bytecode stream.
    package mutating func nextUInt64() -> UInt64 {
        precondition(hasData, "Unexpected end of bytecode stream")

        // We store our programs as `StaticString`s, but those are still required to be UTF-8
        // encoded. This means we can't use a standard varint encoding for integers (because we
        // cannot arbitrarily use the most significant bit), but we can use a slightly modified
        // version that always keeps the MSB clear and uses the next-to-MSB as the continuation bit.
        let byte = UInt64(remainingProgram.first!)
        remainingProgram = remainingProgram.dropFirst()
        precondition(byte & 0x80 == 0, "Invalid integer leading byte \(byte)")

        if byte & 0x40 == 0 {
            return byte
        }
        var value: UInt64 = byte & 0x3f
        var shift: UInt64 = 6
        while true {
            // The previous byte promised a continuation; a stream that stops here is truncated.
            guard let byte = remainingProgram.first else {
                preconditionFailure("Unexpected end of bytecode stream in the middle of an integer")
            }
            remainingProgram = remainingProgram.dropFirst()
            precondition(byte & 0x80 == 0, "Invalid integer continuation byte \(byte)")

            // The final group (shift 60) only has room for four of its six payload bits. Shifting
            // up and back down detects any payload bits that would fall off the top instead of
            // silently discarding them.
            let payload = UInt64(byte & 0x3f)
            guard (payload &<< shift) &>> shift == payload else {
                fatalError("Bytecode value too large to fit into UInt64")
            }
            value |= payload &<< shift
            if byte & 0x40 == 0 {
                return value
            }
            shift &+= 6
            guard shift < 64 else {
                fatalError("Bytecode value too large to fit into UInt64")
            }
        }
    }

    /// Reads and returns the next null-terminated string from the bytecode stream.
    ///
    /// The terminating null byte is consumed but is not part of the returned buffer.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, and the remaining
    ///   stream must contain a null byte.
    ///
    /// - Returns: An `UnsafeBufferPointer` containing the string that was read from the bytecode
    ///   stream. This pointer is rebased -- its base address is the start of the string that was
    ///   just read, not the start of the entire stream -- but its lifetime is still tied to that of
    ///   the original bytecode stream (which is immortal if it originated from a static string).
    package mutating func nextNullTerminatedString() -> UnsafeBufferPointer<UInt8> {
        precondition(hasData, "Unexpected end of bytecode stream")

        guard let nullIndex = remainingProgram.firstIndex(of: 0) else {
            preconditionFailure("Unexpected end of bytecode stream while looking for end of string")
        }
        // Resume reading just past the terminator once the result has been formed.
        let resumeIndex = remainingProgram.index(after: nullIndex)
        defer { remainingProgram = remainingProgram[resumeIndex...] }
        return .init(rebasing: remainingProgram[..<nullIndex])
    }

    /// Reads and returns the next array of null-terminated strings from the bytecode stream.
    ///
    /// The array is encoded as an integer element count followed by that many null-terminated
    /// strings.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, the element count
    ///   must be representable as an `Int`, and the stream must contain that many strings.
    ///
    /// - Returns: An array of `UnsafeBufferPointer`s containing the strings that were read from the
    ///   bytecode stream. See the documentation of `nextNullTerminatedString()` for details on the
    ///   lifetimes of these pointers.
    package mutating func nextNullTerminatedStringArray() -> [UnsafeBufferPointer<UInt8>] {
        precondition(hasData, "Unexpected end of bytecode stream")

        let count = Int(nextUInt64())
        return [UnsafeBufferPointer<UInt8>](unsafeUninitializedCapacity: count) {
            (buffer, initializedCount) in
            for index in 0..<count {
                buffer.initializeElement(at: index, to: nextNullTerminatedString())
            }
            initializedCount = count
        }
    }
}

/// Indicates the latest bytecode program format supported by `BytecodeReader`.
///
/// `BytecodeReader.checkProgramFormat(_:)` traps when a program declares a format greater than
/// this value.
///
/// Programs written by a `BytecodeWriter` (see protoc-gen-swift) should *only* emit this
/// version; there is no reason to generate an older version than the latest that the runtime
/// supports. Readers, on the other hand, must support the latest and all previous formats (unless
/// making breaking changes).
package let latestBytecodeProgramFormat: UInt64 = 0

Line	Count	Source (jump to first uncovered line)
1		// Sources/SwiftProtobuf/BytecodeReader.swift - Internal bytecode reader
2		//
3		// Copyright (c) 2014 - 2025 Apple Inc. and the project authors
4		// Licensed under Apache License v2.0 with Runtime Library Exception
5		//
6		// See LICENSE.txt for license information:
7		// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
8		//
9		// -----------------------------------------------------------------------------
10
11		/// Reads values encoded in a SwiftProtobuf bytecode stream.
12		package struct BytecodeReader<Instruction: RawRepresentable> where Instruction.RawValue == UInt64 {
13		/// The remaining slice of the program that has not yet been read.
14		private var remainingProgram: UnsafeBufferPointer<UInt8>.SubSequence
15
16		/// Indicates whether or not there is still data that hasn't yet been read in the bytecode
17		/// stream.
18	29.1k	package var hasData: Bool {
19	29.1k	!remainingProgram.isEmpty
20	29.1k	}
21
22		/// Creates a new bytecode reader that reads the given bytecode stream.
23	300	package init(remainingProgram: UnsafeBufferPointer<UInt8>.SubSequence) {
24	300	self.remainingProgram = remainingProgram
25	300
26	300	// We reserve the first integer of the program text as a "format specifier". This
27	300	// future-proofs us if we ever want to change the way programs themselves are encoded
28	300	// (for example, compressing them).
29	300	Self.checkProgramFormat(nextUInt64())
30	300	}
31
32		/// Checks that the given program format is valid (i.e., not greater than the runtime supports),
33		/// trapping if it is invalid.
34	240	static func checkProgramFormat(_ programFormat: UInt64) {
35	240	if programFormat > latestBytecodeProgramFormat {
36	0	fatalError("Unexpected bytecode program format \(programFormat)")
37	240	}
38	240	}
39
40		/// Reads and returns the next instruction from the bytecode stream.
41		///
42		/// - Precondition: The reader must not be at the end of the bytecode stream, and the next
43		/// opcode must not be zero.
44		///
45		/// - Returns: The instruction that was read from the bytecode stream.
46	3.44k	package mutating func nextInstruction() -> Instruction {
47	3.44k	precondition(hasData, "Unexpected end of bytecode stream")
48	3.44k
49	3.44k	let opcode = nextUInt64()
50	3.44k	precondition(opcode != 0, "Opcode 0 is reserved; do not use it in your own instructions")
51	3.44k	guard let instruction = Instruction(rawValue: opcode) else {
52	0	fatalError("Unexpected opcode \(opcode) for instruction set \(Instruction.self)")
53	3.44k	}
54	3.44k	return instruction
55	3.44k	}
56
57		/// Reads and returns the next signed 32-bit integer from the bytecode stream.
58		///
59		/// This is provided as its own primitive operation because 32-bit values are extremely common
60		/// as field numbers (0 to 2^29-1) and enum cases (-2^31 to 2^31-1). In particular for enum
61		/// cases, using this function specifically for those cases avoids making mistakes involving
62		/// sign- vs. zero-extension between differently-sized integers.
63		///
64		/// - Precondition: The reader must not be at the end of the bytecode stream.
65		///
66		/// - Returns: The signed 32-bit integer that was read from the bytecode stream.
67	185	package mutating func nextInt32() -> Int32 {
68	185	// `Int32`s are stored by converting them bit-wise to a `UInt32` and then zero-extended to
69	185	// `UInt64`, since this representation is smaller than sign-extending them to 64 bits.
70	185	let uint64Value = nextUInt64()
71	185	assert(uint64Value < 1 &<< 32, "nextInt32() read a value larger than 32 bits")
72	185	return Int32(bitPattern: UInt32(truncatingIfNeeded: uint64Value))
73	185	}
74
75		/// Reads and returns the next unsigned 64-bit integer from the bytecode stream.
76		///
77		/// - Precondition: The reader must not be at the end of the bytecode stream.
78		///
79		/// - Returns: The unsigned 64-bit integer that was read from the bytecode stream.
80	3.93k	package mutating func nextUInt64() -> UInt64 {
81	3.93k	precondition(hasData, "Unexpected end of bytecode stream")
82	3.93k
83	3.93k	// We store our programs as `StaticString`s, but those are still required to be UTF-8
84	3.93k	// encoded. This means we can't use a standard varint encoding for integers (because we
85	3.93k	// cannot arbitrarily use the most significant bit), but we can use a slightly modified
86	3.93k	// version that always keeps the MSB clear and uses the next-to-MSB as the continuation bit.
87	3.93k	let byte = UInt64(remainingProgram.first!)
88	3.93k	remainingProgram = remainingProgram.dropFirst()
89	3.93k	precondition(byte & 0x80 == 0, "Invalid integer leading byte \(byte)")
90	3.93k
91	3.93k	if byte & 0x40 == 0 {
92	3.88k	return byte
93	3.88k	}
94	50	var value: UInt64 = byte & 0x3f
95	50	var shift: UInt64 = 6
96	50	while true {
97	50	let byte = remainingProgram.first!
98	50	remainingProgram = remainingProgram.dropFirst()
99	50	value \|= UInt64(byte & 0x3f) &<< shift
100	50	precondition(byte & 0x80 == 0, "Invalid integer leading byte \(byte)")
101	50	if byte & 0x40 == 0 {
102	50	return value
103	50	}
104	0	shift &+= 6
105	0	guard shift < 64 else {
106	0	fatalError("Bytecode value too large to fit into UInt64")
107	0	}
108	0	}
109	0	}
110
111		/// Reads and returns the next null-terminated string from the bytecode stream.
112		///
113		/// - Precondition: The reader must not be at the end of the bytecode stream.
114		///
115		/// - Returns: An `UnsafeBufferPointer` containing the string that was read from the bytecode
116		/// stream. This pointer is rebased -- its base address is the start of the string that was
117		/// just read, not the start of the entire stream -- but its lifetime is still tied to that of
118		/// the original bytecode stream (which is immortal if it originated from a static string).
119	3.44k	package mutating func nextNullTerminatedString() -> UnsafeBufferPointer<UInt8> {
120	3.44k	precondition(hasData, "Unexpected end of bytecode stream")
121	3.44k
122	3.44k	guard let nullIndex = remainingProgram.firstIndex(of: 0) else {
123	0	preconditionFailure("Unexpected end of bytecode stream while looking for end of string")
124	3.44k	}
125	3.44k	let endIndex = remainingProgram.index(after: nullIndex)
126	3.44k	defer { remainingProgram = remainingProgram[endIndex...] }
127	3.44k	return .init(rebasing: remainingProgram[..<nullIndex])
128	3.44k	}
129
130		/// Reads and returns the next array of length-delimited strings from the bytecode stream.
131		///
132		/// - Precondition: The reader must not be at the end of the bytecode stream.
133		///
134		/// - Returns: An array of `UnsafeBufferPointer`s containing the strings that were read from the
135		/// bytecode stream. See the documentation of `nextString()` for details on the lifetimes of
136		/// these pointers.
137	0	package mutating func nextNullTerminatedStringArray() -> [UnsafeBufferPointer<UInt8>] {
138	0	precondition(hasData, "Unexpected end of bytecode stream")
139	0
140	0	let count = Int(nextUInt64())
141	0	return [UnsafeBufferPointer<UInt8>](unsafeUninitializedCapacity: count) {
142	0	(buffer, initializedCount) in
143	0	for index in 0..<count {
144	0	buffer.initializeElement(at: index, to: nextNullTerminatedString())
145	0	}
146	0	initializedCount = count
147	0	}
148	0	}
149		}
150
151		/// Indicates the latest bytecode program format supported by `BytecodeReader`.
152		///
153		/// Programs written by a `BytecodeWriter` (see protoc-gen-swift) should only support this
154		/// version; there is no reason to generate an older version than the latest that the runtime
155		/// supports. Readers, on the other hand, must support the latest and all previous formats (unless
156		/// making breaking changes).
157		package let latestBytecodeProgramFormat: UInt64 = 0

Coverage Report

Created: 2025-07-04 06:57