/src/swift-protobuf/Sources/SwiftProtobuf/BytecodeReader.swift
Line | Count | Source (jump to first uncovered line) |
1 | | // Sources/SwiftProtobuf/BytecodeReader.swift - Internal bytecode reader |
2 | | // |
3 | | // Copyright (c) 2014 - 2025 Apple Inc. and the project authors |
4 | | // Licensed under Apache License v2.0 with Runtime Library Exception |
5 | | // |
6 | | // See LICENSE.txt for license information: |
7 | | // https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt |
8 | | // |
9 | | // ----------------------------------------------------------------------------- |
10 | | |
/// Reads values encoded in a SwiftProtobuf bytecode stream.
///
/// A program begins with a single integer "format specifier", which the initializer consumes and
/// validates. After that, callers pull instructions and operands off the stream with the `next…`
/// methods. All malformed-input conditions are treated as programmer errors and trap.
package struct BytecodeReader<Instruction: RawRepresentable> where Instruction.RawValue == UInt64 {
    /// The remaining slice of the program that has not yet been read.
    private var remainingProgram: UnsafeBufferPointer<UInt8>.SubSequence

    /// Indicates whether or not there is still data that hasn't yet been read in the bytecode
    /// stream.
    package var hasData: Bool {
        !remainingProgram.isEmpty
    }

    /// Creates a new bytecode reader that reads the given bytecode stream.
    ///
    /// The leading format specifier is read and checked immediately, so the reader is positioned
    /// at the first instruction once initialization completes.
    ///
    /// - Precondition: `remainingProgram` must not be empty, and its format specifier must not be
    ///   greater than `latestBytecodeProgramFormat`.
    ///
    /// - Parameter remainingProgram: The bytes of the program, starting at the format specifier.
    package init(remainingProgram: UnsafeBufferPointer<UInt8>.SubSequence) {
        self.remainingProgram = remainingProgram

        // We reserve the first integer of the program text as a "format specifier". This
        // future-proofs us if we ever want to change the way programs themselves are encoded
        // (for example, compressing them).
        Self.checkProgramFormat(nextUInt64())
    }

    /// Checks that the given program format is valid (i.e., not greater than the runtime supports),
    /// trapping if it is invalid.
    ///
    /// - Parameter programFormat: The format specifier read from the start of a program.
    static func checkProgramFormat(_ programFormat: UInt64) {
        if programFormat > latestBytecodeProgramFormat {
            fatalError("Unexpected bytecode program format \(programFormat)")
        }
    }

    /// Reads and returns the next instruction from the bytecode stream.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, and the next
    ///   opcode must not be zero. The opcode must also be a valid raw value of `Instruction`;
    ///   otherwise this method traps.
    ///
    /// - Returns: The instruction that was read from the bytecode stream.
    package mutating func nextInstruction() -> Instruction {
        precondition(hasData, "Unexpected end of bytecode stream")

        let opcode = nextUInt64()
        precondition(opcode != 0, "Opcode 0 is reserved; do not use it in your own instructions")
        guard let instruction = Instruction(rawValue: opcode) else {
            fatalError("Unexpected opcode \(opcode) for instruction set \(Instruction.self)")
        }
        return instruction
    }

    /// Reads and returns the next signed 32-bit integer from the bytecode stream.
    ///
    /// This is provided as its own primitive operation because 32-bit values are extremely common
    /// as field numbers (0 to 2^29-1) and enum cases (-2^31 to 2^31-1). In particular for enum
    /// cases, using this function specifically for those cases avoids making mistakes involving
    /// sign- vs. zero-extension between differently-sized integers.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream. The range check
    ///   on the decoded value is an `assert`, so it is only enforced in debug builds; in other
    ///   builds the upper 32 bits of an out-of-range value are discarded.
    ///
    /// - Returns: The signed 32-bit integer that was read from the bytecode stream.
    package mutating func nextInt32() -> Int32 {
        // `Int32`s are stored by converting them bit-wise to a `UInt32` and then zero-extended to
        // `UInt64`, since this representation is smaller than sign-extending them to 64 bits.
        let uint64Value = nextUInt64()
        assert(uint64Value < 1 &<< 32, "nextInt32() read a value larger than 32 bits")
        return Int32(bitPattern: UInt32(truncatingIfNeeded: uint64Value))
    }

    /// Reads and returns the next unsigned 64-bit integer from the bytecode stream.
    ///
    /// Integers are stored little-endian in 6-bit groups, one group per byte. In every byte the
    /// most significant bit (0x80) must be clear and the next bit (0x40) is the continuation flag.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, the stream must
    ///   not end in the middle of the integer, every byte of the integer must have its most
    ///   significant bit clear, and the encoded value must fit into 64 bits.
    ///
    /// - Returns: The unsigned 64-bit integer that was read from the bytecode stream.
    package mutating func nextUInt64() -> UInt64 {
        // We store our programs as `StaticString`s, but those are still required to be UTF-8
        // encoded. This means we can't use a standard varint encoding for integers (because we
        // cannot arbitrarily use the most significant bit), but we can use a slightly modified
        // version that always keeps the MSB clear and uses the next-to-MSB as the continuation bit.
        guard let leadingByte = remainingProgram.popFirst() else {
            preconditionFailure("Unexpected end of bytecode stream")
        }
        precondition(leadingByte & 0x80 == 0, "Invalid integer leading byte \(leadingByte)")

        // Fast path: the overwhelmingly common case is a value that fits in a single byte.
        if leadingByte & 0x40 == 0 {
            return UInt64(leadingByte)
        }

        var value = UInt64(leadingByte & 0x3f)
        var shift: UInt64 = 6
        while true {
            // A set continuation bit promises another byte; a truncated stream is malformed.
            guard let byte = remainingProgram.popFirst() else {
                preconditionFailure("Unexpected end of bytecode stream while reading an integer")
            }
            // Validate the byte before merging any of its bits into the result.
            precondition(byte & 0x80 == 0, "Invalid integer continuation byte \(byte)")

            // At `shift == 60` only four of the six payload bits fit into the result. Shifting
            // back and comparing detects payload bits that would otherwise be silently lost.
            let payload = UInt64(byte & 0x3f)
            let shifted = payload &<< shift
            precondition(shifted &>> shift == payload, "Bytecode value too large to fit into UInt64")
            value |= shifted

            if byte & 0x40 == 0 {
                return value
            }
            shift &+= 6
            guard shift < 64 else {
                fatalError("Bytecode value too large to fit into UInt64")
            }
        }
    }

    /// Reads and returns the next null-terminated string from the bytecode stream.
    ///
    /// The terminating null byte is consumed but is not part of the returned buffer.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, and the remaining
    ///   stream must contain a null byte.
    ///
    /// - Returns: An `UnsafeBufferPointer` containing the string that was read from the bytecode
    ///   stream. This pointer is rebased -- its base address is the start of the string that was
    ///   just read, not the start of the entire stream -- but its lifetime is still tied to that of
    ///   the original bytecode stream (which is immortal if it originated from a static string).
    package mutating func nextNullTerminatedString() -> UnsafeBufferPointer<UInt8> {
        precondition(hasData, "Unexpected end of bytecode stream")

        guard let nullIndex = remainingProgram.firstIndex(of: 0) else {
            preconditionFailure("Unexpected end of bytecode stream while looking for end of string")
        }
        let string = UnsafeBufferPointer(rebasing: remainingProgram[..<nullIndex])
        // Resume reading immediately after the terminator.
        remainingProgram = remainingProgram[remainingProgram.index(after: nullIndex)...]
        return string
    }

    /// Reads and returns the next array of null-terminated strings from the bytecode stream.
    ///
    /// The array is encoded as an integer element count followed by that many null-terminated
    /// strings.
    ///
    /// - Precondition: The reader must not be at the end of the bytecode stream, and the stream
    ///   must contain as many null-terminated strings as the encoded count claims.
    ///
    /// - Returns: An array of `UnsafeBufferPointer`s containing the strings that were read from the
    ///   bytecode stream. See the documentation of `nextNullTerminatedString()` for details on the
    ///   lifetimes of these pointers.
    package mutating func nextNullTerminatedStringArray() -> [UnsafeBufferPointer<UInt8>] {
        precondition(hasData, "Unexpected end of bytecode stream")

        let count = Int(nextUInt64())
        return [UnsafeBufferPointer<UInt8>](unsafeUninitializedCapacity: count) {
            (buffer, initializedCount) in
            for index in 0..<count {
                buffer.initializeElement(at: index, to: nextNullTerminatedString())
            }
            initializedCount = count
        }
    }
}
150 | | |
/// The newest bytecode program format that `BytecodeReader` is able to read.
///
/// A `BytecodeWriter` (see protoc-gen-swift) should always emit exactly this format; there is no
/// reason to generate anything older than the latest format the runtime supports. Readers, on
/// the other hand, must keep accepting this format and every earlier one (unless making breaking
/// changes).
package let latestBytecodeProgramFormat: UInt64 = 0