/src/swift-protobuf/Sources/SwiftProtobuf/TextFormatScanner.swift
Line | Count | Source |
1 | | // Sources/SwiftProtobuf/TextFormatScanner.swift - Text format decoding |
2 | | // |
3 | | // Copyright (c) 2014 - 2019 Apple Inc. and the project authors |
4 | | // Licensed under Apache License v2.0 with Runtime Library Exception |
5 | | // |
6 | | // See LICENSE.txt for license information: |
7 | | // https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt |
8 | | // |
9 | | // ----------------------------------------------------------------------------- |
10 | | /// |
11 | | /// Text format decoding engine. |
12 | | /// |
13 | | // ----------------------------------------------------------------------------- |
14 | | |
15 | | #if canImport(FoundationEssentials) |
16 | | import FoundationEssentials |
17 | | #else |
18 | | import Foundation |
19 | | #endif |
20 | | |
// ASCII byte values used by the text-format scanner.

// Control characters (written as their numeric codes).
private let asciiBell: UInt8 = 7
private let asciiBackspace: UInt8 = 8
private let asciiTab: UInt8 = 9
private let asciiNewLine: UInt8 = 10
private let asciiVerticalTab: UInt8 = 11
private let asciiFormFeed: UInt8 = 12
private let asciiCarriageReturn: UInt8 = 13

// Digits.
private let asciiZero = UInt8(ascii: "0")
private let asciiOne = UInt8(ascii: "1")
private let asciiThree = UInt8(ascii: "3")
private let asciiSeven = UInt8(ascii: "7")
private let asciiNine = UInt8(ascii: "9")

// Punctuation and symbols.
private let asciiColon = UInt8(ascii: ":")
private let asciiPeriod = UInt8(ascii: ".")
private let asciiPlus = UInt8(ascii: "+")
private let asciiComma = UInt8(ascii: ",")
private let asciiSemicolon = UInt8(ascii: ";")
private let asciiDoubleQuote = UInt8(ascii: "\"")
private let asciiSingleQuote = UInt8(ascii: "\'")
private let asciiBackslash = UInt8(ascii: "\\")
private let asciiForwardSlash = UInt8(ascii: "/")
private let asciiHash = UInt8(ascii: "#")
private let asciiUnderscore = UInt8(ascii: "_")
private let asciiQuestionMark = UInt8(ascii: "?")
private let asciiTilde = UInt8(ascii: "~")
private let asciiPercent = UInt8(ascii: "%")
private let asciiExclamation = UInt8(ascii: "!")
private let asciiDollarSign = UInt8(ascii: "$")
private let asciiAmpersand = UInt8(ascii: "&")
private let asciiAsterisk = UInt8(ascii: "*")
private let asciiEquals = UInt8(ascii: "=")
private let asciiSpace = UInt8(ascii: " ")

// Brackets.
private let asciiOpenParenthesis = UInt8(ascii: "(")
private let asciiCloseParenthesis = UInt8(ascii: ")")
private let asciiOpenSquareBracket = UInt8(ascii: "[")
private let asciiCloseSquareBracket = UInt8(ascii: "]")
private let asciiOpenCurlyBracket = UInt8(ascii: "{")
private let asciiCloseCurlyBracket = UInt8(ascii: "}")
private let asciiOpenAngleBracket = UInt8(ascii: "<")
private let asciiCloseAngleBracket = UInt8(ascii: ">")
private let asciiMinus = UInt8(ascii: "-")

// Letters.
private let asciiLowerA = UInt8(ascii: "a")
private let asciiUpperA = UInt8(ascii: "A")
private let asciiLowerB = UInt8(ascii: "b")
private let asciiLowerE = UInt8(ascii: "e")
private let asciiUpperE = UInt8(ascii: "E")
private let asciiLowerF = UInt8(ascii: "f")
private let asciiUpperF = UInt8(ascii: "F")
private let asciiLowerI = UInt8(ascii: "i")
private let asciiLowerL = UInt8(ascii: "l")
private let asciiLowerN = UInt8(ascii: "n")
private let asciiLowerR = UInt8(ascii: "r")
private let asciiLowerS = UInt8(ascii: "s")
private let asciiLowerT = UInt8(ascii: "t")
private let asciiUpperT = UInt8(ascii: "T")
private let asciiLowerU = UInt8(ascii: "u")
private let asciiUpperU = UInt8(ascii: "U")
private let asciiLowerV = UInt8(ascii: "v")
private let asciiLowerX = UInt8(ascii: "x")
private let asciiLowerY = UInt8(ascii: "y")
private let asciiLowerZ = UInt8(ascii: "z")
private let asciiUpperZ = UInt8(ascii: "Z")
83 | | |
84 | | // https://protobuf.dev/programming-guides/proto2/#assigning |
85 | | // Fields can be between 1 and 536,870,911. So we can stop parsing |
86 | | // a raw number if we go over this (it also avoids rollover). |
87 | | private let maxFieldNumLength: Int = 9 |
88 | | |
/// Converts a single ASCII hexadecimal digit to its numeric value.
///
/// - Parameter c: An ASCII byte.
/// - Returns: 0...15 for `0`-`9`, `A`-`F`, or `a`-`f`; `nil` for any other byte.
private func fromHexDigit(_ c: UInt8) -> UInt8? {
    switch c {
    case asciiZero...asciiNine:
        return c - asciiZero
    case asciiUpperA...asciiUpperF:
        return c - asciiUpperA + UInt8(10)
    case asciiLowerA...asciiLowerF:
        return c - asciiLowerA + UInt8(10)
    default:
        return nil
    }
}
101 | | |
/// Same as `fromHexDigit(_:)`, but widens the digit value to `UInt32`
/// so it can be folded directly into a code point.
///
/// - Parameter c: An ASCII byte.
/// - Returns: The digit value (0...15), or `nil` if `c` is not a hex digit.
private func uint32FromHexDigit(_ c: UInt8) -> UInt32? {
    fromHexDigit(c).map { UInt32($0) }
}
108 | | |
/// Decodes the C-style escape sequences in the body of a quoted string.
///
/// Protobuf Text encoding assumes that you're working directly in UTF-8,
/// so this walks the UTF-8 bytes of `s`, expands every escape sequence into
/// raw bytes, and then converts the resulting bytes back into a `String`.
///
/// Recognized escapes:
/// - 1 to 3 octal digits (three-digit values wrap modulo 256)
/// - `\x` followed by 1 or 2 hex digits
/// - `\u` + 4 hex digits, `\U` + 8 hex digits (emitted as UTF-8)
/// - `\a \b \f \n \r \t \v \" \' \? \\`
///
/// - Parameter s: The string body, without the enclosing quotes.
/// - Returns: The result of converting the decoded bytes with
///   `utf8ToString`, or `nil` if `s` contains an unrecognized or truncated
///   escape, a `\x` with no hex digit, or a Unicode escape that is not a
///   Unicode scalar value (a surrogate, or anything above U+10FFFF).
private func decodeString(_ s: String) -> String? {

    // Reads exactly 4 hex digits and combines them into one 16-bit value.
    // Returns nil if the input ends early or a byte is not a hex digit.
    func read4HexDigits(_ i: inout String.UTF8View.Iterator) -> UInt32? {
        var value: UInt32 = 0
        for _ in 0..<4 {
            guard let byte = i.next(), let digit = uint32FromHexDigit(byte) else {
                return nil
            }
            value = (value << 4) + digit
        }
        return value
    }

    var out = [UInt8]()
    var bytes = s.utf8.makeIterator()
    while let byte = bytes.next() {
        guard byte == asciiBackslash else {
            // Ordinary byte: copy through unchanged.
            out.append(byte)
            continue
        }
        guard let escaped = bytes.next() else {
            return nil  // Input ends with backslash
        }
        switch escaped {
        case asciiZero...asciiSeven:  // 0...7
            // C standard allows 1, 2, or 3 octal digits. The iterator is a
            // value type, so a copy acts as a bookmark that lets us "unread"
            // a byte that turned out not to be an octal digit.
            let savedPosition = bytes
            let digit1Value = escaped - asciiZero
            if let digit2 = bytes.next(),
                digit2 >= asciiZero && digit2 <= asciiSeven
            {
                let digit2Value = digit2 - asciiZero
                let innerSavedPosition = bytes
                if let digit3 = bytes.next(),
                    digit3 >= asciiZero && digit3 <= asciiSeven
                {
                    let digit3Value = digit3 - asciiZero
                    // The max octal digit is actually \377, but looking at the C++
                    // protobuf code in strutil.cc:UnescapeCEscapeSequences(), it
                    // decodes with rollover, so just duplicate that behavior for
                    // consistency between languages.
                    let n = digit1Value &* 64 &+ digit2Value &* 8 &+ digit3Value
                    out.append(n)
                } else {
                    let n = digit1Value * 8 + digit2Value
                    out.append(n)
                    bytes = innerSavedPosition
                }
            } else {
                out.append(digit1Value)
                bytes = savedPosition
            }
        case asciiLowerU, asciiUpperU:  // "u" or "U"
            // \u - 4 hex digits, \U 8 hex digits:
            guard let first = read4HexDigits(&bytes) else { return nil }
            var codePoint = first
            if escaped == asciiUpperU {
                guard let second = read4HexDigits(&bytes) else { return nil }
                codePoint = (codePoint << 16) + second
            }
            switch codePoint {
            case 0...0x7f:
                // 1 byte encoding
                out.append(UInt8(truncatingIfNeeded: codePoint))
            case 0x80...0x7ff:
                // 2 byte encoding
                out.append(0xC0 + UInt8(truncatingIfNeeded: codePoint >> 6))
                out.append(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
            case 0xD800...0xDFFF:
                // Surrogate code points are not Unicode scalar values and have
                // no well-formed UTF-8 encoding. Reject them here, matching
                // validateAndCountBytesFromString(), instead of emitting an
                // ill-formed 3-byte sequence and relying on the final UTF-8
                // conversion to notice.
                return nil
            case 0x800...0xffff:
                // 3 byte encoding
                out.append(0xE0 + UInt8(truncatingIfNeeded: codePoint >> 12))
                out.append(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
                out.append(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
            case 0x10000...0x10FFFF:
                // 4 byte encoding
                out.append(0xF0 + UInt8(truncatingIfNeeded: codePoint >> 18))
                out.append(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 12) & 0x3F))
                out.append(0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F))
                out.append(0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F))
            default:
                return nil  // Beyond U+10FFFF
            }
        case asciiLowerX:  // "x"
            // Unlike C/C++, protobuf only allows 1 or 2 digits here:
            guard let firstByte = bytes.next(), let firstDigit = fromHexDigit(firstByte) else {
                return nil  // Hex escape must have at least 1 digit
            }
            var n = firstDigit
            let savedPosition = bytes
            if let secondByte = bytes.next(), let secondDigit = fromHexDigit(secondByte) {
                n = n &* 16 + secondDigit
            } else {
                // No second digit; reset the iterator
                bytes = savedPosition
            }
            out.append(n)
        case asciiLowerA:  // \a
            out.append(asciiBell)
        case asciiLowerB:  // \b
            out.append(asciiBackspace)
        case asciiLowerF:  // \f
            out.append(asciiFormFeed)
        case asciiLowerN:  // \n
            out.append(asciiNewLine)
        case asciiLowerR:  // \r
            out.append(asciiCarriageReturn)
        case asciiLowerT:  // \t
            out.append(asciiTab)
        case asciiLowerV:  // \v
            out.append(asciiVerticalTab)
        case asciiDoubleQuote,
            asciiSingleQuote,
            asciiQuestionMark,
            asciiBackslash:  // " ' ? \
            out.append(escaped)
        default:
            return nil  // Unrecognized escape
        }
    }
    // There has got to be an easier way to convert a [UInt8] into a String.
    return out.withUnsafeBufferPointer { ptr in
        if let addr = ptr.baseAddress {
            return utf8ToString(bytes: addr, count: ptr.count)
        } else {
            return String()
        }
    }
}
251 | | |
252 | | /// |
253 | | /// TextFormatScanner has no public members. |
254 | | /// |
255 | | internal struct TextFormatScanner { |
256 | | internal let extensions: (any ExtensionMap)? |
257 | | private var p: UnsafeRawPointer |
258 | | private let end: UnsafeRawPointer |
259 | 124k | private let doubleParser = DoubleParser() |
260 | | |
261 | | internal let options: TextFormatDecodingOptions |
262 | | internal var recursionBudget: Int |
263 | | |
264 | 60.7k | internal var complete: Bool { p == end } |
265 | | |
/// Creates a scanner over `count` bytes of UTF-8 text starting at
/// `utf8Pointer`, and skips any leading whitespace and comments.
///
/// - Parameters:
///   - utf8Pointer: Start of the text to scan.
///   - count: Number of bytes available at `utf8Pointer`.
///   - options: Decoding options; `messageDepthLimit` seeds the recursion budget.
///   - extensions: Optional extension map, stored for later use.
internal init(
    utf8Pointer: UnsafeRawPointer,
    count: Int,
    options: TextFormatDecodingOptions,
    extensions: (any ExtensionMap)? = nil
) {
    self.extensions = extensions
    self.options = options
    self.p = utf8Pointer
    self.end = utf8Pointer + count
    // Since the root message doesn't start with a `skipObjectStart`, the
    // budget starts with one less depth to cover that top message.
    self.recursionBudget = options.messageDepthLimit - 1
    skipWhitespace()
}
281 | | |
/// Spends one unit of the recursion budget.
///
/// - Throws: `TextFormatDecodingError.messageDepthLimit` once the budget
///   has gone negative.
private mutating func incrementRecursionDepth() throws {
    recursionBudget -= 1
    guard recursionBudget >= 0 else {
        throw TextFormatDecodingError.messageDepthLimit
    }
}
288 | | |
/// Returns one unit to the recursion budget.
///
/// Traps if the budget would exceed `options.messageDepthLimit`.
private mutating func decrementRecursionDepth() {
    recursionBudget += 1
    // This should never happen, if it does, something is probably
    // corrupting memory, and simply throwing doesn't make much sense.
    guard recursionBudget <= options.messageDepthLimit else {
        fatalError("Somehow TextFormatDecoding unwound more objects than it started")
    }
}
297 | | |
/// Advances past whitespace and `#` comments.
///
/// Stops at the first byte that is neither whitespace nor part of a
/// comment, or at the end of the input.
///
/// https://protobuf.dev/reference/protobuf/textformat-spec/#whitespace
private mutating func skipWhitespace() {
    while p != end {
        switch p[0] {
        case asciiSpace, asciiTab, asciiNewLine, asciiVerticalTab, asciiFormFeed, asciiCarriageReturn:
            p += 1
        case asciiHash:  // # comment
            p += 1
            // Consume the rest of the line, including its terminator.
            // NOTE: The support for asciiCarriageReturn (13) to end the comment is not
            // actually to spec.
            comment: while p != end {
                let c = p[0]
                p += 1
                if c == asciiNewLine || c == asciiCarriageReturn {
                    break comment
                }
            }
        default:
            return
        }
    }
}
329 | | |
/// Consumes a run of identifier bytes (`a-z`, `A-Z`, `0-9`, `_`) and
/// returns a buffer over the raw UTF-8, then skips trailing whitespace.
///
/// Assumes the caller already knows the current byte is a valid start of
/// an identifier; the returned buffer is empty if no identifier byte is
/// present at the current position.
private mutating func parseUTF8Identifier() -> UnsafeRawBufferPointer {
    let start = p
    while p != end {
        let c = p[0]
        let isLetter =
            (c >= asciiLowerA && c <= asciiLowerZ)
            || (c >= asciiUpperA && c <= asciiUpperZ)
        let isDigit = c >= asciiZero && c <= asciiNine
        guard isLetter || isDigit || c == asciiUnderscore else {
            break
        }
        p += 1
    }
    let identifier = UnsafeRawBufferPointer(start: start, count: p - start)
    skipWhitespace()
    return identifier
}
351 | | |
/// Consumes the next identifier and returns it as a `String`.
private mutating func parseIdentifier() -> String {
    let raw = parseUTF8Identifier()
    // Force-unwrap is OK: we never have invalid UTF8 at this point.
    return utf8ToString(bytes: raw.baseAddress!, count: raw.count)!
}
359 | | |
/// Validates the body of a quoted bytes literal and reports how many bytes
/// it decodes to.
///
/// Scanning starts at the current position (just after the opening quote).
/// On success the position is rewound to where it started, so the caller
/// can decode the same text in a second pass.
///
/// - Parameters:
///   - terminator: The quote byte that closes the literal.
///   - sawBackslash: Set to `true` if the body contains any backslash.
/// - Returns: The number of decoded bytes.
/// - Throws: `TextFormatDecodingError.malformedText` for a raw newline or
///   carriage return, an invalid escape, or a missing terminator.
private mutating func validateAndCountBytesFromString(terminator: UInt8, sawBackslash: inout Bool) throws -> Int {
    let bodyStart = p
    var decodedLength = 0
    sawBackslash = false
    while p != end {
        let byte = p[0]
        p += 1
        if byte == terminator {
            p = bodyStart
            return decodedLength
        }
        if byte == asciiNewLine || byte == asciiCarriageReturn {
            // Can't have a newline in the middle of a bytes string.
            throw TextFormatDecodingError.malformedText
        }
        guard byte == asciiBackslash else {
            decodedLength += 1
            continue
        }

        sawBackslash = true
        // A backslash as the very last byte leaves the literal unterminated;
        // that is reported after the loop.
        guard p != end else { break }
        let escaped = p[0]
        p += 1
        switch escaped {
        case asciiZero...asciiSeven:  // '0'...'7'
            // C standard allows 1, 2, or 3 octal digits.
            if p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                p += 1
                if p != end, p[0] >= asciiZero, p[0] <= asciiSeven {
                    guard escaped <= asciiThree else {
                        // Out of range octal: three digits and first digit is greater than 3
                        throw TextFormatDecodingError.malformedText
                    }
                    p += 1
                }
            }
            decodedLength += 1
        case asciiLowerU, asciiUpperU:  // 'u' or 'U' unicode escape
            let numDigits = (escaped == asciiLowerU) ? 4 : 8
            guard (end - p) >= numDigits else {
                // A unicode escape must have 4/8 digits
                throw TextFormatDecodingError.malformedText
            }
            var codePoint: UInt32 = 0
            for i in 0..<numDigits {
                guard let digit = uint32FromHexDigit(p[i]) else {
                    throw TextFormatDecodingError.malformedText  // wasn't a hex digit
                }
                codePoint = (codePoint << 4) + digit
            }
            p += numDigits
            // Count the UTF-8 bytes this code point encodes to.
            switch codePoint {
            case 0...0x7f:
                decodedLength += 1
            case 0x80...0x7ff:
                decodedLength += 2
            case 0xD800...0xDFFF:
                // Surrogate (low or high), shouldn't get a unicode literal of those.
                throw TextFormatDecodingError.malformedText
            case 0x800...0xffff:
                decodedLength += 3
            case 0x10000...0x10FFFF:
                decodedLength += 4
            default:
                throw TextFormatDecodingError.malformedText  // Isn't a valid unicode character
            }
        case asciiLowerX:  // 'x' hexadecimal escape
            guard p != end, fromHexDigit(p[0]) != nil else {
                throw TextFormatDecodingError.malformedText  // Hex escape must have at least 1 digit
            }
            p += 1
            // An optional second hex digit belongs to the same byte.
            if p != end && fromHexDigit(p[0]) != nil {
                p += 1
            }
            decodedLength += 1
        case asciiLowerA,  // \a ("alert")
            asciiLowerB,  // \b
            asciiLowerF,  // \f
            asciiLowerN,  // \n
            asciiLowerR,  // \r
            asciiLowerT,  // \t
            asciiLowerV,  // \v
            asciiSingleQuote,  // \'
            asciiDoubleQuote,  // \"
            asciiQuestionMark,  // \?
            asciiBackslash:  // \\
            decodedLength += 1
        default:
            throw TextFormatDecodingError.malformedText  // Unrecognized escape
        }
    }
    // Ran off the end of the input without finding the terminator.
    throw TextFormatDecodingError.malformedText
}
461 | | |
/// Decodes a quoted bytes literal into `data` and consumes its terminator.
///
/// Protobuf Text format uses C ASCII conventions for encoding byte
/// sequences, including the use of octal and hexadecimal escapes.
///
/// Assumes that validateAndCountBytesFromString() has already verified the
/// literal and that `data` is sized to the count it returned, so no bounds
/// or error checks are repeated here. Does nothing if `data` is empty.
///
/// - Parameters:
///   - terminator: The quote byte that closes the literal.
///   - data: Pre-sized destination buffer for the decoded bytes.
private mutating func parseBytesFromString(terminator: UInt8, into data: inout Data) {
    data.withUnsafeMutableBytes { (body: UnsafeMutableRawBufferPointer) in
        guard var out = body.baseAddress, body.count > 0 else { return }
        while p[0] != terminator {
            let byte = p[0]
            p += 1
            guard byte == asciiBackslash else {
                out[0] = byte
                out += 1
                continue
            }

            let escaped = p[0]
            p += 1
            // Every escape except \u / \U yields exactly one byte, stored
            // once after the switch.
            let decoded: UInt8
            switch escaped {
            case asciiZero...asciiSeven:  // '0'...'7'
                // C standard allows 1, 2, or 3 octal digits.
                let digit1Value = escaped - asciiZero
                let digit2 = p[0]
                if digit2 < asciiZero || digit2 > asciiSeven {
                    decoded = digit1Value
                } else {
                    p += 1
                    let digit2Value = digit2 - asciiZero
                    let digit3 = p[0]
                    if digit3 < asciiZero || digit3 > asciiSeven {
                        decoded = digit1Value * 8 + digit2Value
                    } else {
                        p += 1
                        let digit3Value = digit3 - asciiZero
                        decoded = digit1Value &* 64 + digit2Value * 8 + digit3Value
                    }
                }
            case asciiLowerU, asciiUpperU:
                let numDigits = (escaped == asciiLowerU) ? 4 : 8
                var codePoint: UInt32 = 0
                for i in 0..<numDigits {
                    codePoint = (codePoint << 4) + uint32FromHexDigit(p[i])!
                }
                p += numDigits
                // Emit the code point as UTF-8.
                switch codePoint {
                case 0...0x7f:
                    // 1 byte encoding
                    out[0] = UInt8(truncatingIfNeeded: codePoint)
                    out += 1
                case 0x80...0x7ff:
                    // 2 byte encoding
                    out[0] = 0xC0 + UInt8(truncatingIfNeeded: codePoint >> 6)
                    out[1] = 0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F)
                    out += 2
                case 0x800...0xffff:
                    // 3 byte encoding
                    out[0] = 0xE0 + UInt8(truncatingIfNeeded: codePoint >> 12)
                    out[1] = 0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F)
                    out[2] = 0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F)
                    out += 3
                case 0x10000...0x10FFFF:
                    // 4 byte encoding
                    out[0] = 0xF0 + UInt8(truncatingIfNeeded: codePoint >> 18)
                    out[1] = 0x80 + UInt8(truncatingIfNeeded: (codePoint >> 12) & 0x3F)
                    out[2] = 0x80 + UInt8(truncatingIfNeeded: (codePoint >> 6) & 0x3F)
                    out[3] = 0x80 + UInt8(truncatingIfNeeded: codePoint & 0x3F)
                    out += 4
                default:
                    preconditionFailure()  // Already validated, can't happen
                }
                continue
            case asciiLowerX:  // 'x' hexadecimal escape
                // We already validated, so we know there's at least one digit:
                var n = fromHexDigit(p[0])!
                p += 1
                if let digit = fromHexDigit(p[0]) {
                    n = n &* 16 &+ digit
                    p += 1
                }
                decoded = n
            case asciiLowerA:  // \a ("alert")
                decoded = asciiBell
            case asciiLowerB:  // \b
                decoded = asciiBackspace
            case asciiLowerF:  // \f
                decoded = asciiFormFeed
            case asciiLowerN:  // \n
                decoded = asciiNewLine
            case asciiLowerR:  // \r
                decoded = asciiCarriageReturn
            case asciiLowerT:  // \t
                decoded = asciiTab
            case asciiLowerV:  // \v
                decoded = asciiVerticalTab
            default:
                // Remaining validated escapes stand for the escaped byte itself.
                decoded = escaped
            }
            out[0] = decoded
            out += 1
        }
        p += 1  // Consume terminator
    }
}
578 | | |
/// Parses one quoted string segment, up to and including `terminator`,
/// then skips trailing whitespace.
///
/// Assumes the leading quote has already been consumed.
///
/// - Parameter terminator: The quote byte that closes the segment.
/// - Returns: The segment's contents (with escapes decoded if any backslash
///   was seen), or `nil` if the segment is unterminated, contains a raw
///   newline or carriage return, ends right after a backslash, or cannot
///   be converted/decoded.
private mutating func parseStringSegment(terminator: UInt8) -> String? {
    let start = p
    var sawBackslash = false
    while p != end {
        let c = p[0]
        switch c {
        case terminator:
            let raw = utf8ToString(bytes: start, count: p - start)
            p += 1
            skipWhitespace()
            if sawBackslash, let text = raw {
                return decodeString(text)
            }
            return raw
        case asciiBackslash:  // \
            p += 1
            if p == end {
                return nil
            }
            sawBackslash = true
            // Step over the escaped byte so an escaped quote does not
            // terminate the segment.
            p += 1
        case asciiNewLine, asciiCarriageReturn:
            // Can't have a newline in the middle of a raw string.
            p += 1
            return nil
        default:
            p += 1
        }
    }
    return nil  // Unterminated quoted string
}
610 | | |
/// Parses an unsigned integer literal at the current position and skips
/// any whitespace that follows it.
///
/// Accepted forms:
/// - `0x` followed by one or more hex digits (either case)
/// - `0` followed by zero or more octal digits
/// - a decimal number starting with `1`...`9`
///
/// - Returns: The parsed value.
/// - Throws: `TextFormatDecodingError.malformedNumber` if the input is
///   exhausted, does not start with a digit, is a `0x` prefix with no hex
///   digit after it, or the value does not fit in a `UInt64`.
internal mutating func nextUInt() throws -> UInt64 {
    if p == end {
        throw TextFormatDecodingError.malformedNumber
    }
    let c = p[0]
    p += 1
    if c == asciiZero {  // leading '0' precedes octal or hex
        if p == end {
            // The TextFormat ended with a field value of zero.
            return 0
        }
        if p[0] == asciiLowerX {  // 'x' => hex
            p += 1
            let digitsStart = p
            var n: UInt64 = 0
            while p != end, let val = fromHexDigit(p[0]) {
                if n > UInt64.max / 16 {
                    throw TextFormatDecodingError.malformedNumber
                }
                p += 1
                n = n * 16 + UInt64(val)
            }
            // The text format grammar requires at least one hex digit after
            // the prefix; a bare "0x" is not a number and must not be
            // silently read as zero.
            if p == digitsStart {
                throw TextFormatDecodingError.malformedNumber
            }
            skipWhitespace()
            return n
        }
        // octal
        var n: UInt64 = 0
        while p != end {
            let digit = p[0]
            guard digit >= asciiZero && digit <= asciiSeven else {
                break  // not octal digit
            }
            if n > UInt64.max / 8 {
                throw TextFormatDecodingError.malformedNumber
            }
            p += 1
            n = n * 8 + UInt64(digit - asciiZero)
        }
        skipWhitespace()
        return n
    }
    guard c > asciiZero && c <= asciiNine else {  // must be 1...9
        throw TextFormatDecodingError.malformedNumber
    }
    var n = UInt64(c - asciiZero)
    while p != end {
        let digit = p[0]
        guard digit >= asciiZero && digit <= asciiNine else {
            break  // not a digit
        }
        let val = UInt64(digit - asciiZero)
        // Reject before multiplying/adding so the arithmetic can't overflow.
        if n > UInt64.max / 10 || n * 10 > UInt64.max - val {
            throw TextFormatDecodingError.malformedNumber
        }
        p += 1
        n = n * 10 + val
    }
    skipWhitespace()
    return n
}
685 | | |
/// Parses a signed integer literal: an optional `-` immediately followed
/// by an unsigned literal as accepted by `nextUInt()`.
///
/// - Returns: The parsed value.
/// - Throws: `TextFormatDecodingError.malformedNumber` if the input is
///   exhausted, `-` is not followed by a digit, the unsigned part is
///   malformed, or the value does not fit in an `Int64`.
internal mutating func nextSInt() throws -> Int64 {
    if p == end {
        throw TextFormatDecodingError.malformedNumber
    }
    if p[0] != asciiMinus {
        let magnitude = try nextUInt()
        if magnitude > UInt64(bitPattern: Int64.max) {
            throw TextFormatDecodingError.malformedNumber
        }
        return Int64(bitPattern: magnitude)
    }

    // Negative number.
    p += 1
    if p == end {
        throw TextFormatDecodingError.malformedNumber
    }
    // character after '-' must be digit
    let digit = p[0]
    if digit < asciiZero || digit > asciiNine {
        throw TextFormatDecodingError.malformedNumber
    }
    let magnitude = try nextUInt()
    let limit: UInt64 = 0x8000_0000_0000_0000  // -Int64.min
    if magnitude > limit {
        // Too large negative number
        throw TextFormatDecodingError.malformedNumber
    }
    if magnitude == limit {
        return Int64.min  // Special case for Int64.min
    }
    return -Int64(bitPattern: magnitude)
}
720 | | |
/// Parses a string value made of one or more adjacent quoted segments
/// (single- or double-quoted) and returns their concatenation.
///
/// - Throws: `TextFormatDecodingError.malformedText` if the input is
///   exhausted, the next byte is not a quote, or any segment fails to parse.
internal mutating func nextStringValue() throws -> String {
    skipWhitespace()
    guard p != end else {
        throw TextFormatDecodingError.malformedText
    }
    let opener = p[0]
    guard opener == asciiSingleQuote || opener == asciiDoubleQuote else {
        throw TextFormatDecodingError.malformedText
    }
    p += 1
    guard var result = parseStringSegment(terminator: opener) else {
        throw TextFormatDecodingError.malformedText
    }

    // parseStringSegment() leaves the position past any trailing
    // whitespace, so another quote here starts a continuation segment.
    while p != end {
        let quote = p[0]
        guard quote == asciiSingleQuote || quote == asciiDoubleQuote else {
            break
        }
        p += 1
        guard let segment = parseStringSegment(terminator: quote) else {
            throw TextFormatDecodingError.malformedText
        }
        result.append(segment)
    }
    return result
}
754 | | |
/// Parses a bytes value.
///
/// Protobuf Text Format allows a single bytes field to contain multiple
/// quoted strings. The values are separately decoded and then concatenated:
///   field1: "bytes" 'more bytes'
///           "and even more bytes"
///
/// - Throws: `TextFormatDecodingError.malformedText` if the input is
///   exhausted, the next byte is not a quote, or a segment fails validation.
internal mutating func nextBytesValue() throws -> Data {
    skipWhitespace()
    guard p != end else {
        throw TextFormatDecodingError.malformedText
    }
    let opener = p[0]
    guard opener == asciiSingleQuote || opener == asciiDoubleQuote else {
        throw TextFormatDecodingError.malformedText
    }
    p += 1

    // First segment: validate and size it, then either decode escapes or
    // copy the raw bytes straight out of the input.
    var result: Data
    var firstHasEscapes = false
    let firstCount = try validateAndCountBytesFromString(terminator: opener, sawBackslash: &firstHasEscapes)
    if firstHasEscapes {
        result = Data(count: firstCount)
        parseBytesFromString(terminator: opener, into: &result)
    } else {
        result = Data(bytes: p, count: firstCount)
        p += firstCount + 1  // Skip string body + close quote
    }

    // Any further adjacent quoted segments are decoded the same way and
    // appended to the result.
    while true {
        skipWhitespace()
        guard p != end else {
            return result
        }
        let quote = p[0]
        guard quote == asciiSingleQuote || quote == asciiDoubleQuote else {
            return result
        }
        p += 1
        var hasEscapes = false
        let count = try validateAndCountBytesFromString(terminator: quote, sawBackslash: &hasEscapes)
        if hasEscapes {
            var segment = Data(count: count)
            parseBytesFromString(terminator: quote, into: &segment)
            result.append(segment)
        } else {
            result.append(Data(bytes: p, count: count))
            p += count + 1  // Skip string body + close quote
        }
    }
}
806 | | |
// Tries to identify a sequence of UTF8 characters
// that represent a numeric floating-point value.
//
// On success, returns the value and leaves `p` after the number, after an
// optional single trailing 'f'/'F' suffix, and after any whitespace that
// follows.
//
// On failure, returns nil and restores `p` to where it was on entry. That
// includes the case where the characters looked like a number but the
// conversion rejected them, so that `nextFloat()`/`nextDouble()` try their
// "nan"/"inf" fallbacks from the start of the token rather than from
// somewhere in the middle of it.
private mutating func tryParseFloatString() -> Double? {
    guard p != end else { return nil }
    let start = p
    var c = p[0]
    if c == asciiMinus {
        p += 1
        guard p != end else {
            p = start
            return nil
        }
        c = p[0]
    }
    switch c {
    case asciiZero:  // '0' as first character is not allowed followed by digit
        p += 1
        guard p != end else { break }
        c = p[0]
        if c >= asciiZero && c <= asciiNine {
            p = start
            return nil
        }
    case asciiPeriod:  // '.' as first char only if followed by digit
        p += 1
        guard p != end else {
            p = start
            return nil
        }
        c = p[0]
        if c < asciiZero || c > asciiNine {
            p = start
            return nil
        }
    case asciiOne...asciiNine:
        break
    default:
        p = start
        return nil
    }

    // Greedily take everything that could belong to the number; the actual
    // validation is left to the double parser below.
    var sawFloatSuffix = false
    loop: while p != end {
        switch p[0] {
        case asciiZero...asciiNine,
            asciiPeriod,
            asciiPlus,
            asciiMinus,
            asciiLowerE,
            asciiUpperE:  // 0...9, ., +, -, e, E
            p += 1
        case asciiLowerF, asciiUpperF:  // f or F
            // The suffix ends the number but is not part of the text handed
            // to the double parser.
            sawFloatSuffix = true
            break loop
        default:
            break loop
        }
    }

    let parsed: Double? = doubleParser.utf8ToDouble(
        bytes: UnsafeRawBufferPointer(
            start: start,
            count: p - start
        ),
        finiteOnly: false
    )
    guard let value = parsed else {
        // Not a valid number after all: rewind like every other failure path.
        p = start
        return nil
    }
    if sawFloatSuffix {
        // Just skip the 'f'/'F'
        p += 1
    }
    skipWhitespace()
    return value
}
883 | | |
// Consume `bytes` only if the input at `p` matches the whole sequence
// exactly (byte for byte). On any mismatch, or if the input runs out first,
// `p` is put back where it started.
private mutating func skipOptionalCharacters(bytes: [UInt8]) {
    let rewind = p
    for expected in bytes {
        guard p != end, p[0] == expected else {
            p = rewind
            return
        }
        p += 1
    }
}
895 | | |
// Skip the keyword given in `bytes` if the input matches it. Upper-case
// ASCII letters in the input are folded to lower case before comparing, so
// `bytes` is expected to be spelled in lower case.
//
// The match only counts if the keyword is not immediately followed by
// another ASCII letter. On success any trailing whitespace is skipped and
// true is returned; otherwise `p` is restored and false is returned.
private mutating func skipOptionalKeyword(bytes: [UInt8]) -> Bool {
    let rewind = p
    for expected in bytes {
        guard p != end else {
            p = rewind
            return false
        }
        let raw = p[0]
        // Convert to lower case
        // (Protobuf text keywords are case insensitive)
        let isUpper = raw >= asciiUpperA && raw <= asciiUpperZ
        let folded = isUpper ? raw + (asciiLowerA - asciiUpperA) : raw
        guard folded == expected else {
            p = rewind
            return false
        }
        p += 1
    }

    // Keyword ran right up to the end of the input.
    guard p != end else {
        return true
    }

    // A following letter means this was only the prefix of a longer word.
    let next = p[0]
    let nextIsUpper = next >= asciiUpperA && next <= asciiUpperZ
    let nextIsLower = next >= asciiLowerA && next <= asciiLowerZ
    if nextIsUpper || nextIsLower {
        p = rewind
        return false
    }
    skipWhitespace()
    return true
}
930 | | |
// If the next token is the identifier "nan" (optionally preceded by a
// minus sign), consume it and return true. Otherwise leave `p` untouched
// and return false.
private mutating func skipOptionalNaN() -> Bool {
    let rewind = p
    // "-nan" doesn't mean anything, but upstream handles it, so skip
    // over any leading minus when checking for "nan".
    if p != end && p[0] == asciiMinus {
        p += 1
    }
    guard skipOptionalKeyword(bytes: [asciiLowerN, asciiLowerA, asciiLowerN]) else {
        // It wasn't "nan"; rewind in case we skipped a minus sign.
        p = rewind
        return false
    }
    return true
}
946 | | |
// If the next token is a recognized spelling of infinity ("inf" or
// "infinity", optionally preceded by a minus sign), consume it and
// return Float.infinity or -Float.infinity. Otherwise leave `p` untouched
// and return nil.
private mutating func skipOptionalInfinity() -> Float? {
    guard p != end else {
        return nil
    }
    let rewind = p
    let isNegative = p[0] == asciiMinus
    if isNegative {
        p += 1
    }
    let shortSpelling = [asciiLowerI, asciiLowerN, asciiLowerF]
    let longSpelling = [
        asciiLowerI, asciiLowerN, asciiLowerF, asciiLowerI,
        asciiLowerN, asciiLowerI, asciiLowerT, asciiLowerY,
    ]
    guard
        skipOptionalKeyword(bytes: shortSpelling)
            || skipOptionalKeyword(bytes: longSpelling)
    else {
        p = rewind
        return nil
    }
    return isNegative ? -Float.infinity : Float.infinity
}
975 | | |
/// Parses the next token as a `Float`.
///
/// Tries, in order: a numeric literal (converted from `Double`), the
/// keyword "nan", and a spelling of infinity.
///
/// - Throws: `TextFormatDecodingError.malformedNumber` if none of them match.
internal mutating func nextFloat() throws -> Float {
    if let number = tryParseFloatString() {
        return Float(number)
    }
    guard !skipOptionalNaN() else {
        return Float.nan
    }
    guard let infinity = skipOptionalInfinity() else {
        throw TextFormatDecodingError.malformedNumber
    }
    return infinity
}
988 | | |
/// Parses the next token as a `Double`.
///
/// Tries, in order: a numeric literal, the keyword "nan", and a spelling
/// of infinity.
///
/// - Throws: `TextFormatDecodingError.malformedNumber` if none of them match.
internal mutating func nextDouble() throws -> Double {
    if let number = tryParseFloatString() {
        return number
    }
    guard !skipOptionalNaN() else {
        return Double.nan
    }
    guard let infinity = skipOptionalInfinity() else {
        throw TextFormatDecodingError.malformedNumber
    }
    return Double(infinity)
}
1001 | | |
/// Parses the next token as a boolean.
///
/// Accepted spellings are `0`, `1`, a lone `f`/`F` or `t`/`T`, and
/// `f`/`F` followed by lowercase `alse` or `t`/`T` followed by lowercase
/// `rue`.
///
/// The value must be followed by the end of input or by a delimiter:
/// whitespace, the start of a `#` comment, a `,`/`;` separator, or a
/// closing `]`, `}` or `>`. Trailing whitespace is skipped.
///
/// - Throws: `TextFormatDecodingError.malformedText` if the input is
///   exhausted, the token is not a recognized spelling, or it is followed
///   by anything other than a delimiter.
internal mutating func nextBool() throws -> Bool {
    skipWhitespace()
    if p == end {
        throw TextFormatDecodingError.malformedText
    }
    let c = p[0]
    p += 1
    let result: Bool
    switch c {
    case asciiZero:
        result = false
    case asciiOne:
        result = true
    case asciiLowerF, asciiUpperF:
        if p != end {
            let alse = [asciiLowerA, asciiLowerL, asciiLowerS, asciiLowerE]
            skipOptionalCharacters(bytes: alse)
        }
        result = false
    case asciiLowerT, asciiUpperT:
        if p != end {
            let rue = [asciiLowerR, asciiLowerU, asciiLowerE]
            skipOptionalCharacters(bytes: rue)
        }
        result = true
    default:
        throw TextFormatDecodingError.malformedText
    }
    if p == end {
        return result
    }
    switch p[0] {
    // Vertical tab and form feed are listed so that this delimiter set
    // agrees with the whitespace set accepted inside bracketed field names
    // elsewhere in this file.
    case asciiSpace,
        asciiTab,  // 9
        asciiNewLine,  // 10
        asciiVerticalTab,  // 11
        asciiFormFeed,  // 12
        asciiCarriageReturn,  // 13
        asciiHash,
        asciiComma,
        asciiSemicolon,
        asciiCloseSquareBracket,
        asciiCloseCurlyBracket,
        asciiCloseAngleBracket:
        skipWhitespace()
        return result
    default:
        throw TextFormatDecodingError.malformedText
    }
}
1050 | | |
/// Returns the identifier at the current position if the next
/// non-whitespace byte is an ASCII letter, or nil if it is anything else.
///
/// - Throws: `TextFormatDecodingError.malformedText` if only whitespace
///   remains in the input.
internal mutating func nextOptionalEnumName() throws -> UnsafeRawBufferPointer? {
    skipWhitespace()
    guard p != end else {
        throw TextFormatDecodingError.malformedText
    }
    let first = p[0]
    let isLower = first >= asciiLowerA && first <= asciiLowerZ
    let isUpper = first >= asciiUpperA && first <= asciiUpperZ
    guard isLower || isUpper else {
        return nil
    }
    return parseUTF8Identifier()
}
1063 | | |
/// Returns the next Any type URL, or nil at end-of-input or if the next
/// token does not start with '['.
///
/// Any URLs are syntactically (almost) identical to extension
/// keys, so the bracketed-name parser is shared between the two.
///
/// Throws if the token starts with '[' but cannot be parsed.
internal mutating func nextOptionalAnyURL() throws -> String? {
    skipWhitespace()
    guard p != end, p[0] == asciiOpenSquareBracket else {  // [
        return nil
    }
    return try parseComplexFieldName(allowAnyName: true)
}
1076 | | |
/// Returns the next extension key, or nil at end-of-input or
/// if the next token is not an extension key.
///
/// Throws an error if the next token starts with '[' but
/// cannot be parsed as an extension key.
internal mutating func nextOptionalExtensionKey() throws -> String? {
    skipWhitespace()
    guard p != end, p[0] == asciiOpenSquareBracket else {  // [
        return nil
    }
    return try parseComplexFieldName(allowAnyName: false)
}
1092 | | |
/// Parse an ExtensionName or AnyName and return it without the
/// surrounding brackets. See
/// https://protobuf.dev/reference/protobuf/textformat-spec/#field-names
///
/// Expects `p` to be pointing at the opening "[". Whitespace and comments
/// inside the brackets are dropped, so the returned name is the
/// concatenation of the pieces around them. On return `p` is after the
/// closing "]" and any whitespace that follows it.
///
/// - Parameter allowAnyName: When true, the URL characters and percent
///   escapes allowed in an Any type URL are accepted; when false only
///   identifier characters and "." are.
/// - Throws: `TextFormatDecodingError.malformedText` on end of input, on a
///   character that is not allowed, or on a percent escape that is not
///   followed by two hex digits.
private mutating func parseComplexFieldName(allowAnyName: Bool) throws -> String {
    assert(p[0] == asciiOpenSquareBracket)
    p += 1
    skipWhitespace()
    guard p != end else {
        throw TextFormatDecodingError.malformedText
    }
    var segmentStart = p
    var hasPercentEscape = false

    // The first character has a narrower allowed set than the rest.
    if allowAnyName {
        switch p[0] {
        case asciiPercent:  // spec: url_pct_encoded
            hasPercentEscape = true
        case asciiLowerA...asciiLowerZ,  // spec: url_unreserved - letter
            asciiUpperA...asciiUpperZ,  // spec: url_unreserved - letter
            asciiZero...asciiNine,  // spec: url_unreserved - dec
            asciiUnderscore,  // spec: url_unreserved
            asciiPeriod,  // spec: url_unreserved
            asciiMinus,  // spec: url_unreserved
            asciiTilde,  // spec: url_unreserved
            asciiExclamation,  // spec: url_sub_delim
            asciiDollarSign,  // spec: url_sub_delim
            asciiAmpersand,  // spec: url_sub_delim
            asciiOpenParenthesis,  // spec: url_sub_delim
            asciiCloseParenthesis,  // spec: url_sub_delim
            asciiAsterisk,  // spec: url_sub_delim
            asciiEquals,  // spec: url_sub_delim
            asciiPlus,  // spec: url_sub_delim
            asciiComma,  // spec: url_sub_delim
            asciiSemicolon:  // spec: url_sub_delim
            break
        default:
            throw TextFormatDecodingError.malformedText
        }
    } else {
        switch p[0] {
        // spec: IDENT for start of TypeName
        case asciiLowerA...asciiLowerZ,
            asciiUpperA...asciiUpperZ,
            asciiUnderscore:
            break
        default:
            throw TextFormatDecodingError.malformedText
        }
    }
    p += 1

    // Pieces of the name found so far; stays nil until the first piece is
    // flushed.
    var collected: String? = nil

    // Append the bytes in `segmentStart..<p` (if any) to `collected`.
    func flushSegment() throws {
        guard p != segmentStart else { return }
        guard let piece = utf8ToString(bytes: segmentStart, count: p - segmentStart) else {
            throw TextFormatDecodingError.malformedText
        }
        if collected == nil {
            collected = piece
        } else {
            collected!.append(piece)
        }
    }

    scan: while p != end {
        switch p[0] {
        case asciiLowerA...asciiLowerZ,  // spec: IDENT - letter
            asciiUpperA...asciiUpperZ,  // spec: IDENT - letter
            asciiZero...asciiNine,  // spec: IDENT - letter
            asciiUnderscore,  // spec: IDENT - letter
            asciiPeriod:  // spec: TypeName (also url_unreserved)
            p += 1
        case asciiCloseSquareBracket:  // ]
            break scan
        case asciiForwardSlash,  // spec: url_unreserved
            asciiMinus,  // spec: url_unreserved
            asciiTilde,  // spec: url_unreserved
            asciiExclamation,  // spec: url_sub_delim
            asciiDollarSign,  // spec: url_sub_delim
            asciiAmpersand,  // spec: url_sub_delim
            asciiOpenParenthesis,  // spec: url_sub_delim
            asciiCloseParenthesis,  // spec: url_sub_delim
            asciiAsterisk,  // spec: url_sub_delim
            asciiEquals,  // spec: url_sub_delim
            asciiPlus,  // spec: url_sub_delim
            asciiComma,  // spec: url_sub_delim
            asciiSemicolon:  // spec: url_sub_delim
            guard allowAnyName else {
                throw TextFormatDecodingError.malformedText
            }
            p += 1
        case asciiPercent:  // spec: url_pct_encoded
            guard allowAnyName else {
                throw TextFormatDecodingError.malformedText
            }
            hasPercentEscape = true
            p += 1
        // Don't really want to call skipWhitespace after each character, so duplicate
        // the cases here.
        case asciiSpace,
            asciiTab,  // 9
            asciiNewLine,  // 10
            asciiVerticalTab,  // 11
            asciiFormFeed,  // 12
            asciiCarriageReturn,  // 13
            asciiHash:  // # comment
            // Append what we have, then skip the whitespace/comments and
            // restart the segment at the next significant byte.
            try flushSegment()
            skipWhitespace()
            segmentStart = p
        default:
            throw TextFormatDecodingError.malformedText
        }
    }

    // The loop only ends normally at the closing bracket; anything else
    // means the input ran out.
    guard p != end, p[0] == asciiCloseSquareBracket else {
        throw TextFormatDecodingError.malformedText
    }
    try flushSegment()
    // If there was never anything in the brackets, it was malformed.
    guard let complexName = collected else {
        throw TextFormatDecodingError.malformedText
    }
    p += 1  // Skip ]
    skipWhitespace()

    // Spec requires ensuring any percent escape is valid hex afterwards (and the conformance
    // tests check this is done).
    if hasPercentEscape {
        func isHexDigit(_ byte: UInt8) -> Bool {
            switch byte {
            case asciiZero...asciiNine,
                asciiUpperA...asciiUpperF,
                asciiLowerA...asciiLowerF:
                return true
            default:
                return false
            }
        }
        var bytes = complexName.utf8.makeIterator()
        while let byte = bytes.next() {
            if byte != asciiPercent { continue }
            guard let high = bytes.next(), isHexDigit(high),
                let low = bytes.next(), isHexDigit(low)
            else {
                throw TextFormatDecodingError.malformedText
            }
        }
    }

    return complexName
}
1237 | | |
/// Returns the text of the next key, or nil at end-of-input.
///
/// A key is one of: a bracketed extension/Any name (returned with the
/// brackets put back around it), an identifier, or a field number that
/// starts with 1-9.
///
/// - Parameters:
///   - allowExtensions: Whether a bracketed name is acceptable here.
///   - allowAnyNames: Whether a bracketed name may use the Any type URL
///     syntax. Only meaningful together with `allowExtensions`.
/// - Throws: `TextFormatDecodingError.unknownField` for a bracketed name
///   when `allowExtensions` is false, and
///   `TextFormatDecodingError.malformedText` for anything that cannot
///   start a key or for a field number with too many digits.
internal mutating func nextKey(allowExtensions: Bool, allowAnyNames: Bool = false) throws -> String? {
    assert(allowExtensions || !allowAnyNames)  // allowAnyNames doesn't make sense without allowExtensions
    skipWhitespace()
    guard p != end else {
        return nil
    }
    switch p[0] {
    case asciiOpenSquareBracket:  // [
        guard allowExtensions else {
            throw TextFormatDecodingError.unknownField
        }
        let name = try parseComplexFieldName(allowAnyName: allowAnyNames)
        return "[\(name)]"
    case asciiLowerA...asciiLowerZ,
        asciiUpperA...asciiUpperZ:  // a...z, A...Z
        return parseIdentifier()
    case asciiOne...asciiNine:  // 1...9 (field numbers are 123, not 0123)
        let digitsStart = p
        p += 1
        digits: while p != end {
            let next = p[0]
            guard next >= asciiZero && next <= asciiNine else {
                break digits
            }
            p += 1
            if p - digitsStart > maxFieldNumLength {
                throw TextFormatDecodingError.malformedText
            }
        }
        // Capture the length before the trailing whitespace is consumed.
        let digitCount = p - digitsStart
        skipWhitespace()
        // Safe, can't be invalid UTF-8 given the input (ASCII digits only).
        return utf8ToString(bytes: digitsStart, count: digitCount)!
    default:
        throw TextFormatDecodingError.malformedText
    }
}
1277 | | |
/// Parse a field name (or field number, or bracketed extension name),
/// look it up, and return the corresponding field number.
///
/// Unknown or reserved fields that the options say to ignore are skipped
/// in their entirety (value and optional separator), after which the
/// search continues with the next field.
///
/// Returns nil at end-of-input (only when `terminator` is nil) or when the
/// `terminator` character is reached, in which case it is consumed.
///
/// Throws if the field name cannot be parsed, if the field name is
/// unknown and not being ignored, or if the input ends before an expected
/// terminator.
///
/// This function accounts for as much as 2/3 of the total run
/// time of the entire parse.
internal mutating func nextFieldNumber(
    names: _NameMap,
    messageType: any Message.Type,
    terminator: UInt8?
) throws -> Int? {
    while true {
        skipWhitespace()
        guard p != end else {
            if terminator != nil {
                // Never got the terminator.
                throw TextFormatDecodingError.malformedText
            }
            return nil
        }
        var isReserved = false
        let lead = p[0]
        switch lead {
        case asciiLowerA...asciiLowerZ,
            asciiUpperA...asciiUpperZ:  // a...z, A...Z
            let name = parseUTF8Identifier()
            if let number = names.number(forProtoName: name) {
                return number
            }
            if !options.ignoreUnknownFields {
                guard names.isReserved(name: name) else {
                    throw TextFormatDecodingError.unknownField
                }
                isReserved = true
            }
        // Unknown field name or reserved: leave the switch and skip it.
        case asciiOpenSquareBracket:  // Start of an extension field
            let name = try parseComplexFieldName(allowAnyName: false)
            if let number = extensions?.fieldNumberForProto(messageType: messageType, protoFieldName: name) {
                return number
            }
            guard options.ignoreUnknownExtensionFields else {
                throw TextFormatDecodingError.unknownField
            }
        // Unknown extension name: leave the switch and skip it.
        case asciiOne...asciiNine:  // 1-9 (field numbers are 123, not 0123)
            let digitsStart = p
            var number = Int(lead) - Int(asciiZero)
            p += 1
            digits: while p != end {
                let next = p[0]
                guard next >= asciiZero && next <= asciiNine else {
                    break digits
                }
                // Wrapping arithmetic is fine here: the digit count is
                // bounded by the length check just below.
                number = number &* 10 &+ (Int(next) - Int(asciiZero))
                p += 1
                if p - digitsStart > maxFieldNumLength {
                    throw TextFormatDecodingError.malformedText
                }
            }
            skipWhitespace()
            if names.names(for: number) != nil {
                return number
            }
            if !options.ignoreUnknownFields {
                // The field number is range checked while parsing, so it is safe to truncate.
                guard names.isReserved(number: Int32(truncatingIfNeeded: number)) else {
                    throw TextFormatDecodingError.unknownField
                }
                isReserved = true
            }
        // Unknown field number or reserved: leave the switch and skip it.
        default:
            guard lead == terminator else {
                throw TextFormatDecodingError.malformedText
            }
            let _ = skipOptionalObjectEnd(lead)
            return nil
        }

        assert(options.ignoreUnknownFields || options.ignoreUnknownExtensionFields || isReserved)
        try skipUnknownFieldValue()
        // Skip any separator before looping around to try for another field.
        skipOptionalSeparator()
    }
}
1375 | | |
// Helper to skip past an unknown field value. When called, `p` will be
// pointing at the first character after the unknown field name.
//
// This is modeled after the C++ text_format.cpp `ConsumeField()`.
//
// The type of the field is guessed from its syntax:
//  - A non-message field has a ":" between the field name and the value,
//    and the value does not start with "{" or "<" (which would indicate
//    the beginning of a message body).
//  - If there is no ":", or there is a "{" or "<" after the ":", the field
//    has to be a message or the input is ill-formed.
internal mutating func skipUnknownFieldValue() throws {
    skipWhitespace()
    guard skipOptionalColon() else {
        // No ':' at all, so only a message body is legal here.
        try skipUnknownMessageFieldValue()
        return
    }
    guard p != end else {
        // Nothing after the ':'?
        throw TextFormatDecodingError.malformedText
    }
    switch p[0] {
    case asciiOpenAngleBracket, asciiOpenCurlyBracket:
        try skipUnknownMessageFieldValue()
    default:
        try skipUnknownPrimativeFieldValue()
    }
}
1404 | | |
/// Helper to see if the value at `p` could be the start of a hex or octal
/// number, so unknown field value parsing can decide how to
/// parse/validate it.
///
/// Returns true only for an optional '-' followed by "0" at end of input,
/// "0x", or "0" followed by an octal digit. Everything else returns false,
/// including anything that might simply be a decimal integer: it is treated
/// as floating point, because a value too large for a UInt64 would still be
/// valid for a float/double field.
///
/// NOTE: If we run out of characters/can't tell, this just says it doesn't
/// have to be decimal and lets the other code handle the error.
///
/// NOTE(review): reads `p[0]` without checking for end of input; the caller
/// in this file only gets here after an end-of-input check — confirm for
/// any new caller.
private func mustParseNumberAsDecimal() -> Bool {
    var cursor = p
    var current = cursor[0]

    // Floats or decimals can have leading '-'
    if current == asciiMinus {
        cursor += 1
        guard cursor != end else { return false }
        current = cursor[0]
    }

    // Only a leading '0' can introduce hex or octal. A leading '.' is clearly
    // a float, and any other start is left to the floating point path.
    guard current == asciiZero else {
        return false
    }

    cursor += 1
    guard cursor != end else {
        return true  // "(-)0[end]" : parse it as decimal
    }
    let following = cursor[0]
    // "(-)0x" : hex, "(-)0[0-7]" : octal - both must go through the integer
    // parsers. "(-)0." is clearly a float, and anything else is also left to
    // the floating point path.
    return following == asciiLowerX
        || (following >= asciiZero && following <= asciiSeven)
}
1443 | | |
// Skip a non-message value of an unknown field: a (possibly multi-part)
// quoted string, a bracketed list of values, an identifier, or a number.
//
// `canBeList` is false while skipping the elements of a list, since a
// list directly inside a list is not legal.
//
// This is modeled after the C++ text_format.cpp `SkipFieldValue()`.
//
// NOTE(review): reads `p[0]` on entry without an end-of-input check; both
// call sites in this file check `p != end` first.
private mutating func skipUnknownPrimativeFieldValue(canBeList: Bool = true) throws {
    let lead = p[0]

    if lead == asciiSingleQuote || lead == asciiDoubleQuote {
        // Note: the field could be 'bytes', so we can't parse that as a string
        // as it might fail.
        let _ = try nextBytesValue()
        return
    }

    if skipOptionalBeginArray() {
        guard canBeList else {
            // Have encountered an array as an element in an array; that isn't legal.
            throw TextFormatDecodingError.malformedText
        }
        var reachedEnd = skipOptionalEndArray()
        while !reachedEnd {
            guard p != end else {
                throw TextFormatDecodingError.malformedText
            }
            switch p[0] {
            case asciiOpenAngleBracket, asciiOpenCurlyBracket:
                try skipUnknownMessageFieldValue()
            default:
                try skipUnknownPrimativeFieldValue(canBeList: false)
            }
            reachedEnd = skipOptionalEndArray()
            if !reachedEnd {
                try skipRequiredComma()
            }
        }
        return
    }

    // NOTE: This will also cover "true", "false" for booleans, "nan"/"inf" for floats.
    if try nextOptionalEnumName() != nil {
        skipWhitespace()  // `nextOptionalEnumName()` doesn't skip trailing whitespace
        return
    }

    // NOTE: We don't need to special case "-nan"/"-inf", as they won't be forced
    // to parse as decimal, and `nextDouble()` already supports them.
    guard mustParseNumberAsDecimal() else {
        let _ = try nextDouble()
        return
    }
    if lead == asciiMinus {
        let _ = try nextSInt()
    } else {
        let _ = try nextUInt()
    }
}
1498 | | |
// Skip a message-valued unknown field: the opening bracket, every
// key/value pair inside it (with optional separators), and the matching
// closing bracket.
//
// This is modeled after the C++ text_format.cpp `SkipFieldMessage()`.
private mutating func skipUnknownMessageFieldValue() throws {
    let closer = try skipObjectStart()
    while !skipOptionalObjectEnd(closer) {
        guard p != end else {
            throw TextFormatDecodingError.malformedText
        }
        // Must be a valid field name or extension name ("[ext.name]").
        guard try nextKey(allowExtensions: true, allowAnyNames: true) != nil else {
            throw TextFormatDecodingError.malformedText
        }
        try skipUnknownFieldValue()
        skipOptionalSeparator()
    }
}
1516 | | |
// Skip leading whitespace, then the byte `c`, then trailing whitespace.
// Throws `malformedText` if the next non-whitespace byte is not `c` or the
// input has run out.
private mutating func skipRequiredCharacter(_ c: UInt8) throws {
    skipWhitespace()
    guard p != end, p[0] == c else {
        throw TextFormatDecodingError.malformedText
    }
    p += 1
    skipWhitespace()
}
1526 | | |
/// Consumes a "," along with the whitespace on either side of it.
/// Throws `malformedText` if the next token is not a comma.
internal mutating func skipRequiredComma() throws {
    let comma = asciiComma
    try skipRequiredCharacter(comma)
}
1530 | | |
/// Consumes a ":" along with the whitespace on either side of it.
/// Throws `malformedText` if the next token is not a colon.
internal mutating func skipRequiredColon() throws {
    let colon = asciiColon
    try skipRequiredCharacter(colon)
}
1534 | | |
// If the byte at `p` is `c`, consume it plus any whitespace after it and
// return true; otherwise leave `p` alone and return false. Leading
// whitespace is NOT skipped here.
private mutating func skipOptionalCharacter(_ c: UInt8) -> Bool {
    guard p != end, p[0] == c else {
        return false
    }
    p += 1
    skipWhitespace()
    return true
}
1543 | | |
/// Consumes a ":" (and the whitespace after it) if one is next.
/// Returns whether it did.
internal mutating func skipOptionalColon() -> Bool {
    let sawColon = skipOptionalCharacter(asciiColon)
    return sawColon
}
1547 | | |
/// Consumes a "]" (and the whitespace after it) if one is next.
/// Returns whether it did.
internal mutating func skipOptionalEndArray() -> Bool {
    let sawClose = skipOptionalCharacter(asciiCloseSquareBracket)
    return sawClose
}
1551 | | |
/// Consumes a "[" (and the whitespace after it) if one is next.
/// Returns whether it did.
internal mutating func skipOptionalBeginArray() -> Bool {
    let sawOpen = skipOptionalCharacter(asciiOpenSquareBracket)
    return sawOpen
}
1555 | | |
/// Consumes the object terminator `c` (and the whitespace after it) if it
/// is next, decrementing the recursion depth when it does.
/// Returns whether the terminator was consumed.
internal mutating func skipOptionalObjectEnd(_ c: UInt8) -> Bool {
    guard skipOptionalCharacter(c) else {
        return false
    }
    decrementRecursionDepth()
    return true
}
1563 | | |
/// Consumes a single "," or ";" separator (and the whitespace after it)
/// if one is next. Does nothing otherwise.
internal mutating func skipOptionalSeparator() {
    guard p != end else {
        return
    }
    switch p[0] {
    case asciiComma, asciiSemicolon:  // comma or semicolon
        p += 1
        skipWhitespace()
    default:
        break
    }
}
1573 | | |
/// Consumes the opening bracket of an object and returns the character
/// that should end it. E.g., if the object starts with "{", returns "}";
/// if it starts with "<", returns ">".
///
/// The recursion depth is incremented before anything is read. The next
/// byte is consumed, along with the whitespace after it, even when it
/// turns out not to be an opening bracket.
///
/// - Throws: Whatever `incrementRecursionDepth()` throws, or
///   `TextFormatDecodingError.malformedText` at end of input or when the
///   next byte is neither "{" nor "<".
internal mutating func skipObjectStart() throws -> UInt8 {
    try incrementRecursionDepth()
    guard p != end else {
        throw TextFormatDecodingError.malformedText
    }
    let opener = p[0]
    p += 1
    skipWhitespace()
    switch opener {
    case asciiOpenCurlyBracket:  // {
        return asciiCloseCurlyBracket  // }
    case asciiOpenAngleBracket:  // <
        return asciiCloseAngleBracket  // >
    default:
        throw TextFormatDecodingError.malformedText
    }
}
1593 | | } |