/src/swift-protobuf/Sources/SwiftProtobuf/StringUtils.swift
Line | Count | Source |
1 | | // Sources/SwiftProtobuf/StringUtils.swift - String utility functions |
2 | | // |
3 | | // Copyright (c) 2014 - 2016 Apple Inc. and the project authors |
4 | | // Licensed under Apache License v2.0 with Runtime Library Exception |
5 | | // |
6 | | // See LICENSE.txt for license information: |
7 | | // https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt |
8 | | // |
9 | | // ----------------------------------------------------------------------------- |
10 | | /// |
11 | | /// Utility functions for converting UTF8 bytes into Strings. |
12 | | /// These functions must: |
13 | | /// * Accept any valid UTF8, including a zero byte (which is |
14 | | /// a valid UTF8 encoding of U+0000) |
15 | | /// * Return nil for any invalid UTF8 |
16 | | /// * Be fast (since they're extensively used by all decoders |
17 | | /// and even some of the encoders) |
18 | | /// |
19 | | // ----------------------------------------------------------------------------- |
20 | | |
21 | | import Foundation |
22 | | |
23 | | // Note: Once our minimum support version is at least Swift 5.3, we should |
24 | | // probably recast the following to use String(unsafeUninitializedCapacity:) |
25 | | |
26 | | // Note: We're trying to avoid Foundation's String(format:) since that's not |
27 | | // universally available. |
28 | | |
/// Renders a non-negative integer as decimal text, left-padded with "0" so
/// the result is at least `digits` characters long.
///
/// - Parameters:
///   - value: The number to format. Must be non-negative; a negative value
///     trips the precondition.
///   - digits: The minimum width of the result.
/// - Returns: The decimal text of `value`, unchanged when it already has
///   `digits` or more characters, otherwise prefixed with zeros.
private func formatZeroPaddedInt(_ value: Int32, digits: Int) -> String {
    precondition(value >= 0)
    let text = String(value)
    guard text.count < digits else {
        // Already wide enough; nothing to pad.
        return text
    }
    let padding = String(repeating: "0", count: digits - text.count)
    return padding + text
}
39 | | |
/// Formats `value` as decimal text, zero-padded on the left to at least two
/// characters. `value` must be non-negative.
internal func twoDigit(_ value: Int32) -> String {
    return formatZeroPaddedInt(value, digits: 2)
}
/// Formats `value` as decimal text, zero-padded on the left to at least three
/// characters. `value` must be non-negative.
internal func threeDigit(_ value: Int32) -> String {
    return formatZeroPaddedInt(value, digits: 3)
}
/// Formats `value` as decimal text, zero-padded on the left to at least four
/// characters. `value` must be non-negative.
internal func fourDigit(_ value: Int32) -> String {
    return formatZeroPaddedInt(value, digits: 4)
}
/// Formats `value` as decimal text, zero-padded on the left to at least six
/// characters. `value` must be non-negative.
internal func sixDigit(_ value: Int32) -> String {
    return formatZeroPaddedInt(value, digits: 6)
}
/// Formats `value` as decimal text, zero-padded on the left to at least nine
/// characters. `value` must be non-negative.
internal func nineDigit(_ value: Int32) -> String {
    return formatZeroPaddedInt(value, digits: 9)
}
55 | | |
56 | | // Wrapper that takes a buffer and start/end offsets |
/// Decodes the bytes of `bytes` in the half-open offset range `start..<end`
/// as UTF-8.
///
/// - Parameters:
///   - bytes: The buffer holding the encoded text.
///   - start: Offset of the first byte to decode.
///   - end: Offset one past the last byte to decode.
/// - Returns: The decoded string (empty when `start == end`), or `nil` when
///   the selected bytes are not valid UTF-8.
internal func utf8ToString(
    bytes: UnsafeRawBufferPointer,
    start: UnsafeRawBufferPointer.Index,
    end: UnsafeRawBufferPointer.Index
) -> String? {
    // An empty buffer may have a nil `baseAddress`. An empty range needs no
    // memory access at all, so answer it before touching the pointer rather
    // than trapping on a force-unwrap.
    if start == end {
        return String()
    }
    guard let base = bytes.baseAddress else {
        // A buffer with no storage cannot contain a non-empty range; this is
        // a caller bug, so keep trapping as the force-unwrap did.
        preconditionFailure("utf8ToString: non-empty range requested from a buffer with no storage")
    }
    return utf8ToString(bytes: base + start, count: end - start)
}
64 | | |
65 | | // Swift 4 introduced new faster String facilities |
66 | | // that seem to work consistently across all platforms. |
67 | | |
68 | | // Notes on performance: |
69 | | // |
70 | | // The pre-verification here only takes about 10% of |
71 | | // the time needed for constructing the string. |
72 | | // Eliminating it would provide only a very minor |
73 | | // speed improvement. |
74 | | // |
75 | | // On macOS, this is only about 25% faster than |
76 | | // the Foundation initializer formerly used for Swift 3. |
77 | | // On Linux, the Foundation initializer is much |
78 | | // slower than on macOS, so this is a much bigger |
79 | | // win there. |
/// Builds a `String` from `count` bytes starting at `bytes`, accepting the
/// input only when it is well-formed UTF-8.
///
/// - Parameters:
///   - bytes: Pointer to the first encoded byte.
///   - count: Number of bytes to decode.
/// - Returns: An empty string when `count` is zero, `nil` when the UTF-8
///   parser reports an error, and the decoded string otherwise.
internal func utf8ToString(bytes: UnsafeRawPointer, count: Int) -> String? {
    guard count != 0 else {
        return String()
    }
    let buffer = UnsafeRawBufferPointer(start: bytes, count: count)

    // Walk the whole input with the UTF-8 parser first: the decoding
    // initializer used at the end is fast but does not reject broken
    // UTF-8, so rejection has to happen here.
    var parser = Unicode.UTF8.ForwardParser()
    var remaining = buffer.makeIterator()
    while true {
        switch parser.parseScalar(from: &remaining) {
        case .valid:
            continue
        case .error:
            return nil
        case .emptyInput:
            // Every scalar parsed cleanly; the input is valid UTF-8.
            return String(decoding: buffer, as: Unicode.UTF8.self)
        }
    }
}
105 | | |
extension Unicode.Scalar {
    /// The ASCII uppercase counterpart of this scalar.
    ///
    /// Scalars in "a" through "z" map to "A" through "Z"; every other scalar
    /// is returned unchanged.
    var uppercasedAssumingASCII: Unicode.Scalar {
        if self < "a" || self > "z" {
            return self
        }
        // Masking with 0x5f clears bit 0x20, the only bit that differs
        // between an ASCII lowercase letter and its uppercase form. The
        // result is below 0x80, so the non-failable UInt8 initializer applies.
        return Unicode.Scalar(UInt8(truncatingIfNeeded: value & 0x5f))
    }
}