Coverage Report

Created: 2025-09-05 06:37

/src/swift-protobuf/Sources/SwiftProtobuf/StringUtils.swift
Line
Count
Source
1
// Sources/SwiftProtobuf/StringUtils.swift - String utility functions
2
//
3
// Copyright (c) 2014 - 2016 Apple Inc. and the project authors
4
// Licensed under Apache License v2.0 with Runtime Library Exception
5
//
6
// See LICENSE.txt for license information:
7
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
8
//
9
// -----------------------------------------------------------------------------
10
///
11
/// Utility functions for converting UTF8 bytes into Strings.
12
/// These functions must:
13
///  * Accept any valid UTF8, including a zero byte (which is
14
///    a valid UTF8 encoding of U+0000)
15
///  * Return nil for any invalid UTF8
16
///  * Be fast (since they're extensively used by all decoders
17
///    and even some of the encoders)
18
///
19
// -----------------------------------------------------------------------------
20
21
import Foundation
22
23
// Note: Once our minimum support version is at least Swift 5.3, we should
24
// probably recast the following to use String(unsafeUninitializedCapacity:)
25
26
// Note: We're trying to avoid Foundation's String(format:) since that's not
27
// universally available.
28
29
38.1k
/// Renders `value` in decimal, left-padding it with "0" characters until it is
/// at least `digits` characters wide.
///
/// Values that already need `digits` or more characters are returned without
/// padding or truncation.
///
/// - Parameters:
///   - value: The number to format. Must be non-negative; a negative value
///     fails the precondition.
///   - digits: The minimum width of the result, in characters.
/// - Returns: The zero-padded decimal text.
private func formatZeroPaddedInt(_ value: Int32, digits: Int) -> String {
    precondition(value >= 0)
    let text = String(value)
    let width = text.count
    // Compare before subtracting so the padding length is only computed when
    // padding is actually needed.
    guard width < digits else {
        return text
    }
    let zeros = String(repeating: "0", count: digits - width)
    return zeros + text
}
39
40
13.0k
/// Formats `value` as decimal text, zero-padded on the left to at least two
/// characters.
///
/// - Parameter value: A non-negative number; negative values fail the
///   precondition in `formatZeroPaddedInt`.
/// - Returns: The padded text; wider values are returned unpadded.
internal func twoDigit(_ value: Int32) -> String {
    let minimumWidth = 2
    return formatZeroPaddedInt(value, digits: minimumWidth)
}
43
1.85k
/// Formats `value` as decimal text, zero-padded on the left to at least three
/// characters.
///
/// - Parameter value: A non-negative number; negative values fail the
///   precondition in `formatZeroPaddedInt`.
/// - Returns: The padded text; wider values are returned unpadded.
internal func threeDigit(_ value: Int32) -> String {
    let minimumWidth = 3
    return formatZeroPaddedInt(value, digits: minimumWidth)
}
46
2.60k
/// Formats `value` as decimal text, zero-padded on the left to at least four
/// characters.
///
/// - Parameter value: A non-negative number; negative values fail the
///   precondition in `formatZeroPaddedInt`.
/// - Returns: The padded text; wider values are returned unpadded.
internal func fourDigit(_ value: Int32) -> String {
    let minimumWidth = 4
    return formatZeroPaddedInt(value, digits: minimumWidth)
}
49
882
/// Formats `value` as decimal text, zero-padded on the left to at least six
/// characters.
///
/// - Parameter value: A non-negative number; negative values fail the
///   precondition in `formatZeroPaddedInt`.
/// - Returns: The padded text; wider values are returned unpadded.
internal func sixDigit(_ value: Int32) -> String {
    let minimumWidth = 6
    return formatZeroPaddedInt(value, digits: minimumWidth)
}
52
1.40k
/// Formats `value` as decimal text, zero-padded on the left to at least nine
/// characters.
///
/// - Parameter value: A non-negative number; negative values fail the
///   precondition in `formatZeroPaddedInt`.
/// - Returns: The padded text; wider values are returned unpadded.
internal func nineDigit(_ value: Int32) -> String {
    let minimumWidth = 9
    return formatZeroPaddedInt(value, digits: minimumWidth)
}
55
56
// Wrapper that takes a buffer and start/end offsets
57
/// Decodes the bytes of `bytes` in the half-open offset range `start..<end`
/// as UTF-8 by forwarding to `utf8ToString(bytes:count:)`.
///
/// - Parameters:
///   - bytes: The buffer holding the encoded text.
///   - start: Offset of the first byte to decode.
///   - end: Offset one past the last byte to decode.
/// - Returns: The decoded string, or `nil` if the selected bytes are not
///   valid UTF-8.
internal func utf8ToString(
    bytes: UnsafeRawBufferPointer,
    start: UnsafeRawBufferPointer.Index,
    end: UnsafeRawBufferPointer.Index
) -> String? {
    guard let base = bytes.baseAddress else {
        // A buffer with a nil base address is empty, so the only range it can
        // satisfy is the empty one. Decoding that range yields an empty
        // string instead of trapping on a force-unwrap; any other range is a
        // caller bug and still traps.
        precondition(start == end, "Range out of bounds for an empty buffer")
        return String()
    }
    return utf8ToString(bytes: base + start, count: end - start)
}
64
65
// Swift 4 introduced new faster String facilities
66
// that seem to work consistently across all platforms.
67
68
// Notes on performance:
69
//
70
// The pre-verification here only takes about 10% of
71
// the time needed for constructing the string.
72
// Eliminating it would provide only a very minor
73
// speed improvement.
74
//
75
// On macOS, this is only about 25% faster than
76
// the Foundation initializer previously used for Swift 3.
77
// On Linux, the Foundation initializer is much
78
// slower than on macOS, so this is a much bigger
79
// win there.
80
2.41M
/// Builds a `String` from `count` bytes of UTF-8 starting at `bytes`.
///
/// The bytes are first scanned with a UTF-8 forward parser; only input that
/// parses without error is handed to `String(decoding:as:)`.
///
/// - Parameters:
///   - bytes: Pointer to the first encoded byte.
///   - count: Number of bytes to decode. Zero yields an empty string without
///     reading from `bytes`.
/// - Returns: The decoded string, or `nil` if the bytes are not valid UTF-8.
internal func utf8ToString(bytes: UnsafeRawPointer, count: Int) -> String? {
    guard count != 0 else {
        return String()
    }
    let utf8Bytes = UnsafeRawBufferPointer(start: bytes, count: count)

    // Walk the whole input one scalar at a time; the first malformed
    // sequence rejects the input.
    var parser = Unicode.UTF8.ForwardParser()
    var remaining = utf8Bytes.makeIterator()
    while true {
        switch parser.parseScalar(from: &remaining) {
        case .valid:
            continue
        case .error:
            return nil
        case .emptyInput:
            // Everything parsed cleanly. This initializer is fast but would
            // not reject broken UTF-8 by itself, hence the scan above.
            return String(decoding: utf8Bytes, as: Unicode.UTF8.self)
        }
    }
}
105
106
extension Unicode.Scalar {
    /// The uppercase form of this scalar, treating it as ASCII.
    ///
    /// Only the lowercase letters "a" through "z" are changed; every other
    /// scalar is returned as-is.
    var uppercasedAssumingASCII: Unicode.Scalar {
        if self < "a" || self > "z" {
            return self
        }
        // Masking with 0x5f clears bit 5, which maps "a"..."z" onto
        // "A"..."Z". The masked value always fits in a byte, so the
        // non-failable UInt8 initializer can be used.
        let upperCode = UInt8(value & 0x5f)
        return Unicode.Scalar(upperCode)
    }
}