/proc/self/cwd/external/abseil-cpp~/absl/strings/charset.h
Line | Count | Source |
1 | | // Copyright 2022 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | // |
15 | | // ----------------------------------------------------------------------------- |
16 | | // File: charset.h |
17 | | // ----------------------------------------------------------------------------- |
18 | | // |
19 | | // This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned |
20 | | // characters. |
21 | | // |
22 | | // Instances can be initialized as constexpr constants. For example: |
23 | | // |
24 | | // constexpr absl::CharSet kJustX = absl::CharSet::Char('x'); |
25 | | // constexpr absl::CharSet kMySymbols = absl::CharSet("$@!"); |
26 | | // constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z'); |
27 | | // |
28 | | // Multiple instances can be combined, and the result is still a constexpr |
29 | | // expression. For example: |
30 | | // |
31 | | // constexpr absl::CharSet kLettersAndNumbers = |
32 | | // absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9'); |
33 | | // |
34 | | // Several pre-defined character classes are available that mirror the methods |
35 | | // from <cctype>. For example: |
36 | | // |
37 | | // constexpr absl::CharSet kLettersAndWhitespace = |
38 | | // absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace(); |
39 | | // |
40 | | // To check membership, use the .contains method, e.g. |
41 | | // |
42 | | // absl::CharSet hex_letters("abcdef"); |
43 | | // hex_letters.contains('a'); // true |
44 | | // hex_letters.contains('g'); // false |
45 | | |
46 | | #ifndef ABSL_STRINGS_CHARSET_H_ |
47 | | #define ABSL_STRINGS_CHARSET_H_ |
48 | | |
49 | | #include <cstdint> |
50 | | |
51 | | #include "absl/base/config.h" |
52 | | #include "absl/strings/string_view.h" |
53 | | |
54 | | namespace absl { |
55 | | ABSL_NAMESPACE_BEGIN |
56 | | |
57 | | class CharSet { |
58 | | public: |
59 | 0 | constexpr CharSet() : m_() {} |
60 | | |
61 | | // Initializes with a given string_view. |
62 | 0 | constexpr explicit CharSet(absl::string_view str) : m_() { |
63 | 0 | for (char c : str) { |
64 | 0 | SetChar(static_cast<unsigned char>(c)); |
65 | 0 | } |
66 | 0 | } |
67 | | |
68 | 0 | constexpr bool contains(char c) const { |
69 | 0 | return ((m_[static_cast<unsigned char>(c) / 64] >> |
70 | 0 | (static_cast<unsigned char>(c) % 64)) & |
71 | 0 | 0x1) == 0x1; |
72 | 0 | } |
73 | | |
74 | 0 | constexpr bool empty() const { |
75 | 0 | for (uint64_t c : m_) { |
76 | 0 | if (c != 0) return false; |
77 | 0 | } |
78 | 0 | return true; |
79 | 0 | } |
80 | | |
81 | | // Containing only a single specified char. |
82 | 0 | static constexpr CharSet Char(char x) { |
83 | 0 | return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1), |
84 | 0 | CharMaskForWord(x, 2), CharMaskForWord(x, 3)); |
85 | 0 | } |
86 | | |
87 | | // Containing all the chars in the closed interval [lo,hi]. |
88 | 0 | static constexpr CharSet Range(char lo, char hi) { |
89 | 0 | return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1), |
90 | 0 | RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3)); |
91 | 0 | } |
92 | | |
93 | 0 | friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) { |
94 | 0 | return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2], |
95 | 0 | a.m_[3] & b.m_[3]); |
96 | 0 | } |
97 | | |
98 | 0 | friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) { |
99 | 0 | return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2], |
100 | 0 | a.m_[3] | b.m_[3]); |
101 | 0 | } |
102 | | |
103 | 0 | friend constexpr CharSet operator~(const CharSet& a) { |
104 | 0 | return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]); |
105 | 0 | } |
106 | | |
107 | | // Mirrors the char-classifying predicates in <cctype>. |
108 | 0 | static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); } |
109 | 0 | static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); } |
110 | 0 | static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); } |
111 | 0 | static constexpr CharSet AsciiAlphabet() { |
112 | 0 | return AsciiLowercase() | AsciiUppercase(); |
113 | 0 | } |
114 | 0 | static constexpr CharSet AsciiAlphanumerics() { |
115 | 0 | return AsciiDigits() | AsciiAlphabet(); |
116 | 0 | } |
117 | 0 | static constexpr CharSet AsciiHexDigits() { |
118 | 0 | return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f'); |
119 | 0 | } |
120 | 0 | static constexpr CharSet AsciiPrintable() { |
121 | 0 | return CharSet::Range(0x20, 0x7e); |
122 | 0 | } |
123 | 0 | static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); } |
124 | 0 | static constexpr CharSet AsciiPunctuation() { |
125 | 0 | return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics(); |
126 | 0 | } |
127 | | |
128 | | private: |
129 | | constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3) |
130 | 0 | : m_{b0, b1, b2, b3} {} |
131 | | |
132 | 0 | static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) { |
133 | 0 | return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) & |
134 | 0 | ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word); |
135 | 0 | } |
136 | | |
137 | | // All the chars in the specified word of the range [0, upper). |
138 | | static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper, |
139 | 0 | uint64_t word) { |
140 | 0 | return (upper <= 64 * word) ? 0 |
141 | 0 | : (upper >= 64 * (word + 1)) |
142 | 0 | ? ~static_cast<uint64_t>(0) |
143 | 0 | : (~static_cast<uint64_t>(0) >> (64 - upper % 64)); |
144 | 0 | } |
145 | | |
146 | 0 | static constexpr uint64_t CharMaskForWord(char x, uint64_t word) { |
147 | 0 | return (static_cast<unsigned char>(x) / 64 == word) |
148 | 0 | ? (static_cast<uint64_t>(1) |
149 | 0 | << (static_cast<unsigned char>(x) % 64)) |
150 | 0 | : 0; |
151 | 0 | } |
152 | | |
153 | 0 | constexpr void SetChar(unsigned char c) { |
154 | 0 | m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64); |
155 | 0 | } |
156 | | |
157 | | uint64_t m_[4]; |
158 | | }; |
159 | | |
160 | | ABSL_NAMESPACE_END |
161 | | } // namespace absl |
162 | | |
163 | | #endif // ABSL_STRINGS_CHARSET_H_ |