/src/abseil-cpp/absl/strings/ascii.h
Line | Count | Source |
1 | | // |
2 | | // Copyright 2017 The Abseil Authors. |
3 | | // |
4 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | // you may not use this file except in compliance with the License. |
6 | | // You may obtain a copy of the License at |
7 | | // |
8 | | // https://www.apache.org/licenses/LICENSE-2.0 |
9 | | // |
10 | | // Unless required by applicable law or agreed to in writing, software |
11 | | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | // See the License for the specific language governing permissions and |
14 | | // limitations under the License. |
15 | | // |
16 | | // ----------------------------------------------------------------------------- |
17 | | // File: ascii.h |
18 | | // ----------------------------------------------------------------------------- |
19 | | // |
20 | | // This package contains functions operating on characters and strings |
21 | | // restricted to standard ASCII. These include character classification |
22 | | // functions analogous to those found in the ANSI C Standard Library <ctype.h> |
23 | | // header file. |
24 | | // |
25 | | // C++ implementations provide <ctype.h> functionality based on their |
26 | | // C environment locale. In general, reliance on such a locale is not ideal, as |
27 | | // the locale standard is problematic (and may not return invariant information |
28 | | // for the same character set, for example). These `ascii_*()` functions are |
29 | | // hard-wired for standard ASCII, much faster, and guaranteed to behave |
30 | | // consistently. They will never be overloaded, nor will their function |
31 | | // signature change. |
32 | | // |
33 | | // `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`, |
34 | | // `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`, |
35 | | // `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`, |
36 | | // `ascii_isxdigit()` |
37 | | // Analogous to the <ctype.h> functions with similar names, these |
38 | | // functions take an unsigned char and return a bool, based on whether the |
39 | | // character matches the condition specified. |
40 | | // |
41 | | // If the input character has a numerical value greater than 127, these |
42 | | // functions return `false`. |
43 | | // |
44 | | // `ascii_tolower()`, `ascii_toupper()` |
45 | | // Analogous to the <ctype.h> functions with similar names, these functions |
46 | | // take an unsigned char and return a char. |
47 | | // |
48 | | // If the input character is not an ASCII {lower,upper}-case letter (including |
49 | | // numerical values greater than 127) then the functions return the same value |
50 | | // as the input character. |
51 | | |
52 | | #ifndef ABSL_STRINGS_ASCII_H_ |
53 | | #define ABSL_STRINGS_ASCII_H_ |
54 | | |
55 | | #include <algorithm> |
56 | | #include <cstddef> |
57 | | #include <string> |
58 | | #include <utility> |
59 | | |
60 | | #include "absl/base/attributes.h" |
61 | | #include "absl/base/config.h" |
62 | | #include "absl/base/nullability.h" |
63 | | #include "absl/strings/internal/resize_uninitialized.h" |
64 | | #include "absl/strings/resize_and_overwrite.h" |
65 | | #include "absl/strings/string_view.h" |
66 | | |
67 | | namespace absl { |
68 | | ABSL_NAMESPACE_BEGIN |
namespace ascii_internal {

// Declaration for an array of bitfields holding character information.
//
// One entry per possible `unsigned char` value. The classification functions
// in this header test single bits of an entry:
//   0x01 alpha, 0x04 alnum, 0x08 space, 0x10 punct,
//   0x20 blank, 0x40 cntrl, 0x80 xdigit.
// The table itself is defined outside this header.
ABSL_DLL extern const unsigned char kPropertyBits[256];

// Declaration for the array of characters to upper-case characters.
// Indexed by `unsigned char` value; read by `ascii_toupper()`.
ABSL_DLL extern const char kToUpper[256];

// Declaration for the array of characters to lower-case characters.
// Indexed by `unsigned char` value; read by `ascii_tolower()`.
ABSL_DLL extern const char kToLower[256];

// Bulk lower-casing helper used by `absl::AsciiStrToLower(absl::string_view)`,
// which passes an output buffer as `dst` and the view's data()/size() as
// `src`/`n`. Defined outside this header.
// NOTE(review): `src` is nullable, presumably to allow an empty view whose
// data() is null (n == 0) -- confirm against the definition.
void AsciiStrToLower(char* absl_nonnull dst, const char* absl_nullable src,
                     size_t n);

// Bulk upper-casing helper used by `absl::AsciiStrToUpper(absl::string_view)`;
// same calling convention as `AsciiStrToLower` above.
void AsciiStrToUpper(char* absl_nonnull dst, const char* absl_nullable src,
                     size_t n);

}  // namespace ascii_internal
87 | | |
88 | | // ascii_isalpha() |
89 | | // |
90 | | // Determines whether the given character is an alphabetic character. |
91 | 0 | inline bool ascii_isalpha(unsigned char c) { |
92 | 0 | return (ascii_internal::kPropertyBits[c] & 0x01) != 0; |
93 | 0 | } |
94 | | |
95 | | // ascii_isalnum() |
96 | | // |
97 | | // Determines whether the given character is an alphanumeric character. |
98 | 0 | inline bool ascii_isalnum(unsigned char c) { |
99 | 0 | return (ascii_internal::kPropertyBits[c] & 0x04) != 0; |
100 | 0 | } |
101 | | |
102 | | // ascii_isspace() |
103 | | // |
104 | | // Determines whether the given character is a whitespace character (space, |
105 | | // tab, vertical tab, formfeed, linefeed, or carriage return). |
106 | 0 | inline bool ascii_isspace(unsigned char c) { |
107 | 0 | return (ascii_internal::kPropertyBits[c] & 0x08) != 0; |
108 | 0 | } |
109 | | |
110 | | // ascii_ispunct() |
111 | | // |
112 | | // Determines whether the given character is a punctuation character. |
113 | 0 | inline bool ascii_ispunct(unsigned char c) { |
114 | 0 | return (ascii_internal::kPropertyBits[c] & 0x10) != 0; |
115 | 0 | } |
116 | | |
117 | | // ascii_isblank() |
118 | | // |
119 | | // Determines whether the given character is a blank character (tab or space). |
120 | 0 | inline bool ascii_isblank(unsigned char c) { |
121 | 0 | return (ascii_internal::kPropertyBits[c] & 0x20) != 0; |
122 | 0 | } |
123 | | |
124 | | // ascii_iscntrl() |
125 | | // |
126 | | // Determines whether the given character is a control character. |
127 | 0 | inline bool ascii_iscntrl(unsigned char c) { |
128 | 0 | return (ascii_internal::kPropertyBits[c] & 0x40) != 0; |
129 | 0 | } |
130 | | |
131 | | // ascii_isxdigit() |
132 | | // |
133 | | // Determines whether the given character can be represented as a hexadecimal |
134 | | // digit character (i.e. {0-9} or {A-F}). |
135 | 0 | inline bool ascii_isxdigit(unsigned char c) { |
136 | 0 | return (ascii_internal::kPropertyBits[c] & 0x80) != 0; |
137 | 0 | } |
138 | | |
// ascii_isdigit()
//
// Reports whether `c` is a decimal digit character (i.e. one of {0-9}).
// Usable in constant expressions.
inline constexpr bool ascii_isdigit(unsigned char c) {
  // Anything outside the closed range ['0', '9'] is rejected.
  return !(c < '0' || c > '9');
}
146 | | |
// ascii_isprint()
//
// Reports whether `c` is printable, the space character included: true for
// the codes 32 through 126. Usable in constant expressions.
inline constexpr bool ascii_isprint(unsigned char c) {
  return 32 <= c && c <= 126;
}
153 | | |
// ascii_isgraph()
//
// Reports whether `c` has a graphical representation: true for the codes 33
// through 126, so the space character (32) does not qualify. Usable in
// constant expressions.
inline constexpr bool ascii_isgraph(unsigned char c) {
  return 33 <= c && c <= 126;
}
160 | | |
// ascii_isupper()
//
// Reports whether `c` is an uppercase letter (i.e. one of {A-Z}). Usable in
// constant expressions.
inline constexpr bool ascii_isupper(unsigned char c) {
  return 'A' <= c && c <= 'Z';
}
167 | | |
// ascii_islower()
//
// Reports whether `c` is a lowercase letter (i.e. one of {a-z}). Usable in
// constant expressions.
inline constexpr bool ascii_islower(unsigned char c) {
  // Anything outside the closed range ['a', 'z'] is rejected.
  return !(c < 'a' || 'z' < c);
}
174 | | |
// ascii_isascii()
//
// Reports whether `c` is an ASCII character, i.e. has a value of at most 127.
// Usable in constant expressions.
inline constexpr bool ascii_isascii(unsigned char c) {
  return c <= 127;
}
179 | | |
180 | | // ascii_tolower() |
181 | | // |
182 | | // Returns an ASCII character, converting to lowercase if uppercase is |
183 | | // passed. Note that character values > 127 are simply returned. |
184 | 0 | inline char ascii_tolower(unsigned char c) { |
185 | 0 | return ascii_internal::kToLower[c]; |
186 | 0 | } |
187 | | |
// Converts the characters in `s` to lowercase, changing the contents of `s`.
//
// `s` must not be null. This in-place overload is only declared here; it is
// also what the `std::string&&` overload of `AsciiStrToLower` calls on its
// local copy.
void AsciiStrToLower(std::string* absl_nonnull s);
190 | | |
191 | | // Creates a lowercase string from a given absl::string_view. |
192 | 0 | [[nodiscard]] inline std::string AsciiStrToLower(absl::string_view s) { |
193 | 0 | std::string result; |
194 | 0 | StringResizeAndOverwrite(result, s.size(), [s](char* buf, size_t buf_size) { |
195 | 0 | ascii_internal::AsciiStrToLower(buf, s.data(), s.size()); |
196 | 0 | return buf_size; |
197 | 0 | }); |
198 | 0 | return result; |
199 | 0 | } |
200 | | |
201 | | // Creates a lowercase string from a given std::string&&. |
202 | | // |
203 | | // (Template is used to lower priority of this overload.) |
204 | | template <int&... DoNotSpecify> |
205 | | [[nodiscard]] inline std::string AsciiStrToLower(std::string&& s) { |
206 | | std::string result = std::move(s); |
207 | | absl::AsciiStrToLower(&result); |
208 | | return result; |
209 | | } |
210 | | |
211 | | // ascii_toupper() |
212 | | // |
213 | | // Returns the ASCII character, converting to upper-case if lower-case is |
214 | | // passed. Note that characters values > 127 are simply returned. |
215 | 0 | inline char ascii_toupper(unsigned char c) { |
216 | 0 | return ascii_internal::kToUpper[c]; |
217 | 0 | } |
218 | | |
// Converts the characters in `s` to uppercase, changing the contents of `s`.
//
// `s` must not be null. This in-place overload is only declared here; it is
// also what the `std::string&&` overload of `AsciiStrToUpper` calls on its
// local copy.
void AsciiStrToUpper(std::string* absl_nonnull s);
221 | | |
222 | | // Creates an uppercase string from a given absl::string_view. |
223 | 0 | [[nodiscard]] inline std::string AsciiStrToUpper(absl::string_view s) { |
224 | 0 | std::string result; |
225 | 0 | StringResizeAndOverwrite(result, s.size(), [s](char* buf, size_t buf_size) { |
226 | 0 | ascii_internal::AsciiStrToUpper(buf, s.data(), s.size()); |
227 | 0 | return buf_size; |
228 | 0 | }); |
229 | 0 | return result; |
230 | 0 | } |
231 | | |
232 | | // Creates an uppercase string from a given std::string&&. |
233 | | // |
234 | | // (Template is used to lower priority of this overload.) |
235 | | template <int&... DoNotSpecify> |
236 | | [[nodiscard]] inline std::string AsciiStrToUpper(std::string&& s) { |
237 | | std::string result = std::move(s); |
238 | | absl::AsciiStrToUpper(&result); |
239 | | return result; |
240 | | } |
241 | | |
242 | | // Returns absl::string_view with whitespace stripped from the beginning of the |
243 | | // given string_view. |
244 | | [[nodiscard]] inline absl::string_view StripLeadingAsciiWhitespace( |
245 | 0 | absl::string_view str ABSL_ATTRIBUTE_LIFETIME_BOUND) { |
246 | 0 | auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace); |
247 | 0 | return str.substr(static_cast<size_t>(it - str.begin())); |
248 | 0 | } |
249 | | |
250 | | // Strips in place whitespace from the beginning of the given string. |
251 | 0 | inline void StripLeadingAsciiWhitespace(std::string* absl_nonnull str) { |
252 | 0 | auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace); |
253 | 0 | str->erase(str->begin(), it); |
254 | 0 | } |
255 | | |
256 | | // Returns absl::string_view with whitespace stripped from the end of the given |
257 | | // string_view. |
258 | | [[nodiscard]] inline absl::string_view StripTrailingAsciiWhitespace( |
259 | 0 | absl::string_view str ABSL_ATTRIBUTE_LIFETIME_BOUND) { |
260 | 0 | auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace); |
261 | 0 | return str.substr(0, static_cast<size_t>(str.rend() - it)); |
262 | 0 | } |
263 | | |
264 | | // Strips in place whitespace from the end of the given string |
265 | 0 | inline void StripTrailingAsciiWhitespace(std::string* absl_nonnull str) { |
266 | 0 | auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace); |
267 | 0 | str->erase(static_cast<size_t>(str->rend() - it)); |
268 | 0 | } |
269 | | |
270 | | // Returns absl::string_view with whitespace stripped from both ends of the |
271 | | // given string_view. |
272 | | [[nodiscard]] inline absl::string_view StripAsciiWhitespace( |
273 | 0 | absl::string_view str ABSL_ATTRIBUTE_LIFETIME_BOUND) { |
274 | 0 | return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str)); |
275 | 0 | } |
276 | | |
277 | | // Strips in place whitespace from both ends of the given string |
278 | 0 | inline void StripAsciiWhitespace(std::string* absl_nonnull str) { |
279 | 0 | StripTrailingAsciiWhitespace(str); |
280 | 0 | StripLeadingAsciiWhitespace(str); |
281 | 0 | } |
282 | | |
// Removes leading, trailing, and consecutive internal whitespace.
//
// Operates in place on `*str`, which must not be null. Only declared in this
// header; the definition lives elsewhere.
// NOTE(review): what a run of internal whitespace collapses to is not visible
// from this declaration -- confirm against the definition.
void RemoveExtraAsciiWhitespace(std::string* absl_nonnull str);
285 | | |
286 | | ABSL_NAMESPACE_END |
287 | | } // namespace absl |
288 | | |
289 | | #endif // ABSL_STRINGS_ASCII_H_ |