/src/mozilla-central/xpcom/ds/nsCharSeparatedTokenizer.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #ifndef __nsCharSeparatedTokenizer_h |
8 | | #define __nsCharSeparatedTokenizer_h |
9 | | |
10 | | #include "mozilla/RangedPtr.h" |
11 | | |
12 | | #include "nsDependentSubstring.h" |
13 | | #include "nsCRT.h" |
14 | | |
15 | | /** |
16 | | * This parses a SeparatorChar-separated string into tokens. |
17 | | * Whitespace surrounding tokens is not treated as part of tokens, however |
18 | | * whitespace inside a token is. If the final token is the empty string, it is |
19 | | * not returned. |
20 | | * |
21 | | * Some examples, with SeparatorChar = ',': |
22 | | * |
23 | | * "foo, bar, baz" -> "foo" "bar" "baz" |
24 | | * "foo,bar,baz" -> "foo" "bar" "baz" |
25 | | * "foo , bar hi , baz" -> "foo" "bar hi" "baz" |
26 | | * "foo, ,bar,baz" -> "foo" "" "bar" "baz" |
27 | | * "foo,,bar,baz" -> "foo" "" "bar" "baz" |
28 | | * "foo,bar,baz," -> "foo" "bar" "baz" |
29 | | * |
30 | | * The function used for whitespace detection is a template argument. |
31 | | * By default, it is NS_IsAsciiWhitespace. |
32 | | */ |
33 | | template<typename DependentSubstringType, bool IsWhitespace(char16_t)> |
34 | | class nsTCharSeparatedTokenizer |
35 | | { |
36 | | typedef typename DependentSubstringType::char_type CharType; |
37 | | typedef typename DependentSubstringType::substring_type SubstringType; |
38 | | |
39 | | public: |
40 | | // Flags -- only one for now. If we need more, they should be defined to |
41 | | // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.) |
42 | | enum |
43 | | { |
44 | | SEPARATOR_OPTIONAL = 1 |
45 | | }; |
46 | | |
47 | | nsTCharSeparatedTokenizer(const SubstringType& aSource, |
48 | | CharType aSeparatorChar, |
49 | | uint32_t aFlags = 0) |
50 | | : mIter(aSource.Data(), aSource.Length()) |
51 | | , mEnd(aSource.Data() + aSource.Length(), aSource.Data(), |
52 | | aSource.Length()) |
53 | | , mSeparatorChar(aSeparatorChar) |
54 | | , mWhitespaceBeforeFirstToken(false) |
55 | | , mWhitespaceAfterCurrentToken(false) |
56 | | , mSeparatorAfterCurrentToken(false) |
57 | | , mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL) |
58 | 1 | { |
59 | 1 | // Skip initial whitespace |
60 | 1 | while (mIter < mEnd && IsWhitespace(*mIter)) { |
61 | 0 | mWhitespaceBeforeFirstToken = true; |
62 | 0 | ++mIter; |
63 | 0 | } |
64 | 1 | } nsTCharSeparatedTokenizer<nsTDependentSubstring<char>, &(NS_IsAsciiWhitespace(char16_t))>::nsTCharSeparatedTokenizer(nsTSubstring<char> const&, char, unsigned int) Line | Count | Source | 58 | 1 | { | 59 | 1 | // Skip initial whitespace | 60 | 1 | while (mIter < mEnd && IsWhitespace(*mIter)) { | 61 | 0 | mWhitespaceBeforeFirstToken = true; | 62 | 0 | ++mIter; | 63 | 0 | } | 64 | 1 | } |
Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &(NS_IsAsciiWhitespace(char16_t))>::nsTCharSeparatedTokenizer(nsTSubstring<char16_t> const&, char16_t, unsigned int) Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &nsContentUtils::IsHTMLWhitespace>::nsTCharSeparatedTokenizer(nsTSubstring<char16_t> const&, char16_t, unsigned int) |
65 | | |
66 | | /** |
67 | | * Checks if any more tokens are available. |
68 | | */ |
69 | | bool hasMoreTokens() const |
70 | 1 | { |
71 | 1 | MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), |
72 | 1 | "Should be at beginning of token if there is one"); |
73 | 1 | |
74 | 1 | return mIter < mEnd; |
75 | 1 | } nsTCharSeparatedTokenizer<nsTDependentSubstring<char>, &(NS_IsAsciiWhitespace(char16_t))>::hasMoreTokens() const Line | Count | Source | 70 | 1 | { | 71 | 1 | MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), | 72 | 1 | "Should be at beginning of token if there is one"); | 73 | 1 | | 74 | 1 | return mIter < mEnd; | 75 | 1 | } |
Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &(NS_IsAsciiWhitespace(char16_t))>::hasMoreTokens() const Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &nsContentUtils::IsHTMLWhitespace>::hasMoreTokens() const |
76 | | |
77 | | /* |
78 | | * Returns true if there is whitespace prior to the first token. |
79 | | */ |
80 | | bool whitespaceBeforeFirstToken() const |
81 | | { |
82 | | return mWhitespaceBeforeFirstToken; |
83 | | } |
84 | | |
85 | | /* |
86 | | * Returns true if there is a separator after the current token. |
87 | | * Useful if you want to check whether the last token has a separator |
88 | | * after it which may not be valid. |
89 | | */ |
90 | | bool separatorAfterCurrentToken() const |
91 | | { |
92 | | return mSeparatorAfterCurrentToken; |
93 | | } |
94 | | |
95 | | /* |
96 | | * Returns true if there is any whitespace after the current token. |
97 | | */ |
98 | | bool whitespaceAfterCurrentToken() const |
99 | | { |
100 | | return mWhitespaceAfterCurrentToken; |
101 | | } |
102 | | |
103 | | /** |
104 | | * Returns the next token. |
105 | | */ |
106 | | const DependentSubstringType nextToken() |
107 | 1 | { |
108 | 1 | mozilla::RangedPtr<const CharType> tokenStart = mIter; |
109 | 1 | mozilla::RangedPtr<const CharType> tokenEnd = mIter; |
110 | 1 | |
111 | 1 | MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), |
112 | 1 | "Should be at beginning of token if there is one"); |
113 | 1 | |
114 | 1 | // Search until we hit separator or end (or whitespace, if a separator |
115 | 1 | // isn't required -- see clause with 'break' below). |
116 | 2 | while (mIter < mEnd && *mIter != mSeparatorChar) { |
117 | 1 | // Skip to end of the current word. |
118 | 2 | while (mIter < mEnd && |
119 | 2 | !IsWhitespace(*mIter) && *mIter != mSeparatorChar) { |
120 | 1 | ++mIter; |
121 | 1 | } |
122 | 1 | tokenEnd = mIter; |
123 | 1 | |
124 | 1 | // Skip whitespace after the current word. |
125 | 1 | mWhitespaceAfterCurrentToken = false; |
126 | 1 | while (mIter < mEnd && IsWhitespace(*mIter)) { |
127 | 0 | mWhitespaceAfterCurrentToken = true; |
128 | 0 | ++mIter; |
129 | 0 | } |
130 | 1 | if (mSeparatorOptional) { |
131 | 0 | // We've hit (and skipped) whitespace, and that's sufficient to end |
132 | 0 | // our token, regardless of whether we've reached a SeparatorChar. |
133 | 0 | break; |
134 | 0 | } // (else, we'll keep looping until we hit mEnd or SeparatorChar) |
135 | 1 | } |
136 | 1 | |
137 | 1 | mSeparatorAfterCurrentToken = (mIter != mEnd && |
138 | 1 | *mIter == mSeparatorChar); |
139 | 1 | MOZ_ASSERT(mSeparatorOptional || |
140 | 1 | (mSeparatorAfterCurrentToken == (mIter < mEnd)), |
141 | 1 | "If we require a separator and haven't hit the end of " |
142 | 1 | "our string, then we shouldn't have left the loop " |
143 | 1 | "unless we hit a separator"); |
144 | 1 | |
145 | 1 | // Skip separator (and any whitespace after it), if we're at one. |
146 | 1 | if (mSeparatorAfterCurrentToken) { |
147 | 1 | ++mIter; |
148 | 1 | |
149 | 1 | while (mIter < mEnd && IsWhitespace(*mIter)) { |
150 | 0 | mWhitespaceAfterCurrentToken = true; |
151 | 0 | ++mIter; |
152 | 0 | } |
153 | 1 | } |
154 | 1 | |
155 | 1 | return Substring(tokenStart.get(), tokenEnd.get()); |
156 | 1 | } nsTCharSeparatedTokenizer<nsTDependentSubstring<char>, &(NS_IsAsciiWhitespace(char16_t))>::nextToken() Line | Count | Source | 107 | 1 | { | 108 | 1 | mozilla::RangedPtr<const CharType> tokenStart = mIter; | 109 | 1 | mozilla::RangedPtr<const CharType> tokenEnd = mIter; | 110 | 1 | | 111 | 1 | MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), | 112 | 1 | "Should be at beginning of token if there is one"); | 113 | 1 | | 114 | 1 | // Search until we hit separator or end (or whitespace, if a separator | 115 | 1 | // isn't required -- see clause with 'break' below). | 116 | 2 | while (mIter < mEnd && *mIter != mSeparatorChar) { | 117 | 1 | // Skip to end of the current word. | 118 | 2 | while (mIter < mEnd && | 119 | 2 | !IsWhitespace(*mIter) && *mIter != mSeparatorChar) { | 120 | 1 | ++mIter; | 121 | 1 | } | 122 | 1 | tokenEnd = mIter; | 123 | 1 | | 124 | 1 | // Skip whitespace after the current word. | 125 | 1 | mWhitespaceAfterCurrentToken = false; | 126 | 1 | while (mIter < mEnd && IsWhitespace(*mIter)) { | 127 | 0 | mWhitespaceAfterCurrentToken = true; | 128 | 0 | ++mIter; | 129 | 0 | } | 130 | 1 | if (mSeparatorOptional) { | 131 | 0 | // We've hit (and skipped) whitespace, and that's sufficient to end | 132 | 0 | // our token, regardless of whether we've reached a SeparatorChar. | 133 | 0 | break; | 134 | 0 | } // (else, we'll keep looping until we hit mEnd or SeparatorChar) | 135 | 1 | } | 136 | 1 | | 137 | 1 | mSeparatorAfterCurrentToken = (mIter != mEnd && | 138 | 1 | *mIter == mSeparatorChar); | 139 | 1 | MOZ_ASSERT(mSeparatorOptional || | 140 | 1 | (mSeparatorAfterCurrentToken == (mIter < mEnd)), | 141 | 1 | "If we require a separator and haven't hit the end of " | 142 | 1 | "our string, then we shouldn't have left the loop " | 143 | 1 | "unless we hit a separator"); | 144 | 1 | | 145 | 1 | // Skip separator (and any whitespace after it), if we're at one. | 146 | 1 | if (mSeparatorAfterCurrentToken) { | 147 | 1 | ++mIter; | 148 | 1 | | 149 | 1 | while (mIter < mEnd && IsWhitespace(*mIter)) { | 150 | 0 | mWhitespaceAfterCurrentToken = true; | 151 | 0 | ++mIter; | 152 | 0 | } | 153 | 1 | } | 154 | 1 | | 155 | 1 | return Substring(tokenStart.get(), tokenEnd.get()); | 156 | 1 | } |
Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &(NS_IsAsciiWhitespace(char16_t))>::nextToken() Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &nsContentUtils::IsHTMLWhitespace>::nextToken() |
157 | | |
158 | | private: |
159 | | mozilla::RangedPtr<const CharType> mIter; |
160 | | const mozilla::RangedPtr<const CharType> mEnd; |
161 | | CharType mSeparatorChar; |
162 | | bool mWhitespaceBeforeFirstToken; |
163 | | bool mWhitespaceAfterCurrentToken; |
164 | | bool mSeparatorAfterCurrentToken; |
165 | | bool mSeparatorOptional; |
166 | | }; |
167 | | |
168 | | template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace> |
169 | | class nsCharSeparatedTokenizerTemplate |
170 | | : public nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace> |
171 | | { |
172 | | public: |
173 | | nsCharSeparatedTokenizerTemplate(const nsAString& aSource, |
174 | | char16_t aSeparatorChar, |
175 | | uint32_t aFlags = 0) |
176 | | : nsTCharSeparatedTokenizer<nsDependentSubstring, |
177 | | IsWhitespace>(aSource, aSeparatorChar, aFlags) |
178 | 0 | { |
179 | 0 | } Unexecuted instantiation: nsCharSeparatedTokenizerTemplate<&(NS_IsAsciiWhitespace(char16_t))>::nsCharSeparatedTokenizerTemplate(nsTSubstring<char16_t> const&, char16_t, unsigned int) Unexecuted instantiation: nsCharSeparatedTokenizerTemplate<&nsContentUtils::IsHTMLWhitespace>::nsCharSeparatedTokenizerTemplate(nsTSubstring<char16_t> const&, char16_t, unsigned int) |
180 | | }; |
181 | | |
182 | | typedef nsCharSeparatedTokenizerTemplate<> nsCharSeparatedTokenizer; |
183 | | |
184 | | template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace> |
185 | | class nsCCharSeparatedTokenizerTemplate |
186 | | : public nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace> |
187 | | { |
188 | | public: |
189 | | nsCCharSeparatedTokenizerTemplate(const nsACString& aSource, |
190 | | char aSeparatorChar, |
191 | | uint32_t aFlags = 0) |
192 | | : nsTCharSeparatedTokenizer<nsDependentCSubstring, |
193 | | IsWhitespace>(aSource, aSeparatorChar, aFlags) |
194 | 1 | { |
195 | 1 | } |
196 | | }; |
197 | | |
198 | | typedef nsCCharSeparatedTokenizerTemplate<> nsCCharSeparatedTokenizer; |
199 | | |
200 | | #endif /* __nsCharSeparatedTokenizer_h */ |