Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/xpcom/ds/nsCharSeparatedTokenizer.h
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#ifndef __nsCharSeparatedTokenizer_h
8
#define __nsCharSeparatedTokenizer_h
9
10
#include "mozilla/RangedPtr.h"
11
12
#include "nsDependentSubstring.h"
13
#include "nsCRT.h"
14
15
/**
16
 * This parses a SeparatorChar-separated string into tokens.
17
 * Whitespace surrounding tokens is not treated as part of tokens; however,
18
 * whitespace inside a token is. If the final token is the empty string, it is
19
 * not returned.
20
 *
21
 * Some examples, with SeparatorChar = ',':
22
 *
23
 * "foo, bar, baz" ->      "foo" "bar" "baz"
24
 * "foo,bar,baz" ->        "foo" "bar" "baz"
25
 * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
26
 * "foo, ,bar,baz" ->      "foo" "" "bar" "baz"
27
 * "foo,,bar,baz" ->       "foo" "" "bar" "baz"
28
 * "foo,bar,baz," ->       "foo" "bar" "baz"
29
 *
30
 * The function used for whitespace detection is a template argument.
31
 * By default, it is NS_IsAsciiWhitespace.
32
 */
33
template<typename DependentSubstringType, bool IsWhitespace(char16_t)>
34
class nsTCharSeparatedTokenizer
35
{
36
  typedef typename DependentSubstringType::char_type CharType;
37
  typedef typename DependentSubstringType::substring_type SubstringType;
38
39
public:
40
  // Flags -- only one for now. If we need more, they should be defined to
41
  // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
42
  enum
43
  {
44
    SEPARATOR_OPTIONAL = 1
45
  };
46
47
  nsTCharSeparatedTokenizer(const SubstringType& aSource,
48
                            CharType aSeparatorChar,
49
                            uint32_t aFlags = 0)
50
    : mIter(aSource.Data(), aSource.Length())
51
    , mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
52
           aSource.Length())
53
    , mSeparatorChar(aSeparatorChar)
54
    , mWhitespaceBeforeFirstToken(false)
55
    , mWhitespaceAfterCurrentToken(false)
56
    , mSeparatorAfterCurrentToken(false)
57
    , mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)
58
1
  {
59
1
    // Skip initial whitespace
60
1
    while (mIter < mEnd && IsWhitespace(*mIter)) {
61
0
      mWhitespaceBeforeFirstToken = true;
62
0
      ++mIter;
63
0
    }
64
1
  }
nsTCharSeparatedTokenizer<nsTDependentSubstring<char>, &(NS_IsAsciiWhitespace(char16_t))>::nsTCharSeparatedTokenizer(nsTSubstring<char> const&, char, unsigned int)
Line
Count
Source
58
1
  {
59
1
    // Skip initial whitespace
60
1
    while (mIter < mEnd && IsWhitespace(*mIter)) {
61
0
      mWhitespaceBeforeFirstToken = true;
62
0
      ++mIter;
63
0
    }
64
1
  }
Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &(NS_IsAsciiWhitespace(char16_t))>::nsTCharSeparatedTokenizer(nsTSubstring<char16_t> const&, char16_t, unsigned int)
Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &nsContentUtils::IsHTMLWhitespace>::nsTCharSeparatedTokenizer(nsTSubstring<char16_t> const&, char16_t, unsigned int)
65
66
  /**
67
   * Checks if any more tokens are available.
68
   */
69
  bool hasMoreTokens() const
70
1
  {
71
1
    MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
72
1
               "Should be at beginning of token if there is one");
73
1
74
1
    return mIter < mEnd;
75
1
  }
nsTCharSeparatedTokenizer<nsTDependentSubstring<char>, &(NS_IsAsciiWhitespace(char16_t))>::hasMoreTokens() const
Line
Count
Source
70
1
  {
71
1
    MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
72
1
               "Should be at beginning of token if there is one");
73
1
74
1
    return mIter < mEnd;
75
1
  }
Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &(NS_IsAsciiWhitespace(char16_t))>::hasMoreTokens() const
Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &nsContentUtils::IsHTMLWhitespace>::hasMoreTokens() const
76
77
  /**
78
   * Returns true if there is whitespace prior to the first token.
79
   */
80
  bool whitespaceBeforeFirstToken() const
81
  {
82
    return mWhitespaceBeforeFirstToken;
83
  }
84
85
  /**
86
   * Returns true if there is a separator after the current token.
87
   * Useful if you want to check whether the last token has a separator
88
   * after it which may not be valid.
89
   */
90
  bool separatorAfterCurrentToken() const
91
  {
92
    return mSeparatorAfterCurrentToken;
93
  }
94
95
  /**
96
   * Returns true if there is any whitespace after the current token.
97
   */
98
  bool whitespaceAfterCurrentToken() const
99
  {
100
    return mWhitespaceAfterCurrentToken;
101
  }
102
103
  /**
104
   * Returns the next token.
105
   */
106
  const DependentSubstringType nextToken()
107
1
  {
108
1
    mozilla::RangedPtr<const CharType> tokenStart = mIter;
109
1
    mozilla::RangedPtr<const CharType> tokenEnd = mIter;
110
1
111
1
    MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
112
1
               "Should be at beginning of token if there is one");
113
1
114
1
    // Search until we hit separator or end (or whitespace, if a separator
115
1
    // isn't required -- see clause with 'break' below).
116
2
    while (mIter < mEnd && *mIter != mSeparatorChar) {
117
1
      // Skip to end of the current word.
118
2
      while (mIter < mEnd &&
119
2
             !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
120
1
        ++mIter;
121
1
      }
122
1
      tokenEnd = mIter;
123
1
124
1
      // Skip whitespace after the current word.
125
1
      mWhitespaceAfterCurrentToken = false;
126
1
      while (mIter < mEnd && IsWhitespace(*mIter)) {
127
0
        mWhitespaceAfterCurrentToken = true;
128
0
        ++mIter;
129
0
      }
130
1
      if (mSeparatorOptional) {
131
0
        // We've hit (and skipped) whitespace, and that's sufficient to end
132
0
        // our token, regardless of whether we've reached a SeparatorChar.
133
0
        break;
134
0
      } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
135
1
    }
136
1
137
1
    mSeparatorAfterCurrentToken = (mIter != mEnd &&
138
1
                                   *mIter == mSeparatorChar);
139
1
    MOZ_ASSERT(mSeparatorOptional ||
140
1
               (mSeparatorAfterCurrentToken == (mIter < mEnd)),
141
1
               "If we require a separator and haven't hit the end of "
142
1
               "our string, then we shouldn't have left the loop "
143
1
               "unless we hit a separator");
144
1
145
1
    // Skip separator (and any whitespace after it), if we're at one.
146
1
    if (mSeparatorAfterCurrentToken) {
147
1
      ++mIter;
148
1
149
1
      while (mIter < mEnd && IsWhitespace(*mIter)) {
150
0
        mWhitespaceAfterCurrentToken = true;
151
0
        ++mIter;
152
0
      }
153
1
    }
154
1
155
1
    return Substring(tokenStart.get(), tokenEnd.get());
156
1
  }
nsTCharSeparatedTokenizer<nsTDependentSubstring<char>, &(NS_IsAsciiWhitespace(char16_t))>::nextToken()
Line
Count
Source
107
1
  {
108
1
    mozilla::RangedPtr<const CharType> tokenStart = mIter;
109
1
    mozilla::RangedPtr<const CharType> tokenEnd = mIter;
110
1
111
1
    MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
112
1
               "Should be at beginning of token if there is one");
113
1
114
1
    // Search until we hit separator or end (or whitespace, if a separator
115
1
    // isn't required -- see clause with 'break' below).
116
2
    while (mIter < mEnd && *mIter != mSeparatorChar) {
117
1
      // Skip to end of the current word.
118
2
      while (mIter < mEnd &&
119
2
             !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
120
1
        ++mIter;
121
1
      }
122
1
      tokenEnd = mIter;
123
1
124
1
      // Skip whitespace after the current word.
125
1
      mWhitespaceAfterCurrentToken = false;
126
1
      while (mIter < mEnd && IsWhitespace(*mIter)) {
127
0
        mWhitespaceAfterCurrentToken = true;
128
0
        ++mIter;
129
0
      }
130
1
      if (mSeparatorOptional) {
131
0
        // We've hit (and skipped) whitespace, and that's sufficient to end
132
0
        // our token, regardless of whether we've reached a SeparatorChar.
133
0
        break;
134
0
      } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
135
1
    }
136
1
137
1
    mSeparatorAfterCurrentToken = (mIter != mEnd &&
138
1
                                   *mIter == mSeparatorChar);
139
1
    MOZ_ASSERT(mSeparatorOptional ||
140
1
               (mSeparatorAfterCurrentToken == (mIter < mEnd)),
141
1
               "If we require a separator and haven't hit the end of "
142
1
               "our string, then we shouldn't have left the loop "
143
1
               "unless we hit a separator");
144
1
145
1
    // Skip separator (and any whitespace after it), if we're at one.
146
1
    if (mSeparatorAfterCurrentToken) {
147
1
      ++mIter;
148
1
149
1
      while (mIter < mEnd && IsWhitespace(*mIter)) {
150
0
        mWhitespaceAfterCurrentToken = true;
151
0
        ++mIter;
152
0
      }
153
1
    }
154
1
155
1
    return Substring(tokenStart.get(), tokenEnd.get());
156
1
  }
Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &(NS_IsAsciiWhitespace(char16_t))>::nextToken()
Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &nsContentUtils::IsHTMLWhitespace>::nextToken()
157
158
private:
159
  mozilla::RangedPtr<const CharType> mIter;
160
  const mozilla::RangedPtr<const CharType> mEnd;
161
  CharType mSeparatorChar;
162
  bool mWhitespaceBeforeFirstToken;
163
  bool mWhitespaceAfterCurrentToken;
164
  bool mSeparatorAfterCurrentToken;
165
  bool mSeparatorOptional;
166
};
167
168
template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
169
class nsCharSeparatedTokenizerTemplate
170
  : public nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace>
171
{
172
public:
173
  nsCharSeparatedTokenizerTemplate(const nsAString& aSource,
174
                                   char16_t aSeparatorChar,
175
                                   uint32_t aFlags = 0)
176
    : nsTCharSeparatedTokenizer<nsDependentSubstring,
177
                                IsWhitespace>(aSource, aSeparatorChar, aFlags)
178
0
  {
179
0
  }
Unexecuted instantiation: nsCharSeparatedTokenizerTemplate<&(NS_IsAsciiWhitespace(char16_t))>::nsCharSeparatedTokenizerTemplate(nsTSubstring<char16_t> const&, char16_t, unsigned int)
Unexecuted instantiation: nsCharSeparatedTokenizerTemplate<&nsContentUtils::IsHTMLWhitespace>::nsCharSeparatedTokenizerTemplate(nsTSubstring<char16_t> const&, char16_t, unsigned int)
180
};
181
182
typedef nsCharSeparatedTokenizerTemplate<> nsCharSeparatedTokenizer;
183
184
template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
185
class nsCCharSeparatedTokenizerTemplate
186
  : public nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace>
187
{
188
public:
189
  nsCCharSeparatedTokenizerTemplate(const nsACString& aSource,
190
                                    char aSeparatorChar,
191
                                    uint32_t aFlags = 0)
192
    : nsTCharSeparatedTokenizer<nsDependentCSubstring,
193
                                IsWhitespace>(aSource, aSeparatorChar, aFlags)
194
1
  {
195
1
  }
196
};
197
198
typedef nsCCharSeparatedTokenizerTemplate<> nsCCharSeparatedTokenizer;
199
200
#endif /* __nsCharSeparatedTokenizer_h */