/work/obj-fuzz/dist/include/nsCharSeparatedTokenizer.h

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef __nsCharSeparatedTokenizer_h
#define __nsCharSeparatedTokenizer_h

#include "mozilla/RangedPtr.h"

#include "nsDependentSubstring.h"
#include "nsCRT.h"

/**
 * This parses a SeparatorChar-separated string into tokens.
 * Whitespace surrounding tokens is not treated as part of tokens, however
 * whitespace inside a token is. If the final token is the empty string, it is
 * not returned.
 *
 * Some examples, with SeparatorChar = ',':
 *
 * "foo, bar, baz" ->      "foo" "bar" "baz"
 * "foo,bar,baz" ->        "foo" "bar" "baz"
 * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
 * "foo, ,bar,baz" ->      "foo" "" "bar" "baz"
 * "foo,,bar,baz" ->       "foo" "" "bar" "baz"
 * "foo,bar,baz," ->       "foo" "bar" "baz"
 *
 * The function used for whitespace detection is the IsWhitespace template
 * argument. This class template has no default for it; the
 * nsCharSeparatedTokenizerTemplate and nsCCharSeparatedTokenizerTemplate
 * wrappers default it to NS_IsAsciiWhitespace.
 *
 * The tokenizer does not copy the source: it keeps range-checked pointers
 * into aSource's buffer and builds each token from pointers into that buffer,
 * so the buffer has to stay alive and unmodified while tokenizing.
 */
template<typename DependentSubstringType, bool IsWhitespace(char16_t)>
class nsTCharSeparatedTokenizer
{
  typedef typename DependentSubstringType::char_type CharType;
  typedef typename DependentSubstringType::substring_type SubstringType;

public:
  // Flags -- only one for now. If we need more, they should be defined to
  // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
  enum
  {
    // When set, whitespace following a word ends the token even if no
    // separator character has been reached.
    SEPARATOR_OPTIONAL = 1
  };

  /**
   * Sets up tokenizing of aSource and skips any leading whitespace.
   *
   * @param aSource        The string to split into tokens.
   * @param aSeparatorChar The character that separates tokens.
   * @param aFlags         Bitmask of the flags above (0 or
   *                       SEPARATOR_OPTIONAL).
   */
  nsTCharSeparatedTokenizer(const SubstringType& aSource,
                            CharType aSeparatorChar,
                            uint32_t aFlags = 0)
    : mIter(aSource.Data(), aSource.Length())
    , mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
           aSource.Length())
    , mSeparatorChar(aSeparatorChar)
    , mWhitespaceBeforeFirstToken(false)
    , mWhitespaceAfterCurrentToken(false)
    , mSeparatorAfterCurrentToken(false)
    , mSeparatorOptional((aFlags & SEPARATOR_OPTIONAL) != 0)
  {
    // Skip initial whitespace
    while (mIter < mEnd && IsWhitespace(*mIter)) {
      mWhitespaceBeforeFirstToken = true;
      ++mIter;
    }
  }

  /**
   * Checks if any more tokens are available, i.e. whether the cursor has not
   * yet reached the end of the source.
   */
  bool hasMoreTokens() const
  {
    MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
               "Should be at beginning of token if there is one");

    return mIter < mEnd;
  }

  /*
   * Returns true if there is whitespace prior to the first token.
   */
  bool whitespaceBeforeFirstToken() const
  {
    return mWhitespaceBeforeFirstToken;
  }

  /*
   * Returns true if there is a separator after the current token.
   * Useful if you want to check whether the last token has a separator
   * after it which may not be valid.
   */
  bool separatorAfterCurrentToken() const
  {
    return mSeparatorAfterCurrentToken;
  }

  /*
   * Returns true if there is any whitespace after the current token, either
   * directly after it or after the separator that follows it. The value
   * always refers to the token most recently returned by nextToken().
   */
  bool whitespaceAfterCurrentToken() const
  {
    return mWhitespaceAfterCurrentToken;
  }

  /**
   * Returns the next token and advances past it, past the separator that
   * follows it (if any) and past any whitespace after that separator.
   * The token may be empty, e.g. for two adjacent separators.
   */
  const DependentSubstringType nextToken()
  {
    mozilla::RangedPtr<const CharType> tokenStart = mIter;
    mozilla::RangedPtr<const CharType> tokenEnd = mIter;

    MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
               "Should be at beginning of token if there is one");

    // Forget the previous token's trailing-whitespace state. The loop below
    // is skipped entirely for an empty token (mIter already sits on a
    // separator or on mEnd); without this reset the second token of
    // "foo ,,bar" would still report the whitespace that followed "foo".
    mWhitespaceAfterCurrentToken = false;

    // Search until we hit separator or end (or whitespace, if a separator
    // isn't required -- see clause with 'break' below).
    while (mIter < mEnd && *mIter != mSeparatorChar) {
      // Skip to end of the current word.
      while (mIter < mEnd &&
             !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
        ++mIter;
      }
      tokenEnd = mIter;

      // Skip whitespace after the current word. The flag is cleared on every
      // pass so that whitespace *inside* a multi-word token ("bar hi") is not
      // mistaken for whitespace after the token.
      mWhitespaceAfterCurrentToken = false;
      while (mIter < mEnd && IsWhitespace(*mIter)) {
        mWhitespaceAfterCurrentToken = true;
        ++mIter;
      }
      if (mSeparatorOptional) {
        // We've hit (and skipped) whitespace, and that's sufficient to end
        // our token, regardless of whether we've reached a SeparatorChar.
        break;
      } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
    }

    mSeparatorAfterCurrentToken = (mIter != mEnd &&
                                   *mIter == mSeparatorChar);
    MOZ_ASSERT(mSeparatorOptional ||
               (mSeparatorAfterCurrentToken == (mIter < mEnd)),
               "If we require a separator and haven't hit the end of "
               "our string, then we shouldn't have left the loop "
               "unless we hit a separator");

    // Skip separator (and any whitespace after it), if we're at one.
    if (mSeparatorAfterCurrentToken) {
      ++mIter;

      while (mIter < mEnd && IsWhitespace(*mIter)) {
        mWhitespaceAfterCurrentToken = true;
        ++mIter;
      }
    }

    return Substring(tokenStart.get(), tokenEnd.get());
  }

private:
  // Cursor: start of the next token, or mEnd when the input is exhausted.
  mozilla::RangedPtr<const CharType> mIter;
  // One past the last character of the source.
  const mozilla::RangedPtr<const CharType> mEnd;
  CharType mSeparatorChar;
  bool mWhitespaceBeforeFirstToken;
  bool mWhitespaceAfterCurrentToken;
  bool mSeparatorAfterCurrentToken;
  // True when SEPARATOR_OPTIONAL was passed in aFlags.
  bool mSeparatorOptional;
};

/**
 * Tokenizer for nsAString input with a char16_t separator; tokens are
 * returned as nsDependentSubstring. The whitespace predicate is the
 * IsWhitespace template argument and defaults to NS_IsAsciiWhitespace.
 */
template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
class nsCharSeparatedTokenizerTemplate
  : public nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace>
{
  typedef nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace> Base;

public:
  // Passes every argument through unchanged to the generic tokenizer.
  nsCharSeparatedTokenizerTemplate(const nsAString& aSource,
                                   char16_t aSeparatorChar,
                                   uint32_t aFlags = 0)
    : Base(aSource, aSeparatorChar, aFlags)
  {
  }
};

// The nsAString tokenizer with the default whitespace predicate.
typedef nsCharSeparatedTokenizerTemplate<> nsCharSeparatedTokenizer;

/**
 * Tokenizer for nsACString input with a char separator; tokens are returned
 * as nsDependentCSubstring. The whitespace predicate is the IsWhitespace
 * template argument (it still takes a char16_t) and defaults to
 * NS_IsAsciiWhitespace.
 */
template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
class nsCCharSeparatedTokenizerTemplate
  : public nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace>
{
  typedef nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace> Base;

public:
  // Passes every argument through unchanged to the generic tokenizer.
  nsCCharSeparatedTokenizerTemplate(const nsACString& aSource,
                                    char aSeparatorChar,
                                    uint32_t aFlags = 0)
    : Base(aSource, aSeparatorChar, aFlags)
  {
  }
};

// The nsACString tokenizer with the default whitespace predicate.
typedef nsCCharSeparatedTokenizerTemplate<> nsCCharSeparatedTokenizer;

#endif /* __nsCharSeparatedTokenizer_h */

Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2		/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3		/* This Source Code Form is subject to the terms of the Mozilla Public
4		* License, v. 2.0. If a copy of the MPL was not distributed with this
5		* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7		#ifndef __nsCharSeparatedTokenizer_h
8		#define __nsCharSeparatedTokenizer_h
9
10		#include "mozilla/RangedPtr.h"
11
12		#include "nsDependentSubstring.h"
13		#include "nsCRT.h"
14
15		/**
16		* This parses a SeparatorChar-separated string into tokens.
17		* Whitespace surrounding tokens is not treated as part of tokens, however
18		* whitespace inside a token is. If the final token is the empty string, it is
19		* not returned.
20		*
21		* Some examples, with SeparatorChar = ',':
22		*
23		* "foo, bar, baz" -> "foo" "bar" "baz"
24		* "foo,bar,baz" -> "foo" "bar" "baz"
25		* "foo , bar hi , baz" -> "foo" "bar hi" "baz"
26		* "foo, ,bar,baz" -> "foo" "" "bar" "baz"
27		* "foo,,bar,baz" -> "foo" "" "bar" "baz"
28		* "foo,bar,baz," -> "foo" "bar" "baz"
29		*
30		* The function used for whitespace detection is a template argument.
31		* By default, it is NS_IsAsciiWhitespace.
32		*/
33		template<typename DependentSubstringType, bool IsWhitespace(char16_t)>
34		class nsTCharSeparatedTokenizer
35		{
36		typedef typename DependentSubstringType::char_type CharType;
37		typedef typename DependentSubstringType::substring_type SubstringType;
38
39		public:
40		// Flags -- only one for now. If we need more, they should be defined to
41		// be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
42		enum
43		{
44		SEPARATOR_OPTIONAL = 1
45		};
46
47		nsTCharSeparatedTokenizer(const SubstringType& aSource,
48		CharType aSeparatorChar,
49		uint32_t aFlags = 0)
50		: mIter(aSource.Data(), aSource.Length())
51		, mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
52		aSource.Length())
53		, mSeparatorChar(aSeparatorChar)
54		, mWhitespaceBeforeFirstToken(false)
55		, mWhitespaceAfterCurrentToken(false)
56		, mSeparatorAfterCurrentToken(false)
57		, mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)
58	0	{
59	0	// Skip initial whitespace
60	0	while (mIter < mEnd && IsWhitespace(*mIter)) {
61	0	mWhitespaceBeforeFirstToken = true;
62	0	++mIter;
63	0	}
64	0	} Unexecuted instantiation: Unified_cpp_dom_filesystem0.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::dom::(anonymous namespace)::TokenizerIgnoreNothing>::nsTCharSeparatedTokenizer(nsTSubstring<char16_t> const&, char16_t, unsigned int) Unexecuted instantiation: Unified_cpp_dom_quota0.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char>, &mozilla::dom::quota::(anonymous namespace)::OriginParser::IgnoreWhitespace>::nsTCharSeparatedTokenizer(nsTSubstring<char> const&, char, unsigned int) Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::IgnoreWhitespace>::nsTCharSeparatedTokenizer(nsTSubstring<char16_t> const&, char16_t, unsigned int) Unexecuted instantiation: ActorsParent.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::dom::indexedDB::(anonymous namespace)::TokenizerIgnoreNothing>::nsTCharSeparatedTokenizer(nsTSubstring<char16_t> const&, char16_t, unsigned int) Unexecuted instantiation: Unified_cpp_dom_indexedDB1.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::dom::indexedDB::(anonymous namespace)::IgnoreWhitespace>::nsTCharSeparatedTokenizer(nsTSubstring<char16_t> const&, char16_t, unsigned int)
65
66		/**
67		* Checks if any more tokens are available.
68		*/
69		bool hasMoreTokens() const
70	0	{
71	0	MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
72	0	"Should be at beginning of token if there is one");
73	0
74	0	return mIter < mEnd;
75	0	} Unexecuted instantiation: Unified_cpp_dom_filesystem0.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::dom::(anonymous namespace)::TokenizerIgnoreNothing>::hasMoreTokens() const Unexecuted instantiation: Unified_cpp_dom_quota0.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char>, &mozilla::dom::quota::(anonymous namespace)::OriginParser::IgnoreWhitespace>::hasMoreTokens() const Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::IgnoreWhitespace>::hasMoreTokens() const Unexecuted instantiation: ActorsParent.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::dom::indexedDB::(anonymous namespace)::TokenizerIgnoreNothing>::hasMoreTokens() const Unexecuted instantiation: Unified_cpp_dom_indexedDB1.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::dom::indexedDB::(anonymous namespace)::IgnoreWhitespace>::hasMoreTokens() const
76
77		/*
78		* Returns true if there is whitespace prior to the first token.
79		*/
80		bool whitespaceBeforeFirstToken() const
81	0	{
82	0	return mWhitespaceBeforeFirstToken;
83	0	}
84
85		/*
86		* Returns true if there is a separator after the current token.
87		* Useful if you want to check whether the last token has a separator
88		* after it which may not be valid.
89		*/
90		bool separatorAfterCurrentToken() const
91	0	{
92	0	return mSeparatorAfterCurrentToken;
93	0	} Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &nsContentUtils::IsHTMLWhitespace>::separatorAfterCurrentToken() const Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &(NS_IsAsciiWhitespace(char16_t))>::separatorAfterCurrentToken() const Unexecuted instantiation: Unified_cpp_dom_quota0.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char>, &mozilla::dom::quota::(anonymous namespace)::OriginParser::IgnoreWhitespace>::separatorAfterCurrentToken() const
94
95		/*
96		* Returns true if there is any whitespace after the current token.
97		*/
98		bool whitespaceAfterCurrentToken() const
99	0	{
100	0	return mWhitespaceAfterCurrentToken;
101	0	}
102
103		/**
104		* Returns the next token.
105		*/
106		const DependentSubstringType nextToken()
107	0	{
108	0	mozilla::RangedPtr<const CharType> tokenStart = mIter;
109	0	mozilla::RangedPtr<const CharType> tokenEnd = mIter;
110	0
111	0	MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
112	0	"Should be at beginning of token if there is one");
113	0
114	0	// Search until we hit separator or end (or whitespace, if a separator
115	0	// isn't required -- see clause with 'break' below).
116	0	while (mIter < mEnd && *mIter != mSeparatorChar) {
117	0	// Skip to end of the current word.
118	0	while (mIter < mEnd &&
119	0	!IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
120	0	++mIter;
121	0	}
122	0	tokenEnd = mIter;
123	0
124	0	// Skip whitespace after the current word.
125	0	mWhitespaceAfterCurrentToken = false;
126	0	while (mIter < mEnd && IsWhitespace(*mIter)) {
127	0	mWhitespaceAfterCurrentToken = true;
128	0	++mIter;
129	0	}
130	0	if (mSeparatorOptional) {
131	0	// We've hit (and skipped) whitespace, and that's sufficient to end
132	0	// our token, regardless of whether we've reached a SeparatorChar.
133	0	break;
134	0	} // (else, we'll keep looping until we hit mEnd or SeparatorChar)
135	0	}
136	0
137	0	mSeparatorAfterCurrentToken = (mIter != mEnd &&
138	0	*mIter == mSeparatorChar);
139	0	MOZ_ASSERT(mSeparatorOptional ||
140	0	(mSeparatorAfterCurrentToken == (mIter < mEnd)),
141	0	"If we require a separator and haven't hit the end of "
142	0	"our string, then we shouldn't have left the loop "
143	0	"unless we hit a separator");
144	0
145	0	// Skip separator (and any whitespace after it), if we're at one.
146	0	if (mSeparatorAfterCurrentToken) {
147	0	++mIter;
148	0
149	0	while (mIter < mEnd && IsWhitespace(*mIter)) {
150	0	mWhitespaceAfterCurrentToken = true;
151	0	++mIter;
152	0	}
153	0	}
154	0
155	0	return Substring(tokenStart.get(), tokenEnd.get());
156	0	} Unexecuted instantiation: Unified_cpp_dom_filesystem0.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::dom::(anonymous namespace)::TokenizerIgnoreNothing>::nextToken() Unexecuted instantiation: Unified_cpp_dom_quota0.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char>, &mozilla::dom::quota::(anonymous namespace)::OriginParser::IgnoreWhitespace>::nextToken() Unexecuted instantiation: nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::IgnoreWhitespace>::nextToken() Unexecuted instantiation: ActorsParent.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::dom::indexedDB::(anonymous namespace)::TokenizerIgnoreNothing>::nextToken() Unexecuted instantiation: Unified_cpp_dom_indexedDB1.cpp:nsTCharSeparatedTokenizer<nsTDependentSubstring<char16_t>, &mozilla::dom::indexedDB::(anonymous namespace)::IgnoreWhitespace>::nextToken()
157
158		private:
159		mozilla::RangedPtr<const CharType> mIter;
160		const mozilla::RangedPtr<const CharType> mEnd;
161		CharType mSeparatorChar;
162		bool mWhitespaceBeforeFirstToken;
163		bool mWhitespaceAfterCurrentToken;
164		bool mSeparatorAfterCurrentToken;
165		bool mSeparatorOptional;
166		};
167
168		template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
169		class nsCharSeparatedTokenizerTemplate
170		: public nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace>
171		{
172		public:
173		nsCharSeparatedTokenizerTemplate(const nsAString& aSource,
174		char16_t aSeparatorChar,
175		uint32_t aFlags = 0)
176		: nsTCharSeparatedTokenizer<nsDependentSubstring,
177		IsWhitespace>(aSource, aSeparatorChar, aFlags)
178	0	{
179	0	} Unexecuted instantiation: Unified_cpp_dom_filesystem0.cpp:nsCharSeparatedTokenizerTemplate<&mozilla::dom::(anonymous namespace)::TokenizerIgnoreNothing>::nsCharSeparatedTokenizerTemplate(nsTSubstring<char16_t> const&, char16_t, unsigned int) Unexecuted instantiation: nsCharSeparatedTokenizerTemplate<&mozilla::IgnoreWhitespace>::nsCharSeparatedTokenizerTemplate(nsTSubstring<char16_t> const&, char16_t, unsigned int) Unexecuted instantiation: ActorsParent.cpp:nsCharSeparatedTokenizerTemplate<&mozilla::dom::indexedDB::(anonymous namespace)::TokenizerIgnoreNothing>::nsCharSeparatedTokenizerTemplate(nsTSubstring<char16_t> const&, char16_t, unsigned int) Unexecuted instantiation: Unified_cpp_dom_indexedDB1.cpp:nsCharSeparatedTokenizerTemplate<&mozilla::dom::indexedDB::(anonymous namespace)::IgnoreWhitespace>::nsCharSeparatedTokenizerTemplate(nsTSubstring<char16_t> const&, char16_t, unsigned int)
180		};
181
182		typedef nsCharSeparatedTokenizerTemplate<> nsCharSeparatedTokenizer;
183
184		template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
185		class nsCCharSeparatedTokenizerTemplate
186		: public nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace>
187		{
188		public:
189		nsCCharSeparatedTokenizerTemplate(const nsACString& aSource,
190		char aSeparatorChar,
191		uint32_t aFlags = 0)
192		: nsTCharSeparatedTokenizer<nsDependentCSubstring,
193		IsWhitespace>(aSource, aSeparatorChar, aFlags)
194	0	{
195	0	}
196		};
197
198		typedef nsCCharSeparatedTokenizerTemplate<> nsCCharSeparatedTokenizer;
199
200		#endif /* __nsCharSeparatedTokenizer_h */