/src/keystone/llvm/lib/Support/StringRef.cpp

Source
//===-- StringRef.cpp - Lightweight String References ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/edit_distance.h"
#include <bitset>

using namespace llvm_ks;

// MSVC emits references to this into the translation units which reference it.
#ifndef _MSC_VER
const size_t StringRef::npos;
#endif

/// Lower-case a single ASCII character.
///
/// Only 'A'..'Z' are mapped (to 'a'..'z'); every other value is returned
/// unchanged, so the result does not depend on the current locale.
static char ascii_tolower(char x) {
  const bool IsUpper = 'A' <= x && x <= 'Z';
  return IsUpper ? static_cast<char>(x + ('a' - 'A')) : x;
}

/// Upper-case a single ASCII character.
///
/// Only 'a'..'z' are mapped (to 'A'..'Z'); every other value is returned
/// unchanged, so the result does not depend on the current locale.
static char ascii_toupper(char x) {
  const bool IsLower = 'a' <= x && x <= 'z';
  return IsLower ? static_cast<char>(x - ('a' - 'A')) : x;
}

/// Return true iff \p x is one of the ASCII decimal digits '0'..'9'.
static bool ascii_isdigit(char x) {
  if (x < '0')
    return false;
  return x <= '9';
}

// Portable replacement for strncasecmp(), which is not available on
// non-POSIX systems.
//
// Compares the first Length bytes of LHS and RHS after lower-casing each
// byte with ascii_tolower(). Returns -1 or 1 according to the first
// differing byte (compared as unsigned char), or 0 if all Length bytes
// match. NUL bytes are not treated specially.
static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
  for (size_t Remaining = Length; Remaining != 0; --Remaining, ++LHS, ++RHS) {
    const unsigned char L = ascii_tolower(*LHS);
    const unsigned char R = ascii_tolower(*RHS);
    if (L == R)
      continue;
    return L < R ? -1 : 1;
  }
  return 0;
}

/// compare_lower - Three-way comparison of this string against \p RHS,
/// ignoring ASCII case.
///
/// \return -1, 0 or 1. The common-length prefix is compared first; if it
/// is equal the shorter string orders first.
int StringRef::compare_lower(StringRef RHS) const {
  const size_t CommonLen = std::min(Length, RHS.Length);
  const int PrefixOrder = ascii_strncasecmp(Data, RHS.Data, CommonLen);
  if (PrefixOrder != 0)
    return PrefixOrder;

  // Equal prefixes: the lengths decide.
  if (Length < RHS.Length)
    return -1;
  return Length > RHS.Length ? 1 : 0;
}

/// Check whether this string begins with \p Prefix, ignoring ASCII case.
bool StringRef::startswith_lower(StringRef Prefix) const {
  // A prefix longer than the string itself can never match.
  if (Prefix.Length > Length)
    return false;
  return ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
}

/// Check whether this string finishes with \p Suffix, ignoring ASCII case.
bool StringRef::endswith_lower(StringRef Suffix) const {
  // A suffix longer than the string itself can never match.
  if (Suffix.Length > Length)
    return false;
  const char *Tail = end() - Suffix.Length;
  return ascii_strncasecmp(Tail, Suffix.Data, Suffix.Length) == 0;
}

/// compare_numeric - Three-way comparison that treats embedded runs of
/// ASCII digits as numbers.
///
/// When both strings have a digit at the same offset, the two digit runs
/// starting there are compared: the longer run is considered larger, and
/// runs of equal length are compared bytewise. Leading zeros are not
/// handled specially. All other characters are compared as unsigned
/// bytes, and if one string is a prefix of the other the shorter one
/// orders first.
///
/// \return -1, 0 or 1.
int StringRef::compare_numeric(StringRef RHS) const {
  const size_t Common = std::min(Length, RHS.Length);
  size_t Pos = 0;
  while (Pos != Common) {
    const char L = Data[Pos];
    const char R = RHS.Data[Pos];

    if (ascii_isdigit(L) && ascii_isdigit(R)) {
      // Find where the digit runs stop. The scan may look one index past
      // Common so that a run ending exactly at the end of the shorter
      // string is detected; the bounds checks below guard those reads.
      size_t RunEnd = Pos + 1;
      while (RunEnd != Common + 1) {
        const bool LeftDigit = RunEnd < Length && ascii_isdigit(Data[RunEnd]);
        const bool RightDigit =
            RunEnd < RHS.Length && ascii_isdigit(RHS.Data[RunEnd]);
        // Whichever side still has a digit has the longer (larger) number.
        if (LeftDigit != RightDigit)
          return RightDigit ? -1 : 1;
        if (!RightDigit)
          break;
        ++RunEnd;
      }

      // Both runs span [Pos, RunEnd): compare them bytewise.
      const int RunOrder =
          compareMemory(Data + Pos, RHS.Data + Pos, RunEnd - Pos);
      if (RunOrder != 0)
        return RunOrder < 0 ? -1 : 1;

      // Identical numbers; resume right after them.
      Pos = RunEnd;
      continue;
    }

    if (L != R)
      return (unsigned char)L < (unsigned char)R ? -1 : 1;
    ++Pos;
  }

  if (Length == RHS.Length)
    return 0;
  return Length < RHS.Length ? -1 : 1;
}

// Compute the edit distance between this string and Other by viewing both
// as character arrays and forwarding to ComputeEditDistance, passing
// AllowReplacements and MaxEditDistance through unchanged.
unsigned StringRef::edit_distance(llvm_ks::StringRef Other,
                                  bool AllowReplacements,
                                  unsigned MaxEditDistance) const {
  ArrayRef<char> FromChars = makeArrayRef(data(), size());
  ArrayRef<char> ToChars = makeArrayRef(Other.data(), Other.size());
  return llvm_ks::ComputeEditDistance(FromChars, ToChars, AllowReplacements,
                                      MaxEditDistance);
}

//===----------------------------------------------------------------------===//
// String Operations
//===----------------------------------------------------------------------===//

std::string StringRef::lower() const {
  std::string Result(size(), char());
  for (size_type i = 0, e = size(); i != e; ++i) {
    Result[i] = ascii_tolower(Data[i]);
  }
  return Result;
}

std::string StringRef::upper() const {
  std::string Result(size(), char());
  for (size_type i = 0, e = size(); i != e; ++i) {
    Result[i] = ascii_toupper(Data[i]);
  }
  return Result;
}

//===----------------------------------------------------------------------===//
// String Searching
//===----------------------------------------------------------------------===//


/// find - Locate the first occurrence of \arg Str at or after offset
/// \arg From.
///
/// \return - The index (relative to the start of this string) of the first
/// occurrence of \arg Str, or npos if there is none. An empty \arg Str
/// matches at \arg From, provided \arg From is not past the end.
size_t StringRef::find(StringRef Str, size_t From) const {
  // A starting offset beyond the end can never produce a match.
  if (From > Length)
    return npos;

  const size_t NeedleLen = Str.size();
  if (NeedleLen == 0)
    return From;

  const size_t Remaining = Length - From;
  if (NeedleLen > Remaining)
    return npos;

  const char *const Pattern = Str.data();
  const char *Cursor = Data + From;
  // One past the last position at which a full-length match could begin.
  const char *const Limit = Cursor + (Remaining - NeedleLen + 1);

  // Short haystacks, and needles too long for the 8-bit skip table below,
  // are searched one position at a time.
  if (Remaining < 16 || NeedleLen > 255) {
    for (; Cursor < Limit; ++Cursor)
      if (std::memcmp(Cursor, Pattern, NeedleLen) == 0)
        return Cursor - Data;
    return npos;
  }

  // Bad-character skip table, indexed by byte value. Bytes that do not
  // occur in the needle (ignoring its last byte) allow a full-length skip;
  // otherwise the skip is the distance from the byte's last such occurrence
  // to the end of the needle. uint8_t entries keep the table small.
  uint8_t Skip[256];
  std::memset(Skip, NeedleLen, 256);
  for (size_t Pos = 0; Pos + 1 != NeedleLen; ++Pos)
    Skip[(uint8_t)Pattern[Pos]] = NeedleLen - 1 - Pos;

  while (Cursor < Limit) {
    if (std::memcmp(Cursor, Pattern, NeedleLen) == 0)
      return Cursor - Data;

    // No match here: advance by the skip for the haystack byte that is
    // currently aligned with the needle's final byte.
    Cursor += Skip[(uint8_t)Cursor[NeedleLen - 1]];
  }

  return npos;
}

/// rfind - Locate the last occurrence of \arg Str in the string.
///
/// \return - The index of the last occurrence of \arg Str, or npos if there
/// is none.
size_t StringRef::rfind(StringRef Str) const {
  const size_t NeedleLen = Str.size();
  if (NeedleLen > Length)
    return npos;

  // Try every candidate start, from the rightmost one down to index 0.
  size_t Pos = Length - NeedleLen;
  for (;;) {
    if (substr(Pos, NeedleLen).equals(Str))
      return Pos;
    if (Pos == 0)
      break;
    --Pos;
  }
  return npos;
}

/// find_first_of - Return the index of the first character at or after
/// \arg From that occurs in \arg Chars, or npos if there is none.
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_of(StringRef Chars,
                                              size_t From) const {
  // Membership set with one bit per possible byte value.
  std::bitset<1 << CHAR_BIT> Wanted;
  for (const char *P = Chars.begin(), *PE = Chars.end(); P != PE; ++P)
    Wanted.set((unsigned char)*P);

  size_type Pos = std::min(From, Length);
  while (Pos != Length) {
    if (Wanted.test((unsigned char)Data[Pos]))
      return Pos;
    ++Pos;
  }
  return npos;
}

/// find_first_not_of - Return the index of the first character at or after
/// \arg From that differs from \arg C, or npos if there is none.
StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
  size_type Pos = std::min(From, Length);
  while (Pos != Length) {
    if (Data[Pos] != C)
      return Pos;
    ++Pos;
  }
  return npos;
}

/// find_first_not_of - Return the index of the first character at or after
/// \arg From that does not occur in \arg Chars, or npos if there is none.
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
                                                  size_t From) const {
  // Membership set with one bit per possible byte value.
  std::bitset<1 << CHAR_BIT> Excluded;
  for (const char *P = Chars.begin(), *PE = Chars.end(); P != PE; ++P)
    Excluded.set((unsigned char)*P);

  size_type Pos = std::min(From, Length);
  while (Pos != Length) {
    if (!Excluded.test((unsigned char)Data[Pos]))
      return Pos;
    ++Pos;
  }
  return npos;
}

/// find_last_of - Return the index of the last character strictly before
/// offset \arg From (clamped to the string length) that occurs in
/// \arg Chars, or npos if there is none.
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_last_of(StringRef Chars,
                                             size_t From) const {
  // Membership set with one bit per possible byte value.
  std::bitset<1 << CHAR_BIT> Wanted;
  for (const char *P = Chars.begin(), *PE = Chars.end(); P != PE; ++P)
    Wanted.set((unsigned char)*P);

  // Walk backwards; Pos is always one past the next index to examine.
  size_type Pos = std::min(From, Length);
  while (Pos != 0) {
    --Pos;
    if (Wanted.test((unsigned char)Data[Pos]))
      return Pos;
  }
  return npos;
}

/// find_last_not_of - Return the index of the last character strictly
/// before offset \arg From (clamped to the string length) that differs from
/// \arg C, or npos if there is none.
StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
  // Walk backwards; Pos is always one past the next index to examine.
  size_type Pos = std::min(From, Length);
  while (Pos != 0) {
    --Pos;
    if (Data[Pos] != C)
      return Pos;
  }
  return npos;
}

/// find_last_not_of - Return the index of the last character strictly
/// before offset \arg From (clamped to the string length) that does not
/// occur in \arg Chars, or npos if there is none.
///
/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
                                                 size_t From) const {
  // Membership set with one bit per possible byte value.
  std::bitset<1 << CHAR_BIT> Excluded;
  for (const char *P = Chars.begin(), *PE = Chars.end(); P != PE; ++P)
    Excluded.set((unsigned char)*P);

  // Walk backwards; Pos is always one past the next index to examine.
  size_type Pos = std::min(From, Length);
  while (Pos != 0) {
    --Pos;
    if (!Excluded.test((unsigned char)Data[Pos]))
      return Pos;
  }
  return npos;
}

/// Split this string around occurrences of \p Separator and append the
/// pieces to \p A.
///
/// \param A          Receives the pieces (appended with push_back).
/// \param Separator  The separator string. If it is empty no splitting is
///                   performed and the whole string is treated as the tail.
/// \param MaxSplit   Maximum number of splits to perform; -1 means no limit.
/// \param KeepEmpty  When false, empty pieces (including an empty tail) are
///                   not appended.
void StringRef::split(SmallVectorImpl<StringRef> &A,
                      StringRef Separator, int MaxSplit,
                      bool KeepEmpty) const {
  StringRef S = *this;

  // An empty separator is "found" at offset 0 without consuming anything,
  // so the loop below would never make progress: with MaxSplit == -1 it
  // would spin (and, with KeepEmpty, keep appending empty pieces) until the
  // counter overflows. Skip the loop entirely in that case.
  if (!Separator.empty()) {
    // Count down from MaxSplit. When MaxSplit is -1, this will just split
    // "forever". This doesn't support splitting more than 2^31 times
    // intentionally; if we ever want that we can make MaxSplit a 64-bit
    // integer but that seems unlikely to be useful.
    while (MaxSplit-- != 0) {
      size_t Idx = S.find(Separator);
      if (Idx == npos)
        break;

      // Push this split.
      if (KeepEmpty || Idx > 0)
        A.push_back(S.slice(0, Idx));

      // Jump forward past the separator.
      S = S.slice(Idx + Separator.size(), npos);
    }
  }

  // Push the tail.
  if (KeepEmpty || !S.empty())
    A.push_back(S);
}

/// Split this string around occurrences of the character \p Separator and
/// append the pieces to \p A.
///
/// \param A          Receives the pieces (appended with push_back).
/// \param Separator  The separator character.
/// \param MaxSplit   Maximum number of splits to perform; -1 means no limit
///                   (the countdown is a plain int, so more than 2^31
///                   splits are intentionally not supported).
/// \param KeepEmpty  When false, empty pieces (including an empty tail) are
///                   not appended.
void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
                      int MaxSplit, bool KeepEmpty) const {
  StringRef Rest = *this;

  for (int SplitsLeft = MaxSplit; SplitsLeft != 0; --SplitsLeft) {
    const size_t SepPos = Rest.find(Separator);
    if (SepPos == npos)
      break;

    // Emit the piece in front of the separator.
    if (KeepEmpty || SepPos > 0)
      A.push_back(Rest.slice(0, SepPos));

    // Continue with whatever follows the separator character.
    Rest = Rest.slice(SepPos + 1, npos);
  }

  // Emit the remaining tail.
  if (KeepEmpty || !Rest.empty())
    A.push_back(Rest);
}

//===----------------------------------------------------------------------===//
// Helpful Algorithms
//===----------------------------------------------------------------------===//

/// count - Return the number of non-overlapped occurrences of \arg Str in
/// the string.
///
/// Matches are taken greedily from left to right, and after a match the
/// scan resumes immediately past it, so e.g. "aaaa" contains "aa" twice.
/// An empty \arg Str yields size() + 1 (it matches at every position,
/// including the end), which is what this function has always returned.
size_t StringRef::count(StringRef Str) const {
  const size_t N = Str.size();
  if (N > Length)
    return 0;

  // Keep the historical result for the empty needle; the stepping logic
  // below needs N > 0 to make progress.
  if (N == 0)
    return Length + 1;

  size_t Count = 0;
  // e is one past the last index at which a full-length match can start.
  // i + N never exceeds Length, so the index cannot wrap.
  for (size_t i = 0, e = Length - N + 1; i < e;) {
    if (substr(i, N).equals(Str)) {
      ++Count;
      i += N; // Skip the whole match so occurrences never overlap.
    } else {
      ++i;
    }
  }
  return Count;
}

/// Infer the radix of an integer literal from its prefix.
///
/// "0x"/"0X" selects 16, "0b" selects 2 and "0o" selects 8; in those cases
/// the two prefix characters are removed from \p Str. A plain leading "0"
/// selects 8 and leaves \p Str untouched. Anything else is radix 10.
static unsigned GetAutoSenseRadix(StringRef &Str) {
  unsigned Radix = 10;
  unsigned PrefixLen = 0;

  if (Str.startswith("0x") || Str.startswith("0X")) {
    Radix = 16;
    PrefixLen = 2;
  } else if (Str.startswith("0b")) {
    Radix = 2;
    PrefixLen = 2;
  } else if (Str.startswith("0o")) {
    Radix = 8;
    PrefixLen = 2;
  } else if (Str.startswith("0")) {
    // The leading zero is itself a valid octal digit, so it is kept.
    Radix = 8;
  }

  if (PrefixLen != 0)
    Str = Str.substr(PrefixLen);
  return Radix;
}


/// GetAsUnsignedInteger - Workhorse that converts an integer character
/// sequence of radix up to 36 to an unsigned long long value.
///
/// A \p Radix of 0 requests prefix-based auto-detection.
///
/// \return true on error (empty input, a character that is not a digit of
/// the radix, or overflow), false on success. \p Result is left untouched
/// if the input is empty; on other failures it holds an intermediate value.
bool llvm_ks::getAsUnsignedInteger(StringRef Str, unsigned Radix,
                                unsigned long long &Result) {
  // Autosense radix if not specified.
  if (Radix == 0)
    Radix = GetAutoSenseRadix(Str);

  // Nothing left to parse (possibly after stripping a radix prefix).
  if (Str.empty())
    return true;

  Result = 0;
  for (; !Str.empty(); Str = Str.substr(1)) {
    const char C = Str[0];

    // Decode the character as a base-36 digit value.
    unsigned Digit;
    if (C >= '0' && C <= '9')
      Digit = C - '0';
    else if (C >= 'a' && C <= 'z')
      Digit = C - 'a' + 10;
    else if (C >= 'A' && C <= 'Z')
      Digit = C - 'A' + 10;
    else
      return true;

    // The digit must be representable in the requested radix.
    if (Digit >= Radix)
      return true;

    // Accumulate, then detect wrap-around: dividing the new value by the
    // radix must not give less than the value before this digit.
    const unsigned long long Before = Result;
    Result = Before * Radix + Digit;
    if (Result / Radix < Before)
      return true;
  }

  return false;
}

/// Parse \p Str as a signed integer in the given \p Radix (0 requests
/// prefix-based auto-detection) and store it in \p Result.
///
/// A single leading '-' marks a negative value; the magnitude is parsed
/// with getAsUnsignedInteger().
///
/// \return true on error (unparsable magnitude, or a value that does not
/// fit in a long long), false on success. \p Result is only written on
/// success.
bool llvm_ks::getAsSignedInteger(StringRef Str, unsigned Radix,
                              long long &Result) {
  unsigned long long Magnitude;
  const bool IsNegative = !Str.empty() && Str.front() == '-';

  if (!IsNegative) {
    if (getAsUnsignedInteger(Str, Radix, Magnitude))
      return true;
    // A value with the top bit set is too large for a signed result.
    if ((long long)Magnitude < 0)
      return true;
    Result = Magnitude;
    return false;
  }

  // Parse the digits after the minus sign.
  if (getAsUnsignedInteger(Str.substr(1), Radix, Magnitude))
    return true;
  // Reject magnitudes whose negation does not fit, while still accepting
  // "-0". The negation is done on the unsigned value so that it cannot
  // trigger signed-overflow undefined behaviour.
  if ((long long)-Magnitude > 0)
    return true;

  Result = -Magnitude;
  return false;
}

bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
  StringRef Str = *this;

  // Autosense radix if not specified.
  if (Radix == 0)
    Radix = GetAutoSenseRadix(Str);

  assert(Radix > 1 && Radix <= 36);

  // Empty strings (after the radix autosense) are invalid.
  if (Str.empty()) return true;

  // Skip leading zeroes.  This can be a significant improvement if
  // it means we don't need > 64 bits.
  while (!Str.empty() && Str.front() == '0')
    Str = Str.substr(1);

  // If it was nothing but zeroes....
  if (Str.empty()) {
    Result = APInt(64, 0);
    return false;
  }

  // (Over-)estimate the required number of bits.
  unsigned Log2Radix = 0;
  while ((1U << Log2Radix) < Radix) Log2Radix++;
  bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);

  unsigned BitWidth = Log2Radix * Str.size();
  if (BitWidth < Result.getBitWidth())
    BitWidth = Result.getBitWidth(); // don't shrink the result
  else if (BitWidth > Result.getBitWidth())
    Result = Result.zext(BitWidth);

  APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
  if (!IsPowerOf2Radix) {
    // These must have the same bit-width as Result.
    RadixAP = APInt(BitWidth, Radix);
    CharAP = APInt(BitWidth, 0);
  }

  // Parse all the bytes of the string given this radix.
  Result = 0;
  while (!Str.empty()) {
    unsigned CharVal;
    if (Str[0] >= '0' && Str[0] <= '9')
      CharVal = Str[0]-'0';
    else if (Str[0] >= 'a' && Str[0] <= 'z')
      CharVal = Str[0]-'a'+10;
    else if (Str[0] >= 'A' && Str[0] <= 'Z')
      CharVal = Str[0]-'A'+10;
    else
      return true;

    // If the parsed value is larger than the integer radix, the string is
    // invalid.
    if (CharVal >= Radix)
      return true;

    // Add in this character.
    if (IsPowerOf2Radix) {
      Result <<= Log2Radix;
      Result |= CharVal;
    } else {
      Result *= RadixAP;
      CharAP = CharVal;
      Result += CharAP;
    }

    Str = Str.substr(1);
  }

  return false;
}


// Hash a StringRef by combining the hash over its character range
// [begin(), end()).
hash_code llvm_ks::hash_value(StringRef S) {
  const char *First = S.begin();
  const char *Last = S.end();
  return hash_combine_range(First, Last);
}

Coverage Report

Created: 2025-11-11 06:44

Line	Count	Source
1		//===-- StringRef.cpp - Lightweight String References ---------------------===//
2		//
3		// The LLVM Compiler Infrastructure
4		//
5		// This file is distributed under the University of Illinois Open Source
6		// License. See LICENSE.TXT for details.
7		//
8		//===----------------------------------------------------------------------===//
9
10		#include "llvm/ADT/StringRef.h"
11		#include "llvm/ADT/APInt.h"
12		#include "llvm/ADT/Hashing.h"
13		#include "llvm/ADT/edit_distance.h"
14		#include <bitset>
15
16		using namespace llvm_ks;
17
18		// MSVC emits references to this into the translation units which reference it.
19		#ifndef _MSC_VER
20		const size_t StringRef::npos;
21		#endif
22
23	107M	static char ascii_tolower(char x) {
24	107M	if (x >= 'A' && x <= 'Z')
25	23.2M	return x - 'A' + 'a';
26	84.6M	return x;
27	107M	}
28
29	259k	static char ascii_toupper(char x) {
30	259k	if (x >= 'a' && x <= 'z')
31	67.7k	return x - 'a' + 'A';
32	191k	return x;
33	259k	}
34
35	0	static bool ascii_isdigit(char x) {
36	0	return x >= '0' && x <= '9';
37	0	}
38
39		// strncasecmp() is not available on non-POSIX systems, so define an
40		// alternative function here.
41	1.06M	static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
42	1.19M	for (size_t I = 0; I < Length; ++I) {
43	1.13M	unsigned char LHC = ascii_tolower(LHS[I]);
44	1.13M	unsigned char RHC = ascii_tolower(RHS[I]);
45	1.13M	if (LHC != RHC)
46	1.00M	return LHC < RHC ? -1 : 1;
47	1.13M	}
48	65.6k	return 0;
49	1.06M	}
50
51		/// compare_lower - Compare strings, ignoring case.
52	1.06M	int StringRef::compare_lower(StringRef RHS) const {
53	1.06M	if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
54	999k	return Res;
55	65.4k	if (Length == RHS.Length)
56	55.0k	return 0;
57	10.4k	return Length < RHS.Length ? -1 : 1;
58	65.4k	}
59
60		/// Check if this string starts with the given \p Prefix, ignoring case.
61	4.74k	bool StringRef::startswith_lower(StringRef Prefix) const {
62	4.74k	return Length >= Prefix.Length &&
63	4.14k	ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
64	4.74k	}
65
66		/// Check if this string ends with the given \p Suffix, ignoring case.
67	0	bool StringRef::endswith_lower(StringRef Suffix) const {
68	0	return Length >= Suffix.Length &&
69	0	ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
70	0	}
71
72		/// compare_numeric - Compare strings, handle embedded numbers.
73	0	int StringRef::compare_numeric(StringRef RHS) const {
74	0	for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
75		// Check for sequences of digits.
76	0	if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
77		// The longer sequence of numbers is considered larger.
78		// This doesn't really handle prefixed zeros well.
79	0	size_t J;
80	0	for (J = I + 1; J != E + 1; ++J) {
81	0	bool ld = J < Length && ascii_isdigit(Data[J]);
82	0	bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
83	0	if (ld != rd)
84	0	return rd ? -1 : 1;
85	0	if (!rd)
86	0	break;
87	0	}
88		// The two number sequences have the same length (J-I), just memcmp them.
89	0	if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
90	0	return Res < 0 ? -1 : 1;
91		// Identical number sequences, continue search after the numbers.
92	0	I = J - 1;
93	0	continue;
94	0	}
95	0	if (Data[I] != RHS.Data[I])
96	0	return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
97	0	}
98	0	if (Length == RHS.Length)
99	0	return 0;
100	0	return Length < RHS.Length ? -1 : 1;
101	0	}
102
103		// Compute the edit distance between the two given strings.
104		unsigned StringRef::edit_distance(llvm_ks::StringRef Other,
105		bool AllowReplacements,
106	0	unsigned MaxEditDistance) const {
107	0	return llvm_ks::ComputeEditDistance(
108	0	makeArrayRef(data(), size()),
109	0	makeArrayRef(Other.data(), Other.size()),
110	0	AllowReplacements, MaxEditDistance);
111	0	}
112
113		//===----------------------------------------------------------------------===//
114		// String Operations
115		//===----------------------------------------------------------------------===//
116
117	14.7M	std::string StringRef::lower() const {
118	14.7M	std::string Result(size(), char());
119	120M	for (size_type i = 0, e = size(); i != e; ++i) {
120	105M	Result[i] = ascii_tolower(Data[i]);
121	105M	}
122	14.7M	return Result;
123	14.7M	}
124
125	56.4k	std::string StringRef::upper() const {
126	56.4k	std::string Result(size(), char());
127	315k	for (size_type i = 0, e = size(); i != e; ++i) {
128	259k	Result[i] = ascii_toupper(Data[i]);
129	259k	}
130	56.4k	return Result;
131	56.4k	}
132
133		//===----------------------------------------------------------------------===//
134		// String Searching
135		//===----------------------------------------------------------------------===//
136
137
138		/// find - Search for the first string \arg Str in the string.
139		///
140		/// \return - The index of the first occurrence of \arg Str, or npos if not
141		/// found.
142	1.29M	size_t StringRef::find(StringRef Str, size_t From) const {
143	1.29M	if (From > Length)
144	0	return npos;
145
146	1.29M	const char *Needle = Str.data();
147	1.29M	size_t N = Str.size();
148	1.29M	if (N == 0)
149	0	return From;
150
151	1.29M	size_t Size = Length - From;
152	1.29M	if (Size < N)
153	0	return npos;
154
155	1.29M	const char *Start = Data + From;
156	1.29M	const char *Stop = Start + (Size - N + 1);
157
158		// For short haystacks or unsupported needles fall back to the naive algorithm
159	1.29M	if (Size < 16 || N > 255) {
160	1.42M	do {
161	1.42M	if (std::memcmp(Start, Needle, N) == 0)
162	191	return Start - Data;
163	1.42M	++Start;
164	1.42M	} while (Start < Stop);
165	1.28M	return npos;
166	1.28M	}
167
168		// Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
169	973	uint8_t BadCharSkip[256];
170	973	std::memset(BadCharSkip, N, 256);
171	1.94k	for (unsigned i = 0; i != N-1; ++i)
172	973	BadCharSkip[(uint8_t)Str[i]] = N-1-i;
173
174	21.3k	do {
175	21.3k	if (std::memcmp(Start, Needle, N) == 0)
176	653	return Start - Data;
177
178		// Otherwise skip the appropriate number of bytes.
179	20.6k	Start += BadCharSkip[(uint8_t)Start[N-1]];
180	20.6k	} while (Start < Stop);
181
182	320	return npos;
183	973	}
184
185		/// rfind - Search for the last string \arg Str in the string.
186		///
187		/// \return - The index of the last occurrence of \arg Str, or npos if not
188		/// found.
189	0	size_t StringRef::rfind(StringRef Str) const {
190	0	size_t N = Str.size();
191	0	if (N > Length)
192	0	return npos;
193	0	for (size_t i = Length - N + 1, e = 0; i != e;) {
194	0	--i;
195	0	if (substr(i, N).equals(Str))
196	0	return i;
197	0	}
198	0	return npos;
199	0	}
200
201		/// find_first_of - Find the first character in the string that is in \arg
202		/// Chars, or npos if not found.
203		///
204		/// Note: O(size() + Chars.size())
205		StringRef::size_type StringRef::find_first_of(StringRef Chars,
206	858	size_t From) const {
207	858	std::bitset<1 << CHAR_BIT> CharBits;
208	3.43k	for (size_type i = 0; i != Chars.size(); ++i)
209	2.57k	CharBits.set((unsigned char)Chars[i]);
210
211	15.4k	for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
212	14.5k	if (CharBits.test((unsigned char)Data[i]))
213	0	return i;
214	858	return npos;
215	858	}
216
217		/// find_first_not_of - Find the first character in the string that is not
218		/// \arg C or npos if not found.
219	0	StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
220	0	for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
221	0	if (Data[i] != C)
222	0	return i;
223	0	return npos;
224	0	}
225
226		/// find_first_not_of - Find the first character in the string that is not
227		/// in the string \arg Chars, or npos if not found.
228		///
229		/// Note: O(size() + Chars.size())
230		StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
231	417k	size_t From) const {
232	417k	std::bitset<1 << CHAR_BIT> CharBits;
233	2.92M	for (size_type i = 0; i != Chars.size(); ++i)
234	2.50M	CharBits.set((unsigned char)Chars[i]);
235
236	485k	for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
237	456k	if (!CharBits.test((unsigned char)Data[i]))
238	388k	return i;
239	29.1k	return npos;
240	417k	}
241
242		/// find_last_of - Find the last character in the string that is in \arg C,
243		/// or npos if not found.
244		///
245		/// Note: O(size() + Chars.size())
246		StringRef::size_type StringRef::find_last_of(StringRef Chars,
247	1.45M	size_t From) const {
248	1.45M	std::bitset<1 << CHAR_BIT> CharBits;
249	4.36M	for (size_type i = 0; i != Chars.size(); ++i)
250	2.91M	CharBits.set((unsigned char)Chars[i]);
251
252	54.8M	for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
253	53.8M	if (CharBits.test((unsigned char)Data[i]))
254	463k	return i;
255	992k	return npos;
256	1.45M	}
257
258		/// find_last_not_of - Find the last character in the string that is not
259		/// \arg C, or npos if not found.
260	0	StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
261	0	for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
262	0	if (Data[i] != C)
263	0	return i;
264	0	return npos;
265	0	}
266
267		/// find_last_not_of - Find the last character in the string that is not in
268		/// \arg Chars, or npos if not found.
269		///
270		/// Note: O(size() + Chars.size())
271		StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
272	417k	size_t From) const {
273	417k	std::bitset<1 << CHAR_BIT> CharBits;
274	2.92M	for (size_type i = 0, e = Chars.size(); i != e; ++i)
275	2.50M	CharBits.set((unsigned char)Chars[i]);
276
277	516k	for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
278	487k	if (!CharBits.test((unsigned char)Data[i]))
279	388k	return i;
280	29.1k	return npos;
281	417k	}
282
283		void StringRef::split(SmallVectorImpl<StringRef> &A,
284		StringRef Separator, int MaxSplit,
285	0	bool KeepEmpty) const {
286	0	StringRef S = *this;
287
288		// Count down from MaxSplit. When MaxSplit is -1, this will just split
289		// "forever". This doesn't support splitting more than 2^31 times
290		// intentionally; if we ever want that we can make MaxSplit a 64-bit integer
291		// but that seems unlikely to be useful.
292	0	while (MaxSplit-- != 0) {
293	0	size_t Idx = S.find(Separator);
294	0	if (Idx == npos)
295	0	break;
296
297		// Push this split.
298	0	if (KeepEmpty || Idx > 0)
299	0	A.push_back(S.slice(0, Idx));
300
301		// Jump forward.
302	0	S = S.slice(Idx + Separator.size(), npos);
303	0	}
304
305		// Push the tail.
306	0	if (KeepEmpty || !S.empty())
307	0	A.push_back(S);
308	0	}
309
310		void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
311	1.58M	int MaxSplit, bool KeepEmpty) const {
312	1.58M	StringRef S = *this;
313
314		// Count down from MaxSplit. When MaxSplit is -1, this will just split
315		// "forever". This doesn't support splitting more than 2^31 times
316		// intentionally; if we ever want that we can make MaxSplit a 64-bit integer
317		// but that seems unlikely to be useful.
318	2.28M	while (MaxSplit-- != 0) {
319	2.15M	size_t Idx = S.find(Separator);
320	2.15M	if (Idx == npos)
321	1.45M	break;
322
323		// Push this split.
324	705k	if (KeepEmpty || Idx > 0)
325	704k	A.push_back(S.slice(0, Idx));
326
327		// Jump forward.
328	705k	S = S.slice(Idx + 1, npos);
329	705k	}
330
331		// Push the tail.
332	1.58M	if (KeepEmpty || !S.empty())
333	1.52M	A.push_back(S);
334	1.58M	}
335
336		//===----------------------------------------------------------------------===//
337		// Helpful Algorithms
338		//===----------------------------------------------------------------------===//
339
340		/// count - Return the number of non-overlapped occurrences of \arg Str in
341		/// the string.
342	0	size_t StringRef::count(StringRef Str) const {
343	0	size_t Count = 0;
344	0	size_t N = Str.size();
345	0	if (N > Length)
346	0	return 0;
347	0	for (size_t i = 0, e = Length - N + 1; i != e; ++i)
348	0	if (substr(i, N).equals(Str))
349	0	++Count;
350	0	return Count;
351	0	}
352
353	53.2k	static unsigned GetAutoSenseRadix(StringRef &Str) {
354	53.2k	if (Str.startswith("0x") || Str.startswith("0X")) {
355	53.2k	Str = Str.substr(2);
356	53.2k	return 16;
357	53.2k	}
358
359	0	if (Str.startswith("0b")) {
360	0	Str = Str.substr(2);
361	0	return 2;
362	0	}
363
364	0	if (Str.startswith("0o")) {
365	0	Str = Str.substr(2);
366	0	return 8;
367	0	}
368
369	0	if (Str.startswith("0"))
370	0	return 8;
371
372	0	return 10;
373	0	}
374
375
376		/// GetAsUnsignedInteger - Workhorse method that converts a integer character
377		/// sequence of radix up to 36 to an unsigned long long value.
378		bool llvm_ks::getAsUnsignedInteger(StringRef Str, unsigned Radix,
379	6.37k	unsigned long long &Result) {
380		// Autosense radix if not specified.
381	6.37k	if (Radix == 0)
382	0	Radix = GetAutoSenseRadix(Str);
383
384		// Empty strings (after the radix autosense) are invalid.
385	6.37k	if (Str.empty()) return true;
386
387		// Parse all the bytes of the string given this radix. Watch for overflow.
388	6.16k	Result = 0;
389	24.3k	while (!Str.empty()) {
390	22.4k	unsigned CharVal;
391	22.4k	if (Str[0] >= '0' && Str[0] <= '9')
392	18.2k	CharVal = Str[0]-'0';
393	4.19k	else if (Str[0] >= 'a' && Str[0] <= 'z')
394	2.31k	CharVal = Str[0]-'a'+10;
395	1.88k	else if (Str[0] >= 'A' && Str[0] <= 'Z')
396	1.52k	CharVal = Str[0]-'A'+10;
397	356	else
398	356	return true;
399
400		// If the parsed value is larger than the integer radix, the string is
401		// invalid.
402	22.1k	if (CharVal >= Radix)
403	3.84k	return true;
404
405		// Add in this character.
406	18.2k	unsigned long long PrevResult = Result;
407	18.2k	Result = Result*Radix+CharVal;
408
409		// Check for overflow by shifting back and seeing if bits were lost.
410	18.2k	if (Result/Radix < PrevResult)
411	51	return true;
412
413	18.2k	Str = Str.substr(1);
414	18.2k	}
415
416	1.91k	return false;
417	6.16k	}
418
419		bool llvm_ks::getAsSignedInteger(StringRef Str, unsigned Radix,
420	3.49k	long long &Result) {
421	3.49k	unsigned long long ULLVal;
422
423		// Handle positive strings first.
424	3.49k	if (Str.empty() || Str.front() != '-') {
425	3.49k	if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
426		// Check for value so large it overflows a signed value.
427	1.25k	(long long)ULLVal < 0)
428	2.25k	return true;
429	1.24k	Result = ULLVal;
430	1.24k	return false;
431	3.49k	}
432
433		// Get the positive part of the value.
434	0	if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
435		// Reject values so large they'd overflow as negative signed, but allow
436		// "-0". This negates the unsigned so that the negative isn't undefined
437		// on signed overflow.
438	0	(long long)-ULLVal > 0)
439	0	return true;
440
441	0	Result = -ULLVal;
442	0	return false;
443	0	}
444
445	5.53M	bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
446	5.53M	StringRef Str = *this;
447
448		// Autosense radix if not specified.
449	5.53M	if (Radix == 0)
450	53.2k	Radix = GetAutoSenseRadix(Str);
451
452	5.53M	assert(Radix > 1 && Radix <= 36);
453
454		// Empty strings (after the radix autosense) are invalid.
455	5.53M	if (Str.empty()) return true;
456
457		// Skip leading zeroes. This can be a significant improvement if
458		// it means we don't need > 64 bits.
459	7.65M	while (!Str.empty() && Str.front() == '0')
460	2.11M	Str = Str.substr(1);
461
462		// If it was nothing but zeroes....
463	5.53M	if (Str.empty()) {
464	965k	Result = APInt(64, 0);
465	965k	return false;
466	965k	}
467
468		// (Over-)estimate the required number of bits.
469	4.57M	unsigned Log2Radix = 0;
470	22.4M	while ((1U << Log2Radix) < Radix) Log2Radix++;
471	4.57M	bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
472
473	4.57M	unsigned BitWidth = Log2Radix * Str.size();
474	4.57M	if (BitWidth < Result.getBitWidth())
475	4.51M	BitWidth = Result.getBitWidth(); // don't shrink the result
476	61.5k	else if (BitWidth > Result.getBitWidth())
477	49.6k	Result = Result.zext(BitWidth);
478
479	4.57M	APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
480	4.57M	if (!IsPowerOf2Radix) {
481		// These must have the same bit-width as Result.
482	210	RadixAP = APInt(BitWidth, Radix);
483	210	CharAP = APInt(BitWidth, 0);
484	210	}
485
486		// Parse all the bytes of the string given this radix.
487	4.57M	Result = 0;
488	18.6M	while (!Str.empty()) {
489	14.2M	unsigned CharVal;
490	14.2M	if (Str[0] >= '0' && Str[0] <= '9')
491	13.6M	CharVal = Str[0]-'0';
492	567k	else if (Str[0] >= 'a' && Str[0] <= 'z')
493	522k	CharVal = Str[0]-'a'+10;
494	45.5k	else if (Str[0] >= 'A' && Str[0] <= 'Z')
495	45.5k	CharVal = Str[0]-'A'+10;
496	0	else
497	0	return true;
498
499		// If the parsed value is larger than the integer radix, the string is
500		// invalid.
501	14.2M	if (CharVal >= Radix)
502	112k	return true;
503
504		// Add in this character.
505	14.0M	if (IsPowerOf2Radix) {
506	14.0M	Result <<= Log2Radix;
507	14.0M	Result |= CharVal;
508	14.0M	} else {
509	19.1k	Result *= RadixAP;
510	19.1k	CharAP = CharVal;
511	19.1k	Result += CharAP;
512	19.1k	}
513
514	14.0M	Str = Str.substr(1);
515	14.0M	}
516
517	4.46M	return false;
518	4.57M	}
519
520
521		// Implementation of StringRef hashing.
522	104k	hash_code llvm_ks::hash_value(StringRef S) {
523	104k	return hash_combine_range(S.begin(), S.end());
524	104k	}