Coverage Report

Created: 2026-04-27 06:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/behaviortreecpp/src/script_tokenizer.cpp
Line
Count
Source
1
/*  Copyright (C) 2022-2025 Davide Faconti -  All Rights Reserved
2
*
3
*   Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
4
*   to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
5
*   and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
*   The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
7
*
8
*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
9
*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
10
*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
11
*/
12
13
#include "behaviortree_cpp/scripting/any_types.hpp"
14
15
#include <cctype>
16
17
namespace BT::Scripting
18
{
19
20
namespace
21
{
22
23
/// Returns true when `c` may begin an identifier: an alphabetic character
/// (per std::isalpha), an underscore, or '@'.
bool isIdentStart(char c)
{
  if(c == '_' || c == '@')
  {
    return true;
  }
  // Cast through unsigned char: passing a negative char to <cctype> is undefined.
  const auto as_uchar = static_cast<unsigned char>(c);
  return std::isalpha(as_uchar) != 0;
}
27
28
/// Returns true when `c` may appear after the first character of an
/// identifier: an alphanumeric character (per std::isalnum) or an underscore.
/// Unlike isIdentStart, '@' is not accepted here.
bool isIdentChar(char c)
{
  if(c == '_')
  {
    return true;
  }
  // Cast through unsigned char: passing a negative char to <cctype> is undefined.
  const auto as_uchar = static_cast<unsigned char>(c);
  return std::isalnum(as_uchar) != 0;
}
32
33
/// Returns true when `c` is a decimal digit according to std::isdigit.
bool isDigit(char c)
{
  // Cast through unsigned char: passing a negative char to <cctype> is undefined.
  const auto as_uchar = static_cast<unsigned char>(c);
  const int classified = std::isdigit(as_uchar);
  return classified != 0;
}
37
38
/// Returns true when `c` is a hexadecimal digit according to std::isxdigit.
bool isHexDigit(char c)
{
  // Cast through unsigned char: passing a negative char to <cctype> is undefined.
  const auto as_uchar = static_cast<unsigned char>(c);
  const int classified = std::isxdigit(as_uchar);
  return classified != 0;
}
42
43
// Consume trailing garbage after a malformed number token.
44
void consumeTrailingGarbage(const std::string& source, size_t len, size_t& i)
45
411k
{
46
902k
  while(i < len && (isIdentChar(source[i]) || source[i] == '.'))
47
491k
  {
48
491k
    ++i;
49
491k
  }
50
411k
}
51
52
// Outcome reported by the number scanners (scanHexNumber / scanDecimalNumber).
struct NumberResult
{
  // Set when the literal contains a fractional part or an exponent.
  bool is_real{ false };
  // Set when the literal is malformed.
  bool has_error{ false };
};
57
58
// Scan a hexadecimal literal.
//
// On entry `i` must index the '0' of a "0x"/"0X" prefix; this function skips
// those two characters without re-checking them. On return `i` indexes the
// first character after the token.
//
// The result has `has_error` set when no hex digit follows the prefix, or
// when the digits are immediately followed by '.' or an identifier character
// (in which case that trailing garbage is consumed as part of the token).
// `is_real` is never set: hex literals have no fraction or exponent.
NumberResult scanHexNumber(const std::string& source, size_t len, size_t& i)
{
  NumberResult outcome;
  i += 2;  // step over the "0x"/"0X" prefix

  const size_t digits_begin = i;
  while(i < len && isHexDigit(source[i]))
  {
    ++i;
  }
  if(i == digits_begin)
  {
    // Nothing usable after the prefix.
    outcome.has_error = true;
  }

  // Hex numbers don't support dot or exponent
  const bool junk_follows = i < len && (source[i] == '.' || isIdentChar(source[i]));
  if(junk_follows)
  {
    outcome.has_error = true;
    consumeTrailingGarbage(source, len, i);
  }
  return outcome;
}
81
82
// Scan a decimal integer or real literal.
//
// On entry `i` indexes the first digit of the literal inside `source`
// (`len` is source.size()). On return `i` indexes the first character after
// the token, including any garbage that was folded into a malformed token.
//
// Result flags:
//  - `is_real`   : a fractional part ("1.5") or an exponent ("1e5") was seen.
//  - `has_error` : the literal is malformed: an incomplete fraction ("65.",
//                  "65.x"), an exponent without digits ("1e", "1e+"), or an
//                  identifier glued to the number ("3foo", "3@foo").
//
// Every error path consumes the characters attached to the bad literal, so
// the Error token covers the whole offending lexeme instead of leaving a
// stray identifier behind.
NumberResult scanDecimalNumber(const std::string& source, size_t len, size_t& i)
{
  NumberResult result;

  const auto skipDigits = [&]() {
    while(i < len && isDigit(source[i]))
    {
      ++i;
    }
  };

  // Integer part
  skipDigits();

  // Fractional part
  if(i < len && source[i] == '.')
  {
    const bool has_next = i + 1 < len;
    if(has_next && source[i + 1] == '.')
    {
      // Distinguish from ".." (concat operator):
      // stop here, "65.." is Integer("65") + DotDot
    }
    else if(has_next && isDigit(source[i + 1]))
    {
      result.is_real = true;
      ++i;  // consume '.'
      skipDigits();
    }
    else
    {
      // "65." or "65.x" -- incomplete real
      result.has_error = true;
      ++i;  // consume the dot
      consumeTrailingGarbage(source, len, i);
    }
  }

  // Exponent (only for decimal numbers)
  if(!result.has_error && i < len && (source[i] == 'e' || source[i] == 'E'))
  {
    result.is_real = true;
    ++i;  // consume 'e'/'E'
    if(i < len && (source[i] == '+' || source[i] == '-'))
    {
      ++i;  // consume sign
    }
    if(i < len && isDigit(source[i]))
    {
      skipDigits();
    }
    else
    {
      // "1e", "1e+", "1efoo" -- exponent without digits. Swallow what is
      // attached, exactly like the incomplete-fraction case above, so that
      // "1efoo" is one Error token rather than Error("1e") + Identifier.
      result.has_error = true;
      consumeTrailingGarbage(source, len, i);
    }
  }

  // Trailing alpha (e.g. "3foo", "65.43foo", "3@foo")
  if(!result.has_error && i < len && isIdentStart(source[i]))
  {
    result.has_error = true;
    // Consume the start character unconditionally: it may be '@', which
    // isIdentStart accepts but isIdentChar rejects. Without this step the
    // Error token for "3@foo" would contain only the valid-looking "3".
    ++i;
    while(i < len && isIdentChar(source[i]))
    {
      ++i;
    }
  }
  return result;
}
148
149
// Classify the character pair (`c`, `next`) as a two-character operator.
// Returns the matching TokenType, or TokenType::Error when the pair is not
// one of the recognised operators.
TokenType matchTwoCharOp(char c, char next)
{
  // Operators of the form "<something>=".
  if(next == '=')
  {
    switch(c)
    {
      case '=':
        return TokenType::EqualEqual;
      case '!':
        return TokenType::BangEqual;
      case '<':
        return TokenType::LessEqual;
      case '>':
        return TokenType::GreaterEqual;
      case ':':
        return TokenType::ColonEqual;
      case '+':
        return TokenType::PlusEqual;
      case '-':
        return TokenType::MinusEqual;
      case '*':
        return TokenType::StarEqual;
      case '/':
        return TokenType::SlashEqual;
      default:
        return TokenType::Error;
    }
  }

  // Doubled-character operators: "..", "&&", "||".
  if(c == next)
  {
    switch(c)
    {
      case '.':
        return TokenType::DotDot;
      case '&':
        return TokenType::AmpAmp;
      case '|':
        return TokenType::PipePipe;
      default:
        break;
    }
  }
  return TokenType::Error;
}
177
178
// Classify `c` as a single-character operator or delimiter.
// Returns TokenType::Error for any character that is not recognised.
TokenType matchSingleCharOp(char c)
{
  TokenType type = TokenType::Error;
  switch(c)
  {
    // Arithmetic
    case '+':
      type = TokenType::Plus;
      break;
    case '-':
      type = TokenType::Minus;
      break;
    case '*':
      type = TokenType::Star;
      break;
    case '/':
      type = TokenType::Slash;
      break;
    // Bitwise
    case '&':
      type = TokenType::Ampersand;
      break;
    case '|':
      type = TokenType::Pipe;
      break;
    case '^':
      type = TokenType::Caret;
      break;
    case '~':
      type = TokenType::Tilde;
      break;
    // Logical / comparison / assignment
    case '!':
      type = TokenType::Bang;
      break;
    case '<':
      type = TokenType::Less;
      break;
    case '>':
      type = TokenType::Greater;
      break;
    case '=':
      type = TokenType::Equal;
      break;
    // Ternary
    case '?':
      type = TokenType::Question;
      break;
    case ':':
      type = TokenType::Colon;
      break;
    // Delimiters
    case '(':
      type = TokenType::LeftParen;
      break;
    case ')':
      type = TokenType::RightParen;
      break;
    case ';':
      type = TokenType::Semicolon;
      break;
    default:
      break;
  }
  return type;
}
220
221
}  // namespace
222
223
std::vector<Token> tokenize(const std::string& source)
224
13.3k
{
225
13.3k
  std::vector<Token> tokens;
226
13.3k
  const size_t len = source.size();
227
13.3k
  size_t i = 0;
228
229
33.0M
  while(i < len)
230
32.9M
  {
231
32.9M
    const char c = source[i];
232
233
    // Skip whitespace (space, tab, newline, carriage return)
234
32.9M
    if(c == ' ' || c == '\t' || c == '\n' || c == '\r')
235
111k
    {
236
111k
      ++i;
237
111k
      continue;
238
111k
    }
239
240
32.8M
    const size_t start = i;
241
242
    // Single-quoted string literal
243
32.8M
    if(c == '\'')
244
129k
    {
245
129k
      ++i;
246
59.8M
      while(i < len && source[i] != '\'')
247
59.7M
      {
248
59.7M
        ++i;
249
59.7M
      }
250
129k
      if(i < len)
251
128k
      {
252
        // extract content without quotes
253
128k
        std::string_view text(&source[start + 1], i - start - 1);
254
128k
        tokens.push_back({ TokenType::String, text, start });
255
128k
        ++i;  // skip closing quote
256
128k
      }
257
361
      else
258
361
      {
259
361
        std::string_view text(&source[start], i - start);
260
361
        tokens.push_back({ TokenType::Error, text, start });
261
361
      }
262
129k
      continue;
263
129k
    }
264
265
    // Number literal (integer or real)
266
32.7M
    if(isDigit(c))
267
2.97M
    {
268
2.97M
      NumberResult nr;
269
2.97M
      const bool is_hex =
270
2.97M
          c == '0' && i + 1 < len && (source[i + 1] == 'x' || source[i + 1] == 'X');
271
2.97M
      if(is_hex)
272
435k
      {
273
435k
        nr = scanHexNumber(source, len, i);
274
435k
      }
275
2.54M
      else
276
2.54M
      {
277
2.54M
        nr = scanDecimalNumber(source, len, i);
278
2.54M
      }
279
280
2.97M
      std::string_view text(&source[start], i - start);
281
2.97M
      if(nr.has_error)
282
550k
      {
283
550k
        tokens.push_back({ TokenType::Error, text, start });
284
550k
      }
285
2.42M
      else if(nr.is_real)
286
17.5k
      {
287
17.5k
        tokens.push_back({ TokenType::Real, text, start });
288
17.5k
      }
289
2.40M
      else
290
2.40M
      {
291
2.40M
        tokens.push_back({ TokenType::Integer, text, start });
292
2.40M
      }
293
2.97M
      continue;
294
2.97M
    }
295
296
    // Identifier or keyword (true/false)
297
29.7M
    if(isIdentStart(c))
298
8.04M
    {
299
8.04M
      ++i;  // consume start character (may not be isIdentChar, e.g. '@')
300
70.4M
      while(i < len && isIdentChar(source[i]))
301
62.3M
      {
302
62.3M
        ++i;
303
62.3M
      }
304
8.04M
      if(std::string_view text(&source[start], i - start); text == "true" || text == "fal"
305
8.04M
                                                                                     "se")
306
1.55k
      {
307
1.55k
        tokens.push_back({ TokenType::Boolean, text, start });
308
1.55k
      }
309
8.04M
      else
310
8.04M
      {
311
8.04M
        tokens.push_back({ TokenType::Identifier, text, start });
312
8.04M
      }
313
8.04M
      continue;
314
8.04M
    }
315
316
    // Two-character operators (check before single-char)
317
21.7M
    if(i + 1 < len)
318
21.7M
    {
319
21.7M
      TokenType two_char_type = matchTwoCharOp(c, source[i + 1]);
320
21.7M
      if(two_char_type != TokenType::Error)
321
328k
      {
322
328k
        std::string_view text(&source[start], 2);
323
328k
        tokens.push_back({ two_char_type, text, start });
324
328k
        i += 2;
325
328k
        continue;
326
328k
      }
327
21.7M
    }
328
329
    // Single-character operators and delimiters
330
21.3M
    std::string_view text(&source[start], 1);
331
21.3M
    tokens.push_back({ matchSingleCharOp(c), text, start });
332
21.3M
    ++i;
333
21.3M
  }
334
335
  // Sentinel
336
13.3k
  tokens.push_back({ TokenType::EndOfInput, {}, i });
337
13.3k
  return tokens;
338
13.3k
}
339
340
}  // namespace BT::Scripting