Coverage Report

Created: 2026-03-07 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/trafficserver/include/tscore/Tokenizer.h
Line
Count
Source
1
/** @file
2
3
  A string tokenizer with array-like and iterator access to the tokens
4
5
  @section license License
6
7
  Licensed to the Apache Software Foundation (ASF) under one
8
  or more contributor license agreements.  See the NOTICE file
9
  distributed with this work for additional information
10
  regarding copyright ownership.  The ASF licenses this file
11
  to you under the Apache License, Version 2.0 (the
12
  "License"); you may not use this file except in compliance
13
  with the License.  You may obtain a copy of the License at
14
15
      http://www.apache.org/licenses/LICENSE-2.0
16
17
  Unless required by applicable law or agreed to in writing, software
18
  distributed under the License is distributed on an "AS IS" BASIS,
19
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
  See the License for the specific language governing permissions and
21
  limitations under the License.
22
 */
23
24
/***************************************/
25
26
#pragma once
27
28
/****************************************************************************
29
 *
30
 *  Tokenizer.h - A string tokenizer
31
 *
32
 *
33
 *
34
 ****************************************************************************/
35
36
/**********************************************************
37
 *  class Tokenizer
38
 *
39
 *  Tokenizes a string, and then allows array like access
40
 *
41
 *  The delimiters are determined by the string passed to the
42
 *   constructor.
43
 *
44
 *  There are three memory options.
45
 *     SHARE_TOKS - this modifies the original string passed in
46
 *          through Initialize() and shares its space.   NULLs
47
 *          are inserted into string after each token.  Choosing
48
 *          this option means the user is responsible for not
49
 *          deallocating the string storage before deallocating
50
 *          the tokenizer object
51
 *     COPY_TOKS - this option copies the original string and
52
 *          leaves the original unchanged.  The deallocation of the
53
 *          original string and the deallocation of the Tokenizer
54
 *          object are now independent.
55
 *     Note: If neither SHARE_TOKS nor COPY_TOKS is selected, COPY_TOKS
56
 *          is the default
57
 *     ALLOW_EMPTY_TOKS: If multiple delimiters appear next to each
58
 *          other, each delimiter creates a token some of which
59
 *          will be zero length.  The default is to skip repeated
60
 *          delimiters
61
 *
62
 *  Tokenizer(const char* StrOfDelimit) - a string that contains
63
 *     the delimiters for tokenizing.  This string is copied.
64
 *
65
 *  Initialize(char* str, unsigned options) - Submits a string
66
 *     to be tokenized according to the memory options listed above
67
 *
68
 *  ReUse() - Allows the object to be reused for a new string
69
 *     After ReUse() is called, Initialize() can be called safely
70
 *     again
71
 *
72
 *  operator[index] - returns a pointer to the number token given
73
 *     by index.  If index > numTokens-1, NULL is returned.
74
 *     Because of the way tokens are stored, this is an O(n) operation
75
 *     It is very fast though for the first 16 tokens and
76
 *     is intended to be used on a small number of tokens
77
 *
78
 *  iterFirst(tok_iter_state* state) - Returns the first
79
 *     token and initializes state argument for subsequent
80
 *     calls to iterNext.  If no tokens exist, NULL is
81
 *     returned
82
 *
83
 *  iterNext(tok_iter_state* state) - Returns the next token after
84
 *     what arg state returned next last time.   Returns NULL if no
85
 *     more tokens exist.
86
 *
87
 *  Note: To iterate through a list using operator[] takes O(n^2) time
88
 *      Using iterFirst, iterNext the running time is O(n), so use
89
 *      the iteration where possible
90
 *
91
 *  count() - returns the number of tokens
92
 *
93
 *  setMaxTokens() - sets the maximum number of tokens.  Once maxTokens
94
 *                     is reached, delimiters are ignored and the
95
 *                     last token is rest of the string.  Negative numbers
96
 *                     mean no limit on the number of tokens
97
 *
98
 *  getMaxTokens() - returns maxTokens.  UINT_MAX means no limit
99
 *
100
 *  Print() - Debugging method to print out the tokens
101
 *
102
 *******************************************************************/
103
104
#include "tscore/ink_apidefs.h"
105
106
0
#define COPY_TOKS        (1u << 0) // option bit: tokenize a copy, leaving the caller's string unchanged (see file header comment)
107
0
#define SHARE_TOKS       (1u << 1) // option bit: tokenize in place, modifying and sharing the caller's string (see file header comment)
108
0
#define ALLOW_EMPTY_TOKS (1u << 2) // option bit: adjacent delimiters each produce a (possibly zero-length) token
109
0
#define ALLOW_SPACES     (1u << 3) // NOTE(review): not described in the file header comment; confirm semantics in the implementation
110
111
0
#define TOK_NODE_ELEMENTS 16 // number of token pointers held by each tok_node
112
113
struct tok_node { // fixed-size group of token pointers that can link to another tok_node
114
  char     *el[TOK_NODE_ELEMENTS]; // token pointers held by this node
115
  tok_node *next; // presumably the following node once this one is full; confirm in the implementation
116
};
117
118
struct tok_iter_state { // caller-owned cursor passed to Tokenizer::iterFirst() / iterNext()
119
  tok_node *node; // presumably the node the iteration is currently in; confirm in the implementation
120
  int       index; // presumably the position within node->el; confirm in the implementation
121
};
122
123
class Tokenizer // splits a string on a set of delimiter characters and exposes the tokens by index or by iteration
124
{
125
public:
126
  Tokenizer(const char *StrOfDelimiters); // StrOfDelimiters: the delimiter characters; per the file header comment this string is copied
127
  ~Tokenizer();
128
129
  unsigned    Initialize(char *str, unsigned options); // options: bitwise OR of COPY_TOKS / SHARE_TOKS / ALLOW_EMPTY_TOKS / ALLOW_SPACES; return value presumably the token count (confirm)
130
  unsigned    Initialize(const char *str); // Automatically sets option to copy
131
  const char *operator[](unsigned index) const; // token at index; per the file header comment NULL when index is out of range
132
133
  void
134
  setMaxTokens(unsigned max) // stores max as the token limit (see file header comment for the effect on tokenizing)
135
0
  {
136
0
    maxTokens = max;
137
0
  };
138
139
  unsigned
140
  getMaxTokens() const // returns the stored token limit; per the file header comment UINT_MAX means no limit
141
0
  {
142
0
    return maxTokens;
143
0
  };
144
145
  unsigned count() const; // per the file header comment: number of tokens
146
  void     Print() const; // per the file header comment: debugging aid that prints the tokens
147
148
  const char *iterFirst(tok_iter_state *state); // per the file header comment: first token (NULL if none); initializes *state
149
  const char *iterNext(tok_iter_state *state); // per the file header comment: next token after the one last returned via *state (NULL when exhausted)
150
151
  // noncopyable
152
  Tokenizer &operator=(const Tokenizer &) = delete;
153
  Tokenizer(const Tokenizer &)            = delete;
154
155
private:
156
  int      isDelimiter(char c); // presumably nonzero when c is one of the delimiter characters; confirm in the implementation
157
  void     addToken(char *startAddr, int length); // NOTE(review): presumably records a token of the given length starting at startAddr; confirm
158
  void     ReUse(); // described in the file header comment as resetting the object for a new string; note it is private here
159
  char    *strOfDelimit; // delimiter characters (the file header comment says the constructor's string is copied)
160
  tok_node start_node; // first token node, stored inline in the object
161
  unsigned numValidTokens; // presumably the value reported by count(); confirm in the implementation
162
  unsigned maxTokens; // token limit written by setMaxTokens() and read by getMaxTokens()
163
  int      options; // NOTE(review): presumably the flags given to Initialize(); stored as int although Initialize() takes unsigned
164
  bool     quoteFound; // NOTE(review): purpose not visible in this header; confirm in the implementation
165
166
  // State about where to add the next token
167
  tok_node *add_node;
168
  int       add_index;
169
};