Coverage Report

Created: 2025-11-16 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/poco/dependencies/expat/src/xmltok_impl.c
Line
Count
Source
1
/* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)!
2
                            __  __            _
3
                         ___\ \/ /_ __   __ _| |_
4
                        / _ \\  /| '_ \ / _` | __|
5
                       |  __//  \| |_) | (_| | |_
6
                        \___/_/\_\ .__/ \__,_|\__|
7
                                 |_| XML parser
8
9
   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10
   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
11
   Copyright (c) 2002      Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
12
   Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
13
   Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
14
   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
15
   Copyright (c) 2018      Benjamin Peterson <benjamin@python.org>
16
   Copyright (c) 2018      Anton Maklakov <antmak.pub@gmail.com>
17
   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
18
   Copyright (c) 2020      Boris Kolpackov <boris@codesynthesis.com>
19
   Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
20
   Licensed under the MIT license:
21
22
   Permission is  hereby granted,  free of charge,  to any  person obtaining
23
   a  copy  of  this  software   and  associated  documentation  files  (the
24
   "Software"),  to  deal in  the  Software  without restriction,  including
25
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
26
   distribute, sublicense, and/or sell copies of the Software, and to permit
27
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
28
   following conditions:
29
30
   The above copyright  notice and this permission notice  shall be included
31
   in all copies or substantial portions of the Software.
32
33
   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
34
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
35
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
36
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
37
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
38
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
39
   USE OR OTHER DEALINGS IN THE SOFTWARE.
40
*/
41
42
#ifdef XML_TOK_IMPL_C
43
44
#  ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined
45
16.6M
#    define IS_INVALID_CHAR(enc, ptr, n) (0)
46
#  endif
47
48
#  define INVALID_LEAD_CASE(n, ptr, nextTokPtr)                                \
49
108M
  case BT_LEAD##n:                                                             \
50
108M
    if (end - ptr < n)                                                         \
51
108M
      return XML_TOK_PARTIAL_CHAR;                                             \
52
108M
    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
53
686
      *(nextTokPtr) = (ptr);                                                   \
54
686
      return XML_TOK_INVALID;                                                  \
55
686
    }                                                                          \
56
108M
    ptr += n;                                                                  \
57
108M
    break;
58
59
#  define INVALID_CASES(ptr, nextTokPtr)                                       \
60
71.5M
    INVALID_LEAD_CASE(2, ptr, nextTokPtr)                                      \
61
71.5M
    INVALID_LEAD_CASE(3, ptr, nextTokPtr)                                      \
62
21.7M
    INVALID_LEAD_CASE(4, ptr, nextTokPtr)                                      \
63
15.2M
  case BT_NONXML:                                                              \
64
3.27k
  case BT_MALFORM:                                                             \
65
4.28k
  case BT_TRAIL:                                                               \
66
4.28k
    *(nextTokPtr) = (ptr);                                                     \
67
4.28k
    return XML_TOK_INVALID;
68
69
#  define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr)                        \
70
12.5M
  case BT_LEAD##n:                                                             \
71
12.5M
    if (end - ptr < n)                                                         \
72
12.5M
      return XML_TOK_PARTIAL_CHAR;                                             \
73
12.5M
    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) {         \
74
1.81k
      *nextTokPtr = ptr;                                                       \
75
1.81k
      return XML_TOK_INVALID;                                                  \
76
1.81k
    }                                                                          \
77
12.5M
    ptr += n;                                                                  \
78
12.5M
    break;
79
80
#  define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)                          \
81
380M
  case BT_NONASCII:                                                            \
82
380M
    if (! IS_NAME_CHAR_MINBPC(enc, ptr)) {                                     \
83
1.84k
      *nextTokPtr = ptr;                                                       \
84
1.84k
      return XML_TOK_INVALID;                                                  \
85
1.84k
    }                                                                          \
86
380M
    /* fall through */                                                         \
87
380M
  case BT_NMSTRT:                                                              \
88
350M
  case BT_HEX:                                                                 \
89
377M
  case BT_DIGIT:                                                               \
90
379M
  case BT_NAME:                                                                \
91
380M
  case BT_MINUS:                                                               \
92
380M
    ptr += MINBPC(enc);                                                        \
93
380M
    break;                                                                     \
94
380M
    CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr)                              \
95
11.2M
    CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr)                              \
96
1.29M
    CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
97
98
#  define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr)                      \
99
471k
  case BT_LEAD##n:                                                             \
100
471k
    if ((end) - (ptr) < (n))                                                   \
101
468k
      return XML_TOK_PARTIAL_CHAR;                                             \
102
469k
    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) {       \
103
1.46k
      *nextTokPtr = ptr;                                                       \
104
1.46k
      return XML_TOK_INVALID;                                                  \
105
1.46k
    }                                                                          \
106
469k
    ptr += n;                                                                  \
107
468k
    break;
108
109
#  define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)                        \
110
35.7M
  case BT_NONASCII:                                                            \
111
35.7M
    if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {                                   \
112
797
      *nextTokPtr = ptr;                                                       \
113
797
      return XML_TOK_INVALID;                                                  \
114
797
    }                                                                          \
115
35.7M
    /* fall through */                                                         \
116
35.7M
  case BT_NMSTRT:                                                              \
117
35.7M
  case BT_HEX:                                                                 \
118
35.7M
    ptr += MINBPC(enc);                                                        \
119
35.7M
    break;                                                                     \
120
35.7M
    CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr)                            \
121
237k
    CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr)                            \
122
230k
    CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
123
124
#  ifndef PREFIX
125
#    define PREFIX(ident) ident
126
#  endif
127
128
#  define HAS_CHARS(enc, ptr, end, count)                                      \
129
9.06G
    ((end) - (ptr) >= ((count) * MINBPC(enc)))
130
131
152M
#  define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1)
132
133
#  define REQUIRE_CHARS(enc, ptr, end, count)                                  \
134
321M
    {                                                                          \
135
321M
      if (! HAS_CHARS(enc, ptr, end, count)) {                                 \
136
17.6k
        return XML_TOK_PARTIAL;                                                \
137
17.6k
      }                                                                        \
138
321M
    }
139
140
320M
#  define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1)
141
142
/* ptr points to character following "<!-" */
143
144
static int PTRCALL
145
PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
146
1.17M
                    const char **nextTokPtr) {
147
1.17M
  if (HAS_CHAR(enc, ptr, end)) {
148
1.17M
    if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
149
144
      *nextTokPtr = ptr;
150
144
      return XML_TOK_INVALID;
151
144
    }
152
1.17M
    ptr += MINBPC(enc);
153
101M
    while (HAS_CHAR(enc, ptr, end)) {
154
101M
      switch (BYTE_TYPE(enc, ptr)) {
155
139M
        INVALID_CASES(ptr, nextTokPtr)
156
1.28M
      case BT_MINUS:
157
1.28M
        ptr += MINBPC(enc);
158
1.28M
        REQUIRE_CHAR(enc, ptr, end);
159
1.28M
        if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
160
1.16M
          ptr += MINBPC(enc);
161
1.16M
          REQUIRE_CHAR(enc, ptr, end);
162
1.16M
          if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
163
174
            *nextTokPtr = ptr;
164
174
            return XML_TOK_INVALID;
165
174
          }
166
1.16M
          *nextTokPtr = ptr + MINBPC(enc);
167
1.16M
          return XML_TOK_COMMENT;
168
1.16M
        }
169
118k
        break;
170
30.5M
      default:
171
30.5M
        ptr += MINBPC(enc);
172
30.5M
        break;
173
101M
      }
174
101M
    }
175
1.17M
  }
176
5.19k
  return XML_TOK_PARTIAL;
177
1.17M
}
xmltok.c:normal_scanComment
Line
Count
Source
146
1.13M
                    const char **nextTokPtr) {
147
1.13M
  if (HAS_CHAR(enc, ptr, end)) {
148
1.13M
    if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
149
59
      *nextTokPtr = ptr;
150
59
      return XML_TOK_INVALID;
151
59
    }
152
1.13M
    ptr += MINBPC(enc);
153
75.2M
    while (HAS_CHAR(enc, ptr, end)) {
154
75.2M
      switch (BYTE_TYPE(enc, ptr)) {
155
133M
        INVALID_CASES(ptr, nextTokPtr)
156
1.16M
      case BT_MINUS:
157
1.16M
        ptr += MINBPC(enc);
158
1.16M
        REQUIRE_CHAR(enc, ptr, end);
159
1.16M
        if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
160
1.13M
          ptr += MINBPC(enc);
161
1.13M
          REQUIRE_CHAR(enc, ptr, end);
162
1.13M
          if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
163
104
            *nextTokPtr = ptr;
164
104
            return XML_TOK_INVALID;
165
104
          }
166
1.13M
          *nextTokPtr = ptr + MINBPC(enc);
167
1.13M
          return XML_TOK_COMMENT;
168
1.13M
        }
169
36.9k
        break;
170
7.38M
      default:
171
7.38M
        ptr += MINBPC(enc);
172
7.38M
        break;
173
75.2M
      }
174
75.2M
    }
175
1.13M
  }
176
1.56k
  return XML_TOK_PARTIAL;
177
1.13M
}
xmltok.c:little2_scanComment
Line
Count
Source
146
37.2k
                    const char **nextTokPtr) {
147
37.2k
  if (HAS_CHAR(enc, ptr, end)) {
148
37.1k
    if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
149
35
      *nextTokPtr = ptr;
150
35
      return XML_TOK_INVALID;
151
35
    }
152
37.1k
    ptr += MINBPC(enc);
153
13.5M
    while (HAS_CHAR(enc, ptr, end)) {
154
13.5M
      switch (BYTE_TYPE(enc, ptr)) {
155
2.91M
        INVALID_CASES(ptr, nextTokPtr)
156
110k
      case BT_MINUS:
157
110k
        ptr += MINBPC(enc);
158
110k
        REQUIRE_CHAR(enc, ptr, end);
159
110k
        if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
160
33.6k
          ptr += MINBPC(enc);
161
33.6k
          REQUIRE_CHAR(enc, ptr, end);
162
33.6k
          if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
163
31
            *nextTokPtr = ptr;
164
31
            return XML_TOK_INVALID;
165
31
          }
166
33.6k
          *nextTokPtr = ptr + MINBPC(enc);
167
33.6k
          return XML_TOK_COMMENT;
168
33.6k
        }
169
76.7k
        break;
170
11.9M
      default:
171
11.9M
        ptr += MINBPC(enc);
172
11.9M
        break;
173
13.5M
      }
174
13.5M
    }
175
37.1k
  }
176
2.78k
  return XML_TOK_PARTIAL;
177
37.2k
}
xmltok.c:big2_scanComment
Line
Count
Source
146
2.93k
                    const char **nextTokPtr) {
147
2.93k
  if (HAS_CHAR(enc, ptr, end)) {
148
2.88k
    if (! CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
149
50
      *nextTokPtr = ptr;
150
50
      return XML_TOK_INVALID;
151
50
    }
152
2.83k
    ptr += MINBPC(enc);
153
12.6M
    while (HAS_CHAR(enc, ptr, end)) {
154
12.6M
      switch (BYTE_TYPE(enc, ptr)) {
155
2.79M
        INVALID_CASES(ptr, nextTokPtr)
156
6.02k
      case BT_MINUS:
157
6.02k
        ptr += MINBPC(enc);
158
6.02k
        REQUIRE_CHAR(enc, ptr, end);
159
5.96k
        if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
160
1.61k
          ptr += MINBPC(enc);
161
1.61k
          REQUIRE_CHAR(enc, ptr, end);
162
1.58k
          if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
163
39
            *nextTokPtr = ptr;
164
39
            return XML_TOK_INVALID;
165
39
          }
166
1.54k
          *nextTokPtr = ptr + MINBPC(enc);
167
1.54k
          return XML_TOK_COMMENT;
168
1.58k
        }
169
4.34k
        break;
170
11.2M
      default:
171
11.2M
        ptr += MINBPC(enc);
172
11.2M
        break;
173
12.6M
      }
174
12.6M
    }
175
2.83k
  }
176
852
  return XML_TOK_PARTIAL;
177
2.93k
}
178
179
/* ptr points to character following "<!" */
180
181
static int PTRCALL
182
PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
183
823k
                 const char **nextTokPtr) {
184
823k
  REQUIRE_CHAR(enc, ptr, end);
185
823k
  switch (BYTE_TYPE(enc, ptr)) {
186
459k
  case BT_MINUS:
187
459k
    return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
188
21
  case BT_LSQB:
189
21
    *nextTokPtr = ptr + MINBPC(enc);
190
21
    return XML_TOK_COND_SECT_OPEN;
191
106k
  case BT_NMSTRT:
192
363k
  case BT_HEX:
193
363k
    ptr += MINBPC(enc);
194
363k
    break;
195
179
  default:
196
179
    *nextTokPtr = ptr;
197
179
    return XML_TOK_INVALID;
198
823k
  }
199
2.98M
  while (HAS_CHAR(enc, ptr, end)) {
200
2.98M
    switch (BYTE_TYPE(enc, ptr)) {
201
238
    case BT_PERCNT:
202
238
      REQUIRE_CHARS(enc, ptr, end, 2);
203
      /* don't allow <!ENTITY% foo "whatever"> */
204
207
      switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
205
21
      case BT_S:
206
39
      case BT_CR:
207
57
      case BT_LF:
208
75
      case BT_PERCNT:
209
75
        *nextTokPtr = ptr;
210
75
        return XML_TOK_INVALID;
211
207
      }
212
      /* fall through */
213
172k
    case BT_S:
214
220k
    case BT_CR:
215
361k
    case BT_LF:
216
361k
      *nextTokPtr = ptr;
217
361k
      return XML_TOK_DECL_OPEN;
218
1.87M
    case BT_NMSTRT:
219
2.62M
    case BT_HEX:
220
2.62M
      ptr += MINBPC(enc);
221
2.62M
      break;
222
309
    default:
223
309
      *nextTokPtr = ptr;
224
309
      return XML_TOK_INVALID;
225
2.98M
    }
226
2.98M
  }
227
1.28k
  return XML_TOK_PARTIAL;
228
363k
}
xmltok.c:normal_scanDecl
Line
Count
Source
183
685k
                 const char **nextTokPtr) {
184
685k
  REQUIRE_CHAR(enc, ptr, end);
185
685k
  switch (BYTE_TYPE(enc, ptr)) {
186
455k
  case BT_MINUS:
187
455k
    return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
188
9
  case BT_LSQB:
189
9
    *nextTokPtr = ptr + MINBPC(enc);
190
9
    return XML_TOK_COND_SECT_OPEN;
191
16.6k
  case BT_NMSTRT:
192
229k
  case BT_HEX:
193
229k
    ptr += MINBPC(enc);
194
229k
    break;
195
82
  default:
196
82
    *nextTokPtr = ptr;
197
82
    return XML_TOK_INVALID;
198
685k
  }
199
1.98M
  while (HAS_CHAR(enc, ptr, end)) {
200
1.98M
    switch (BYTE_TYPE(enc, ptr)) {
201
115
    case BT_PERCNT:
202
115
      REQUIRE_CHARS(enc, ptr, end, 2);
203
      /* don't allow <!ENTITY% foo "whatever"> */
204
99
      switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
205
9
      case BT_S:
206
15
      case BT_CR:
207
21
      case BT_LF:
208
27
      case BT_PERCNT:
209
27
        *nextTokPtr = ptr;
210
27
        return XML_TOK_INVALID;
211
99
      }
212
      /* fall through */
213
130k
    case BT_S:
214
163k
    case BT_CR:
215
228k
    case BT_LF:
216
228k
      *nextTokPtr = ptr;
217
228k
      return XML_TOK_DECL_OPEN;
218
1.14M
    case BT_NMSTRT:
219
1.75M
    case BT_HEX:
220
1.75M
      ptr += MINBPC(enc);
221
1.75M
      break;
222
139
    default:
223
139
      *nextTokPtr = ptr;
224
139
      return XML_TOK_INVALID;
225
1.98M
    }
226
1.98M
  }
227
547
  return XML_TOK_PARTIAL;
228
229k
}
xmltok.c:little2_scanDecl
Line
Count
Source
183
33.2k
                 const char **nextTokPtr) {
184
33.2k
  REQUIRE_CHAR(enc, ptr, end);
185
33.2k
  switch (BYTE_TYPE(enc, ptr)) {
186
2.49k
  case BT_MINUS:
187
2.49k
    return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
188
6
  case BT_LSQB:
189
6
    *nextTokPtr = ptr + MINBPC(enc);
190
6
    return XML_TOK_COND_SECT_OPEN;
191
5.37k
  case BT_NMSTRT:
192
30.6k
  case BT_HEX:
193
30.6k
    ptr += MINBPC(enc);
194
30.6k
    break;
195
60
  default:
196
60
    *nextTokPtr = ptr;
197
60
    return XML_TOK_INVALID;
198
33.2k
  }
199
209k
  while (HAS_CHAR(enc, ptr, end)) {
200
209k
    switch (BYTE_TYPE(enc, ptr)) {
201
71
    case BT_PERCNT:
202
71
      REQUIRE_CHARS(enc, ptr, end, 2);
203
      /* don't allow <!ENTITY% foo "whatever"> */
204
62
      switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
205
6
      case BT_S:
206
12
      case BT_CR:
207
18
      case BT_LF:
208
24
      case BT_PERCNT:
209
24
        *nextTokPtr = ptr;
210
24
        return XML_TOK_INVALID;
211
62
      }
212
      /* fall through */
213
17.9k
    case BT_S:
214
25.6k
    case BT_CR:
215
30.3k
    case BT_LF:
216
30.3k
      *nextTokPtr = ptr;
217
30.3k
      return XML_TOK_DECL_OPEN;
218
142k
    case BT_NMSTRT:
219
179k
    case BT_HEX:
220
179k
      ptr += MINBPC(enc);
221
179k
      break;
222
67
    default:
223
67
      *nextTokPtr = ptr;
224
67
      return XML_TOK_INVALID;
225
209k
    }
226
209k
  }
227
187
  return XML_TOK_PARTIAL;
228
30.6k
}
xmltok.c:big2_scanDecl
Line
Count
Source
183
104k
                 const char **nextTokPtr) {
184
104k
  REQUIRE_CHAR(enc, ptr, end);
185
104k
  switch (BYTE_TYPE(enc, ptr)) {
186
1.51k
  case BT_MINUS:
187
1.51k
    return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
188
6
  case BT_LSQB:
189
6
    *nextTokPtr = ptr + MINBPC(enc);
190
6
    return XML_TOK_COND_SECT_OPEN;
191
84.5k
  case BT_NMSTRT:
192
102k
  case BT_HEX:
193
102k
    ptr += MINBPC(enc);
194
102k
    break;
195
37
  default:
196
37
    *nextTokPtr = ptr;
197
37
    return XML_TOK_INVALID;
198
104k
  }
199
793k
  while (HAS_CHAR(enc, ptr, end)) {
200
793k
    switch (BYTE_TYPE(enc, ptr)) {
201
52
    case BT_PERCNT:
202
52
      REQUIRE_CHARS(enc, ptr, end, 2);
203
      /* don't allow <!ENTITY% foo "whatever"> */
204
46
      switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
205
6
      case BT_S:
206
12
      case BT_CR:
207
18
      case BT_LF:
208
24
      case BT_PERCNT:
209
24
        *nextTokPtr = ptr;
210
24
        return XML_TOK_INVALID;
211
46
      }
212
      /* fall through */
213
24.1k
    case BT_S:
214
31.0k
    case BT_CR:
215
102k
    case BT_LF:
216
102k
      *nextTokPtr = ptr;
217
102k
      return XML_TOK_DECL_OPEN;
218
588k
    case BT_NMSTRT:
219
690k
    case BT_HEX:
220
690k
      ptr += MINBPC(enc);
221
690k
      break;
222
103
    default:
223
103
      *nextTokPtr = ptr;
224
103
      return XML_TOK_INVALID;
225
793k
    }
226
793k
  }
227
546
  return XML_TOK_PARTIAL;
228
102k
}
229
230
static int PTRCALL
231
PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end,
232
510k
                      int *tokPtr) {
233
510k
  int upper = 0;
234
510k
  UNUSED_P(enc);
235
510k
  *tokPtr = XML_TOK_PI;
236
510k
  if (end - ptr != MINBPC(enc) * 3)
237
342k
    return 1;
238
168k
  switch (BYTE_TO_ASCII(enc, ptr)) {
239
26.7k
  case ASCII_x:
240
26.7k
    break;
241
11.2k
  case ASCII_X:
242
11.2k
    upper = 1;
243
11.2k
    break;
244
130k
  default:
245
130k
    return 1;
246
168k
  }
247
38.0k
  ptr += MINBPC(enc);
248
38.0k
  switch (BYTE_TO_ASCII(enc, ptr)) {
249
18.2k
  case ASCII_m:
250
18.2k
    break;
251
10.0k
  case ASCII_M:
252
10.0k
    upper = 1;
253
10.0k
    break;
254
9.77k
  default:
255
9.77k
    return 1;
256
38.0k
  }
257
28.2k
  ptr += MINBPC(enc);
258
28.2k
  switch (BYTE_TO_ASCII(enc, ptr)) {
259
9.23k
  case ASCII_l:
260
9.23k
    break;
261
36
  case ASCII_L:
262
36
    upper = 1;
263
36
    break;
264
19.0k
  default:
265
19.0k
    return 1;
266
28.2k
  }
267
9.27k
  if (upper)
268
75
    return 0;
269
9.19k
  *tokPtr = XML_TOK_XML_DECL;
270
9.19k
  return 1;
271
9.27k
}
xmltok.c:normal_checkPiTarget
Line
Count
Source
232
123k
                      int *tokPtr) {
233
123k
  int upper = 0;
234
123k
  UNUSED_P(enc);
235
123k
  *tokPtr = XML_TOK_PI;
236
123k
  if (end - ptr != MINBPC(enc) * 3)
237
70.7k
    return 1;
238
52.6k
  switch (BYTE_TO_ASCII(enc, ptr)) {
239
20.0k
  case ASCII_x:
240
20.0k
    break;
241
2.86k
  case ASCII_X:
242
2.86k
    upper = 1;
243
2.86k
    break;
244
29.7k
  default:
245
29.7k
    return 1;
246
52.6k
  }
247
22.9k
  ptr += MINBPC(enc);
248
22.9k
  switch (BYTE_TO_ASCII(enc, ptr)) {
249
14.1k
  case ASCII_m:
250
14.1k
    break;
251
4.67k
  case ASCII_M:
252
4.67k
    upper = 1;
253
4.67k
    break;
254
4.11k
  default:
255
4.11k
    return 1;
256
22.9k
  }
257
18.8k
  ptr += MINBPC(enc);
258
18.8k
  switch (BYTE_TO_ASCII(enc, ptr)) {
259
8.97k
  case ASCII_l:
260
8.97k
    break;
261
12
  case ASCII_L:
262
12
    upper = 1;
263
12
    break;
264
9.83k
  default:
265
9.83k
    return 1;
266
18.8k
  }
267
8.98k
  if (upper)
268
27
    return 0;
269
8.95k
  *tokPtr = XML_TOK_XML_DECL;
270
8.95k
  return 1;
271
8.98k
}
xmltok.c:little2_checkPiTarget
Line
Count
Source
232
130k
                      int *tokPtr) {
233
130k
  int upper = 0;
234
130k
  UNUSED_P(enc);
235
130k
  *tokPtr = XML_TOK_PI;
236
130k
  if (end - ptr != MINBPC(enc) * 3)
237
115k
    return 1;
238
14.8k
  switch (BYTE_TO_ASCII(enc, ptr)) {
239
4.16k
  case ASCII_x:
240
4.16k
    break;
241
5.34k
  case ASCII_X:
242
5.34k
    upper = 1;
243
5.34k
    break;
244
5.33k
  default:
245
5.33k
    return 1;
246
14.8k
  }
247
9.51k
  ptr += MINBPC(enc);
248
9.51k
  switch (BYTE_TO_ASCII(enc, ptr)) {
249
2.60k
  case ASCII_m:
250
2.60k
    break;
251
2.82k
  case ASCII_M:
252
2.82k
    upper = 1;
253
2.82k
    break;
254
4.07k
  default:
255
4.07k
    return 1;
256
9.51k
  }
257
5.43k
  ptr += MINBPC(enc);
258
5.43k
  switch (BYTE_TO_ASCII(enc, ptr)) {
259
90
  case ASCII_l:
260
90
    break;
261
12
  case ASCII_L:
262
12
    upper = 1;
263
12
    break;
264
5.33k
  default:
265
5.33k
    return 1;
266
5.43k
  }
267
102
  if (upper)
268
24
    return 0;
269
78
  *tokPtr = XML_TOK_XML_DECL;
270
78
  return 1;
271
102
}
xmltok.c:big2_checkPiTarget
Line
Count
Source
232
256k
                      int *tokPtr) {
233
256k
  int upper = 0;
234
256k
  UNUSED_P(enc);
235
256k
  *tokPtr = XML_TOK_PI;
236
256k
  if (end - ptr != MINBPC(enc) * 3)
237
155k
    return 1;
238
101k
  switch (BYTE_TO_ASCII(enc, ptr)) {
239
2.55k
  case ASCII_x:
240
2.55k
    break;
241
3.07k
  case ASCII_X:
242
3.07k
    upper = 1;
243
3.07k
    break;
244
95.4k
  default:
245
95.4k
    return 1;
246
101k
  }
247
5.63k
  ptr += MINBPC(enc);
248
5.63k
  switch (BYTE_TO_ASCII(enc, ptr)) {
249
1.48k
  case ASCII_m:
250
1.48k
    break;
251
2.55k
  case ASCII_M:
252
2.55k
    upper = 1;
253
2.55k
    break;
254
1.59k
  default:
255
1.59k
    return 1;
256
5.63k
  }
257
4.04k
  ptr += MINBPC(enc);
258
4.04k
  switch (BYTE_TO_ASCII(enc, ptr)) {
259
175
  case ASCII_l:
260
175
    break;
261
12
  case ASCII_L:
262
12
    upper = 1;
263
12
    break;
264
3.85k
  default:
265
3.85k
    return 1;
266
4.04k
  }
267
187
  if (upper)
268
24
    return 0;
269
163
  *tokPtr = XML_TOK_XML_DECL;
270
163
  return 1;
271
187
}
272
273
/* ptr points to character following "<?" */
274
275
static int PTRCALL
276
PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
277
514k
               const char **nextTokPtr) {
278
514k
  int tok;
279
514k
  const char *target = ptr;
280
514k
  REQUIRE_CHAR(enc, ptr, end);
281
513k
  switch (BYTE_TYPE(enc, ptr)) {
282
563k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
283
272
  default:
284
272
    *nextTokPtr = ptr;
285
272
    return XML_TOK_INVALID;
286
513k
  }
287
18.0M
  while (HAS_CHAR(enc, ptr, end)) {
288
18.0M
    switch (BYTE_TYPE(enc, ptr)) {
289
66.7M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
290
44.5k
    case BT_S:
291
222k
    case BT_CR:
292
433k
    case BT_LF:
293
433k
      if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
294
39
        *nextTokPtr = ptr;
295
39
        return XML_TOK_INVALID;
296
39
      }
297
433k
      ptr += MINBPC(enc);
298
125M
      while (HAS_CHAR(enc, ptr, end)) {
299
125M
        switch (BYTE_TYPE(enc, ptr)) {
300
33.6M
          INVALID_CASES(ptr, nextTokPtr)
301
737k
        case BT_QUEST:
302
737k
          ptr += MINBPC(enc);
303
737k
          REQUIRE_CHAR(enc, ptr, end);
304
736k
          if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
305
422k
            *nextTokPtr = ptr + MINBPC(enc);
306
422k
            return tok;
307
422k
          }
308
314k
          break;
309
107M
        default:
310
107M
          ptr += MINBPC(enc);
311
107M
          break;
312
125M
        }
313
125M
      }
314
9.08k
      return XML_TOK_PARTIAL;
315
77.3k
    case BT_QUEST:
316
77.3k
      if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
317
36
        *nextTokPtr = ptr;
318
36
        return XML_TOK_INVALID;
319
36
      }
320
77.2k
      ptr += MINBPC(enc);
321
77.2k
      REQUIRE_CHAR(enc, ptr, end);
322
76.9k
      if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
323
76.6k
        *nextTokPtr = ptr + MINBPC(enc);
324
76.6k
        return tok;
325
76.6k
      }
326
      /* fall through */
327
604
    default:
328
604
      *nextTokPtr = ptr;
329
604
      return XML_TOK_INVALID;
330
18.0M
    }
331
18.0M
  }
332
1.45k
  return XML_TOK_PARTIAL;
333
513k
}
xmltok.c:normal_scanPi
Line
Count
Source
277
124k
               const char **nextTokPtr) {
278
124k
  int tok;
279
124k
  const char *target = ptr;
280
124k
  REQUIRE_CHAR(enc, ptr, end);
281
124k
  switch (BYTE_TYPE(enc, ptr)) {
282
194k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
283
63
  default:
284
63
    *nextTokPtr = ptr;
285
63
    return XML_TOK_INVALID;
286
124k
  }
287
15.9M
  while (HAS_CHAR(enc, ptr, end)) {
288
15.9M
    switch (BYTE_TYPE(enc, ptr)) {
289
58.6M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
290
23.1k
    case BT_S:
291
26.3k
    case BT_CR:
292
98.1k
    case BT_LF:
293
98.1k
      if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
294
15
        *nextTokPtr = ptr;
295
15
        return XML_TOK_INVALID;
296
15
      }
297
98.1k
      ptr += MINBPC(enc);
298
69.8M
      while (HAS_CHAR(enc, ptr, end)) {
299
69.8M
        switch (BYTE_TYPE(enc, ptr)) {
300
29.3M
          INVALID_CASES(ptr, nextTokPtr)
301
277k
        case BT_QUEST:
302
277k
          ptr += MINBPC(enc);
303
277k
          REQUIRE_CHAR(enc, ptr, end);
304
277k
          if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
305
96.6k
            *nextTokPtr = ptr + MINBPC(enc);
306
96.6k
            return tok;
307
96.6k
          }
308
180k
          break;
309
54.8M
        default:
310
54.8M
          ptr += MINBPC(enc);
311
54.8M
          break;
312
69.8M
        }
313
69.8M
      }
314
1.13k
      return XML_TOK_PARTIAL;
315
25.2k
    case BT_QUEST:
316
25.2k
      if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
317
12
        *nextTokPtr = ptr;
318
12
        return XML_TOK_INVALID;
319
12
      }
320
25.2k
      ptr += MINBPC(enc);
321
25.2k
      REQUIRE_CHAR(enc, ptr, end);
322
25.1k
      if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
323
25.0k
        *nextTokPtr = ptr + MINBPC(enc);
324
25.0k
        return tok;
325
25.0k
      }
326
      /* fall through */
327
234
    default:
328
234
      *nextTokPtr = ptr;
329
234
      return XML_TOK_INVALID;
330
15.9M
    }
331
15.9M
  }
332
550
  return XML_TOK_PARTIAL;
333
124k
}
xmltok.c:little2_scanPi
Line
Count
Source
277
131k
               const char **nextTokPtr) {
278
131k
  int tok;
279
131k
  const char *target = ptr;
280
131k
  REQUIRE_CHAR(enc, ptr, end);
281
131k
  switch (BYTE_TYPE(enc, ptr)) {
282
126k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
283
111
  default:
284
111
    *nextTokPtr = ptr;
285
111
    return XML_TOK_INVALID;
286
131k
  }
287
322k
  while (HAS_CHAR(enc, ptr, end)) {
288
322k
    switch (BYTE_TYPE(enc, ptr)) {
289
823k
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
290
16.2k
    case BT_S:
291
76.0k
    case BT_CR:
292
120k
    case BT_LF:
293
120k
      if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
294
12
        *nextTokPtr = ptr;
295
12
        return XML_TOK_INVALID;
296
12
      }
297
120k
      ptr += MINBPC(enc);
298
40.1M
      while (HAS_CHAR(enc, ptr, end)) {
299
40.1M
        switch (BYTE_TYPE(enc, ptr)) {
300
3.34M
          INVALID_CASES(ptr, nextTokPtr)
301
175k
        case BT_QUEST:
302
175k
          ptr += MINBPC(enc);
303
175k
          REQUIRE_CHAR(enc, ptr, end);
304
174k
          if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
305
115k
            *nextTokPtr = ptr + MINBPC(enc);
306
115k
            return tok;
307
115k
          }
308
59.9k
          break;
309
38.2M
        default:
310
38.2M
          ptr += MINBPC(enc);
311
38.2M
          break;
312
40.1M
        }
313
40.1M
      }
314
4.46k
      return XML_TOK_PARTIAL;
315
10.2k
    case BT_QUEST:
316
10.2k
      if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
317
12
        *nextTokPtr = ptr;
318
12
        return XML_TOK_INVALID;
319
12
      }
320
10.1k
      ptr += MINBPC(enc);
321
10.1k
      REQUIRE_CHAR(enc, ptr, end);
322
10.0k
      if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
323
9.95k
        *nextTokPtr = ptr + MINBPC(enc);
324
9.95k
        return tok;
325
9.95k
      }
326
      /* fall through */
327
182
    default:
328
182
      *nextTokPtr = ptr;
329
182
      return XML_TOK_INVALID;
330
322k
    }
331
322k
  }
332
380
  return XML_TOK_PARTIAL;
333
131k
}
xmltok.c:big2_scanPi
Line
Count
Source
277
258k
               const char **nextTokPtr) {
278
258k
  int tok;
279
258k
  const char *target = ptr;
280
258k
  REQUIRE_CHAR(enc, ptr, end);
281
257k
  switch (BYTE_TYPE(enc, ptr)) {
282
241k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
283
98
  default:
284
98
    *nextTokPtr = ptr;
285
98
    return XML_TOK_INVALID;
286
257k
  }
287
1.77M
  while (HAS_CHAR(enc, ptr, end)) {
288
1.77M
    switch (BYTE_TYPE(enc, ptr)) {
289
7.22M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
290
5.09k
    case BT_S:
291
119k
    case BT_CR:
292
215k
    case BT_LF:
293
215k
      if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
294
12
        *nextTokPtr = ptr;
295
12
        return XML_TOK_INVALID;
296
12
      }
297
215k
      ptr += MINBPC(enc);
298
15.4M
      while (HAS_CHAR(enc, ptr, end)) {
299
15.4M
        switch (BYTE_TYPE(enc, ptr)) {
300
953k
          INVALID_CASES(ptr, nextTokPtr)
301
284k
        case BT_QUEST:
302
284k
          ptr += MINBPC(enc);
303
284k
          REQUIRE_CHAR(enc, ptr, end);
304
284k
          if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
305
210k
            *nextTokPtr = ptr + MINBPC(enc);
306
210k
            return tok;
307
210k
          }
308
73.5k
          break;
309
14.7M
        default:
310
14.7M
          ptr += MINBPC(enc);
311
14.7M
          break;
312
15.4M
        }
313
15.4M
      }
314
3.48k
      return XML_TOK_PARTIAL;
315
41.8k
    case BT_QUEST:
316
41.8k
      if (! PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
317
12
        *nextTokPtr = ptr;
318
12
        return XML_TOK_INVALID;
319
12
      }
320
41.8k
      ptr += MINBPC(enc);
321
41.8k
      REQUIRE_CHAR(enc, ptr, end);
322
41.7k
      if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
323
41.6k
        *nextTokPtr = ptr + MINBPC(enc);
324
41.6k
        return tok;
325
41.6k
      }
326
      /* fall through */
327
188
    default:
328
188
      *nextTokPtr = ptr;
329
188
      return XML_TOK_INVALID;
330
1.77M
    }
331
1.77M
  }
332
522
  return XML_TOK_PARTIAL;
333
257k
}
334
335
static int PTRCALL
336
PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
337
1.43M
                         const char **nextTokPtr) {
338
1.43M
  static const char CDATA_LSQB[]
339
1.43M
      = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB};
340
1.43M
  int i;
341
1.43M
  UNUSED_P(enc);
342
  /* CDATA[ */
343
1.43M
  REQUIRE_CHARS(enc, ptr, end, 6);
344
10.0M
  for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
345
8.58M
    if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
346
496
      *nextTokPtr = ptr;
347
496
      return XML_TOK_INVALID;
348
496
    }
349
8.58M
  }
350
1.43M
  *nextTokPtr = ptr;
351
1.43M
  return XML_TOK_CDATA_SECT_OPEN;
352
1.43M
}
xmltok.c:normal_scanCdataSection
Line
Count
Source
337
1.42M
                         const char **nextTokPtr) {
338
1.42M
  static const char CDATA_LSQB[]
339
1.42M
      = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB};
340
1.42M
  int i;
341
1.42M
  UNUSED_P(enc);
342
  /* CDATA[ */
343
1.42M
  REQUIRE_CHARS(enc, ptr, end, 6);
344
9.94M
  for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
345
8.52M
    if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
346
190
      *nextTokPtr = ptr;
347
190
      return XML_TOK_INVALID;
348
190
    }
349
8.52M
  }
350
1.42M
  *nextTokPtr = ptr;
351
1.42M
  return XML_TOK_CDATA_SECT_OPEN;
352
1.42M
}
xmltok.c:little2_scanCdataSection
Line
Count
Source
337
1.51k
                         const char **nextTokPtr) {
338
1.51k
  static const char CDATA_LSQB[]
339
1.51k
      = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB};
340
1.51k
  int i;
341
1.51k
  UNUSED_P(enc);
342
  /* CDATA[ */
343
1.51k
  REQUIRE_CHARS(enc, ptr, end, 6);
344
9.80k
  for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
345
8.47k
    if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
346
141
      *nextTokPtr = ptr;
347
141
      return XML_TOK_INVALID;
348
141
    }
349
8.47k
  }
350
1.33k
  *nextTokPtr = ptr;
351
1.33k
  return XML_TOK_CDATA_SECT_OPEN;
352
1.48k
}
xmltok.c:big2_scanCdataSection
Line
Count
Source
337
9.20k
                         const char **nextTokPtr) {
338
9.20k
  static const char CDATA_LSQB[]
339
9.20k
      = {ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB};
340
9.20k
  int i;
341
9.20k
  UNUSED_P(enc);
342
  /* CDATA[ */
343
9.20k
  REQUIRE_CHARS(enc, ptr, end, 6);
344
63.2k
  for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
345
54.3k
    if (! CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
346
165
      *nextTokPtr = ptr;
347
165
      return XML_TOK_INVALID;
348
165
    }
349
54.3k
  }
350
8.96k
  *nextTokPtr = ptr;
351
8.96k
  return XML_TOK_CDATA_SECT_OPEN;
352
9.12k
}
353
354
static int PTRCALL
355
PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
356
6.33M
                        const char **nextTokPtr) {
357
6.33M
  if (ptr >= end)
358
5.17k
    return XML_TOK_NONE;
359
6.33M
  if (MINBPC(enc) > 1) {
360
1.12M
    size_t n = end - ptr;
361
1.12M
    if (n & (MINBPC(enc) - 1)) {
362
28.1k
      n &= ~(MINBPC(enc) - 1);
363
28.1k
      if (n == 0)
364
495
        return XML_TOK_PARTIAL;
365
27.6k
      end = ptr + n;
366
27.6k
    }
367
1.12M
  }
368
6.32M
  switch (BYTE_TYPE(enc, ptr)) {
369
3.48M
  case BT_RSQB:
370
3.48M
    ptr += MINBPC(enc);
371
3.48M
    REQUIRE_CHAR(enc, ptr, end);
372
3.48M
    if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
373
226k
      break;
374
3.25M
    ptr += MINBPC(enc);
375
3.25M
    REQUIRE_CHAR(enc, ptr, end);
376
3.25M
    if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
377
1.82M
      ptr -= MINBPC(enc);
378
1.82M
      break;
379
1.82M
    }
380
1.42M
    *nextTokPtr = ptr + MINBPC(enc);
381
1.42M
    return XML_TOK_CDATA_SECT_CLOSE;
382
1.86M
  case BT_CR:
383
1.86M
    ptr += MINBPC(enc);
384
1.86M
    REQUIRE_CHAR(enc, ptr, end);
385
1.85M
    if (BYTE_TYPE(enc, ptr) == BT_LF)
386
9.67k
      ptr += MINBPC(enc);
387
1.85M
    *nextTokPtr = ptr;
388
1.85M
    return XML_TOK_DATA_NEWLINE;
389
292k
  case BT_LF:
390
292k
    *nextTokPtr = ptr + MINBPC(enc);
391
292k
    return XML_TOK_DATA_NEWLINE;
392
705k
    INVALID_CASES(ptr, nextTokPtr)
393
338k
  default:
394
338k
    ptr += MINBPC(enc);
395
338k
    break;
396
6.32M
  }
397
40.7M
  while (HAS_CHAR(enc, ptr, end)) {
398
40.7M
    switch (BYTE_TYPE(enc, ptr)) {
399
0
#  define LEAD_CASE(n)                                                         \
400
26.0M
  case BT_LEAD##n:                                                             \
401
26.0M
    if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
402
1.16k
      *nextTokPtr = ptr;                                                       \
403
1.16k
      return XML_TOK_DATA_CHARS;                                               \
404
1.16k
    }                                                                          \
405
26.0M
    ptr += n;                                                                  \
406
26.0M
    break;
407
13.8M
      LEAD_CASE(2)
408
10.6M
      LEAD_CASE(3)
409
1.56M
      LEAD_CASE(4)
410
0
#  undef LEAD_CASE
411
305
    case BT_NONXML:
412
333
    case BT_MALFORM:
413
431
    case BT_TRAIL:
414
417k
    case BT_CR:
415
686k
    case BT_LF:
416
2.73M
    case BT_RSQB:
417
2.73M
      *nextTokPtr = ptr;
418
2.73M
      return XML_TOK_DATA_CHARS;
419
11.9M
    default:
420
11.9M
      ptr += MINBPC(enc);
421
11.9M
      break;
422
40.7M
    }
423
40.7M
  }
424
4.73k
  *nextTokPtr = ptr;
425
4.73k
  return XML_TOK_DATA_CHARS;
426
2.74M
}
xmltok.c:normal_cdataSectionTok
Line
Count
Source
356
5.20M
                        const char **nextTokPtr) {
357
5.20M
  if (ptr >= end)
358
2.18k
    return XML_TOK_NONE;
359
5.20M
  if (MINBPC(enc) > 1) {
360
0
    size_t n = end - ptr;
361
0
    if (n & (MINBPC(enc) - 1)) {
362
0
      n &= ~(MINBPC(enc) - 1);
363
0
      if (n == 0)
364
0
        return XML_TOK_PARTIAL;
365
0
      end = ptr + n;
366
0
    }
367
0
  }
368
5.20M
  switch (BYTE_TYPE(enc, ptr)) {
369
3.16M
  case BT_RSQB:
370
3.16M
    ptr += MINBPC(enc);
371
3.16M
    REQUIRE_CHAR(enc, ptr, end);
372
3.16M
    if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
373
67.7k
      break;
374
3.10M
    ptr += MINBPC(enc);
375
3.10M
    REQUIRE_CHAR(enc, ptr, end);
376
3.10M
    if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
377
1.68M
      ptr -= MINBPC(enc);
378
1.68M
      break;
379
1.68M
    }
380
1.41M
    *nextTokPtr = ptr + MINBPC(enc);
381
1.41M
    return XML_TOK_CDATA_SECT_CLOSE;
382
1.14M
  case BT_CR:
383
1.14M
    ptr += MINBPC(enc);
384
1.14M
    REQUIRE_CHAR(enc, ptr, end);
385
1.14M
    if (BYTE_TYPE(enc, ptr) == BT_LF)
386
1.36k
      ptr += MINBPC(enc);
387
1.14M
    *nextTokPtr = ptr;
388
1.14M
    return XML_TOK_DATA_NEWLINE;
389
259k
  case BT_LF:
390
259k
    *nextTokPtr = ptr + MINBPC(enc);
391
259k
    return XML_TOK_DATA_NEWLINE;
392
694k
    INVALID_CASES(ptr, nextTokPtr)
393
280k
  default:
394
280k
    ptr += MINBPC(enc);
395
280k
    break;
396
5.20M
  }
397
34.4M
  while (HAS_CHAR(enc, ptr, end)) {
398
34.4M
    switch (BYTE_TYPE(enc, ptr)) {
399
0
#  define LEAD_CASE(n)                                                         \
400
0
  case BT_LEAD##n:                                                             \
401
0
    if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
402
0
      *nextTokPtr = ptr;                                                       \
403
0
      return XML_TOK_DATA_CHARS;                                               \
404
0
    }                                                                          \
405
0
    ptr += n;                                                                  \
406
0
    break;
407
13.8M
      LEAD_CASE(2)
408
10.6M
      LEAD_CASE(3)
409
856k
      LEAD_CASE(4)
410
0
#  undef LEAD_CASE
411
87
    case BT_NONXML:
412
115
    case BT_MALFORM:
413
151
    case BT_TRAIL:
414
390k
    case BT_CR:
415
631k
    case BT_LF:
416
2.37M
    case BT_RSQB:
417
2.37M
      *nextTokPtr = ptr;
418
2.37M
      return XML_TOK_DATA_CHARS;
419
6.69M
    default:
420
6.69M
      ptr += MINBPC(enc);
421
6.69M
      break;
422
34.4M
    }
423
34.4M
  }
424
1.96k
  *nextTokPtr = ptr;
425
1.96k
  return XML_TOK_DATA_CHARS;
426
2.37M
}
xmltok.c:little2_cdataSectionTok
Line
Count
Source
356
682k
                        const char **nextTokPtr) {
357
682k
  if (ptr >= end)
358
1.55k
    return XML_TOK_NONE;
359
680k
  if (MINBPC(enc) > 1) {
360
680k
    size_t n = end - ptr;
361
680k
    if (n & (MINBPC(enc) - 1)) {
362
12.6k
      n &= ~(MINBPC(enc) - 1);
363
12.6k
      if (n == 0)
364
233
        return XML_TOK_PARTIAL;
365
12.4k
      end = ptr + n;
366
12.4k
    }
367
680k
  }
368
680k
  switch (BYTE_TYPE(enc, ptr)) {
369
251k
  case BT_RSQB:
370
251k
    ptr += MINBPC(enc);
371
251k
    REQUIRE_CHAR(enc, ptr, end);
372
250k
    if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
373
130k
      break;
374
120k
    ptr += MINBPC(enc);
375
120k
    REQUIRE_CHAR(enc, ptr, end);
376
120k
    if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
377
119k
      ptr -= MINBPC(enc);
378
119k
      break;
379
119k
    }
380
659
    *nextTokPtr = ptr + MINBPC(enc);
381
659
    return XML_TOK_CDATA_SECT_CLOSE;
382
367k
  case BT_CR:
383
367k
    ptr += MINBPC(enc);
384
367k
    REQUIRE_CHAR(enc, ptr, end);
385
367k
    if (BYTE_TYPE(enc, ptr) == BT_LF)
386
2.08k
      ptr += MINBPC(enc);
387
367k
    *nextTokPtr = ptr;
388
367k
    return XML_TOK_DATA_NEWLINE;
389
23.7k
  case BT_LF:
390
23.7k
    *nextTokPtr = ptr + MINBPC(enc);
391
23.7k
    return XML_TOK_DATA_NEWLINE;
392
23.7k
    INVALID_CASES(ptr, nextTokPtr)
393
34.6k
  default:
394
34.6k
    ptr += MINBPC(enc);
395
34.6k
    break;
396
680k
  }
397
3.68M
  while (HAS_CHAR(enc, ptr, end)) {
398
3.68M
    switch (BYTE_TYPE(enc, ptr)) {
399
0
#  define LEAD_CASE(n)                                                         \
400
0
  case BT_LEAD##n:                                                             \
401
0
    if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
402
0
      *nextTokPtr = ptr;                                                       \
403
0
      return XML_TOK_DATA_CHARS;                                               \
404
0
    }                                                                          \
405
0
    ptr += n;                                                                  \
406
0
    break;
407
0
      LEAD_CASE(2)
408
0
      LEAD_CASE(3)
409
301k
      LEAD_CASE(4)
410
0
#  undef LEAD_CASE
411
85
    case BT_NONXML:
412
85
    case BT_MALFORM:
413
117
    case BT_TRAIL:
414
17.4k
    case BT_CR:
415
37.5k
    case BT_LF:
416
286k
    case BT_RSQB:
417
286k
      *nextTokPtr = ptr;
418
286k
      return XML_TOK_DATA_CHARS;
419
3.09M
    default:
420
3.09M
      ptr += MINBPC(enc);
421
3.09M
      break;
422
3.68M
    }
423
3.68M
  }
424
1.43k
  *nextTokPtr = ptr;
425
1.43k
  return XML_TOK_DATA_CHARS;
426
287k
}
xmltok.c:big2_cdataSectionTok
Line
Count
Source
356
447k
                        const char **nextTokPtr) {
357
447k
  if (ptr >= end)
358
1.42k
    return XML_TOK_NONE;
359
445k
  if (MINBPC(enc) > 1) {
360
445k
    size_t n = end - ptr;
361
445k
    if (n & (MINBPC(enc) - 1)) {
362
15.4k
      n &= ~(MINBPC(enc) - 1);
363
15.4k
      if (n == 0)
364
262
        return XML_TOK_PARTIAL;
365
15.1k
      end = ptr + n;
366
15.1k
    }
367
445k
  }
368
445k
  switch (BYTE_TYPE(enc, ptr)) {
369
64.4k
  case BT_RSQB:
370
64.4k
    ptr += MINBPC(enc);
371
64.4k
    REQUIRE_CHAR(enc, ptr, end);
372
64.4k
    if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
373
28.3k
      break;
374
36.0k
    ptr += MINBPC(enc);
375
36.0k
    REQUIRE_CHAR(enc, ptr, end);
376
36.0k
    if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
377
27.7k
      ptr -= MINBPC(enc);
378
27.7k
      break;
379
27.7k
    }
380
8.24k
    *nextTokPtr = ptr + MINBPC(enc);
381
8.24k
    return XML_TOK_CDATA_SECT_CLOSE;
382
344k
  case BT_CR:
383
344k
    ptr += MINBPC(enc);
384
344k
    REQUIRE_CHAR(enc, ptr, end);
385
344k
    if (BYTE_TYPE(enc, ptr) == BT_LF)
386
6.22k
      ptr += MINBPC(enc);
387
344k
    *nextTokPtr = ptr;
388
344k
    return XML_TOK_DATA_NEWLINE;
389
9.81k
  case BT_LF:
390
9.81k
    *nextTokPtr = ptr + MINBPC(enc);
391
9.81k
    return XML_TOK_DATA_NEWLINE;
392
9.81k
    INVALID_CASES(ptr, nextTokPtr)
393
23.8k
  default:
394
23.8k
    ptr += MINBPC(enc);
395
23.8k
    break;
396
445k
  }
397
2.67M
  while (HAS_CHAR(enc, ptr, end)) {
398
2.67M
    switch (BYTE_TYPE(enc, ptr)) {
399
0
#  define LEAD_CASE(n)                                                         \
400
0
  case BT_LEAD##n:                                                             \
401
0
    if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
402
0
      *nextTokPtr = ptr;                                                       \
403
0
      return XML_TOK_DATA_CHARS;                                               \
404
0
    }                                                                          \
405
0
    ptr += n;                                                                  \
406
0
    break;
407
0
      LEAD_CASE(2)
408
0
      LEAD_CASE(3)
409
403k
      LEAD_CASE(4)
410
0
#  undef LEAD_CASE
411
133
    case BT_NONXML:
412
133
    case BT_MALFORM:
413
163
    case BT_TRAIL:
414
8.84k
    case BT_CR:
415
17.0k
    case BT_LF:
416
80.1k
    case BT_RSQB:
417
80.1k
      *nextTokPtr = ptr;
418
80.1k
      return XML_TOK_DATA_CHARS;
419
2.18M
    default:
420
2.18M
      ptr += MINBPC(enc);
421
2.18M
      break;
422
2.67M
    }
423
2.67M
  }
424
1.34k
  *nextTokPtr = ptr;
425
1.34k
  return XML_TOK_DATA_CHARS;
426
81.8k
}
427
428
/* ptr points to character following "</" */
429
430
static int PTRCALL
431
PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
432
199k
                   const char **nextTokPtr) {
433
199k
  REQUIRE_CHAR(enc, ptr, end);
434
199k
  switch (BYTE_TYPE(enc, ptr)) {
435
104k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
436
97
  default:
437
97
    *nextTokPtr = ptr;
438
97
    return XML_TOK_INVALID;
439
199k
  }
440
10.9M
  while (HAS_CHAR(enc, ptr, end)) {
441
10.9M
    switch (BYTE_TYPE(enc, ptr)) {
442
22.3M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
443
2.06k
    case BT_S:
444
43.0k
    case BT_CR:
445
44.9k
    case BT_LF:
446
1.31M
      for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
447
1.31M
        switch (BYTE_TYPE(enc, ptr)) {
448
3.12k
        case BT_S:
449
1.26M
        case BT_CR:
450
1.26M
        case BT_LF:
451
1.26M
          break;
452
44.3k
        case BT_GT:
453
44.3k
          *nextTokPtr = ptr + MINBPC(enc);
454
44.3k
          return XML_TOK_END_TAG;
455
210
        default:
456
210
          *nextTokPtr = ptr;
457
210
          return XML_TOK_INVALID;
458
1.31M
        }
459
1.31M
      }
460
384
      return XML_TOK_PARTIAL;
461
0
#  ifdef XML_NS
462
7.55k
    case BT_COLON:
463
      /* no need to check qname syntax here,
464
         since end-tag must match exactly */
465
7.55k
      ptr += MINBPC(enc);
466
7.55k
      break;
467
0
#  endif
468
152k
    case BT_GT:
469
152k
      *nextTokPtr = ptr + MINBPC(enc);
470
152k
      return XML_TOK_END_TAG;
471
206
    default:
472
206
      *nextTokPtr = ptr;
473
206
      return XML_TOK_INVALID;
474
10.9M
    }
475
10.9M
  }
476
897
  return XML_TOK_PARTIAL;
477
199k
}
xmltok.c:normal_scanEndTag
Line
Count
Source
432
155k
                   const char **nextTokPtr) {
433
155k
  REQUIRE_CHAR(enc, ptr, end);
434
155k
  switch (BYTE_TYPE(enc, ptr)) {
435
59.8k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
436
21
  default:
437
21
    *nextTokPtr = ptr;
438
21
    return XML_TOK_INVALID;
439
155k
  }
440
10.6M
  while (HAS_CHAR(enc, ptr, end)) {
441
10.6M
    switch (BYTE_TYPE(enc, ptr)) {
442
21.5M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
443
995
    case BT_S:
444
1.14k
    case BT_CR:
445
2.69k
    case BT_LF:
446
1.23M
      for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
447
1.23M
        switch (BYTE_TYPE(enc, ptr)) {
448
2.36k
        case BT_S:
449
1.22M
        case BT_CR:
450
1.23M
        case BT_LF:
451
1.23M
          break;
452
2.45k
        case BT_GT:
453
2.45k
          *nextTokPtr = ptr + MINBPC(enc);
454
2.45k
          return XML_TOK_END_TAG;
455
97
        default:
456
97
          *nextTokPtr = ptr;
457
97
          return XML_TOK_INVALID;
458
1.23M
        }
459
1.23M
      }
460
144
      return XML_TOK_PARTIAL;
461
0
#  ifdef XML_NS
462
7.07k
    case BT_COLON:
463
      /* no need to check qname syntax here,
464
         since end-tag must match exactly */
465
7.07k
      ptr += MINBPC(enc);
466
7.07k
      break;
467
0
#  endif
468
151k
    case BT_GT:
469
151k
      *nextTokPtr = ptr + MINBPC(enc);
470
151k
      return XML_TOK_END_TAG;
471
112
    default:
472
112
      *nextTokPtr = ptr;
473
112
      return XML_TOK_INVALID;
474
10.6M
    }
475
10.6M
  }
476
363
  return XML_TOK_PARTIAL;
477
155k
}
xmltok.c:little2_scanEndTag
Line
Count
Source
432
2.03k
                   const char **nextTokPtr) {
433
2.03k
  REQUIRE_CHAR(enc, ptr, end);
434
2.02k
  switch (BYTE_TYPE(enc, ptr)) {
435
2.20k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
436
38
  default:
437
38
    *nextTokPtr = ptr;
438
38
    return XML_TOK_INVALID;
439
2.02k
  }
440
51.6k
  while (HAS_CHAR(enc, ptr, end)) {
441
51.6k
    switch (BYTE_TYPE(enc, ptr)) {
442
244k
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
443
593
    case BT_S:
444
989
    case BT_CR:
445
1.15k
    case BT_LF:
446
14.5k
      for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
447
14.5k
        switch (BYTE_TYPE(enc, ptr)) {
448
384
        case BT_S:
449
13.1k
        case BT_CR:
450
13.4k
        case BT_LF:
451
13.4k
          break;
452
998
        case BT_GT:
453
998
          *nextTokPtr = ptr + MINBPC(enc);
454
998
          return XML_TOK_END_TAG;
455
60
        default:
456
60
          *nextTokPtr = ptr;
457
60
          return XML_TOK_INVALID;
458
14.5k
        }
459
14.5k
      }
460
95
      return XML_TOK_PARTIAL;
461
0
#  ifdef XML_NS
462
262
    case BT_COLON:
463
      /* no need to check qname syntax here,
464
         since end-tag must match exactly */
465
262
      ptr += MINBPC(enc);
466
262
      break;
467
0
#  endif
468
361
    case BT_GT:
469
361
      *nextTokPtr = ptr + MINBPC(enc);
470
361
      return XML_TOK_END_TAG;
471
50
    default:
472
50
      *nextTokPtr = ptr;
473
50
      return XML_TOK_INVALID;
474
51.6k
    }
475
51.6k
  }
476
240
  return XML_TOK_PARTIAL;
477
1.92k
}
xmltok.c:big2_scanEndTag
Line
Count
Source
432
42.1k
                   const char **nextTokPtr) {
433
42.1k
  REQUIRE_CHAR(enc, ptr, end);
434
42.1k
  switch (BYTE_TYPE(enc, ptr)) {
435
42.2k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
436
38
  default:
437
38
    *nextTokPtr = ptr;
438
38
    return XML_TOK_INVALID;
439
42.1k
  }
440
190k
  while (HAS_CHAR(enc, ptr, end)) {
441
190k
    switch (BYTE_TYPE(enc, ptr)) {
442
585k
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
443
478
    case BT_S:
444
40.8k
    case BT_CR:
445
41.1k
    case BT_LF:
446
66.5k
      for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
447
66.5k
        switch (BYTE_TYPE(enc, ptr)) {
448
377
        case BT_S:
449
25.3k
        case BT_CR:
450
25.5k
        case BT_LF:
451
25.5k
          break;
452
40.9k
        case BT_GT:
453
40.9k
          *nextTokPtr = ptr + MINBPC(enc);
454
40.9k
          return XML_TOK_END_TAG;
455
53
        default:
456
53
          *nextTokPtr = ptr;
457
53
          return XML_TOK_INVALID;
458
66.5k
        }
459
66.5k
      }
460
145
      return XML_TOK_PARTIAL;
461
0
#  ifdef XML_NS
462
216
    case BT_COLON:
463
      /* no need to check qname syntax here,
464
         since end-tag must match exactly */
465
216
      ptr += MINBPC(enc);
466
216
      break;
467
0
#  endif
468
408
    case BT_GT:
469
408
      *nextTokPtr = ptr + MINBPC(enc);
470
408
      return XML_TOK_END_TAG;
471
44
    default:
472
44
      *nextTokPtr = ptr;
473
44
      return XML_TOK_INVALID;
474
190k
    }
475
190k
  }
476
294
  return XML_TOK_PARTIAL;
477
42.0k
}
478
479
/* ptr points to character following "&#X" */
480
481
static int PTRCALL
482
PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
483
566k
                       const char **nextTokPtr) {
484
566k
  if (HAS_CHAR(enc, ptr, end)) {
485
566k
    switch (BYTE_TYPE(enc, ptr)) {
486
74.4k
    case BT_DIGIT:
487
566k
    case BT_HEX:
488
566k
      break;
489
79
    default:
490
79
      *nextTokPtr = ptr;
491
79
      return XML_TOK_INVALID;
492
566k
    }
493
1.34M
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
494
1.34M
      switch (BYTE_TYPE(enc, ptr)) {
495
59.1k
      case BT_DIGIT:
496
781k
      case BT_HEX:
497
781k
        break;
498
565k
      case BT_SEMI:
499
565k
        *nextTokPtr = ptr + MINBPC(enc);
500
565k
        return XML_TOK_CHAR_REF;
501
138
      default:
502
138
        *nextTokPtr = ptr;
503
138
        return XML_TOK_INVALID;
504
1.34M
      }
505
1.34M
    }
506
566k
  }
507
1.03k
  return XML_TOK_PARTIAL;
508
566k
}
xmltok.c:normal_scanHexCharRef
Line
Count
Source
483
373k
                       const char **nextTokPtr) {
484
373k
  if (HAS_CHAR(enc, ptr, end)) {
485
373k
    switch (BYTE_TYPE(enc, ptr)) {
486
38.0k
    case BT_DIGIT:
487
373k
    case BT_HEX:
488
373k
      break;
489
29
    default:
490
29
      *nextTokPtr = ptr;
491
29
      return XML_TOK_INVALID;
492
373k
    }
493
475k
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
494
475k
      switch (BYTE_TYPE(enc, ptr)) {
495
20.1k
      case BT_DIGIT:
496
101k
      case BT_HEX:
497
101k
        break;
498
373k
      case BT_SEMI:
499
373k
        *nextTokPtr = ptr + MINBPC(enc);
500
373k
        return XML_TOK_CHAR_REF;
501
35
      default:
502
35
        *nextTokPtr = ptr;
503
35
        return XML_TOK_INVALID;
504
475k
      }
505
475k
    }
506
373k
  }
507
271
  return XML_TOK_PARTIAL;
508
373k
}
xmltok.c:little2_scanHexCharRef
Line
Count
Source
483
45.4k
                       const char **nextTokPtr) {
484
45.4k
  if (HAS_CHAR(enc, ptr, end)) {
485
45.3k
    switch (BYTE_TYPE(enc, ptr)) {
486
13.6k
    case BT_DIGIT:
487
45.3k
    case BT_HEX:
488
45.3k
      break;
489
35
    default:
490
35
      *nextTokPtr = ptr;
491
35
      return XML_TOK_INVALID;
492
45.3k
    }
493
182k
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
494
182k
      switch (BYTE_TYPE(enc, ptr)) {
495
16.5k
      case BT_DIGIT:
496
137k
      case BT_HEX:
497
137k
        break;
498
45.0k
      case BT_SEMI:
499
45.0k
        *nextTokPtr = ptr + MINBPC(enc);
500
45.0k
        return XML_TOK_CHAR_REF;
501
39
      default:
502
39
        *nextTokPtr = ptr;
503
39
        return XML_TOK_INVALID;
504
182k
      }
505
182k
    }
506
45.3k
  }
507
235
  return XML_TOK_PARTIAL;
508
45.4k
}
xmltok.c:big2_scanHexCharRef
Line
Count
Source
483
147k
                       const char **nextTokPtr) {
484
147k
  if (HAS_CHAR(enc, ptr, end)) {
485
147k
    switch (BYTE_TYPE(enc, ptr)) {
486
22.7k
    case BT_DIGIT:
487
147k
    case BT_HEX:
488
147k
      break;
489
15
    default:
490
15
      *nextTokPtr = ptr;
491
15
      return XML_TOK_INVALID;
492
147k
    }
493
688k
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
494
688k
      switch (BYTE_TYPE(enc, ptr)) {
495
22.4k
      case BT_DIGIT:
496
541k
      case BT_HEX:
497
541k
        break;
498
146k
      case BT_SEMI:
499
146k
        *nextTokPtr = ptr + MINBPC(enc);
500
146k
        return XML_TOK_CHAR_REF;
501
64
      default:
502
64
        *nextTokPtr = ptr;
503
64
        return XML_TOK_INVALID;
504
688k
      }
505
688k
    }
506
147k
  }
507
533
  return XML_TOK_PARTIAL;
508
147k
}
509
510
/* ptr points to character following "&#" */
511
512
static int PTRCALL
513
PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
514
588k
                    const char **nextTokPtr) {
515
588k
  if (HAS_CHAR(enc, ptr, end)) {
516
588k
    if (CHAR_MATCHES(enc, ptr, ASCII_x))
517
566k
      return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
518
22.1k
    switch (BYTE_TYPE(enc, ptr)) {
519
21.9k
    case BT_DIGIT:
520
21.9k
      break;
521
190
    default:
522
190
      *nextTokPtr = ptr;
523
190
      return XML_TOK_INVALID;
524
22.1k
    }
525
89.2k
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
526
89.2k
      switch (BYTE_TYPE(enc, ptr)) {
527
67.4k
      case BT_DIGIT:
528
67.4k
        break;
529
21.8k
      case BT_SEMI:
530
21.8k
        *nextTokPtr = ptr + MINBPC(enc);
531
21.8k
        return XML_TOK_CHAR_REF;
532
70
      default:
533
70
        *nextTokPtr = ptr;
534
70
        return XML_TOK_INVALID;
535
89.2k
      }
536
89.2k
    }
537
21.9k
  }
538
360
  return XML_TOK_PARTIAL;
539
588k
}
xmltok.c:normal_scanCharRef
Line
Count
Source
514
385k
                    const char **nextTokPtr) {
515
385k
  if (HAS_CHAR(enc, ptr, end)) {
516
385k
    if (CHAR_MATCHES(enc, ptr, ASCII_x))
517
373k
      return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
518
11.8k
    switch (BYTE_TYPE(enc, ptr)) {
519
11.8k
    case BT_DIGIT:
520
11.8k
      break;
521
99
    default:
522
99
      *nextTokPtr = ptr;
523
99
      return XML_TOK_INVALID;
524
11.8k
    }
525
62.5k
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
526
62.5k
      switch (BYTE_TYPE(enc, ptr)) {
527
50.7k
      case BT_DIGIT:
528
50.7k
        break;
529
11.7k
      case BT_SEMI:
530
11.7k
        *nextTokPtr = ptr + MINBPC(enc);
531
11.7k
        return XML_TOK_CHAR_REF;
532
14
      default:
533
14
        *nextTokPtr = ptr;
534
14
        return XML_TOK_INVALID;
535
62.5k
      }
536
62.5k
    }
537
11.8k
  }
538
149
  return XML_TOK_PARTIAL;
539
385k
}
xmltok.c:little2_scanCharRef
Line
Count
Source
514
55.1k
                    const char **nextTokPtr) {
515
55.1k
  if (HAS_CHAR(enc, ptr, end)) {
516
55.1k
    if (CHAR_MATCHES(enc, ptr, ASCII_x))
517
45.4k
      return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
518
9.74k
    switch (BYTE_TYPE(enc, ptr)) {
519
9.69k
    case BT_DIGIT:
520
9.69k
      break;
521
46
    default:
522
46
      *nextTokPtr = ptr;
523
46
      return XML_TOK_INVALID;
524
9.74k
    }
525
25.4k
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
526
25.4k
      switch (BYTE_TYPE(enc, ptr)) {
527
15.7k
      case BT_DIGIT:
528
15.7k
        break;
529
9.65k
      case BT_SEMI:
530
9.65k
        *nextTokPtr = ptr + MINBPC(enc);
531
9.65k
        return XML_TOK_CHAR_REF;
532
24
      default:
533
24
        *nextTokPtr = ptr;
534
24
        return XML_TOK_INVALID;
535
25.4k
      }
536
25.4k
    }
537
9.69k
  }
538
68
  return XML_TOK_PARTIAL;
539
55.1k
}
xmltok.c:big2_scanCharRef
Line
Count
Source
514
148k
                    const char **nextTokPtr) {
515
148k
  if (HAS_CHAR(enc, ptr, end)) {
516
148k
    if (CHAR_MATCHES(enc, ptr, ASCII_x))
517
147k
      return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
518
523
    switch (BYTE_TYPE(enc, ptr)) {
519
478
    case BT_DIGIT:
520
478
      break;
521
45
    default:
522
45
      *nextTokPtr = ptr;
523
45
      return XML_TOK_INVALID;
524
523
    }
525
1.29k
    for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
526
1.29k
      switch (BYTE_TYPE(enc, ptr)) {
527
847
      case BT_DIGIT:
528
847
        break;
529
416
      case BT_SEMI:
530
416
        *nextTokPtr = ptr + MINBPC(enc);
531
416
        return XML_TOK_CHAR_REF;
532
32
      default:
533
32
        *nextTokPtr = ptr;
534
32
        return XML_TOK_INVALID;
535
1.29k
      }
536
1.29k
    }
537
478
  }
538
143
  return XML_TOK_PARTIAL;
539
148k
}
540
541
/* ptr points to character following "&" */
542
543
static int PTRCALL
544
PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
545
7.73M
                const char **nextTokPtr) {
546
7.73M
  REQUIRE_CHAR(enc, ptr, end);
547
7.73M
  switch (BYTE_TYPE(enc, ptr)) {
548
6.57M
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
549
588k
  case BT_NUM:
550
588k
    return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
551
1.03k
  default:
552
1.03k
    *nextTokPtr = ptr;
553
1.03k
    return XML_TOK_INVALID;
554
7.73M
  }
555
33.9M
  while (HAS_CHAR(enc, ptr, end)) {
556
33.9M
    switch (BYTE_TYPE(enc, ptr)) {
557
91.2M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
558
7.14M
    case BT_SEMI:
559
7.14M
      *nextTokPtr = ptr + MINBPC(enc);
560
7.14M
      return XML_TOK_ENTITY_REF;
561
849
    default:
562
849
      *nextTokPtr = ptr;
563
849
      return XML_TOK_INVALID;
564
33.9M
    }
565
33.9M
  }
566
1.35k
  return XML_TOK_PARTIAL;
567
7.14M
}
xmltok.c:normal_scanRef
Line
Count
Source
545
7.13M
                const char **nextTokPtr) {
546
7.13M
  REQUIRE_CHAR(enc, ptr, end);
547
7.13M
  switch (BYTE_TYPE(enc, ptr)) {
548
6.53M
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
549
385k
  case BT_NUM:
550
385k
    return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
551
880
  default:
552
880
    *nextTokPtr = ptr;
553
880
    return XML_TOK_INVALID;
554
7.13M
  }
555
33.1M
  while (HAS_CHAR(enc, ptr, end)) {
556
33.1M
    switch (BYTE_TYPE(enc, ptr)) {
557
89.3M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
558
6.74M
    case BT_SEMI:
559
6.74M
      *nextTokPtr = ptr + MINBPC(enc);
560
6.74M
      return XML_TOK_ENTITY_REF;
561
725
    default:
562
725
      *nextTokPtr = ptr;
563
725
      return XML_TOK_INVALID;
564
33.1M
    }
565
33.1M
  }
566
718
  return XML_TOK_PARTIAL;
567
6.75M
}
xmltok.c:little2_scanRef
Line
Count
Source
545
118k
                const char **nextTokPtr) {
546
118k
  REQUIRE_CHAR(enc, ptr, end);
547
117k
  switch (BYTE_TYPE(enc, ptr)) {
548
17.9k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
549
55.1k
  case BT_NUM:
550
55.1k
    return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
551
68
  default:
552
68
    *nextTokPtr = ptr;
553
68
    return XML_TOK_INVALID;
554
117k
  }
555
157k
  while (HAS_CHAR(enc, ptr, end)) {
556
157k
    switch (BYTE_TYPE(enc, ptr)) {
557
418k
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
558
62.0k
    case BT_SEMI:
559
62.0k
      *nextTokPtr = ptr + MINBPC(enc);
560
62.0k
      return XML_TOK_ENTITY_REF;
561
58
    default:
562
58
      *nextTokPtr = ptr;
563
58
      return XML_TOK_INVALID;
564
157k
    }
565
157k
  }
566
300
  return XML_TOK_PARTIAL;
567
62.5k
}
xmltok.c:big2_scanRef
Line
Count
Source
545
479k
                const char **nextTokPtr) {
546
479k
  REQUIRE_CHAR(enc, ptr, end);
547
479k
  switch (BYTE_TYPE(enc, ptr)) {
548
21.8k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
549
148k
  case BT_NUM:
550
148k
    return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
551
84
  default:
552
84
    *nextTokPtr = ptr;
553
84
    return XML_TOK_INVALID;
554
479k
  }
555
641k
  while (HAS_CHAR(enc, ptr, end)) {
556
641k
    switch (BYTE_TYPE(enc, ptr)) {
557
1.41M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
558
330k
    case BT_SEMI:
559
330k
      *nextTokPtr = ptr + MINBPC(enc);
560
330k
      return XML_TOK_ENTITY_REF;
561
66
    default:
562
66
      *nextTokPtr = ptr;
563
66
      return XML_TOK_INVALID;
564
641k
    }
565
641k
  }
566
341
  return XML_TOK_PARTIAL;
567
331k
}
568
569
/* ptr points to character following first character of attribute name */
570
571
static int PTRCALL
572
PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
573
1.04M
                 const char **nextTokPtr) {
574
1.04M
#  ifdef XML_NS
575
1.04M
  int hadColon = 0;
576
1.04M
#  endif
577
49.7M
  while (HAS_CHAR(enc, ptr, end)) {
578
49.7M
    switch (BYTE_TYPE(enc, ptr)) {
579
143M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
580
0
#  ifdef XML_NS
581
200k
    case BT_COLON:
582
200k
      if (hadColon) {
583
31
        *nextTokPtr = ptr;
584
31
        return XML_TOK_INVALID;
585
31
      }
586
200k
      hadColon = 1;
587
200k
      ptr += MINBPC(enc);
588
200k
      REQUIRE_CHAR(enc, ptr, end);
589
200k
      switch (BYTE_TYPE(enc, ptr)) {
590
194k
        CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
591
93
      default:
592
93
        *nextTokPtr = ptr;
593
93
        return XML_TOK_INVALID;
594
200k
      }
595
199k
      break;
596
199k
#  endif
597
199k
    case BT_S:
598
10.3k
    case BT_CR:
599
34.7k
    case BT_LF:
600
632k
      for (;;) {
601
632k
        int t;
602
603
632k
        ptr += MINBPC(enc);
604
632k
        REQUIRE_CHAR(enc, ptr, end);
605
631k
        t = BYTE_TYPE(enc, ptr);
606
631k
        if (t == BT_EQUALS)
607
33.9k
          break;
608
597k
        switch (t) {
609
123k
        case BT_S:
610
285k
        case BT_LF:
611
597k
        case BT_CR:
612
597k
          break;
613
244
        default:
614
244
          *nextTokPtr = ptr;
615
244
          return XML_TOK_INVALID;
616
597k
        }
617
597k
      }
618
      /* fall through */
619
5.67M
    case BT_EQUALS: {
620
5.67M
      int open;
621
5.67M
#  ifdef XML_NS
622
5.67M
      hadColon = 0;
623
5.67M
#  endif
624
6.21M
      for (;;) {
625
6.21M
        ptr += MINBPC(enc);
626
6.21M
        REQUIRE_CHAR(enc, ptr, end);
627
6.21M
        open = BYTE_TYPE(enc, ptr);
628
6.21M
        if (open == BT_QUOT || open == BT_APOS)
629
5.67M
          break;
630
544k
        switch (open) {
631
510k
        case BT_S:
632
524k
        case BT_LF:
633
543k
        case BT_CR:
634
543k
          break;
635
171
        default:
636
171
          *nextTokPtr = ptr;
637
171
          return XML_TOK_INVALID;
638
544k
        }
639
544k
      }
640
5.67M
      ptr += MINBPC(enc);
641
      /* in attribute value */
642
247M
      for (;;) {
643
247M
        int t;
644
247M
        REQUIRE_CHAR(enc, ptr, end);
645
247M
        t = BYTE_TYPE(enc, ptr);
646
247M
        if (t == open)
647
5.66M
          break;
648
242M
        switch (t) {
649
22.0M
          INVALID_CASES(ptr, nextTokPtr)
650
390k
        case BT_AMP: {
651
390k
          int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
652
390k
          if (tok <= 0) {
653
460
            if (tok == XML_TOK_INVALID)
654
123
              *nextTokPtr = ptr;
655
460
            return tok;
656
460
          }
657
390k
          break;
658
390k
        }
659
390k
        case BT_LT:
660
118
          *nextTokPtr = ptr;
661
118
          return XML_TOK_INVALID;
662
230M
        default:
663
230M
          ptr += MINBPC(enc);
664
230M
          break;
665
242M
        }
666
242M
      }
667
5.66M
      ptr += MINBPC(enc);
668
5.66M
      REQUIRE_CHAR(enc, ptr, end);
669
5.66M
      switch (BYTE_TYPE(enc, ptr)) {
670
2.89M
      case BT_S:
671
4.59M
      case BT_CR:
672
4.67M
      case BT_LF:
673
4.67M
        break;
674
10.2k
      case BT_SOL:
675
10.2k
        goto sol;
676
978k
      case BT_GT:
677
978k
        goto gt;
678
164
      default:
679
164
        *nextTokPtr = ptr;
680
164
        return XML_TOK_INVALID;
681
5.66M
      }
682
      /* ptr points to closing quote */
683
6.69M
      for (;;) {
684
6.69M
        ptr += MINBPC(enc);
685
6.69M
        REQUIRE_CHAR(enc, ptr, end);
686
6.69M
        switch (BYTE_TYPE(enc, ptr)) {
687
4.45M
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
688
783k
        case BT_S:
689
2.01M
        case BT_CR:
690
2.02M
        case BT_LF:
691
2.02M
          continue;
692
6.22k
        case BT_GT:
693
985k
        gt:
694
985k
          *nextTokPtr = ptr + MINBPC(enc);
695
985k
          return XML_TOK_START_TAG_WITH_ATTS;
696
35.1k
        case BT_SOL:
697
45.4k
        sol:
698
45.4k
          ptr += MINBPC(enc);
699
45.4k
          REQUIRE_CHAR(enc, ptr, end);
700
45.3k
          if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
701
95
            *nextTokPtr = ptr;
702
95
            return XML_TOK_INVALID;
703
95
          }
704
45.3k
          *nextTokPtr = ptr + MINBPC(enc);
705
45.3k
          return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
706
147
        default:
707
147
          *nextTokPtr = ptr;
708
147
          return XML_TOK_INVALID;
709
6.69M
        }
710
4.63M
        break;
711
6.69M
      }
712
4.63M
      break;
713
4.67M
    }
714
4.63M
    default:
715
497
      *nextTokPtr = ptr;
716
497
      return XML_TOK_INVALID;
717
49.7M
    }
718
49.7M
  }
719
2.81k
  return XML_TOK_PARTIAL;
720
1.04M
}
xmltok.c:normal_scanAtts
Line
Count
Source
573
1.02M
                 const char **nextTokPtr) {
574
1.02M
#  ifdef XML_NS
575
1.02M
  int hadColon = 0;
576
1.02M
#  endif
577
46.0M
  while (HAS_CHAR(enc, ptr, end)) {
578
46.0M
    switch (BYTE_TYPE(enc, ptr)) {
579
128M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
580
0
#  ifdef XML_NS
581
196k
    case BT_COLON:
582
196k
      if (hadColon) {
583
17
        *nextTokPtr = ptr;
584
17
        return XML_TOK_INVALID;
585
17
      }
586
196k
      hadColon = 1;
587
196k
      ptr += MINBPC(enc);
588
196k
      REQUIRE_CHAR(enc, ptr, end);
589
196k
      switch (BYTE_TYPE(enc, ptr)) {
590
189k
        CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
591
18
      default:
592
18
        *nextTokPtr = ptr;
593
18
        return XML_TOK_INVALID;
594
196k
      }
595
196k
      break;
596
196k
#  endif
597
196k
    case BT_S:
598
4.64k
    case BT_CR:
599
15.9k
    case BT_LF:
600
364k
      for (;;) {
601
364k
        int t;
602
603
364k
        ptr += MINBPC(enc);
604
364k
        REQUIRE_CHAR(enc, ptr, end);
605
364k
        t = BYTE_TYPE(enc, ptr);
606
364k
        if (t == BT_EQUALS)
607
15.6k
          break;
608
348k
        switch (t) {
609
285
        case BT_S:
610
50.5k
        case BT_LF:
611
348k
        case BT_CR:
612
348k
          break;
613
119
        default:
614
119
          *nextTokPtr = ptr;
615
119
          return XML_TOK_INVALID;
616
348k
        }
617
348k
      }
618
      /* fall through */
619
4.84M
    case BT_EQUALS: {
620
4.84M
      int open;
621
4.84M
#  ifdef XML_NS
622
4.84M
      hadColon = 0;
623
4.84M
#  endif
624
5.35M
      for (;;) {
625
5.35M
        ptr += MINBPC(enc);
626
5.35M
        REQUIRE_CHAR(enc, ptr, end);
627
5.35M
        open = BYTE_TYPE(enc, ptr);
628
5.35M
        if (open == BT_QUOT || open == BT_APOS)
629
4.84M
          break;
630
509k
        switch (open) {
631
503k
        case BT_S:
632
509k
        case BT_LF:
633
509k
        case BT_CR:
634
509k
          break;
635
69
        default:
636
69
          *nextTokPtr = ptr;
637
69
          return XML_TOK_INVALID;
638
509k
        }
639
509k
      }
640
4.84M
      ptr += MINBPC(enc);
641
      /* in attribute value */
642
127M
      for (;;) {
643
127M
        int t;
644
127M
        REQUIRE_CHAR(enc, ptr, end);
645
127M
        t = BYTE_TYPE(enc, ptr);
646
127M
        if (t == open)
647
4.84M
          break;
648
123M
        switch (t) {
649
19.8M
          INVALID_CASES(ptr, nextTokPtr)
650
133k
        case BT_AMP: {
651
133k
          int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
652
133k
          if (tok <= 0) {
653
247
            if (tok == XML_TOK_INVALID)
654
68
              *nextTokPtr = ptr;
655
247
            return tok;
656
247
          }
657
132k
          break;
658
133k
        }
659
132k
        case BT_LT:
660
78
          *nextTokPtr = ptr;
661
78
          return XML_TOK_INVALID;
662
113M
        default:
663
113M
          ptr += MINBPC(enc);
664
113M
          break;
665
123M
        }
666
123M
      }
667
4.84M
      ptr += MINBPC(enc);
668
4.84M
      REQUIRE_CHAR(enc, ptr, end);
669
4.84M
      switch (BYTE_TYPE(enc, ptr)) {
670
2.16M
      case BT_S:
671
3.85M
      case BT_CR:
672
3.86M
      case BT_LF:
673
3.86M
        break;
674
7.79k
      case BT_SOL:
675
7.79k
        goto sol;
676
973k
      case BT_GT:
677
973k
        goto gt;
678
82
      default:
679
82
        *nextTokPtr = ptr;
680
82
        return XML_TOK_INVALID;
681
4.84M
      }
682
      /* ptr points to closing quote */
683
5.85M
      for (;;) {
684
5.85M
        ptr += MINBPC(enc);
685
5.85M
        REQUIRE_CHAR(enc, ptr, end);
686
5.85M
        switch (BYTE_TYPE(enc, ptr)) {
687
3.79M
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
688
777k
        case BT_S:
689
1.98M
        case BT_CR:
690
1.99M
        case BT_LF:
691
1.99M
          continue;
692
3.79k
        case BT_GT:
693
977k
        gt:
694
977k
          *nextTokPtr = ptr + MINBPC(enc);
695
977k
          return XML_TOK_START_TAG_WITH_ATTS;
696
34.3k
        case BT_SOL:
697
42.0k
        sol:
698
42.0k
          ptr += MINBPC(enc);
699
42.0k
          REQUIRE_CHAR(enc, ptr, end);
700
42.0k
          if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
701
24
            *nextTokPtr = ptr;
702
24
            return XML_TOK_INVALID;
703
24
          }
704
42.0k
          *nextTokPtr = ptr + MINBPC(enc);
705
42.0k
          return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
706
38
        default:
707
38
          *nextTokPtr = ptr;
708
38
          return XML_TOK_INVALID;
709
5.85M
        }
710
3.82M
        break;
711
5.85M
      }
712
3.82M
      break;
713
3.86M
    }
714
3.82M
    default:
715
339
      *nextTokPtr = ptr;
716
339
      return XML_TOK_INVALID;
717
46.0M
    }
718
46.0M
  }
719
1.98k
  return XML_TOK_PARTIAL;
720
1.02M
}
xmltok.c:little2_scanAtts
Line
Count
Source
573
9.49k
                 const char **nextTokPtr) {
574
9.49k
#  ifdef XML_NS
575
9.49k
  int hadColon = 0;
576
9.49k
#  endif
577
2.45M
  while (HAS_CHAR(enc, ptr, end)) {
578
2.45M
    switch (BYTE_TYPE(enc, ptr)) {
579
8.55M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
580
0
#  ifdef XML_NS
581
911
    case BT_COLON:
582
911
      if (hadColon) {
583
7
        *nextTokPtr = ptr;
584
7
        return XML_TOK_INVALID;
585
7
      }
586
904
      hadColon = 1;
587
904
      ptr += MINBPC(enc);
588
904
      REQUIRE_CHAR(enc, ptr, end);
589
892
      switch (BYTE_TYPE(enc, ptr)) {
590
778
        CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
591
36
      default:
592
36
        *nextTokPtr = ptr;
593
36
        return XML_TOK_INVALID;
594
892
      }
595
779
      break;
596
779
#  endif
597
779
    case BT_S:
598
2.56k
    case BT_CR:
599
8.87k
    case BT_LF:
600
26.6k
      for (;;) {
601
26.6k
        int t;
602
603
26.6k
        ptr += MINBPC(enc);
604
26.6k
        REQUIRE_CHAR(enc, ptr, end);
605
26.4k
        t = BYTE_TYPE(enc, ptr);
606
26.4k
        if (t == BT_EQUALS)
607
8.63k
          break;
608
17.8k
        switch (t) {
609
3.71k
        case BT_S:
610
7.00k
        case BT_LF:
611
17.7k
        case BT_CR:
612
17.7k
          break;
613
72
        default:
614
72
          *nextTokPtr = ptr;
615
72
          return XML_TOK_INVALID;
616
17.8k
        }
617
17.8k
      }
618
      /* fall through */
619
737k
    case BT_EQUALS: {
620
737k
      int open;
621
737k
#  ifdef XML_NS
622
737k
      hadColon = 0;
623
737k
#  endif
624
758k
      for (;;) {
625
758k
        ptr += MINBPC(enc);
626
758k
        REQUIRE_CHAR(enc, ptr, end);
627
758k
        open = BYTE_TYPE(enc, ptr);
628
758k
        if (open == BT_QUOT || open == BT_APOS)
629
737k
          break;
630
20.6k
        switch (open) {
631
4.67k
        case BT_S:
632
8.98k
        case BT_LF:
633
20.6k
        case BT_CR:
634
20.6k
          break;
635
52
        default:
636
52
          *nextTokPtr = ptr;
637
52
          return XML_TOK_INVALID;
638
20.6k
        }
639
20.6k
      }
640
737k
      ptr += MINBPC(enc);
641
      /* in attribute value */
642
40.8M
      for (;;) {
643
40.8M
        int t;
644
40.8M
        REQUIRE_CHAR(enc, ptr, end);
645
40.8M
        t = BYTE_TYPE(enc, ptr);
646
40.8M
        if (t == open)
647
735k
          break;
648
40.1M
        switch (t) {
649
1.23M
          INVALID_CASES(ptr, nextTokPtr)
650
37.3k
        case BT_AMP: {
651
37.3k
          int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
652
37.3k
          if (tok <= 0) {
653
78
            if (tok == XML_TOK_INVALID)
654
12
              *nextTokPtr = ptr;
655
78
            return tok;
656
78
          }
657
37.2k
          break;
658
37.3k
        }
659
37.2k
        case BT_LT:
660
17
          *nextTokPtr = ptr;
661
17
          return XML_TOK_INVALID;
662
39.4M
        default:
663
39.4M
          ptr += MINBPC(enc);
664
39.4M
          break;
665
40.1M
        }
666
40.1M
      }
667
735k
      ptr += MINBPC(enc);
668
735k
      REQUIRE_CHAR(enc, ptr, end);
669
735k
      switch (BYTE_TYPE(enc, ptr)) {
670
724k
      case BT_S:
671
726k
      case BT_CR:
672
731k
      case BT_LF:
673
731k
        break;
674
1.56k
      case BT_SOL:
675
1.56k
        goto sol;
676
2.51k
      case BT_GT:
677
2.51k
        goto gt;
678
44
      default:
679
44
        *nextTokPtr = ptr;
680
44
        return XML_TOK_INVALID;
681
735k
      }
682
      /* ptr points to closing quote */
683
755k
      for (;;) {
684
755k
        ptr += MINBPC(enc);
685
755k
        REQUIRE_CHAR(enc, ptr, end);
686
755k
        switch (BYTE_TYPE(enc, ptr)) {
687
588k
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
688
885
        case BT_S:
689
24.4k
        case BT_CR:
690
24.8k
        case BT_LF:
691
24.8k
          continue;
692
1.30k
        case BT_GT:
693
3.82k
        gt:
694
3.82k
          *nextTokPtr = ptr + MINBPC(enc);
695
3.82k
          return XML_TOK_START_TAG_WITH_ATTS;
696
233
        case BT_SOL:
697
1.79k
        sol:
698
1.79k
          ptr += MINBPC(enc);
699
1.79k
          REQUIRE_CHAR(enc, ptr, end);
700
1.77k
          if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
701
27
            *nextTokPtr = ptr;
702
27
            return XML_TOK_INVALID;
703
27
          }
704
1.74k
          *nextTokPtr = ptr + MINBPC(enc);
705
1.74k
          return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
706
56
        default:
707
56
          *nextTokPtr = ptr;
708
56
          return XML_TOK_INVALID;
709
755k
        }
710
729k
        break;
711
755k
      }
712
729k
      break;
713
731k
    }
714
729k
    default:
715
72
      *nextTokPtr = ptr;
716
72
      return XML_TOK_INVALID;
717
2.45M
    }
718
2.45M
  }
719
390
  return XML_TOK_PARTIAL;
720
9.49k
}
xmltok.c:big2_scanAtts
Line
Count
Source
573
10.2k
                 const char **nextTokPtr) {
574
10.2k
#  ifdef XML_NS
575
10.2k
  int hadColon = 0;
576
10.2k
#  endif
577
1.25M
  while (HAS_CHAR(enc, ptr, end)) {
578
1.25M
    switch (BYTE_TYPE(enc, ptr)) {
579
5.76M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
580
0
#  ifdef XML_NS
581
2.51k
    case BT_COLON:
582
2.51k
      if (hadColon) {
583
7
        *nextTokPtr = ptr;
584
7
        return XML_TOK_INVALID;
585
7
      }
586
2.51k
      hadColon = 1;
587
2.51k
      ptr += MINBPC(enc);
588
2.51k
      REQUIRE_CHAR(enc, ptr, end);
589
2.49k
      switch (BYTE_TYPE(enc, ptr)) {
590
4.39k
        CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
591
39
      default:
592
39
        *nextTokPtr = ptr;
593
39
        return XML_TOK_INVALID;
594
2.49k
      }
595
2.37k
      break;
596
2.37k
#  endif
597
2.37k
    case BT_S:
598
3.11k
    case BT_CR:
599
9.89k
    case BT_LF:
600
241k
      for (;;) {
601
241k
        int t;
602
603
241k
        ptr += MINBPC(enc);
604
241k
        REQUIRE_CHAR(enc, ptr, end);
605
241k
        t = BYTE_TYPE(enc, ptr);
606
241k
        if (t == BT_EQUALS)
607
9.71k
          break;
608
231k
        switch (t) {
609
119k
        case BT_S:
610
227k
        case BT_LF:
611
231k
        case BT_CR:
612
231k
          break;
613
53
        default:
614
53
          *nextTokPtr = ptr;
615
53
          return XML_TOK_INVALID;
616
231k
        }
617
231k
      }
618
      /* fall through */
619
89.0k
    case BT_EQUALS: {
620
89.0k
      int open;
621
89.0k
#  ifdef XML_NS
622
89.0k
      hadColon = 0;
623
89.0k
#  endif
624
102k
      for (;;) {
625
102k
        ptr += MINBPC(enc);
626
102k
        REQUIRE_CHAR(enc, ptr, end);
627
102k
        open = BYTE_TYPE(enc, ptr);
628
102k
        if (open == BT_QUOT || open == BT_APOS)
629
88.8k
          break;
630
13.4k
        switch (open) {
631
2.91k
        case BT_S:
632
5.93k
        case BT_LF:
633
13.4k
        case BT_CR:
634
13.4k
          break;
635
50
        default:
636
50
          *nextTokPtr = ptr;
637
50
          return XML_TOK_INVALID;
638
13.4k
        }
639
13.4k
      }
640
88.8k
      ptr += MINBPC(enc);
641
      /* in attribute value */
642
79.0M
      for (;;) {
643
79.0M
        int t;
644
79.0M
        REQUIRE_CHAR(enc, ptr, end);
645
79.0M
        t = BYTE_TYPE(enc, ptr);
646
79.0M
        if (t == open)
647
85.6k
          break;
648
78.9M
        switch (t) {
649
1.00M
          INVALID_CASES(ptr, nextTokPtr)
650
219k
        case BT_AMP: {
651
219k
          int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
652
219k
          if (tok <= 0) {
653
135
            if (tok == XML_TOK_INVALID)
654
43
              *nextTokPtr = ptr;
655
135
            return tok;
656
135
          }
657
219k
          break;
658
219k
        }
659
219k
        case BT_LT:
660
23
          *nextTokPtr = ptr;
661
23
          return XML_TOK_INVALID;
662
78.2M
        default:
663
78.2M
          ptr += MINBPC(enc);
664
78.2M
          break;
665
78.9M
        }
666
78.9M
      }
667
85.6k
      ptr += MINBPC(enc);
668
85.6k
      REQUIRE_CHAR(enc, ptr, end);
669
85.6k
      switch (BYTE_TYPE(enc, ptr)) {
670
11.1k
      case BT_S:
671
15.2k
      case BT_CR:
672
81.8k
      case BT_LF:
673
81.8k
        break;
674
937
      case BT_SOL:
675
937
        goto sol;
676
2.78k
      case BT_GT:
677
2.78k
        goto gt;
678
38
      default:
679
38
        *nextTokPtr = ptr;
680
38
        return XML_TOK_INVALID;
681
85.6k
      }
682
      /* ptr points to closing quote */
683
90.0k
      for (;;) {
684
90.0k
        ptr += MINBPC(enc);
685
90.0k
        REQUIRE_CHAR(enc, ptr, end);
686
89.9k
        switch (BYTE_TYPE(enc, ptr)) {
687
70.6k
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
688
5.12k
        case BT_S:
689
7.36k
        case BT_CR:
690
8.17k
        case BT_LF:
691
8.17k
          continue;
692
1.12k
        case BT_GT:
693
3.90k
        gt:
694
3.90k
          *nextTokPtr = ptr + MINBPC(enc);
695
3.90k
          return XML_TOK_START_TAG_WITH_ATTS;
696
646
        case BT_SOL:
697
1.58k
        sol:
698
1.58k
          ptr += MINBPC(enc);
699
1.58k
          REQUIRE_CHAR(enc, ptr, end);
700
1.56k
          if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
701
44
            *nextTokPtr = ptr;
702
44
            return XML_TOK_INVALID;
703
44
          }
704
1.52k
          *nextTokPtr = ptr + MINBPC(enc);
705
1.52k
          return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
706
53
        default:
707
53
          *nextTokPtr = ptr;
708
53
          return XML_TOK_INVALID;
709
89.9k
        }
710
79.8k
        break;
711
89.9k
      }
712
79.8k
      break;
713
81.8k
    }
714
79.8k
    default:
715
86
      *nextTokPtr = ptr;
716
86
      return XML_TOK_INVALID;
717
1.25M
    }
718
1.25M
  }
719
443
  return XML_TOK_PARTIAL;
720
10.2k
}
721
722
/* ptr points to character following "<" */
723
724
static int PTRCALL
725
PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
726
24.7M
               const char **nextTokPtr) {
727
24.7M
#  ifdef XML_NS
728
24.7M
  int hadColon;
729
24.7M
#  endif
730
24.7M
  REQUIRE_CHAR(enc, ptr, end);
731
24.7M
  switch (BYTE_TYPE(enc, ptr)) {
732
8.23M
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
733
2.14M
  case BT_EXCL:
734
2.14M
    ptr += MINBPC(enc);
735
2.14M
    REQUIRE_CHAR(enc, ptr, end);
736
2.14M
    switch (BYTE_TYPE(enc, ptr)) {
737
714k
    case BT_MINUS:
738
714k
      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
739
1.43M
    case BT_LSQB:
740
1.43M
      return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
741
2.14M
    }
742
221
    *nextTokPtr = ptr;
743
221
    return XML_TOK_INVALID;
744
441k
  case BT_QUEST:
745
441k
    return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
746
199k
  case BT_SOL:
747
199k
    return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
748
722
  default:
749
722
    *nextTokPtr = ptr;
750
722
    return XML_TOK_INVALID;
751
24.7M
  }
752
21.9M
#  ifdef XML_NS
753
21.9M
  hadColon = 0;
754
21.9M
#  endif
755
  /* we have a start-tag */
756
133M
  while (HAS_CHAR(enc, ptr, end)) {
757
133M
    switch (BYTE_TYPE(enc, ptr)) {
758
386M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
759
0
#  ifdef XML_NS
760
25.5k
    case BT_COLON:
761
25.5k
      if (hadColon) {
762
35
        *nextTokPtr = ptr;
763
35
        return XML_TOK_INVALID;
764
35
      }
765
25.4k
      hadColon = 1;
766
25.4k
      ptr += MINBPC(enc);
767
25.4k
      REQUIRE_CHAR(enc, ptr, end);
768
25.4k
      switch (BYTE_TYPE(enc, ptr)) {
769
19.6k
        CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
770
188
      default:
771
188
        *nextTokPtr = ptr;
772
188
        return XML_TOK_INVALID;
773
25.4k
      }
774
24.9k
      break;
775
24.9k
#  endif
776
3.46M
    case BT_S:
777
3.59M
    case BT_CR:
778
3.66M
    case BT_LF: {
779
3.66M
      ptr += MINBPC(enc);
780
4.39M
      while (HAS_CHAR(enc, ptr, end)) {
781
4.39M
        switch (BYTE_TYPE(enc, ptr)) {
782
1.16M
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
783
80.0k
        case BT_GT:
784
80.0k
          goto gt;
785
2.54M
        case BT_SOL:
786
2.54M
          goto sol;
787
71.7k
        case BT_S:
788
596k
        case BT_CR:
789
726k
        case BT_LF:
790
726k
          ptr += MINBPC(enc);
791
726k
          continue;
792
235
        default:
793
235
          *nextTokPtr = ptr;
794
235
          return XML_TOK_INVALID;
795
4.39M
        }
796
1.04M
        return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
797
4.39M
      }
798
737
      return XML_TOK_PARTIAL;
799
3.66M
    }
800
18.2M
    case BT_GT:
801
18.2M
    gt:
802
18.2M
      *nextTokPtr = ptr + MINBPC(enc);
803
18.2M
      return XML_TOK_START_TAG_NO_ATTS;
804
52.8k
    case BT_SOL:
805
2.59M
    sol:
806
2.59M
      ptr += MINBPC(enc);
807
2.59M
      REQUIRE_CHAR(enc, ptr, end);
808
2.59M
      if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
809
185
        *nextTokPtr = ptr;
810
185
        return XML_TOK_INVALID;
811
185
      }
812
2.59M
      *nextTokPtr = ptr + MINBPC(enc);
813
2.59M
      return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
814
851
    default:
815
851
      *nextTokPtr = ptr;
816
851
      return XML_TOK_INVALID;
817
133M
    }
818
133M
  }
819
3.65k
  return XML_TOK_PARTIAL;
820
21.9M
}
xmltok.c:normal_scanLt
Line
Count
Source
726
23.9M
               const char **nextTokPtr) {
727
23.9M
#  ifdef XML_NS
728
23.9M
  int hadColon;
729
23.9M
#  endif
730
23.9M
  REQUIRE_CHAR(enc, ptr, end);
731
23.9M
  switch (BYTE_TYPE(enc, ptr)) {
732
7.91M
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
733
2.09M
  case BT_EXCL:
734
2.09M
    ptr += MINBPC(enc);
735
2.09M
    REQUIRE_CHAR(enc, ptr, end);
736
2.09M
    switch (BYTE_TYPE(enc, ptr)) {
737
678k
    case BT_MINUS:
738
678k
      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
739
1.42M
    case BT_LSQB:
740
1.42M
      return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
741
2.09M
    }
742
154
    *nextTokPtr = ptr;
743
154
    return XML_TOK_INVALID;
744
105k
  case BT_QUEST:
745
105k
    return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
746
155k
  case BT_SOL:
747
155k
    return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
748
558
  default:
749
558
    *nextTokPtr = ptr;
750
558
    return XML_TOK_INVALID;
751
23.9M
  }
752
21.5M
#  ifdef XML_NS
753
21.5M
  hadColon = 0;
754
21.5M
#  endif
755
  /* we have a start-tag */
756
131M
  while (HAS_CHAR(enc, ptr, end)) {
757
131M
    switch (BYTE_TYPE(enc, ptr)) {
758
378M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
759
0
#  ifdef XML_NS
760
21.9k
    case BT_COLON:
761
21.9k
      if (hadColon) {
762
18
        *nextTokPtr = ptr;
763
18
        return XML_TOK_INVALID;
764
18
      }
765
21.9k
      hadColon = 1;
766
21.9k
      ptr += MINBPC(enc);
767
21.9k
      REQUIRE_CHAR(enc, ptr, end);
768
21.9k
      switch (BYTE_TYPE(enc, ptr)) {
769
16.3k
        CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
770
78
      default:
771
78
        *nextTokPtr = ptr;
772
78
        return XML_TOK_INVALID;
773
21.9k
      }
774
21.7k
      break;
775
21.7k
#  endif
776
3.45M
    case BT_S:
777
3.53M
    case BT_CR:
778
3.60M
    case BT_LF: {
779
3.60M
      ptr += MINBPC(enc);
780
3.97M
      while (HAS_CHAR(enc, ptr, end)) {
781
3.97M
        switch (BYTE_TYPE(enc, ptr)) {
782
1.14M
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
783
37.4k
        case BT_GT:
784
37.4k
          goto gt;
785
2.53M
        case BT_SOL:
786
2.53M
          goto sol;
787
4.19k
        case BT_S:
788
300k
        case BT_CR:
789
366k
        case BT_LF:
790
366k
          ptr += MINBPC(enc);
791
366k
          continue;
792
102
        default:
793
102
          *nextTokPtr = ptr;
794
102
          return XML_TOK_INVALID;
795
3.97M
        }
796
1.02M
        return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
797
3.97M
      }
798
351
      return XML_TOK_PARTIAL;
799
3.60M
    }
800
17.8M
    case BT_GT:
801
17.9M
    gt:
802
17.9M
      *nextTokPtr = ptr + MINBPC(enc);
803
17.9M
      return XML_TOK_START_TAG_NO_ATTS;
804
51.5k
    case BT_SOL:
805
2.59M
    sol:
806
2.59M
      ptr += MINBPC(enc);
807
2.59M
      REQUIRE_CHAR(enc, ptr, end);
808
2.59M
      if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
809
84
        *nextTokPtr = ptr;
810
84
        return XML_TOK_INVALID;
811
84
      }
812
2.59M
      *nextTokPtr = ptr + MINBPC(enc);
813
2.59M
      return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
814
698
    default:
815
698
      *nextTokPtr = ptr;
816
698
      return XML_TOK_INVALID;
817
131M
    }
818
131M
  }
819
2.54k
  return XML_TOK_PARTIAL;
820
21.5M
}
xmltok.c:little2_scanLt
Line
Count
Source
726
370k
               const char **nextTokPtr) {
727
370k
#  ifdef XML_NS
728
370k
  int hadColon;
729
370k
#  endif
730
370k
  REQUIRE_CHAR(enc, ptr, end);
731
370k
  switch (BYTE_TYPE(enc, ptr)) {
732
243k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
733
36.3k
  case BT_EXCL:
734
36.3k
    ptr += MINBPC(enc);
735
36.3k
    REQUIRE_CHAR(enc, ptr, end);
736
36.3k
    switch (BYTE_TYPE(enc, ptr)) {
737
34.7k
    case BT_MINUS:
738
34.7k
      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
739
1.51k
    case BT_LSQB:
740
1.51k
      return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
741
36.3k
    }
742
38
    *nextTokPtr = ptr;
743
38
    return XML_TOK_INVALID;
744
106k
  case BT_QUEST:
745
106k
    return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
746
2.03k
  case BT_SOL:
747
2.03k
    return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
748
75
  default:
749
75
    *nextTokPtr = ptr;
750
75
    return XML_TOK_INVALID;
751
370k
  }
752
225k
#  ifdef XML_NS
753
225k
  hadColon = 0;
754
225k
#  endif
755
  /* we have a start-tag */
756
1.35M
  while (HAS_CHAR(enc, ptr, end)) {
757
1.35M
    switch (BYTE_TYPE(enc, ptr)) {
758
5.63M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
759
0
#  ifdef XML_NS
760
2.54k
    case BT_COLON:
761
2.54k
      if (hadColon) {
762
9
        *nextTokPtr = ptr;
763
9
        return XML_TOK_INVALID;
764
9
      }
765
2.53k
      hadColon = 1;
766
2.53k
      ptr += MINBPC(enc);
767
2.53k
      REQUIRE_CHAR(enc, ptr, end);
768
2.51k
      switch (BYTE_TYPE(enc, ptr)) {
769
2.40k
        CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
770
63
      default:
771
63
        *nextTokPtr = ptr;
772
63
        return XML_TOK_INVALID;
773
2.51k
      }
774
2.36k
      break;
775
2.36k
#  endif
776
6.03k
    case BT_S:
777
9.03k
    case BT_CR:
778
11.6k
    case BT_LF: {
779
11.6k
      ptr += MINBPC(enc);
780
116k
      while (HAS_CHAR(enc, ptr, end)) {
781
116k
        switch (BYTE_TYPE(enc, ptr)) {
782
10.9k
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
783
1.58k
        case BT_GT:
784
1.58k
          goto gt;
785
96
        case BT_SOL:
786
96
          goto sol;
787
42.8k
        case BT_S:
788
64.5k
        case BT_CR:
789
105k
        case BT_LF:
790
105k
          ptr += MINBPC(enc);
791
105k
          continue;
792
69
        default:
793
69
          *nextTokPtr = ptr;
794
69
          return XML_TOK_INVALID;
795
116k
        }
796
9.49k
        return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
797
116k
      }
798
200
      return XML_TOK_PARTIAL;
799
11.6k
    }
800
211k
    case BT_GT:
801
213k
    gt:
802
213k
      *nextTokPtr = ptr + MINBPC(enc);
803
213k
      return XML_TOK_START_TAG_NO_ATTS;
804
808
    case BT_SOL:
805
904
    sol:
806
904
      ptr += MINBPC(enc);
807
904
      REQUIRE_CHAR(enc, ptr, end);
808
878
      if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
809
57
        *nextTokPtr = ptr;
810
57
        return XML_TOK_INVALID;
811
57
      }
812
821
      *nextTokPtr = ptr + MINBPC(enc);
813
821
      return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
814
83
    default:
815
83
      *nextTokPtr = ptr;
816
83
      return XML_TOK_INVALID;
817
1.35M
    }
818
1.35M
  }
819
624
  return XML_TOK_PARTIAL;
820
225k
}
xmltok.c:big2_scanLt
Line
Count
Source
726
436k
               const char **nextTokPtr) {
727
436k
#  ifdef XML_NS
728
436k
  int hadColon;
729
436k
#  endif
730
436k
  REQUIRE_CHAR(enc, ptr, end);
731
436k
  switch (BYTE_TYPE(enc, ptr)) {
732
75.5k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
733
10.6k
  case BT_EXCL:
734
10.6k
    ptr += MINBPC(enc);
735
10.6k
    REQUIRE_CHAR(enc, ptr, end);
736
10.6k
    switch (BYTE_TYPE(enc, ptr)) {
737
1.41k
    case BT_MINUS:
738
1.41k
      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
739
9.20k
    case BT_LSQB:
740
9.20k
      return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
741
10.6k
    }
742
29
    *nextTokPtr = ptr;
743
29
    return XML_TOK_INVALID;
744
229k
  case BT_QUEST:
745
229k
    return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
746
42.1k
  case BT_SOL:
747
42.1k
    return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
748
89
  default:
749
89
    *nextTokPtr = ptr;
750
89
    return XML_TOK_INVALID;
751
436k
  }
752
153k
#  ifdef XML_NS
753
153k
  hadColon = 0;
754
153k
#  endif
755
  /* we have a start-tag */
756
473k
  while (HAS_CHAR(enc, ptr, end)) {
757
473k
    switch (BYTE_TYPE(enc, ptr)) {
758
1.58M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
759
0
#  ifdef XML_NS
760
1.01k
    case BT_COLON:
761
1.01k
      if (hadColon) {
762
8
        *nextTokPtr = ptr;
763
8
        return XML_TOK_INVALID;
764
8
      }
765
1.01k
      hadColon = 1;
766
1.01k
      ptr += MINBPC(enc);
767
1.01k
      REQUIRE_CHAR(enc, ptr, end);
768
997
      switch (BYTE_TYPE(enc, ptr)) {
769
864
        CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
770
47
      default:
771
47
        *nextTokPtr = ptr;
772
47
        return XML_TOK_INVALID;
773
997
      }
774
843
      break;
775
843
#  endif
776
5.27k
    case BT_S:
777
50.7k
    case BT_CR:
778
52.1k
    case BT_LF: {
779
52.1k
      ptr += MINBPC(enc);
780
306k
      while (HAS_CHAR(enc, ptr, end)) {
781
306k
        switch (BYTE_TYPE(enc, ptr)) {
782
8.28k
          CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
783
41.0k
        case BT_GT:
784
41.0k
          goto gt;
785
470
        case BT_SOL:
786
470
          goto sol;
787
24.6k
        case BT_S:
788
231k
        case BT_CR:
789
254k
        case BT_LF:
790
254k
          ptr += MINBPC(enc);
791
254k
          continue;
792
64
        default:
793
64
          *nextTokPtr = ptr;
794
64
          return XML_TOK_INVALID;
795
306k
        }
796
10.2k
        return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
797
306k
      }
798
186
      return XML_TOK_PARTIAL;
799
52.1k
    }
800
99.5k
    case BT_GT:
801
140k
    gt:
802
140k
      *nextTokPtr = ptr + MINBPC(enc);
803
140k
      return XML_TOK_START_TAG_NO_ATTS;
804
487
    case BT_SOL:
805
957
    sol:
806
957
      ptr += MINBPC(enc);
807
957
      REQUIRE_CHAR(enc, ptr, end);
808
911
      if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
809
44
        *nextTokPtr = ptr;
810
44
        return XML_TOK_INVALID;
811
44
      }
812
867
      *nextTokPtr = ptr + MINBPC(enc);
813
867
      return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
814
70
    default:
815
70
      *nextTokPtr = ptr;
816
70
      return XML_TOK_INVALID;
817
473k
    }
818
473k
  }
819
483
  return XML_TOK_PARTIAL;
820
153k
}
821
822
static int PTRCALL
823
PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
824
45.8M
                   const char **nextTokPtr) {
825
45.8M
  if (ptr >= end)
826
1.13M
    return XML_TOK_NONE;
827
44.6M
  if (MINBPC(enc) > 1) {
828
2.58M
    size_t n = end - ptr;
829
2.58M
    if (n & (MINBPC(enc) - 1)) {
830
103k
      n &= ~(MINBPC(enc) - 1);
831
103k
      if (n == 0)
832
1.89k
        return XML_TOK_PARTIAL;
833
101k
      end = ptr + n;
834
101k
    }
835
2.58M
  }
836
44.6M
  switch (BYTE_TYPE(enc, ptr)) {
837
24.7M
  case BT_LT:
838
24.7M
    return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
839
4.08M
  case BT_AMP:
840
4.08M
    return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
841
2.14M
  case BT_CR:
842
2.14M
    ptr += MINBPC(enc);
843
2.14M
    if (! HAS_CHAR(enc, ptr, end))
844
1.06k
      return XML_TOK_TRAILING_CR;
845
2.14M
    if (BYTE_TYPE(enc, ptr) == BT_LF)
846
28.4k
      ptr += MINBPC(enc);
847
2.14M
    *nextTokPtr = ptr;
848
2.14M
    return XML_TOK_DATA_NEWLINE;
849
3.58M
  case BT_LF:
850
3.58M
    *nextTokPtr = ptr + MINBPC(enc);
851
3.58M
    return XML_TOK_DATA_NEWLINE;
852
64.7k
  case BT_RSQB:
853
64.7k
    ptr += MINBPC(enc);
854
64.7k
    if (! HAS_CHAR(enc, ptr, end))
855
2.65k
      return XML_TOK_TRAILING_RSQB;
856
62.0k
    if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
857
42.0k
      break;
858
19.9k
    ptr += MINBPC(enc);
859
19.9k
    if (! HAS_CHAR(enc, ptr, end))
860
3.98k
      return XML_TOK_TRAILING_RSQB;
861
15.9k
    if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
862
15.9k
      ptr -= MINBPC(enc);
863
15.9k
      break;
864
15.9k
    }
865
24
    *nextTokPtr = ptr;
866
24
    return XML_TOK_INVALID;
867
2.56M
    INVALID_CASES(ptr, nextTokPtr)
868
8.79M
  default:
869
8.79M
    ptr += MINBPC(enc);
870
8.79M
    break;
871
44.6M
  }
872
1.09G
  while (HAS_CHAR(enc, ptr, end)) {
873
1.09G
    switch (BYTE_TYPE(enc, ptr)) {
874
0
#  define LEAD_CASE(n)                                                         \
875
351M
  case BT_LEAD##n:                                                             \
876
351M
    if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
877
1.78k
      *nextTokPtr = ptr;                                                       \
878
1.78k
      return XML_TOK_DATA_CHARS;                                               \
879
1.78k
    }                                                                          \
880
351M
    ptr += n;                                                                  \
881
351M
    break;
882
231M
      LEAD_CASE(2)
883
118M
      LEAD_CASE(3)
884
2.31M
      LEAD_CASE(4)
885
0
#  undef LEAD_CASE
886
1.56M
    case BT_RSQB:
887
1.56M
      if (HAS_CHARS(enc, ptr, end, 2)) {
888
1.56M
        if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
889
417k
          ptr += MINBPC(enc);
890
417k
          break;
891
417k
        }
892
1.14M
        if (HAS_CHARS(enc, ptr, end, 3)) {
893
1.14M
          if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) {
894
1.14M
            ptr += MINBPC(enc);
895
1.14M
            break;
896
1.14M
          }
897
39
          *nextTokPtr = ptr + 2 * MINBPC(enc);
898
39
          return XML_TOK_INVALID;
899
1.14M
        }
900
1.14M
      }
901
      /* fall through */
902
1.06M
    case BT_AMP:
903
6.04M
    case BT_LT:
904
6.04M
    case BT_NONXML:
905
6.04M
    case BT_MALFORM:
906
6.04M
    case BT_TRAIL:
907
6.38M
    case BT_CR:
908
8.21M
    case BT_LF:
909
8.21M
      *nextTokPtr = ptr;
910
8.21M
      return XML_TOK_DATA_CHARS;
911
736M
    default:
912
736M
      ptr += MINBPC(enc);
913
736M
      break;
914
1.09G
    }
915
1.09G
  }
916
1.91M
  *nextTokPtr = ptr;
917
1.91M
  return XML_TOK_DATA_CHARS;
918
10.1M
}
xmltok.c:normal_contentTok
Line
Count
Source
824
43.2M
                   const char **nextTokPtr) {
825
43.2M
  if (ptr >= end)
826
1.12M
    return XML_TOK_NONE;
827
42.0M
  if (MINBPC(enc) > 1) {
828
0
    size_t n = end - ptr;
829
0
    if (n & (MINBPC(enc) - 1)) {
830
0
      n &= ~(MINBPC(enc) - 1);
831
0
      if (n == 0)
832
0
        return XML_TOK_PARTIAL;
833
0
      end = ptr + n;
834
0
    }
835
0
  }
836
42.0M
  switch (BYTE_TYPE(enc, ptr)) {
837
23.9M
  case BT_LT:
838
23.9M
    return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
839
3.89M
  case BT_AMP:
840
3.89M
    return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
841
1.29M
  case BT_CR:
842
1.29M
    ptr += MINBPC(enc);
843
1.29M
    if (! HAS_CHAR(enc, ptr, end))
844
464
      return XML_TOK_TRAILING_CR;
845
1.29M
    if (BYTE_TYPE(enc, ptr) == BT_LF)
846
346
      ptr += MINBPC(enc);
847
1.29M
    *nextTokPtr = ptr;
848
1.29M
    return XML_TOK_DATA_NEWLINE;
849
3.46M
  case BT_LF:
850
3.46M
    *nextTokPtr = ptr + MINBPC(enc);
851
3.46M
    return XML_TOK_DATA_NEWLINE;
852
50.8k
  case BT_RSQB:
853
50.8k
    ptr += MINBPC(enc);
854
50.8k
    if (! HAS_CHAR(enc, ptr, end))
855
2.29k
      return XML_TOK_TRAILING_RSQB;
856
48.5k
    if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
857
33.2k
      break;
858
15.3k
    ptr += MINBPC(enc);
859
15.3k
    if (! HAS_CHAR(enc, ptr, end))
860
3.73k
      return XML_TOK_TRAILING_RSQB;
861
11.6k
    if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
862
11.5k
      ptr -= MINBPC(enc);
863
11.5k
      break;
864
11.5k
    }
865
12
    *nextTokPtr = ptr;
866
12
    return XML_TOK_INVALID;
867
2.53M
    INVALID_CASES(ptr, nextTokPtr)
868
8.20M
  default:
869
8.20M
    ptr += MINBPC(enc);
870
8.20M
    break;
871
42.0M
  }
872
1.08G
  while (HAS_CHAR(enc, ptr, end)) {
873
1.08G
    switch (BYTE_TYPE(enc, ptr)) {
874
0
#  define LEAD_CASE(n)                                                         \
875
0
  case BT_LEAD##n:                                                             \
876
0
    if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
877
0
      *nextTokPtr = ptr;                                                       \
878
0
      return XML_TOK_DATA_CHARS;                                               \
879
0
    }                                                                          \
880
0
    ptr += n;                                                                  \
881
0
    break;
882
231M
      LEAD_CASE(2)
883
118M
      LEAD_CASE(3)
884
949k
      LEAD_CASE(4)
885
0
#  undef LEAD_CASE
886
1.19M
    case BT_RSQB:
887
1.19M
      if (HAS_CHARS(enc, ptr, end, 2)) {
888
1.18M
        if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
889
225k
          ptr += MINBPC(enc);
890
225k
          break;
891
225k
        }
892
962k
        if (HAS_CHARS(enc, ptr, end, 3)) {
893
958k
          if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) {
894
958k
            ptr += MINBPC(enc);
895
958k
            break;
896
958k
          }
897
22
          *nextTokPtr = ptr + 2 * MINBPC(enc);
898
22
          return XML_TOK_INVALID;
899
958k
        }
900
962k
      }
901
      /* fall through */
902
905k
    case BT_AMP:
903
5.62M
    case BT_LT:
904
5.62M
    case BT_NONXML:
905
5.62M
    case BT_MALFORM:
906
5.62M
    case BT_TRAIL:
907
5.85M
    case BT_CR:
908
7.61M
    case BT_LF:
909
7.61M
      *nextTokPtr = ptr;
910
7.61M
      return XML_TOK_DATA_CHARS;
911
723M
    default:
912
723M
      ptr += MINBPC(enc);
913
723M
      break;
914
1.08G
    }
915
1.08G
  }
916
1.90M
  *nextTokPtr = ptr;
917
1.90M
  return XML_TOK_DATA_CHARS;
918
9.51M
}
xmltok.c:little2_contentTok
Line
Count
Source
824
1.22M
                   const char **nextTokPtr) {
825
1.22M
  if (ptr >= end)
826
4.80k
    return XML_TOK_NONE;
827
1.22M
  if (MINBPC(enc) > 1) {
828
1.22M
    size_t n = end - ptr;
829
1.22M
    if (n & (MINBPC(enc) - 1)) {
830
57.9k
      n &= ~(MINBPC(enc) - 1);
831
57.9k
      if (n == 0)
832
896
        return XML_TOK_PARTIAL;
833
57.0k
      end = ptr + n;
834
57.0k
    }
835
1.22M
  }
836
1.22M
  switch (BYTE_TYPE(enc, ptr)) {
837
370k
  case BT_LT:
838
370k
    return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
839
40.9k
  case BT_AMP:
840
40.9k
    return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
841
400k
  case BT_CR:
842
400k
    ptr += MINBPC(enc);
843
400k
    if (! HAS_CHAR(enc, ptr, end))
844
290
      return XML_TOK_TRAILING_CR;
845
400k
    if (BYTE_TYPE(enc, ptr) == BT_LF)
846
14.4k
      ptr += MINBPC(enc);
847
400k
    *nextTokPtr = ptr;
848
400k
    return XML_TOK_DATA_NEWLINE;
849
97.2k
  case BT_LF:
850
97.2k
    *nextTokPtr = ptr + MINBPC(enc);
851
97.2k
    return XML_TOK_DATA_NEWLINE;
852
6.76k
  case BT_RSQB:
853
6.76k
    ptr += MINBPC(enc);
854
6.76k
    if (! HAS_CHAR(enc, ptr, end))
855
183
      return XML_TOK_TRAILING_RSQB;
856
6.58k
    if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
857
4.06k
      break;
858
2.51k
    ptr += MINBPC(enc);
859
2.51k
    if (! HAS_CHAR(enc, ptr, end))
860
140
      return XML_TOK_TRAILING_RSQB;
861
2.37k
    if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
862
2.37k
      ptr -= MINBPC(enc);
863
2.37k
      break;
864
2.37k
    }
865
6
    *nextTokPtr = ptr;
866
6
    return XML_TOK_INVALID;
867
10.4k
    INVALID_CASES(ptr, nextTokPtr)
868
301k
  default:
869
301k
    ptr += MINBPC(enc);
870
301k
    break;
871
1.22M
  }
872
5.91M
  while (HAS_CHAR(enc, ptr, end)) {
873
5.91M
    switch (BYTE_TYPE(enc, ptr)) {
874
0
#  define LEAD_CASE(n)                                                         \
875
0
  case BT_LEAD##n:                                                             \
876
0
    if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
877
0
      *nextTokPtr = ptr;                                                       \
878
0
      return XML_TOK_DATA_CHARS;                                               \
879
0
    }                                                                          \
880
0
    ptr += n;                                                                  \
881
0
    break;
882
0
      LEAD_CASE(2)
883
0
      LEAD_CASE(3)
884
509k
      LEAD_CASE(4)
885
0
#  undef LEAD_CASE
886
221k
    case BT_RSQB:
887
221k
      if (HAS_CHARS(enc, ptr, end, 2)) {
888
221k
        if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
889
115k
          ptr += MINBPC(enc);
890
115k
          break;
891
115k
        }
892
105k
        if (HAS_CHARS(enc, ptr, end, 3)) {
893
105k
          if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) {
894
105k
            ptr += MINBPC(enc);
895
105k
            break;
896
105k
          }
897
6
          *nextTokPtr = ptr + 2 * MINBPC(enc);
898
6
          return XML_TOK_INVALID;
899
105k
        }
900
105k
      }
901
      /* fall through */
902
31.9k
    case BT_AMP:
903
230k
    case BT_LT:
904
230k
    case BT_NONXML:
905
230k
    case BT_MALFORM:
906
230k
    case BT_TRAIL:
907
258k
    case BT_CR:
908
308k
    case BT_LF:
909
308k
      *nextTokPtr = ptr;
910
308k
      return XML_TOK_DATA_CHARS;
911
4.87M
    default:
912
4.87M
      ptr += MINBPC(enc);
913
4.87M
      break;
914
5.91M
    }
915
5.91M
  }
916
3.08k
  *nextTokPtr = ptr;
917
3.08k
  return XML_TOK_DATA_CHARS;
918
312k
}
xmltok.c:big2_contentTok
Line
Count
Source
824
1.36M
                   const char **nextTokPtr) {
825
1.36M
  if (ptr >= end)
826
6.34k
    return XML_TOK_NONE;
827
1.35M
  if (MINBPC(enc) > 1) {
828
1.35M
    size_t n = end - ptr;
829
1.35M
    if (n & (MINBPC(enc) - 1)) {
830
45.7k
      n &= ~(MINBPC(enc) - 1);
831
45.7k
      if (n == 0)
832
995
        return XML_TOK_PARTIAL;
833
44.7k
      end = ptr + n;
834
44.7k
    }
835
1.35M
  }
836
1.35M
  switch (BYTE_TYPE(enc, ptr)) {
837
436k
  case BT_LT:
838
436k
    return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
839
148k
  case BT_AMP:
840
148k
    return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
841
449k
  case BT_CR:
842
449k
    ptr += MINBPC(enc);
843
449k
    if (! HAS_CHAR(enc, ptr, end))
844
306
      return XML_TOK_TRAILING_CR;
845
449k
    if (BYTE_TYPE(enc, ptr) == BT_LF)
846
13.5k
      ptr += MINBPC(enc);
847
449k
    *nextTokPtr = ptr;
848
449k
    return XML_TOK_DATA_NEWLINE;
849
23.7k
  case BT_LF:
850
23.7k
    *nextTokPtr = ptr + MINBPC(enc);
851
23.7k
    return XML_TOK_DATA_NEWLINE;
852
7.09k
  case BT_RSQB:
853
7.09k
    ptr += MINBPC(enc);
854
7.09k
    if (! HAS_CHAR(enc, ptr, end))
855
185
      return XML_TOK_TRAILING_RSQB;
856
6.90k
    if (! CHAR_MATCHES(enc, ptr, ASCII_RSQB))
857
4.78k
      break;
858
2.12k
    ptr += MINBPC(enc);
859
2.12k
    if (! HAS_CHAR(enc, ptr, end))
860
109
      return XML_TOK_TRAILING_RSQB;
861
2.01k
    if (! CHAR_MATCHES(enc, ptr, ASCII_GT)) {
862
2.01k
      ptr -= MINBPC(enc);
863
2.01k
      break;
864
2.01k
    }
865
6
    *nextTokPtr = ptr;
866
6
    return XML_TOK_INVALID;
867
21.8k
    INVALID_CASES(ptr, nextTokPtr)
868
280k
  default:
869
280k
    ptr += MINBPC(enc);
870
280k
    break;
871
1.35M
  }
872
9.13M
  while (HAS_CHAR(enc, ptr, end)) {
873
9.13M
    switch (BYTE_TYPE(enc, ptr)) {
874
0
#  define LEAD_CASE(n)                                                         \
875
0
  case BT_LEAD##n:                                                             \
876
0
    if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) {                       \
877
0
      *nextTokPtr = ptr;                                                       \
878
0
      return XML_TOK_DATA_CHARS;                                               \
879
0
    }                                                                          \
880
0
    ptr += n;                                                                  \
881
0
    break;
882
0
      LEAD_CASE(2)
883
0
      LEAD_CASE(3)
884
856k
      LEAD_CASE(4)
885
0
#  undef LEAD_CASE
886
154k
    case BT_RSQB:
887
154k
      if (HAS_CHARS(enc, ptr, end, 2)) {
888
154k
        if (! CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
889
77.0k
          ptr += MINBPC(enc);
890
77.0k
          break;
891
77.0k
        }
892
77.5k
        if (HAS_CHARS(enc, ptr, end, 3)) {
893
77.4k
          if (! CHAR_MATCHES(enc, ptr + 2 * MINBPC(enc), ASCII_GT)) {
894
77.4k
            ptr += MINBPC(enc);
895
77.4k
            break;
896
77.4k
          }
897
11
          *nextTokPtr = ptr + 2 * MINBPC(enc);
898
11
          return XML_TOK_INVALID;
899
77.4k
        }
900
77.5k
      }
901
      /* fall through */
902
129k
    case BT_AMP:
903
196k
    case BT_LT:
904
196k
    case BT_NONXML:
905
196k
    case BT_MALFORM:
906
196k
    case BT_TRAIL:
907
278k
    case BT_CR:
908
292k
    case BT_LF:
909
292k
      *nextTokPtr = ptr;
910
292k
      return XML_TOK_DATA_CHARS;
911
7.83M
    default:
912
7.83M
      ptr += MINBPC(enc);
913
7.83M
      break;
914
9.13M
    }
915
9.13M
  }
916
4.44k
  *nextTokPtr = ptr;
917
4.44k
  return XML_TOK_DATA_CHARS;
918
297k
}
919
920
/* ptr points to character following "%" */
921
922
static int PTRCALL
923
PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
924
500k
                    const char **nextTokPtr) {
925
500k
  REQUIRE_CHAR(enc, ptr, end);
926
500k
  switch (BYTE_TYPE(enc, ptr)) {
927
190k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
928
9.27k
  case BT_S:
929
29.8k
  case BT_LF:
930
128k
  case BT_CR:
931
128k
  case BT_PERCNT:
932
128k
    *nextTokPtr = ptr;
933
128k
    return XML_TOK_PERCENT;
934
217
  default:
935
217
    *nextTokPtr = ptr;
936
217
    return XML_TOK_INVALID;
937
500k
  }
938
10.9M
  while (HAS_CHAR(enc, ptr, end)) {
939
10.9M
    switch (BYTE_TYPE(enc, ptr)) {
940
45.6M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
941
368k
    case BT_SEMI:
942
368k
      *nextTokPtr = ptr + MINBPC(enc);
943
368k
      return XML_TOK_PARAM_ENTITY_REF;
944
302
    default:
945
302
      *nextTokPtr = ptr;
946
302
      return XML_TOK_INVALID;
947
10.9M
    }
948
10.9M
  }
949
1.67k
  return XML_TOK_PARTIAL;
950
371k
}
xmltok.c:normal_scanPercent
Line
Count
Source
924
489k
                    const char **nextTokPtr) {
925
489k
  REQUIRE_CHAR(enc, ptr, end);
926
489k
  switch (BYTE_TYPE(enc, ptr)) {
927
187k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
928
2.23k
  case BT_S:
929
21.8k
  case BT_LF:
930
120k
  case BT_CR:
931
120k
  case BT_PERCNT:
932
120k
    *nextTokPtr = ptr;
933
120k
    return XML_TOK_PERCENT;
934
87
  default:
935
87
    *nextTokPtr = ptr;
936
87
    return XML_TOK_INVALID;
937
489k
  }
938
4.47M
  while (HAS_CHAR(enc, ptr, end)) {
939
4.47M
    switch (BYTE_TYPE(enc, ptr)) {
940
13.5M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
941
367k
    case BT_SEMI:
942
367k
      *nextTokPtr = ptr + MINBPC(enc);
943
367k
      return XML_TOK_PARAM_ENTITY_REF;
944
149
    default:
945
149
      *nextTokPtr = ptr;
946
149
      return XML_TOK_INVALID;
947
4.47M
    }
948
4.47M
  }
949
957
  return XML_TOK_PARTIAL;
950
368k
}
xmltok.c:little2_scanPercent
Line
Count
Source
924
4.34k
                    const char **nextTokPtr) {
925
4.34k
  REQUIRE_CHAR(enc, ptr, end);
926
4.32k
  switch (BYTE_TYPE(enc, ptr)) {
927
1.55k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
928
2.61k
  case BT_S:
929
2.73k
  case BT_LF:
930
2.98k
  case BT_CR:
931
2.99k
  case BT_PERCNT:
932
2.99k
    *nextTokPtr = ptr;
933
2.99k
    return XML_TOK_PERCENT;
934
72
  default:
935
72
    *nextTokPtr = ptr;
936
72
    return XML_TOK_INVALID;
937
4.32k
  }
938
3.83M
  while (HAS_CHAR(enc, ptr, end)) {
939
3.83M
    switch (BYTE_TYPE(enc, ptr)) {
940
19.1M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
941
584
    case BT_SEMI:
942
584
      *nextTokPtr = ptr + MINBPC(enc);
943
584
      return XML_TOK_PARAM_ENTITY_REF;
944
91
    default:
945
91
      *nextTokPtr = ptr;
946
91
      return XML_TOK_INVALID;
947
3.83M
    }
948
3.83M
  }
949
354
  return XML_TOK_PARTIAL;
950
1.15k
}
xmltok.c:big2_scanPercent
Line
Count
Source
924
6.87k
                    const char **nextTokPtr) {
925
6.87k
  REQUIRE_CHAR(enc, ptr, end);
926
6.85k
  switch (BYTE_TYPE(enc, ptr)) {
927
1.50k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
928
4.42k
  case BT_S:
929
5.24k
  case BT_LF:
930
5.58k
  case BT_CR:
931
5.59k
  case BT_PERCNT:
932
5.59k
    *nextTokPtr = ptr;
933
5.59k
    return XML_TOK_PERCENT;
934
58
  default:
935
58
    *nextTokPtr = ptr;
936
58
    return XML_TOK_INVALID;
937
6.85k
  }
938
2.59M
  while (HAS_CHAR(enc, ptr, end)) {
939
2.59M
    switch (BYTE_TYPE(enc, ptr)) {
940
12.9M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
941
521
    case BT_SEMI:
942
521
      *nextTokPtr = ptr + MINBPC(enc);
943
521
      return XML_TOK_PARAM_ENTITY_REF;
944
62
    default:
945
62
      *nextTokPtr = ptr;
946
62
      return XML_TOK_INVALID;
947
2.59M
    }
948
2.59M
  }
949
368
  return XML_TOK_PARTIAL;
950
1.08k
}
951
952
static int PTRCALL
953
PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
954
119k
                      const char **nextTokPtr) {
955
119k
  REQUIRE_CHAR(enc, ptr, end);
956
119k
  switch (BYTE_TYPE(enc, ptr)) {
957
116k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
958
213
  default:
959
213
    *nextTokPtr = ptr;
960
213
    return XML_TOK_INVALID;
961
119k
  }
962
18.9M
  while (HAS_CHAR(enc, ptr, end)) {
963
18.9M
    switch (BYTE_TYPE(enc, ptr)) {
964
69.3M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
965
104k
    case BT_CR:
966
105k
    case BT_LF:
967
108k
    case BT_S:
968
111k
    case BT_RPAR:
969
113k
    case BT_GT:
970
113k
    case BT_PERCNT:
971
116k
    case BT_VERBAR:
972
116k
      *nextTokPtr = ptr;
973
116k
      return XML_TOK_POUND_NAME;
974
237
    default:
975
237
      *nextTokPtr = ptr;
976
237
      return XML_TOK_INVALID;
977
18.9M
    }
978
18.9M
  }
979
1.34k
  return -XML_TOK_POUND_NAME;
980
118k
}
xmltok.c:normal_scanPoundName
Line
Count
Source
954
104k
                      const char **nextTokPtr) {
955
104k
  REQUIRE_CHAR(enc, ptr, end);
956
104k
  switch (BYTE_TYPE(enc, ptr)) {
957
103k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
958
81
  default:
959
81
    *nextTokPtr = ptr;
960
81
    return XML_TOK_INVALID;
961
104k
  }
962
14.2M
  while (HAS_CHAR(enc, ptr, end)) {
963
14.2M
    switch (BYTE_TYPE(enc, ptr)) {
964
45.9M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
965
98.5k
    case BT_CR:
966
99.5k
    case BT_LF:
967
101k
    case BT_S:
968
101k
    case BT_RPAR:
969
102k
    case BT_GT:
970
102k
    case BT_PERCNT:
971
102k
    case BT_VERBAR:
972
102k
      *nextTokPtr = ptr;
973
102k
      return XML_TOK_POUND_NAME;
974
103
    default:
975
103
      *nextTokPtr = ptr;
976
103
      return XML_TOK_INVALID;
977
14.2M
    }
978
14.2M
  }
979
743
  return -XML_TOK_POUND_NAME;
980
103k
}
xmltok.c:little2_scanPoundName
Line
Count
Source
954
10.6k
                      const char **nextTokPtr) {
955
10.6k
  REQUIRE_CHAR(enc, ptr, end);
956
10.6k
  switch (BYTE_TYPE(enc, ptr)) {
957
9.74k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
958
68
  default:
959
68
    *nextTokPtr = ptr;
960
68
    return XML_TOK_INVALID;
961
10.6k
  }
962
4.29M
  while (HAS_CHAR(enc, ptr, end)) {
963
4.29M
    switch (BYTE_TYPE(enc, ptr)) {
964
21.3M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
965
3.81k
    case BT_CR:
966
3.91k
    case BT_LF:
967
4.13k
    case BT_S:
968
7.02k
    case BT_RPAR:
969
7.08k
    case BT_GT:
970
7.08k
    case BT_PERCNT:
971
9.91k
    case BT_VERBAR:
972
9.91k
      *nextTokPtr = ptr;
973
9.91k
      return XML_TOK_POUND_NAME;
974
65
    default:
975
65
      *nextTokPtr = ptr;
976
65
      return XML_TOK_INVALID;
977
4.29M
    }
978
4.29M
  }
979
345
  return -XML_TOK_POUND_NAME;
980
10.4k
}
xmltok.c:big2_scanPoundName
Line
Count
Source
954
4.70k
                      const char **nextTokPtr) {
955
4.70k
  REQUIRE_CHAR(enc, ptr, end);
956
4.66k
  switch (BYTE_TYPE(enc, ptr)) {
957
3.47k
    CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
958
64
  default:
959
64
    *nextTokPtr = ptr;
960
64
    return XML_TOK_INVALID;
961
4.66k
  }
962
422k
  while (HAS_CHAR(enc, ptr, end)) {
963
422k
    switch (BYTE_TYPE(enc, ptr)) {
964
2.05M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
965
1.85k
    case BT_CR:
966
2.12k
    case BT_LF:
967
3.27k
    case BT_S:
968
3.34k
    case BT_RPAR:
969
3.45k
    case BT_GT:
970
3.45k
    case BT_PERCNT:
971
4.07k
    case BT_VERBAR:
972
4.07k
      *nextTokPtr = ptr;
973
4.07k
      return XML_TOK_POUND_NAME;
974
69
    default:
975
69
      *nextTokPtr = ptr;
976
69
      return XML_TOK_INVALID;
977
422k
    }
978
422k
  }
979
261
  return -XML_TOK_POUND_NAME;
980
4.51k
}
981
982
static int PTRCALL
983
PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end,
984
405k
                const char **nextTokPtr) {
985
1.35G
  while (HAS_CHAR(enc, ptr, end)) {
986
1.35G
    int t = BYTE_TYPE(enc, ptr);
987
1.35G
    switch (t) {
988
19.0M
      INVALID_CASES(ptr, nextTokPtr)
989
1.57M
    case BT_QUOT:
990
2.18M
    case BT_APOS:
991
2.18M
      ptr += MINBPC(enc);
992
2.18M
      if (t != open)
993
1.80M
        break;
994
380k
      if (! HAS_CHAR(enc, ptr, end))
995
2.76k
        return -XML_TOK_LITERAL;
996
377k
      *nextTokPtr = ptr;
997
377k
      switch (BYTE_TYPE(enc, ptr)) {
998
28.1k
      case BT_S:
999
36.5k
      case BT_CR:
1000
79.0k
      case BT_LF:
1001
376k
      case BT_GT:
1002
376k
      case BT_PERCNT:
1003
377k
      case BT_LSQB:
1004
377k
        return XML_TOK_LITERAL;
1005
186
      default:
1006
186
        return XML_TOK_INVALID;
1007
377k
      }
1008
1.34G
    default:
1009
1.34G
      ptr += MINBPC(enc);
1010
1.34G
      break;
1011
1.35G
    }
1012
1.35G
  }
1013
23.5k
  return XML_TOK_PARTIAL;
1014
405k
}
xmltok.c:normal_scanLit
Line
Count
Source
984
277k
                const char **nextTokPtr) {
985
1.10G
  while (HAS_CHAR(enc, ptr, end)) {
986
1.10G
    int t = BYTE_TYPE(enc, ptr);
987
1.10G
    switch (t) {
988
2.17M
      INVALID_CASES(ptr, nextTokPtr)
989
789k
    case BT_QUOT:
990
1.27M
    case BT_APOS:
991
1.27M
      ptr += MINBPC(enc);
992
1.27M
      if (t != open)
993
1.01M
        break;
994
261k
      if (! HAS_CHAR(enc, ptr, end))
995
1.32k
        return -XML_TOK_LITERAL;
996
260k
      *nextTokPtr = ptr;
997
260k
      switch (BYTE_TYPE(enc, ptr)) {
998
24.1k
      case BT_S:
999
25.3k
      case BT_CR:
1000
64.5k
      case BT_LF:
1001
259k
      case BT_GT:
1002
259k
      case BT_PERCNT:
1003
260k
      case BT_LSQB:
1004
260k
        return XML_TOK_LITERAL;
1005
105
      default:
1006
105
        return XML_TOK_INVALID;
1007
260k
      }
1008
1.10G
    default:
1009
1.10G
      ptr += MINBPC(enc);
1010
1.10G
      break;
1011
1.10G
    }
1012
1.10G
  }
1013
14.6k
  return XML_TOK_PARTIAL;
1014
277k
}
xmltok.c:little2_scanLit
Line
Count
Source
984
23.7k
                const char **nextTokPtr) {
985
119M
  while (HAS_CHAR(enc, ptr, end)) {
986
119M
    int t = BYTE_TYPE(enc, ptr);
987
119M
    switch (t) {
988
6.54M
      INVALID_CASES(ptr, nextTokPtr)
989
679k
    case BT_QUOT:
990
699k
    case BT_APOS:
991
699k
      ptr += MINBPC(enc);
992
699k
      if (t != open)
993
680k
        break;
994
18.9k
      if (! HAS_CHAR(enc, ptr, end))
995
698
        return -XML_TOK_LITERAL;
996
18.2k
      *nextTokPtr = ptr;
997
18.2k
      switch (BYTE_TYPE(enc, ptr)) {
998
1.11k
      case BT_S:
999
4.17k
      case BT_CR:
1000
5.25k
      case BT_LF:
1001
18.0k
      case BT_GT:
1002
18.1k
      case BT_PERCNT:
1003
18.2k
      case BT_LSQB:
1004
18.2k
        return XML_TOK_LITERAL;
1005
45
      default:
1006
45
        return XML_TOK_INVALID;
1007
18.2k
      }
1008
116M
    default:
1009
116M
      ptr += MINBPC(enc);
1010
116M
      break;
1011
119M
    }
1012
119M
  }
1013
4.43k
  return XML_TOK_PARTIAL;
1014
23.7k
}
xmltok.c:big2_scanLit
Line
Count
Source
984
104k
                const char **nextTokPtr) {
985
132M
  while (HAS_CHAR(enc, ptr, end)) {
986
132M
    int t = BYTE_TYPE(enc, ptr);
987
132M
    switch (t) {
988
10.2M
      INVALID_CASES(ptr, nextTokPtr)
989
105k
    case BT_QUOT:
990
206k
    case BT_APOS:
991
206k
      ptr += MINBPC(enc);
992
206k
      if (t != open)
993
107k
        break;
994
99.6k
      if (! HAS_CHAR(enc, ptr, end))
995
736
        return -XML_TOK_LITERAL;
996
98.9k
      *nextTokPtr = ptr;
997
98.9k
      switch (BYTE_TYPE(enc, ptr)) {
998
2.83k
      case BT_S:
999
7.03k
      case BT_CR:
1000
9.27k
      case BT_LF:
1001
98.4k
      case BT_GT:
1002
98.4k
      case BT_PERCNT:
1003
98.8k
      case BT_LSQB:
1004
98.8k
        return XML_TOK_LITERAL;
1005
36
      default:
1006
36
        return XML_TOK_INVALID;
1007
98.9k
      }
1008
127M
    default:
1009
127M
      ptr += MINBPC(enc);
1010
127M
      break;
1011
132M
    }
1012
132M
  }
1013
4.46k
  return XML_TOK_PARTIAL;
1014
104k
}
1015
1016
static int PTRCALL
1017
PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
1018
21.4M
                  const char **nextTokPtr) {
1019
21.4M
  int tok;
1020
21.4M
  if (ptr >= end)
1021
11.7k
    return XML_TOK_NONE;
1022
21.3M
  if (MINBPC(enc) > 1) {
1023
3.02M
    size_t n = end - ptr;
1024
3.02M
    if (n & (MINBPC(enc) - 1)) {
1025
128k
      n &= ~(MINBPC(enc) - 1);
1026
128k
      if (n == 0)
1027
284
        return XML_TOK_PARTIAL;
1028
128k
      end = ptr + n;
1029
128k
    }
1030
3.02M
  }
1031
21.3M
  switch (BYTE_TYPE(enc, ptr)) {
1032
229k
  case BT_QUOT:
1033
229k
    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1034
175k
  case BT_APOS:
1035
175k
    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1036
944k
  case BT_LT: {
1037
944k
    ptr += MINBPC(enc);
1038
944k
    REQUIRE_CHAR(enc, ptr, end);
1039
943k
    switch (BYTE_TYPE(enc, ptr)) {
1040
823k
    case BT_EXCL:
1041
823k
      return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1042
72.6k
    case BT_QUEST:
1043
72.6k
      return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1044
21.3k
    case BT_NMSTRT:
1045
35.2k
    case BT_HEX:
1046
45.1k
    case BT_NONASCII:
1047
45.2k
    case BT_LEAD2:
1048
45.4k
    case BT_LEAD3:
1049
45.5k
    case BT_LEAD4:
1050
45.5k
      *nextTokPtr = ptr - MINBPC(enc);
1051
45.5k
      return XML_TOK_INSTANCE_START;
1052
943k
    }
1053
2.01k
    *nextTokPtr = ptr;
1054
2.01k
    return XML_TOK_INVALID;
1055
943k
  }
1056
698k
  case BT_CR:
1057
698k
    if (ptr + MINBPC(enc) == end) {
1058
4.83k
      *nextTokPtr = end;
1059
      /* indicate that this might be part of a CR/LF pair */
1060
4.83k
      return -XML_TOK_PROLOG_S;
1061
4.83k
    }
1062
    /* fall through */
1063
1.38M
  case BT_S:
1064
2.02M
  case BT_LF:
1065
20.2M
    for (;;) {
1066
20.2M
      ptr += MINBPC(enc);
1067
20.2M
      if (! HAS_CHAR(enc, ptr, end))
1068
1.65k
        break;
1069
20.2M
      switch (BYTE_TYPE(enc, ptr)) {
1070
1.24M
      case BT_S:
1071
1.57M
      case BT_LF:
1072
1.57M
        break;
1073
16.6M
      case BT_CR:
1074
        /* don't split CR/LF pair */
1075
16.6M
        if (ptr + MINBPC(enc) != end)
1076
16.6M
          break;
1077
        /* fall through */
1078
2.01M
      default:
1079
2.01M
        *nextTokPtr = ptr;
1080
2.01M
        return XML_TOK_PROLOG_S;
1081
20.2M
      }
1082
20.2M
    }
1083
1.65k
    *nextTokPtr = ptr;
1084
1.65k
    return XML_TOK_PROLOG_S;
1085
499k
  case BT_PERCNT:
1086
499k
    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1087
133k
  case BT_COMMA:
1088
133k
    *nextTokPtr = ptr + MINBPC(enc);
1089
133k
    return XML_TOK_COMMA;
1090
21.6k
  case BT_LSQB:
1091
21.6k
    *nextTokPtr = ptr + MINBPC(enc);
1092
21.6k
    return XML_TOK_OPEN_BRACKET;
1093
13.6k
  case BT_RSQB:
1094
13.6k
    ptr += MINBPC(enc);
1095
13.6k
    if (! HAS_CHAR(enc, ptr, end))
1096
53
      return -XML_TOK_CLOSE_BRACKET;
1097
13.6k
    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1098
239
      REQUIRE_CHARS(enc, ptr, end, 2);
1099
214
      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1100
25
        *nextTokPtr = ptr + 2 * MINBPC(enc);
1101
25
        return XML_TOK_COND_SECT_CLOSE;
1102
25
      }
1103
214
    }
1104
13.5k
    *nextTokPtr = ptr;
1105
13.5k
    return XML_TOK_CLOSE_BRACKET;
1106
8.16M
  case BT_LPAR:
1107
8.16M
    *nextTokPtr = ptr + MINBPC(enc);
1108
8.16M
    return XML_TOK_OPEN_PAREN;
1109
399k
  case BT_RPAR:
1110
399k
    ptr += MINBPC(enc);
1111
399k
    if (! HAS_CHAR(enc, ptr, end))
1112
248
      return -XML_TOK_CLOSE_PAREN;
1113
399k
    switch (BYTE_TYPE(enc, ptr)) {
1114
15.8k
    case BT_AST:
1115
15.8k
      *nextTokPtr = ptr + MINBPC(enc);
1116
15.8k
      return XML_TOK_CLOSE_PAREN_ASTERISK;
1117
17.9k
    case BT_QUEST:
1118
17.9k
      *nextTokPtr = ptr + MINBPC(enc);
1119
17.9k
      return XML_TOK_CLOSE_PAREN_QUESTION;
1120
84.9k
    case BT_PLUS:
1121
84.9k
      *nextTokPtr = ptr + MINBPC(enc);
1122
84.9k
      return XML_TOK_CLOSE_PAREN_PLUS;
1123
675
    case BT_CR:
1124
3.57k
    case BT_LF:
1125
180k
    case BT_S:
1126
182k
    case BT_GT:
1127
210k
    case BT_COMMA:
1128
218k
    case BT_VERBAR:
1129
280k
    case BT_RPAR:
1130
280k
      *nextTokPtr = ptr;
1131
280k
      return XML_TOK_CLOSE_PAREN;
1132
399k
    }
1133
198
    *nextTokPtr = ptr;
1134
198
    return XML_TOK_INVALID;
1135
3.63M
  case BT_VERBAR:
1136
3.63M
    *nextTokPtr = ptr + MINBPC(enc);
1137
3.63M
    return XML_TOK_OR;
1138
345k
  case BT_GT:
1139
345k
    *nextTokPtr = ptr + MINBPC(enc);
1140
345k
    return XML_TOK_DECL_CLOSE;
1141
119k
  case BT_NUM:
1142
119k
    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1143
0
#  define LEAD_CASE(n)                                                         \
1144
463k
  case BT_LEAD##n:                                                             \
1145
463k
    if (end - ptr < n)                                                         \
1146
729
      return XML_TOK_PARTIAL_CHAR;                                             \
1147
463k
    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
1148
354
      *nextTokPtr = ptr;                                                       \
1149
354
      return XML_TOK_INVALID;                                                  \
1150
354
    }                                                                          \
1151
463k
    if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
1152
174k
      ptr += n;                                                                \
1153
174k
      tok = XML_TOK_NAME;                                                      \
1154
174k
      break;                                                                   \
1155
174k
    }                                                                          \
1156
462k
    if (IS_NAME_CHAR(enc, ptr, n)) {                                           \
1157
288k
      ptr += n;                                                                \
1158
288k
      tok = XML_TOK_NMTOKEN;                                                   \
1159
288k
      break;                                                                   \
1160
288k
    }                                                                          \
1161
288k
    *nextTokPtr = ptr;                                                         \
1162
190
    return XML_TOK_INVALID;
1163
1.15M
    LEAD_CASE(2)
1164
518k
    LEAD_CASE(3)
1165
882
    LEAD_CASE(4)
1166
0
#  undef LEAD_CASE
1167
2.54M
  case BT_NMSTRT:
1168
2.96M
  case BT_HEX:
1169
2.96M
    tok = XML_TOK_NAME;
1170
2.96M
    ptr += MINBPC(enc);
1171
2.96M
    break;
1172
15.7k
  case BT_DIGIT:
1173
131k
  case BT_NAME:
1174
537k
  case BT_MINUS:
1175
537k
#  ifdef XML_NS
1176
910k
  case BT_COLON:
1177
910k
#  endif
1178
910k
    tok = XML_TOK_NMTOKEN;
1179
910k
    ptr += MINBPC(enc);
1180
910k
    break;
1181
337k
  case BT_NONASCII:
1182
337k
    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1183
336k
      ptr += MINBPC(enc);
1184
336k
      tok = XML_TOK_NAME;
1185
336k
      break;
1186
336k
    }
1187
1.04k
    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1188
676
      ptr += MINBPC(enc);
1189
676
      tok = XML_TOK_NMTOKEN;
1190
676
      break;
1191
676
    }
1192
    /* fall through */
1193
1.82k
  default:
1194
1.82k
    *nextTokPtr = ptr;
1195
1.82k
    return XML_TOK_INVALID;
1196
21.3M
  }
1197
158M
  while (HAS_CHAR(enc, ptr, end)) {
1198
158M
    switch (BYTE_TYPE(enc, ptr)) {
1199
476M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1200
8.73k
    case BT_GT:
1201
145k
    case BT_RPAR:
1202
149k
    case BT_COMMA:
1203
3.69M
    case BT_VERBAR:
1204
3.71M
    case BT_LSQB:
1205
3.71M
    case BT_PERCNT:
1206
3.97M
    case BT_S:
1207
4.25M
    case BT_CR:
1208
4.53M
    case BT_LF:
1209
4.53M
      *nextTokPtr = ptr;
1210
4.53M
      return tok;
1211
0
#  ifdef XML_NS
1212
1.12M
    case BT_COLON:
1213
1.12M
      ptr += MINBPC(enc);
1214
1.12M
      switch (tok) {
1215
924k
      case XML_TOK_NAME:
1216
924k
        REQUIRE_CHAR(enc, ptr, end);
1217
923k
        tok = XML_TOK_PREFIXED_NAME;
1218
923k
        switch (BYTE_TYPE(enc, ptr)) {
1219
1.92M
          CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1220
57.7k
        default:
1221
57.7k
          tok = XML_TOK_NMTOKEN;
1222
57.7k
          break;
1223
923k
        }
1224
923k
        break;
1225
923k
      case XML_TOK_PREFIXED_NAME:
1226
69.6k
        tok = XML_TOK_NMTOKEN;
1227
69.6k
        break;
1228
1.12M
      }
1229
1.12M
      break;
1230
1.12M
#  endif
1231
1.12M
    case BT_PLUS:
1232
97.8k
      if (tok == XML_TOK_NMTOKEN) {
1233
23
        *nextTokPtr = ptr;
1234
23
        return XML_TOK_INVALID;
1235
23
      }
1236
97.7k
      *nextTokPtr = ptr + MINBPC(enc);
1237
97.7k
      return XML_TOK_NAME_PLUS;
1238
1.73k
    case BT_AST:
1239
1.73k
      if (tok == XML_TOK_NMTOKEN) {
1240
26
        *nextTokPtr = ptr;
1241
26
        return XML_TOK_INVALID;
1242
26
      }
1243
1.71k
      *nextTokPtr = ptr + MINBPC(enc);
1244
1.71k
      return XML_TOK_NAME_ASTERISK;
1245
27.5k
    case BT_QUEST:
1246
27.5k
      if (tok == XML_TOK_NMTOKEN) {
1247
38
        *nextTokPtr = ptr;
1248
38
        return XML_TOK_INVALID;
1249
38
      }
1250
27.5k
      *nextTokPtr = ptr + MINBPC(enc);
1251
27.5k
      return XML_TOK_NAME_QUESTION;
1252
901
    default:
1253
901
      *nextTokPtr = ptr;
1254
901
      return XML_TOK_INVALID;
1255
158M
    }
1256
158M
  }
1257
14.4k
  return -tok;
1258
4.67M
}
xmltok.c:normal_prologTok
Line
Count
Source
1018
18.3M
                  const char **nextTokPtr) {
1019
18.3M
  int tok;
1020
18.3M
  if (ptr >= end)
1021
9.42k
    return XML_TOK_NONE;
1022
18.3M
  if (MINBPC(enc) > 1) {
1023
0
    size_t n = end - ptr;
1024
0
    if (n & (MINBPC(enc) - 1)) {
1025
0
      n &= ~(MINBPC(enc) - 1);
1026
0
      if (n == 0)
1027
0
        return XML_TOK_PARTIAL;
1028
0
      end = ptr + n;
1029
0
    }
1030
0
  }
1031
18.3M
  switch (BYTE_TYPE(enc, ptr)) {
1032
226k
  case BT_QUOT:
1033
226k
    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1034
50.9k
  case BT_APOS:
1035
50.9k
    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1036
731k
  case BT_LT: {
1037
731k
    ptr += MINBPC(enc);
1038
731k
    REQUIRE_CHAR(enc, ptr, end);
1039
730k
    switch (BYTE_TYPE(enc, ptr)) {
1040
685k
    case BT_EXCL:
1041
685k
      return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1042
19.4k
    case BT_QUEST:
1043
19.4k
      return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1044
14.0k
    case BT_NMSTRT:
1045
24.8k
    case BT_HEX:
1046
24.8k
    case BT_NONASCII:
1047
24.8k
    case BT_LEAD2:
1048
25.0k
    case BT_LEAD3:
1049
25.0k
    case BT_LEAD4:
1050
25.0k
      *nextTokPtr = ptr - MINBPC(enc);
1051
25.0k
      return XML_TOK_INSTANCE_START;
1052
730k
    }
1053
607
    *nextTokPtr = ptr;
1054
607
    return XML_TOK_INVALID;
1055
730k
  }
1056
658k
  case BT_CR:
1057
658k
    if (ptr + MINBPC(enc) == end) {
1058
4.52k
      *nextTokPtr = end;
1059
      /* indicate that this might be part of a CR/LF pair */
1060
4.52k
      return -XML_TOK_PROLOG_S;
1061
4.52k
    }
1062
    /* fall through */
1063
1.11M
  case BT_S:
1064
1.49M
  case BT_LF:
1065
19.1M
    for (;;) {
1066
19.1M
      ptr += MINBPC(enc);
1067
19.1M
      if (! HAS_CHAR(enc, ptr, end))
1068
1.02k
        break;
1069
19.1M
      switch (BYTE_TYPE(enc, ptr)) {
1070
1.08M
      case BT_S:
1071
1.27M
      case BT_LF:
1072
1.27M
        break;
1073
16.4M
      case BT_CR:
1074
        /* don't split CR/LF pair */
1075
16.4M
        if (ptr + MINBPC(enc) != end)
1076
16.4M
          break;
1077
        /* fall through */
1078
1.49M
      default:
1079
1.49M
        *nextTokPtr = ptr;
1080
1.49M
        return XML_TOK_PROLOG_S;
1081
19.1M
      }
1082
19.1M
    }
1083
1.02k
    *nextTokPtr = ptr;
1084
1.02k
    return XML_TOK_PROLOG_S;
1085
488k
  case BT_PERCNT:
1086
488k
    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1087
1.69k
  case BT_COMMA:
1088
1.69k
    *nextTokPtr = ptr + MINBPC(enc);
1089
1.69k
    return XML_TOK_COMMA;
1090
16.2k
  case BT_LSQB:
1091
16.2k
    *nextTokPtr = ptr + MINBPC(enc);
1092
16.2k
    return XML_TOK_OPEN_BRACKET;
1093
10.7k
  case BT_RSQB:
1094
10.7k
    ptr += MINBPC(enc);
1095
10.7k
    if (! HAS_CHAR(enc, ptr, end))
1096
23
      return -XML_TOK_CLOSE_BRACKET;
1097
10.6k
    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1098
76
      REQUIRE_CHARS(enc, ptr, end, 2);
1099
65
      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1100
10
        *nextTokPtr = ptr + 2 * MINBPC(enc);
1101
10
        return XML_TOK_COND_SECT_CLOSE;
1102
10
      }
1103
65
    }
1104
10.6k
    *nextTokPtr = ptr;
1105
10.6k
    return XML_TOK_CLOSE_BRACKET;
1106
7.92M
  case BT_LPAR:
1107
7.92M
    *nextTokPtr = ptr + MINBPC(enc);
1108
7.92M
    return XML_TOK_OPEN_PAREN;
1109
214k
  case BT_RPAR:
1110
214k
    ptr += MINBPC(enc);
1111
214k
    if (! HAS_CHAR(enc, ptr, end))
1112
129
      return -XML_TOK_CLOSE_PAREN;
1113
214k
    switch (BYTE_TYPE(enc, ptr)) {
1114
5.32k
    case BT_AST:
1115
5.32k
      *nextTokPtr = ptr + MINBPC(enc);
1116
5.32k
      return XML_TOK_CLOSE_PAREN_ASTERISK;
1117
16.0k
    case BT_QUEST:
1118
16.0k
      *nextTokPtr = ptr + MINBPC(enc);
1119
16.0k
      return XML_TOK_CLOSE_PAREN_QUESTION;
1120
31.5k
    case BT_PLUS:
1121
31.5k
      *nextTokPtr = ptr + MINBPC(enc);
1122
31.5k
      return XML_TOK_CLOSE_PAREN_PLUS;
1123
366
    case BT_CR:
1124
883
    case BT_LF:
1125
98.5k
    case BT_S:
1126
98.9k
    case BT_GT:
1127
99.3k
    case BT_COMMA:
1128
104k
    case BT_VERBAR:
1129
161k
    case BT_RPAR:
1130
161k
      *nextTokPtr = ptr;
1131
161k
      return XML_TOK_CLOSE_PAREN;
1132
214k
    }
1133
75
    *nextTokPtr = ptr;
1134
75
    return XML_TOK_INVALID;
1135
3.11M
  case BT_VERBAR:
1136
3.11M
    *nextTokPtr = ptr + MINBPC(enc);
1137
3.11M
    return XML_TOK_OR;
1138
218k
  case BT_GT:
1139
218k
    *nextTokPtr = ptr + MINBPC(enc);
1140
218k
    return XML_TOK_DECL_CLOSE;
1141
104k
  case BT_NUM:
1142
104k
    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1143
0
#  define LEAD_CASE(n)                                                         \
1144
0
  case BT_LEAD##n:                                                             \
1145
0
    if (end - ptr < n)                                                         \
1146
0
      return XML_TOK_PARTIAL_CHAR;                                             \
1147
0
    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
1148
0
      *nextTokPtr = ptr;                                                       \
1149
0
      return XML_TOK_INVALID;                                                  \
1150
0
    }                                                                          \
1151
0
    if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
1152
0
      ptr += n;                                                                \
1153
0
      tok = XML_TOK_NAME;                                                      \
1154
0
      break;                                                                   \
1155
0
    }                                                                          \
1156
0
    if (IS_NAME_CHAR(enc, ptr, n)) {                                           \
1157
0
      ptr += n;                                                                \
1158
0
      tok = XML_TOK_NMTOKEN;                                                   \
1159
0
      break;                                                                   \
1160
0
    }                                                                          \
1161
0
    *nextTokPtr = ptr;                                                         \
1162
0
    return XML_TOK_INVALID;
1163
1.15M
    LEAD_CASE(2)
1164
518k
    LEAD_CASE(3)
1165
491
    LEAD_CASE(4)
1166
0
#  undef LEAD_CASE
1167
2.30M
  case BT_NMSTRT:
1168
2.58M
  case BT_HEX:
1169
2.58M
    tok = XML_TOK_NAME;
1170
2.58M
    ptr += MINBPC(enc);
1171
2.58M
    break;
1172
2.21k
  case BT_DIGIT:
1173
3.93k
  case BT_NAME:
1174
409k
  case BT_MINUS:
1175
409k
#  ifdef XML_NS
1176
718k
  case BT_COLON:
1177
718k
#  endif
1178
718k
    tok = XML_TOK_NMTOKEN;
1179
718k
    ptr += MINBPC(enc);
1180
718k
    break;
1181
0
  case BT_NONASCII:
1182
0
    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1183
0
      ptr += MINBPC(enc);
1184
0
      tok = XML_TOK_NAME;
1185
0
      break;
1186
0
    }
1187
0
    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1188
0
      ptr += MINBPC(enc);
1189
0
      tok = XML_TOK_NMTOKEN;
1190
0
      break;
1191
0
    }
1192
    /* fall through */
1193
1.23k
  default:
1194
1.23k
    *nextTokPtr = ptr;
1195
1.23k
    return XML_TOK_INVALID;
1196
18.3M
  }
1197
152M
  while (HAS_CHAR(enc, ptr, end)) {
1198
152M
    switch (BYTE_TYPE(enc, ptr)) {
1199
453M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1200
7.86k
    case BT_GT:
1201
118k
    case BT_RPAR:
1202
118k
    case BT_COMMA:
1203
3.14M
    case BT_VERBAR:
1204
3.16M
    case BT_LSQB:
1205
3.16M
    case BT_PERCNT:
1206
3.36M
    case BT_S:
1207
3.63M
    case BT_CR:
1208
3.72M
    case BT_LF:
1209
3.72M
      *nextTokPtr = ptr;
1210
3.72M
      return tok;
1211
0
#  ifdef XML_NS
1212
686k
    case BT_COLON:
1213
686k
      ptr += MINBPC(enc);
1214
686k
      switch (tok) {
1215
682k
      case XML_TOK_NAME:
1216
682k
        REQUIRE_CHAR(enc, ptr, end);
1217
681k
        tok = XML_TOK_PREFIXED_NAME;
1218
681k
        switch (BYTE_TYPE(enc, ptr)) {
1219
1.38M
          CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1220
619
        default:
1221
619
          tok = XML_TOK_NMTOKEN;
1222
619
          break;
1223
681k
        }
1224
681k
        break;
1225
681k
      case XML_TOK_PREFIXED_NAME:
1226
1.89k
        tok = XML_TOK_NMTOKEN;
1227
1.89k
        break;
1228
686k
      }
1229
686k
      break;
1230
686k
#  endif
1231
686k
    case BT_PLUS:
1232
378
      if (tok == XML_TOK_NMTOKEN) {
1233
9
        *nextTokPtr = ptr;
1234
9
        return XML_TOK_INVALID;
1235
9
      }
1236
369
      *nextTokPtr = ptr + MINBPC(enc);
1237
369
      return XML_TOK_NAME_PLUS;
1238
620
    case BT_AST:
1239
620
      if (tok == XML_TOK_NMTOKEN) {
1240
7
        *nextTokPtr = ptr;
1241
7
        return XML_TOK_INVALID;
1242
7
      }
1243
613
      *nextTokPtr = ptr + MINBPC(enc);
1244
613
      return XML_TOK_NAME_ASTERISK;
1245
26.5k
    case BT_QUEST:
1246
26.5k
      if (tok == XML_TOK_NMTOKEN) {
1247
19
        *nextTokPtr = ptr;
1248
19
        return XML_TOK_INVALID;
1249
19
      }
1250
26.4k
      *nextTokPtr = ptr + MINBPC(enc);
1251
26.4k
      return XML_TOK_NAME_QUESTION;
1252
604
    default:
1253
604
      *nextTokPtr = ptr;
1254
604
      return XML_TOK_INVALID;
1255
152M
    }
1256
152M
  }
1257
11.5k
  return -tok;
1258
3.76M
}
xmltok.c:little2_prologTok
Line
Count
Source
1018
804k
                  const char **nextTokPtr) {
1019
804k
  int tok;
1020
804k
  if (ptr >= end)
1021
894
    return XML_TOK_NONE;
1022
804k
  if (MINBPC(enc) > 1) {
1023
804k
    size_t n = end - ptr;
1024
804k
    if (n & (MINBPC(enc) - 1)) {
1025
56.5k
      n &= ~(MINBPC(enc) - 1);
1026
56.5k
      if (n == 0)
1027
160
        return XML_TOK_PARTIAL;
1028
56.3k
      end = ptr + n;
1029
56.3k
    }
1030
804k
  }
1031
803k
  switch (BYTE_TYPE(enc, ptr)) {
1032
1.03k
  case BT_QUOT:
1033
1.03k
    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1034
22.7k
  case BT_APOS:
1035
22.7k
    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1036
69.2k
  case BT_LT: {
1037
69.2k
    ptr += MINBPC(enc);
1038
69.2k
    REQUIRE_CHAR(enc, ptr, end);
1039
69.1k
    switch (BYTE_TYPE(enc, ptr)) {
1040
33.2k
    case BT_EXCL:
1041
33.2k
      return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1042
25.1k
    case BT_QUEST:
1043
25.1k
      return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1044
3.85k
    case BT_NMSTRT:
1045
5.05k
    case BT_HEX:
1046
10.0k
    case BT_NONASCII:
1047
10.0k
    case BT_LEAD2:
1048
10.0k
    case BT_LEAD3:
1049
10.1k
    case BT_LEAD4:
1050
10.1k
      *nextTokPtr = ptr - MINBPC(enc);
1051
10.1k
      return XML_TOK_INSTANCE_START;
1052
69.1k
    }
1053
694
    *nextTokPtr = ptr;
1054
694
    return XML_TOK_INVALID;
1055
69.1k
  }
1056
18.2k
  case BT_CR:
1057
18.2k
    if (ptr + MINBPC(enc) == end) {
1058
109
      *nextTokPtr = end;
1059
      /* indicate that this might be part of a CR/LF pair */
1060
109
      return -XML_TOK_PROLOG_S;
1061
109
    }
1062
    /* fall through */
1063
74.7k
  case BT_S:
1064
95.1k
  case BT_LF:
1065
315k
    for (;;) {
1066
315k
      ptr += MINBPC(enc);
1067
315k
      if (! HAS_CHAR(enc, ptr, end))
1068
238
        break;
1069
315k
      switch (BYTE_TYPE(enc, ptr)) {
1070
79.6k
      case BT_S:
1071
151k
      case BT_LF:
1072
151k
        break;
1073
69.0k
      case BT_CR:
1074
        /* don't split CR/LF pair */
1075
69.0k
        if (ptr + MINBPC(enc) != end)
1076
68.9k
          break;
1077
        /* fall through */
1078
94.9k
      default:
1079
94.9k
        *nextTokPtr = ptr;
1080
94.9k
        return XML_TOK_PROLOG_S;
1081
315k
      }
1082
315k
    }
1083
238
    *nextTokPtr = ptr;
1084
238
    return XML_TOK_PROLOG_S;
1085
4.32k
  case BT_PERCNT:
1086
4.32k
    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1087
3.04k
  case BT_COMMA:
1088
3.04k
    *nextTokPtr = ptr + MINBPC(enc);
1089
3.04k
    return XML_TOK_COMMA;
1090
2.48k
  case BT_LSQB:
1091
2.48k
    *nextTokPtr = ptr + MINBPC(enc);
1092
2.48k
    return XML_TOK_OPEN_BRACKET;
1093
1.24k
  case BT_RSQB:
1094
1.24k
    ptr += MINBPC(enc);
1095
1.24k
    if (! HAS_CHAR(enc, ptr, end))
1096
11
      return -XML_TOK_CLOSE_BRACKET;
1097
1.23k
    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1098
72
      REQUIRE_CHARS(enc, ptr, end, 2);
1099
66
      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1100
6
        *nextTokPtr = ptr + 2 * MINBPC(enc);
1101
6
        return XML_TOK_COND_SECT_CLOSE;
1102
6
      }
1103
66
    }
1104
1.22k
    *nextTokPtr = ptr;
1105
1.22k
    return XML_TOK_CLOSE_BRACKET;
1106
17.8k
  case BT_LPAR:
1107
17.8k
    *nextTokPtr = ptr + MINBPC(enc);
1108
17.8k
    return XML_TOK_OPEN_PAREN;
1109
16.3k
  case BT_RPAR:
1110
16.3k
    ptr += MINBPC(enc);
1111
16.3k
    if (! HAS_CHAR(enc, ptr, end))
1112
33
      return -XML_TOK_CLOSE_PAREN;
1113
16.2k
    switch (BYTE_TYPE(enc, ptr)) {
1114
8.91k
    case BT_AST:
1115
8.91k
      *nextTokPtr = ptr + MINBPC(enc);
1116
8.91k
      return XML_TOK_CLOSE_PAREN_ASTERISK;
1117
1.02k
    case BT_QUEST:
1118
1.02k
      *nextTokPtr = ptr + MINBPC(enc);
1119
1.02k
      return XML_TOK_CLOSE_PAREN_QUESTION;
1120
663
    case BT_PLUS:
1121
663
      *nextTokPtr = ptr + MINBPC(enc);
1122
663
      return XML_TOK_CLOSE_PAREN_PLUS;
1123
183
    case BT_CR:
1124
1.86k
    case BT_LF:
1125
2.54k
    case BT_S:
1126
2.86k
    case BT_GT:
1127
3.39k
    case BT_COMMA:
1128
4.78k
    case BT_VERBAR:
1129
5.60k
    case BT_RPAR:
1130
5.60k
      *nextTokPtr = ptr;
1131
5.60k
      return XML_TOK_CLOSE_PAREN;
1132
16.2k
    }
1133
57
    *nextTokPtr = ptr;
1134
57
    return XML_TOK_INVALID;
1135
240k
  case BT_VERBAR:
1136
240k
    *nextTokPtr = ptr + MINBPC(enc);
1137
240k
    return XML_TOK_OR;
1138
27.2k
  case BT_GT:
1139
27.2k
    *nextTokPtr = ptr + MINBPC(enc);
1140
27.2k
    return XML_TOK_DECL_CLOSE;
1141
10.6k
  case BT_NUM:
1142
10.6k
    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1143
0
#  define LEAD_CASE(n)                                                         \
1144
0
  case BT_LEAD##n:                                                             \
1145
0
    if (end - ptr < n)                                                         \
1146
0
      return XML_TOK_PARTIAL_CHAR;                                             \
1147
0
    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
1148
0
      *nextTokPtr = ptr;                                                       \
1149
0
      return XML_TOK_INVALID;                                                  \
1150
0
    }                                                                          \
1151
0
    if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
1152
0
      ptr += n;                                                                \
1153
0
      tok = XML_TOK_NAME;                                                      \
1154
0
      break;                                                                   \
1155
0
    }                                                                          \
1156
0
    if (IS_NAME_CHAR(enc, ptr, n)) {                                           \
1157
0
      ptr += n;                                                                \
1158
0
      tok = XML_TOK_NMTOKEN;                                                   \
1159
0
      break;                                                                   \
1160
0
    }                                                                          \
1161
0
    *nextTokPtr = ptr;                                                         \
1162
0
    return XML_TOK_INVALID;
1163
0
    LEAD_CASE(2)
1164
0
    LEAD_CASE(3)
1165
160
    LEAD_CASE(4)
1166
0
#  undef LEAD_CASE
1167
34.0k
  case BT_NMSTRT:
1168
49.3k
  case BT_HEX:
1169
49.3k
    tok = XML_TOK_NAME;
1170
49.3k
    ptr += MINBPC(enc);
1171
49.3k
    break;
1172
1.31k
  case BT_DIGIT:
1173
114k
  case BT_NAME:
1174
115k
  case BT_MINUS:
1175
115k
#  ifdef XML_NS
1176
120k
  case BT_COLON:
1177
120k
#  endif
1178
120k
    tok = XML_TOK_NMTOKEN;
1179
120k
    ptr += MINBPC(enc);
1180
120k
    break;
1181
121k
  case BT_NONASCII:
1182
121k
    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1183
121k
      ptr += MINBPC(enc);
1184
121k
      tok = XML_TOK_NAME;
1185
121k
      break;
1186
121k
    }
1187
539
    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1188
309
      ptr += MINBPC(enc);
1189
309
      tok = XML_TOK_NMTOKEN;
1190
309
      break;
1191
309
    }
1192
    /* fall through */
1193
325
  default:
1194
325
    *nextTokPtr = ptr;
1195
325
    return XML_TOK_INVALID;
1196
803k
  }
1197
1.58M
  while (HAS_CHAR(enc, ptr, end)) {
1198
1.58M
    switch (BYTE_TYPE(enc, ptr)) {
1199
4.73M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1200
351
    case BT_GT:
1201
9.76k
    case BT_RPAR:
1202
10.3k
    case BT_COMMA:
1203
244k
    case BT_VERBAR:
1204
246k
    case BT_LSQB:
1205
246k
    case BT_PERCNT:
1206
272k
    case BT_S:
1207
275k
    case BT_CR:
1208
287k
    case BT_LF:
1209
287k
      *nextTokPtr = ptr;
1210
287k
      return tok;
1211
0
#  ifdef XML_NS
1212
211k
    case BT_COLON:
1213
211k
      ptr += MINBPC(enc);
1214
211k
      switch (tok) {
1215
90.1k
      case XML_TOK_NAME:
1216
90.1k
        REQUIRE_CHAR(enc, ptr, end);
1217
90.1k
        tok = XML_TOK_PREFIXED_NAME;
1218
90.1k
        switch (BYTE_TYPE(enc, ptr)) {
1219
125k
          CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1220
18.5k
        default:
1221
18.5k
          tok = XML_TOK_NMTOKEN;
1222
18.5k
          break;
1223
90.1k
        }
1224
89.9k
        break;
1225
89.9k
      case XML_TOK_PREFIXED_NAME:
1226
9.33k
        tok = XML_TOK_NMTOKEN;
1227
9.33k
        break;
1228
211k
      }
1229
211k
      break;
1230
211k
#  endif
1231
211k
    case BT_PLUS:
1232
1.11k
      if (tok == XML_TOK_NMTOKEN) {
1233
6
        *nextTokPtr = ptr;
1234
6
        return XML_TOK_INVALID;
1235
6
      }
1236
1.10k
      *nextTokPtr = ptr + MINBPC(enc);
1237
1.10k
      return XML_TOK_NAME_PLUS;
1238
705
    case BT_AST:
1239
705
      if (tok == XML_TOK_NMTOKEN) {
1240
10
        *nextTokPtr = ptr;
1241
10
        return XML_TOK_INVALID;
1242
10
      }
1243
695
      *nextTokPtr = ptr + MINBPC(enc);
1244
695
      return XML_TOK_NAME_ASTERISK;
1245
198
    case BT_QUEST:
1246
198
      if (tok == XML_TOK_NMTOKEN) {
1247
12
        *nextTokPtr = ptr;
1248
12
        return XML_TOK_INVALID;
1249
12
      }
1250
186
      *nextTokPtr = ptr + MINBPC(enc);
1251
186
      return XML_TOK_NAME_QUESTION;
1252
141
    default:
1253
141
      *nextTokPtr = ptr;
1254
141
      return XML_TOK_INVALID;
1255
1.58M
    }
1256
1.58M
  }
1257
1.23k
  return -tok;
1258
291k
}
xmltok.c:big2_prologTok
Line
Count
Source
1018
2.22M
                  const char **nextTokPtr) {
1019
2.22M
  int tok;
1020
2.22M
  if (ptr >= end)
1021
1.39k
    return XML_TOK_NONE;
1022
2.22M
  if (MINBPC(enc) > 1) {
1023
2.22M
    size_t n = end - ptr;
1024
2.22M
    if (n & (MINBPC(enc) - 1)) {
1025
71.8k
      n &= ~(MINBPC(enc) - 1);
1026
71.8k
      if (n == 0)
1027
124
        return XML_TOK_PARTIAL;
1028
71.6k
      end = ptr + n;
1029
71.6k
    }
1030
2.22M
  }
1031
2.22M
  switch (BYTE_TYPE(enc, ptr)) {
1032
2.53k
  case BT_QUOT:
1033
2.53k
    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
1034
102k
  case BT_APOS:
1035
102k
    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
1036
143k
  case BT_LT: {
1037
143k
    ptr += MINBPC(enc);
1038
143k
    REQUIRE_CHAR(enc, ptr, end);
1039
143k
    switch (BYTE_TYPE(enc, ptr)) {
1040
104k
    case BT_EXCL:
1041
104k
      return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1042
28.1k
    case BT_QUEST:
1043
28.1k
      return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1044
3.40k
    case BT_NMSTRT:
1045
5.43k
    case BT_HEX:
1046
10.3k
    case BT_NONASCII:
1047
10.3k
    case BT_LEAD2:
1048
10.3k
    case BT_LEAD3:
1049
10.3k
    case BT_LEAD4:
1050
10.3k
      *nextTokPtr = ptr - MINBPC(enc);
1051
10.3k
      return XML_TOK_INSTANCE_START;
1052
143k
    }
1053
710
    *nextTokPtr = ptr;
1054
710
    return XML_TOK_INVALID;
1055
143k
  }
1056
21.5k
  case BT_CR:
1057
21.5k
    if (ptr + MINBPC(enc) == end) {
1058
196
      *nextTokPtr = end;
1059
      /* indicate that this might be part of a CR/LF pair */
1060
196
      return -XML_TOK_PROLOG_S;
1061
196
    }
1062
    /* fall through */
1063
186k
  case BT_S:
1064
432k
  case BT_LF:
1065
767k
    for (;;) {
1066
767k
      ptr += MINBPC(enc);
1067
767k
      if (! HAS_CHAR(enc, ptr, end))
1068
389
        break;
1069
767k
      switch (BYTE_TYPE(enc, ptr)) {
1070
80.4k
      case BT_S:
1071
144k
      case BT_LF:
1072
144k
        break;
1073
191k
      case BT_CR:
1074
        /* don't split CR/LF pair */
1075
191k
        if (ptr + MINBPC(enc) != end)
1076
191k
          break;
1077
        /* fall through */
1078
432k
      default:
1079
432k
        *nextTokPtr = ptr;
1080
432k
        return XML_TOK_PROLOG_S;
1081
767k
      }
1082
767k
    }
1083
389
    *nextTokPtr = ptr;
1084
389
    return XML_TOK_PROLOG_S;
1085
6.84k
  case BT_PERCNT:
1086
6.84k
    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1087
128k
  case BT_COMMA:
1088
128k
    *nextTokPtr = ptr + MINBPC(enc);
1089
128k
    return XML_TOK_COMMA;
1090
2.93k
  case BT_LSQB:
1091
2.93k
    *nextTokPtr = ptr + MINBPC(enc);
1092
2.93k
    return XML_TOK_OPEN_BRACKET;
1093
1.72k
  case BT_RSQB:
1094
1.72k
    ptr += MINBPC(enc);
1095
1.72k
    if (! HAS_CHAR(enc, ptr, end))
1096
19
      return -XML_TOK_CLOSE_BRACKET;
1097
1.70k
    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1098
91
      REQUIRE_CHARS(enc, ptr, end, 2);
1099
83
      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
1100
9
        *nextTokPtr = ptr + 2 * MINBPC(enc);
1101
9
        return XML_TOK_COND_SECT_CLOSE;
1102
9
      }
1103
83
    }
1104
1.69k
    *nextTokPtr = ptr;
1105
1.69k
    return XML_TOK_CLOSE_BRACKET;
1106
225k
  case BT_LPAR:
1107
225k
    *nextTokPtr = ptr + MINBPC(enc);
1108
225k
    return XML_TOK_OPEN_PAREN;
1109
168k
  case BT_RPAR:
1110
168k
    ptr += MINBPC(enc);
1111
168k
    if (! HAS_CHAR(enc, ptr, end))
1112
86
      return -XML_TOK_CLOSE_PAREN;
1113
168k
    switch (BYTE_TYPE(enc, ptr)) {
1114
1.62k
    case BT_AST:
1115
1.62k
      *nextTokPtr = ptr + MINBPC(enc);
1116
1.62k
      return XML_TOK_CLOSE_PAREN_ASTERISK;
1117
920
    case BT_QUEST:
1118
920
      *nextTokPtr = ptr + MINBPC(enc);
1119
920
      return XML_TOK_CLOSE_PAREN_QUESTION;
1120
52.7k
    case BT_PLUS:
1121
52.7k
      *nextTokPtr = ptr + MINBPC(enc);
1122
52.7k
      return XML_TOK_CLOSE_PAREN_PLUS;
1123
126
    case BT_CR:
1124
831
    case BT_LF:
1125
79.4k
    case BT_S:
1126
80.8k
    case BT_GT:
1127
108k
    case BT_COMMA:
1128
108k
    case BT_VERBAR:
1129
113k
    case BT_RPAR:
1130
113k
      *nextTokPtr = ptr;
1131
113k
      return XML_TOK_CLOSE_PAREN;
1132
168k
    }
1133
66
    *nextTokPtr = ptr;
1134
66
    return XML_TOK_INVALID;
1135
285k
  case BT_VERBAR:
1136
285k
    *nextTokPtr = ptr + MINBPC(enc);
1137
285k
    return XML_TOK_OR;
1138
99.2k
  case BT_GT:
1139
99.2k
    *nextTokPtr = ptr + MINBPC(enc);
1140
99.2k
    return XML_TOK_DECL_CLOSE;
1141
4.70k
  case BT_NUM:
1142
4.70k
    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1143
0
#  define LEAD_CASE(n)                                                         \
1144
0
  case BT_LEAD##n:                                                             \
1145
0
    if (end - ptr < n)                                                         \
1146
0
      return XML_TOK_PARTIAL_CHAR;                                             \
1147
0
    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
1148
0
      *nextTokPtr = ptr;                                                       \
1149
0
      return XML_TOK_INVALID;                                                  \
1150
0
    }                                                                          \
1151
0
    if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
1152
0
      ptr += n;                                                                \
1153
0
      tok = XML_TOK_NAME;                                                      \
1154
0
      break;                                                                   \
1155
0
    }                                                                          \
1156
0
    if (IS_NAME_CHAR(enc, ptr, n)) {                                           \
1157
0
      ptr += n;                                                                \
1158
0
      tok = XML_TOK_NMTOKEN;                                                   \
1159
0
      break;                                                                   \
1160
0
    }                                                                          \
1161
0
    *nextTokPtr = ptr;                                                         \
1162
0
    return XML_TOK_INVALID;
1163
0
    LEAD_CASE(2)
1164
0
    LEAD_CASE(3)
1165
231
    LEAD_CASE(4)
1166
0
#  undef LEAD_CASE
1167
202k
  case BT_NMSTRT:
1168
329k
  case BT_HEX:
1169
329k
    tok = XML_TOK_NAME;
1170
329k
    ptr += MINBPC(enc);
1171
329k
    break;
1172
12.1k
  case BT_DIGIT:
1173
12.7k
  case BT_NAME:
1174
12.8k
  case BT_MINUS:
1175
12.8k
#  ifdef XML_NS
1176
70.7k
  case BT_COLON:
1177
70.7k
#  endif
1178
70.7k
    tok = XML_TOK_NMTOKEN;
1179
70.7k
    ptr += MINBPC(enc);
1180
70.7k
    break;
1181
215k
  case BT_NONASCII:
1182
215k
    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
1183
215k
      ptr += MINBPC(enc);
1184
215k
      tok = XML_TOK_NAME;
1185
215k
      break;
1186
215k
    }
1187
502
    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
1188
367
      ptr += MINBPC(enc);
1189
367
      tok = XML_TOK_NMTOKEN;
1190
367
      break;
1191
367
    }
1192
    /* fall through */
1193
265
  default:
1194
265
    *nextTokPtr = ptr;
1195
265
    return XML_TOK_INVALID;
1196
2.22M
  }
1197
4.64M
  while (HAS_CHAR(enc, ptr, end)) {
1198
4.64M
    switch (BYTE_TYPE(enc, ptr)) {
1199
18.1M
      CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1200
516
    case BT_GT:
1201
17.9k
    case BT_RPAR:
1202
20.2k
    case BT_COMMA:
1203
303k
    case BT_VERBAR:
1204
306k
    case BT_LSQB:
1205
306k
    case BT_PERCNT:
1206
343k
    case BT_S:
1207
346k
    case BT_CR:
1208
516k
    case BT_LF:
1209
516k
      *nextTokPtr = ptr;
1210
516k
      return tok;
1211
0
#  ifdef XML_NS
1212
223k
    case BT_COLON:
1213
223k
      ptr += MINBPC(enc);
1214
223k
      switch (tok) {
1215
152k
      case XML_TOK_NAME:
1216
152k
        REQUIRE_CHAR(enc, ptr, end);
1217
151k
        tok = XML_TOK_PREFIXED_NAME;
1218
151k
        switch (BYTE_TYPE(enc, ptr)) {
1219
421k
          CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1220
38.5k
        default:
1221
38.5k
          tok = XML_TOK_NMTOKEN;
1222
38.5k
          break;
1223
151k
        }
1224
151k
        break;
1225
151k
      case XML_TOK_PREFIXED_NAME:
1226
58.4k
        tok = XML_TOK_NMTOKEN;
1227
58.4k
        break;
1228
223k
      }
1229
223k
      break;
1230
223k
#  endif
1231
223k
    case BT_PLUS:
1232
96.3k
      if (tok == XML_TOK_NMTOKEN) {
1233
8
        *nextTokPtr = ptr;
1234
8
        return XML_TOK_INVALID;
1235
8
      }
1236
96.3k
      *nextTokPtr = ptr + MINBPC(enc);
1237
96.3k
      return XML_TOK_NAME_PLUS;
1238
414
    case BT_AST:
1239
414
      if (tok == XML_TOK_NMTOKEN) {
1240
9
        *nextTokPtr = ptr;
1241
9
        return XML_TOK_INVALID;
1242
9
      }
1243
405
      *nextTokPtr = ptr + MINBPC(enc);
1244
405
      return XML_TOK_NAME_ASTERISK;
1245
835
    case BT_QUEST:
1246
835
      if (tok == XML_TOK_NMTOKEN) {
1247
7
        *nextTokPtr = ptr;
1248
7
        return XML_TOK_INVALID;
1249
7
      }
1250
828
      *nextTokPtr = ptr + MINBPC(enc);
1251
828
      return XML_TOK_NAME_QUESTION;
1252
156
    default:
1253
156
      *nextTokPtr = ptr;
1254
156
      return XML_TOK_INVALID;
1255
4.64M
    }
1256
4.64M
  }
1257
1.66k
  return -tok;
1258
616k
}
1259
1260
static int PTRCALL
1261
PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1262
89.9M
                          const char **nextTokPtr) {
1263
89.9M
  const char *start;
1264
89.9M
  if (ptr >= end)
1265
2.40M
    return XML_TOK_NONE;
1266
87.5M
  else if (! HAS_CHAR(enc, ptr, end)) {
1267
    /* This line cannot be executed.  The incoming data has already
1268
     * been tokenized once, so incomplete characters like this have
1269
     * already been eliminated from the input.  Retaining the paranoia
1270
     * check is still valuable, however.
1271
     */
1272
13
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1273
13
  }
1274
87.5M
  start = ptr;
1275
4.29G
  while (HAS_CHAR(enc, ptr, end)) {
1276
4.29G
    switch (BYTE_TYPE(enc, ptr)) {
1277
0
#  define LEAD_CASE(n)                                                         \
1278
3.07G
  case BT_LEAD##n:                                                             \
1279
3.07G
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1280
3.07G
    break;
1281
1.06G
      LEAD_CASE(2)
1282
1.99G
      LEAD_CASE(3)
1283
4.87M
      LEAD_CASE(4)
1284
0
#  undef LEAD_CASE
1285
4.08M
    case BT_AMP:
1286
4.08M
      if (ptr == start)
1287
2.94M
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1288
1.14M
      *nextTokPtr = ptr;
1289
1.14M
      return XML_TOK_DATA_CHARS;
1290
46
    case BT_LT:
1291
      /* this is for inside entity references */
1292
46
      *nextTokPtr = ptr;
1293
46
      return XML_TOK_INVALID;
1294
62.1M
    case BT_LF:
1295
62.1M
      if (ptr == start) {
1296
42.8M
        *nextTokPtr = ptr + MINBPC(enc);
1297
42.8M
        return XML_TOK_DATA_NEWLINE;
1298
42.8M
      }
1299
19.3M
      *nextTokPtr = ptr;
1300
19.3M
      return XML_TOK_DATA_CHARS;
1301
3.14M
    case BT_CR:
1302
3.14M
      if (ptr == start) {
1303
3.02M
        ptr += MINBPC(enc);
1304
3.02M
        if (! HAS_CHAR(enc, ptr, end))
1305
1.89k
          return XML_TOK_TRAILING_CR;
1306
3.02M
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1307
389k
          ptr += MINBPC(enc);
1308
3.02M
        *nextTokPtr = ptr;
1309
3.02M
        return XML_TOK_DATA_NEWLINE;
1310
3.02M
      }
1311
120k
      *nextTokPtr = ptr;
1312
120k
      return XML_TOK_DATA_CHARS;
1313
15.8M
    case BT_S:
1314
15.8M
      if (ptr == start) {
1315
10.9M
        *nextTokPtr = ptr + MINBPC(enc);
1316
10.9M
        return XML_TOK_ATTRIBUTE_VALUE_S;
1317
10.9M
      }
1318
4.88M
      *nextTokPtr = ptr;
1319
4.88M
      return XML_TOK_DATA_CHARS;
1320
1.13G
    default:
1321
1.13G
      ptr += MINBPC(enc);
1322
1.13G
      break;
1323
4.29G
    }
1324
4.29G
  }
1325
2.36M
  *nextTokPtr = ptr;
1326
2.36M
  return XML_TOK_DATA_CHARS;
1327
87.5M
}
xmltok.c:normal_attributeValueTok
Line
Count
Source
1262
88.9M
                          const char **nextTokPtr) {
1263
88.9M
  const char *start;
1264
88.9M
  if (ptr >= end)
1265
2.39M
    return XML_TOK_NONE;
1266
86.5M
  else if (! HAS_CHAR(enc, ptr, end)) {
1267
    /* This line cannot be executed.  The incoming data has already
1268
     * been tokenized once, so incomplete characters like this have
1269
     * already been eliminated from the input.  Retaining the paranoia
1270
     * check is still valuable, however.
1271
     */
1272
0
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1273
0
  }
1274
86.5M
  start = ptr;
1275
4.27G
  while (HAS_CHAR(enc, ptr, end)) {
1276
4.27G
    switch (BYTE_TYPE(enc, ptr)) {
1277
0
#  define LEAD_CASE(n)                                                         \
1278
0
  case BT_LEAD##n:                                                             \
1279
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1280
0
    break;
1281
1.06G
      LEAD_CASE(2)
1282
1.99G
      LEAD_CASE(3)
1283
4.60M
      LEAD_CASE(4)
1284
0
#  undef LEAD_CASE
1285
3.93M
    case BT_AMP:
1286
3.93M
      if (ptr == start)
1287
2.83M
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1288
1.09M
      *nextTokPtr = ptr;
1289
1.09M
      return XML_TOK_DATA_CHARS;
1290
29
    case BT_LT:
1291
      /* this is for inside entity references */
1292
29
      *nextTokPtr = ptr;
1293
29
      return XML_TOK_INVALID;
1294
61.9M
    case BT_LF:
1295
61.9M
      if (ptr == start) {
1296
42.7M
        *nextTokPtr = ptr + MINBPC(enc);
1297
42.7M
        return XML_TOK_DATA_NEWLINE;
1298
42.7M
      }
1299
19.1M
      *nextTokPtr = ptr;
1300
19.1M
      return XML_TOK_DATA_CHARS;
1301
2.53M
    case BT_CR:
1302
2.53M
      if (ptr == start) {
1303
2.51M
        ptr += MINBPC(enc);
1304
2.51M
        if (! HAS_CHAR(enc, ptr, end))
1305
692
          return XML_TOK_TRAILING_CR;
1306
2.51M
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1307
1.82k
          ptr += MINBPC(enc);
1308
2.51M
        *nextTokPtr = ptr;
1309
2.51M
        return XML_TOK_DATA_NEWLINE;
1310
2.51M
      }
1311
21.8k
      *nextTokPtr = ptr;
1312
21.8k
      return XML_TOK_DATA_CHARS;
1313
15.7M
    case BT_S:
1314
15.7M
      if (ptr == start) {
1315
10.9M
        *nextTokPtr = ptr + MINBPC(enc);
1316
10.9M
        return XML_TOK_ATTRIBUTE_VALUE_S;
1317
10.9M
      }
1318
4.85M
      *nextTokPtr = ptr;
1319
4.85M
      return XML_TOK_DATA_CHARS;
1320
1.11G
    default:
1321
1.11G
      ptr += MINBPC(enc);
1322
1.11G
      break;
1323
4.27G
    }
1324
4.27G
  }
1325
2.35M
  *nextTokPtr = ptr;
1326
2.35M
  return XML_TOK_DATA_CHARS;
1327
86.5M
}
xmltok.c:little2_attributeValueTok
Line
Count
Source
1262
186k
                          const char **nextTokPtr) {
1263
186k
  const char *start;
1264
186k
  if (ptr >= end)
1265
4.60k
    return XML_TOK_NONE;
1266
181k
  else if (! HAS_CHAR(enc, ptr, end)) {
1267
    /* This line cannot be executed.  The incoming data has already
1268
     * been tokenized once, so incomplete characters like this have
1269
     * already been eliminated from the input.  Retaining the paranoia
1270
     * check is still valuable, however.
1271
     */
1272
5
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1273
5
  }
1274
181k
  start = ptr;
1275
5.72M
  while (HAS_CHAR(enc, ptr, end)) {
1276
5.72M
    switch (BYTE_TYPE(enc, ptr)) {
1277
0
#  define LEAD_CASE(n)                                                         \
1278
0
  case BT_LEAD##n:                                                             \
1279
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1280
0
    break;
1281
0
      LEAD_CASE(2)
1282
0
      LEAD_CASE(3)
1283
157k
      LEAD_CASE(4)
1284
0
#  undef LEAD_CASE
1285
26.6k
    case BT_AMP:
1286
26.6k
      if (ptr == start)
1287
16.2k
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1288
10.3k
      *nextTokPtr = ptr;
1289
10.3k
      return XML_TOK_DATA_CHARS;
1290
9
    case BT_LT:
1291
      /* this is for inside entity references */
1292
9
      *nextTokPtr = ptr;
1293
9
      return XML_TOK_INVALID;
1294
20.3k
    case BT_LF:
1295
20.3k
      if (ptr == start) {
1296
10.5k
        *nextTokPtr = ptr + MINBPC(enc);
1297
10.5k
        return XML_TOK_DATA_NEWLINE;
1298
10.5k
      }
1299
9.76k
      *nextTokPtr = ptr;
1300
9.76k
      return XML_TOK_DATA_CHARS;
1301
108k
    case BT_CR:
1302
108k
      if (ptr == start) {
1303
74.1k
        ptr += MINBPC(enc);
1304
74.1k
        if (! HAS_CHAR(enc, ptr, end))
1305
659
          return XML_TOK_TRAILING_CR;
1306
73.5k
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1307
14.7k
          ptr += MINBPC(enc);
1308
73.5k
        *nextTokPtr = ptr;
1309
73.5k
        return XML_TOK_DATA_NEWLINE;
1310
74.1k
      }
1311
34.5k
      *nextTokPtr = ptr;
1312
34.5k
      return XML_TOK_DATA_CHARS;
1313
24.1k
    case BT_S:
1314
24.1k
      if (ptr == start) {
1315
14.5k
        *nextTokPtr = ptr + MINBPC(enc);
1316
14.5k
        return XML_TOK_ATTRIBUTE_VALUE_S;
1317
14.5k
      }
1318
9.56k
      *nextTokPtr = ptr;
1319
9.56k
      return XML_TOK_DATA_CHARS;
1320
5.38M
    default:
1321
5.38M
      ptr += MINBPC(enc);
1322
5.38M
      break;
1323
5.72M
    }
1324
5.72M
  }
1325
1.74k
  *nextTokPtr = ptr;
1326
1.74k
  return XML_TOK_DATA_CHARS;
1327
181k
}
xmltok.c:big2_attributeValueTok
Line
Count
Source
1262
876k
                          const char **nextTokPtr) {
1263
876k
  const char *start;
1264
876k
  if (ptr >= end)
1265
7.43k
    return XML_TOK_NONE;
1266
869k
  else if (! HAS_CHAR(enc, ptr, end)) {
1267
    /* This line cannot be executed.  The incoming data has already
1268
     * been tokenized once, so incomplete characters like this have
1269
     * already been eliminated from the input.  Retaining the paranoia
1270
     * check is still valuable, however.
1271
     */
1272
8
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1273
8
  }
1274
869k
  start = ptr;
1275
17.9M
  while (HAS_CHAR(enc, ptr, end)) {
1276
17.9M
    switch (BYTE_TYPE(enc, ptr)) {
1277
0
#  define LEAD_CASE(n)                                                         \
1278
0
  case BT_LEAD##n:                                                             \
1279
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1280
0
    break;
1281
0
      LEAD_CASE(2)
1282
0
      LEAD_CASE(3)
1283
111k
      LEAD_CASE(4)
1284
0
#  undef LEAD_CASE
1285
125k
    case BT_AMP:
1286
125k
      if (ptr == start)
1287
92.6k
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1288
32.5k
      *nextTokPtr = ptr;
1289
32.5k
      return XML_TOK_DATA_CHARS;
1290
8
    case BT_LT:
1291
      /* this is for inside entity references */
1292
8
      *nextTokPtr = ptr;
1293
8
      return XML_TOK_INVALID;
1294
201k
    case BT_LF:
1295
201k
      if (ptr == start) {
1296
106k
        *nextTokPtr = ptr + MINBPC(enc);
1297
106k
        return XML_TOK_DATA_NEWLINE;
1298
106k
      }
1299
94.9k
      *nextTokPtr = ptr;
1300
94.9k
      return XML_TOK_DATA_CHARS;
1301
496k
    case BT_CR:
1302
496k
      if (ptr == start) {
1303
432k
        ptr += MINBPC(enc);
1304
432k
        if (! HAS_CHAR(enc, ptr, end))
1305
547
          return XML_TOK_TRAILING_CR;
1306
432k
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1307
373k
          ptr += MINBPC(enc);
1308
432k
        *nextTokPtr = ptr;
1309
432k
        return XML_TOK_DATA_NEWLINE;
1310
432k
      }
1311
64.2k
      *nextTokPtr = ptr;
1312
64.2k
      return XML_TOK_DATA_CHARS;
1313
42.1k
    case BT_S:
1314
42.1k
      if (ptr == start) {
1315
24.3k
        *nextTokPtr = ptr + MINBPC(enc);
1316
24.3k
        return XML_TOK_ATTRIBUTE_VALUE_S;
1317
24.3k
      }
1318
17.8k
      *nextTokPtr = ptr;
1319
17.8k
      return XML_TOK_DATA_CHARS;
1320
17.0M
    default:
1321
17.0M
      ptr += MINBPC(enc);
1322
17.0M
      break;
1323
17.9M
    }
1324
17.9M
  }
1325
3.50k
  *nextTokPtr = ptr;
1326
3.50k
  return XML_TOK_DATA_CHARS;
1327
869k
}
1328
1329
static int PTRCALL
1330
PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
1331
3.62M
                       const char **nextTokPtr) {
1332
3.62M
  const char *start;
1333
3.62M
  if (ptr >= end)
1334
34.8k
    return XML_TOK_NONE;
1335
3.58M
  else if (! HAS_CHAR(enc, ptr, end)) {
1336
    /* This line cannot be executed.  The incoming data has already
1337
     * been tokenized once, so incomplete characters like this have
1338
     * already been eliminated from the input.  Retaining the paranoia
1339
     * check is still valuable, however.
1340
     */
1341
98
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1342
98
  }
1343
3.58M
  start = ptr;
1344
284M
  while (HAS_CHAR(enc, ptr, end)) {
1345
284M
    switch (BYTE_TYPE(enc, ptr)) {
1346
0
#  define LEAD_CASE(n)                                                         \
1347
3.46M
  case BT_LEAD##n:                                                             \
1348
3.46M
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1349
3.46M
    break;
1350
144k
      LEAD_CASE(2)
1351
30.7k
      LEAD_CASE(3)
1352
3.28M
      LEAD_CASE(4)
1353
0
#  undef LEAD_CASE
1354
585k
    case BT_AMP:
1355
585k
      if (ptr == start)
1356
314k
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1357
271k
      *nextTokPtr = ptr;
1358
271k
      return XML_TOK_DATA_CHARS;
1359
1.55k
    case BT_PERCNT:
1360
1.55k
      if (ptr == start) {
1361
852
        int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1362
852
        return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1363
852
      }
1364
705
      *nextTokPtr = ptr;
1365
705
      return XML_TOK_DATA_CHARS;
1366
1.00M
    case BT_LF:
1367
1.00M
      if (ptr == start) {
1368
577k
        *nextTokPtr = ptr + MINBPC(enc);
1369
577k
        return XML_TOK_DATA_NEWLINE;
1370
577k
      }
1371
425k
      *nextTokPtr = ptr;
1372
425k
      return XML_TOK_DATA_CHARS;
1373
1.97M
    case BT_CR:
1374
1.97M
      if (ptr == start) {
1375
1.08M
        ptr += MINBPC(enc);
1376
1.08M
        if (! HAS_CHAR(enc, ptr, end))
1377
7.65k
          return XML_TOK_TRAILING_CR;
1378
1.07M
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1379
93.6k
          ptr += MINBPC(enc);
1380
1.07M
        *nextTokPtr = ptr;
1381
1.07M
        return XML_TOK_DATA_NEWLINE;
1382
1.08M
      }
1383
895k
      *nextTokPtr = ptr;
1384
895k
      return XML_TOK_DATA_CHARS;
1385
277M
    default:
1386
277M
      ptr += MINBPC(enc);
1387
277M
      break;
1388
284M
    }
1389
284M
  }
1390
20.4k
  *nextTokPtr = ptr;
1391
20.4k
  return XML_TOK_DATA_CHARS;
1392
3.58M
}
xmltok.c:normal_entityValueTok
Line
Count
Source
1331
1.76M
                       const char **nextTokPtr) {
1332
1.76M
  const char *start;
1333
1.76M
  if (ptr >= end)
1334
19.2k
    return XML_TOK_NONE;
1335
1.74M
  else if (! HAS_CHAR(enc, ptr, end)) {
1336
    /* This line cannot be executed.  The incoming data has already
1337
     * been tokenized once, so incomplete characters like this have
1338
     * already been eliminated from the input.  Retaining the paranoia
1339
     * check is still valuable, however.
1340
     */
1341
0
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1342
0
  }
1343
1.74M
  start = ptr;
1344
184M
  while (HAS_CHAR(enc, ptr, end)) {
1345
184M
    switch (BYTE_TYPE(enc, ptr)) {
1346
0
#  define LEAD_CASE(n)                                                         \
1347
0
  case BT_LEAD##n:                                                             \
1348
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1349
0
    break;
1350
144k
      LEAD_CASE(2)
1351
30.7k
      LEAD_CASE(3)
1352
424
      LEAD_CASE(4)
1353
0
#  undef LEAD_CASE
1354
511k
    case BT_AMP:
1355
511k
      if (ptr == start)
1356
272k
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1357
238k
      *nextTokPtr = ptr;
1358
238k
      return XML_TOK_DATA_CHARS;
1359
1.47k
    case BT_PERCNT:
1360
1.47k
      if (ptr == start) {
1361
804
        int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1362
804
        return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1363
804
      }
1364
673
      *nextTokPtr = ptr;
1365
673
      return XML_TOK_DATA_CHARS;
1366
352k
    case BT_LF:
1367
352k
      if (ptr == start) {
1368
234k
        *nextTokPtr = ptr + MINBPC(enc);
1369
234k
        return XML_TOK_DATA_NEWLINE;
1370
234k
      }
1371
118k
      *nextTokPtr = ptr;
1372
118k
      return XML_TOK_DATA_CHARS;
1373
867k
    case BT_CR:
1374
867k
      if (ptr == start) {
1375
454k
        ptr += MINBPC(enc);
1376
454k
        if (! HAS_CHAR(enc, ptr, end))
1377
1.00k
          return XML_TOK_TRAILING_CR;
1378
453k
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1379
1.82k
          ptr += MINBPC(enc);
1380
453k
        *nextTokPtr = ptr;
1381
453k
        return XML_TOK_DATA_NEWLINE;
1382
454k
      }
1383
413k
      *nextTokPtr = ptr;
1384
413k
      return XML_TOK_DATA_CHARS;
1385
182M
    default:
1386
182M
      ptr += MINBPC(enc);
1387
182M
      break;
1388
184M
    }
1389
184M
  }
1390
11.4k
  *nextTokPtr = ptr;
1391
11.4k
  return XML_TOK_DATA_CHARS;
1392
1.74M
}
xmltok.c:little2_entityValueTok
Line
Count
Source
1331
1.01M
                       const char **nextTokPtr) {
1332
1.01M
  const char *start;
1333
1.01M
  if (ptr >= end)
1334
8.25k
    return XML_TOK_NONE;
1335
1.00M
  else if (! HAS_CHAR(enc, ptr, end)) {
1336
    /* This line cannot be executed.  The incoming data has already
1337
     * been tokenized once, so incomplete characters like this have
1338
     * already been eliminated from the input.  Retaining the paranoia
1339
     * check is still valuable, however.
1340
     */
1341
52
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1342
52
  }
1343
1.00M
  start = ptr;
1344
46.7M
  while (HAS_CHAR(enc, ptr, end)) {
1345
46.7M
    switch (BYTE_TYPE(enc, ptr)) {
1346
0
#  define LEAD_CASE(n)                                                         \
1347
0
  case BT_LEAD##n:                                                             \
1348
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1349
0
    break;
1350
0
      LEAD_CASE(2)
1351
0
      LEAD_CASE(3)
1352
1.34M
      LEAD_CASE(4)
1353
0
#  undef LEAD_CASE
1354
39.0k
    case BT_AMP:
1355
39.0k
      if (ptr == start)
1356
23.4k
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1357
15.6k
      *nextTokPtr = ptr;
1358
15.6k
      return XML_TOK_DATA_CHARS;
1359
29
    case BT_PERCNT:
1360
29
      if (ptr == start) {
1361
16
        int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1362
16
        return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1363
16
      }
1364
13
      *nextTokPtr = ptr;
1365
13
      return XML_TOK_DATA_CHARS;
1366
430k
    case BT_LF:
1367
430k
      if (ptr == start) {
1368
221k
        *nextTokPtr = ptr + MINBPC(enc);
1369
221k
        return XML_TOK_DATA_NEWLINE;
1370
221k
      }
1371
209k
      *nextTokPtr = ptr;
1372
209k
      return XML_TOK_DATA_CHARS;
1373
531k
    case BT_CR:
1374
531k
      if (ptr == start) {
1375
286k
        ptr += MINBPC(enc);
1376
286k
        if (! HAS_CHAR(enc, ptr, end))
1377
4.73k
          return XML_TOK_TRAILING_CR;
1378
281k
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1379
3.34k
          ptr += MINBPC(enc);
1380
281k
        *nextTokPtr = ptr;
1381
281k
        return XML_TOK_DATA_NEWLINE;
1382
286k
      }
1383
245k
      *nextTokPtr = ptr;
1384
245k
      return XML_TOK_DATA_CHARS;
1385
44.4M
    default:
1386
44.4M
      ptr += MINBPC(enc);
1387
44.4M
      break;
1388
46.7M
    }
1389
46.7M
  }
1390
3.51k
  *nextTokPtr = ptr;
1391
3.51k
  return XML_TOK_DATA_CHARS;
1392
1.00M
}
xmltok.c:big2_entityValueTok
Line
Count
Source
1331
844k
                       const char **nextTokPtr) {
1332
844k
  const char *start;
1333
844k
  if (ptr >= end)
1334
7.37k
    return XML_TOK_NONE;
1335
837k
  else if (! HAS_CHAR(enc, ptr, end)) {
1336
    /* This line cannot be executed.  The incoming data has already
1337
     * been tokenized once, so incomplete characters like this have
1338
     * already been eliminated from the input.  Retaining the paranoia
1339
     * check is still valuable, however.
1340
     */
1341
46
    return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */
1342
46
  }
1343
837k
  start = ptr;
1344
53.1M
  while (HAS_CHAR(enc, ptr, end)) {
1345
53.1M
    switch (BYTE_TYPE(enc, ptr)) {
1346
0
#  define LEAD_CASE(n)                                                         \
1347
0
  case BT_LEAD##n:                                                             \
1348
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1349
0
    break;
1350
0
      LEAD_CASE(2)
1351
0
      LEAD_CASE(3)
1352
1.94M
      LEAD_CASE(4)
1353
0
#  undef LEAD_CASE
1354
35.1k
    case BT_AMP:
1355
35.1k
      if (ptr == start)
1356
18.5k
        return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1357
16.6k
      *nextTokPtr = ptr;
1358
16.6k
      return XML_TOK_DATA_CHARS;
1359
51
    case BT_PERCNT:
1360
51
      if (ptr == start) {
1361
32
        int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
1362
32
        return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
1363
32
      }
1364
19
      *nextTokPtr = ptr;
1365
19
      return XML_TOK_DATA_CHARS;
1366
219k
    case BT_LF:
1367
219k
      if (ptr == start) {
1368
121k
        *nextTokPtr = ptr + MINBPC(enc);
1369
121k
        return XML_TOK_DATA_NEWLINE;
1370
121k
      }
1371
97.6k
      *nextTokPtr = ptr;
1372
97.6k
      return XML_TOK_DATA_CHARS;
1373
577k
    case BT_CR:
1374
577k
      if (ptr == start) {
1375
339k
        ptr += MINBPC(enc);
1376
339k
        if (! HAS_CHAR(enc, ptr, end))
1377
1.90k
          return XML_TOK_TRAILING_CR;
1378
337k
        if (BYTE_TYPE(enc, ptr) == BT_LF)
1379
88.5k
          ptr += MINBPC(enc);
1380
337k
        *nextTokPtr = ptr;
1381
337k
        return XML_TOK_DATA_NEWLINE;
1382
339k
      }
1383
237k
      *nextTokPtr = ptr;
1384
237k
      return XML_TOK_DATA_CHARS;
1385
50.3M
    default:
1386
50.3M
      ptr += MINBPC(enc);
1387
50.3M
      break;
1388
53.1M
    }
1389
53.1M
  }
1390
5.46k
  *nextTokPtr = ptr;
1391
5.46k
  return XML_TOK_DATA_CHARS;
1392
837k
}
1393
1394
#  ifdef XML_DTD
1395
1396
static int PTRCALL
1397
PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
1398
0
                         const char **nextTokPtr) {
1399
0
  int level = 0;
1400
0
  if (MINBPC(enc) > 1) {
1401
0
    size_t n = end - ptr;
1402
0
    if (n & (MINBPC(enc) - 1)) {
1403
0
      n &= ~(MINBPC(enc) - 1);
1404
0
      end = ptr + n;
1405
0
    }
1406
0
  }
1407
0
  while (HAS_CHAR(enc, ptr, end)) {
1408
0
    switch (BYTE_TYPE(enc, ptr)) {
1409
0
      INVALID_CASES(ptr, nextTokPtr)
1410
0
    case BT_LT:
1411
0
      ptr += MINBPC(enc);
1412
0
      REQUIRE_CHAR(enc, ptr, end);
1413
0
      if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
1414
0
        ptr += MINBPC(enc);
1415
0
        REQUIRE_CHAR(enc, ptr, end);
1416
0
        if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
1417
0
          ++level;
1418
0
          ptr += MINBPC(enc);
1419
0
        }
1420
0
      }
1421
0
      break;
1422
0
    case BT_RSQB:
1423
0
      ptr += MINBPC(enc);
1424
0
      REQUIRE_CHAR(enc, ptr, end);
1425
0
      if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
1426
0
        ptr += MINBPC(enc);
1427
0
        REQUIRE_CHAR(enc, ptr, end);
1428
0
        if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
1429
0
          ptr += MINBPC(enc);
1430
0
          if (level == 0) {
1431
0
            *nextTokPtr = ptr;
1432
0
            return XML_TOK_IGNORE_SECT;
1433
0
          }
1434
0
          --level;
1435
0
        }
1436
0
      }
1437
0
      break;
1438
0
    default:
1439
0
      ptr += MINBPC(enc);
1440
0
      break;
1441
0
    }
1442
0
  }
1443
0
  return XML_TOK_PARTIAL;
1444
0
}
Unexecuted instantiation: xmltok.c:normal_ignoreSectionTok
Unexecuted instantiation: xmltok.c:little2_ignoreSectionTok
Unexecuted instantiation: xmltok.c:big2_ignoreSectionTok
1445
1446
#  endif /* XML_DTD */
1447
1448
static int PTRCALL
1449
PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
1450
140k
                   const char **badPtr) {
1451
140k
  ptr += MINBPC(enc);
1452
140k
  end -= MINBPC(enc);
1453
15.1M
  for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1454
15.1M
    switch (BYTE_TYPE(enc, ptr)) {
1455
264k
    case BT_DIGIT:
1456
10.1M
    case BT_HEX:
1457
10.1M
    case BT_MINUS:
1458
10.1M
    case BT_APOS:
1459
10.1M
    case BT_LPAR:
1460
10.2M
    case BT_RPAR:
1461
10.2M
    case BT_PLUS:
1462
10.2M
    case BT_COMMA:
1463
10.2M
    case BT_SOL:
1464
10.2M
    case BT_EQUALS:
1465
10.3M
    case BT_QUEST:
1466
11.0M
    case BT_CR:
1467
11.0M
    case BT_LF:
1468
11.1M
    case BT_SEMI:
1469
11.2M
    case BT_EXCL:
1470
11.2M
    case BT_AST:
1471
11.2M
    case BT_PERCNT:
1472
11.3M
    case BT_NUM:
1473
11.3M
#  ifdef XML_NS
1474
11.3M
    case BT_COLON:
1475
11.3M
#  endif
1476
11.3M
      break;
1477
62.0k
    case BT_S:
1478
62.0k
      if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1479
23
        *badPtr = ptr;
1480
23
        return 0;
1481
23
      }
1482
62.0k
      break;
1483
112k
    case BT_NAME:
1484
3.21M
    case BT_NMSTRT:
1485
3.21M
      if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1486
3.21M
        break;
1487
      /* fall through */
1488
514k
    default:
1489
514k
      switch (BYTE_TO_ASCII(enc, ptr)) {
1490
491k
      case 0x24: /* $ */
1491
514k
      case 0x40: /* @ */
1492
514k
        break;
1493
443
      default:
1494
443
        *badPtr = ptr;
1495
443
        return 0;
1496
514k
      }
1497
514k
      break;
1498
15.1M
    }
1499
15.1M
  }
1500
140k
  return 1;
1501
140k
}
xmltok.c:normal_isPublicId
Line
Count
Source
1450
49.2k
                   const char **badPtr) {
1451
49.2k
  ptr += MINBPC(enc);
1452
49.2k
  end -= MINBPC(enc);
1453
14.6M
  for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1454
14.6M
    switch (BYTE_TYPE(enc, ptr)) {
1455
244k
    case BT_DIGIT:
1456
10.1M
    case BT_HEX:
1457
10.1M
    case BT_MINUS:
1458
10.1M
    case BT_APOS:
1459
10.1M
    case BT_LPAR:
1460
10.1M
    case BT_RPAR:
1461
10.1M
    case BT_PLUS:
1462
10.1M
    case BT_COMMA:
1463
10.1M
    case BT_SOL:
1464
10.1M
    case BT_EQUALS:
1465
10.1M
    case BT_QUEST:
1466
10.8M
    case BT_CR:
1467
10.9M
    case BT_LF:
1468
10.9M
    case BT_SEMI:
1469
10.9M
    case BT_EXCL:
1470
10.9M
    case BT_AST:
1471
10.9M
    case BT_PERCNT:
1472
11.0M
    case BT_NUM:
1473
11.0M
#  ifdef XML_NS
1474
11.0M
    case BT_COLON:
1475
11.0M
#  endif
1476
11.0M
      break;
1477
13.0k
    case BT_S:
1478
13.0k
      if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1479
13
        *badPtr = ptr;
1480
13
        return 0;
1481
13
      }
1482
13.0k
      break;
1483
86.6k
    case BT_NAME:
1484
3.16M
    case BT_NMSTRT:
1485
3.16M
      if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1486
3.16M
        break;
1487
      /* fall through */
1488
482k
    default:
1489
482k
      switch (BYTE_TO_ASCII(enc, ptr)) {
1490
480k
      case 0x24: /* $ */
1491
482k
      case 0x40: /* @ */
1492
482k
        break;
1493
47
      default:
1494
47
        *badPtr = ptr;
1495
47
        return 0;
1496
482k
      }
1497
482k
      break;
1498
14.6M
    }
1499
14.6M
  }
1500
49.1k
  return 1;
1501
49.2k
}
xmltok.c:little2_isPublicId
Line
Count
Source
1450
6.09k
                   const char **badPtr) {
1451
6.09k
  ptr += MINBPC(enc);
1452
6.09k
  end -= MINBPC(enc);
1453
211k
  for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1454
211k
    switch (BYTE_TYPE(enc, ptr)) {
1455
13.9k
    case BT_DIGIT:
1456
20.4k
    case BT_HEX:
1457
28.1k
    case BT_MINUS:
1458
29.2k
    case BT_APOS:
1459
30.8k
    case BT_LPAR:
1460
61.9k
    case BT_RPAR:
1461
69.3k
    case BT_PLUS:
1462
78.7k
    case BT_COMMA:
1463
79.6k
    case BT_SOL:
1464
80.6k
    case BT_EQUALS:
1465
81.5k
    case BT_QUEST:
1466
81.9k
    case BT_CR:
1467
82.6k
    case BT_LF:
1468
84.8k
    case BT_SEMI:
1469
98.2k
    case BT_EXCL:
1470
131k
    case BT_AST:
1471
135k
    case BT_PERCNT:
1472
148k
    case BT_NUM:
1473
148k
#  ifdef XML_NS
1474
155k
    case BT_COLON:
1475
155k
#  endif
1476
155k
      break;
1477
13.2k
    case BT_S:
1478
13.2k
      if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1479
5
        *badPtr = ptr;
1480
5
        return 0;
1481
5
      }
1482
13.2k
      break;
1483
13.2k
    case BT_NAME:
1484
27.5k
    case BT_NMSTRT:
1485
27.5k
      if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1486
27.4k
        break;
1487
      /* fall through */
1488
15.2k
    default:
1489
15.2k
      switch (BYTE_TO_ASCII(enc, ptr)) {
1490
6.56k
      case 0x24: /* $ */
1491
14.9k
      case 0x40: /* @ */
1492
14.9k
        break;
1493
226
      default:
1494
226
        *badPtr = ptr;
1495
226
        return 0;
1496
15.2k
      }
1497
14.9k
      break;
1498
211k
    }
1499
211k
  }
1500
5.86k
  return 1;
1501
6.09k
}
xmltok.c:big2_isPublicId
Line
Count
Source
1450
85.6k
                   const char **badPtr) {
1451
85.6k
  ptr += MINBPC(enc);
1452
85.6k
  end -= MINBPC(enc);
1453
238k
  for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
1454
238k
    switch (BYTE_TYPE(enc, ptr)) {
1455
5.89k
    case BT_DIGIT:
1456
14.6k
    case BT_HEX:
1457
22.0k
    case BT_MINUS:
1458
24.1k
    case BT_APOS:
1459
28.3k
    case BT_LPAR:
1460
56.3k
    case BT_RPAR:
1461
60.3k
    case BT_PLUS:
1462
72.5k
    case BT_COMMA:
1463
73.6k
    case BT_SOL:
1464
76.4k
    case BT_EQUALS:
1465
76.8k
    case BT_QUEST:
1466
78.0k
    case BT_CR:
1467
79.8k
    case BT_LF:
1468
117k
    case BT_SEMI:
1469
127k
    case BT_EXCL:
1470
160k
    case BT_AST:
1471
162k
    case BT_PERCNT:
1472
165k
    case BT_NUM:
1473
165k
#  ifdef XML_NS
1474
168k
    case BT_COLON:
1475
168k
#  endif
1476
168k
      break;
1477
35.7k
    case BT_S:
1478
35.7k
      if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
1479
5
        *badPtr = ptr;
1480
5
        return 0;
1481
5
      }
1482
35.7k
      break;
1483
35.7k
    case BT_NAME:
1484
18.4k
    case BT_NMSTRT:
1485
18.4k
      if (! (BYTE_TO_ASCII(enc, ptr) & ~0x7f))
1486
18.4k
        break;
1487
      /* fall through */
1488
16.5k
    default:
1489
16.5k
      switch (BYTE_TO_ASCII(enc, ptr)) {
1490
4.28k
      case 0x24: /* $ */
1491
16.3k
      case 0x40: /* @ */
1492
16.3k
        break;
1493
170
      default:
1494
170
        *badPtr = ptr;
1495
170
        return 0;
1496
16.5k
      }
1497
16.3k
      break;
1498
238k
    }
1499
238k
  }
1500
85.4k
  return 1;
1501
85.6k
}
1502
1503
/* This must only be called for a well-formed start-tag or empty
1504
   element tag.  Returns the number of attributes.  Pointers to the
1505
   first attsMax attributes are stored in atts.
1506
*/
1507
1508
static int PTRCALL
1509
PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
1510
21.9M
                ATTRIBUTE *atts) {
1511
21.9M
  enum { other, inName, inValue } state = inName;
1512
21.9M
  int nAtts = 0;
1513
21.9M
  int open = 0; /* defined when state == inValue;
1514
                   initialization just to shut up compilers */
1515
1516
237M
  for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1517
237M
    switch (BYTE_TYPE(enc, ptr)) {
1518
0
#  define START_NAME                                                           \
1519
177M
    if (state == other) {                                                      \
1520
4.84M
      if (nAtts < attsMax) {                                                   \
1521
3.05M
        atts[nAtts].name = ptr;                                                \
1522
3.05M
        atts[nAtts].normalized = 1;                                            \
1523
3.05M
      }                                                                        \
1524
4.84M
      state = inName;                                                          \
1525
4.84M
    }
1526
0
#  define LEAD_CASE(n)                                                         \
1527
10.5M
  case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
1528
10.5M
    START_NAME ptr += (n - MINBPC(enc));                                       \
1529
10.5M
    break;
1530
2.41M
      LEAD_CASE(2)
1531
7.72M
      LEAD_CASE(3)
1532
397k
      LEAD_CASE(4)
1533
0
#  undef LEAD_CASE
1534
28.7M
    case BT_NONASCII:
1535
110M
    case BT_NMSTRT:
1536
167M
    case BT_HEX:
1537
167M
      START_NAME
1538
167M
      break;
1539
0
#  undef START_NAME
1540
2.97M
    case BT_QUOT:
1541
2.97M
      if (state != inValue) {
1542
1.47M
        if (nAtts < attsMax)
1543
1.33M
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1544
1.47M
        state = inValue;
1545
1.47M
        open = BT_QUOT;
1546
1.50M
      } else if (open == BT_QUOT) {
1547
1.47M
        state = other;
1548
1.47M
        if (nAtts < attsMax)
1549
1.33M
          atts[nAtts].valueEnd = ptr;
1550
1.47M
        nAtts++;
1551
1.47M
      }
1552
2.97M
      break;
1553
6.78M
    case BT_APOS:
1554
6.78M
      if (state != inValue) {
1555
3.37M
        if (nAtts < attsMax)
1556
1.72M
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1557
3.37M
        state = inValue;
1558
3.37M
        open = BT_APOS;
1559
3.41M
      } else if (open == BT_APOS) {
1560
3.37M
        state = other;
1561
3.37M
        if (nAtts < attsMax)
1562
1.72M
          atts[nAtts].valueEnd = ptr;
1563
3.37M
        nAtts++;
1564
3.37M
      }
1565
6.78M
      break;
1566
194k
    case BT_AMP:
1567
194k
      if (nAtts < attsMax)
1568
191k
        atts[nAtts].normalized = 0;
1569
194k
      break;
1570
7.68M
    case BT_S:
1571
7.68M
      if (state == inName)
1572
3.45M
        state = other;
1573
4.23M
      else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized
1574
286k
               && (ptr == atts[nAtts].valuePtr
1575
230k
                   || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1576
206k
                   || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1577
100k
                   || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1578
275k
        atts[nAtts].normalized = 0;
1579
7.68M
      break;
1580
5.43M
    case BT_CR:
1581
6.26M
    case BT_LF:
1582
      /* This case ensures that the first attribute name is counted
1583
         Apart from that we could just change state on the quote. */
1584
6.26M
      if (state == inName)
1585
222k
        state = other;
1586
6.04M
      else if (state == inValue && nAtts < attsMax)
1587
3.81M
        atts[nAtts].normalized = 0;
1588
6.26M
      break;
1589
19.6M
    case BT_GT:
1590
22.3M
    case BT_SOL:
1591
22.3M
      if (state != inValue)
1592
21.9M
        return nAtts;
1593
455k
      break;
1594
13.9M
    default:
1595
13.9M
      break;
1596
237M
    }
1597
237M
  }
1598
  /* not reached */
1599
21.9M
}
xmltok.c:normal_getAtts
Line
Count
Source
1510
21.5M
                ATTRIBUTE *atts) {
1511
21.5M
  enum { other, inName, inValue } state = inName;
1512
21.5M
  int nAtts = 0;
1513
21.5M
  int open = 0; /* defined when state == inValue;
1514
                   initialization just to shut up compilers */
1515
1516
200M
  for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1517
200M
    switch (BYTE_TYPE(enc, ptr)) {
1518
0
#  define START_NAME                                                           \
1519
0
    if (state == other) {                                                      \
1520
0
      if (nAtts < attsMax) {                                                   \
1521
0
        atts[nAtts].name = ptr;                                                \
1522
0
        atts[nAtts].normalized = 1;                                            \
1523
0
      }                                                                        \
1524
0
      state = inName;                                                          \
1525
0
    }
1526
0
#  define LEAD_CASE(n)                                                         \
1527
0
  case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
1528
0
    START_NAME ptr += (n - MINBPC(enc));                                       \
1529
0
    break;
1530
2.41M
      LEAD_CASE(2)
1531
7.72M
      LEAD_CASE(3)
1532
10.4k
      LEAD_CASE(4)
1533
0
#  undef LEAD_CASE
1534
0
    case BT_NONASCII:
1535
80.8M
    case BT_NMSTRT:
1536
136M
    case BT_HEX:
1537
136M
      START_NAME
1538
136M
      break;
1539
0
#  undef START_NAME
1540
2.84M
    case BT_QUOT:
1541
2.84M
      if (state != inValue) {
1542
1.41M
        if (nAtts < attsMax)
1543
1.29M
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1544
1.41M
        state = inValue;
1545
1.41M
        open = BT_QUOT;
1546
1.42M
      } else if (open == BT_QUOT) {
1547
1.41M
        state = other;
1548
1.41M
        if (nAtts < attsMax)
1549
1.29M
          atts[nAtts].valueEnd = ptr;
1550
1.41M
        nAtts++;
1551
1.41M
      }
1552
2.84M
      break;
1553
5.59M
    case BT_APOS:
1554
5.59M
      if (state != inValue) {
1555
2.79M
        if (nAtts < attsMax)
1556
1.42M
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1557
2.79M
        state = inValue;
1558
2.79M
        open = BT_APOS;
1559
2.80M
      } else if (open == BT_APOS) {
1560
2.79M
        state = other;
1561
2.79M
        if (nAtts < attsMax)
1562
1.42M
          atts[nAtts].valueEnd = ptr;
1563
2.79M
        nAtts++;
1564
2.79M
      }
1565
5.59M
      break;
1566
80.4k
    case BT_AMP:
1567
80.4k
      if (nAtts < attsMax)
1568
77.7k
        atts[nAtts].normalized = 0;
1569
80.4k
      break;
1570
6.01M
    case BT_S:
1571
6.01M
      if (state == inName)
1572
3.44M
        state = other;
1573
2.57M
      else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized
1574
26.5k
               && (ptr == atts[nAtts].valuePtr
1575
18.3k
                   || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1576
6.24k
                   || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1577
5.40k
                   || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1578
24.8k
        atts[nAtts].normalized = 0;
1579
6.01M
      break;
1580
4.61M
    case BT_CR:
1581
4.82M
    case BT_LF:
1582
      /* This case ensures that the first attribute name is counted
1583
         Apart from that we could just change state on the quote. */
1584
4.82M
      if (state == inName)
1585
162k
        state = other;
1586
4.66M
      else if (state == inValue && nAtts < attsMax)
1587
2.63M
        atts[nAtts].normalized = 0;
1588
4.82M
      break;
1589
18.9M
    case BT_GT:
1590
21.6M
    case BT_SOL:
1591
21.6M
      if (state != inValue)
1592
21.5M
        return nAtts;
1593
112k
      break;
1594
12.5M
    default:
1595
12.5M
      break;
1596
200M
    }
1597
200M
  }
1598
  /* not reached */
1599
21.5M
}
xmltok.c:little2_getAtts
Line
Count
Source
1510
219k
                ATTRIBUTE *atts) {
1511
219k
  enum { other, inName, inValue } state = inName;
1512
219k
  int nAtts = 0;
1513
219k
  int open = 0; /* defined when state == inValue;
1514
                   initialization just to shut up compilers */
1515
1516
17.0M
  for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1517
17.0M
    switch (BYTE_TYPE(enc, ptr)) {
1518
0
#  define START_NAME                                                           \
1519
0
    if (state == other) {                                                      \
1520
0
      if (nAtts < attsMax) {                                                   \
1521
0
        atts[nAtts].name = ptr;                                                \
1522
0
        atts[nAtts].normalized = 1;                                            \
1523
0
      }                                                                        \
1524
0
      state = inName;                                                          \
1525
0
    }
1526
0
#  define LEAD_CASE(n)                                                         \
1527
0
  case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
1528
0
    START_NAME ptr += (n - MINBPC(enc));                                       \
1529
0
    break;
1530
0
      LEAD_CASE(2)
1531
0
      LEAD_CASE(3)
1532
243k
      LEAD_CASE(4)
1533
0
#  undef LEAD_CASE
1534
10.9M
    case BT_NONASCII:
1535
11.8M
    case BT_NMSTRT:
1536
11.9M
    case BT_HEX:
1537
11.9M
      START_NAME
1538
11.9M
      break;
1539
0
#  undef START_NAME
1540
30.8k
    case BT_QUOT:
1541
30.8k
      if (state != inValue) {
1542
11.8k
        if (nAtts < attsMax)
1543
10.3k
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1544
11.8k
        state = inValue;
1545
11.8k
        open = BT_QUOT;
1546
19.0k
      } else if (open == BT_QUOT) {
1547
11.8k
        state = other;
1548
11.8k
        if (nAtts < attsMax)
1549
10.3k
          atts[nAtts].valueEnd = ptr;
1550
11.8k
        nAtts++;
1551
11.8k
      }
1552
30.8k
      break;
1553
1.15M
    case BT_APOS:
1554
1.15M
      if (state != inValue) {
1555
569k
        if (nAtts < attsMax)
1556
287k
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1557
569k
        state = inValue;
1558
569k
        open = BT_APOS;
1559
586k
      } else if (open == BT_APOS) {
1560
569k
        state = other;
1561
569k
        if (nAtts < attsMax)
1562
287k
          atts[nAtts].valueEnd = ptr;
1563
569k
        nAtts++;
1564
569k
      }
1565
1.15M
      break;
1566
17.1k
    case BT_AMP:
1567
17.1k
      if (nAtts < attsMax)
1568
16.8k
        atts[nAtts].normalized = 0;
1569
17.1k
      break;
1570
1.61M
    case BT_S:
1571
1.61M
      if (state == inName)
1572
4.12k
        state = other;
1573
1.61M
      else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized
1574
252k
               && (ptr == atts[nAtts].valuePtr
1575
206k
                   || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1576
195k
                   || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1577
91.1k
                   || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1578
245k
        atts[nAtts].normalized = 0;
1579
1.61M
      break;
1580
302k
    case BT_CR:
1581
359k
    case BT_LF:
1582
      /* This case ensures that the first attribute name is counted
1583
         Apart from that we could just change state on the quote. */
1584
359k
      if (state == inName)
1585
9.85k
        state = other;
1586
349k
      else if (state == inValue && nAtts < attsMax)
1587
211k
        atts[nAtts].normalized = 0;
1588
359k
      break;
1589
473k
    case BT_GT:
1590
532k
    case BT_SOL:
1591
532k
      if (state != inValue)
1592
219k
        return nAtts;
1593
312k
      break;
1594
1.07M
    default:
1595
1.07M
      break;
1596
17.0M
    }
1597
17.0M
  }
1598
  /* not reached */
1599
219k
}
xmltok.c:big2_getAtts
Line
Count
Source
1510
147k
                ATTRIBUTE *atts) {
1511
147k
  enum { other, inName, inValue } state = inName;
1512
147k
  int nAtts = 0;
1513
147k
  int open = 0; /* defined when state == inValue;
1514
                   initialization just to shut up compilers */
1515
1516
20.6M
  for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
1517
20.6M
    switch (BYTE_TYPE(enc, ptr)) {
1518
0
#  define START_NAME                                                           \
1519
0
    if (state == other) {                                                      \
1520
0
      if (nAtts < attsMax) {                                                   \
1521
0
        atts[nAtts].name = ptr;                                                \
1522
0
        atts[nAtts].normalized = 1;                                            \
1523
0
      }                                                                        \
1524
0
      state = inName;                                                          \
1525
0
    }
1526
0
#  define LEAD_CASE(n)                                                         \
1527
0
  case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
1528
0
    START_NAME ptr += (n - MINBPC(enc));                                       \
1529
0
    break;
1530
0
      LEAD_CASE(2)
1531
0
      LEAD_CASE(3)
1532
143k
      LEAD_CASE(4)
1533
0
#  undef LEAD_CASE
1534
17.8M
    case BT_NONASCII:
1535
18.2M
    case BT_NMSTRT:
1536
18.6M
    case BT_HEX:
1537
18.6M
      START_NAME
1538
18.6M
      break;
1539
0
#  undef START_NAME
1540
105k
    case BT_QUOT:
1541
105k
      if (state != inValue) {
1542
50.6k
        if (nAtts < attsMax)
1543
28.8k
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1544
50.6k
        state = inValue;
1545
50.6k
        open = BT_QUOT;
1546
54.6k
      } else if (open == BT_QUOT) {
1547
50.6k
        state = other;
1548
50.6k
        if (nAtts < attsMax)
1549
28.8k
          atts[nAtts].valueEnd = ptr;
1550
50.6k
        nAtts++;
1551
50.6k
      }
1552
105k
      break;
1553
38.3k
    case BT_APOS:
1554
38.3k
      if (state != inValue) {
1555
10.8k
        if (nAtts < attsMax)
1556
8.17k
          atts[nAtts].valuePtr = ptr + MINBPC(enc);
1557
10.8k
        state = inValue;
1558
10.8k
        open = BT_APOS;
1559
27.5k
      } else if (open == BT_APOS) {
1560
10.8k
        state = other;
1561
10.8k
        if (nAtts < attsMax)
1562
8.17k
          atts[nAtts].valueEnd = ptr;
1563
10.8k
        nAtts++;
1564
10.8k
      }
1565
38.3k
      break;
1566
97.1k
    case BT_AMP:
1567
97.1k
      if (nAtts < attsMax)
1568
96.7k
        atts[nAtts].normalized = 0;
1569
97.1k
      break;
1570
48.0k
    case BT_S:
1571
48.0k
      if (state == inName)
1572
4.13k
        state = other;
1573
43.9k
      else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized
1574
7.53k
               && (ptr == atts[nAtts].valuePtr
1575
5.43k
                   || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
1576
4.72k
                   || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
1577
3.93k
                   || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
1578
4.63k
        atts[nAtts].normalized = 0;
1579
48.0k
      break;
1580
512k
    case BT_CR:
1581
1.07M
    case BT_LF:
1582
      /* This case ensures that the first attribute name is counted
1583
         Apart from that we could just change state on the quote. */
1584
1.07M
      if (state == inName)
1585
50.2k
        state = other;
1586
1.02M
      else if (state == inValue && nAtts < attsMax)
1587
973k
        atts[nAtts].normalized = 0;
1588
1.07M
      break;
1589
158k
    case BT_GT:
1590
177k
    case BT_SOL:
1591
177k
      if (state != inValue)
1592
147k
        return nAtts;
1593
30.9k
      break;
1594
299k
    default:
1595
299k
      break;
1596
20.6M
    }
1597
20.6M
  }
1598
  /* not reached */
1599
147k
}
1600
1601
static int PTRFASTCALL
1602
536k
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) {
1603
536k
  int result = 0;
1604
  /* skip &# */
1605
536k
  UNUSED_P(enc);
1606
536k
  ptr += 2 * MINBPC(enc);
1607
536k
  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1608
1.70M
    for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1609
1.18M
         ptr += MINBPC(enc)) {
1610
1.18M
      int c = BYTE_TO_ASCII(enc, ptr);
1611
1.18M
      switch (c) {
1612
6.89k
      case ASCII_0:
1613
12.4k
      case ASCII_1:
1614
17.0k
      case ASCII_2:
1615
22.8k
      case ASCII_3:
1616
35.5k
      case ASCII_4:
1617
42.5k
      case ASCII_5:
1618
48.3k
      case ASCII_6:
1619
55.4k
      case ASCII_7:
1620
71.3k
      case ASCII_8:
1621
73.5k
      case ASCII_9:
1622
73.5k
        result <<= 4;
1623
73.5k
        result |= (c - ASCII_0);
1624
73.5k
        break;
1625
463k
      case ASCII_A:
1626
468k
      case ASCII_B:
1627
471k
      case ASCII_C:
1628
472k
      case ASCII_D:
1629
486k
      case ASCII_E:
1630
640k
      case ASCII_F:
1631
640k
        result <<= 4;
1632
640k
        result += 10 + (c - ASCII_A);
1633
640k
        break;
1634
144k
      case ASCII_a:
1635
290k
      case ASCII_b:
1636
424k
      case ASCII_c:
1637
436k
      case ASCII_d:
1638
459k
      case ASCII_e:
1639
472k
      case ASCII_f:
1640
472k
        result <<= 4;
1641
472k
        result += 10 + (c - ASCII_a);
1642
472k
        break;
1643
1.18M
      }
1644
1.18M
      if (result >= 0x110000)
1645
148
        return -1;
1646
1.18M
    }
1647
518k
  } else {
1648
58.0k
    for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1649
40.1k
      int c = BYTE_TO_ASCII(enc, ptr);
1650
40.1k
      result *= 10;
1651
40.1k
      result += (c - ASCII_0);
1652
40.1k
      if (result >= 0x110000)
1653
23
        return -1;
1654
40.1k
    }
1655
17.9k
  }
1656
536k
  return checkCharRefNumber(result);
1657
536k
}
xmltok.c:normal_charRefNumber
Line
Count
Source
1602
358k
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) {
1603
358k
  int result = 0;
1604
  /* skip &# */
1605
358k
  UNUSED_P(enc);
1606
358k
  ptr += 2 * MINBPC(enc);
1607
358k
  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1608
752k
    for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1609
402k
         ptr += MINBPC(enc)) {
1610
402k
      int c = BYTE_TO_ASCII(enc, ptr);
1611
402k
      switch (c) {
1612
3.45k
      case ASCII_0:
1613
7.02k
      case ASCII_1:
1614
9.22k
      case ASCII_2:
1615
9.69k
      case ASCII_3:
1616
12.1k
      case ASCII_4:
1617
12.8k
      case ASCII_5:
1618
13.1k
      case ASCII_6:
1619
16.3k
      case ASCII_7:
1620
27.5k
      case ASCII_8:
1621
28.1k
      case ASCII_9:
1622
28.1k
        result <<= 4;
1623
28.1k
        result |= (c - ASCII_0);
1624
28.1k
        break;
1625
330k
      case ASCII_A:
1626
330k
      case ASCII_B:
1627
331k
      case ASCII_C:
1628
331k
      case ASCII_D:
1629
343k
      case ASCII_E:
1630
358k
      case ASCII_F:
1631
358k
        result <<= 4;
1632
358k
        result += 10 + (c - ASCII_A);
1633
358k
        break;
1634
718
      case ASCII_a:
1635
2.36k
      case ASCII_b:
1636
3.12k
      case ASCII_c:
1637
3.53k
      case ASCII_d:
1638
15.1k
      case ASCII_e:
1639
15.8k
      case ASCII_f:
1640
15.8k
        result <<= 4;
1641
15.8k
        result += 10 + (c - ASCII_a);
1642
15.8k
        break;
1643
402k
      }
1644
402k
      if (result >= 0x110000)
1645
66
        return -1;
1646
402k
    }
1647
350k
  } else {
1648
22.4k
    for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1649
14.2k
      int c = BYTE_TO_ASCII(enc, ptr);
1650
14.2k
      result *= 10;
1651
14.2k
      result += (c - ASCII_0);
1652
14.2k
      if (result >= 0x110000)
1653
12
        return -1;
1654
14.2k
    }
1655
8.17k
  }
1656
358k
  return checkCharRefNumber(result);
1657
358k
}
xmltok.c:little2_charRefNumber
Line
Count
Source
1602
46.3k
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) {
1603
46.3k
  int result = 0;
1604
  /* skip &# */
1605
46.3k
  UNUSED_P(enc);
1606
46.3k
  ptr += 2 * MINBPC(enc);
1607
46.3k
  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1608
186k
    for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1609
149k
         ptr += MINBPC(enc)) {
1610
149k
      int c = BYTE_TO_ASCII(enc, ptr);
1611
149k
      switch (c) {
1612
1.91k
      case ASCII_0:
1613
2.74k
      case ASCII_1:
1614
3.72k
      case ASCII_2:
1615
5.98k
      case ASCII_3:
1616
9.32k
      case ASCII_4:
1617
12.5k
      case ASCII_5:
1618
16.5k
      case ASCII_6:
1619
19.0k
      case ASCII_7:
1620
20.1k
      case ASCII_8:
1621
21.2k
      case ASCII_9:
1622
21.2k
        result <<= 4;
1623
21.2k
        result |= (c - ASCII_0);
1624
21.2k
        break;
1625
20.4k
      case ASCII_A:
1626
22.1k
      case ASCII_B:
1627
22.5k
      case ASCII_C:
1628
23.0k
      case ASCII_D:
1629
24.1k
      case ASCII_E:
1630
47.2k
      case ASCII_F:
1631
47.2k
        result <<= 4;
1632
47.2k
        result += 10 + (c - ASCII_A);
1633
47.2k
        break;
1634
22.3k
      case ASCII_a:
1635
51.4k
      case ASCII_b:
1636
71.8k
      case ASCII_c:
1637
73.5k
      case ASCII_d:
1638
75.4k
      case ASCII_e:
1639
80.9k
      case ASCII_f:
1640
80.9k
        result <<= 4;
1641
80.9k
        result += 10 + (c - ASCII_a);
1642
80.9k
        break;
1643
149k
      }
1644
149k
      if (result >= 0x110000)
1645
43
        return -1;
1646
149k
    }
1647
36.7k
  } else {
1648
34.9k
    for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1649
25.3k
      int c = BYTE_TO_ASCII(enc, ptr);
1650
25.3k
      result *= 10;
1651
25.3k
      result += (c - ASCII_0);
1652
25.3k
      if (result >= 0x110000)
1653
5
        return -1;
1654
25.3k
    }
1655
9.60k
  }
1656
46.2k
  return checkCharRefNumber(result);
1657
46.3k
}
xmltok.c:big2_charRefNumber
Line
Count
Source
1602
132k
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) {
1603
132k
  int result = 0;
1604
  /* skip &# */
1605
132k
  UNUSED_P(enc);
1606
132k
  ptr += 2 * MINBPC(enc);
1607
132k
  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
1608
766k
    for (ptr += MINBPC(enc); ! CHAR_MATCHES(enc, ptr, ASCII_SEMI);
1609
634k
         ptr += MINBPC(enc)) {
1610
634k
      int c = BYTE_TO_ASCII(enc, ptr);
1611
634k
      switch (c) {
1612
1.52k
      case ASCII_0:
1613
2.69k
      case ASCII_1:
1614
4.07k
      case ASCII_2:
1615
7.17k
      case ASCII_3:
1616
14.0k
      case ASCII_4:
1617
17.1k
      case ASCII_5:
1618
18.7k
      case ASCII_6:
1619
20.0k
      case ASCII_7:
1620
23.6k
      case ASCII_8:
1621
24.1k
      case ASCII_9:
1622
24.1k
        result <<= 4;
1623
24.1k
        result |= (c - ASCII_0);
1624
24.1k
        break;
1625
113k
      case ASCII_A:
1626
115k
      case ASCII_B:
1627
117k
      case ASCII_C:
1628
117k
      case ASCII_D:
1629
118k
      case ASCII_E:
1630
234k
      case ASCII_F:
1631
234k
        result <<= 4;
1632
234k
        result += 10 + (c - ASCII_A);
1633
234k
        break;
1634
121k
      case ASCII_a:
1635
236k
      case ASCII_b:
1636
349k
      case ASCII_c:
1637
359k
      case ASCII_d:
1638
368k
      case ASCII_e:
1639
375k
      case ASCII_f:
1640
375k
        result <<= 4;
1641
375k
        result += 10 + (c - ASCII_a);
1642
375k
        break;
1643
634k
      }
1644
634k
      if (result >= 0x110000)
1645
39
        return -1;
1646
634k
    }
1647
132k
  } else {
1648
727
    for (; ! CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
1649
555
      int c = BYTE_TO_ASCII(enc, ptr);
1650
555
      result *= 10;
1651
555
      result += (c - ASCII_0);
1652
555
      if (result >= 0x110000)
1653
6
        return -1;
1654
555
    }
1655
178
  }
1656
132k
  return checkCharRefNumber(result);
1657
132k
}
1658
1659
static int PTRCALL
1660
PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
1661
6.78M
                             const char *end) {
1662
6.78M
  UNUSED_P(enc);
1663
6.78M
  switch ((end - ptr) / MINBPC(enc)) {
1664
160k
  case 2:
1665
160k
    if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1666
133k
      switch (BYTE_TO_ASCII(enc, ptr)) {
1667
85.3k
      case ASCII_l:
1668
85.3k
        return ASCII_LT;
1669
46.5k
      case ASCII_g:
1670
46.5k
        return ASCII_GT;
1671
133k
      }
1672
133k
    }
1673
28.7k
    break;
1674
28.7k
  case 3:
1675
25.2k
    if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1676
20.7k
      ptr += MINBPC(enc);
1677
20.7k
      if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1678
11.4k
        ptr += MINBPC(enc);
1679
11.4k
        if (CHAR_MATCHES(enc, ptr, ASCII_p))
1680
1.92k
          return ASCII_AMP;
1681
11.4k
      }
1682
20.7k
    }
1683
23.3k
    break;
1684
30.6k
  case 4:
1685
30.6k
    switch (BYTE_TO_ASCII(enc, ptr)) {
1686
10.0k
    case ASCII_q:
1687
10.0k
      ptr += MINBPC(enc);
1688
10.0k
      if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1689
7.54k
        ptr += MINBPC(enc);
1690
7.54k
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1691
4.39k
          ptr += MINBPC(enc);
1692
4.39k
          if (CHAR_MATCHES(enc, ptr, ASCII_t))
1693
2.16k
            return ASCII_QUOT;
1694
4.39k
        }
1695
7.54k
      }
1696
7.91k
      break;
1697
11.3k
    case ASCII_a:
1698
11.3k
      ptr += MINBPC(enc);
1699
11.3k
      if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1700
8.10k
        ptr += MINBPC(enc);
1701
8.10k
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1702
4.67k
          ptr += MINBPC(enc);
1703
4.67k
          if (CHAR_MATCHES(enc, ptr, ASCII_s))
1704
1.50k
            return ASCII_APOS;
1705
4.67k
        }
1706
8.10k
      }
1707
9.87k
      break;
1708
30.6k
    }
1709
6.78M
  }
1710
6.64M
  return 0;
1711
6.78M
}
xmltok.c:normal_predefinedEntityName
Line
Count
Source
1661
6.62M
                             const char *end) {
1662
6.62M
  UNUSED_P(enc);
1663
6.62M
  switch ((end - ptr) / MINBPC(enc)) {
1664
152k
  case 2:
1665
152k
    if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1666
130k
      switch (BYTE_TO_ASCII(enc, ptr)) {
1667
84.4k
      case ASCII_l:
1668
84.4k
        return ASCII_LT;
1669
45.9k
      case ASCII_g:
1670
45.9k
        return ASCII_GT;
1671
130k
      }
1672
130k
    }
1673
22.5k
    break;
1674
22.5k
  case 3:
1675
16.8k
    if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1676
14.3k
      ptr += MINBPC(enc);
1677
14.3k
      if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1678
9.00k
        ptr += MINBPC(enc);
1679
9.00k
        if (CHAR_MATCHES(enc, ptr, ASCII_p))
1680
966
          return ASCII_AMP;
1681
9.00k
      }
1682
14.3k
    }
1683
15.8k
    break;
1684
15.8k
  case 4:
1685
12.0k
    switch (BYTE_TO_ASCII(enc, ptr)) {
1686
2.43k
    case ASCII_q:
1687
2.43k
      ptr += MINBPC(enc);
1688
2.43k
      if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1689
1.89k
        ptr += MINBPC(enc);
1690
1.89k
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1691
1.42k
          ptr += MINBPC(enc);
1692
1.42k
          if (CHAR_MATCHES(enc, ptr, ASCII_t))
1693
645
            return ASCII_QUOT;
1694
1.42k
        }
1695
1.89k
      }
1696
1.78k
      break;
1697
3.19k
    case ASCII_a:
1698
3.19k
      ptr += MINBPC(enc);
1699
3.19k
      if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1700
1.33k
        ptr += MINBPC(enc);
1701
1.33k
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1702
1.11k
          ptr += MINBPC(enc);
1703
1.11k
          if (CHAR_MATCHES(enc, ptr, ASCII_s))
1704
863
            return ASCII_APOS;
1705
1.11k
        }
1706
1.33k
      }
1707
2.33k
      break;
1708
12.0k
    }
1709
6.62M
  }
1710
6.49M
  return 0;
1711
6.62M
}
xmltok.c:little2_predefinedEntityName
Line
Count
Source
1661
34.7k
                             const char *end) {
1662
34.7k
  UNUSED_P(enc);
1663
34.7k
  switch ((end - ptr) / MINBPC(enc)) {
1664
3.18k
  case 2:
1665
3.18k
    if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1666
1.27k
      switch (BYTE_TO_ASCII(enc, ptr)) {
1667
549
      case ASCII_l:
1668
549
        return ASCII_LT;
1669
280
      case ASCII_g:
1670
280
        return ASCII_GT;
1671
1.27k
      }
1672
1.27k
    }
1673
2.35k
    break;
1674
2.79k
  case 3:
1675
2.79k
    if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1676
1.38k
      ptr += MINBPC(enc);
1677
1.38k
      if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1678
579
        ptr += MINBPC(enc);
1679
579
        if (CHAR_MATCHES(enc, ptr, ASCII_p))
1680
84
          return ASCII_AMP;
1681
579
      }
1682
1.38k
    }
1683
2.71k
    break;
1684
7.96k
  case 4:
1685
7.96k
    switch (BYTE_TO_ASCII(enc, ptr)) {
1686
3.42k
    case ASCII_q:
1687
3.42k
      ptr += MINBPC(enc);
1688
3.42k
      if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1689
2.41k
        ptr += MINBPC(enc);
1690
2.41k
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1691
1.41k
          ptr += MINBPC(enc);
1692
1.41k
          if (CHAR_MATCHES(enc, ptr, ASCII_t))
1693
642
            return ASCII_QUOT;
1694
1.41k
        }
1695
2.41k
      }
1696
2.78k
      break;
1697
2.78k
    case ASCII_a:
1698
2.73k
      ptr += MINBPC(enc);
1699
2.73k
      if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1700
1.94k
        ptr += MINBPC(enc);
1701
1.94k
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1702
1.04k
          ptr += MINBPC(enc);
1703
1.04k
          if (CHAR_MATCHES(enc, ptr, ASCII_s))
1704
334
            return ASCII_APOS;
1705
1.04k
        }
1706
1.94k
      }
1707
2.40k
      break;
1708
7.96k
    }
1709
34.7k
  }
1710
32.8k
  return 0;
1711
34.7k
}
xmltok.c:big2_predefinedEntityName
Line
Count
Source
1661
123k
                             const char *end) {
1662
123k
  UNUSED_P(enc);
1663
123k
  switch ((end - ptr) / MINBPC(enc)) {
1664
4.58k
  case 2:
1665
4.58k
    if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
1666
1.47k
      switch (BYTE_TO_ASCII(enc, ptr)) {
1667
400
      case ASCII_l:
1668
400
        return ASCII_LT;
1669
309
      case ASCII_g:
1670
309
        return ASCII_GT;
1671
1.47k
      }
1672
1.47k
    }
1673
3.87k
    break;
1674
5.62k
  case 3:
1675
5.62k
    if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
1676
5.01k
      ptr += MINBPC(enc);
1677
5.01k
      if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
1678
1.84k
        ptr += MINBPC(enc);
1679
1.84k
        if (CHAR_MATCHES(enc, ptr, ASCII_p))
1680
879
          return ASCII_AMP;
1681
1.84k
      }
1682
5.01k
    }
1683
4.74k
    break;
1684
10.6k
  case 4:
1685
10.6k
    switch (BYTE_TO_ASCII(enc, ptr)) {
1686
4.21k
    case ASCII_q:
1687
4.21k
      ptr += MINBPC(enc);
1688
4.21k
      if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
1689
3.23k
        ptr += MINBPC(enc);
1690
3.23k
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1691
1.55k
          ptr += MINBPC(enc);
1692
1.55k
          if (CHAR_MATCHES(enc, ptr, ASCII_t))
1693
874
            return ASCII_QUOT;
1694
1.55k
        }
1695
3.23k
      }
1696
3.34k
      break;
1697
5.44k
    case ASCII_a:
1698
5.44k
      ptr += MINBPC(enc);
1699
5.44k
      if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
1700
4.82k
        ptr += MINBPC(enc);
1701
4.82k
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
1702
2.51k
          ptr += MINBPC(enc);
1703
2.51k
          if (CHAR_MATCHES(enc, ptr, ASCII_s))
1704
307
            return ASCII_APOS;
1705
2.51k
        }
1706
4.82k
      }
1707
5.14k
      break;
1708
10.6k
    }
1709
123k
  }
1710
120k
  return 0;
1711
123k
}
1712
1713
static int PTRCALL
1714
PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
1715
1.33M
                         const char *end1, const char *ptr2) {
1716
1.33M
  UNUSED_P(enc);
1717
6.39M
  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1718
5.60M
    if (end1 - ptr1 < MINBPC(enc)) {
1719
      /* This line cannot be executed.  The incoming data has already
1720
       * been tokenized once, so incomplete characters like this have
1721
       * already been eliminated from the input.  Retaining the
1722
       * paranoia check is still valuable, however.
1723
       */
1724
291
      return 0; /* LCOV_EXCL_LINE */
1725
291
    }
1726
5.60M
    if (! CHAR_MATCHES(enc, ptr1, *ptr2))
1727
546k
      return 0;
1728
5.60M
  }
1729
789k
  return ptr1 == end1;
1730
1.33M
}
xmltok.c:normal_nameMatchesAscii
Line
Count
Source
1715
685k
                         const char *end1, const char *ptr2) {
1716
685k
  UNUSED_P(enc);
1717
4.05M
  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1718
3.51M
    if (end1 - ptr1 < MINBPC(enc)) {
1719
      /* This line cannot be executed.  The incoming data has already
1720
       * been tokenized once, so incomplete characters like this have
1721
       * already been eliminated from the input.  Retaining the
1722
       * paranoia check is still valuable, however.
1723
       */
1724
125
      return 0; /* LCOV_EXCL_LINE */
1725
125
    }
1726
3.51M
    if (! CHAR_MATCHES(enc, ptr1, *ptr2))
1727
145k
      return 0;
1728
3.51M
  }
1729
540k
  return ptr1 == end1;
1730
685k
}
xmltok.c:little2_nameMatchesAscii
Line
Count
Source
1715
98.7k
                         const char *end1, const char *ptr2) {
1716
98.7k
  UNUSED_P(enc);
1717
430k
  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1718
380k
    if (end1 - ptr1 < MINBPC(enc)) {
1719
      /* This line cannot be executed.  The incoming data has already
1720
       * been tokenized once, so incomplete characters like this have
1721
       * already been eliminated from the input.  Retaining the
1722
       * paranoia check is still valuable, however.
1723
       */
1724
100
      return 0; /* LCOV_EXCL_LINE */
1725
100
    }
1726
380k
    if (! CHAR_MATCHES(enc, ptr1, *ptr2))
1727
48.2k
      return 0;
1728
380k
  }
1729
50.3k
  return ptr1 == end1;
1730
98.7k
}
xmltok.c:big2_nameMatchesAscii
Line
Count
Source
1715
552k
                         const char *end1, const char *ptr2) {
1716
552k
  UNUSED_P(enc);
1717
1.91M
  for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
1718
1.71M
    if (end1 - ptr1 < MINBPC(enc)) {
1719
      /* This line cannot be executed.  The incoming data has already
1720
       * been tokenized once, so incomplete characters like this have
1721
       * already been eliminated from the input.  Retaining the
1722
       * paranoia check is still valuable, however.
1723
       */
1724
66
      return 0; /* LCOV_EXCL_LINE */
1725
66
    }
1726
1.71M
    if (! CHAR_MATCHES(enc, ptr1, *ptr2))
1727
353k
      return 0;
1728
1.71M
  }
1729
199k
  return ptr1 == end1;
1730
552k
}
1731
1732
static int PTRFASTCALL
1733
23.6M
PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
1734
23.6M
  const char *start = ptr;
1735
150M
  for (;;) {
1736
150M
    switch (BYTE_TYPE(enc, ptr)) {
1737
0
#  define LEAD_CASE(n)                                                         \
1738
10.5M
  case BT_LEAD##n:                                                             \
1739
10.5M
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1740
10.5M
    break;
1741
10.3M
      LEAD_CASE(2)
1742
242k
      LEAD_CASE(3)
1743
0
      LEAD_CASE(4)
1744
0
#  undef LEAD_CASE
1745
631k
    case BT_NONASCII:
1746
58.6M
    case BT_NMSTRT:
1747
58.6M
#  ifdef XML_NS
1748
58.8M
    case BT_COLON:
1749
58.8M
#  endif
1750
114M
    case BT_HEX:
1751
115M
    case BT_DIGIT:
1752
116M
    case BT_NAME:
1753
116M
    case BT_MINUS:
1754
116M
      ptr += MINBPC(enc);
1755
116M
      break;
1756
23.6M
    default:
1757
23.6M
      return (int)(ptr - start);
1758
150M
    }
1759
150M
  }
1760
23.6M
}
xmltok.c:normal_nameLength
Line
Count
Source
1733
23.0M
PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
1734
23.0M
  const char *start = ptr;
1735
148M
  for (;;) {
1736
148M
    switch (BYTE_TYPE(enc, ptr)) {
1737
0
#  define LEAD_CASE(n)                                                         \
1738
0
  case BT_LEAD##n:                                                             \
1739
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1740
0
    break;
1741
10.3M
      LEAD_CASE(2)
1742
242k
      LEAD_CASE(3)
1743
0
      LEAD_CASE(4)
1744
0
#  undef LEAD_CASE
1745
0
    case BT_NONASCII:
1746
57.3M
    case BT_NMSTRT:
1747
57.3M
#  ifdef XML_NS
1748
57.5M
    case BT_COLON:
1749
57.5M
#  endif
1750
112M
    case BT_HEX:
1751
114M
    case BT_DIGIT:
1752
114M
    case BT_NAME:
1753
114M
    case BT_MINUS:
1754
114M
      ptr += MINBPC(enc);
1755
114M
      break;
1756
23.0M
    default:
1757
23.0M
      return (int)(ptr - start);
1758
148M
    }
1759
148M
  }
1760
23.0M
}
xmltok.c:little2_nameLength
Line
Count
Source
1733
306k
PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
1734
306k
  const char *start = ptr;
1735
1.11M
  for (;;) {
1736
1.11M
    switch (BYTE_TYPE(enc, ptr)) {
1737
0
#  define LEAD_CASE(n)                                                         \
1738
0
  case BT_LEAD##n:                                                             \
1739
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1740
0
    break;
1741
0
      LEAD_CASE(2)
1742
0
      LEAD_CASE(3)
1743
0
      LEAD_CASE(4)
1744
0
#  undef LEAD_CASE
1745
459k
    case BT_NONASCII:
1746
775k
    case BT_NMSTRT:
1747
775k
#  ifdef XML_NS
1748
777k
    case BT_COLON:
1749
777k
#  endif
1750
805k
    case BT_HEX:
1751
810k
    case BT_DIGIT:
1752
810k
    case BT_NAME:
1753
812k
    case BT_MINUS:
1754
812k
      ptr += MINBPC(enc);
1755
812k
      break;
1756
306k
    default:
1757
306k
      return (int)(ptr - start);
1758
1.11M
    }
1759
1.11M
  }
1760
306k
}
xmltok.c:big2_nameLength
Line
Count
Source
1733
364k
PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
1734
364k
  const char *start = ptr;
1735
1.04M
  for (;;) {
1736
1.04M
    switch (BYTE_TYPE(enc, ptr)) {
1737
0
#  define LEAD_CASE(n)                                                         \
1738
0
  case BT_LEAD##n:                                                             \
1739
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1740
0
    break;
1741
0
      LEAD_CASE(2)
1742
0
      LEAD_CASE(3)
1743
0
      LEAD_CASE(4)
1744
0
#  undef LEAD_CASE
1745
171k
    case BT_NONASCII:
1746
508k
    case BT_NMSTRT:
1747
508k
#  ifdef XML_NS
1748
508k
    case BT_COLON:
1749
508k
#  endif
1750
682k
    case BT_HEX:
1751
684k
    case BT_DIGIT:
1752
684k
    case BT_NAME:
1753
685k
    case BT_MINUS:
1754
685k
      ptr += MINBPC(enc);
1755
685k
      break;
1756
364k
    default:
1757
364k
      return (int)(ptr - start);
1758
1.04M
    }
1759
1.04M
  }
1760
364k
}
1761
1762
static const char *PTRFASTCALL
1763
319k
PREFIX(skipS)(const ENCODING *enc, const char *ptr) {
1764
601k
  for (;;) {
1765
601k
    switch (BYTE_TYPE(enc, ptr)) {
1766
136k
    case BT_LF:
1767
253k
    case BT_CR:
1768
281k
    case BT_S:
1769
281k
      ptr += MINBPC(enc);
1770
281k
      break;
1771
319k
    default:
1772
319k
      return ptr;
1773
601k
    }
1774
601k
  }
1775
319k
}
xmltok.c:normal_skipS
Line
Count
Source
1763
73.6k
PREFIX(skipS)(const ENCODING *enc, const char *ptr) {
1764
138k
  for (;;) {
1765
138k
    switch (BYTE_TYPE(enc, ptr)) {
1766
46.0k
    case BT_LF:
1767
50.7k
    case BT_CR:
1768
64.7k
    case BT_S:
1769
64.7k
      ptr += MINBPC(enc);
1770
64.7k
      break;
1771
73.6k
    default:
1772
73.6k
      return ptr;
1773
138k
    }
1774
138k
  }
1775
73.6k
}
xmltok.c:little2_skipS
Line
Count
Source
1763
77.4k
PREFIX(skipS)(const ENCODING *enc, const char *ptr) {
1764
152k
  for (;;) {
1765
152k
    switch (BYTE_TYPE(enc, ptr)) {
1766
27.8k
    case BT_LF:
1767
63.4k
    case BT_CR:
1768
74.9k
    case BT_S:
1769
74.9k
      ptr += MINBPC(enc);
1770
74.9k
      break;
1771
77.4k
    default:
1772
77.4k
      return ptr;
1773
152k
    }
1774
152k
  }
1775
77.4k
}
xmltok.c:big2_skipS
Line
Count
Source
1763
168k
PREFIX(skipS)(const ENCODING *enc, const char *ptr) {
1764
310k
  for (;;) {
1765
310k
    switch (BYTE_TYPE(enc, ptr)) {
1766
62.3k
    case BT_LF:
1767
138k
    case BT_CR:
1768
141k
    case BT_S:
1769
141k
      ptr += MINBPC(enc);
1770
141k
      break;
1771
168k
    default:
1772
168k
      return ptr;
1773
310k
    }
1774
310k
  }
1775
168k
}
1776
1777
static void PTRCALL
1778
PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
1779
37.0M
                       POSITION *pos) {
1780
752M
  while (HAS_CHAR(enc, ptr, end)) {
1781
752M
    switch (BYTE_TYPE(enc, ptr)) {
1782
0
#  define LEAD_CASE(n)                                                         \
1783
11.1M
  case BT_LEAD##n:                                                             \
1784
11.1M
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1785
11.1M
    pos->columnNumber++;                                                       \
1786
11.1M
    break;
1787
1.46M
      LEAD_CASE(2)
1788
1.88M
      LEAD_CASE(3)
1789
7.84M
      LEAD_CASE(4)
1790
0
#  undef LEAD_CASE
1791
4.51M
    case BT_LF:
1792
4.51M
      pos->columnNumber = 0;
1793
4.51M
      pos->lineNumber++;
1794
4.51M
      ptr += MINBPC(enc);
1795
4.51M
      break;
1796
31.1M
    case BT_CR:
1797
31.1M
      pos->lineNumber++;
1798
31.1M
      ptr += MINBPC(enc);
1799
31.1M
      if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
1800
462k
        ptr += MINBPC(enc);
1801
31.1M
      pos->columnNumber = 0;
1802
31.1M
      break;
1803
705M
    default:
1804
705M
      ptr += MINBPC(enc);
1805
705M
      pos->columnNumber++;
1806
705M
      break;
1807
752M
    }
1808
752M
  }
1809
37.0M
}
xmltok.c:normal_updatePosition
Line
Count
Source
1779
32.5M
                       POSITION *pos) {
1780
544M
  while (HAS_CHAR(enc, ptr, end)) {
1781
544M
    switch (BYTE_TYPE(enc, ptr)) {
1782
0
#  define LEAD_CASE(n)                                                         \
1783
0
  case BT_LEAD##n:                                                             \
1784
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1785
0
    pos->columnNumber++;                                                       \
1786
0
    break;
1787
1.46M
      LEAD_CASE(2)
1788
1.88M
      LEAD_CASE(3)
1789
14.0k
      LEAD_CASE(4)
1790
0
#  undef LEAD_CASE
1791
3.09M
    case BT_LF:
1792
3.09M
      pos->columnNumber = 0;
1793
3.09M
      pos->lineNumber++;
1794
3.09M
      ptr += MINBPC(enc);
1795
3.09M
      break;
1796
27.7M
    case BT_CR:
1797
27.7M
      pos->lineNumber++;
1798
27.7M
      ptr += MINBPC(enc);
1799
27.7M
      if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
1800
15.6k
        ptr += MINBPC(enc);
1801
27.7M
      pos->columnNumber = 0;
1802
27.7M
      break;
1803
509M
    default:
1804
509M
      ptr += MINBPC(enc);
1805
509M
      pos->columnNumber++;
1806
509M
      break;
1807
544M
    }
1808
544M
  }
1809
32.5M
}
xmltok.c:little2_updatePosition
Line
Count
Source
1779
2.06M
                       POSITION *pos) {
1780
102M
  while (HAS_CHAR(enc, ptr, end)) {
1781
102M
    switch (BYTE_TYPE(enc, ptr)) {
1782
0
#  define LEAD_CASE(n)                                                         \
1783
0
  case BT_LEAD##n:                                                             \
1784
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1785
0
    pos->columnNumber++;                                                       \
1786
0
    break;
1787
0
      LEAD_CASE(2)
1788
0
      LEAD_CASE(3)
1789
3.77M
      LEAD_CASE(4)
1790
0
#  undef LEAD_CASE
1791
665k
    case BT_LF:
1792
665k
      pos->columnNumber = 0;
1793
665k
      pos->lineNumber++;
1794
665k
      ptr += MINBPC(enc);
1795
665k
      break;
1796
1.45M
    case BT_CR:
1797
1.45M
      pos->lineNumber++;
1798
1.45M
      ptr += MINBPC(enc);
1799
1.45M
      if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
1800
64.7k
        ptr += MINBPC(enc);
1801
1.45M
      pos->columnNumber = 0;
1802
1.45M
      break;
1803
96.1M
    default:
1804
96.1M
      ptr += MINBPC(enc);
1805
96.1M
      pos->columnNumber++;
1806
96.1M
      break;
1807
102M
    }
1808
102M
  }
1809
2.06M
}
xmltok.c:big2_updatePosition
Line
Count
Source
1779
2.39M
                       POSITION *pos) {
1780
106M
  while (HAS_CHAR(enc, ptr, end)) {
1781
106M
    switch (BYTE_TYPE(enc, ptr)) {
1782
0
#  define LEAD_CASE(n)                                                         \
1783
0
  case BT_LEAD##n:                                                             \
1784
0
    ptr += n; /* NOTE: The encoding has already been validated. */             \
1785
0
    pos->columnNumber++;                                                       \
1786
0
    break;
1787
0
      LEAD_CASE(2)
1788
0
      LEAD_CASE(3)
1789
4.05M
      LEAD_CASE(4)
1790
0
#  undef LEAD_CASE
1791
753k
    case BT_LF:
1792
753k
      pos->columnNumber = 0;
1793
753k
      pos->lineNumber++;
1794
753k
      ptr += MINBPC(enc);
1795
753k
      break;
1796
1.96M
    case BT_CR:
1797
1.96M
      pos->lineNumber++;
1798
1.96M
      ptr += MINBPC(enc);
1799
1.96M
      if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF)
1800
382k
        ptr += MINBPC(enc);
1801
1.96M
      pos->columnNumber = 0;
1802
1.96M
      break;
1803
99.3M
    default:
1804
99.3M
      ptr += MINBPC(enc);
1805
99.3M
      pos->columnNumber++;
1806
99.3M
      break;
1807
106M
    }
1808
106M
  }
1809
2.39M
}
1810
1811
#  undef DO_LEAD_CASE
1812
#  undef MULTIBYTE_CASES
1813
#  undef INVALID_CASES
1814
#  undef CHECK_NAME_CASE
1815
#  undef CHECK_NAME_CASES
1816
#  undef CHECK_NMSTRT_CASE
1817
#  undef CHECK_NMSTRT_CASES
1818
1819
#endif /* XML_TOK_IMPL_C */