Coverage Report

Created: 2025-06-16 07:00

/src/imagemagick/MagickCore/token.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3
%                                                                             %
4
%                                                                             %
5
%                                                                             %
6
%                    TTTTT   OOO   K   K  EEEEE  N   N                        %
7
%                      T    O   O  K  K   E      NN  N                        %
8
%                      T    O   O  KKK    EEE    N N N                        %
9
%                      T    O   O  K  K   E      N  NN                        %
10
%                      T     OOO   K   K  EEEEE  N   N                        %
11
%                                                                             %
12
%                                                                             %
13
%                         MagickCore Token Methods                            %
14
%                                                                             %
15
%                             Software Design                                 %
16
%                                  Cristy                                     %
17
%                              January 1993                                   %
18
%                                                                             %
19
%                                                                             %
20
%  Copyright @ 1999 ImageMagick Studio LLC, a non-profit organization         %
21
%  dedicated to making software imaging solutions freely available.           %
22
%                                                                             %
23
%  You may not use this file except in compliance with the License.  You may  %
24
%  obtain a copy of the License at                                            %
25
%                                                                             %
26
%    https://imagemagick.org/script/license.php                               %
27
%                                                                             %
28
%  Unless required by applicable law or agreed to in writing, software        %
29
%  distributed under the License is distributed on an "AS IS" BASIS,          %
30
%  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
31
%  See the License for the specific language governing permissions and        %
32
%  limitations under the License.                                             %
33
%                                                                             %
34
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35
%
36
%
37
%
38
*/
39

40
/*
41
  Include declarations.
42
*/
43
#include "MagickCore/studio.h"
44
#include "MagickCore/exception.h"
45
#include "MagickCore/exception-private.h"
46
#include "MagickCore/image.h"
47
#include "MagickCore/image-private.h"
48
#include "MagickCore/locale-private.h"
49
#include "MagickCore/memory_.h"
50
#include "MagickCore/memory-private.h"
51
#include "MagickCore/string_.h"
52
#include "MagickCore/string-private.h"
53
#include "MagickCore/token.h"
54
#include "MagickCore/token-private.h"
55
#include "MagickCore/utility.h"
56
#include "MagickCore/utility-private.h"
57

58
/*
59
  Typedef declarations.
60
*/
61
struct _TokenInfo
62
{
63
  int
64
    state;
65
66
  MagickStatusType
67
    flag;
68
69
  ssize_t
70
    offset;
71
72
  char
73
    quote;
74
75
  size_t
76
    signature;
77
};
78

79
/*
80
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
81
%                                                                             %
82
%                                                                             %
83
%                                                                             %
84
%   A c q u i r e T o k e n I n f o                                           %
85
%                                                                             %
86
%                                                                             %
87
%                                                                             %
88
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
89
%
90
%  AcquireTokenInfo() allocates the TokenInfo structure.
91
%
92
%  The format of the AcquireTokenInfo method is:
93
%
94
%      TokenInfo *AcquireTokenInfo()
95
%
96
*/
97
MagickExport TokenInfo *AcquireTokenInfo(void)
{
  TokenInfo
    *token_info;

  /*
    Allocate the structure and hand it back in a fully defined state.
  */
  token_info=(TokenInfo *) AcquireCriticalMemory(sizeof(*token_info));
  /*
    Only the signature used to be set, which left state, flag, offset, and
    quote indeterminate until the first Tokenizer() call initialized them.
    Zero everything so no field is ever read uninitialized.
    NOTE(review): assumes AcquireCriticalMemory() does not already clear the
    memory; if it does, this memset is merely redundant.
  */
  (void) memset(token_info,0,sizeof(*token_info));
  token_info->signature=MagickCoreSignature;
  return(token_info);
}
106

107
/*
108
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
109
%                                                                             %
110
%                                                                             %
111
%                                                                             %
112
%   D e s t r o y T o k e n I n f o                                           %
113
%                                                                             %
114
%                                                                             %
115
%                                                                             %
116
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
117
%
118
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
119
%  structure.
120
%
121
%  The format of the DestroyTokenInfo method is:
122
%
123
%      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
124
%
125
%  A description of each parameter follows:
126
%
127
%    o token_info: Specifies a pointer to a TokenInfo structure.
128
%
129
*/
130
MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
{
  /*
    Only a live, correctly signed structure may be destroyed.
  */
  assert(token_info != (TokenInfo *) NULL);
  assert(token_info->signature == MagickCoreSignature);
  if (IsEventLogging() != MagickFalse)
    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
  /*
    Invalidate the signature before the memory is released so a stale pointer
    fails the signature assertion, then return whatever
    RelinquishMagickMemory() hands back (presumably NULL).
  */
  token_info->signature=(~MagickCoreSignature);
  return((TokenInfo *) RelinquishMagickMemory(token_info));
}
140

141
/*
142
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
143
%                                                                             %
144
%                                                                             %
145
%                                                                             %
146
+   G e t N e x t T o k e n                                                   %
147
%                                                                             %
148
%                                                                             %
149
%                                                                             %
150
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
151
%
152
%  GetNextToken() gets a token from the token stream.  A token is defined as
153
%  a sequence of characters delimited by whitespace (e.g. clip-path), a
154
%  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
155
%  parenthesis (e.g. rgb(0,0,0)).  GetNextToken() also recognizes these
156
%  separator characters: ':', '=', ',', and ';'.  GetNextToken() returns the
157
%  length of the consumed token.
158
%
159
%  The format of the GetNextToken method is:
160
%
161
%      size_t GetNextToken(const char *magick_restrict start,
162
%        const char **magick_restrict end,const size_t extent,
163
%        char *magick_restrict token)
164
%
165
%  A description of each parameter follows:
166
%
167
%    o start: the start of the token sequence.
168
%
169
%    o end: point to the end of the token sequence.
170
%
171
%    o extent: maximum extent of the token.
172
%
173
%    o token: copy the token to this buffer.
174
%
175
*/
176
MagickExport magick_hot_spot size_t GetNextToken(
177
  const char *magick_restrict start,const char **magick_restrict end,
178
  const size_t extent,char *magick_restrict token)
179
42.8M
{
180
42.8M
  char
181
42.8M
    *magick_restrict q;
182
183
42.8M
  const char
184
42.8M
    *magick_restrict p;
185
186
42.8M
  double
187
42.8M
    value;
188
189
42.8M
  ssize_t
190
42.8M
    i;
191
192
42.8M
  assert(start != (const char *) NULL);
193
42.8M
  assert(token != (char *) NULL);
194
42.8M
  i=0;
195
42.8M
  p=start;
196
43.1M
  while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
197
263k
    p++;
198
42.8M
  switch (*p)
199
42.8M
  {
200
19.3k
    case '\0':
201
19.3k
      break;
202
1.70M
    case '"':
203
2.05M
    case '\'':
204
2.09M
    case '`':
205
2.20M
    case '{':
206
2.20M
    {
207
2.20M
      char
208
2.20M
        escape;
209
210
2.20M
      switch (*p)
211
2.20M
      {
212
1.70M
        case '"': escape='"'; break;
213
354k
        case '\'': escape='\''; break;
214
39.2k
        case '`': escape='\''; break;
215
110k
        case '{': escape='}'; break;
216
0
        default: escape=(*p); break;
217
2.20M
      }
218
226M
      for (p++; *p != '\0'; p++)
219
226M
      {
220
226M
        if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
221
5.72M
          p++;
222
221M
        else
223
221M
          if (*p == escape)
224
2.03M
            {
225
2.03M
              p++;
226
2.03M
              break;
227
2.03M
            }
228
224M
        if (i < (ssize_t) (extent-1))
229
224M
          token[i++]=(*p);
230
224M
        if ((size_t) (p-start) >= (extent-1))
231
292
          break;
232
224M
      }
233
2.20M
      break;
234
2.20M
    }
235
68.2k
    case '/':
236
68.2k
    {
237
68.2k
      if (i < (ssize_t) (extent-1))
238
68.2k
        token[i++]=(*p);
239
68.2k
      p++;
240
68.2k
      if ((*p == '>') || (*p == '/'))
241
8.87k
        {
242
8.87k
          if (i < (ssize_t) (extent-1))
243
8.87k
            token[i++]=(*p);
244
8.87k
          p++;
245
8.87k
        }
246
68.2k
      break;
247
2.20M
    }
248
40.5M
    default:
249
40.5M
    {
250
40.5M
      value=StringToDouble(p,&q);
251
40.5M
      (void) value;
252
40.5M
      if ((p != q) && (*p != ','))
253
5.70M
        {
254
18.2M
          for ( ; (p < q) && (*p != ','); p++)
255
12.5M
          {
256
12.5M
            if (i < (ssize_t) (extent-1))
257
12.5M
              token[i++]=(*p);
258
12.5M
            if ((size_t) (p-start) >= (extent-1))
259
1
              break;
260
12.5M
          }
261
5.70M
          if (*p == '%')
262
59.7k
            {
263
59.7k
              if (i < (ssize_t) (extent-1))
264
59.7k
                token[i++]=(*p);
265
59.7k
              p++;
266
59.7k
            }
267
5.70M
          break;
268
5.70M
        }
269
34.8M
      if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
270
34.8M
          (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
271
24.7M
        {
272
24.7M
          if (i < (ssize_t) (extent-1))
273
24.7M
            token[i++]=(*p);
274
24.7M
          p++;
275
24.7M
          break;
276
24.7M
        }
277
102M
      for ( ; *p != '\0'; p++)
278
102M
      {
279
102M
        if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
280
102M
            (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
281
9.97M
          break;
282
92.2M
        if ((i > 0) && (*p == '<'))
283
46.0k
          break;
284
92.1M
        if (i < (ssize_t) (extent-1))
285
92.1M
          token[i++]=(*p);
286
92.1M
        if (*p == '>')
287
29.7k
          break;
288
92.1M
        if (*p == '(')
289
140k
          {
290
38.4M
            for (p++; *p != '\0'; p++)
291
38.4M
            {
292
38.4M
              if (i < (ssize_t) (extent-1))
293
38.4M
                token[i++]=(*p);
294
38.4M
              if ((*p == ')') && (*(p-1) != '\\'))
295
132k
                break;
296
38.2M
              if ((size_t) (p-start) >= (extent-1))
297
34
                break;
298
38.2M
            }
299
140k
            if (*p == '\0')
300
7.46k
              break;
301
140k
          }
302
92.1M
        if ((size_t) (p-start) >= (extent-1))
303
97
          break;
304
92.1M
      }
305
10.1M
      break;
306
34.8M
    }
307
42.8M
  }
308
42.8M
  token[i]='\0';
309
42.8M
  if (LocaleNCompare(token,"url(#",5) == 0)
310
1.18k
    {
311
1.18k
      q=strrchr(token,')');
312
1.18k
      if (q != (char *) NULL)
313
692
        {
314
692
          *q='\0';
315
692
          (void) memmove(token,token+5,(size_t) (q-token-4));
316
692
        }
317
1.18k
    }
318
60.0M
  while (isspace((int) ((unsigned char) *p)) != 0)
319
17.1M
    p++;
320
42.8M
  if (end != (const char **) NULL)
321
42.4M
    *end=(const char *) p;
322
42.8M
  return((size_t) (p-start+1));
323
42.8M
}
324

325
/*
326
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
327
%                                                                             %
328
%                                                                             %
329
%                                                                             %
330
%   G l o b E x p r e s s i o n                                               %
331
%                                                                             %
332
%                                                                             %
333
%                                                                             %
334
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
335
%
336
%  GlobExpression() returns MagickTrue if the expression matches the pattern.
337
%
338
%  The format of the GlobExpression function is:
339
%
340
%      MagickBooleanType GlobExpression(const char *magick_restrict expression,
341
%        const char *magick_restrict pattern,
342
%        const MagickBooleanType case_insensitive)
343
%
344
%  A description of each parameter follows:
345
%
346
%    o expression: Specifies a pointer to a text string containing a file name.
347
%
348
%    o pattern: Specifies a pointer to a text string containing a pattern.
349
%
350
%    o case_insensitive: set to MagickTrue to ignore the case when matching
351
%      an expression.
352
%
353
*/
354
MagickExport MagickBooleanType GlobExpression(
  const char *magick_restrict expression,const char *magick_restrict pattern,
  const MagickBooleanType case_insensitive)
{
  char
    path[MagickPathExtent];

  MagickBooleanType
    done,
    match;

  /*
    A NULL pattern, an empty pattern, and a lone '*' match any expression.
  */
  if (pattern == (char *) NULL)
    return(MagickTrue);
  if (GetUTFCode(pattern) == 0)
    return(MagickTrue);
  if (LocaleCompare(pattern,"*") == 0)
    return(MagickTrue);
  /*
    A pattern for which GetPathComponent() reports a non-empty SubimagePath
    component never matches.
  */
  GetPathComponent(pattern,SubimagePath,path);
  if (*path != '\0')
    return(MagickFalse);
  /*
    Evaluate glob expression: consume pattern and expression in step until
    the pattern is exhausted or a mismatch sets done.
  */
  done=MagickFalse;
  while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
  {
    /*
      Once the expression is exhausted only '{' and '*' can still match.
    */
    if (GetUTFCode(expression) == 0)
      if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
        break;
    switch (GetUTFCode(pattern))
    {
      case '*':
      {
        MagickBooleanType
          status;

        /*
          Collapse a run of '*', then try the rest of the pattern against
          every suffix of the expression.
        */
        status=MagickFalse;
        while (GetUTFCode(pattern) == '*')
          pattern+=GetUTFOctets(pattern);
        while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
        {
          status=GlobExpression(expression,pattern,case_insensitive);
          expression+=GetUTFOctets(expression);
        }
        if (status != MagickFalse)
          {
            /*
              A suffix matched: run both strings to their ends so the final
              test below succeeds.
            */
            while (GetUTFCode(expression) != 0)
              expression+=GetUTFOctets(expression);
            while (GetUTFCode(pattern) != 0)
              pattern+=GetUTFOctets(pattern);
          }
        break;
      }
      case '[':
      {
        int
          c;

        /*
          Bracket expression: test the current expression character against
          each member or lo-hi range until one matches or the set ends.
        */
        pattern+=GetUTFOctets(pattern);
        for ( ; ; )
        {
          if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
            {
              done=MagickTrue;
              break;
            }
          if (GetUTFCode(pattern) == '\\')
            {
              pattern+=GetUTFOctets(pattern);
              if (GetUTFCode(pattern) == 0)
                {
                  done=MagickTrue;
                  break;
                }
             }
          if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
            {
              c=GetUTFCode(pattern);
              pattern+=GetUTFOctets(pattern);
              pattern+=GetUTFOctets(pattern);
              /*
                The range may be cut short by ']' or by the end of the
                pattern (e.g. "[a-").  Stop on either: without the NUL test
                the mismatch path below would advance the pattern while it
                sits on the terminator.
                NOTE(review): GetUTFOctets() is not visible here; this guard
                keeps the scan inside the string whatever it reports for NUL.
              */
              if ((GetUTFCode(pattern) == ']') || (GetUTFCode(pattern) == 0))
                {
                  done=MagickTrue;
                  break;
                }
              if (GetUTFCode(pattern) == '\\')
                {
                  pattern+=GetUTFOctets(pattern);
                  if (GetUTFCode(pattern) == 0)
                    {
                      done=MagickTrue;
                      break;
                    }
                }
              if ((GetUTFCode(expression) < c) ||
                  (GetUTFCode(expression) > GetUTFCode(pattern)))
                {
                  pattern+=GetUTFOctets(pattern);
                  continue;
                }
            }
          else
            if (GetUTFCode(pattern) != GetUTFCode(expression))
              {
                pattern+=GetUTFOctets(pattern);
                continue;
              }
          /*
            Matched a member: skip the remainder of the set, honoring
            backslash escapes, then step past ']' and the matched character.
          */
          pattern+=GetUTFOctets(pattern);
          while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
          {
            if ((GetUTFCode(pattern) == '\\') &&
                (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
              pattern+=GetUTFOctets(pattern);
            pattern+=GetUTFOctets(pattern);
          }
          if (GetUTFCode(pattern) != 0)
            {
              pattern+=GetUTFOctets(pattern);
              expression+=GetUTFOctets(expression);
            }
          break;
        }
        break;
      }
      case '?':
      {
        /*
          Any single character.
        */
        pattern+=GetUTFOctets(pattern);
        expression+=GetUTFOctets(expression);
        break;
      }
      case '{':
      {
        char
          *target;

        char
          *p;

        /*
          Alternatives: copy the bytes after '{' into target one at a time;
          whenever the next pattern character is ',' or '}', terminate the
          copy and match the expression against it recursively.
          NOTE(review): relies on AcquireString(pattern) returning a buffer
          large enough for any prefix of pattern; AcquireString() is not
          visible here.
        */
        target=AcquireString(pattern);
        p=target;
        pattern++;
        while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
        {
          *p++=(*pattern++);
          if ((GetUTFCode(pattern) == ',') || (GetUTFCode(pattern) == '}'))
            {
              *p='\0';
              match=GlobExpression(expression,target,case_insensitive);
              if (match != MagickFalse)
                {
                  expression+=MagickMin(strlen(expression),strlen(target));
                  break;
                }
              p=target;
              pattern+=GetUTFOctets(pattern);
            }
        }
        /*
          Skip to just past the closing brace, if there is one.
        */
        while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
          pattern+=GetUTFOctets(pattern);
        if (GetUTFCode(pattern) != 0)
          pattern+=GetUTFOctets(pattern);
        target=DestroyString(target);
        break;
      }
      case '\\':
      {
        /*
          Escape: the next pattern character is compared literally; a
          trailing backslash ends the pattern.
        */
        pattern+=GetUTFOctets(pattern);
        if (GetUTFCode(pattern) == 0)
          break;
        magick_fallthrough;
      }
      default:
      {
        /*
          Literal character, compared with or without regard to case.
        */
        if (case_insensitive != MagickFalse)
          {
            if (LocaleToLowercase((int) GetUTFCode(expression)) != LocaleToLowercase((int) GetUTFCode(pattern)))
              {
                done=MagickTrue;
                break;
              }
          }
        else
          if (GetUTFCode(expression) != GetUTFCode(pattern))
            {
              done=MagickTrue;
              break;
            }
        expression+=GetUTFOctets(expression);
        pattern+=GetUTFOctets(pattern);
      }
    }
  }
  /*
    Trailing '*' match the empty string; the match succeeds only when both
    the expression and the pattern have been fully consumed.
  */
  while (GetUTFCode(pattern) == '*')
    pattern+=GetUTFOctets(pattern);
  match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
    MagickTrue : MagickFalse;
  return(match);
}
555

556
/*
557
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
558
%                                                                             %
559
%                                                                             %
560
%                                                                             %
561
+     I s G l o b                                                             %
562
%                                                                             %
563
%                                                                             %
564
%                                                                             %
565
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
566
%
567
%  IsGlob() returns MagickTrue if the path specification contains a globbing
568
%  pattern.
569
%
570
%  The format of the IsGlob method is:
571
%
572
%      MagickBooleanType IsGlob(const char *path)
573
%
574
%  A description of each parameter follows:
575
%
576
%    o path: the path.
577
%
578
*/
579
MagickPrivate MagickBooleanType IsGlob(const char *path)
{
  /*
    A path that IsPathAccessible() accepts is never treated as a glob, even
    when it contains globbing characters.
  */
  if (IsPathAccessible(path) != MagickFalse)
    return(MagickFalse);
  /*
    Otherwise the path is a glob when it contains any of * ? { } [ ].
  */
  return(strpbrk(path,"*?{}[]") != (char *) NULL ? MagickTrue : MagickFalse);
}
609

610
/*
611
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
612
%                                                                             %
613
%                                                                             %
614
%                                                                             %
615
%   T o k e n i z e r                                                         %
616
%                                                                             %
617
%                                                                             %
618
%                                                                             %
619
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
620
%
621
%  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
622
%  one at a time from a string of characters.  The characters used for white
623
%  space, for break characters, and for quotes can be specified.  Also,
624
%  characters in the string can be preceded by a specifiable escape character
625
%  which removes any special meaning the character may have.
626
%
627
%  Here is some terminology:
628
%
629
%    o token: A single unit of information in the form of a group of
630
%      characters.
631
%
632
%    o white space: Space that gets ignored (except within quotes or when
633
%      escaped), like blanks and tabs. in addition, white space terminates a
634
%      non-quoted token.
635
%
636
%    o break set: One or more characters that separates non-quoted tokens.
637
%      Commas are a common break character. The usage of break characters to
638
%      signal the end of a token is the same as that of white space, except
639
%      multiple break characters with nothing or only white space between
640
%      generate a null token for each two break characters together.
641
%
642
%      For example, if blank is set to be the white space and comma is set to
643
%      be the break character, the line
644
%
645
%        A, B, C ,  , DEF
646
%
647
%        ... consists of 5 tokens:
648
%
649
%        1)  "A"
650
%        2)  "B"
651
%        3)  "C"
652
%        4)  "" (the null string)
653
%        5)  "DEF"
654
%
655
%    o Quote character: A character that, when surrounding a group of other
656
%      characters, causes the group of characters to be treated as a single
657
%      token, no matter how many white spaces or break characters exist in
658
%      the group. Also, a token always terminates after the closing quote.
659
%      For example, if ' is the quote character, blank is white space, and
660
%      comma is the break character, the following string
661
%
662
%        A, ' B, CD'EF GHI
663
%
664
%        ... consists of 4 tokens:
665
%
666
%        1)  "A"
667
%        2)  " B, CD" (note the blanks & comma)
668
%        3)  "EF"
669
%        4)  "GHI"
670
%
671
%      The quote characters themselves do not appear in the resultant
672
%      tokens.  The double quotes are delimiters I use here for
673
%      documentation purposes only.
674
%
675
%    o Escape character: A character which itself is ignored but which
676
%      causes the next character to be used as is.  ^ and \ are often used
677
%      as escape characters. An escape in the last position of the string
678
%      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
679
%      and non-escape) character. For example, assume white space, break
680
%      character, and quote are the same as in the above examples, and
681
%      further, assume that ^ is the escape character. Then, in the string
682
%
683
%        ABC, ' DEF ^' GH' I ^ J K^ L ^
684
%
685
%        ... there are 7 tokens:
686
%
687
%        1)  "ABC"
688
%        2)  " DEF ' GH"
689
%        3)  "I"
690
%        4)  " "     (a lone blank)
691
%        5)  "J"
692
%        6)  "K L"
693
%        7)  "^"     (passed as is at end of line)
694
%
695
%  The format of the Tokenizer method is:
696
%
697
%      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
698
%        const size_t max_token_length,const char *line,const char *white,
699
%        const char *break_set,const char *quote,const char escape,
700
%        char *breaker,int *next,char *quoted)
701
%
702
%  A description of each parameter follows:
703
%
704
%    o flag: right now, only the low order 2 bits are used.
705
%
706
%        1 => convert non-quoted tokens to upper case
707
%        2 => convert non-quoted tokens to lower case
708
%        0 => do not convert non-quoted tokens
709
%
710
%    o token: a character string containing the returned next token
711
%
712
%    o max_token_length: the maximum size of "token".  Characters beyond
713
%      "max_token_length" are truncated.
714
%
715
%    o line: the string to be parsed.
716
%
717
%    o white: a string of the valid white spaces.  example:
718
%
719
%        char whitesp[]={" \t"};
720
%
721
%      blank and tab will be valid white space.
722
%
723
%    o break_set: a string of the valid break characters. example:
724
%
725
%        char breakch[]={";,"};
726
%
727
%      semicolon and comma will be valid break characters.
728
%
729
%    o quote: a string of the valid quote characters. An example would be
730
%
731
%        char quote[]={"'\""};
732
%
733
%      (this causes single and double quotes to be valid) Note that a
734
%      token starting with one of these characters needs the same quote
735
%      character to terminate it.
736
%
737
%      for example:
738
%
739
%        "ABC '
740
%
741
%      is unterminated, but
742
%
743
%        "DEF" and 'GHI'
744
%
745
%      are properly terminated.  Note that different quote characters
746
%      can appear on the same line; only for a given token do the quote
747
%      characters have to be the same.
748
%
749
%    o escape: the escape character (NOT a string ... only one
750
%      allowed). Use zero if none is desired.
751
%
752
%    o breaker: the break character used to terminate the current
753
%      token.  If the token was quoted, this will be the quote used.  If
754
%      the token is the last one on the line, this will be zero.
755
%
756
%    o next: this variable points to the first character of the
757
%      next token.  it gets reset by "tokenizer" as it steps through the
758
%      string.  Set it to 0 upon initialization, and leave it alone
759
%      after that.  You can change it if you want to jump around in the
760
%      string or re-parse from the beginning, but be careful.
761
%
762
%    o quoted: set to True if the token was quoted and MagickFalse
763
%      if not.  You may need this information (for example:  in C, a
764
%      string with quotes around it is a character string, while one
765
%      without is an identifier).
766
%
767
%    o result: 0 if we haven't reached EOS (end of string), and 1
768
%      if we have.
769
%
770
*/
771
772
34.1k
/*
  Parser states stored in TokenInfo.state.
*/
enum
{
  IN_WHITE = 0,  /* state Tokenizer() starts each token in */
  IN_TOKEN = 1,  /* presumably: inside an unquoted token */
  IN_QUOTE = 2,  /* StoreToken() skips case conversion in this state */
  IN_OZONE = 3   /* NOTE(review): meaning not evident from the visible code */
};
776
777
/*
  Return the index of the first character in string equal to c, or -1 when c
  does not occur.  The terminating NUL is never matched.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    offset;

  for (offset=0; string[offset] != '\0'; offset++)
    if ((int) string[offset] == c)
      return(offset);
  return(-1);
}
787
788
/*
  StoreToken() appends character c to the token buffer at
  token_info->offset and advances the offset.

  The character is dropped silently when the offset is negative or when
  storing it would leave no room for the terminating NUL, so over-long tokens
  are truncated rather than overflowing the buffer.

  Characters stored while the scanner is inside a quoted token are kept
  verbatim.  Otherwise the low two bits of token_info->flag select case
  folding: 1 converts to uppercase, 2 converts to lowercase, and any other
  value stores the character unchanged.

  A description of each parameter follows:

    o token_info: scanner state; offset is updated, state and flag are read.

    o string: the token buffer being filled.

    o max_token_length: capacity of the token buffer in bytes, including the
      byte reserved for the terminating NUL.

    o c: the character to store.
*/
static void StoreToken(TokenInfo *token_info,char *string,
  size_t max_token_length,int c)
{
  ssize_t
    i;

  /*
    Keep one byte free for the terminator.  The test is phrased as
    offset+1 >= capacity rather than offset >= capacity-1 so that a capacity
    of zero cannot wrap around to SIZE_MAX and let the store through.
  */
  if ((token_info->offset < 0) ||
      (((size_t) token_info->offset+1) >= max_token_length))
    return;
  i=token_info->offset++;
  if (token_info->state == IN_QUOTE)
    {
      string[i]=(char) c;
      return;
    }
  switch (token_info->flag & 0x03)
  {
    case 1:
    {
      string[i]=(char) LocaleToUppercase(c);
      break;
    }
    case 2:
    {
      string[i]=(char) LocaleToLowercase(c);
      break;
    }
    default:
    {
      string[i]=(char) c;
      break;
    }
  }
}
817
818
MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
819
  char *token,const size_t max_token_length,const char *line,const char *white,
820
  const char *break_set,const char *quote,const char escape,char *breaker,
821
  int *next,char *quoted)
822
23.8k
{
823
23.8k
  int
824
23.8k
    c;
825
826
23.8k
  ssize_t
827
23.8k
    i;
828
829
23.8k
  *breaker='\0';
830
23.8k
  *quoted='\0';
831
23.8k
  if (line[*next] == '\0')
832
6.77k
    return(1);
833
17.0k
  token_info->state=IN_WHITE;
834
17.0k
  token_info->quote=(char) MagickFalse;
835
17.0k
  token_info->flag=flag;
836
80.1k
  for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
837
74.9k
  {
838
74.9k
    c=(int) line[*next];
839
74.9k
    i=sindex(c,break_set);
840
74.9k
    if (i >= 0)
841
9.49k
      {
842
9.49k
        switch (token_info->state)
843
9.49k
        {
844
3.91k
          case IN_WHITE:
845
8.65k
          case IN_TOKEN:
846
8.89k
          case IN_OZONE:
847
8.89k
          {
848
8.89k
            (*next)++;
849
8.89k
            *breaker=break_set[i];
850
8.89k
            token[token_info->offset]='\0';
851
8.89k
            return(0);
852
8.65k
          }
853
604
          case IN_QUOTE:
854
604
          {
855
604
            StoreToken(token_info,token,max_token_length,c);
856
604
            break;
857
8.65k
          }
858
9.49k
        }
859
604
        continue;
860
9.49k
      }
861
65.4k
    i=sindex(c,quote);
862
65.4k
    if (i >= 0)
863
6.82k
      {
864
6.82k
        switch (token_info->state)
865
6.82k
        {
866
2.89k
          case IN_WHITE:
867
2.89k
          {
868
2.89k
            token_info->state=IN_QUOTE;
869
2.89k
            token_info->quote=quote[i];
870
2.89k
            *quoted=(char) MagickTrue;
871
2.89k
            break;
872
0
          }
873
1.90k
          case IN_QUOTE:
874
1.90k
          {
875
1.90k
            if (quote[i] != token_info->quote)
876
0
              StoreToken(token_info,token,max_token_length,c);
877
1.90k
            else
878
1.90k
              {
879
1.90k
                token_info->state=IN_OZONE;
880
1.90k
                token_info->quote='\0';
881
1.90k
              }
882
1.90k
            break;
883
0
          }
884
1.78k
          case IN_TOKEN:
885
2.02k
          case IN_OZONE:
886
2.02k
          {
887
2.02k
            *breaker=(char) c;
888
2.02k
            token[token_info->offset]='\0';
889
2.02k
            return(0);
890
1.78k
          }
891
6.82k
        }
892
4.80k
        continue;
893
6.82k
      }
894
58.6k
    i=sindex(c,white);
895
58.6k
    if (i >= 0)
896
0
      {
897
0
        switch (token_info->state)
898
0
        {
899
0
          case IN_WHITE:
900
0
          case IN_OZONE:
901
0
            break;
902
0
          case IN_TOKEN:
903
0
          {
904
0
            token_info->state=IN_OZONE;
905
0
            break;
906
0
          }
907
0
          case IN_QUOTE:
908
0
          {
909
0
            StoreToken(token_info,token,max_token_length,c);
910
0
            break;
911
0
          }
912
0
        }
913
0
        continue;
914
0
      }
915
58.6k
    if (c == (int) escape)
916
0
      {
917
0
        if (line[(*next)+1] == '\0')
918
0
          {
919
0
            *breaker='\0';
920
0
            StoreToken(token_info,token,max_token_length,c);
921
0
            (*next)++;
922
0
            token[token_info->offset]='\0';
923
0
            return(0);
924
0
          }
925
0
        switch (token_info->state)
926
0
        {
927
0
          case IN_WHITE:
928
0
          {
929
0
            (*next)--;
930
0
            token_info->state=IN_TOKEN;
931
0
            break;
932
0
          }
933
0
          case IN_TOKEN:
934
0
          case IN_QUOTE:
935
0
          {
936
0
            (*next)++;
937
0
            c=(int) line[*next];
938
0
            StoreToken(token_info,token,max_token_length,c);
939
0
            break;
940
0
          }
941
0
          case IN_OZONE:
942
0
          {
943
0
            token[token_info->offset]='\0';
944
0
            return(0);
945
0
          }
946
0
        }
947
0
        continue;
948
0
      }
949
58.6k
    switch (token_info->state)
950
58.6k
    {
951
10.2k
      case IN_WHITE:
952
10.2k
      {
953
10.2k
        token_info->state=IN_TOKEN;
954
10.2k
        StoreToken(token_info,token,max_token_length,c);
955
10.2k
        break;
956
0
      }
957
28.6k
      case IN_TOKEN:
958
47.4k
      case IN_QUOTE:
959
47.4k
      {
960
47.4k
        StoreToken(token_info,token,max_token_length,c);
961
47.4k
        break;
962
28.6k
      }
963
940
      case IN_OZONE:
964
940
      {
965
940
        token[token_info->offset]='\0';
966
940
        return(0);
967
28.6k
      }
968
58.6k
    }
969
58.6k
  }
970
5.20k
  token[token_info->offset]='\0';
971
5.20k
  return(0);
972
17.0k
}