Coverage Report

Created: 2023-12-13 20:03

/src/libxml2/dict.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * dict.c: dictionary of reusable strings, just used to avoid allocation
3
 *         and freeing operations.
4
 *
5
 * Copyright (C) 2003-2012 Daniel Veillard.
6
 *
7
 * Permission to use, copy, modify, and distribute this software for any
8
 * purpose with or without fee is hereby granted, provided that the above
9
 * copyright notice and this permission notice appear in all copies.
10
 *
11
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
12
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
13
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND
14
 * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER.
15
 *
16
 * Author: daniel@veillard.com
17
 */
18
19
#define IN_LIBXML
20
#include "libxml.h"
21
22
#include <limits.h>
23
#include <stdlib.h>
24
#include <time.h>
25
26
#include "private/dict.h"
27
28
/*
29
 * Following http://www.ocert.org/advisories/ocert-2011-003.html
30
 * it seems that having hash randomization might be a good idea
31
 * when using XML with untrusted data
32
 * Note1: that it works correctly only if compiled with WITH_BIG_KEY
33
 *  which is the default.
34
 * Note2: the fast function used for a small dict won't protect very
35
 *  well but since the attack is based on growing a very big hash
36
 *  list we will use the BigKey algo as soon as the hash size grows
37
 *  over MIN_DICT_SIZE so this actually works
38
 */
39
#if !defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
40
#define DICT_RANDOMIZATION
41
#endif
42
43
#include <string.h>
44
#ifdef HAVE_STDINT_H
45
#include <stdint.h>
46
#else
47
#ifdef HAVE_INTTYPES_H
48
#include <inttypes.h>
49
#elif defined(_WIN32)
50
typedef unsigned __int32 uint32_t;
51
#endif
52
#endif
53
#include <libxml/tree.h>
54
#include <libxml/dict.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/xmlerror.h>
57
#include <libxml/globals.h>
58
59
/* #define DEBUG_GROW */
60
/* #define DICT_DEBUG_PATTERNS */
61
62
19.1M
#define MAX_HASH_LEN 3
63
170M
#define MIN_DICT_SIZE 128
64
7.65k
#define MAX_DICT_HASH 8 * 2048
65
#define WITH_BIG_KEY
66
67
#ifdef WITH_BIG_KEY
68
#define xmlDictComputeKey(dict, name, len)                              \
69
154M
    (((dict)->size == MIN_DICT_SIZE) ?                                  \
70
154M
     xmlDictComputeFastKey(name, len, (dict)->seed) :                   \
71
154M
     xmlDictComputeBigKey(name, len, (dict)->seed))
72
73
#define xmlDictComputeQKey(dict, prefix, plen, name, len)               \
74
1.51M
    (((prefix) == NULL) ?                                               \
75
1.51M
      (xmlDictComputeKey(dict, name, len)) :                             \
76
1.51M
      (((dict)->size == MIN_DICT_SIZE) ?                                \
77
1.51M
       xmlDictComputeFastQKey(prefix, plen, name, len, (dict)->seed) :  \
78
1.51M
       xmlDictComputeBigQKey(prefix, plen, name, len, (dict)->seed)))
79
80
#else /* !WITH_BIG_KEY */
81
#define xmlDictComputeKey(dict, name, len)                              \
82
        xmlDictComputeFastKey(name, len, (dict)->seed)
83
#define xmlDictComputeQKey(dict, prefix, plen, name, len)               \
84
        xmlDictComputeFastQKey(prefix, plen, name, len, (dict)->seed)
85
#endif /* WITH_BIG_KEY */
86
87
/*
88
 * An entry in the dictionary
89
 */
90
typedef struct _xmlDictEntry xmlDictEntry;
91
typedef xmlDictEntry *xmlDictEntryPtr;
92
struct _xmlDictEntry {
93
    struct _xmlDictEntry *next;
94
    const xmlChar *name;
95
    unsigned int len;
96
    int valid;
97
    unsigned long okey;
98
};
99
100
typedef struct _xmlDictStrings xmlDictStrings;
101
typedef xmlDictStrings *xmlDictStringsPtr;
102
struct _xmlDictStrings {
103
    xmlDictStringsPtr next;
104
    xmlChar *free;
105
    xmlChar *end;
106
    size_t size;
107
    size_t nbStrings;
108
    xmlChar array[1];
109
};
110
/*
111
 * The entire dictionary
112
 */
113
struct _xmlDict {
114
    int ref_counter;
115
116
    struct _xmlDictEntry *dict;
117
    size_t size;
118
    unsigned int nbElems;
119
    xmlDictStringsPtr strings;
120
121
    struct _xmlDict *subdict;
122
    /* used for randomization */
123
    int seed;
124
    /* used to impose a limit on size */
125
    size_t limit;
126
};
127
128
/*
129
 * A mutex for modifying the reference counter for shared
130
 * dictionaries.
131
 */
132
static xmlMutexPtr xmlDictMutex = NULL;
133
134
/*
135
 * Whether the dictionary mutex was initialized.
136
 */
137
static int xmlDictInitialized = 0;
138
139
#ifdef DICT_RANDOMIZATION
140
#ifdef HAVE_RAND_R
141
/*
142
 * Internal data for random function, protected by xmlDictMutex
143
 */
144
static unsigned int rand_seed = 0;
145
#endif
146
#endif
147
148
/**
149
 * xmlInitializeDict:
150
 *
151
 * DEPRECATED: This function will be made private. Call xmlInitParser to
152
 * initialize the library.
153
 *
154
 * Do the dictionary mutex initialization.
155
 *
156
 * Returns 0 if initialization was already done, and 1 if that
157
 * call led to the initialization
158
 */
159
3.75k
int xmlInitializeDict(void) {
160
3.75k
    return(0);
161
3.75k
}
162
163
/**
164
 * __xmlInitializeDict:
165
 *
166
 * This function is not public
167
 * Do the dictionary mutex initialization.
168
 * this function is not thread safe, initialization should
169
 * normally be done once at setup when called from xmlOnceInit()
170
 * we may also land in this code if thread support is not compiled in
171
 *
172
 * Returns 0 if initialization was already done, and 1 if that
173
 * call led to the initialization
174
 */
175
3.75k
int __xmlInitializeDict(void) {
176
3.75k
    if (xmlDictInitialized)
177
0
        return(1);
178
179
3.75k
    if ((xmlDictMutex = xmlNewMutex()) == NULL)
180
0
        return(0);
181
3.75k
    xmlMutexLock(xmlDictMutex);
182
183
#ifdef DICT_RANDOMIZATION
184
#ifdef HAVE_RAND_R
185
    rand_seed = time(NULL);
186
    rand_r(& rand_seed);
187
#else
188
    srand(time(NULL));
189
#endif
190
#endif
191
3.75k
    xmlDictInitialized = 1;
192
3.75k
    xmlMutexUnlock(xmlDictMutex);
193
3.75k
    return(1);
194
3.75k
}
195
196
#ifdef DICT_RANDOMIZATION
197
int __xmlRandom(void) {
198
    int ret;
199
200
    if (xmlDictInitialized == 0)
201
        __xmlInitializeDict();
202
203
    xmlMutexLock(xmlDictMutex);
204
#ifdef HAVE_RAND_R
205
    ret = rand_r(& rand_seed);
206
#else
207
    ret = rand();
208
#endif
209
    xmlMutexUnlock(xmlDictMutex);
210
    return(ret);
211
}
212
#endif
213
214
/**
215
 * xmlDictCleanup:
216
 *
217
 * DEPRECATED: This function will be made private. Call xmlCleanupParser
218
 * to free global state but see the warnings there. xmlCleanupParser
219
 * should be only called once at program exit. In most cases, you don't
220
 * have call cleanup functions at all.
221
 *
222
 * Free the dictionary mutex. Do not call unless sure the library
223
 * is not in use anymore !
224
 */
225
void
226
0
xmlDictCleanup(void) {
227
0
    if (!xmlDictInitialized)
228
0
        return;
229
230
0
    xmlFreeMutex(xmlDictMutex);
231
232
0
    xmlDictInitialized = 0;
233
0
}
234
235
/*
236
 * xmlDictAddString:
237
 * @dict: the dictionary
238
 * @name: the name of the userdata
239
 * @len: the length of the name
240
 *
241
 * Add the string to the array[s]
242
 *
243
 * Returns the pointer of the local string, or NULL in case of error.
244
 */
245
static const xmlChar *
246
19.0M
xmlDictAddString(xmlDictPtr dict, const xmlChar *name, unsigned int namelen) {
247
19.0M
    xmlDictStringsPtr pool;
248
19.0M
    const xmlChar *ret;
249
19.0M
    size_t size = 0; /* + sizeof(_xmlDictStrings) == 1024 */
250
19.0M
    size_t limit = 0;
251
252
#ifdef DICT_DEBUG_PATTERNS
253
    fprintf(stderr, "-");
254
#endif
255
19.0M
    pool = dict->strings;
256
19.1M
    while (pool != NULL) {
257
15.3M
  if ((size_t)(pool->end - pool->free) > namelen)
258
15.2M
      goto found_pool;
259
100k
  if (pool->size > size) size = pool->size;
260
100k
        limit += pool->size;
261
100k
  pool = pool->next;
262
100k
    }
263
    /*
264
     * Not found, need to allocate
265
     */
266
3.73M
    if (pool == NULL) {
267
3.73M
        if ((dict->limit > 0) && (limit > dict->limit)) {
268
0
            return(NULL);
269
0
        }
270
271
3.73M
        if (size == 0) size = 1000;
272
87.3k
  else size *= 4; /* exponential growth */
273
3.73M
        if (size < 4 * namelen)
274
60.5k
      size = 4 * namelen; /* just in case ! */
275
3.73M
  pool = (xmlDictStringsPtr) xmlMalloc(sizeof(xmlDictStrings) + size);
276
3.73M
  if (pool == NULL)
277
0
      return(NULL);
278
3.73M
  pool->size = size;
279
3.73M
  pool->nbStrings = 0;
280
3.73M
  pool->free = &pool->array[0];
281
3.73M
  pool->end = &pool->array[size];
282
3.73M
  pool->next = dict->strings;
283
3.73M
  dict->strings = pool;
284
#ifdef DICT_DEBUG_PATTERNS
285
        fprintf(stderr, "+");
286
#endif
287
3.73M
    }
288
19.0M
found_pool:
289
19.0M
    ret = pool->free;
290
19.0M
    memcpy(pool->free, name, namelen);
291
19.0M
    pool->free += namelen;
292
19.0M
    *(pool->free++) = 0;
293
19.0M
    pool->nbStrings++;
294
19.0M
    return(ret);
295
3.73M
}
296
297
/*
298
 * xmlDictAddQString:
299
 * @dict: the dictionary
300
 * @prefix: the prefix of the userdata
301
 * @plen: the prefix length
302
 * @name: the name of the userdata
303
 * @len: the length of the name
304
 *
305
 * Add the QName to the array[s]
306
 *
307
 * Returns the pointer of the local string, or NULL in case of error.
308
 */
309
static const xmlChar *
310
xmlDictAddQString(xmlDictPtr dict, const xmlChar *prefix, unsigned int plen,
311
                 const xmlChar *name, unsigned int namelen)
312
116k
{
313
116k
    xmlDictStringsPtr pool;
314
116k
    const xmlChar *ret;
315
116k
    size_t size = 0; /* + sizeof(_xmlDictStrings) == 1024 */
316
116k
    size_t limit = 0;
317
318
116k
    if (prefix == NULL) return(xmlDictAddString(dict, name, namelen));
319
320
#ifdef DICT_DEBUG_PATTERNS
321
    fprintf(stderr, "=");
322
#endif
323
116k
    pool = dict->strings;
324
118k
    while (pool != NULL) {
325
117k
  if ((size_t)(pool->end - pool->free) > namelen + plen + 1)
326
114k
      goto found_pool;
327
2.15k
  if (pool->size > size) size = pool->size;
328
2.15k
        limit += pool->size;
329
2.15k
  pool = pool->next;
330
2.15k
    }
331
    /*
332
     * Not found, need to allocate
333
     */
334
1.24k
    if (pool == NULL) {
335
1.24k
        if ((dict->limit > 0) && (limit > dict->limit)) {
336
0
            return(NULL);
337
0
        }
338
339
1.24k
        if (size == 0) size = 1000;
340
1.24k
  else size *= 4; /* exponential growth */
341
1.24k
        if (size < 4 * (namelen + plen + 1))
342
0
      size = 4 * (namelen + plen + 1); /* just in case ! */
343
1.24k
  pool = (xmlDictStringsPtr) xmlMalloc(sizeof(xmlDictStrings) + size);
344
1.24k
  if (pool == NULL)
345
0
      return(NULL);
346
1.24k
  pool->size = size;
347
1.24k
  pool->nbStrings = 0;
348
1.24k
  pool->free = &pool->array[0];
349
1.24k
  pool->end = &pool->array[size];
350
1.24k
  pool->next = dict->strings;
351
1.24k
  dict->strings = pool;
352
#ifdef DICT_DEBUG_PATTERNS
353
        fprintf(stderr, "+");
354
#endif
355
1.24k
    }
356
116k
found_pool:
357
116k
    ret = pool->free;
358
116k
    memcpy(pool->free, prefix, plen);
359
116k
    pool->free += plen;
360
116k
    *(pool->free++) = ':';
361
116k
    memcpy(pool->free, name, namelen);
362
116k
    pool->free += namelen;
363
116k
    *(pool->free++) = 0;
364
116k
    pool->nbStrings++;
365
116k
    return(ret);
366
1.24k
}
367
368
#ifdef WITH_BIG_KEY
369
/*
370
 * xmlDictComputeBigKey:
371
 *
372
 * Calculate a hash key using a good hash function that works well for
373
 * larger hash table sizes.
374
 *
375
 * Hash function by "One-at-a-Time Hash" see
376
 * http://burtleburtle.net/bob/hash/doobs.html
377
 */
378
379
#ifdef __clang__
380
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
381
#endif
382
static uint32_t
383
3.50M
xmlDictComputeBigKey(const xmlChar* data, int namelen, int seed) {
384
3.50M
    uint32_t hash;
385
3.50M
    int i;
386
387
3.50M
    if (namelen <= 0 || data == NULL) return(0);
388
389
3.50M
    hash = seed;
390
391
106M
    for (i = 0;i < namelen; i++) {
392
103M
        hash += data[i];
393
103M
  hash += (hash << 10);
394
103M
  hash ^= (hash >> 6);
395
103M
    }
396
3.50M
    hash += (hash << 3);
397
3.50M
    hash ^= (hash >> 11);
398
3.50M
    hash += (hash << 15);
399
400
3.50M
    return hash;
401
3.50M
}
402
403
/*
404
 * xmlDictComputeBigQKey:
405
 *
406
 * Calculate a hash key for two strings using a good hash function
407
 * that works well for larger hash table sizes.
408
 *
409
 * Hash function by "One-at-a-Time Hash" see
410
 * http://burtleburtle.net/bob/hash/doobs.html
411
 *
412
 * Neither of the two strings must be NULL.
413
 */
414
#ifdef __clang__
415
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
416
#endif
417
static unsigned long
418
xmlDictComputeBigQKey(const xmlChar *prefix, int plen,
419
                      const xmlChar *name, int len, int seed)
420
112k
{
421
112k
    uint32_t hash;
422
112k
    int i;
423
424
112k
    hash = seed;
425
426
502k
    for (i = 0;i < plen; i++) {
427
389k
        hash += prefix[i];
428
389k
  hash += (hash << 10);
429
389k
  hash ^= (hash >> 6);
430
389k
    }
431
112k
    hash += ':';
432
112k
    hash += (hash << 10);
433
112k
    hash ^= (hash >> 6);
434
435
1.37M
    for (i = 0;i < len; i++) {
436
1.26M
        hash += name[i];
437
1.26M
  hash += (hash << 10);
438
1.26M
  hash ^= (hash >> 6);
439
1.26M
    }
440
112k
    hash += (hash << 3);
441
112k
    hash ^= (hash >> 11);
442
112k
    hash += (hash << 15);
443
444
112k
    return hash;
445
112k
}
446
#endif /* WITH_BIG_KEY */
447
448
/*
449
 * xmlDictComputeFastKey:
450
 *
451
 * Calculate a hash key using a fast hash function that works well
452
 * for low hash table fill.
453
 */
454
static unsigned long
455
150M
xmlDictComputeFastKey(const xmlChar *name, int namelen, int seed) {
456
150M
    unsigned long value = seed;
457
458
150M
    if (name == NULL) return(0);
459
150M
    value += *name;
460
150M
    value <<= 5;
461
150M
    if (namelen > 10) {
462
25.8M
        value += name[namelen - 1];
463
25.8M
        namelen = 10;
464
25.8M
    }
465
150M
    switch (namelen) {
466
32.9M
        case 10: value += name[9];
467
        /* Falls through. */
468
35.1M
        case 9: value += name[8];
469
        /* Falls through. */
470
41.5M
        case 8: value += name[7];
471
        /* Falls through. */
472
51.8M
        case 7: value += name[6];
473
        /* Falls through. */
474
58.1M
        case 6: value += name[5];
475
        /* Falls through. */
476
72.5M
        case 5: value += name[4];
477
        /* Falls through. */
478
93.8M
        case 4: value += name[3];
479
        /* Falls through. */
480
119M
        case 3: value += name[2];
481
        /* Falls through. */
482
129M
        case 2: value += name[1];
483
        /* Falls through. */
484
150M
        default: break;
485
150M
    }
486
150M
    return(value);
487
150M
}
488
489
/*
490
 * xmlDictComputeFastQKey:
491
 *
492
 * Calculate a hash key for two strings using a fast hash function
493
 * that works well for low hash table fill.
494
 *
495
 * Neither of the two strings must be NULL.
496
 */
497
static unsigned long
498
xmlDictComputeFastQKey(const xmlChar *prefix, int plen,
499
                       const xmlChar *name, int len, int seed)
500
1.40M
{
501
1.40M
    unsigned long value = seed;
502
503
1.40M
    if (plen == 0)
504
0
  value += 30 * ':';
505
1.40M
    else
506
1.40M
  value += 30 * (*prefix);
507
508
1.40M
    if (len > 10) {
509
83.3k
        int offset = len - (plen + 1 + 1);
510
83.3k
  if (offset < 0)
511
10.8k
      offset = len - (10 + 1);
512
83.3k
  value += name[offset];
513
83.3k
        len = 10;
514
83.3k
  if (plen > 10)
515
19.5k
      plen = 10;
516
83.3k
    }
517
1.40M
    switch (plen) {
518
66.3k
        case 10: value += prefix[9];
519
        /* Falls through. */
520
75.2k
        case 9: value += prefix[8];
521
        /* Falls through. */
522
129k
        case 8: value += prefix[7];
523
        /* Falls through. */
524
139k
        case 7: value += prefix[6];
525
        /* Falls through. */
526
147k
        case 6: value += prefix[5];
527
        /* Falls through. */
528
194k
        case 5: value += prefix[4];
529
        /* Falls through. */
530
214k
        case 4: value += prefix[3];
531
        /* Falls through. */
532
733k
        case 3: value += prefix[2];
533
        /* Falls through. */
534
1.29M
        case 2: value += prefix[1];
535
        /* Falls through. */
536
1.38M
        case 1: value += prefix[0];
537
        /* Falls through. */
538
1.40M
        default: break;
539
1.40M
    }
540
1.40M
    len -= plen;
541
1.40M
    if (len > 0) {
542
969k
        value += ':';
543
969k
  len--;
544
969k
    }
545
1.40M
    switch (len) {
546
0
        case 10: value += name[9];
547
        /* Falls through. */
548
0
        case 9: value += name[8];
549
        /* Falls through. */
550
4.39k
        case 8: value += name[7];
551
        /* Falls through. */
552
46.5k
        case 7: value += name[6];
553
        /* Falls through. */
554
72.3k
        case 6: value += name[5];
555
        /* Falls through. */
556
373k
        case 5: value += name[4];
557
        /* Falls through. */
558
638k
        case 4: value += name[3];
559
        /* Falls through. */
560
671k
        case 3: value += name[2];
561
        /* Falls through. */
562
706k
        case 2: value += name[1];
563
        /* Falls through. */
564
740k
        case 1: value += name[0];
565
        /* Falls through. */
566
1.40M
        default: break;
567
1.40M
    }
568
1.40M
    return(value);
569
1.40M
}
570
571
/**
572
 * xmlDictCreate:
573
 *
574
 * Create a new dictionary
575
 *
576
 * Returns the newly created dictionary, or NULL if an error occurred.
577
 */
578
xmlDictPtr
579
4.97M
xmlDictCreate(void) {
580
4.97M
    xmlDictPtr dict;
581
582
4.97M
    if (!xmlDictInitialized)
583
0
        if (!__xmlInitializeDict())
584
0
            return(NULL);
585
586
#ifdef DICT_DEBUG_PATTERNS
587
    fprintf(stderr, "C");
588
#endif
589
590
4.97M
    dict = xmlMalloc(sizeof(xmlDict));
591
4.97M
    if (dict) {
592
4.97M
        dict->ref_counter = 1;
593
4.97M
        dict->limit = 0;
594
595
4.97M
        dict->size = MIN_DICT_SIZE;
596
4.97M
  dict->nbElems = 0;
597
4.97M
        dict->dict = xmlMalloc(MIN_DICT_SIZE * sizeof(xmlDictEntry));
598
4.97M
  dict->strings = NULL;
599
4.97M
  dict->subdict = NULL;
600
4.97M
        if (dict->dict) {
601
4.97M
      memset(dict->dict, 0, MIN_DICT_SIZE * sizeof(xmlDictEntry));
602
#ifdef DICT_RANDOMIZATION
603
            dict->seed = __xmlRandom();
604
#else
605
4.97M
            dict->seed = 0;
606
4.97M
#endif
607
4.97M
      return(dict);
608
4.97M
        }
609
0
        xmlFree(dict);
610
0
    }
611
0
    return(NULL);
612
4.97M
}
613
614
/**
615
 * xmlDictCreateSub:
616
 * @sub: an existing dictionary
617
 *
618
 * Create a new dictionary, inheriting strings from the read-only
619
 * dictionary @sub. On lookup, strings are first searched in the
620
 * new dictionary, then in @sub, and if not found are created in the
621
 * new dictionary.
622
 *
623
 * Returns the newly created dictionary, or NULL if an error occurred.
624
 */
625
xmlDictPtr
626
0
xmlDictCreateSub(xmlDictPtr sub) {
627
0
    xmlDictPtr dict = xmlDictCreate();
628
629
0
    if ((dict != NULL) && (sub != NULL)) {
630
#ifdef DICT_DEBUG_PATTERNS
631
        fprintf(stderr, "R");
632
#endif
633
0
        dict->seed = sub->seed;
634
0
        dict->subdict = sub;
635
0
  xmlDictReference(dict->subdict);
636
0
    }
637
0
    return(dict);
638
0
}
639
640
/**
641
 * xmlDictReference:
642
 * @dict: the dictionary
643
 *
644
 * Increment the reference counter of a dictionary
645
 *
646
 * Returns 0 in case of success and -1 in case of error
647
 */
648
int
649
3.30M
xmlDictReference(xmlDictPtr dict) {
650
3.30M
    if (!xmlDictInitialized)
651
0
        if (!__xmlInitializeDict())
652
0
            return(-1);
653
654
3.30M
    if (dict == NULL) return -1;
655
2.90M
    xmlMutexLock(xmlDictMutex);
656
2.90M
    dict->ref_counter++;
657
2.90M
    xmlMutexUnlock(xmlDictMutex);
658
2.90M
    return(0);
659
3.30M
}
660
661
/**
662
 * xmlDictGrow:
663
 * @dict: the dictionary
664
 * @size: the new size of the dictionary
665
 *
666
 * resize the dictionary
667
 *
668
 * Returns 0 in case of success, -1 in case of failure
669
 */
670
static int
671
7.65k
xmlDictGrow(xmlDictPtr dict, size_t size) {
672
7.65k
    unsigned long key, okey;
673
7.65k
    size_t oldsize, i;
674
7.65k
    xmlDictEntryPtr iter, next;
675
7.65k
    struct _xmlDictEntry *olddict;
676
#ifdef DEBUG_GROW
677
    unsigned long nbElem = 0;
678
#endif
679
7.65k
    int ret = 0;
680
7.65k
    int keep_keys = 1;
681
682
7.65k
    if (dict == NULL)
683
0
  return(-1);
684
7.65k
    if (size < 8)
685
0
        return(-1);
686
7.65k
    if (size > 8 * 2048)
687
0
  return(-1);
688
689
#ifdef DICT_DEBUG_PATTERNS
690
    fprintf(stderr, "*");
691
#endif
692
693
7.65k
    oldsize = dict->size;
694
7.65k
    olddict = dict->dict;
695
7.65k
    if (olddict == NULL)
696
0
        return(-1);
697
7.65k
    if (oldsize == MIN_DICT_SIZE)
698
7.65k
        keep_keys = 0;
699
700
7.65k
    dict->dict = xmlMalloc(size * sizeof(xmlDictEntry));
701
7.65k
    if (dict->dict == NULL) {
702
0
  dict->dict = olddict;
703
0
  return(-1);
704
0
    }
705
7.65k
    memset(dict->dict, 0, size * sizeof(xmlDictEntry));
706
7.65k
    dict->size = size;
707
708
    /*  If the two loops are merged, there would be situations where
709
  a new entry needs to allocated and data copied into it from
710
  the main dict. It is nicer to run through the array twice, first
711
  copying all the elements in the main array (less probability of
712
  allocate) and then the rest, so we only free in the second loop.
713
    */
714
987k
    for (i = 0; i < oldsize; i++) {
715
979k
  if (olddict[i].valid == 0)
716
766k
      continue;
717
718
212k
  if (keep_keys)
719
0
      okey = olddict[i].okey;
720
212k
  else
721
212k
      okey = xmlDictComputeKey(dict, olddict[i].name, olddict[i].len);
722
212k
  key = okey % dict->size;
723
724
212k
  if (dict->dict[key].valid == 0) {
725
203k
      memcpy(&(dict->dict[key]), &(olddict[i]), sizeof(xmlDictEntry));
726
203k
      dict->dict[key].next = NULL;
727
203k
      dict->dict[key].okey = okey;
728
203k
  } else {
729
9.30k
      xmlDictEntryPtr entry;
730
731
9.30k
      entry = xmlMalloc(sizeof(xmlDictEntry));
732
9.30k
      if (entry != NULL) {
733
9.30k
    entry->name = olddict[i].name;
734
9.30k
    entry->len = olddict[i].len;
735
9.30k
    entry->okey = okey;
736
9.30k
    entry->next = dict->dict[key].next;
737
9.30k
    entry->valid = 1;
738
9.30k
    dict->dict[key].next = entry;
739
9.30k
      } else {
740
          /*
741
     * we don't have much ways to alert from here
742
     * result is losing an entry and unicity guarantee
743
     */
744
0
          ret = -1;
745
0
      }
746
9.30k
  }
747
#ifdef DEBUG_GROW
748
  nbElem++;
749
#endif
750
212k
    }
751
752
987k
    for (i = 0; i < oldsize; i++) {
753
979k
  iter = olddict[i].next;
754
1.12M
  while (iter) {
755
144k
      next = iter->next;
756
757
      /*
758
       * put back the entry in the new dict
759
       */
760
761
144k
      if (keep_keys)
762
0
    okey = iter->okey;
763
144k
      else
764
144k
    okey = xmlDictComputeKey(dict, iter->name, iter->len);
765
144k
      key = okey % dict->size;
766
144k
      if (dict->dict[key].valid == 0) {
767
127k
    memcpy(&(dict->dict[key]), iter, sizeof(xmlDictEntry));
768
127k
    dict->dict[key].next = NULL;
769
127k
    dict->dict[key].valid = 1;
770
127k
    dict->dict[key].okey = okey;
771
127k
    xmlFree(iter);
772
127k
      } else {
773
16.5k
    iter->next = dict->dict[key].next;
774
16.5k
    iter->okey = okey;
775
16.5k
    dict->dict[key].next = iter;
776
16.5k
      }
777
778
#ifdef DEBUG_GROW
779
      nbElem++;
780
#endif
781
782
144k
      iter = next;
783
144k
  }
784
979k
    }
785
786
7.65k
    xmlFree(olddict);
787
788
#ifdef DEBUG_GROW
789
    xmlGenericError(xmlGenericErrorContext,
790
      "xmlDictGrow : from %lu to %lu, %u elems\n", oldsize, size, nbElem);
791
#endif
792
793
7.65k
    return(ret);
794
7.65k
}
795
796
/**
797
 * xmlDictFree:
798
 * @dict: the dictionary
799
 *
800
 * Free the hash @dict and its contents. The userdata is
801
 * deallocated with @f if provided.
802
 */
803
void
804
7.88M
xmlDictFree(xmlDictPtr dict) {
805
7.88M
    size_t i;
806
7.88M
    xmlDictEntryPtr iter;
807
7.88M
    xmlDictEntryPtr next;
808
7.88M
    int inside_dict = 0;
809
7.88M
    xmlDictStringsPtr pool, nextp;
810
811
7.88M
    if (dict == NULL)
812
0
  return;
813
814
7.88M
    if (!xmlDictInitialized)
815
0
        if (!__xmlInitializeDict())
816
0
            return;
817
818
    /* decrement the counter, it may be shared by a parser and docs */
819
7.88M
    xmlMutexLock(xmlDictMutex);
820
7.88M
    dict->ref_counter--;
821
7.88M
    if (dict->ref_counter > 0) {
822
2.90M
        xmlMutexUnlock(xmlDictMutex);
823
2.90M
        return;
824
2.90M
    }
825
826
4.97M
    xmlMutexUnlock(xmlDictMutex);
827
828
4.97M
    if (dict->subdict != NULL) {
829
0
        xmlDictFree(dict->subdict);
830
0
    }
831
832
4.97M
    if (dict->dict) {
833
352M
  for(i = 0; ((i < dict->size) && (dict->nbElems > 0)); i++) {
834
347M
      iter = &(dict->dict[i]);
835
347M
      if (iter->valid == 0)
836
332M
    continue;
837
14.5M
      inside_dict = 1;
838
33.6M
      while (iter) {
839
19.1M
    next = iter->next;
840
19.1M
    if (!inside_dict)
841
4.55M
        xmlFree(iter);
842
19.1M
    dict->nbElems--;
843
19.1M
    inside_dict = 0;
844
19.1M
    iter = next;
845
19.1M
      }
846
14.5M
  }
847
4.97M
  xmlFree(dict->dict);
848
4.97M
    }
849
4.97M
    pool = dict->strings;
850
8.70M
    while (pool != NULL) {
851
3.73M
        nextp = pool->next;
852
3.73M
  xmlFree(pool);
853
3.73M
  pool = nextp;
854
3.73M
    }
855
4.97M
    xmlFree(dict);
856
4.97M
}
857
858
/**
859
 * xmlDictLookup:
860
 * @dict: the dictionary
861
 * @name: the name of the userdata
862
 * @len: the length of the name, if -1 it is recomputed
863
 *
864
 * Add the @name to the dictionary @dict if not present.
865
 *
866
 * Returns the internal copy of the name or NULL in case of internal error
867
 */
868
const xmlChar *
869
158M
xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
870
158M
    unsigned long key, okey, nbi = 0;
871
158M
    xmlDictEntryPtr entry;
872
158M
    xmlDictEntryPtr insert;
873
158M
    const xmlChar *ret;
874
158M
    unsigned int l;
875
876
158M
    if ((dict == NULL) || (name == NULL))
877
4.41M
  return(NULL);
878
879
153M
    if (len < 0)
880
29.4M
        l = strlen((const char *) name);
881
124M
    else
882
124M
        l = len;
883
884
153M
    if (((dict->limit > 0) && (l >= dict->limit)) ||
885
153M
        (l > INT_MAX / 2))
886
0
        return(NULL);
887
888
    /*
889
     * Check for duplicate and insertion location.
890
     */
891
153M
    okey = xmlDictComputeKey(dict, name, l);
892
153M
    key = okey % dict->size;
893
153M
    if (dict->dict[key].valid == 0) {
894
14.3M
  insert = NULL;
895
139M
    } else {
896
164M
  for (insert = &(dict->dict[key]); insert->next != NULL;
897
139M
       insert = insert->next) {
898
46.4M
#ifdef __GNUC__
899
46.4M
      if ((insert->okey == okey) && (insert->len == l)) {
900
22.2M
    if (!memcmp(insert->name, name, l))
901
21.2M
        return(insert->name);
902
22.2M
      }
903
#else
904
      if ((insert->okey == okey) && (insert->len == l) &&
905
          (!xmlStrncmp(insert->name, name, l)))
906
    return(insert->name);
907
#endif
908
25.2M
      nbi++;
909
25.2M
  }
910
118M
#ifdef __GNUC__
911
118M
  if ((insert->okey == okey) && (insert->len == l)) {
912
113M
      if (!memcmp(insert->name, name, l))
913
113M
    return(insert->name);
914
113M
  }
915
#else
916
  if ((insert->okey == okey) && (insert->len == l) &&
917
      (!xmlStrncmp(insert->name, name, l)))
918
      return(insert->name);
919
#endif
920
118M
    }
921
922
19.0M
    if (dict->subdict) {
923
0
        unsigned long skey;
924
925
        /* we cannot always reuse the same okey for the subdict */
926
0
        if (((dict->size == MIN_DICT_SIZE) &&
927
0
       (dict->subdict->size != MIN_DICT_SIZE)) ||
928
0
            ((dict->size != MIN_DICT_SIZE) &&
929
0
       (dict->subdict->size == MIN_DICT_SIZE)))
930
0
      skey = xmlDictComputeKey(dict->subdict, name, l);
931
0
  else
932
0
      skey = okey;
933
934
0
  key = skey % dict->subdict->size;
935
0
  if (dict->subdict->dict[key].valid != 0) {
936
0
      xmlDictEntryPtr tmp;
937
938
0
      for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
939
0
     tmp = tmp->next) {
940
0
#ifdef __GNUC__
941
0
    if ((tmp->okey == skey) && (tmp->len == l)) {
942
0
        if (!memcmp(tmp->name, name, l))
943
0
      return(tmp->name);
944
0
    }
945
#else
946
    if ((tmp->okey == skey) && (tmp->len == l) &&
947
        (!xmlStrncmp(tmp->name, name, l)))
948
        return(tmp->name);
949
#endif
950
0
    nbi++;
951
0
      }
952
0
#ifdef __GNUC__
953
0
      if ((tmp->okey == skey) && (tmp->len == l)) {
954
0
    if (!memcmp(tmp->name, name, l))
955
0
        return(tmp->name);
956
0
      }
957
#else
958
      if ((tmp->okey == skey) && (tmp->len == l) &&
959
    (!xmlStrncmp(tmp->name, name, l)))
960
    return(tmp->name);
961
#endif
962
0
  }
963
0
  key = okey % dict->size;
964
0
    }
965
966
19.0M
    ret = xmlDictAddString(dict, name, l);
967
19.0M
    if (ret == NULL)
968
0
        return(NULL);
969
19.0M
    if (insert == NULL) {
970
14.3M
  entry = &(dict->dict[key]);
971
14.3M
    } else {
972
4.65M
  entry = xmlMalloc(sizeof(xmlDictEntry));
973
4.65M
  if (entry == NULL)
974
0
       return(NULL);
975
4.65M
    }
976
19.0M
    entry->name = ret;
977
19.0M
    entry->len = l;
978
19.0M
    entry->next = NULL;
979
19.0M
    entry->valid = 1;
980
19.0M
    entry->okey = okey;
981
982
983
19.0M
    if (insert != NULL)
984
4.65M
  insert->next = entry;
985
986
19.0M
    dict->nbElems++;
987
988
19.0M
    if ((nbi > MAX_HASH_LEN) &&
989
19.0M
        (dict->size <= ((MAX_DICT_HASH / 2) / MAX_HASH_LEN))) {
990
7.26k
  if (xmlDictGrow(dict, MAX_HASH_LEN * 2 * dict->size) != 0)
991
0
      return(NULL);
992
7.26k
    }
993
    /* Note that entry may have been freed at this point by xmlDictGrow */
994
995
19.0M
    return(ret);
996
19.0M
}
997
998
/**
999
 * xmlDictExists:
1000
 * @dict: the dictionary
1001
 * @name: the name of the userdata
1002
 * @len: the length of the name, if -1 it is recomputed
1003
 *
1004
 * Check if the @name exists in the dictionary @dict.
1005
 *
1006
 * Returns the internal copy of the name or NULL if not found.
1007
 */
1008
const xmlChar *
1009
0
xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
1010
0
    unsigned long key, okey, nbi = 0;
1011
0
    xmlDictEntryPtr insert;
1012
0
    unsigned int l;
1013
1014
0
    if ((dict == NULL) || (name == NULL))
1015
0
  return(NULL);
1016
1017
0
    if (len < 0)
1018
0
        l = strlen((const char *) name);
1019
0
    else
1020
0
        l = len;
1021
0
    if (((dict->limit > 0) && (l >= dict->limit)) ||
1022
0
        (l > INT_MAX / 2))
1023
0
        return(NULL);
1024
1025
    /*
1026
     * Check for duplicate and insertion location.
1027
     */
1028
0
    okey = xmlDictComputeKey(dict, name, l);
1029
0
    key = okey % dict->size;
1030
0
    if (dict->dict[key].valid == 0) {
1031
0
  insert = NULL;
1032
0
    } else {
1033
0
  for (insert = &(dict->dict[key]); insert->next != NULL;
1034
0
       insert = insert->next) {
1035
0
#ifdef __GNUC__
1036
0
      if ((insert->okey == okey) && (insert->len == l)) {
1037
0
    if (!memcmp(insert->name, name, l))
1038
0
        return(insert->name);
1039
0
      }
1040
#else
1041
      if ((insert->okey == okey) && (insert->len == l) &&
1042
          (!xmlStrncmp(insert->name, name, l)))
1043
    return(insert->name);
1044
#endif
1045
0
      nbi++;
1046
0
  }
1047
0
#ifdef __GNUC__
1048
0
  if ((insert->okey == okey) && (insert->len == l)) {
1049
0
      if (!memcmp(insert->name, name, l))
1050
0
    return(insert->name);
1051
0
  }
1052
#else
1053
  if ((insert->okey == okey) && (insert->len == l) &&
1054
      (!xmlStrncmp(insert->name, name, l)))
1055
      return(insert->name);
1056
#endif
1057
0
    }
1058
1059
0
    if (dict->subdict) {
1060
0
        unsigned long skey;
1061
1062
        /* we cannot always reuse the same okey for the subdict */
1063
0
        if (((dict->size == MIN_DICT_SIZE) &&
1064
0
       (dict->subdict->size != MIN_DICT_SIZE)) ||
1065
0
            ((dict->size != MIN_DICT_SIZE) &&
1066
0
       (dict->subdict->size == MIN_DICT_SIZE)))
1067
0
      skey = xmlDictComputeKey(dict->subdict, name, l);
1068
0
  else
1069
0
      skey = okey;
1070
1071
0
  key = skey % dict->subdict->size;
1072
0
  if (dict->subdict->dict[key].valid != 0) {
1073
0
      xmlDictEntryPtr tmp;
1074
1075
0
      for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
1076
0
     tmp = tmp->next) {
1077
0
#ifdef __GNUC__
1078
0
    if ((tmp->okey == skey) && (tmp->len == l)) {
1079
0
        if (!memcmp(tmp->name, name, l))
1080
0
      return(tmp->name);
1081
0
    }
1082
#else
1083
    if ((tmp->okey == skey) && (tmp->len == l) &&
1084
        (!xmlStrncmp(tmp->name, name, l)))
1085
        return(tmp->name);
1086
#endif
1087
0
    nbi++;
1088
0
      }
1089
0
#ifdef __GNUC__
1090
0
      if ((tmp->okey == skey) && (tmp->len == l)) {
1091
0
    if (!memcmp(tmp->name, name, l))
1092
0
        return(tmp->name);
1093
0
      }
1094
#else
1095
      if ((tmp->okey == skey) && (tmp->len == l) &&
1096
    (!xmlStrncmp(tmp->name, name, l)))
1097
    return(tmp->name);
1098
#endif
1099
0
  }
1100
0
    }
1101
1102
    /* not found */
1103
0
    return(NULL);
1104
0
}
1105
1106
/**
1107
 * xmlDictQLookup:
1108
 * @dict: the dictionary
1109
 * @prefix: the prefix
1110
 * @name: the name
1111
 *
1112
 * Add the QName @prefix:@name to the hash @dict if not present.
1113
 *
1114
 * Returns the internal copy of the QName or NULL in case of internal error
1115
 */
1116
const xmlChar *
1117
1.51M
xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) {
1118
1.51M
    unsigned long okey, key, nbi = 0;
1119
1.51M
    xmlDictEntryPtr entry;
1120
1.51M
    xmlDictEntryPtr insert;
1121
1.51M
    const xmlChar *ret;
1122
1.51M
    unsigned int len, plen, l;
1123
1124
1.51M
    if ((dict == NULL) || (name == NULL))
1125
0
  return(NULL);
1126
1.51M
    if (prefix == NULL)
1127
0
        return(xmlDictLookup(dict, name, -1));
1128
1129
1.51M
    l = len = strlen((const char *) name);
1130
1.51M
    plen = strlen((const char *) prefix);
1131
1.51M
    len += 1 + plen;
1132
1133
    /*
1134
     * Check for duplicate and insertion location.
1135
     */
1136
1.51M
    okey = xmlDictComputeQKey(dict, prefix, plen, name, l);
1137
1.51M
    key = okey % dict->size;
1138
1.51M
    if (dict->dict[key].valid == 0) {
1139
93.2k
  insert = NULL;
1140
1.42M
    } else {
1141
1.64M
  for (insert = &(dict->dict[key]); insert->next != NULL;
1142
1.42M
       insert = insert->next) {
1143
393k
      if ((insert->okey == okey) && (insert->len == len) &&
1144
393k
          (xmlStrQEqual(prefix, name, insert->name)))
1145
175k
    return(insert->name);
1146
218k
      nbi++;
1147
218k
  }
1148
1.24M
  if ((insert->okey == okey) && (insert->len == len) &&
1149
1.24M
      (xmlStrQEqual(prefix, name, insert->name)))
1150
1.22M
      return(insert->name);
1151
1.24M
    }
1152
1153
116k
    if (dict->subdict) {
1154
0
        unsigned long skey;
1155
1156
        /* we cannot always reuse the same okey for the subdict */
1157
0
        if (((dict->size == MIN_DICT_SIZE) &&
1158
0
       (dict->subdict->size != MIN_DICT_SIZE)) ||
1159
0
            ((dict->size != MIN_DICT_SIZE) &&
1160
0
       (dict->subdict->size == MIN_DICT_SIZE)))
1161
0
      skey = xmlDictComputeQKey(dict->subdict, prefix, plen, name, l);
1162
0
  else
1163
0
      skey = okey;
1164
1165
0
  key = skey % dict->subdict->size;
1166
0
  if (dict->subdict->dict[key].valid != 0) {
1167
0
      xmlDictEntryPtr tmp;
1168
0
      for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
1169
0
     tmp = tmp->next) {
1170
0
    if ((tmp->okey == skey) && (tmp->len == len) &&
1171
0
        (xmlStrQEqual(prefix, name, tmp->name)))
1172
0
        return(tmp->name);
1173
0
    nbi++;
1174
0
      }
1175
0
      if ((tmp->okey == skey) && (tmp->len == len) &&
1176
0
    (xmlStrQEqual(prefix, name, tmp->name)))
1177
0
    return(tmp->name);
1178
0
  }
1179
0
  key = okey % dict->size;
1180
0
    }
1181
1182
116k
    ret = xmlDictAddQString(dict, prefix, plen, name, l);
1183
116k
    if (ret == NULL)
1184
0
        return(NULL);
1185
116k
    if (insert == NULL) {
1186
93.2k
  entry = &(dict->dict[key]);
1187
93.2k
    } else {
1188
22.9k
  entry = xmlMalloc(sizeof(xmlDictEntry));
1189
22.9k
  if (entry == NULL)
1190
0
       return(NULL);
1191
22.9k
    }
1192
116k
    entry->name = ret;
1193
116k
    entry->len = len;
1194
116k
    entry->next = NULL;
1195
116k
    entry->valid = 1;
1196
116k
    entry->okey = okey;
1197
1198
116k
    if (insert != NULL)
1199
22.9k
  insert->next = entry;
1200
1201
116k
    dict->nbElems++;
1202
1203
116k
    if ((nbi > MAX_HASH_LEN) &&
1204
116k
        (dict->size <= ((MAX_DICT_HASH / 2) / MAX_HASH_LEN)))
1205
383
  xmlDictGrow(dict, MAX_HASH_LEN * 2 * dict->size);
1206
    /* Note that entry may have been freed at this point by xmlDictGrow */
1207
1208
116k
    return(ret);
1209
116k
}
1210
1211
/**
1212
 * xmlDictOwns:
1213
 * @dict: the dictionary
1214
 * @str: the string
1215
 *
1216
 * check if a string is owned by the dictionary
1217
 *
1218
 * Returns 1 if true, 0 if false and -1 in case of error
1219
 * -1 in case of error
1220
 */
1221
int
1222
108M
xmlDictOwns(xmlDictPtr dict, const xmlChar *str) {
1223
108M
    xmlDictStringsPtr pool;
1224
1225
108M
    if ((dict == NULL) || (str == NULL))
1226
0
  return(-1);
1227
108M
    pool = dict->strings;
1228
209M
    while (pool != NULL) {
1229
154M
        if ((str >= &pool->array[0]) && (str <= pool->free))
1230
53.1M
      return(1);
1231
101M
  pool = pool->next;
1232
101M
    }
1233
55.2M
    if (dict->subdict)
1234
0
        return(xmlDictOwns(dict->subdict, str));
1235
55.2M
    return(0);
1236
55.2M
}
1237
1238
/**
1239
 * xmlDictSize:
1240
 * @dict: the dictionary
1241
 *
1242
 * Query the number of elements installed in the hash @dict.
1243
 *
1244
 * Returns the number of elements in the dictionary or
1245
 * -1 in case of error
1246
 */
1247
int
1248
0
xmlDictSize(xmlDictPtr dict) {
1249
0
    if (dict == NULL)
1250
0
  return(-1);
1251
0
    if (dict->subdict)
1252
0
        return(dict->nbElems + dict->subdict->nbElems);
1253
0
    return(dict->nbElems);
1254
0
}
1255
1256
/**
1257
 * xmlDictSetLimit:
1258
 * @dict: the dictionary
1259
 * @limit: the limit in bytes
1260
 *
1261
 * Set a size limit for the dictionary
1262
 * Added in 2.9.0
1263
 *
1264
 * Returns the previous limit of the dictionary or 0
1265
 */
1266
size_t
1267
5.44M
xmlDictSetLimit(xmlDictPtr dict, size_t limit) {
1268
5.44M
    size_t ret;
1269
1270
5.44M
    if (dict == NULL)
1271
0
  return(0);
1272
5.44M
    ret = dict->limit;
1273
5.44M
    dict->limit = limit;
1274
5.44M
    return(ret);
1275
5.44M
}
1276
1277
/**
1278
 * xmlDictGetUsage:
1279
 * @dict: the dictionary
1280
 *
1281
 * Get how much memory is used by a dictionary for strings
1282
 * Added in 2.9.0
1283
 *
1284
 * Returns the amount of strings allocated
1285
 */
1286
size_t
1287
0
xmlDictGetUsage(xmlDictPtr dict) {
1288
0
    xmlDictStringsPtr pool;
1289
0
    size_t limit = 0;
1290
1291
0
    if (dict == NULL)
1292
0
  return(0);
1293
0
    pool = dict->strings;
1294
0
    while (pool != NULL) {
1295
0
        limit += pool->size;
1296
0
  pool = pool->next;
1297
0
    }
1298
0
    return(limit);
1299
0
}
1300