Coverage Report

Created: 2023-12-14 14:09

/src/libxml2/dict.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * dict.c: dictionary of reusable strings, just used to avoid allocation
3
 *         and freeing operations.
4
 *
5
 * Copyright (C) 2003-2012 Daniel Veillard.
6
 *
7
 * Permission to use, copy, modify, and distribute this software for any
8
 * purpose with or without fee is hereby granted, provided that the above
9
 * copyright notice and this permission notice appear in all copies.
10
 *
11
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
12
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
13
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND
14
 * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER.
15
 *
16
 * Author: daniel@veillard.com
17
 */
18
19
#define IN_LIBXML
20
#include "libxml.h"
21
22
#include <limits.h>
23
#include <stdlib.h>
24
#include <time.h>
25
26
#include "private/dict.h"
27
#include "private/threads.h"
28
29
/*
30
 * Following http://www.ocert.org/advisories/ocert-2011-003.html
31
 * it seems that having hash randomization might be a good idea
32
 * when using XML with untrusted data
33
 * Note1: that it works correctly only if compiled with WITH_BIG_KEY
34
 *  which is the default.
35
 * Note2: the fast function used for a small dict won't protect very
36
 *  well but since the attack is based on growing a very big hash
37
 *  list we will use the BigKey algo as soon as the hash size grows
38
 *  over MIN_DICT_SIZE so this actually works
39
 */
40
#if !defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
41
#define DICT_RANDOMIZATION
42
#endif
43
44
#include <string.h>
45
#ifdef HAVE_STDINT_H
46
#include <stdint.h>
47
#else
48
#ifdef HAVE_INTTYPES_H
49
#include <inttypes.h>
50
#elif defined(_WIN32)
51
typedef unsigned __int32 uint32_t;
52
#endif
53
#endif
54
#include <libxml/tree.h>
55
#include <libxml/dict.h>
56
#include <libxml/xmlmemory.h>
57
#include <libxml/xmlerror.h>
58
#include <libxml/globals.h>
59
60
/* #define DEBUG_GROW */
61
/* #define DICT_DEBUG_PATTERNS */
62
63
45.8M
#define MAX_HASH_LEN 3
64
784M
#define MIN_DICT_SIZE 128
65
65.7k
#define MAX_DICT_HASH 8 * 2048
66
#define WITH_BIG_KEY
67
68
#ifdef WITH_BIG_KEY
69
#define xmlDictComputeKey(dict, name, len)                              \
70
775M
    (((dict)->size == MIN_DICT_SIZE) ?                                  \
71
775M
     xmlDictComputeFastKey(name, len, (dict)->seed) :                   \
72
775M
     xmlDictComputeBigKey(name, len, (dict)->seed))
73
74
#define xmlDictComputeQKey(dict, prefix, plen, name, len)               \
75
1.15M
    (((prefix) == NULL) ?                                               \
76
1.15M
      (xmlDictComputeKey(dict, name, len)) :                             \
77
1.15M
      (((dict)->size == MIN_DICT_SIZE) ?                                \
78
1.15M
       xmlDictComputeFastQKey(prefix, plen, name, len, (dict)->seed) :  \
79
1.15M
       xmlDictComputeBigQKey(prefix, plen, name, len, (dict)->seed)))
80
81
#else /* !WITH_BIG_KEY */
82
#define xmlDictComputeKey(dict, name, len)                              \
83
        xmlDictComputeFastKey(name, len, (dict)->seed)
84
#define xmlDictComputeQKey(dict, prefix, plen, name, len)               \
85
        xmlDictComputeFastQKey(prefix, plen, name, len, (dict)->seed)
86
#endif /* WITH_BIG_KEY */
87
88
/*
89
 * An entry in the dictionary
90
 */
91
typedef struct _xmlDictEntry xmlDictEntry;
92
typedef xmlDictEntry *xmlDictEntryPtr;
93
struct _xmlDictEntry {
94
    struct _xmlDictEntry *next;
95
    const xmlChar *name;
96
    unsigned int len;
97
    int valid;
98
    unsigned long okey;
99
};
100
101
typedef struct _xmlDictStrings xmlDictStrings;
102
typedef xmlDictStrings *xmlDictStringsPtr;
103
struct _xmlDictStrings {
104
    xmlDictStringsPtr next;
105
    xmlChar *free;
106
    xmlChar *end;
107
    size_t size;
108
    size_t nbStrings;
109
    xmlChar array[1];
110
};
111
/*
112
 * The entire dictionary
113
 */
114
struct _xmlDict {
115
    int ref_counter;
116
117
    struct _xmlDictEntry *dict;
118
    size_t size;
119
    unsigned int nbElems;
120
    xmlDictStringsPtr strings;
121
122
    struct _xmlDict *subdict;
123
    /* used for randomization */
124
    int seed;
125
    /* used to impose a limit on size */
126
    size_t limit;
127
};
128
129
/*
130
 * A mutex for modifying the reference counter for shared
131
 * dictionaries.
132
 */
133
static xmlMutex xmlDictMutex;
134
135
#ifdef DICT_RANDOMIZATION
136
#ifdef HAVE_RAND_R
137
/*
138
 * Internal data for random function, protected by xmlDictMutex
139
 */
140
static unsigned int rand_seed = 0;
141
#endif
142
#endif
143
144
/**
145
 * xmlInitializeDict:
146
 *
147
 * DEPRECATED: Alias for xmlInitParser.
148
 */
149
0
int xmlInitializeDict(void) {
150
0
    xmlInitParser();
151
0
    return(0);
152
0
}
153
154
/**
155
 * __xmlInitializeDict:
156
 *
157
 * This function is not public
158
 * Do the dictionary mutex initialization.
159
 */
160
3.70k
int __xmlInitializeDict(void) {
161
3.70k
    xmlInitMutex(&xmlDictMutex);
162
163
#ifdef DICT_RANDOMIZATION
164
#ifdef HAVE_RAND_R
165
    rand_seed = time(NULL);
166
    rand_r(& rand_seed);
167
#else
168
    srand(time(NULL));
169
#endif
170
#endif
171
3.70k
    return(1);
172
3.70k
}
173
174
#ifdef DICT_RANDOMIZATION
175
int __xmlRandom(void) {
176
    int ret;
177
178
    xmlMutexLock(&xmlDictMutex);
179
#ifdef HAVE_RAND_R
180
    ret = rand_r(& rand_seed);
181
#else
182
    ret = rand();
183
#endif
184
    xmlMutexUnlock(&xmlDictMutex);
185
    return(ret);
186
}
187
#endif
188
189
/**
190
 * xmlDictCleanup:
191
 *
192
 * DEPRECATED: This function is a no-op. Call xmlCleanupParser
193
 * to free global state but see the warnings there. xmlCleanupParser
194
 * should be only called once at program exit. In most cases, you don't
195
 * have call cleanup functions at all.
196
 */
197
void
198
0
xmlDictCleanup(void) {
199
0
}
200
201
/**
202
 * xmlCleanupDictInternal:
203
 *
204
 * Free the dictionary mutex.
205
 */
206
void
207
0
xmlCleanupDictInternal(void) {
208
0
    xmlCleanupMutex(&xmlDictMutex);
209
0
}
210
211
/*
212
 * xmlDictAddString:
213
 * @dict: the dictionary
214
 * @name: the name of the userdata
215
 * @len: the length of the name
216
 *
217
 * Add the string to the array[s]
218
 *
219
 * Returns the pointer of the local string, or NULL in case of error.
220
 */
221
static const xmlChar *
222
45.4M
xmlDictAddString(xmlDictPtr dict, const xmlChar *name, unsigned int namelen) {
223
45.4M
    xmlDictStringsPtr pool;
224
45.4M
    const xmlChar *ret;
225
45.4M
    size_t size = 0; /* + sizeof(_xmlDictStrings) == 1024 */
226
45.4M
    size_t limit = 0;
227
228
#ifdef DICT_DEBUG_PATTERNS
229
    fprintf(stderr, "-");
230
#endif
231
45.4M
    pool = dict->strings;
232
45.6M
    while (pool != NULL) {
233
44.2M
  if ((size_t)(pool->end - pool->free) > namelen)
234
44.1M
      goto found_pool;
235
154k
  if (pool->size > size) size = pool->size;
236
154k
        limit += pool->size;
237
154k
  pool = pool->next;
238
154k
    }
239
    /*
240
     * Not found, need to allocate
241
     */
242
1.34M
    if (pool == NULL) {
243
1.34M
        if ((dict->limit > 0) && (limit > dict->limit)) {
244
0
            return(NULL);
245
0
        }
246
247
1.34M
        if (size == 0) size = 1000;
248
140k
  else size *= 4; /* exponential growth */
249
1.34M
        if (size < 4 * namelen)
250
8.71k
      size = 4 * namelen; /* just in case ! */
251
1.34M
  pool = (xmlDictStringsPtr) xmlMalloc(sizeof(xmlDictStrings) + size);
252
1.34M
  if (pool == NULL)
253
0
      return(NULL);
254
1.34M
  pool->size = size;
255
1.34M
  pool->nbStrings = 0;
256
1.34M
  pool->free = &pool->array[0];
257
1.34M
  pool->end = &pool->array[size];
258
1.34M
  pool->next = dict->strings;
259
1.34M
  dict->strings = pool;
260
#ifdef DICT_DEBUG_PATTERNS
261
        fprintf(stderr, "+");
262
#endif
263
1.34M
    }
264
45.4M
found_pool:
265
45.4M
    ret = pool->free;
266
45.4M
    memcpy(pool->free, name, namelen);
267
45.4M
    pool->free += namelen;
268
45.4M
    *(pool->free++) = 0;
269
45.4M
    pool->nbStrings++;
270
45.4M
    return(ret);
271
1.34M
}
272
273
/*
274
 * xmlDictAddQString:
275
 * @dict: the dictionary
276
 * @prefix: the prefix of the userdata
277
 * @plen: the prefix length
278
 * @name: the name of the userdata
279
 * @len: the length of the name
280
 *
281
 * Add the QName to the array[s]
282
 *
283
 * Returns the pointer of the local string, or NULL in case of error.
284
 */
285
static const xmlChar *
286
xmlDictAddQString(xmlDictPtr dict, const xmlChar *prefix, unsigned int plen,
287
                 const xmlChar *name, unsigned int namelen)
288
206k
{
289
206k
    xmlDictStringsPtr pool;
290
206k
    const xmlChar *ret;
291
206k
    size_t size = 0; /* + sizeof(_xmlDictStrings) == 1024 */
292
206k
    size_t limit = 0;
293
294
206k
    if (prefix == NULL) return(xmlDictAddString(dict, name, namelen));
295
296
#ifdef DICT_DEBUG_PATTERNS
297
    fprintf(stderr, "=");
298
#endif
299
206k
    pool = dict->strings;
300
208k
    while (pool != NULL) {
301
207k
  if ((size_t)(pool->end - pool->free) > namelen + plen + 1)
302
205k
      goto found_pool;
303
1.90k
  if (pool->size > size) size = pool->size;
304
1.90k
        limit += pool->size;
305
1.90k
  pool = pool->next;
306
1.90k
    }
307
    /*
308
     * Not found, need to allocate
309
     */
310
840
    if (pool == NULL) {
311
840
        if ((dict->limit > 0) && (limit > dict->limit)) {
312
0
            return(NULL);
313
0
        }
314
315
840
        if (size == 0) size = 1000;
316
840
  else size *= 4; /* exponential growth */
317
840
        if (size < 4 * (namelen + plen + 1))
318
0
      size = 4 * (namelen + plen + 1); /* just in case ! */
319
840
  pool = (xmlDictStringsPtr) xmlMalloc(sizeof(xmlDictStrings) + size);
320
840
  if (pool == NULL)
321
0
      return(NULL);
322
840
  pool->size = size;
323
840
  pool->nbStrings = 0;
324
840
  pool->free = &pool->array[0];
325
840
  pool->end = &pool->array[size];
326
840
  pool->next = dict->strings;
327
840
  dict->strings = pool;
328
#ifdef DICT_DEBUG_PATTERNS
329
        fprintf(stderr, "+");
330
#endif
331
840
    }
332
206k
found_pool:
333
206k
    ret = pool->free;
334
206k
    memcpy(pool->free, prefix, plen);
335
206k
    pool->free += plen;
336
206k
    *(pool->free++) = ':';
337
206k
    memcpy(pool->free, name, namelen);
338
206k
    pool->free += namelen;
339
206k
    *(pool->free++) = 0;
340
206k
    pool->nbStrings++;
341
206k
    return(ret);
342
840
}
343
344
#ifdef WITH_BIG_KEY
345
/*
346
 * xmlDictComputeBigKey:
347
 *
348
 * Calculate a hash key using a good hash function that works well for
349
 * larger hash table sizes.
350
 *
351
 * Hash function by "One-at-a-Time Hash" see
352
 * http://burtleburtle.net/bob/hash/doobs.html
353
 */
354
355
#ifdef __clang__
356
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
357
#endif
358
static uint32_t
359
145M
xmlDictComputeBigKey(const xmlChar* data, int namelen, int seed) {
360
145M
    uint32_t hash;
361
145M
    int i;
362
363
145M
    if (namelen <= 0 || data == NULL) return(0);
364
365
145M
    hash = seed;
366
367
848M
    for (i = 0;i < namelen; i++) {
368
703M
        hash += data[i];
369
703M
  hash += (hash << 10);
370
703M
  hash ^= (hash >> 6);
371
703M
    }
372
145M
    hash += (hash << 3);
373
145M
    hash ^= (hash >> 11);
374
145M
    hash += (hash << 15);
375
376
145M
    return hash;
377
145M
}
378
379
/*
380
 * xmlDictComputeBigQKey:
381
 *
382
 * Calculate a hash key for two strings using a good hash function
383
 * that works well for larger hash table sizes.
384
 *
385
 * Hash function by "One-at-a-Time Hash" see
386
 * http://burtleburtle.net/bob/hash/doobs.html
387
 *
388
 * Neither of the two strings must be NULL.
389
 */
390
#ifdef __clang__
391
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
392
#endif
393
static unsigned long
394
xmlDictComputeBigQKey(const xmlChar *prefix, int plen,
395
                      const xmlChar *name, int len, int seed)
396
648k
{
397
648k
    uint32_t hash;
398
648k
    int i;
399
400
648k
    hash = seed;
401
402
2.75M
    for (i = 0;i < plen; i++) {
403
2.10M
        hash += prefix[i];
404
2.10M
  hash += (hash << 10);
405
2.10M
  hash ^= (hash >> 6);
406
2.10M
    }
407
648k
    hash += ':';
408
648k
    hash += (hash << 10);
409
648k
    hash ^= (hash >> 6);
410
411
5.36M
    for (i = 0;i < len; i++) {
412
4.72M
        hash += name[i];
413
4.72M
  hash += (hash << 10);
414
4.72M
  hash ^= (hash >> 6);
415
4.72M
    }
416
648k
    hash += (hash << 3);
417
648k
    hash ^= (hash >> 11);
418
648k
    hash += (hash << 15);
419
420
648k
    return hash;
421
648k
}
422
#endif /* WITH_BIG_KEY */
423
424
/*
425
 * xmlDictComputeFastKey:
426
 *
427
 * Calculate a hash key using a fast hash function that works well
428
 * for low hash table fill.
429
 */
430
static unsigned long
431
629M
xmlDictComputeFastKey(const xmlChar *name, int namelen, int seed) {
432
629M
    unsigned long value = seed;
433
434
629M
    if (name == NULL) return(0);
435
629M
    value += *name;
436
629M
    value <<= 5;
437
629M
    if (namelen > 10) {
438
27.2M
        value += name[namelen - 1];
439
27.2M
        namelen = 10;
440
27.2M
    }
441
629M
    switch (namelen) {
442
38.1M
        case 10: value += name[9];
443
        /* Falls through. */
444
44.0M
        case 9: value += name[8];
445
        /* Falls through. */
446
59.0M
        case 8: value += name[7];
447
        /* Falls through. */
448
86.8M
        case 7: value += name[6];
449
        /* Falls through. */
450
108M
        case 6: value += name[5];
451
        /* Falls through. */
452
149M
        case 5: value += name[4];
453
        /* Falls through. */
454
484M
        case 4: value += name[3];
455
        /* Falls through. */
456
531M
        case 3: value += name[2];
457
        /* Falls through. */
458
574M
        case 2: value += name[1];
459
        /* Falls through. */
460
629M
        default: break;
461
629M
    }
462
629M
    return(value);
463
629M
}
464
465
/*
466
 * xmlDictComputeFastQKey:
467
 *
468
 * Calculate a hash key for two strings using a fast hash function
469
 * that works well for low hash table fill.
470
 *
471
 * Neither of the two strings must be NULL.
472
 */
473
static unsigned long
474
xmlDictComputeFastQKey(const xmlChar *prefix, int plen,
475
                       const xmlChar *name, int len, int seed)
476
509k
{
477
509k
    unsigned long value = seed;
478
479
509k
    if (plen == 0)
480
0
  value += 30 * ':';
481
509k
    else
482
509k
  value += 30 * (*prefix);
483
484
509k
    if (len > 10) {
485
57.5k
        int offset = len - (plen + 1 + 1);
486
57.5k
  if (offset < 0)
487
1.40k
      offset = len - (10 + 1);
488
57.5k
  value += name[offset];
489
57.5k
        len = 10;
490
57.5k
  if (plen > 10)
491
3.67k
      plen = 10;
492
57.5k
    }
493
509k
    switch (plen) {
494
4.94k
        case 10: value += prefix[9];
495
        /* Falls through. */
496
6.01k
        case 9: value += prefix[8];
497
        /* Falls through. */
498
11.8k
        case 8: value += prefix[7];
499
        /* Falls through. */
500
12.9k
        case 7: value += prefix[6];
501
        /* Falls through. */
502
15.8k
        case 6: value += prefix[5];
503
        /* Falls through. */
504
27.0k
        case 5: value += prefix[4];
505
        /* Falls through. */
506
68.4k
        case 4: value += prefix[3];
507
        /* Falls through. */
508
398k
        case 3: value += prefix[2];
509
        /* Falls through. */
510
409k
        case 2: value += prefix[1];
511
        /* Falls through. */
512
503k
        case 1: value += prefix[0];
513
        /* Falls through. */
514
509k
        default: break;
515
509k
    }
516
509k
    len -= plen;
517
509k
    if (len > 0) {
518
393k
        value += ':';
519
393k
  len--;
520
393k
    }
521
509k
    switch (len) {
522
0
        case 10: value += name[9];
523
        /* Falls through. */
524
0
        case 9: value += name[8];
525
        /* Falls through. */
526
13.4k
        case 8: value += name[7];
527
        /* Falls through. */
528
20.6k
        case 7: value += name[6];
529
        /* Falls through. */
530
75.6k
        case 6: value += name[5];
531
        /* Falls through. */
532
156k
        case 5: value += name[4];
533
        /* Falls through. */
534
172k
        case 4: value += name[3];
535
        /* Falls through. */
536
203k
        case 3: value += name[2];
537
        /* Falls through. */
538
256k
        case 2: value += name[1];
539
        /* Falls through. */
540
306k
        case 1: value += name[0];
541
        /* Falls through. */
542
509k
        default: break;
543
509k
    }
544
509k
    return(value);
545
509k
}
546
547
/**
548
 * xmlDictCreate:
549
 *
550
 * Create a new dictionary
551
 *
552
 * Returns the newly created dictionary, or NULL if an error occurred.
553
 */
554
xmlDictPtr
555
2.55M
xmlDictCreate(void) {
556
2.55M
    xmlDictPtr dict;
557
558
2.55M
    xmlInitParser();
559
560
#ifdef DICT_DEBUG_PATTERNS
561
    fprintf(stderr, "C");
562
#endif
563
564
2.55M
    dict = xmlMalloc(sizeof(xmlDict));
565
2.55M
    if (dict) {
566
2.55M
        dict->ref_counter = 1;
567
2.55M
        dict->limit = 0;
568
569
2.55M
        dict->size = MIN_DICT_SIZE;
570
2.55M
  dict->nbElems = 0;
571
2.55M
        dict->dict = xmlMalloc(MIN_DICT_SIZE * sizeof(xmlDictEntry));
572
2.55M
  dict->strings = NULL;
573
2.55M
  dict->subdict = NULL;
574
2.55M
        if (dict->dict) {
575
2.55M
      memset(dict->dict, 0, MIN_DICT_SIZE * sizeof(xmlDictEntry));
576
#ifdef DICT_RANDOMIZATION
577
            dict->seed = __xmlRandom();
578
#else
579
2.55M
            dict->seed = 0;
580
2.55M
#endif
581
2.55M
      return(dict);
582
2.55M
        }
583
0
        xmlFree(dict);
584
0
    }
585
0
    return(NULL);
586
2.55M
}
587
588
/**
589
 * xmlDictCreateSub:
590
 * @sub: an existing dictionary
591
 *
592
 * Create a new dictionary, inheriting strings from the read-only
593
 * dictionary @sub. On lookup, strings are first searched in the
594
 * new dictionary, then in @sub, and if not found are created in the
595
 * new dictionary.
596
 *
597
 * Returns the newly created dictionary, or NULL if an error occurred.
598
 */
599
xmlDictPtr
600
0
xmlDictCreateSub(xmlDictPtr sub) {
601
0
    xmlDictPtr dict = xmlDictCreate();
602
603
0
    if ((dict != NULL) && (sub != NULL)) {
604
#ifdef DICT_DEBUG_PATTERNS
605
        fprintf(stderr, "R");
606
#endif
607
0
        dict->seed = sub->seed;
608
0
        dict->subdict = sub;
609
0
  xmlDictReference(dict->subdict);
610
0
    }
611
0
    return(dict);
612
0
}
613
614
/**
615
 * xmlDictReference:
616
 * @dict: the dictionary
617
 *
618
 * Increment the reference counter of a dictionary
619
 *
620
 * Returns 0 in case of success and -1 in case of error
621
 */
622
int
623
2.29M
xmlDictReference(xmlDictPtr dict) {
624
2.29M
    if (dict == NULL) return -1;
625
1.93M
    xmlMutexLock(&xmlDictMutex);
626
1.93M
    dict->ref_counter++;
627
1.93M
    xmlMutexUnlock(&xmlDictMutex);
628
1.93M
    return(0);
629
2.29M
}
630
631
/**
632
 * xmlDictGrow:
633
 * @dict: the dictionary
634
 * @size: the new size of the dictionary
635
 *
636
 * resize the dictionary
637
 *
638
 * Returns 0 in case of success, -1 in case of failure
639
 */
640
static int
641
65.7k
xmlDictGrow(xmlDictPtr dict, size_t size) {
642
65.7k
    unsigned long key, okey;
643
65.7k
    size_t oldsize, i;
644
65.7k
    xmlDictEntryPtr iter, next;
645
65.7k
    struct _xmlDictEntry *olddict;
646
#ifdef DEBUG_GROW
647
    unsigned long nbElem = 0;
648
#endif
649
65.7k
    int ret = 0;
650
65.7k
    int keep_keys = 1;
651
652
65.7k
    if (dict == NULL)
653
0
  return(-1);
654
65.7k
    if (size < 8)
655
0
        return(-1);
656
65.7k
    if (size > 8 * 2048)
657
0
  return(-1);
658
659
#ifdef DICT_DEBUG_PATTERNS
660
    fprintf(stderr, "*");
661
#endif
662
663
65.7k
    oldsize = dict->size;
664
65.7k
    olddict = dict->dict;
665
65.7k
    if (olddict == NULL)
666
0
        return(-1);
667
65.7k
    if (oldsize == MIN_DICT_SIZE)
668
64.8k
        keep_keys = 0;
669
670
65.7k
    dict->dict = xmlMalloc(size * sizeof(xmlDictEntry));
671
65.7k
    if (dict->dict == NULL) {
672
0
  dict->dict = olddict;
673
0
  return(-1);
674
0
    }
675
65.7k
    memset(dict->dict, 0, size * sizeof(xmlDictEntry));
676
65.7k
    dict->size = size;
677
678
    /*  If the two loops are merged, there would be situations where
679
  a new entry needs to allocated and data copied into it from
680
  the main dict. It is nicer to run through the array twice, first
681
  copying all the elements in the main array (less probability of
682
  allocate) and then the rest, so we only free in the second loop.
683
    */
684
9.06M
    for (i = 0; i < oldsize; i++) {
685
9.00M
  if (olddict[i].valid == 0)
686
2.69M
      continue;
687
688
6.30M
  if (keep_keys)
689
319k
      okey = olddict[i].okey;
690
5.98M
  else
691
5.98M
      okey = xmlDictComputeKey(dict, olddict[i].name, olddict[i].len);
692
6.30M
  key = okey % dict->size;
693
694
6.30M
  if (dict->dict[key].valid == 0) {
695
5.89M
      memcpy(&(dict->dict[key]), &(olddict[i]), sizeof(xmlDictEntry));
696
5.89M
      dict->dict[key].next = NULL;
697
5.89M
      dict->dict[key].okey = okey;
698
5.89M
  } else {
699
405k
      xmlDictEntryPtr entry;
700
701
405k
      entry = xmlMalloc(sizeof(xmlDictEntry));
702
405k
      if (entry != NULL) {
703
405k
    entry->name = olddict[i].name;
704
405k
    entry->len = olddict[i].len;
705
405k
    entry->okey = okey;
706
405k
    entry->next = dict->dict[key].next;
707
405k
    entry->valid = 1;
708
405k
    dict->dict[key].next = entry;
709
405k
      } else {
710
          /*
711
     * we don't have much ways to alert from here
712
     * result is losing an entry and unicity guarantee
713
     */
714
0
          ret = -1;
715
0
      }
716
405k
  }
717
#ifdef DEBUG_GROW
718
  nbElem++;
719
#endif
720
6.30M
    }
721
722
9.06M
    for (i = 0; i < oldsize; i++) {
723
9.00M
  iter = olddict[i].next;
724
15.4M
  while (iter) {
725
6.45M
      next = iter->next;
726
727
      /*
728
       * put back the entry in the new dict
729
       */
730
731
6.45M
      if (keep_keys)
732
100k
    okey = iter->okey;
733
6.35M
      else
734
6.35M
    okey = xmlDictComputeKey(dict, iter->name, iter->len);
735
6.45M
      key = okey % dict->size;
736
6.45M
      if (dict->dict[key].valid == 0) {
737
5.15M
    memcpy(&(dict->dict[key]), iter, sizeof(xmlDictEntry));
738
5.15M
    dict->dict[key].next = NULL;
739
5.15M
    dict->dict[key].valid = 1;
740
5.15M
    dict->dict[key].okey = okey;
741
5.15M
    xmlFree(iter);
742
5.15M
      } else {
743
1.30M
    iter->next = dict->dict[key].next;
744
1.30M
    iter->okey = okey;
745
1.30M
    dict->dict[key].next = iter;
746
1.30M
      }
747
748
#ifdef DEBUG_GROW
749
      nbElem++;
750
#endif
751
752
6.45M
      iter = next;
753
6.45M
  }
754
9.00M
    }
755
756
65.7k
    xmlFree(olddict);
757
758
#ifdef DEBUG_GROW
759
    xmlGenericError(xmlGenericErrorContext,
760
      "xmlDictGrow : from %lu to %lu, %u elems\n", oldsize, size, nbElem);
761
#endif
762
763
65.7k
    return(ret);
764
65.7k
}
765
766
/**
767
 * xmlDictFree:
768
 * @dict: the dictionary
769
 *
770
 * Free the hash @dict and its contents. The userdata is
771
 * deallocated with @f if provided.
772
 */
773
void
774
4.49M
xmlDictFree(xmlDictPtr dict) {
775
4.49M
    size_t i;
776
4.49M
    xmlDictEntryPtr iter;
777
4.49M
    xmlDictEntryPtr next;
778
4.49M
    int inside_dict = 0;
779
4.49M
    xmlDictStringsPtr pool, nextp;
780
781
4.49M
    if (dict == NULL)
782
0
  return;
783
784
    /* decrement the counter, it may be shared by a parser and docs */
785
4.49M
    xmlMutexLock(&xmlDictMutex);
786
4.49M
    dict->ref_counter--;
787
4.49M
    if (dict->ref_counter > 0) {
788
1.93M
        xmlMutexUnlock(&xmlDictMutex);
789
1.93M
        return;
790
1.93M
    }
791
792
2.55M
    xmlMutexUnlock(&xmlDictMutex);
793
794
2.55M
    if (dict->subdict != NULL) {
795
0
        xmlDictFree(dict->subdict);
796
0
    }
797
798
2.55M
    if (dict->dict) {
799
172M
  for(i = 0; ((i < dict->size) && (dict->nbElems > 0)); i++) {
800
170M
      iter = &(dict->dict[i]);
801
170M
      if (iter->valid == 0)
802
137M
    continue;
803
32.8M
      inside_dict = 1;
804
78.5M
      while (iter) {
805
45.6M
    next = iter->next;
806
45.6M
    if (!inside_dict)
807
12.8M
        xmlFree(iter);
808
45.6M
    dict->nbElems--;
809
45.6M
    inside_dict = 0;
810
45.6M
    iter = next;
811
45.6M
      }
812
32.8M
  }
813
2.55M
  xmlFree(dict->dict);
814
2.55M
    }
815
2.55M
    pool = dict->strings;
816
3.89M
    while (pool != NULL) {
817
1.34M
        nextp = pool->next;
818
1.34M
  xmlFree(pool);
819
1.34M
  pool = nextp;
820
1.34M
    }
821
2.55M
    xmlFree(dict);
822
2.55M
}
823
824
/**
825
 * xmlDictLookup:
826
 * @dict: the dictionary
827
 * @name: the name of the userdata
828
 * @len: the length of the name, if -1 it is recomputed
829
 *
830
 * Add the @name to the dictionary @dict if not present.
831
 *
832
 * Returns the internal copy of the name or NULL in case of internal error
833
 */
834
const xmlChar *
835
783M
xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
836
783M
    unsigned long key, okey, nbi = 0;
837
783M
    xmlDictEntryPtr entry;
838
783M
    xmlDictEntryPtr insert;
839
783M
    const xmlChar *ret;
840
783M
    unsigned int l;
841
842
783M
    if ((dict == NULL) || (name == NULL))
843
20.0M
  return(NULL);
844
845
763M
    if (len < 0)
846
103M
        l = strlen((const char *) name);
847
659M
    else
848
659M
        l = len;
849
850
763M
    if (((dict->limit > 0) && (l >= dict->limit)) ||
851
763M
        (l > INT_MAX / 2))
852
0
        return(NULL);
853
854
    /*
855
     * Check for duplicate and insertion location.
856
     */
857
763M
    okey = xmlDictComputeKey(dict, name, l);
858
763M
    key = okey % dict->size;
859
763M
    if (dict->dict[key].valid == 0) {
860
27.9M
  insert = NULL;
861
735M
    } else {
862
962M
  for (insert = &(dict->dict[key]); insert->next != NULL;
863
735M
       insert = insert->next) {
864
338M
#ifdef __GNUC__
865
338M
      if ((insert->okey == okey) && (insert->len == l)) {
866
115M
    if (!memcmp(insert->name, name, l))
867
111M
        return(insert->name);
868
115M
      }
869
#else
870
      if ((insert->okey == okey) && (insert->len == l) &&
871
          (!xmlStrncmp(insert->name, name, l)))
872
    return(insert->name);
873
#endif
874
227M
      nbi++;
875
227M
  }
876
623M
#ifdef __GNUC__
877
623M
  if ((insert->okey == okey) && (insert->len == l)) {
878
606M
      if (!memcmp(insert->name, name, l))
879
606M
    return(insert->name);
880
606M
  }
881
#else
882
  if ((insert->okey == okey) && (insert->len == l) &&
883
      (!xmlStrncmp(insert->name, name, l)))
884
      return(insert->name);
885
#endif
886
623M
    }
887
888
45.4M
    if (dict->subdict) {
889
0
        unsigned long skey;
890
891
        /* we cannot always reuse the same okey for the subdict */
892
0
        if (((dict->size == MIN_DICT_SIZE) &&
893
0
       (dict->subdict->size != MIN_DICT_SIZE)) ||
894
0
            ((dict->size != MIN_DICT_SIZE) &&
895
0
       (dict->subdict->size == MIN_DICT_SIZE)))
896
0
      skey = xmlDictComputeKey(dict->subdict, name, l);
897
0
  else
898
0
      skey = okey;
899
900
0
  key = skey % dict->subdict->size;
901
0
  if (dict->subdict->dict[key].valid != 0) {
902
0
      xmlDictEntryPtr tmp;
903
904
0
      for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
905
0
     tmp = tmp->next) {
906
0
#ifdef __GNUC__
907
0
    if ((tmp->okey == skey) && (tmp->len == l)) {
908
0
        if (!memcmp(tmp->name, name, l))
909
0
      return(tmp->name);
910
0
    }
911
#else
912
    if ((tmp->okey == skey) && (tmp->len == l) &&
913
        (!xmlStrncmp(tmp->name, name, l)))
914
        return(tmp->name);
915
#endif
916
0
    nbi++;
917
0
      }
918
0
#ifdef __GNUC__
919
0
      if ((tmp->okey == skey) && (tmp->len == l)) {
920
0
    if (!memcmp(tmp->name, name, l))
921
0
        return(tmp->name);
922
0
      }
923
#else
924
      if ((tmp->okey == skey) && (tmp->len == l) &&
925
    (!xmlStrncmp(tmp->name, name, l)))
926
    return(tmp->name);
927
#endif
928
0
  }
929
0
  key = okey % dict->size;
930
0
    }
931
932
45.4M
    ret = xmlDictAddString(dict, name, l);
933
45.4M
    if (ret == NULL)
934
0
        return(NULL);
935
45.4M
    if (insert == NULL) {
936
27.9M
  entry = &(dict->dict[key]);
937
27.9M
    } else {
938
17.5M
  entry = xmlMalloc(sizeof(xmlDictEntry));
939
17.5M
  if (entry == NULL)
940
0
       return(NULL);
941
17.5M
    }
942
45.4M
    entry->name = ret;
943
45.4M
    entry->len = l;
944
45.4M
    entry->next = NULL;
945
45.4M
    entry->valid = 1;
946
45.4M
    entry->okey = okey;
947
948
949
45.4M
    if (insert != NULL)
950
17.5M
  insert->next = entry;
951
952
45.4M
    dict->nbElems++;
953
954
45.4M
    if ((nbi > MAX_HASH_LEN) &&
955
45.4M
        (dict->size <= ((MAX_DICT_HASH / 2) / MAX_HASH_LEN))) {
956
63.7k
  if (xmlDictGrow(dict, MAX_HASH_LEN * 2 * dict->size) != 0)
957
0
      return(NULL);
958
63.7k
    }
959
    /* Note that entry may have been freed at this point by xmlDictGrow */
960
961
45.4M
    return(ret);
962
45.4M
}
963
964
/**
965
 * xmlDictExists:
966
 * @dict: the dictionary
967
 * @name: the name of the userdata
968
 * @len: the length of the name, if -1 it is recomputed
969
 *
970
 * Check if the @name exists in the dictionary @dict.
971
 *
972
 * Returns the internal copy of the name or NULL if not found.
973
 */
974
const xmlChar *
975
0
xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
976
0
    unsigned long key, okey;
977
0
    xmlDictEntryPtr insert;
978
0
    unsigned int l;
979
980
0
    if ((dict == NULL) || (name == NULL))
981
0
  return(NULL);
982
983
0
    if (len < 0)
984
0
        l = strlen((const char *) name);
985
0
    else
986
0
        l = len;
987
0
    if (((dict->limit > 0) && (l >= dict->limit)) ||
988
0
        (l > INT_MAX / 2))
989
0
        return(NULL);
990
991
    /*
992
     * Check for duplicate and insertion location.
993
     */
994
0
    okey = xmlDictComputeKey(dict, name, l);
995
0
    key = okey % dict->size;
996
0
    if (dict->dict[key].valid == 0) {
997
0
  insert = NULL;
998
0
    } else {
999
0
  for (insert = &(dict->dict[key]); insert->next != NULL;
1000
0
       insert = insert->next) {
1001
0
#ifdef __GNUC__
1002
0
      if ((insert->okey == okey) && (insert->len == l)) {
1003
0
    if (!memcmp(insert->name, name, l))
1004
0
        return(insert->name);
1005
0
      }
1006
#else
1007
      if ((insert->okey == okey) && (insert->len == l) &&
1008
          (!xmlStrncmp(insert->name, name, l)))
1009
    return(insert->name);
1010
#endif
1011
0
  }
1012
0
#ifdef __GNUC__
1013
0
  if ((insert->okey == okey) && (insert->len == l)) {
1014
0
      if (!memcmp(insert->name, name, l))
1015
0
    return(insert->name);
1016
0
  }
1017
#else
1018
  if ((insert->okey == okey) && (insert->len == l) &&
1019
      (!xmlStrncmp(insert->name, name, l)))
1020
      return(insert->name);
1021
#endif
1022
0
    }
1023
1024
0
    if (dict->subdict) {
1025
0
        unsigned long skey;
1026
1027
        /* we cannot always reuse the same okey for the subdict */
1028
0
        if (((dict->size == MIN_DICT_SIZE) &&
1029
0
       (dict->subdict->size != MIN_DICT_SIZE)) ||
1030
0
            ((dict->size != MIN_DICT_SIZE) &&
1031
0
       (dict->subdict->size == MIN_DICT_SIZE)))
1032
0
      skey = xmlDictComputeKey(dict->subdict, name, l);
1033
0
  else
1034
0
      skey = okey;
1035
1036
0
  key = skey % dict->subdict->size;
1037
0
  if (dict->subdict->dict[key].valid != 0) {
1038
0
      xmlDictEntryPtr tmp;
1039
1040
0
      for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
1041
0
     tmp = tmp->next) {
1042
0
#ifdef __GNUC__
1043
0
    if ((tmp->okey == skey) && (tmp->len == l)) {
1044
0
        if (!memcmp(tmp->name, name, l))
1045
0
      return(tmp->name);
1046
0
    }
1047
#else
1048
    if ((tmp->okey == skey) && (tmp->len == l) &&
1049
        (!xmlStrncmp(tmp->name, name, l)))
1050
        return(tmp->name);
1051
#endif
1052
0
      }
1053
0
#ifdef __GNUC__
1054
0
      if ((tmp->okey == skey) && (tmp->len == l)) {
1055
0
    if (!memcmp(tmp->name, name, l))
1056
0
        return(tmp->name);
1057
0
      }
1058
#else
1059
      if ((tmp->okey == skey) && (tmp->len == l) &&
1060
    (!xmlStrncmp(tmp->name, name, l)))
1061
    return(tmp->name);
1062
#endif
1063
0
  }
1064
0
    }
1065
1066
    /* not found */
1067
0
    return(NULL);
1068
0
}
1069
1070
/**
1071
 * xmlDictQLookup:
1072
 * @dict: the dictionary
1073
 * @prefix: the prefix
1074
 * @name: the name
1075
 *
1076
 * Add the QName @prefix:@name to the hash @dict if not present.
1077
 *
1078
 * Returns the internal copy of the QName or NULL in case of internal error
1079
 */
1080
const xmlChar *
1081
1.15M
xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) {
1082
1.15M
    unsigned long okey, key, nbi = 0;
1083
1.15M
    xmlDictEntryPtr entry;
1084
1.15M
    xmlDictEntryPtr insert;
1085
1.15M
    const xmlChar *ret;
1086
1.15M
    unsigned int len, plen, l;
1087
1088
1.15M
    if ((dict == NULL) || (name == NULL))
1089
0
  return(NULL);
1090
1.15M
    if (prefix == NULL)
1091
0
        return(xmlDictLookup(dict, name, -1));
1092
1093
1.15M
    l = len = strlen((const char *) name);
1094
1.15M
    plen = strlen((const char *) prefix);
1095
1.15M
    len += 1 + plen;
1096
1097
    /*
1098
     * Check for duplicate and insertion location.
1099
     */
1100
1.15M
    okey = xmlDictComputeQKey(dict, prefix, plen, name, l);
1101
1.15M
    key = okey % dict->size;
1102
1.15M
    if (dict->dict[key].valid == 0) {
1103
151k
  insert = NULL;
1104
1.00M
    } else {
1105
1.25M
  for (insert = &(dict->dict[key]); insert->next != NULL;
1106
1.00M
       insert = insert->next) {
1107
497k
      if ((insert->okey == okey) && (insert->len == len) &&
1108
497k
          (xmlStrQEqual(prefix, name, insert->name)))
1109
254k
    return(insert->name);
1110
243k
      nbi++;
1111
243k
  }
1112
753k
  if ((insert->okey == okey) && (insert->len == len) &&
1113
753k
      (xmlStrQEqual(prefix, name, insert->name)))
1114
697k
      return(insert->name);
1115
753k
    }
1116
1117
206k
    if (dict->subdict) {
1118
0
        unsigned long skey;
1119
1120
        /* we cannot always reuse the same okey for the subdict */
1121
0
        if (((dict->size == MIN_DICT_SIZE) &&
1122
0
       (dict->subdict->size != MIN_DICT_SIZE)) ||
1123
0
            ((dict->size != MIN_DICT_SIZE) &&
1124
0
       (dict->subdict->size == MIN_DICT_SIZE)))
1125
0
      skey = xmlDictComputeQKey(dict->subdict, prefix, plen, name, l);
1126
0
  else
1127
0
      skey = okey;
1128
1129
0
  key = skey % dict->subdict->size;
1130
0
  if (dict->subdict->dict[key].valid != 0) {
1131
0
      xmlDictEntryPtr tmp;
1132
0
      for (tmp = &(dict->subdict->dict[key]); tmp->next != NULL;
1133
0
     tmp = tmp->next) {
1134
0
    if ((tmp->okey == skey) && (tmp->len == len) &&
1135
0
        (xmlStrQEqual(prefix, name, tmp->name)))
1136
0
        return(tmp->name);
1137
0
    nbi++;
1138
0
      }
1139
0
      if ((tmp->okey == skey) && (tmp->len == len) &&
1140
0
    (xmlStrQEqual(prefix, name, tmp->name)))
1141
0
    return(tmp->name);
1142
0
  }
1143
0
  key = okey % dict->size;
1144
0
    }
1145
1146
206k
    ret = xmlDictAddQString(dict, prefix, plen, name, l);
1147
206k
    if (ret == NULL)
1148
0
        return(NULL);
1149
206k
    if (insert == NULL) {
1150
151k
  entry = &(dict->dict[key]);
1151
151k
    } else {
1152
55.5k
  entry = xmlMalloc(sizeof(xmlDictEntry));
1153
55.5k
  if (entry == NULL)
1154
0
       return(NULL);
1155
55.5k
    }
1156
206k
    entry->name = ret;
1157
206k
    entry->len = len;
1158
206k
    entry->next = NULL;
1159
206k
    entry->valid = 1;
1160
206k
    entry->okey = okey;
1161
1162
206k
    if (insert != NULL)
1163
55.5k
  insert->next = entry;
1164
1165
206k
    dict->nbElems++;
1166
1167
206k
    if ((nbi > MAX_HASH_LEN) &&
1168
206k
        (dict->size <= ((MAX_DICT_HASH / 2) / MAX_HASH_LEN)))
1169
1.93k
  xmlDictGrow(dict, MAX_HASH_LEN * 2 * dict->size);
1170
    /* Note that entry may have been freed at this point by xmlDictGrow */
1171
1172
206k
    return(ret);
1173
206k
}
1174
1175
/**
1176
 * xmlDictOwns:
1177
 * @dict: the dictionary
1178
 * @str: the string
1179
 *
1180
 * check if a string is owned by the dictionary
1181
 *
1182
 * Returns 1 if true, 0 if false and -1 in case of error
1183
 * -1 in case of error
1184
 */
1185
int
1186
584M
xmlDictOwns(xmlDictPtr dict, const xmlChar *str) {
1187
584M
    xmlDictStringsPtr pool;
1188
1189
584M
    if ((dict == NULL) || (str == NULL))
1190
0
  return(-1);
1191
584M
    pool = dict->strings;
1192
1.09G
    while (pool != NULL) {
1193
807M
        if ((str >= &pool->array[0]) && (str <= pool->free))
1194
296M
      return(1);
1195
510M
  pool = pool->next;
1196
510M
    }
1197
288M
    if (dict->subdict)
1198
0
        return(xmlDictOwns(dict->subdict, str));
1199
288M
    return(0);
1200
288M
}
1201
1202
/**
1203
 * xmlDictSize:
1204
 * @dict: the dictionary
1205
 *
1206
 * Query the number of elements installed in the hash @dict.
1207
 *
1208
 * Returns the number of elements in the dictionary or
1209
 * -1 in case of error
1210
 */
1211
int
1212
0
xmlDictSize(xmlDictPtr dict) {
1213
0
    if (dict == NULL)
1214
0
  return(-1);
1215
0
    if (dict->subdict)
1216
0
        return(dict->nbElems + dict->subdict->nbElems);
1217
0
    return(dict->nbElems);
1218
0
}
1219
1220
/**
1221
 * xmlDictSetLimit:
1222
 * @dict: the dictionary
1223
 * @limit: the limit in bytes
1224
 *
1225
 * Set a size limit for the dictionary
1226
 * Added in 2.9.0
1227
 *
1228
 * Returns the previous limit of the dictionary or 0
1229
 */
1230
size_t
1231
2.73M
xmlDictSetLimit(xmlDictPtr dict, size_t limit) {
1232
2.73M
    size_t ret;
1233
1234
2.73M
    if (dict == NULL)
1235
0
  return(0);
1236
2.73M
    ret = dict->limit;
1237
2.73M
    dict->limit = limit;
1238
2.73M
    return(ret);
1239
2.73M
}
1240
1241
/**
1242
 * xmlDictGetUsage:
1243
 * @dict: the dictionary
1244
 *
1245
 * Get how much memory is used by a dictionary for strings
1246
 * Added in 2.9.0
1247
 *
1248
 * Returns the amount of strings allocated
1249
 */
1250
size_t
1251
0
xmlDictGetUsage(xmlDictPtr dict) {
1252
0
    xmlDictStringsPtr pool;
1253
0
    size_t limit = 0;
1254
1255
0
    if (dict == NULL)
1256
0
  return(0);
1257
0
    pool = dict->strings;
1258
0
    while (pool != NULL) {
1259
0
        limit += pool->size;
1260
0
  pool = pool->next;
1261
0
    }
1262
0
    return(limit);
1263
0
}
1264