Coverage Report

Created: 2025-06-24 06:45

/src/binutils-gdb/bfd/merge.c
Line
Count
Source (jump to first uncovered line)
1
/* SEC_MERGE support.
2
   Copyright (C) 2001-2025 Free Software Foundation, Inc.
3
   Written by Jakub Jelinek <jakub@redhat.com>.
4
5
   This file is part of BFD, the Binary File Descriptor library.
6
7
   This program is free software; you can redistribute it and/or modify
8
   it under the terms of the GNU General Public License as published by
9
   the Free Software Foundation; either version 3 of the License, or
10
   (at your option) any later version.
11
12
   This program is distributed in the hope that it will be useful,
13
   but WITHOUT ANY WARRANTY; without even the implied warranty of
14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
   GNU General Public License for more details.
16
17
   You should have received a copy of the GNU General Public License
18
   along with this program; if not, write to the Free Software
19
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
20
   MA 02110-1301, USA.  */
21
22
23
/* This file contains support for merging duplicate entities within sections,
24
   as used in ELF SHF_MERGE.  */
25
26
#include "sysdep.h"
27
#include <limits.h>
28
#include "bfd.h"
29
#include "elf-bfd.h"
30
#include "libbfd.h"
31
#include "objalloc.h"
32
#include "libiberty.h"
33
34
/* We partition all mergable input sections into sets of similar
35
   characteristics.  These sets are the unit of merging.  All content
36
   of the input sections is scanned and inserted into a hash table.
37
   We also remember an input-offset to entry mapping per input section, but
38
   the content itself is removed.  After everything is read in we assign
39
   output offsets to all hash entries, and when relocations are processed we
40
   lookup the given input offset per input-section, get the matching entry
41
   and its output offset (possibly adjusted for offset pointing into the
42
   middle of an entry).
43
44
   The input-offset-to-entry mapping (in map_ofs/map) is sorted, so in principle
45
   we could binary search it, but that's not cache-friendly and it's faster
46
   to add another lookup structure that gets us very near the correct
47
   entry in just one step (that's what ofstolowbound is for) and do a linear
48
   search from there.  */
49
50
/* An entry in the section merge hash table.  */
51
52
struct sec_merge_hash_entry
53
{
54
  /* Length of this entry.  This includes the zero terminator.  */
55
  unsigned int len;
56
  /* Start of this string needs to be aligned to
57
     alignment octets (not 1 << align).  */
58
  unsigned int alignment;
59
  union
60
  {
61
    /* Index within the merged section.  */
62
    bfd_size_type index;
63
    /* Entry this is a suffix of (if alignment is 0).  */
64
    struct sec_merge_hash_entry *suffix;
65
  } u;
66
  /* Next entity in the hash table (in order of entering).  */
67
  struct sec_merge_hash_entry *next;
68
  char str[1];
69
};
70
71
/* The section merge hash table.  */
72
73
struct sec_merge_hash
74
{
75
  struct bfd_hash_table table;
76
  /* First entity in the SEC_MERGE sections of this type.  */
77
  struct sec_merge_hash_entry *first;
78
  /* Last entity in the SEC_MERGE sections of this type.  */
79
  struct sec_merge_hash_entry *last;
80
  /* Entity size.  */
81
  unsigned int entsize;
82
  /* Are entries fixed size or zero terminated strings?  */
83
  bool strings;
84
  /* struct-of-array variant of all entries in the hash-table: */
85
  unsigned int nbuckets;
86
  /* We keep hash-code and length of entry together in a separate
87
     array in such a way that it can be checked with just a single memory
88
     reference.  In this way we don't need indirect access to the entries
89
     in the normal case.  keys_lens[i] is 'hashcode << 32) | len' for entry
90
     i (which is pointed to be values[i]).  */
91
  uint64_t *key_lens;
92
  struct sec_merge_hash_entry **values;
93
};
94
95
struct sec_merge_sec_info;
96
97
/* Information per merged blob.  This is the unit of merging and is
98
   related to (multiple) input sections of similar characteristics
99
   (alignment, entity size, strings or blobs).  */
100
struct sec_merge_info
101
{
102
  /* Chain of sec_merge_infos.  */
103
  struct sec_merge_info *next;
104
  /* Chain of sec_merge_sec_infos.  This first one will be the representative
105
     section that conceptually collects all merged content.  */
106
  struct sec_merge_sec_info *chain;
107
  struct sec_merge_sec_info **last;
108
  /* A hash table used to hold section content.  */
109
  struct sec_merge_hash *htab;
110
};
111
112
/* Offset into input mergable sections are represented by this type.
113
   Note how doesn't support crazy large mergable sections.  */
114
typedef uint32_t mapofs_type;
115
116
/* Given a sec_merge_sec_info S this gives the input offset of the IDX's
117
   recorded entry.  */
118
0
#define MAP_OFS(S,IDX) (S)->map_ofs[IDX]
119
/* And this gives the output offset (in the merged blob representing
120
   this S.  */
121
0
#define MAP_IDX(S,IDX) (S)->map[IDX].idx
122
/* For quick lookup of output offset given an input offset we store
123
   an array mapping intput-offset / OFSDIV to entry index.
124
   16 is better than 8, 32 is roughly same as 16, but uses less memory, so
125
   we use that. */
126
0
#define OFSDIV 32
127
128
/* Information per input merge section.  */
129
struct sec_merge_sec_info
130
{
131
  /* Chain of sec_merge_sec_infos.  */
132
  struct sec_merge_sec_info *next;
133
  /* The corresponding section.  */
134
  asection *sec;
135
  /* Pointer to merge_info pointing to us.  */
136
  void **psecinfo;
137
  /* The merge entity this is a part of.  */
138
  struct sec_merge_info *sinfo;
139
  /* The section associated with sinfo (i.e. the representative section).
140
     Same as sinfo->chain->sec, but faster to access in the hot function.  */
141
  asection *reprsec;
142
  /* First string in this section.  */
143
  struct sec_merge_hash_entry *first_str;
144
  /* Sparse mapping from input offset to entry covering that offset:  */
145
  unsigned int noffsetmap;  /* Number of these mappings.  */
146
  mapofs_type *map_ofs;     /* Input offset.  */
147
  union {
148
      struct sec_merge_hash_entry *entry;  /* Covering hash entry ... */
149
      bfd_size_type idx;                   /* ... or destination offset.  */
150
  } *map;
151
  /* Quick access: index into map_ofs[].  ofstolowbound[o / OFSDIV]=I is
152
     such that map_ofs[I] is the smallest offset higher that
153
     rounddown(o, OFSDIV) (and hence I-1 is the largest entry whose offset is
154
     smaller or equal to o/OFSDIV*OFSDIV).  */
155
  unsigned int *ofstolowbound;
156
  int fast_state;
157
};
158
159
160
/* True when COUNT+ADDED and NBUCKETS indicate that the hash table
161
   needs resizing.  */
162
163
static inline bool
164
needs_resize (unsigned int count, unsigned int added, unsigned int nbuckets)
165
0
{
166
  /* This doesn't consider the possibility of "count" + "added"
167
     overflowing, because that can't happen given current usage.  If
168
     code calling this function changes then that assumption may no
169
     longer be correct.  Currently "added" is always 1 and "nbuckets"
170
     is limited to 0x80000000.  We'll attempt and fail resizing at
171
     "count" of 0x55555555.  */
172
0
  return count + added > nbuckets / 3 * 2;
173
0
}
174
175
/* Given a merge hash table TABLE and a number of entries to be
176
   ADDED, resize the table for this to fit.
177
   Returns false if that can't be done for whatever reason.  */
178
179
static bool
180
sec_merge_resize (struct sec_merge_hash *table, unsigned added)
181
0
{
182
0
  struct bfd_hash_table *bfdtab = &table->table;
183
0
  unsigned i;
184
0
  unsigned long newnb = table->nbuckets;
185
0
  struct sec_merge_hash_entry **newv;
186
0
  uint64_t *newl;
187
0
  unsigned long alloc;
188
189
0
  do
190
0
    {
191
0
      if (newnb >> (8 * sizeof(mapofs_type) - 1))
192
0
  return false;
193
0
      newnb *= 2;
194
0
    }
195
0
  while (needs_resize (bfdtab->count, added, newnb));
196
197
0
  alloc = newnb * sizeof (newl[0]);
198
0
  if (alloc / sizeof (newl[0]) != newnb)
199
0
    return false;
200
0
  newl = objalloc_alloc ((struct objalloc *) table->table.memory, alloc);
201
0
  if (newl == NULL)
202
0
    return false;
203
0
  memset (newl, 0, alloc);
204
0
  alloc = newnb * sizeof (newv[0]);
205
0
  if (alloc / sizeof (newv[0]) != newnb)
206
0
    return false;
207
0
  newv = objalloc_alloc ((struct objalloc *) table->table.memory, alloc);
208
0
  if (newv == NULL)
209
0
    return false;
210
0
  memset (newv, 0, alloc);
211
212
0
  for (i = 0; i < table->nbuckets; i++)
213
0
    {
214
0
      struct sec_merge_hash_entry *v = table->values[i];
215
0
      if (v)
216
0
  {
217
0
    uint32_t thishash = table->key_lens[i] >> 32;
218
0
    unsigned idx = thishash & (newnb - 1);
219
0
    while (newv[idx])
220
0
      idx = (idx + 1) & (newnb - 1);
221
0
    newl[idx] = table->key_lens[i];
222
0
    newv[idx] = v;
223
0
  }
224
0
    }
225
226
0
  table->key_lens = newl;
227
0
  table->values = newv;
228
0
  table->nbuckets = newnb;
229
0
  return true;
230
0
}
231
232
/* Insert STRING (actually a byte blob of length LEN, with pre-computed
233
   HASH and bucket _INDEX) into our hash TABLE.  */
234
235
static struct sec_merge_hash_entry *
236
sec_merge_hash_insert (struct sec_merge_hash *table,
237
     const char *string,
238
     uint64_t hash, unsigned int len, unsigned int _index)
239
0
{
240
0
  struct bfd_hash_table *bfdtab = &table->table;
241
0
  struct sec_merge_hash_entry *hashp;
242
243
0
  hashp = (struct sec_merge_hash_entry *)
244
0
      bfd_hash_allocate (bfdtab, len + sizeof (struct sec_merge_hash_entry));
245
0
  if (hashp == NULL)
246
0
    return NULL;
247
248
0
  memcpy (hashp->str, string, len);
249
0
  hashp->len = len;
250
0
  hashp->alignment = 0;
251
0
  hashp->u.suffix = NULL;
252
0
  hashp->next = NULL;
253
254
0
  if (needs_resize (bfdtab->count, 1, table->nbuckets))
255
0
    {
256
0
      if (!sec_merge_resize (table, 1))
257
0
  return NULL;
258
0
      uint64_t *key_lens = table->key_lens;
259
0
      unsigned int nbuckets = table->nbuckets;
260
0
      _index = hash & (nbuckets - 1);
261
0
      while (1)
262
0
  {
263
0
    uint64_t candlen = key_lens[_index];
264
0
    if (!(candlen & (uint32_t)-1))
265
0
      break;
266
0
    _index = (_index + 1) & (nbuckets - 1);
267
0
  }
268
0
    }
269
270
0
  bfdtab->count++;
271
0
  table->key_lens[_index] = (hash << 32) | (uint32_t)len;
272
0
  table->values[_index] = hashp;
273
274
0
  return hashp;
275
0
}
276
277
/* Read four bytes from *STR, interpret it as 32bit unsigned little
278
   endian value and return that.  */
279
280
static inline uint32_t
281
hash_read32 (const char *str)
282
0
{
283
0
  uint32_t i;
284
  /* All reasonable compilers will inline this memcpy and generate optimal
285
     code on architectures that support unaligned (4-byte) accesses.  */
286
0
  memcpy(&i, str, 4);
287
#ifdef WORDS_BIGENDIAN
288
  i = (i << 24) | ((i & 0xff00) << 8) | ((i >> 8) & 0xff00) | (i >> 24);
289
#endif
290
0
  return i;
291
0
}
292
293
/* Calculate and return a hashvalue of the bytes at STR[0..LEN-1].
294
   All non-zero lengths and all alignments are supported.
295
296
   This is somewhat similar to xxh3 (of xxhash), but restricted to 32bit.
297
   On cc1 strings this has quite similar statistic properties, and we
298
   don't need to jump through hoops to get fast 64x64->128 mults,
299
   or 64bit arith on 32 bit hosts.  We also don't care for seeds
300
   or secrets.  They improve mixing very little.  */
301
302
static uint32_t
303
hash_blob (const char *str, unsigned int len)
304
0
{
305
0
  uint32_t ret = 0;
306
0
  uint32_t mul = (1 << 0) +  (1 << 2) + (1 << 3) + (1 << 5) + (1 << 7);
307
0
  mul += (1 << 11) + (1 << 13) + (1 << 17) + (0 << 19) + (1 << 23) + (1 << 29);
308
0
  mul += (1u << 31);
309
0
  if (len >= 8)
310
0
    {
311
0
      uint32_t acc = len * 0x9e3779b1;
312
0
      while (len >= 8)
313
0
  {
314
0
    uint32_t i1 = hash_read32  (str) ^ (0x396cfeb8 + 1*len);
315
0
    uint32_t i2 = hash_read32  (str + 4) ^ (0xbe4ba423 + 1*len);
316
0
    str += 8;
317
0
    len -= 8;
318
0
    uint64_t m = (uint64_t)i1 * i2;
319
0
    acc += (uint32_t)m ^ (uint32_t)(m >> 32);
320
0
  }
321
0
      acc = acc ^ (acc >> 7);
322
0
      uint64_t r = (uint64_t)mul * acc;
323
0
      ret = (uint32_t)r ^ (uint32_t)(r >> 32);
324
0
      if (len == 0)
325
0
  goto end;
326
0
    }
327
0
  if (len >= 4)
328
0
    {
329
0
      uint32_t i1 = hash_read32  (str);
330
0
      uint32_t i2 = hash_read32  (str + len - 4);
331
0
      i1 = ((i1 + len) ^ (i1 >> 7));
332
0
      i2 = i2 ^ (i2 >> 7);
333
0
      uint64_t r = (uint64_t)mul * i1 + i2;
334
0
      ret += r ^ (r >> 32);
335
0
    }
336
0
  else
337
0
    {
338
      /* Cleverly read in 1 to 3 bytes without further conditionals.  */
339
0
      unsigned char c1 = str[0];
340
0
      unsigned char c2 = str[len >> 1];
341
0
      unsigned char c3 = str[len - 1];
342
0
      uint32_t i1 = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24)
343
0
         | ((uint32_t) c3) | (len << 8);
344
0
      i1 = i1 ^ (i1 >> 7);
345
0
      uint64_t r = (uint64_t)mul * i1;
346
0
      ret += r ^ (r >> 32);
347
0
    }
348
0
end:
349
0
  return ret;
350
0
}
351
352
/* Given a hash TABLE, return the hash of STRING (a blob described
353
   according to info in TABLE, either a character string, or some fixed
354
   size entity) and set *PLEN to the length of this blob.  */
355
356
static uint32_t
357
hashit (struct sec_merge_hash *table, const char *string, unsigned int *plen)
358
0
{
359
0
  const unsigned char *s;
360
0
  uint32_t hash;
361
0
  unsigned int len, i;
362
363
0
  s = (const unsigned char *) string;
364
0
  if (table->strings)
365
0
    {
366
0
      if (table->entsize == 1)
367
0
  len = strlen (string) + 1;
368
0
      else
369
0
  {
370
0
    len = 0;
371
0
    for (;;)
372
0
      {
373
0
        for (i = 0; i < table->entsize; ++i)
374
0
    if (s[i] != '\0')
375
0
      break;
376
0
        if (i == table->entsize)
377
0
    break;
378
0
        s += table->entsize;
379
0
        ++len;
380
0
      }
381
0
    len *= table->entsize;
382
0
    len += table->entsize;
383
0
  }
384
0
    }
385
0
  else
386
0
    len = table->entsize;
387
0
  hash = hash_blob (string, len);
388
0
  *plen = len;
389
0
  return hash;
390
0
}
391
392
/* Lookup or insert a blob STRING (of length LEN, precomputed HASH and
393
   input ALIGNMENT) into TABLE.  Return the found or new hash table entry.  */
394
395
static struct sec_merge_hash_entry *
396
sec_merge_hash_lookup (struct sec_merge_hash *table, const char *string,
397
           unsigned int len, uint64_t hash,
398
           unsigned int alignment)
399
0
{
400
0
  struct sec_merge_hash_entry *hashp;
401
0
  unsigned int _index;
402
403
  /*printf ("YYY insert 0x%x into %u buckets (%s)\n",
404
    (unsigned)hash, (unsigned)table->nbuckets, string);*/
405
0
  uint64_t *key_lens = table->key_lens;
406
0
  struct sec_merge_hash_entry **values = table->values;
407
0
  uint64_t hlen = (hash << 32) | (uint32_t)len;
408
0
  unsigned int nbuckets = table->nbuckets;
409
0
  _index = hash & (nbuckets - 1);
410
0
  while (1)
411
0
    {
412
0
      uint64_t candlen = key_lens[_index];
413
0
      if (candlen == hlen
414
0
    && !memcmp (values[_index]->str, string, len))
415
0
  {
416
0
    hashp = values[_index];
417
0
    if (hashp->alignment < alignment)
418
0
      hashp->alignment = alignment;
419
0
    return hashp;
420
0
  }
421
0
      if (!(candlen & (uint32_t)-1))
422
0
  break;
423
0
      _index = (_index + 1) & (nbuckets - 1);
424
0
    }
425
426
0
  hashp = sec_merge_hash_insert (table, string, hash, len, _index);
427
0
  if (hashp == NULL)
428
0
    return NULL;
429
0
  hashp->alignment = alignment;
430
431
0
  if (table->first == NULL)
432
0
    table->first = hashp;
433
0
  else
434
0
    table->last->next = hashp;
435
0
  table->last = hashp;
436
437
0
  return hashp;
438
0
}
439
440
/* Create a new hash table.  */
441
442
static struct sec_merge_hash *
443
sec_merge_init (unsigned int entsize, bool strings)
444
0
{
445
0
  struct sec_merge_hash *table;
446
447
0
  table = (struct sec_merge_hash *) bfd_malloc (sizeof (struct sec_merge_hash));
448
0
  if (table == NULL)
449
0
    return NULL;
450
451
0
  if (! bfd_hash_table_init_n (&table->table, NULL,
452
0
             sizeof (struct sec_merge_hash_entry), 0x2000))
453
0
    {
454
0
      free (table);
455
0
      return NULL;
456
0
    }
457
458
0
  table->first = NULL;
459
0
  table->last = NULL;
460
0
  table->entsize = entsize;
461
0
  table->strings = strings;
462
463
0
  table->nbuckets = 0x2000;
464
0
  table->key_lens = objalloc_alloc ((struct objalloc *) table->table.memory,
465
0
        table->nbuckets * sizeof (table->key_lens[0]));
466
0
  memset (table->key_lens, 0, table->nbuckets * sizeof (table->key_lens[0]));
467
0
  table->values = objalloc_alloc ((struct objalloc *) table->table.memory,
468
0
        table->nbuckets * sizeof (table->values[0]));
469
0
  memset (table->values, 0, table->nbuckets * sizeof (table->values[0]));
470
471
0
  return table;
472
0
}
473
474
/* Append the tuple of input-offset O corresponding
475
   to hash table ENTRY into SECINFO, such that we later may lookup the
476
   entry just by O.  */
477
478
static bool
479
append_offsetmap (struct sec_merge_sec_info *secinfo,
480
      mapofs_type o,
481
      struct sec_merge_hash_entry *entry)
482
0
{
483
0
  if ((secinfo->noffsetmap & 2047) == 0)
484
0
    {
485
0
      bfd_size_type amt;
486
0
      amt = (secinfo->noffsetmap + 2048);
487
0
      secinfo->map_ofs = bfd_realloc (secinfo->map_ofs,
488
0
              amt * sizeof(secinfo->map_ofs[0]));
489
0
      if (!secinfo->map_ofs)
490
0
  return false;
491
0
      secinfo->map = bfd_realloc (secinfo->map, amt * sizeof(secinfo->map[0]));
492
0
      if (!secinfo->map)
493
0
  return false;
494
0
    }
495
0
  unsigned int i = secinfo->noffsetmap++;
496
0
  MAP_OFS(secinfo, i) = o;
497
0
  secinfo->map[i].entry = entry;
498
0
  return true;
499
0
}
500
501
/* Prepare the input-offset-to-entry tables after output offsets are
502
   determined.  */
503
504
static void
505
prepare_offsetmap (struct sec_merge_sec_info *secinfo)
506
0
{
507
0
  unsigned int noffsetmap = secinfo->noffsetmap;
508
0
  unsigned int i, lbi;
509
0
  bfd_size_type l, sz, amt;
510
511
0
  secinfo->fast_state = 1;
512
513
0
  for (i = 0; i < noffsetmap; i++)
514
0
    MAP_IDX(secinfo, i) = secinfo->map[i].entry->u.index;
515
516
0
  sz = secinfo->sec->rawsize;
517
0
  amt = (sz / OFSDIV + 1) * sizeof (secinfo->ofstolowbound[0]);
518
0
  secinfo->ofstolowbound = bfd_zmalloc (amt);
519
0
  if (!secinfo->ofstolowbound)
520
0
    return;
521
0
  for (l = lbi = 0; l < sz; l += OFSDIV)
522
0
    {
523
      /* No need for bounds checking on lbi, as we've added a sentinel that's
524
   larger than any offset.  */
525
0
      while (MAP_OFS(secinfo, lbi) <= l)
526
0
  lbi++;
527
      //BFD_ASSERT ((l / OFSDIV) <= (i / OFSDIV));
528
0
      secinfo->ofstolowbound[l / OFSDIV] = lbi;
529
0
    }
530
0
  secinfo->fast_state = 2;
531
0
}
532
533
static bool
534
sec_merge_emit (bfd *abfd, struct sec_merge_sec_info *secinfo,
535
    unsigned char *contents)
536
0
{
537
0
  struct sec_merge_hash_entry *entry = secinfo->first_str;
538
0
  asection *sec = secinfo->sec;
539
0
  file_ptr offset = sec->output_offset;
540
0
  char *pad = NULL;
541
0
  bfd_size_type off = 0;
542
0
  unsigned int opb = bfd_octets_per_byte (abfd, sec);
543
0
  int alignment_power = sec->output_section->alignment_power * opb;
544
0
  bfd_size_type pad_len;  /* Octets.  */
545
546
  /* FIXME: If alignment_power is 0 then really we should scan the
547
     entry list for the largest required alignment and use that.  */
548
0
  pad_len = alignment_power ? ((bfd_size_type) 1 << alignment_power) : 16;
549
550
0
  pad = (char *) bfd_zmalloc (pad_len);
551
0
  if (pad == NULL)
552
0
    return false;
553
554
0
  for (; entry != NULL; entry = entry->next)
555
0
    {
556
0
      const char *str;
557
0
      bfd_size_type len;
558
559
0
      if (!entry->len)
560
0
  continue;
561
0
      BFD_ASSERT (entry->alignment);
562
0
      len = -off & (entry->alignment - 1);
563
0
      if (len != 0)
564
0
  {
565
0
    BFD_ASSERT (len <= pad_len);
566
0
    if (contents)
567
0
      {
568
0
        memcpy (contents + offset, pad, len);
569
0
        offset += len;
570
0
      }
571
0
    else if (bfd_write (pad, len, abfd) != len)
572
0
      goto err;
573
0
    off += len;
574
0
  }
575
576
0
      str = entry->str;
577
0
      len = entry->len;
578
579
0
      if (contents)
580
0
  {
581
0
    memcpy (contents + offset, str, len);
582
0
    offset += len;
583
0
  }
584
0
      else if (bfd_write (str, len, abfd) != len)
585
0
  goto err;
586
587
0
      off += len;
588
0
    }
589
0
  BFD_ASSERT (!entry);
590
591
  /* Trailing alignment needed?  */
592
0
  off = sec->size - off;
593
0
  if (1 && off != 0)
594
0
    {
595
0
      BFD_ASSERT (off <= pad_len);
596
0
      if (contents)
597
0
  memcpy (contents + offset, pad, off);
598
0
      else if (bfd_write (pad, off, abfd) != off)
599
0
  goto err;
600
0
    }
601
602
0
  free (pad);
603
0
  return true;
604
605
0
 err:
606
0
  free (pad);
607
0
  return false;
608
0
}
609
610
/* Register a SEC_MERGE section as a candidate for merging.
611
   This function is called for all non-dynamic SEC_MERGE input sections.  */
612
613
bool
614
_bfd_add_merge_section (bfd *abfd, void **psinfo, asection *sec,
615
      void **psecinfo)
616
0
{
617
0
  struct sec_merge_info *sinfo;
618
0
  struct sec_merge_sec_info *secinfo;
619
0
  asection *repr;
620
0
  unsigned int alignment_power;  /* Octets.  */
621
0
  unsigned int align;            /* Octets.  */
622
0
  unsigned int opb = bfd_octets_per_byte (abfd, sec);
623
624
0
  if ((abfd->flags & DYNAMIC) != 0
625
0
      || (sec->flags & SEC_MERGE) == 0)
626
0
    abort ();
627
628
0
  if (sec->size == 0
629
0
      || (sec->flags & SEC_EXCLUDE) != 0
630
0
      || (sec->flags & SEC_HAS_CONTENTS) == 0
631
0
      || sec->entsize == 0)
632
0
    return true;
633
634
0
  if (sec->size % sec->entsize != 0)
635
0
    return true;
636
637
0
  if ((sec->flags & SEC_RELOC) != 0)
638
0
    {
639
      /* We aren't prepared to handle relocations in merged sections.  */
640
0
      return true;
641
0
    }
642
643
0
  if (sec->size > (mapofs_type)-1)
644
0
    {
645
      /* Input offsets must be representable by mapofs_type.  */
646
0
      return true;
647
0
    }
648
649
#ifndef CHAR_BIT
650
#define CHAR_BIT 8
651
#endif
652
0
  alignment_power = sec->alignment_power * opb;
653
0
  if (alignment_power >= sizeof (align) * CHAR_BIT)
654
0
    return true;
655
656
0
  align = 1u << alignment_power;
657
0
  if ((sec->entsize < align
658
0
       && ((sec->entsize & (sec->entsize - 1))
659
0
     || !(sec->flags & SEC_STRINGS)))
660
0
      || (sec->entsize > align
661
0
    && (sec->entsize & (align - 1))))
662
0
    {
663
      /* Sanity check.  If string character size is smaller than
664
   alignment, then we require character size to be a power
665
   of 2, otherwise character size must be integer multiple
666
   of alignment.  For non-string constants, alignment must
667
   be smaller than or equal to entity size and entity size
668
   must be integer multiple of alignment.  */
669
0
      return true;
670
0
    }
671
672
  /* Initialize the descriptor for this input section.  */
673
674
0
  *psecinfo = secinfo = bfd_zalloc (abfd, sizeof (*secinfo));
675
0
  if (*psecinfo == NULL)
676
0
    goto error_return;
677
678
0
  secinfo->sec = sec;
679
0
  secinfo->psecinfo = psecinfo;
680
681
  /* Search for a matching output merged section.  */
682
0
  for (sinfo = (struct sec_merge_info *) *psinfo; sinfo; sinfo = sinfo->next)
683
0
    if (sinfo->chain
684
0
  && (repr = sinfo->chain->sec)
685
0
  && ! ((repr->flags ^ sec->flags) & (SEC_MERGE | SEC_STRINGS))
686
0
  && repr->entsize == sec->entsize
687
0
  && repr->alignment_power == sec->alignment_power
688
0
  && repr->output_section == sec->output_section)
689
0
      break;
690
691
0
  if (sinfo == NULL)
692
0
    {
693
      /* Initialize the information we need to keep track of.  */
694
0
      sinfo = (struct sec_merge_info *)
695
0
    bfd_alloc (abfd, sizeof (struct sec_merge_info));
696
0
      if (sinfo == NULL)
697
0
  goto error_return;
698
0
      sinfo->next = (struct sec_merge_info *) *psinfo;
699
0
      sinfo->chain = NULL;
700
0
      sinfo->last = &sinfo->chain;
701
0
      *psinfo = sinfo;
702
0
      sinfo->htab = sec_merge_init (sec->entsize, (sec->flags & SEC_STRINGS));
703
0
      if (sinfo->htab == NULL)
704
0
  goto error_return;
705
0
    }
706
707
0
  *sinfo->last = secinfo;
708
0
  sinfo->last = &secinfo->next;
709
710
0
  secinfo->sinfo = sinfo;
711
0
  secinfo->reprsec = sinfo->chain->sec;
712
713
0
  return true;
714
715
0
 error_return:
716
0
  *psecinfo = NULL;
717
0
  return false;
718
0
}
719
720
/* Record one whole input section (described by SECINFO) into the hash table
721
   SINFO.  Returns true when section is completely recorded, and false when
722
   it wasn't recorded but we can continue (e.g. by simply not deduplicating
723
   this section).  */
724
725
static bool
726
record_section (struct sec_merge_info *sinfo,
727
    struct sec_merge_sec_info *secinfo)
728
0
{
729
0
  asection *sec = secinfo->sec;
730
0
  struct sec_merge_hash_entry *entry;
731
0
  unsigned char *p, *end;
732
0
  bfd_vma mask, eltalign;
733
0
  unsigned int align;
734
0
  bfd_size_type amt;
735
0
  bfd_byte *contents;
736
0
  void *tmpptr;
737
738
0
  amt = sec->size;
739
0
  if (sec->flags & SEC_STRINGS)
740
    /* Some versions of gcc may emit a string without a zero terminator.
741
       See http://gcc.gnu.org/ml/gcc-patches/2006-06/msg01004.html
742
       Allocate space for an extra zero.  */
743
0
    amt += sec->entsize;
744
0
  contents = bfd_malloc (amt);
745
0
  if (!contents)
746
0
    goto error_return;
747
748
  /* Slurp in all section contents (possibly decompressing it).  */
749
0
  sec->rawsize = sec->size;
750
0
  if (sec->flags & SEC_STRINGS)
751
0
    memset (contents + sec->size, 0, sec->entsize);
752
0
  if (! bfd_get_full_section_contents (sec->owner, sec, &contents))
753
0
    goto error_return;
754
755
  /* Now populate the hash table and offset mapping.  */
756
757
  /* Walk through the contents, calculate hashes and length of all
758
     blobs (strings or fixed-size entries) we find and fill the
759
     hash and offset tables.  */
760
0
  align = sec->alignment_power;
761
0
  mask = ((bfd_vma) 1 << align) - 1;
762
0
  end = contents + sec->size;
763
0
  for (p = contents; p < end;)
764
0
    {
765
0
      unsigned len;
766
0
      uint32_t hash = hashit (sinfo->htab, (char*) p, &len);
767
0
      unsigned int ofs = p - contents;
768
0
      eltalign = ofs;
769
0
      eltalign = ((eltalign ^ (eltalign - 1)) + 1) >> 1;
770
0
      if (!eltalign || eltalign > mask)
771
0
  eltalign = mask + 1;
772
0
      entry = sec_merge_hash_lookup (sinfo->htab, (char *) p, len, hash,
773
0
             (unsigned) eltalign);
774
0
      if (! entry)
775
0
  goto error_return;
776
0
      if (! append_offsetmap (secinfo, ofs, entry))
777
0
  goto error_return;
778
0
      p += len;
779
0
    }
780
781
  /* Add a sentinel element that's conceptually behind all others.  */
782
0
  append_offsetmap (secinfo, sec->size, NULL);
783
  /* But don't count it.  */
784
0
  secinfo->noffsetmap--;
785
786
0
  free (contents);
787
0
  contents = NULL;
788
789
  /* We allocate the ofsmap arrays in blocks of 2048 elements.
790
     In case we have very many small input files/sections,
791
     this might waste large amounts of memory, so reallocate these
792
     arrays here to their true size.  */
793
0
  amt = secinfo->noffsetmap + 1;
794
0
  tmpptr = bfd_realloc (secinfo->map, amt * sizeof(secinfo->map[0]));
795
0
  if (tmpptr)
796
0
    secinfo->map = tmpptr;
797
0
  tmpptr = bfd_realloc (secinfo->map_ofs, amt * sizeof(secinfo->map_ofs[0]));
798
0
  if (tmpptr)
799
0
    secinfo->map_ofs = tmpptr;
800
801
  /*printf ("ZZZ %s:%s %u entries\n", sec->owner->filename, sec->name,
802
    (unsigned)secinfo->noffsetmap);*/
803
804
0
  return true;
805
806
0
 error_return:
807
0
  free (contents);
808
0
  contents = NULL;
809
0
  return false;
810
0
}
811
812
/* qsort comparison function.  Won't ever return zero as all entries
813
   differ, so there is no issue with qsort stability here.  */
814
815
static int
816
strrevcmp (const void *a, const void *b)
817
0
{
818
0
  struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
819
0
  struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
820
0
  unsigned int lenA = A->len;
821
0
  unsigned int lenB = B->len;
822
0
  const unsigned char *s = (const unsigned char *) A->str + lenA - 1;
823
0
  const unsigned char *t = (const unsigned char *) B->str + lenB - 1;
824
0
  int l = lenA < lenB ? lenA : lenB;
825
826
0
  while (l)
827
0
    {
828
0
      if (*s != *t)
829
0
  return (int) *s - (int) *t;
830
0
      s--;
831
0
      t--;
832
0
      l--;
833
0
    }
834
0
  return lenA - lenB;
835
0
}
836
837
/* Like strrevcmp, but for the case where all strings have the same
838
   alignment > entsize.  */
839
840
static int
841
strrevcmp_align (const void *a, const void *b)
842
0
{
843
0
  struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
844
0
  struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
845
0
  unsigned int lenA = A->len;
846
0
  unsigned int lenB = B->len;
847
0
  const unsigned char *s = (const unsigned char *) A->str + lenA - 1;
848
0
  const unsigned char *t = (const unsigned char *) B->str + lenB - 1;
849
0
  int l = lenA < lenB ? lenA : lenB;
850
0
  int tail_align = (lenA & (A->alignment - 1)) - (lenB & (A->alignment - 1));
851
852
0
  if (tail_align != 0)
853
0
    return tail_align;
854
855
0
  while (l)
856
0
    {
857
0
      if (*s != *t)
858
0
  return (int) *s - (int) *t;
859
0
      s--;
860
0
      t--;
861
0
      l--;
862
0
    }
863
0
  return lenA - lenB;
864
0
}
865
866
static inline int
867
is_suffix (const struct sec_merge_hash_entry *A,
868
     const struct sec_merge_hash_entry *B)
869
0
{
870
0
  if (A->len <= B->len)
871
    /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
872
       not to be equal by the hash table.  */
873
0
    return 0;
874
875
0
  return memcmp (A->str + (A->len - B->len),
876
0
     B->str, B->len) == 0;
877
0
}
878
879
/* This is a helper function for _bfd_merge_sections.  It attempts to
880
   merge strings matching suffixes of longer strings.  */
881
static struct sec_merge_sec_info *
882
merge_strings (struct sec_merge_info *sinfo)
883
0
{
884
0
  struct sec_merge_hash_entry **array, **a, *e;
885
0
  struct sec_merge_sec_info *secinfo;
886
0
  bfd_size_type size, amt;
887
0
  unsigned int alignment = 0;
888
889
  /* Now sort the strings */
890
0
  amt = sinfo->htab->table.count * sizeof (struct sec_merge_hash_entry *);
891
0
  array = (struct sec_merge_hash_entry **) bfd_malloc (amt);
892
0
  if (array == NULL)
893
0
    return NULL;
894
895
0
  for (e = sinfo->htab->first, a = array; e; e = e->next)
896
0
    if (e->alignment)
897
0
      {
898
0
  *a++ = e;
899
  /* Adjust the length to not include the zero terminator.  */
900
0
  e->len -= sinfo->htab->entsize;
901
0
  if (alignment != e->alignment)
902
0
    {
903
0
      if (alignment == 0)
904
0
        alignment = e->alignment;
905
0
      else
906
0
        alignment = (unsigned) -1;
907
0
    }
908
0
      }
909
910
0
  size_t asize = a - array;
911
0
  if (asize != 0)
912
0
    {
913
0
      qsort (array, asize,
914
0
       sizeof (struct sec_merge_hash_entry *),
915
0
       (alignment != (unsigned) -1 && alignment > sinfo->htab->entsize
916
0
        ? strrevcmp_align : strrevcmp));
917
918
      /* Loop over the sorted array and merge suffixes */
919
0
      e = *--a;
920
0
      e->len += sinfo->htab->entsize;
921
0
      while (--a >= array)
922
0
  {
923
0
    struct sec_merge_hash_entry *cmp = *a;
924
925
0
    cmp->len += sinfo->htab->entsize;
926
0
    if (e->alignment >= cmp->alignment
927
0
        && !((e->len - cmp->len) & (cmp->alignment - 1))
928
0
        && is_suffix (e, cmp))
929
0
      {
930
0
        cmp->u.suffix = e;
931
0
        cmp->alignment = 0;
932
0
      }
933
0
    else
934
0
      e = cmp;
935
0
  }
936
0
    }
937
938
0
  free (array);
939
940
  /* Now assign positions to the strings we want to keep.  */
941
0
  size = 0;
942
0
  secinfo = sinfo->chain;
943
0
  for (e = sinfo->htab->first; e; e = e->next)
944
0
    {
945
0
      if (e->alignment)
946
0
  {
947
0
    size = (size + e->alignment - 1) & ~((bfd_vma) e->alignment - 1);
948
0
    e->u.index = size;
949
0
    size += e->len;
950
0
  }
951
0
    }
952
0
  secinfo->sec->size = size;
953
954
  /* And now adjust the rest, removing them from the chain (but not hashtable)
955
     at the same time.  */
956
0
  for (a = &sinfo->htab->first, e = *a; e; e = e->next)
957
0
    if (e->alignment)
958
0
      a = &e->next;
959
0
    else
960
0
      {
961
0
  *a = e->next;
962
0
  if (e->len)
963
0
    {
964
0
      e->alignment = e->u.suffix->alignment;
965
0
      e->u.index = e->u.suffix->u.index + (e->u.suffix->len - e->len);
966
0
    }
967
0
      }
968
969
0
  BFD_ASSERT (!secinfo->first_str);
970
0
  secinfo->first_str = sinfo->htab->first;
971
972
0
  return secinfo;
973
0
}
974
975
/* This function is called once after all SEC_MERGE sections are registered
976
   with _bfd_merge_section.  */
977
978
bool
979
_bfd_merge_sections (bfd *abfd,
980
         struct bfd_link_info *info ATTRIBUTE_UNUSED,
981
         void *xsinfo,
982
         void (*remove_hook) (bfd *, asection *))
983
0
{
984
0
  struct sec_merge_info *sinfo;
985
986
0
  for (sinfo = (struct sec_merge_info *) xsinfo; sinfo; sinfo = sinfo->next)
987
0
    {
988
0
      struct sec_merge_sec_info *secinfo;
989
0
      bfd_size_type align;  /* Bytes.  */
990
991
0
      if (! sinfo->chain)
992
0
  continue;
993
994
      /* Record the sections into the hash table.  */
995
0
      align = 1;
996
0
      for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
997
0
  if (secinfo->sec->flags & SEC_EXCLUDE
998
0
      || !record_section (sinfo, secinfo))
999
0
    {
1000
0
      *secinfo->psecinfo = NULL;
1001
0
      if (remove_hook)
1002
0
        (*remove_hook) (abfd, secinfo->sec);
1003
0
    }
1004
0
  else if (align)
1005
0
    {
1006
0
      unsigned int opb = bfd_octets_per_byte (abfd, secinfo->sec);
1007
1008
0
      align = (bfd_size_type) 1 << secinfo->sec->alignment_power;
1009
0
      if (((secinfo->sec->size / opb) & (align - 1)) != 0)
1010
0
        align = 0;
1011
0
    }
1012
1013
0
      if (sinfo->htab->first == NULL)
1014
0
  continue;
1015
1016
0
      if (sinfo->htab->strings)
1017
0
  {
1018
0
    secinfo = merge_strings (sinfo);
1019
0
    if (!secinfo)
1020
0
      return false;
1021
0
  }
1022
0
      else
1023
0
  {
1024
0
    struct sec_merge_hash_entry *e = sinfo->htab->first;
1025
0
    bfd_size_type size = 0;  /* Octets.  */
1026
1027
    /* Things are much simpler for non-strings.
1028
       Just assign them slots in the section.  */
1029
0
    secinfo = sinfo->chain;
1030
0
    BFD_ASSERT (!secinfo->first_str);
1031
0
    secinfo->first_str = e;
1032
0
    for (e = sinfo->htab->first; e; e = e->next)
1033
0
      {
1034
0
        if (e->alignment)
1035
0
    {
1036
0
      size = (size + e->alignment - 1)
1037
0
       & ~((bfd_vma) e->alignment - 1);
1038
0
      e->u.index = size;
1039
0
      size += e->len;
1040
0
    }
1041
0
      }
1042
0
    secinfo->sec->size = size;
1043
0
  }
1044
1045
      /* If the input sections were padded according to their alignments,
1046
   then pad the output too.  */
1047
0
      if (align)
1048
0
  secinfo->sec->size = (secinfo->sec->size + align - 1) & -align;
1049
1050
      /* Finally remove all input sections which have not made it into
1051
   the hash table at all.  */
1052
0
      for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
1053
0
  if (secinfo->first_str == NULL
1054
0
      && secinfo->sec->sec_info_type == SEC_INFO_TYPE_MERGE)
1055
0
    secinfo->sec->flags |= SEC_EXCLUDE | SEC_KEEP;
1056
0
    }
1057
1058
0
  return true;
1059
0
}
1060
1061
/* Write out the merged section.  */
1062
1063
bool
1064
_bfd_write_merged_section (bfd *output_bfd, asection *sec, void *psecinfo)
1065
0
{
1066
0
  struct sec_merge_sec_info *secinfo;
1067
0
  file_ptr pos;
1068
0
  unsigned char *contents;
1069
0
  Elf_Internal_Shdr *hdr;
1070
1071
0
  secinfo = (struct sec_merge_sec_info *) psecinfo;
1072
1073
0
  if (!secinfo)
1074
0
    return false;
1075
1076
0
  if (secinfo->first_str == NULL)
1077
0
    return true;
1078
1079
  /* FIXME: octets_per_byte.  */
1080
0
  hdr = &elf_section_data (sec->output_section)->this_hdr;
1081
0
  if (hdr->sh_offset == (file_ptr) -1)
1082
0
    {
1083
      /* We must compress this section.  Write output to the
1084
   buffer.  */
1085
0
      contents = hdr->contents;
1086
0
      if (contents == NULL)
1087
0
  abort ();
1088
0
    }
1089
0
  else
1090
0
    {
1091
0
      contents = NULL;
1092
0
      pos = sec->output_section->filepos + sec->output_offset;
1093
0
      if (bfd_seek (output_bfd, pos, SEEK_SET) != 0)
1094
0
  return false;
1095
0
    }
1096
1097
0
  BFD_ASSERT (sec == secinfo->sec);
1098
0
  BFD_ASSERT (secinfo == secinfo->sinfo->chain);
1099
0
  if (! sec_merge_emit (output_bfd, secinfo, contents))
1100
0
    return false;
1101
1102
0
  return true;
1103
0
}
1104
1105
/* Adjust an address in the SEC_MERGE section.  Given OFFSET within
1106
   *PSEC, this returns the new offset in the adjusted SEC_MERGE
1107
   section and writes the new section back into *PSEC.  */
1108
1109
bfd_vma
1110
_bfd_merged_section_offset (bfd *output_bfd ATTRIBUTE_UNUSED, asection **psec,
1111
          void *psecinfo, bfd_vma offset)
1112
0
{
1113
0
  struct sec_merge_sec_info *secinfo;
1114
0
  asection *sec = *psec;
1115
1116
0
  secinfo = (struct sec_merge_sec_info *) psecinfo;
1117
1118
0
  if (!secinfo)
1119
0
    return offset;
1120
1121
0
  if (offset >= sec->rawsize)
1122
0
    {
1123
0
      if (offset > sec->rawsize)
1124
0
  _bfd_error_handler
1125
    /* xgettext:c-format */
1126
0
    (_("%pB: access beyond end of merged section (%" PRId64 ")"),
1127
0
     sec->owner, (int64_t) offset);
1128
0
      return secinfo->first_str ? sec->size : 0;
1129
0
    }
1130
1131
0
  if (secinfo->fast_state != 2)
1132
0
    {
1133
0
      if (!secinfo->fast_state)
1134
0
  prepare_offsetmap (secinfo);
1135
0
      if (secinfo->fast_state != 2)
1136
0
  return offset;
1137
0
    }
1138
1139
0
  long lb = secinfo->ofstolowbound[offset / OFSDIV];
1140
0
  *psec = secinfo->reprsec;
1141
1142
  /* No need for bounds checking on lb, as we've added a sentinel that's
1143
     larger than any offset.  */
1144
0
  while (MAP_OFS(secinfo, lb) <= offset)
1145
0
    lb++;
1146
0
  lb--;
1147
1148
  /*printf ("YYY (%s:%s):%u -> (%s):%u\n",
1149
    sec->owner->filename, sec->name, (unsigned)offset,
1150
    (*psec)->name, (unsigned)lb);*/
1151
0
  return MAP_IDX(secinfo, lb) + offset - MAP_OFS(secinfo, lb);
1152
0
}
1153
1154
/* Tidy up when done.  */
1155
1156
void
1157
_bfd_merge_sections_free (void *xsinfo)
1158
0
{
1159
0
  struct sec_merge_info *sinfo;
1160
1161
0
  for (sinfo = (struct sec_merge_info *) xsinfo; sinfo; sinfo = sinfo->next)
1162
0
    {
1163
0
      struct sec_merge_sec_info *secinfo;
1164
0
      for (secinfo = sinfo->chain; secinfo; secinfo = secinfo->next)
1165
0
  {
1166
0
    free (secinfo->ofstolowbound);
1167
0
    free (secinfo->map);
1168
0
    free (secinfo->map_ofs);
1169
0
  }
1170
0
      bfd_hash_table_free (&sinfo->htab->table);
1171
0
      free (sinfo->htab);
1172
0
    }
1173
0
}