Coverage Report

Created: 2026-04-12 06:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/capstonenext/Mapping.c
Line
Count
Source
1
/* Capstone Disassembly Engine */
2
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
3
/*    Rot127 <unisono@quyllur.org>, 2022-2023 */
4
5
#include "Mapping.h"
6
#include "capstone/capstone.h"
7
#include "cs_priv.h"
8
#include "utils.h"
9
10
// Create a cache to map LLVM instruction IDs to capstone instruction IDs, if
11
// the architecture needs this.
12
cs_err populate_insn_map_cache(cs_struct *handle)
13
35.1k
{
14
35.1k
  unsigned int i;
15
16
  // If this architecture doesn't use instruction mapping, do nothing
17
35.1k
  if (!handle->insn_map || handle->insn_map_size <= 0)
18
33.1k
    return CS_ERR_OK;
19
20
  // Since the instruction map is assumed to be stored in ascending
21
  // order, we can get the maximum LLVM instruction id just by looking at
22
  // the last element.
23
1.96k
  unsigned int cache_elements =
24
1.96k
    handle->insn_map[handle->insn_map_size - 1].id + 1;
25
26
  // This should not be initialized yet.
27
1.96k
  CS_ASSERT(!handle->insn_cache);
28
29
1.96k
  unsigned short *cache = cs_mem_calloc(cache_elements, sizeof(*cache));
30
1.96k
  if (!cache) {
31
0
    handle->errnum = CS_ERR_MEM;
32
0
    return CS_ERR_MEM;
33
0
  }
34
1.96k
  handle->insn_cache = cache;
35
36
12.3M
  for (i = 1; i < handle->insn_map_size; ++i)
37
12.3M
    handle->insn_cache[handle->insn_map[i].id] = i;
38
39
1.96k
  return CS_ERR_OK;
40
1.96k
}
41
42
// Returns the instruction map entry the cache associates with the LLVM
// instruction @id, or NULL if @id is larger than the highest id in the map.
//
// The cache has to be populated beforehand (this should be done when
// populate_insn_map_cache() gets called).
const insn_map *lookup_insn_map(cs_struct *handle, unsigned short id)
{
  CS_ASSERT(handle->insn_cache);
  CS_ASSERT(handle->insn_map_size);

  // The last map entry holds the highest id.
  unsigned short max_id = handle->insn_map[handle->insn_map_size - 1].id;

  if (id <= max_id) {
    unsigned short slot = handle->insn_cache[id];
    return &handle->insn_map[slot];
  }

  return NULL;
}
58
59
// Gives the id for the given @name if it is saved in @map.
60
// Returns the id or -1 if not found.
61
int name2id(const name_map *map, int max, const char *name)
62
52.1k
{
63
52.1k
  CS_ASSERT_RET_VAL(map && name, -1);
64
52.1k
  int i;
65
66
6.95M
  for (i = 0; i < max; i++) {
67
6.95M
    if (!map[i].name) {
68
2.51k
      return -1;
69
2.51k
    }
70
6.95M
    if (!strcmp(map[i].name, name)) {
71
48.2k
      return map[i].id;
72
48.2k
    }
73
6.95M
  }
74
75
  // nothing match
76
1.36k
  return -1;
77
52.1k
}
78
79
// Gives the name for the given @id if it is saved in @map.
80
// Returns the name or NULL if not found.
81
const char *id2name(const name_map *map, int max, const unsigned int id)
82
1.76M
{
83
1.76M
  int i;
84
85
69.8M
  for (i = 0; i < max; i++) {
86
69.8M
    if (map[i].id == id) {
87
1.75M
      return map[i].name;
88
1.75M
    }
89
69.8M
  }
90
91
  // nothing match
92
11.7k
  return NULL;
93
1.76M
}
94
95
/// Adds a register to the implicit write register list.
96
/// It will not add the same register twice.
97
void map_add_implicit_write(MCInst *MI, uint32_t Reg)
98
240k
{
99
240k
  if (!MI->flat_insn->detail)
100
0
    return;
101
102
240k
  uint16_t *regs_write = MI->flat_insn->detail->regs_write;
103
242k
  for (int i = 0; i < MAX_IMPL_W_REGS; ++i) {
104
242k
    if (i == MI->flat_insn->detail->regs_write_count) {
105
230k
      regs_write[i] = Reg;
106
230k
      MI->flat_insn->detail->regs_write_count++;
107
230k
      return;
108
230k
    }
109
12.4k
    if (regs_write[i] == Reg)
110
10.6k
      return;
111
12.4k
  }
112
240k
}
113
114
/// Adds a register to the implicit read register list.
115
/// It will not add the same register twice.
116
void map_add_implicit_read(MCInst *MI, uint32_t Reg)
117
102k
{
118
102k
  if (!MI->flat_insn->detail)
119
0
    return;
120
121
102k
  uint16_t *regs_read = MI->flat_insn->detail->regs_read;
122
108k
  for (int i = 0; i < MAX_IMPL_R_REGS; ++i) {
123
108k
    if (i == MI->flat_insn->detail->regs_read_count) {
124
93.5k
      regs_read[i] = Reg;
125
93.5k
      MI->flat_insn->detail->regs_read_count++;
126
93.5k
      return;
127
93.5k
    }
128
15.1k
    if (regs_read[i] == Reg)
129
9.36k
      return;
130
15.1k
  }
131
102k
}
132
133
/// Removes a register from the implicit write register list.
134
void map_remove_implicit_write(MCInst *MI, uint32_t Reg)
135
17.8k
{
136
17.8k
  if (!MI->flat_insn->detail)
137
0
    return;
138
139
17.8k
  uint16_t *regs_write = MI->flat_insn->detail->regs_write;
140
17.8k
  bool shorten_list = false;
141
20.3k
  for (int i = 0; i < MAX_IMPL_W_REGS; ++i) {
142
20.3k
    if (shorten_list) {
143
2.52k
      regs_write[i - 1] = regs_write[i];
144
2.52k
    }
145
20.3k
    if (i >= MI->flat_insn->detail->regs_write_count)
146
17.8k
      return;
147
148
2.52k
    if (regs_write[i] == Reg) {
149
2.52k
      MI->flat_insn->detail->regs_write_count--;
150
      // The register should exist only once in the list.
151
2.52k
      CS_ASSERT_RET(!shorten_list);
152
2.52k
      shorten_list = true;
153
2.52k
    }
154
2.52k
  }
155
17.8k
}
156
157
/// Copies the implicit read registers of @imap to @MI->flat_insn.
158
/// Already present registers will be preserved.
159
void map_implicit_reads(MCInst *MI, const insn_map *imap)
160
1.11M
{
161
1.11M
#ifndef CAPSTONE_DIET
162
1.11M
  if (!MI->flat_insn->detail)
163
0
    return;
164
165
1.11M
  cs_detail *detail = MI->flat_insn->detail;
166
1.11M
  unsigned Opcode = MCInst_getOpcode(MI);
167
1.11M
  unsigned i = 0;
168
1.11M
  uint16_t reg = imap[Opcode].regs_use[i];
169
1.18M
  while (reg != 0) {
170
73.4k
    if (i >= MAX_IMPL_R_REGS ||
171
73.4k
        detail->regs_read_count >= MAX_IMPL_R_REGS) {
172
0
      printf("ERROR: Too many implicit read register defined in "
173
0
             "instruction mapping.\n");
174
0
      return;
175
0
    }
176
73.4k
    detail->regs_read[detail->regs_read_count++] = reg;
177
73.4k
    if (i + 1 < MAX_IMPL_R_REGS) {
178
      // Select next one
179
73.4k
      reg = imap[Opcode].regs_use[++i];
180
73.4k
    }
181
73.4k
  }
182
1.11M
#endif // CAPSTONE_DIET
183
1.11M
}
184
185
/// Copies the implicit write registers of @imap to @MI->flat_insn.
186
/// Already present registers will be preserved.
187
void map_implicit_writes(MCInst *MI, const insn_map *imap)
188
1.11M
{
189
1.11M
#ifndef CAPSTONE_DIET
190
1.11M
  if (!MI->flat_insn->detail)
191
0
    return;
192
193
1.11M
  cs_detail *detail = MI->flat_insn->detail;
194
1.11M
  unsigned Opcode = MCInst_getOpcode(MI);
195
1.11M
  unsigned i = 0;
196
1.11M
  uint16_t reg = imap[Opcode].regs_mod[i];
197
1.29M
  while (reg != 0) {
198
180k
    if (i >= MAX_IMPL_W_REGS ||
199
180k
        detail->regs_write_count >= MAX_IMPL_W_REGS) {
200
0
      printf("ERROR: Too many implicit write register defined in "
201
0
             "instruction mapping.\n");
202
0
      return;
203
0
    }
204
180k
    detail->regs_write[detail->regs_write_count++] = reg;
205
180k
    if (i + 1 < MAX_IMPL_W_REGS) {
206
      // Select next one
207
180k
      reg = imap[Opcode].regs_mod[++i];
208
180k
    }
209
180k
  }
210
1.11M
#endif // CAPSTONE_DIET
211
1.11M
}
212
213
/// Adds a given group to @MI->flat_insn.
214
/// A group is never added twice.
215
void add_group(MCInst *MI, unsigned /* arch_group */ group)
216
107k
{
217
107k
#ifndef CAPSTONE_DIET
218
107k
  if (!MI->flat_insn->detail)
219
0
    return;
220
221
107k
  cs_detail *detail = MI->flat_insn->detail;
222
107k
  if (detail->groups_count >= MAX_NUM_GROUPS) {
223
0
    printf("ERROR: Too many groups defined.\n");
224
0
    return;
225
0
  }
226
197k
  for (int i = 0; i < detail->groups_count; ++i) {
227
91.2k
    if (detail->groups[i] == group) {
228
410
      return;
229
410
    }
230
91.2k
  }
231
106k
  detail->groups[detail->groups_count++] = group;
232
106k
#endif // CAPSTONE_DIET
233
106k
}
234
235
/// Copies the groups from @imap to @MI->flat_insn.
236
/// Already present groups will be preserved.
237
void map_groups(MCInst *MI, const insn_map *imap)
238
1.11M
{
239
1.11M
#ifndef CAPSTONE_DIET
240
1.11M
  if (!MI->flat_insn->detail)
241
0
    return;
242
243
1.11M
  cs_detail *detail = MI->flat_insn->detail;
244
1.11M
  unsigned Opcode = MCInst_getOpcode(MI);
245
1.11M
  unsigned i = 0;
246
1.11M
  uint16_t group = imap[Opcode].groups[i];
247
2.39M
  while (group != 0) {
248
1.28M
    if (detail->groups_count >= MAX_NUM_GROUPS) {
249
0
      printf("ERROR: Too many groups defined in instruction mapping.\n");
250
0
      return;
251
0
    }
252
1.28M
    detail->groups[detail->groups_count++] = group;
253
1.28M
    group = imap[Opcode].groups[++i];
254
1.28M
  }
255
1.11M
#endif // CAPSTONE_DIET
256
1.11M
}
257
258
/// Returns the pointer to the supllementary information in
259
/// the instruction mapping table @imap or NULL in case of failure.
260
const void *map_get_suppl_info(MCInst *MI, const insn_map *imap)
261
853k
{
262
853k
#ifndef CAPSTONE_DIET
263
853k
  if (!MI->flat_insn->detail)
264
0
    return NULL;
265
266
853k
  unsigned Opcode = MCInst_getOpcode(MI);
267
853k
  return &imap[Opcode].suppl_info;
268
#else
269
  return NULL;
270
#endif // CAPSTONE_DIET
271
853k
}
272
273
// Search for the CS instruction id for the given @MC_Opcode in @imap.
274
// return -1 if none is found.
275
unsigned int find_cs_id(unsigned MC_Opcode, const insn_map *imap,
276
      unsigned imap_size)
277
1.11M
{
278
  // binary searching since the IDs are sorted in order
279
1.11M
  unsigned int left, right, m;
280
1.11M
  unsigned int max = imap_size;
281
282
1.11M
  right = max - 1;
283
284
1.11M
  if (MC_Opcode < imap[0].id || MC_Opcode > imap[right].id)
285
    // not found
286
0
    return -1;
287
288
1.11M
  left = 0;
289
290
12.5M
  while (left <= right) {
291
12.5M
    m = (left + right) / 2;
292
12.5M
    if (MC_Opcode == imap[m].id) {
293
1.11M
      return m;
294
1.11M
    }
295
296
11.3M
    if (MC_Opcode < imap[m].id)
297
4.11M
      right = m - 1;
298
7.27M
    else
299
7.27M
      left = m + 1;
300
11.3M
  }
301
302
0
  return -1;
303
1.11M
}
304
305
/// Sets the Capstone instruction id which maps to the @MI opcode.
306
/// If no mapping is found the function returns and prints an error.
307
void map_cs_id(MCInst *MI, const insn_map *imap, unsigned int imap_size)
308
1.11M
{
309
1.11M
  unsigned int i = find_cs_id(MCInst_getOpcode(MI), imap, imap_size);
310
1.11M
  if (i != -1) {
311
1.11M
    MI->flat_insn->id = imap[i].mapid;
312
1.11M
    return;
313
1.11M
  }
314
0
  printf("ERROR: Could not find CS id for MCInst opcode: %d\n",
315
0
         MCInst_getOpcode(MI));
316
0
  return;
317
1.11M
}
318
319
/// Returns the operand type information from the
320
/// mapping table for instruction operands.
321
/// Only usable by `auto-sync` archs!
322
const cs_op_type mapping_get_op_type(MCInst *MI, unsigned OpNum,
323
             const map_insn_ops *insn_ops_map,
324
             size_t map_size)
325
8.63M
{
326
8.63M
  assert(MI);
327
8.63M
  assert(MI->Opcode < map_size);
328
8.63M
  assert(OpNum < sizeof(insn_ops_map[MI->Opcode].ops) /
329
8.63M
             sizeof(insn_ops_map[MI->Opcode].ops[0]));
330
331
8.63M
  return insn_ops_map[MI->Opcode].ops[OpNum].type;
332
8.63M
}
333
334
/// Returns the operand access flags from the
335
/// mapping table for instruction operands.
336
/// Only usable by `auto-sync` archs!
337
const cs_ac_type mapping_get_op_access(MCInst *MI, unsigned OpNum,
338
               const map_insn_ops *insn_ops_map,
339
               size_t map_size)
340
3.31M
{
341
3.31M
  assert(MI);
342
3.31M
  assert(MI->Opcode < map_size);
343
3.31M
  assert(OpNum < sizeof(insn_ops_map[MI->Opcode].ops) /
344
3.31M
             sizeof(insn_ops_map[MI->Opcode].ops[0]));
345
346
3.31M
  cs_ac_type access = insn_ops_map[MI->Opcode].ops[OpNum].access;
347
3.31M
  if (MCInst_opIsTied(MI, OpNum) || MCInst_opIsTying(MI, OpNum))
348
243k
    access |= (access == CS_AC_READ) ? CS_AC_WRITE : CS_AC_READ;
349
3.31M
  return access;
350
3.31M
}
351
352
/// Returns the operand at detail->arch.operands[op_count + offset]
353
/// Or NULL if detail is not set or the offset would be out of bounds.
354
#define DEFINE_get_detail_op(arch, ARCH, ARCH_UPPER) \
355
  cs_##arch##_op *ARCH##_get_detail_op(MCInst *MI, int offset) \
356
12.6M
  { \
357
12.6M
    if (!MI->flat_insn->detail) \
358
12.6M
      return NULL; \
359
12.6M
    int OpIdx = MI->flat_insn->detail->arch.op_count + offset; \
360
12.6M
    if (OpIdx < 0 || OpIdx >= NUM_##ARCH_UPPER##_OPS) { \
361
5.00k
      return NULL; \
362
5.00k
    } \
363
12.6M
    return &MI->flat_insn->detail->arch.operands[OpIdx]; \
364
12.6M
  }
365
366
7.09M
DEFINE_get_detail_op(arm, ARM, ARM);
367
371k
DEFINE_get_detail_op(ppc, PPC, PPC);
368
0
DEFINE_get_detail_op(tricore, TriCore, TRICORE);
369
3.23M
DEFINE_get_detail_op(aarch64, AArch64, AARCH64);
370
0
DEFINE_get_detail_op(alpha, Alpha, ALPHA);
371
0
DEFINE_get_detail_op(hppa, HPPA, HPPA);
372
0
DEFINE_get_detail_op(loongarch, LoongArch, LOONGARCH);
373
818k
DEFINE_get_detail_op(mips, Mips, MIPS);
374
0
DEFINE_get_detail_op(riscv, RISCV, RISCV);
375
758k
DEFINE_get_detail_op(systemz, SystemZ, SYSTEMZ);
376
147k
DEFINE_get_detail_op(xtensa, Xtensa, XTENSA);
377
0
DEFINE_get_detail_op(bpf, BPF, BPF);
378
0
DEFINE_get_detail_op(arc, ARC, ARC);
379
179k
DEFINE_get_detail_op(sparc, Sparc, SPARC);
380
381
/// Returns the operand at detail->arch.operands[index]
382
/// Or NULL if detail is not set or the index would be out of bounds.
383
#define DEFINE_get_detail_op_at(arch, ARCH, ARCH_UPPER) \
384
  cs_##arch##_op *ARCH##_get_detail_op_at(MCInst *MI, int index) \
385
101k
  { \
386
101k
    if (!MI->flat_insn->detail) \
387
101k
      return NULL; \
388
101k
    if (index < 0 || index >= NUM_##ARCH_UPPER##_OPS) { \
389
0
      return NULL; \
390
0
    } \
391
101k
    return &MI->flat_insn->detail->arch.operands[index]; \
392
101k
  }
393
394
0
DEFINE_get_detail_op_at(arm, ARM, ARM);
395
0
DEFINE_get_detail_op_at(ppc, PPC, PPC);
396
0
DEFINE_get_detail_op_at(tricore, TriCore, TRICORE);
397
0
DEFINE_get_detail_op_at(aarch64, AArch64, AARCH64);
398
0
DEFINE_get_detail_op_at(alpha, Alpha, ALPHA);
399
0
DEFINE_get_detail_op_at(hppa, HPPA, HPPA);
400
0
DEFINE_get_detail_op_at(loongarch, LoongArch, LOONGARCH);
401
0
DEFINE_get_detail_op_at(mips, Mips, MIPS);
402
101k
DEFINE_get_detail_op_at(riscv, RISCV, RISCV);
403
0
DEFINE_get_detail_op_at(systemz, SystemZ, SYSTEMZ);
404
0
DEFINE_get_detail_op_at(xtensa, Xtensa, XTENSA);
405
0
DEFINE_get_detail_op_at(bpf, BPF, BPF);
406
0
DEFINE_get_detail_op_at(arc, ARC, ARC);
407
0
DEFINE_get_detail_op_at(sparc, Sparc, SPARC);
408
409
/// Returns true if for this architecture the
410
/// alias operands should be filled.
411
/// TODO: Replace this with a proper option.
412
///       So it can be toggled between disas() calls.
413
bool map_use_alias_details(const MCInst *MI)
414
1.86M
{
415
1.86M
  assert(MI);
416
1.86M
  return (MI->csh->detail_opt & CS_OPT_ON) &&
417
1.86M
         !(MI->csh->detail_opt & CS_OPT_DETAIL_REAL);
418
1.86M
}
419
420
/// Sets the setDetailOps flag to @p Val.
421
/// If detail == NULLit refuses to set the flag to true.
422
void map_set_fill_detail_ops(MCInst *MI, bool Val)
423
1.81M
{
424
1.81M
  CS_ASSERT_RET(MI);
425
1.81M
  if (!detail_is_set(MI)) {
426
0
    MI->fillDetailOps = false;
427
0
    return;
428
0
  }
429
430
1.81M
  MI->fillDetailOps = Val;
431
1.81M
}
432
433
/// Sets the instruction alias flags and the given alias id.
434
void map_set_is_alias_insn(MCInst *MI, bool Val, uint64_t Alias)
435
0
{
436
0
  CS_ASSERT_RET(MI);
437
0
  MI->isAliasInstr = Val;
438
0
  MI->flat_insn->is_alias = Val;
439
0
  MI->flat_insn->alias_id = Alias;
440
0
}
441
442
/// Returns true if @c terminates a mnemonic for the architecture @arch.
/// NUL, space and tab always do; SPARC additionally stops at ',' and
/// every architecture other than PPC, RISCV and SPARC stops at '.'.
static inline bool char_ends_mnem(const char c, cs_arch arch)
{
  // Terminators shared by all architectures.
  if (c == '\0' || c == ' ' || c == '\t')
    return true;

  switch (arch) {
  case CS_ARCH_PPC:
  case CS_ARCH_RISCV:
    return false;
  case CS_ARCH_SPARC:
    return c == ',';
  default:
    return c == '.';
  }
}
454
455
/// Sets an alternative id for some instruction.
456
/// Or -1 if it fails.
457
/// You must add (<ARCH>_INS_ALIAS_BEGIN + 1) to the id to get the real id.
458
void map_set_alias_id(MCInst *MI, const SStream *O,
459
          const name_map *alias_mnem_id_map, int map_size)
460
1.08M
{
461
1.08M
  if (!MCInst_isAlias(MI))
462
1.03M
    return;
463
464
52.1k
  char alias_mnem[16] = { 0 };
465
52.1k
  int i = 0, j = 0;
466
52.1k
  const char *asm_str_buf = O->buffer;
467
  // Skip spaces and tabs
468
85.7k
  while (is_blank_char(asm_str_buf[i])) {
469
33.5k
    if (!asm_str_buf[i]) {
470
0
      MI->flat_insn->alias_id = -1;
471
0
      return;
472
0
    }
473
33.5k
    ++i;
474
33.5k
  }
475
239k
  for (; j < sizeof(alias_mnem) - 1; ++j, ++i) {
476
239k
    if (char_ends_mnem(asm_str_buf[i], MI->csh->arch))
477
52.1k
      break;
478
187k
    alias_mnem[j] = asm_str_buf[i];
479
187k
  }
480
481
52.1k
  MI->flat_insn->alias_id =
482
52.1k
    name2id(alias_mnem_id_map, map_size, alias_mnem);
483
52.1k
}
484
485
/// Does a binary search over the given map and searches for @id.
486
/// If @id exists in @map, it sets @found to true and returns
487
/// the value for the @id.
488
/// Otherwise, @found is set to false and it returns UINT64_MAX.
489
///
490
/// Of course it assumes the map is sorted.
491
uint64_t enum_map_bin_search(const cs_enum_id_map *map, size_t map_len,
492
           const char *id, bool *found)
493
0
{
494
0
  size_t l = 0;
495
0
  size_t r = map_len;
496
0
  size_t id_len = strlen(id);
497
498
0
  while (l <= r) {
499
0
    size_t m = (l + r) / 2;
500
0
    size_t j = 0;
501
0
    size_t i = 0;
502
0
    size_t entry_len = strlen(map[m].str);
503
504
0
    while (j < entry_len && i < id_len && id[i] == map[m].str[j]) {
505
0
      ++j, ++i;
506
0
    }
507
0
    if (i == id_len && j == entry_len) {
508
0
      *found = true;
509
0
      return map[m].val;
510
0
    }
511
512
0
    if (id[i] < map[m].str[j]) {
513
0
      r = m - 1;
514
0
    } else if (id[i] > map[m].str[j]) {
515
0
      l = m + 1;
516
0
    }
517
0
    if ((m == 0 && id[i] < map[m].str[j]) ||
518
0
        (l + r) / 2 >= map_len) {
519
      // Break before we go out of bounds.
520
0
      break;
521
0
    }
522
0
  }
523
0
  *found = false;
524
  return UINT64_MAX;
525
0
}