Coverage Report

Created: 2026-03-31 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/spirv-tools/source/opt/canonicalize_ids_pass.cpp
Line
Count
Source
1
// Copyright (c) 2025 LunarG Inc.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include "source/opt/canonicalize_ids_pass.h"
16
17
#include <algorithm>
18
#include <limits>
19
20
namespace spvtools {
21
namespace opt {
22
23
0
Pass::Status CanonicalizeIdsPass::Process() {
24
  // Initialize the new ID map.
25
0
  new_id_.resize(GetBound(), unused_);
26
27
  // Scan the IDs and set to unmapped.
28
0
  ScanIds();
29
30
  // Create new IDs for types and consts.
31
0
  CanonicalizeTypeAndConst();
32
33
  // Create new IDs for names.
34
0
  CanonicalizeNames();
35
36
  // Create new IDs for functions.
37
0
  CanonicalizeFunctions();
38
39
  // Create new IDs for everything else.
40
0
  CanonicalizeRemainders();
41
42
  // Apply the new IDs to the module.
43
0
  auto const modified = ApplyMap();
44
45
  // Update bound in the header.
46
0
  if (modified) {
47
0
    UpdateBound();
48
0
  }
49
50
0
  return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
51
0
}
52
53
0
void CanonicalizeIdsPass::ScanIds() {
54
0
  get_module()->ForEachInst(
55
0
      [this](Instruction* inst) {
56
        // Look for types and constants.
57
0
        if (spvOpcodeGeneratesType(inst->opcode()) ||
58
0
            spvOpcodeIsConstant(inst->opcode())) {
59
0
          type_and_const_ids_.push_back(inst->result_id());
60
0
          SetNewId(inst->result_id(), unmapped_);
61
0
        }
62
        // Look for names.
63
0
        else if (inst->opcode() == spv::Op::OpName) {
64
          // store name string in map so that we can compute the hash later
65
0
          auto const name = inst->GetOperand(1).AsString();
66
0
          auto const target = inst->GetSingleWordInOperand(0);
67
0
          name_ids_[name] = target;
68
0
          SetNewId(target, unmapped_);
69
0
        }
70
        // Look for function IDs.
71
0
        else if (inst->opcode() == spv::Op::OpFunction) {
72
0
          auto const res_id = inst->result_id();
73
0
          function_ids_.push_back(res_id);
74
0
          SetNewId(res_id, unmapped_);
75
0
        }
76
        // Look for remaining result IDs.
77
0
        else if (inst->HasResultId()) {
78
0
          auto const res_id = inst->result_id();
79
0
          SetNewId(res_id, unmapped_);
80
0
        }
81
0
      },
82
0
      true);
83
0
}
84
85
0
void CanonicalizeIdsPass::CanonicalizeTypeAndConst() {
86
  // Remap type IDs.
87
0
  static constexpr std::uint32_t soft_type_id_limit = 3011;  // small prime.
88
0
  static constexpr std::uint32_t first_mapped_id = 8;  // offset into ID space
89
0
  for (auto const id : type_and_const_ids_) {
90
0
    if (!IsOldIdUnmapped(id)) {
91
0
      continue;
92
0
    }
93
94
    // Compute the hash value.
95
0
    auto const hash_value = HashTypeAndConst(id);
96
0
    if (hash_value != unmapped_) {
97
0
      SetNewId(id, hash_value % soft_type_id_limit + first_mapped_id);
98
0
    }
99
0
  }
100
0
}
101
102
// Hash types to canonical values.  This can return ID collisions (it's a bit
103
// inevitable): it's up to the caller to handle that gracefully.
104
0
spv::Id CanonicalizeIdsPass::HashTypeAndConst(spv::Id const id) const {
105
0
  spv::Id value = 0;
106
107
0
  auto const inst = get_def_use_mgr()->GetDef(id);
108
0
  auto const op_code = inst->opcode();
109
0
  switch (op_code) {
110
0
    case spv::Op::OpTypeVoid:
111
0
      value = 0;
112
0
      break;
113
0
    case spv::Op::OpTypeBool:
114
0
      value = 1;
115
0
      break;
116
0
    case spv::Op::OpTypeInt: {
117
0
      auto const signedness = inst->GetSingleWordOperand(2);
118
0
      value = 3 + signedness;
119
0
      break;
120
0
    }
121
0
    case spv::Op::OpTypeFloat:
122
0
      value = 5;
123
0
      break;
124
0
    case spv::Op::OpTypeVector: {
125
0
      auto const component_type = inst->GetSingleWordOperand(1);
126
0
      auto const component_count = inst->GetSingleWordOperand(2);
127
0
      value = 6 + HashTypeAndConst(component_type) * (component_count - 1);
128
0
      break;
129
0
    }
130
0
    case spv::Op::OpTypeMatrix: {
131
0
      auto const column_type = inst->GetSingleWordOperand(1);
132
0
      auto const column_count = inst->GetSingleWordOperand(2);
133
0
      value = 30 + HashTypeAndConst(column_type) * (column_count - 1);
134
0
      break;
135
0
    }
136
0
    case spv::Op::OpTypeImage: {
137
      // TODO: Why isn't the format used to compute the hash value?
138
0
      auto const sampled_type = inst->GetSingleWordOperand(1);
139
0
      auto const dim = inst->GetSingleWordOperand(2);
140
0
      auto const depth = inst->GetSingleWordOperand(3);
141
0
      auto const arrayed = inst->GetSingleWordOperand(4);
142
0
      auto const ms = inst->GetSingleWordOperand(5);
143
0
      auto const sampled = inst->GetSingleWordOperand(6);
144
0
      value = 120 + HashTypeAndConst(sampled_type) + dim + depth * 8 * 16 +
145
0
              arrayed * 4 * 16 + ms * 2 * 16 + sampled * 1 * 16;
146
0
      break;
147
0
    }
148
0
    case spv::Op::OpTypeSampler:
149
0
      value = 500;
150
0
      break;
151
0
    case spv::Op::OpTypeSampledImage:
152
0
      value = 502;
153
0
      break;
154
0
    case spv::Op::OpTypeArray: {
155
0
      auto const element_type = inst->GetSingleWordOperand(1);
156
0
      auto const length = inst->GetSingleWordOperand(2);
157
0
      value = 501 + HashTypeAndConst(element_type) * length;
158
0
      break;
159
0
    }
160
0
    case spv::Op::OpTypeRuntimeArray: {
161
0
      auto const element_type = inst->GetSingleWordOperand(1);
162
0
      value = 5000 + HashTypeAndConst(element_type);
163
0
      break;
164
0
    }
165
0
    case spv::Op::OpTypeStruct:
166
0
      value = 10000;
167
0
      for (uint32_t w = 1; w < inst->NumOperandWords(); ++w) {
168
0
        value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w));
169
0
      }
170
0
      break;
171
0
    case spv::Op::OpTypeOpaque: {
172
      // TODO: Name is a literal that may have more than one word.
173
0
      auto const name = inst->GetSingleWordOperand(1);
174
0
      value = 6000 + name;
175
0
      break;
176
0
    }
177
0
    case spv::Op::OpTypePointer: {
178
0
      auto const type = inst->GetSingleWordOperand(2);
179
0
      value = 100000 + HashTypeAndConst(type);
180
0
      break;
181
0
    }
182
0
    case spv::Op::OpTypeFunction:
183
0
      value = 200000;
184
0
      for (uint32_t w = 1; w < inst->NumOperandWords(); ++w) {
185
0
        value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w));
186
0
      }
187
0
      break;
188
0
    case spv::Op::OpTypeEvent:
189
0
      value = 300000;
190
0
      break;
191
0
    case spv::Op::OpTypeDeviceEvent:
192
0
      value = 300001;
193
0
      break;
194
0
    case spv::Op::OpTypeReserveId:
195
0
      value = 300002;
196
0
      break;
197
0
    case spv::Op::OpTypeQueue:
198
0
      value = 300003;
199
0
      break;
200
0
    case spv::Op::OpTypePipe:
201
0
      value = 300004;
202
0
      break;
203
0
    case spv::Op::OpTypePipeStorage:
204
0
      value = 300005;
205
0
      break;
206
0
    case spv::Op::OpTypeNamedBarrier:
207
0
      value = 300006;
208
0
      break;
209
0
    case spv::Op::OpConstantTrue:
210
0
      value = 300007;
211
0
      break;
212
0
    case spv::Op::OpConstantFalse:
213
0
      value = 300008;
214
0
      break;
215
0
    case spv::Op::OpTypeRayQueryKHR:
216
0
      value = 300009;
217
0
      break;
218
0
    case spv::Op::OpTypeAccelerationStructureKHR:
219
0
      value = 300010;
220
0
      break;
221
    // Don't map the following types.
222
    // TODO: These types were not remapped in the glslang version of the
223
    // remapper. Support should be added as necessary.
224
0
    case spv::Op::OpTypeCooperativeMatrixNV:
225
0
    case spv::Op::OpTypeCooperativeMatrixKHR:
226
0
    case spv::Op::OpTypeVectorIdEXT:
227
0
    case spv::Op::OpTypeHitObjectNV:
228
0
    case spv::Op::OpTypeUntypedPointerKHR:
229
0
    case spv::Op::OpTypeNodePayloadArrayAMDX:
230
0
    case spv::Op::OpTypeTensorLayoutNV:
231
0
    case spv::Op::OpTypeTensorViewNV:
232
0
    case spv::Op::OpTypeTensorARM:
233
0
    case spv::Op::OpTypeTaskSequenceINTEL:
234
0
      value = unmapped_;
235
0
      break;
236
0
    case spv::Op::OpConstant: {
237
0
      auto const result_type = inst->GetSingleWordOperand(0);
238
0
      value = 400011 + HashTypeAndConst(result_type);
239
0
      auto const literal = inst->GetOperand(2);
240
0
      for (uint32_t w = 0; w < literal.words.size(); ++w) {
241
0
        value += (w + 3) * literal.words[w];
242
0
      }
243
0
      break;
244
0
    }
245
0
    case spv::Op::OpConstantComposite: {
246
0
      auto const result_type = inst->GetSingleWordOperand(0);
247
0
      value = 300011 + HashTypeAndConst(result_type);
248
0
      for (uint32_t w = 2; w < inst->NumOperandWords(); ++w) {
249
0
        value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w));
250
0
      }
251
0
      break;
252
0
    }
253
0
    case spv::Op::OpConstantNull: {
254
0
      auto const result_type = inst->GetSingleWordOperand(0);
255
0
      value = 500009 + HashTypeAndConst(result_type);
256
0
      break;
257
0
    }
258
0
    case spv::Op::OpConstantSampler: {
259
0
      auto const result_type = inst->GetSingleWordOperand(0);
260
0
      value = 600011 + HashTypeAndConst(result_type);
261
0
      for (uint32_t w = 2; w < inst->NumOperandWords(); ++w) {
262
0
        value += (w + 1) * inst->GetSingleWordOperand(w);
263
0
      }
264
0
      break;
265
0
    }
266
    // Don't map the following constants.
267
    // TODO: These constants were not remapped in the glslang version of the
268
    // remapper. Support should be added as necessary.
269
0
    case spv::Op::OpConstantCompositeReplicateEXT:
270
0
    case spv::Op::OpConstantFunctionPointerINTEL:
271
0
    case spv::Op::OpConstantStringAMDX:
272
0
    case spv::Op::OpSpecConstantTrue:
273
0
    case spv::Op::OpSpecConstantFalse:
274
0
    case spv::Op::OpSpecConstant:
275
0
    case spv::Op::OpSpecConstantComposite:
276
0
    case spv::Op::OpSpecConstantCompositeReplicateEXT:
277
0
    case spv::Op::OpSpecConstantOp:
278
0
    case spv::Op::OpSpecConstantStringAMDX:
279
0
      value = unmapped_;
280
0
      break;
281
    // TODO: Add additional types/constants as needed. See
282
    // spvOpcodeGeneratesType and spvOpcodeIsConstant.
283
0
    default:
284
0
      context()->consumer()(SPV_MSG_WARNING, "", {0, 0, 0},
285
0
                            "unhandled opcode will not be canonicalized");
286
0
      break;
287
0
  }
288
289
0
  return value;
290
0
}
291
292
0
void CanonicalizeIdsPass::CanonicalizeNames() {
293
0
  static constexpr std::uint32_t soft_type_id_limit = 3011;  // Small prime.
294
0
  static constexpr std::uint32_t first_mapped_id =
295
0
      3019;  // Offset into ID space.
296
297
0
  for (auto const& [name, target] : name_ids_) {
298
0
    if (!IsOldIdUnmapped(target)) {
299
0
      continue;
300
0
    }
301
302
0
    spv::Id hash_value = 1911;
303
0
    for (const char c : name) {
304
0
      hash_value = hash_value * 1009 + c;
305
0
    }
306
307
0
    if (IsOldIdUnmapped(target)) {
308
0
      SetNewId(target, hash_value % soft_type_id_limit + first_mapped_id);
309
0
    }
310
0
  }
311
0
}
312
313
0
void CanonicalizeIdsPass::CanonicalizeFunctions() {
314
0
  static constexpr std::uint32_t soft_type_id_limit = 19071;  // Small prime.
315
0
  static constexpr std::uint32_t first_mapped_id =
316
0
      6203;  // Offset into ID space.
317
  // Window size for context-sensitive canonicalization values
318
  // Empirical best size from a single data set.  TODO: Would be a good tunable.
319
  // We essentially perform a little convolution around each instruction,
320
  // to capture the flavor of nearby code, to hopefully match to similar
321
  // code in other modules.
322
0
  static const int32_t window_size = 2;
323
324
0
  for (auto const func_id : function_ids_) {
325
    // Store the instructions and opcode hash values in vectors so that the
326
    // window of instructions can be easily accessed and avoid having to
327
    // recompute the hash value repeatedly in overlapping windows.
328
0
    std::vector<Instruction*> insts;
329
0
    std::vector<uint32_t> opcode_hashvals;
330
0
    auto const func = context()->GetFunction(func_id);
331
0
    func->WhileEachInst([&](Instruction* inst) {
332
0
      insts.emplace_back(inst);
333
0
      opcode_hashvals.emplace_back(HashOpCode(inst));
334
0
      return true;
335
0
    });
336
337
    // For every instruction in the function, compute the hash value using the
338
    // instruction and a small window of surrounding instructions.
339
0
    assert(insts.size() < (size_t)std::numeric_limits<int32_t>::max());
340
0
    for (int32_t i = 0; i < (int32_t)insts.size(); ++i) {
341
0
      auto const inst = insts[i];
342
0
      if (!inst->HasResultId()) {
343
0
        continue;
344
0
      }
345
346
0
      auto const old_id = inst->result_id();
347
0
      if (!IsOldIdUnmapped(old_id)) {
348
0
        continue;
349
0
      }
350
351
0
      int32_t const lower_bound = std::max(0, i - window_size);
352
0
      int32_t const upper_bound =
353
0
          std::min((int32_t)insts.size() - 1, i + window_size);
354
0
      spv::Id hash_value = func_id * 17;  // Small prime.
355
      // Include the hash value of the preceding instructions in the hash but
356
      // don't include instructions before the OpFunction.
357
0
      for (int32_t j = i - 1; j >= lower_bound; --j) {
358
0
        auto const local_inst = insts[j];
359
0
        if (local_inst->opcode() == spv::Op::OpFunction) {
360
0
          break;
361
0
        }
362
363
0
        hash_value = hash_value * 30103 +
364
0
                     opcode_hashvals[j];  // 30103 is a semi-arbitrary prime.
365
0
      }
366
367
      // Include the hash value of the subsequent instructions in the hash but
368
      // don't include instructions past OpFunctionEnd.
369
0
      for (int32_t j = i; j <= upper_bound; ++j) {
370
0
        auto const local_inst = insts[j];
371
0
        if (local_inst->opcode() == spv::Op::OpFunctionEnd) {
372
0
          break;
373
0
        }
374
375
0
        hash_value = hash_value * 30103 +
376
0
                     opcode_hashvals[j];  // 30103 is a semiarbitrary prime.
377
0
      }
378
379
0
      SetNewId(old_id, hash_value % soft_type_id_limit + first_mapped_id);
380
0
    }
381
0
  }
382
0
}
383
384
0
spv::Id CanonicalizeIdsPass::HashOpCode(Instruction const* const inst) const {
385
0
  auto const op_code = inst->opcode();
386
0
  std::uint32_t offset = 0;
387
0
  if (op_code == spv::Op::OpExtInst) {
388
    // offset is literal instruction
389
0
    offset = inst->GetSingleWordOperand(3);
390
0
  }
391
392
0
  return (std::uint32_t)op_code * 19 + offset;  // 19 is a small prime.
393
0
}
394
395
// Assign remaining IDs sequentially from remaining holes in the new ID space.
396
0
void CanonicalizeIdsPass::CanonicalizeRemainders() {
397
0
  spv::Id next_id = 1;
398
0
  for (uint32_t old_id = 0; old_id < new_id_.size(); ++old_id) {
399
0
    if (IsOldIdUnmapped(old_id)) {
400
0
      next_id = SetNewId(old_id, next_id);
401
0
    }
402
0
  }
403
0
}
404
405
0
bool CanonicalizeIdsPass::ApplyMap() {
406
0
  bool modified = false;
407
0
  context()->module()->ForEachInst(
408
0
      [this, &modified](Instruction* inst) {
409
0
        for (auto operand = inst->begin(); operand != inst->end(); ++operand) {
410
0
          const auto type = operand->type;
411
0
          if (spvIsIdType(type)) {
412
0
            uint32_t& id = operand->words[0];
413
0
            uint32_t const new_id = GetNewId(id);
414
0
            if (new_id == unused_) {
415
0
              continue;
416
0
            }
417
418
0
            assert(new_id != unmapped_ && "new_id should not be unmapped_");
419
420
0
            if (id != new_id) {
421
0
              modified = true;
422
0
              id = new_id;
423
0
              if (type == SPV_OPERAND_TYPE_RESULT_ID) {
424
0
                inst->SetResultId(new_id);
425
0
              } else if (type == SPV_OPERAND_TYPE_TYPE_ID) {
426
0
                inst->SetResultType(new_id);
427
0
              }
428
0
            }
429
0
          }
430
0
        }
431
0
        const auto& debug_scope = inst->GetDebugScope();
432
0
        if (debug_scope.GetLexicalScope() != kNoDebugScope) {
433
0
          uint32_t old_scope = debug_scope.GetLexicalScope();
434
0
          uint32_t new_scope = GetNewId(old_scope);
435
0
          uint32_t old_inlined_at = debug_scope.GetInlinedAt();
436
0
          uint32_t new_inlined_at = old_inlined_at != kNoInlinedAt
437
0
                                        ? GetNewId(old_inlined_at)
438
0
                                        : old_inlined_at;
439
0
          if ((new_scope != unused_ && new_scope != old_scope) ||
440
0
              (new_inlined_at != unused_ && new_inlined_at != old_inlined_at)) {
441
0
            DebugScope new_debug_scope(new_scope, new_inlined_at);
442
0
            inst->SetDebugScope(new_debug_scope);
443
0
            modified = true;
444
0
          }
445
0
        }
446
0
      },
447
0
      true);
448
449
0
  return modified;
450
0
}
451
452
0
spv::Id CanonicalizeIdsPass::GetBound() const {
453
0
  return context()->module()->id_bound();
454
0
}
455
456
0
void CanonicalizeIdsPass::UpdateBound() {
457
0
  context()->module()->SetIdBound(context()->module()->ComputeIdBound());
458
459
0
  context()->ResetFeatureManager();
460
0
}
461
462
// Set a new ID. If the new ID is alreadly claimed, the next consecutive ID
463
// will be claimed, mapped, and returned to the caller.
464
0
spv::Id CanonicalizeIdsPass::SetNewId(spv::Id const old_id, spv::Id new_id) {
465
0
  assert(old_id < GetBound() && "don't remap an ID that is out of bounds");
466
467
0
  if (old_id >= new_id_.size()) {
468
0
    new_id_.resize(old_id + 1, unused_);
469
0
  }
470
471
0
  if (new_id != unmapped_ && new_id != unused_) {
472
0
    assert(!IsOldIdUnused(old_id) && "don't remap unused IDs");
473
0
    assert(IsOldIdUnmapped(old_id) && "don't remap already mapped IDs");
474
475
0
    new_id = ClaimNewId(new_id);
476
0
  }
477
478
0
  new_id_[old_id] = new_id;
479
480
0
  return new_id;
481
0
}
482
483
// Helper function for SetNewID. Claim a new ID. If the new ID is already
484
// claimed, the next consecutive ID will be claimed and returned to the caller.
485
0
spv::Id CanonicalizeIdsPass::ClaimNewId(spv::Id new_id) {
486
  // Return the ID if it's not taken.
487
0
  auto iter = claimed_new_ids_.find(new_id);
488
0
  if (iter != claimed_new_ids_.end()) {
489
    // Otherwise, search for the next unused ID using our current iterator.
490
    // Technically, it's a linear search across the set starting at the
491
    // iterator, but it's not as bad as it would appear in practice assuming the
492
    // hash values are well distributed.
493
0
    iter = std::adjacent_find(iter, claimed_new_ids_.end(), [](int a, int b) {
494
0
      return a + 1 != b;  // Stop at the first non-consecutive pair.
495
0
    });
496
0
    if (iter != claimed_new_ids_.end()) {
497
0
      new_id =
498
0
          *iter + 1;  // We need the next ID after where the search stopped.
499
0
    } else {
500
0
      new_id = *(--iter) + 1;  // We reached the end so we use the next ID.
501
0
    }
502
0
  }
503
504
0
  assert(!IsNewIdClaimed(new_id) &&
505
0
         "don't remap to an ID that is already claimed");
506
0
  iter = claimed_new_ids_.insert(iter, new_id);
507
0
  assert(*iter == new_id);
508
509
0
  return new_id;
510
0
}
511
512
0
std::string CanonicalizeIdsPass::IdAsString(spv::Id const id) const {
513
0
  if (id == unused_) {
514
0
    return "unused";
515
0
  } else if (id == unmapped_) {
516
0
    return "unmapped";
517
0
  } else {
518
0
    return std::to_string(id);
519
0
  }
520
0
}
521
522
0
void CanonicalizeIdsPass::PrintNewIds() const {
523
0
  for (spv::Id id = 0; id < new_id_.size(); ++id) {
524
0
    auto const message =
525
0
        "new id[" + IdAsString(id) + "]: " + IdAsString(new_id_[id]);
526
0
    context()->consumer()(SPV_MSG_INFO, "", {0, 0, 0}, message.c_str());
527
0
  }
528
0
}
529
530
}  // namespace opt
531
}  // namespace spvtools