/src/spirv-tools/source/opt/canonicalize_ids_pass.cpp

Source
// Copyright (c) 2025 LunarG Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "source/opt/canonicalize_ids_pass.h"

#include <algorithm>
#include <limits>

namespace spvtools {
namespace opt {

Pass::Status CanonicalizeIdsPass::Process() {
  // Initialize the new ID map.
  new_id_.resize(GetBound(), unused_);

  // Scan the IDs and set to unmapped.
  ScanIds();

  // Create new IDs for types and consts.
  CanonicalizeTypeAndConst();

  // Create new IDs for names.
  CanonicalizeNames();

  // Create new IDs for functions.
  CanonicalizeFunctions();

  // Create new IDs for everything else.
  CanonicalizeRemainders();

  // Apply the new IDs to the module.
  auto const modified = ApplyMap();

  // Update bound in the header.
  if (modified) {
    UpdateBound();
  }

  return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
}

void CanonicalizeIdsPass::ScanIds() {
  get_module()->ForEachInst(
      [this](Instruction* inst) {
        // Look for types and constants.
        if (spvOpcodeGeneratesType(inst->opcode()) ||
            spvOpcodeIsConstant(inst->opcode())) {
          type_and_const_ids_.push_back(inst->result_id());
          SetNewId(inst->result_id(), unmapped_);
        }
        // Look for names.
        else if (inst->opcode() == spv::Op::OpName) {
          // store name string in map so that we can compute the hash later
          auto const name = inst->GetOperand(1).AsString();
          auto const target = inst->GetSingleWordInOperand(0);
          name_ids_[name] = target;
          SetNewId(target, unmapped_);
        }
        // Look for function IDs.
        else if (inst->opcode() == spv::Op::OpFunction) {
          auto const res_id = inst->result_id();
          function_ids_.push_back(res_id);
          SetNewId(res_id, unmapped_);
        }
        // Look for remaining result IDs.
        else if (inst->HasResultId()) {
          auto const res_id = inst->result_id();
          SetNewId(res_id, unmapped_);
        }
      },
      true);
}

void CanonicalizeIdsPass::CanonicalizeTypeAndConst() {
  // Remap type IDs.
  static constexpr std::uint32_t soft_type_id_limit = 3011;  // small prime.
  static constexpr std::uint32_t first_mapped_id = 8;  // offset into ID space
  for (auto const id : type_and_const_ids_) {
    if (!IsOldIdUnmapped(id)) {
      continue;
    }

    // Compute the hash value.
    auto const hash_value = HashTypeAndConst(id);
    if (hash_value != unmapped_) {
      SetNewId(id, hash_value % soft_type_id_limit + first_mapped_id);
    }
  }
}

// Hash types to canonical values.  This can return ID collisions (it's a bit
// inevitable): it's up to the caller to handle that gracefully.
spv::Id CanonicalizeIdsPass::HashTypeAndConst(spv::Id const id) const {
  spv::Id value = 0;

  auto const inst = get_def_use_mgr()->GetDef(id);
  auto const op_code = inst->opcode();
  switch (op_code) {
    case spv::Op::OpTypeVoid:
      value = 0;
      break;
    case spv::Op::OpTypeBool:
      value = 1;
      break;
    case spv::Op::OpTypeInt: {
      auto const signedness = inst->GetSingleWordOperand(2);
      value = 3 + signedness;
      break;
    }
    case spv::Op::OpTypeFloat:
      value = 5;
      break;
    case spv::Op::OpTypeVector: {
      auto const component_type = inst->GetSingleWordOperand(1);
      auto const component_count = inst->GetSingleWordOperand(2);
      value = 6 + HashTypeAndConst(component_type) * (component_count - 1);
      break;
    }
    case spv::Op::OpTypeMatrix: {
      auto const column_type = inst->GetSingleWordOperand(1);
      auto const column_count = inst->GetSingleWordOperand(2);
      value = 30 + HashTypeAndConst(column_type) * (column_count - 1);
      break;
    }
    case spv::Op::OpTypeImage: {
      // TODO: Why isn't the format used to compute the hash value?
      auto const sampled_type = inst->GetSingleWordOperand(1);
      auto const dim = inst->GetSingleWordOperand(2);
      auto const depth = inst->GetSingleWordOperand(3);
      auto const arrayed = inst->GetSingleWordOperand(4);
      auto const ms = inst->GetSingleWordOperand(5);
      auto const sampled = inst->GetSingleWordOperand(6);
      value = 120 + HashTypeAndConst(sampled_type) + dim + depth * 8 * 16 +
              arrayed * 4 * 16 + ms * 2 * 16 + sampled * 1 * 16;
      break;
    }
    case spv::Op::OpTypeSampler:
      value = 500;
      break;
    case spv::Op::OpTypeSampledImage:
      value = 502;
      break;
    case spv::Op::OpTypeArray: {
      auto const element_type = inst->GetSingleWordOperand(1);
      auto const length = inst->GetSingleWordOperand(2);
      value = 501 + HashTypeAndConst(element_type) * length;
      break;
    }
    case spv::Op::OpTypeRuntimeArray: {
      auto const element_type = inst->GetSingleWordOperand(1);
      value = 5000 + HashTypeAndConst(element_type);
      break;
    }
    case spv::Op::OpTypeStruct:
      value = 10000;
      for (uint32_t w = 1; w < inst->NumOperandWords(); ++w) {
        value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w));
      }
      break;
    case spv::Op::OpTypeOpaque: {
      // TODO: Name is a literal that may have more than one word.
      auto const name = inst->GetSingleWordOperand(1);
      value = 6000 + name;
      break;
    }
    case spv::Op::OpTypePointer: {
      auto const type = inst->GetSingleWordOperand(2);
      value = 100000 + HashTypeAndConst(type);
      break;
    }
    case spv::Op::OpTypeFunction:
      value = 200000;
      for (uint32_t w = 1; w < inst->NumOperandWords(); ++w) {
        value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w));
      }
      break;
    case spv::Op::OpTypeEvent:
      value = 300000;
      break;
    case spv::Op::OpTypeDeviceEvent:
      value = 300001;
      break;
    case spv::Op::OpTypeReserveId:
      value = 300002;
      break;
    case spv::Op::OpTypeQueue:
      value = 300003;
      break;
    case spv::Op::OpTypePipe:
      value = 300004;
      break;
    case spv::Op::OpTypePipeStorage:
      value = 300005;
      break;
    case spv::Op::OpTypeNamedBarrier:
      value = 300006;
      break;
    case spv::Op::OpConstantTrue:
      value = 300007;
      break;
    case spv::Op::OpConstantFalse:
      value = 300008;
      break;
    case spv::Op::OpTypeRayQueryKHR:
      value = 300009;
      break;
    case spv::Op::OpTypeAccelerationStructureKHR:
      value = 300010;
      break;
    // Don't map the following types.
    // TODO: These types were not remapped in the glslang version of the
    // remapper. Support should be added as necessary.
    case spv::Op::OpTypeCooperativeMatrixNV:
    case spv::Op::OpTypeCooperativeMatrixKHR:
    case spv::Op::OpTypeVectorIdEXT:
    case spv::Op::OpTypeHitObjectNV:
    case spv::Op::OpTypeUntypedPointerKHR:
    case spv::Op::OpTypeNodePayloadArrayAMDX:
    case spv::Op::OpTypeTensorLayoutNV:
    case spv::Op::OpTypeTensorViewNV:
    case spv::Op::OpTypeTensorARM:
    case spv::Op::OpTypeTaskSequenceINTEL:
      value = unmapped_;
      break;
    case spv::Op::OpConstant: {
      auto const result_type = inst->GetSingleWordOperand(0);
      value = 400011 + HashTypeAndConst(result_type);
      auto const literal = inst->GetOperand(2);
      for (uint32_t w = 0; w < literal.words.size(); ++w) {
        value += (w + 3) * literal.words[w];
      }
      break;
    }
    case spv::Op::OpConstantComposite: {
      auto const result_type = inst->GetSingleWordOperand(0);
      value = 300011 + HashTypeAndConst(result_type);
      for (uint32_t w = 2; w < inst->NumOperandWords(); ++w) {
        value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w));
      }
      break;
    }
    case spv::Op::OpConstantNull: {
      auto const result_type = inst->GetSingleWordOperand(0);
      value = 500009 + HashTypeAndConst(result_type);
      break;
    }
    case spv::Op::OpConstantSampler: {
      auto const result_type = inst->GetSingleWordOperand(0);
      value = 600011 + HashTypeAndConst(result_type);
      for (uint32_t w = 2; w < inst->NumOperandWords(); ++w) {
        value += (w + 1) * inst->GetSingleWordOperand(w);
      }
      break;
    }
    // Don't map the following constants.
    // TODO: These constants were not remapped in the glslang version of the
    // remapper. Support should be added as necessary.
    case spv::Op::OpConstantCompositeReplicateEXT:
    case spv::Op::OpConstantFunctionPointerINTEL:
    case spv::Op::OpConstantStringAMDX:
    case spv::Op::OpSpecConstantTrue:
    case spv::Op::OpSpecConstantFalse:
    case spv::Op::OpSpecConstant:
    case spv::Op::OpSpecConstantComposite:
    case spv::Op::OpSpecConstantCompositeReplicateEXT:
    case spv::Op::OpSpecConstantOp:
    case spv::Op::OpSpecConstantStringAMDX:
      value = unmapped_;
      break;
    // TODO: Add additional types/constants as needed. See
    // spvOpcodeGeneratesType and spvOpcodeIsConstant.
    default:
      context()->consumer()(SPV_MSG_WARNING, "", {0, 0, 0},
                            "unhandled opcode will not be canonicalized");
      break;
  }

  return value;
}

void CanonicalizeIdsPass::CanonicalizeNames() {
  static constexpr std::uint32_t soft_type_id_limit = 3011;  // Small prime.
  static constexpr std::uint32_t first_mapped_id =
      3019;  // Offset into ID space.

  for (auto const& [name, target] : name_ids_) {
    if (!IsOldIdUnmapped(target)) {
      continue;
    }

    spv::Id hash_value = 1911;
    for (const char c : name) {
      hash_value = hash_value * 1009 + c;
    }

    if (IsOldIdUnmapped(target)) {
      SetNewId(target, hash_value % soft_type_id_limit + first_mapped_id);
    }
  }
}

void CanonicalizeIdsPass::CanonicalizeFunctions() {
  static constexpr std::uint32_t soft_type_id_limit = 19071;  // Small prime.
  static constexpr std::uint32_t first_mapped_id =
      6203;  // Offset into ID space.
  // Window size for context-sensitive canonicalization values
  // Empirical best size from a single data set.  TODO: Would be a good tunable.
  // We essentially perform a little convolution around each instruction,
  // to capture the flavor of nearby code, to hopefully match to similar
  // code in other modules.
  static const int32_t window_size = 2;

  for (auto const func_id : function_ids_) {
    // Store the instructions and opcode hash values in vectors so that the
    // window of instructions can be easily accessed and avoid having to
    // recompute the hash value repeatedly in overlapping windows.
    std::vector<Instruction*> insts;
    std::vector<uint32_t> opcode_hashvals;
    auto const func = context()->GetFunction(func_id);
    func->WhileEachInst([&](Instruction* inst) {
      insts.emplace_back(inst);
      opcode_hashvals.emplace_back(HashOpCode(inst));
      return true;
    });

    // For every instruction in the function, compute the hash value using the
    // instruction and a small window of surrounding instructions.
    assert(insts.size() < (size_t)std::numeric_limits<int32_t>::max());
    for (int32_t i = 0; i < (int32_t)insts.size(); ++i) {
      auto const inst = insts[i];
      if (!inst->HasResultId()) {
        continue;
      }

      auto const old_id = inst->result_id();
      if (!IsOldIdUnmapped(old_id)) {
        continue;
      }

      int32_t const lower_bound = std::max(0, i - window_size);
      int32_t const upper_bound =
          std::min((int32_t)insts.size() - 1, i + window_size);
      spv::Id hash_value = func_id * 17;  // Small prime.
      // Include the hash value of the preceding instructions in the hash but
      // don't include instructions before the OpFunction.
      for (int32_t j = i - 1; j >= lower_bound; --j) {
        auto const local_inst = insts[j];
        if (local_inst->opcode() == spv::Op::OpFunction) {
          break;
        }

        hash_value = hash_value * 30103 +
                     opcode_hashvals[j];  // 30103 is a semi-arbitrary prime.
      }

      // Include the hash value of the subsequent instructions in the hash but
      // don't include instructions past OpFunctionEnd.
      for (int32_t j = i; j <= upper_bound; ++j) {
        auto const local_inst = insts[j];
        if (local_inst->opcode() == spv::Op::OpFunctionEnd) {
          break;
        }

        hash_value = hash_value * 30103 +
                     opcode_hashvals[j];  // 30103 is a semiarbitrary prime.
      }

      SetNewId(old_id, hash_value % soft_type_id_limit + first_mapped_id);
    }
  }
}

spv::Id CanonicalizeIdsPass::HashOpCode(Instruction const* const inst) const {
  auto const op_code = inst->opcode();
  std::uint32_t offset = 0;
  if (op_code == spv::Op::OpExtInst) {
    // offset is literal instruction
    offset = inst->GetSingleWordOperand(3);
  }

  return (std::uint32_t)op_code * 19 + offset;  // 19 is a small prime.
}

// Assign remaining IDs sequentially from remaining holes in the new ID space.
void CanonicalizeIdsPass::CanonicalizeRemainders() {
  spv::Id next_id = 1;
  for (uint32_t old_id = 0; old_id < new_id_.size(); ++old_id) {
    if (IsOldIdUnmapped(old_id)) {
      next_id = SetNewId(old_id, next_id);
    }
  }
}

bool CanonicalizeIdsPass::ApplyMap() {
  bool modified = false;
  context()->module()->ForEachInst(
      [this, &modified](Instruction* inst) {
        for (auto operand = inst->begin(); operand != inst->end(); ++operand) {
          const auto type = operand->type;
          if (spvIsIdType(type)) {
            uint32_t& id = operand->words[0];
            uint32_t const new_id = GetNewId(id);
            if (new_id == unused_) {
              continue;
            }

            assert(new_id != unmapped_ && "new_id should not be unmapped_");

            if (id != new_id) {
              modified = true;
              id = new_id;
              if (type == SPV_OPERAND_TYPE_RESULT_ID) {
                inst->SetResultId(new_id);
              } else if (type == SPV_OPERAND_TYPE_TYPE_ID) {
                inst->SetResultType(new_id);
              }
            }
          }
        }
        const auto& debug_scope = inst->GetDebugScope();
        if (debug_scope.GetLexicalScope() != kNoDebugScope) {
          uint32_t old_scope = debug_scope.GetLexicalScope();
          uint32_t new_scope = GetNewId(old_scope);
          uint32_t old_inlined_at = debug_scope.GetInlinedAt();
          uint32_t new_inlined_at = old_inlined_at != kNoInlinedAt
                                        ? GetNewId(old_inlined_at)
                                        : old_inlined_at;
          if ((new_scope != unused_ && new_scope != old_scope) ||
              (new_inlined_at != unused_ && new_inlined_at != old_inlined_at)) {
            DebugScope new_debug_scope(new_scope, new_inlined_at);
            inst->SetDebugScope(new_debug_scope);
            modified = true;
          }
        }
      },
      true);

  return modified;
}

spv::Id CanonicalizeIdsPass::GetBound() const {
  return context()->module()->id_bound();
}

void CanonicalizeIdsPass::UpdateBound() {
  context()->module()->SetIdBound(context()->module()->ComputeIdBound());

  context()->ResetFeatureManager();
}

// Set a new ID. If the new ID is alreadly claimed, the next consecutive ID
// will be claimed, mapped, and returned to the caller.
spv::Id CanonicalizeIdsPass::SetNewId(spv::Id const old_id, spv::Id new_id) {
  assert(old_id < GetBound() && "don't remap an ID that is out of bounds");

  if (old_id >= new_id_.size()) {
    new_id_.resize(old_id + 1, unused_);
  }

  if (new_id != unmapped_ && new_id != unused_) {
    assert(!IsOldIdUnused(old_id) && "don't remap unused IDs");
    assert(IsOldIdUnmapped(old_id) && "don't remap already mapped IDs");

    new_id = ClaimNewId(new_id);
  }

  new_id_[old_id] = new_id;

  return new_id;
}

// Helper function for SetNewID. Claim a new ID. If the new ID is already
// claimed, the next consecutive ID will be claimed and returned to the caller.
spv::Id CanonicalizeIdsPass::ClaimNewId(spv::Id new_id) {
  // Return the ID if it's not taken.
  auto iter = claimed_new_ids_.find(new_id);
  if (iter != claimed_new_ids_.end()) {
    // Otherwise, search for the next unused ID using our current iterator.
    // Technically, it's a linear search across the set starting at the
    // iterator, but it's not as bad as it would appear in practice assuming the
    // hash values are well distributed.
    iter = std::adjacent_find(iter, claimed_new_ids_.end(), [](int a, int b) {
      return a + 1 != b;  // Stop at the first non-consecutive pair.
    });
    if (iter != claimed_new_ids_.end()) {
      new_id =
          *iter + 1;  // We need the next ID after where the search stopped.
    } else {
      new_id = *(--iter) + 1;  // We reached the end so we use the next ID.
    }
  }

  assert(!IsNewIdClaimed(new_id) &&
         "don't remap to an ID that is already claimed");
  iter = claimed_new_ids_.insert(iter, new_id);
  assert(*iter == new_id);

  return new_id;
}

std::string CanonicalizeIdsPass::IdAsString(spv::Id const id) const {
  if (id == unused_) {
    return "unused";
  } else if (id == unmapped_) {
    return "unmapped";
  } else {
    return std::to_string(id);
  }
}

void CanonicalizeIdsPass::PrintNewIds() const {
  for (spv::Id id = 0; id < new_id_.size(); ++id) {
    auto const message =
        "new id[" + IdAsString(id) + "]: " + IdAsString(new_id_[id]);
    context()->consumer()(SPV_MSG_INFO, "", {0, 0, 0}, message.c_str());
  }
}

}  // namespace opt
}  // namespace spvtools

Coverage Report

Created: 2026-03-31 06:42

Line	Count	Source
1		// Copyright (c) 2025 LunarG Inc.
2		//
3		// Licensed under the Apache License, Version 2.0 (the "License");
4		// you may not use this file except in compliance with the License.
5		// You may obtain a copy of the License at
6		//
7		// http://www.apache.org/licenses/LICENSE-2.0
8		//
9		// Unless required by applicable law or agreed to in writing, software
10		// distributed under the License is distributed on an "AS IS" BASIS,
11		// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12		// See the License for the specific language governing permissions and
13		// limitations under the License.
14
15		#include "source/opt/canonicalize_ids_pass.h"
16
17		#include <algorithm>
18		#include <limits>
19
20		namespace spvtools {
21		namespace opt {
22
23	0	Pass::Status CanonicalizeIdsPass::Process() {
24		// Initialize the new ID map.
25	0	new_id_.resize(GetBound(), unused_);
26
27		// Scan the IDs and set to unmapped.
28	0	ScanIds();
29
30		// Create new IDs for types and consts.
31	0	CanonicalizeTypeAndConst();
32
33		// Create new IDs for names.
34	0	CanonicalizeNames();
35
36		// Create new IDs for functions.
37	0	CanonicalizeFunctions();
38
39		// Create new IDs for everything else.
40	0	CanonicalizeRemainders();
41
42		// Apply the new IDs to the module.
43	0	auto const modified = ApplyMap();
44
45		// Update bound in the header.
46	0	if (modified) {
47	0	UpdateBound();
48	0	}
49
50	0	return modified ? Status::SuccessWithChange : Status::SuccessWithoutChange;
51	0	}
52
53	0	void CanonicalizeIdsPass::ScanIds() {
54	0	get_module()->ForEachInst(
55	0	[this](Instruction* inst) {
56		// Look for types and constants.
57	0	if (spvOpcodeGeneratesType(inst->opcode()) \|\|
58	0	spvOpcodeIsConstant(inst->opcode())) {
59	0	type_and_const_ids_.push_back(inst->result_id());
60	0	SetNewId(inst->result_id(), unmapped_);
61	0	}
62		// Look for names.
63	0	else if (inst->opcode() == spv::Op::OpName) {
64		// store name string in map so that we can compute the hash later
65	0	auto const name = inst->GetOperand(1).AsString();
66	0	auto const target = inst->GetSingleWordInOperand(0);
67	0	name_ids_[name] = target;
68	0	SetNewId(target, unmapped_);
69	0	}
70		// Look for function IDs.
71	0	else if (inst->opcode() == spv::Op::OpFunction) {
72	0	auto const res_id = inst->result_id();
73	0	function_ids_.push_back(res_id);
74	0	SetNewId(res_id, unmapped_);
75	0	}
76		// Look for remaining result IDs.
77	0	else if (inst->HasResultId()) {
78	0	auto const res_id = inst->result_id();
79	0	SetNewId(res_id, unmapped_);
80	0	}
81	0	},
82	0	true);
83	0	}
84
85	0	void CanonicalizeIdsPass::CanonicalizeTypeAndConst() {
86		// Remap type IDs.
87	0	static constexpr std::uint32_t soft_type_id_limit = 3011; // small prime.
88	0	static constexpr std::uint32_t first_mapped_id = 8; // offset into ID space
89	0	for (auto const id : type_and_const_ids_) {
90	0	if (!IsOldIdUnmapped(id)) {
91	0	continue;
92	0	}
93
94		// Compute the hash value.
95	0	auto const hash_value = HashTypeAndConst(id);
96	0	if (hash_value != unmapped_) {
97	0	SetNewId(id, hash_value % soft_type_id_limit + first_mapped_id);
98	0	}
99	0	}
100	0	}
101
102		// Hash types to canonical values. This can return ID collisions (it's a bit
103		// inevitable): it's up to the caller to handle that gracefully.
104	0	spv::Id CanonicalizeIdsPass::HashTypeAndConst(spv::Id const id) const {
105	0	spv::Id value = 0;
106
107	0	auto const inst = get_def_use_mgr()->GetDef(id);
108	0	auto const op_code = inst->opcode();
109	0	switch (op_code) {
110	0	case spv::Op::OpTypeVoid:
111	0	value = 0;
112	0	break;
113	0	case spv::Op::OpTypeBool:
114	0	value = 1;
115	0	break;
116	0	case spv::Op::OpTypeInt: {
117	0	auto const signedness = inst->GetSingleWordOperand(2);
118	0	value = 3 + signedness;
119	0	break;
120	0	}
121	0	case spv::Op::OpTypeFloat:
122	0	value = 5;
123	0	break;
124	0	case spv::Op::OpTypeVector: {
125	0	auto const component_type = inst->GetSingleWordOperand(1);
126	0	auto const component_count = inst->GetSingleWordOperand(2);
127	0	value = 6 + HashTypeAndConst(component_type) * (component_count - 1);
128	0	break;
129	0	}
130	0	case spv::Op::OpTypeMatrix: {
131	0	auto const column_type = inst->GetSingleWordOperand(1);
132	0	auto const column_count = inst->GetSingleWordOperand(2);
133	0	value = 30 + HashTypeAndConst(column_type) * (column_count - 1);
134	0	break;
135	0	}
136	0	case spv::Op::OpTypeImage: {
137		// TODO: Why isn't the format used to compute the hash value?
138	0	auto const sampled_type = inst->GetSingleWordOperand(1);
139	0	auto const dim = inst->GetSingleWordOperand(2);
140	0	auto const depth = inst->GetSingleWordOperand(3);
141	0	auto const arrayed = inst->GetSingleWordOperand(4);
142	0	auto const ms = inst->GetSingleWordOperand(5);
143	0	auto const sampled = inst->GetSingleWordOperand(6);
144	0	value = 120 + HashTypeAndConst(sampled_type) + dim + depth * 8 * 16 +
145	0	arrayed * 4 * 16 + ms * 2 * 16 + sampled * 1 * 16;
146	0	break;
147	0	}
148	0	case spv::Op::OpTypeSampler:
149	0	value = 500;
150	0	break;
151	0	case spv::Op::OpTypeSampledImage:
152	0	value = 502;
153	0	break;
154	0	case spv::Op::OpTypeArray: {
155	0	auto const element_type = inst->GetSingleWordOperand(1);
156	0	auto const length = inst->GetSingleWordOperand(2);
157	0	value = 501 + HashTypeAndConst(element_type) * length;
158	0	break;
159	0	}
160	0	case spv::Op::OpTypeRuntimeArray: {
161	0	auto const element_type = inst->GetSingleWordOperand(1);
162	0	value = 5000 + HashTypeAndConst(element_type);
163	0	break;
164	0	}
165	0	case spv::Op::OpTypeStruct:
166	0	value = 10000;
167	0	for (uint32_t w = 1; w < inst->NumOperandWords(); ++w) {
168	0	value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w));
169	0	}
170	0	break;
171	0	case spv::Op::OpTypeOpaque: {
172		// TODO: Name is a literal that may have more than one word.
173	0	auto const name = inst->GetSingleWordOperand(1);
174	0	value = 6000 + name;
175	0	break;
176	0	}
177	0	case spv::Op::OpTypePointer: {
178	0	auto const type = inst->GetSingleWordOperand(2);
179	0	value = 100000 + HashTypeAndConst(type);
180	0	break;
181	0	}
182	0	case spv::Op::OpTypeFunction:
183	0	value = 200000;
184	0	for (uint32_t w = 1; w < inst->NumOperandWords(); ++w) {
185	0	value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w));
186	0	}
187	0	break;
188	0	case spv::Op::OpTypeEvent:
189	0	value = 300000;
190	0	break;
191	0	case spv::Op::OpTypeDeviceEvent:
192	0	value = 300001;
193	0	break;
194	0	case spv::Op::OpTypeReserveId:
195	0	value = 300002;
196	0	break;
197	0	case spv::Op::OpTypeQueue:
198	0	value = 300003;
199	0	break;
200	0	case spv::Op::OpTypePipe:
201	0	value = 300004;
202	0	break;
203	0	case spv::Op::OpTypePipeStorage:
204	0	value = 300005;
205	0	break;
206	0	case spv::Op::OpTypeNamedBarrier:
207	0	value = 300006;
208	0	break;
209	0	case spv::Op::OpConstantTrue:
210	0	value = 300007;
211	0	break;
212	0	case spv::Op::OpConstantFalse:
213	0	value = 300008;
214	0	break;
215	0	case spv::Op::OpTypeRayQueryKHR:
216	0	value = 300009;
217	0	break;
218	0	case spv::Op::OpTypeAccelerationStructureKHR:
219	0	value = 300010;
220	0	break;
221		// Don't map the following types.
222		// TODO: These types were not remapped in the glslang version of the
223		// remapper. Support should be added as necessary.
224	0	case spv::Op::OpTypeCooperativeMatrixNV:
225	0	case spv::Op::OpTypeCooperativeMatrixKHR:
226	0	case spv::Op::OpTypeVectorIdEXT:
227	0	case spv::Op::OpTypeHitObjectNV:
228	0	case spv::Op::OpTypeUntypedPointerKHR:
229	0	case spv::Op::OpTypeNodePayloadArrayAMDX:
230	0	case spv::Op::OpTypeTensorLayoutNV:
231	0	case spv::Op::OpTypeTensorViewNV:
232	0	case spv::Op::OpTypeTensorARM:
233	0	case spv::Op::OpTypeTaskSequenceINTEL:
234	0	value = unmapped_;
235	0	break;
236	0	case spv::Op::OpConstant: {
237	0	auto const result_type = inst->GetSingleWordOperand(0);
238	0	value = 400011 + HashTypeAndConst(result_type);
239	0	auto const literal = inst->GetOperand(2);
240	0	for (uint32_t w = 0; w < literal.words.size(); ++w) {
241	0	value += (w + 3) * literal.words[w];
242	0	}
243	0	break;
244	0	}
245	0	case spv::Op::OpConstantComposite: {
246	0	auto const result_type = inst->GetSingleWordOperand(0);
247	0	value = 300011 + HashTypeAndConst(result_type);
248	0	for (uint32_t w = 2; w < inst->NumOperandWords(); ++w) {
249	0	value += (w + 1) * HashTypeAndConst(inst->GetSingleWordOperand(w));
250	0	}
251	0	break;
252	0	}
253	0	case spv::Op::OpConstantNull: {
254	0	auto const result_type = inst->GetSingleWordOperand(0);
255	0	value = 500009 + HashTypeAndConst(result_type);
256	0	break;
257	0	}
258	0	case spv::Op::OpConstantSampler: {
259	0	auto const result_type = inst->GetSingleWordOperand(0);
260	0	value = 600011 + HashTypeAndConst(result_type);
261	0	for (uint32_t w = 2; w < inst->NumOperandWords(); ++w) {
262	0	value += (w + 1) * inst->GetSingleWordOperand(w);
263	0	}
264	0	break;
265	0	}
266		// Don't map the following constants.
267		// TODO: These constants were not remapped in the glslang version of the
268		// remapper. Support should be added as necessary.
269	0	case spv::Op::OpConstantCompositeReplicateEXT:
270	0	case spv::Op::OpConstantFunctionPointerINTEL:
271	0	case spv::Op::OpConstantStringAMDX:
272	0	case spv::Op::OpSpecConstantTrue:
273	0	case spv::Op::OpSpecConstantFalse:
274	0	case spv::Op::OpSpecConstant:
275	0	case spv::Op::OpSpecConstantComposite:
276	0	case spv::Op::OpSpecConstantCompositeReplicateEXT:
277	0	case spv::Op::OpSpecConstantOp:
278	0	case spv::Op::OpSpecConstantStringAMDX:
279	0	value = unmapped_;
280	0	break;
281		// TODO: Add additional types/constants as needed. See
282		// spvOpcodeGeneratesType and spvOpcodeIsConstant.
283	0	default:
284	0	context()->consumer()(SPV_MSG_WARNING, "", {0, 0, 0},
285	0	"unhandled opcode will not be canonicalized");
286	0	break;
287	0	}
288
289	0	return value;
290	0	}
291
292	0	void CanonicalizeIdsPass::CanonicalizeNames() {
293	0	static constexpr std::uint32_t soft_type_id_limit = 3011; // Small prime.
294	0	static constexpr std::uint32_t first_mapped_id =
295	0	3019; // Offset into ID space.
296
297	0	for (auto const& [name, target] : name_ids_) {
298	0	if (!IsOldIdUnmapped(target)) {
299	0	continue;
300	0	}
301
302	0	spv::Id hash_value = 1911;
303	0	for (const char c : name) {
304	0	hash_value = hash_value * 1009 + c;
305	0	}
306
307	0	if (IsOldIdUnmapped(target)) {
308	0	SetNewId(target, hash_value % soft_type_id_limit + first_mapped_id);
309	0	}
310	0	}
311	0	}
312
313	0	void CanonicalizeIdsPass::CanonicalizeFunctions() {
314	0	static constexpr std::uint32_t soft_type_id_limit = 19071; // Small prime.
315	0	static constexpr std::uint32_t first_mapped_id =
316	0	6203; // Offset into ID space.
317		// Window size for context-sensitive canonicalization values
318		// Empirical best size from a single data set. TODO: Would be a good tunable.
319		// We essentially perform a little convolution around each instruction,
320		// to capture the flavor of nearby code, to hopefully match to similar
321		// code in other modules.
322	0	static const int32_t window_size = 2;
323
324	0	for (auto const func_id : function_ids_) {
325		// Store the instructions and opcode hash values in vectors so that the
326		// window of instructions can be easily accessed and avoid having to
327		// recompute the hash value repeatedly in overlapping windows.
328	0	std::vector<Instruction*> insts;
329	0	std::vector<uint32_t> opcode_hashvals;
330	0	auto const func = context()->GetFunction(func_id);
331	0	func->WhileEachInst([&](Instruction* inst) {
332	0	insts.emplace_back(inst);
333	0	opcode_hashvals.emplace_back(HashOpCode(inst));
334	0	return true;
335	0	});
336
337		// For every instruction in the function, compute the hash value using the
338		// instruction and a small window of surrounding instructions.
339	0	assert(insts.size() < (size_t)std::numeric_limits<int32_t>::max());
340	0	for (int32_t i = 0; i < (int32_t)insts.size(); ++i) {
341	0	auto const inst = insts[i];
342	0	if (!inst->HasResultId()) {
343	0	continue;
344	0	}
345
346	0	auto const old_id = inst->result_id();
347	0	if (!IsOldIdUnmapped(old_id)) {
348	0	continue;
349	0	}
350
351	0	int32_t const lower_bound = std::max(0, i - window_size);
352	0	int32_t const upper_bound =
353	0	std::min((int32_t)insts.size() - 1, i + window_size);
354	0	spv::Id hash_value = func_id * 17; // Small prime.
355		// Include the hash value of the preceding instructions in the hash but
356		// don't include instructions before the OpFunction.
357	0	for (int32_t j = i - 1; j >= lower_bound; --j) {
358	0	auto const local_inst = insts[j];
359	0	if (local_inst->opcode() == spv::Op::OpFunction) {
360	0	break;
361	0	}
362
363	0	hash_value = hash_value * 30103 +
364	0	opcode_hashvals[j]; // 30103 is a semi-arbitrary prime.
365	0	}
366
367		// Include the hash value of the subsequent instructions in the hash but
368		// don't include instructions past OpFunctionEnd.
369	0	for (int32_t j = i; j <= upper_bound; ++j) {
370	0	auto const local_inst = insts[j];
371	0	if (local_inst->opcode() == spv::Op::OpFunctionEnd) {
372	0	break;
373	0	}
374
375	0	hash_value = hash_value * 30103 +
376	0	opcode_hashvals[j]; // 30103 is a semiarbitrary prime.
377	0	}
378
379	0	SetNewId(old_id, hash_value % soft_type_id_limit + first_mapped_id);
380	0	}
381	0	}
382	0	}
383
384	0	spv::Id CanonicalizeIdsPass::HashOpCode(Instruction const* const inst) const {
385	0	auto const op_code = inst->opcode();
386	0	std::uint32_t offset = 0;
387	0	if (op_code == spv::Op::OpExtInst) {
388		// offset is literal instruction
389	0	offset = inst->GetSingleWordOperand(3);
390	0	}
391
392	0	return (std::uint32_t)op_code * 19 + offset; // 19 is a small prime.
393	0	}
394
395		// Assign remaining IDs sequentially from remaining holes in the new ID space.
396	0	void CanonicalizeIdsPass::CanonicalizeRemainders() {
397	0	spv::Id next_id = 1;
398	0	for (uint32_t old_id = 0; old_id < new_id_.size(); ++old_id) {
399	0	if (IsOldIdUnmapped(old_id)) {
400	0	next_id = SetNewId(old_id, next_id);
401	0	}
402	0	}
403	0	}
404
405	0	bool CanonicalizeIdsPass::ApplyMap() {
406	0	bool modified = false;
407	0	context()->module()->ForEachInst(
408	0	[this, &modified](Instruction* inst) {
409	0	for (auto operand = inst->begin(); operand != inst->end(); ++operand) {
410	0	const auto type = operand->type;
411	0	if (spvIsIdType(type)) {
412	0	uint32_t& id = operand->words[0];
413	0	uint32_t const new_id = GetNewId(id);
414	0	if (new_id == unused_) {
415	0	continue;
416	0	}
417
418	0	assert(new_id != unmapped_ && "new_id should not be unmapped_");
419
420	0	if (id != new_id) {
421	0	modified = true;
422	0	id = new_id;
423	0	if (type == SPV_OPERAND_TYPE_RESULT_ID) {
424	0	inst->SetResultId(new_id);
425	0	} else if (type == SPV_OPERAND_TYPE_TYPE_ID) {
426	0	inst->SetResultType(new_id);
427	0	}
428	0	}
429	0	}
430	0	}
431	0	const auto& debug_scope = inst->GetDebugScope();
432	0	if (debug_scope.GetLexicalScope() != kNoDebugScope) {
433	0	uint32_t old_scope = debug_scope.GetLexicalScope();
434	0	uint32_t new_scope = GetNewId(old_scope);
435	0	uint32_t old_inlined_at = debug_scope.GetInlinedAt();
436	0	uint32_t new_inlined_at = old_inlined_at != kNoInlinedAt
437	0	? GetNewId(old_inlined_at)
438	0	: old_inlined_at;
439	0	if ((new_scope != unused_ && new_scope != old_scope) \|\|
440	0	(new_inlined_at != unused_ && new_inlined_at != old_inlined_at)) {
441	0	DebugScope new_debug_scope(new_scope, new_inlined_at);
442	0	inst->SetDebugScope(new_debug_scope);
443	0	modified = true;
444	0	}
445	0	}
446	0	},
447	0	true);
448
449	0	return modified;
450	0	}
451
452	0	spv::Id CanonicalizeIdsPass::GetBound() const {
453	0	return context()->module()->id_bound();
454	0	}
455
456	0	void CanonicalizeIdsPass::UpdateBound() {
457	0	context()->module()->SetIdBound(context()->module()->ComputeIdBound());
458
459	0	context()->ResetFeatureManager();
460	0	}
461
462		// Set a new ID. If the new ID is alreadly claimed, the next consecutive ID
463		// will be claimed, mapped, and returned to the caller.
464	0	spv::Id CanonicalizeIdsPass::SetNewId(spv::Id const old_id, spv::Id new_id) {
465	0	assert(old_id < GetBound() && "don't remap an ID that is out of bounds");
466
467	0	if (old_id >= new_id_.size()) {
468	0	new_id_.resize(old_id + 1, unused_);
469	0	}
470
471	0	if (new_id != unmapped_ && new_id != unused_) {
472	0	assert(!IsOldIdUnused(old_id) && "don't remap unused IDs");
473	0	assert(IsOldIdUnmapped(old_id) && "don't remap already mapped IDs");
474
475	0	new_id = ClaimNewId(new_id);
476	0	}
477
478	0	new_id_[old_id] = new_id;
479
480	0	return new_id;
481	0	}
482
483		// Helper function for SetNewID. Claim a new ID. If the new ID is already
484		// claimed, the next consecutive ID will be claimed and returned to the caller.
485	0	spv::Id CanonicalizeIdsPass::ClaimNewId(spv::Id new_id) {
486		// Return the ID if it's not taken.
487	0	auto iter = claimed_new_ids_.find(new_id);
488	0	if (iter != claimed_new_ids_.end()) {
489		// Otherwise, search for the next unused ID using our current iterator.
490		// Technically, it's a linear search across the set starting at the
491		// iterator, but it's not as bad as it would appear in practice assuming the
492		// hash values are well distributed.
493	0	iter = std::adjacent_find(iter, claimed_new_ids_.end(), [](int a, int b) {
494	0	return a + 1 != b; // Stop at the first non-consecutive pair.
495	0	});
496	0	if (iter != claimed_new_ids_.end()) {
497	0	new_id =
498	0	*iter + 1; // We need the next ID after where the search stopped.
499	0	} else {
500	0	new_id = *(--iter) + 1; // We reached the end so we use the next ID.
501	0	}
502	0	}
503
504	0	assert(!IsNewIdClaimed(new_id) &&
505	0	"don't remap to an ID that is already claimed");
506	0	iter = claimed_new_ids_.insert(iter, new_id);
507	0	assert(*iter == new_id);
508
509	0	return new_id;
510	0	}
511
512	0	std::string CanonicalizeIdsPass::IdAsString(spv::Id const id) const {
513	0	if (id == unused_) {
514	0	return "unused";
515	0	} else if (id == unmapped_) {
516	0	return "unmapped";
517	0	} else {
518	0	return std::to_string(id);
519	0	}
520	0	}
521
522	0	void CanonicalizeIdsPass::PrintNewIds() const {
523	0	for (spv::Id id = 0; id < new_id_.size(); ++id) {
524	0	auto const message =
525	0	"new id[" + IdAsString(id) + "]: " + IdAsString(new_id_[id]);
526	0	context()->consumer()(SPV_MSG_INFO, "", {0, 0, 0}, message.c_str());
527	0	}
528	0	}
529
530		} // namespace opt
531		} // namespace spvtools