Coverage Report

Created: 2026-03-21 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/src/llama-arch.cpp
Line
Count
Source
1
#include "llama-arch.h"
2
3
#include "llama-impl.h"
4
5
#include <map>
6
#include <set>
7
#include <vector>
8
9
static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
10
    { LLM_ARCH_CLIP,             "clip"             }, // dummy, only used by llama-quantize
11
    { LLM_ARCH_LLAMA,            "llama"            },
12
    { LLM_ARCH_LLAMA4,           "llama4"           },
13
    { LLM_ARCH_DECI,             "deci"             },
14
    { LLM_ARCH_FALCON,           "falcon"           },
15
    { LLM_ARCH_GROK,             "grok"             },
16
    { LLM_ARCH_GPT2,             "gpt2"             },
17
    { LLM_ARCH_GPTJ,             "gptj"             },
18
    { LLM_ARCH_GPTNEOX,          "gptneox"          },
19
    { LLM_ARCH_MPT,              "mpt"              },
20
    { LLM_ARCH_BAICHUAN,         "baichuan"         },
21
    { LLM_ARCH_STARCODER,        "starcoder"        },
22
    { LLM_ARCH_REFACT,           "refact"           },
23
    { LLM_ARCH_BERT,             "bert"             },
24
    { LLM_ARCH_MODERN_BERT,      "modern-bert"      },
25
    { LLM_ARCH_NOMIC_BERT,       "nomic-bert"       },
26
    { LLM_ARCH_NOMIC_BERT_MOE,   "nomic-bert-moe"   },
27
    { LLM_ARCH_NEO_BERT,         "neo-bert"         },
28
    { LLM_ARCH_JINA_BERT_V2,     "jina-bert-v2"     },
29
    { LLM_ARCH_JINA_BERT_V3,     "jina-bert-v3"     },
30
    { LLM_ARCH_EUROBERT,         "eurobert"         },
31
    { LLM_ARCH_BLOOM,            "bloom"            },
32
    { LLM_ARCH_STABLELM,         "stablelm"         },
33
    { LLM_ARCH_QWEN,             "qwen"             },
34
    { LLM_ARCH_QWEN2,            "qwen2"            },
35
    { LLM_ARCH_QWEN2MOE,         "qwen2moe"         },
36
    { LLM_ARCH_QWEN2VL,          "qwen2vl"          },
37
    { LLM_ARCH_QWEN3,            "qwen3"            },
38
    { LLM_ARCH_QWEN3MOE,         "qwen3moe"         },
39
    { LLM_ARCH_QWEN3NEXT,        "qwen3next"        },
40
    { LLM_ARCH_QWEN3VL,          "qwen3vl"          },
41
    { LLM_ARCH_QWEN3VLMOE,       "qwen3vlmoe"       },
42
    { LLM_ARCH_QWEN35,           "qwen35"           },
43
    { LLM_ARCH_QWEN35MOE,        "qwen35moe"        },
44
    { LLM_ARCH_PHI2,             "phi2"             },
45
    { LLM_ARCH_PHI3,             "phi3"             },
46
    { LLM_ARCH_PHIMOE,           "phimoe"           },
47
    { LLM_ARCH_PLAMO,            "plamo"            },
48
    { LLM_ARCH_PLAMO2,           "plamo2"           },
49
    { LLM_ARCH_PLAMO3,           "plamo3"           },
50
    { LLM_ARCH_CODESHELL,        "codeshell"        },
51
    { LLM_ARCH_ORION,            "orion"            },
52
    { LLM_ARCH_INTERNLM2,        "internlm2"        },
53
    { LLM_ARCH_MINICPM,          "minicpm"          },
54
    { LLM_ARCH_MINICPM3,         "minicpm3"         },
55
    { LLM_ARCH_GEMMA,            "gemma"            },
56
    { LLM_ARCH_GEMMA2,           "gemma2"           },
57
    { LLM_ARCH_GEMMA3,           "gemma3"           },
58
    { LLM_ARCH_GEMMA3N,          "gemma3n"          },
59
    { LLM_ARCH_GEMMA_EMBEDDING,  "gemma-embedding"  },
60
    { LLM_ARCH_STARCODER2,       "starcoder2"       },
61
    { LLM_ARCH_MAMBA,            "mamba"            },
62
    { LLM_ARCH_MAMBA2,           "mamba2"           },
63
    { LLM_ARCH_JAMBA,            "jamba"            },
64
    { LLM_ARCH_FALCON_H1,        "falcon-h1"        },
65
    { LLM_ARCH_XVERSE,           "xverse"           },
66
    { LLM_ARCH_COMMAND_R,        "command-r"        },
67
    { LLM_ARCH_COHERE2,          "cohere2"          },
68
    { LLM_ARCH_DBRX,             "dbrx"             },
69
    { LLM_ARCH_OLMO,             "olmo"             },
70
    { LLM_ARCH_OLMO2,            "olmo2"            },
71
    { LLM_ARCH_OLMOE,            "olmoe"            },
72
    { LLM_ARCH_OPENELM,          "openelm"          },
73
    { LLM_ARCH_ARCTIC,           "arctic"           },
74
    { LLM_ARCH_DEEPSEEK,         "deepseek"         },
75
    { LLM_ARCH_DEEPSEEK2,        "deepseek2"        },
76
    { LLM_ARCH_CHATGLM,          "chatglm"          },
77
    { LLM_ARCH_GLM4,             "glm4"             },
78
    { LLM_ARCH_GLM4_MOE,         "glm4moe"          },
79
    { LLM_ARCH_GLM_DSA,          "glm-dsa"          },
80
    { LLM_ARCH_BITNET,           "bitnet"           },
81
    { LLM_ARCH_T5,               "t5"               },
82
    { LLM_ARCH_T5ENCODER,        "t5encoder"        },
83
    { LLM_ARCH_JAIS,             "jais"             },
84
    { LLM_ARCH_JAIS2,            "jais2"            },
85
    { LLM_ARCH_NEMOTRON,         "nemotron"         },
86
    { LLM_ARCH_NEMOTRON_H,       "nemotron_h"       },
87
    { LLM_ARCH_NEMOTRON_H_MOE,   "nemotron_h_moe"   },
88
    { LLM_ARCH_EXAONE,           "exaone"           },
89
    { LLM_ARCH_EXAONE4,          "exaone4"          },
90
    { LLM_ARCH_EXAONE_MOE,       "exaone-moe"       },
91
    { LLM_ARCH_RWKV6,            "rwkv6"            },
92
    { LLM_ARCH_RWKV6QWEN2,       "rwkv6qwen2"       },
93
    { LLM_ARCH_RWKV7,            "rwkv7"            },
94
    { LLM_ARCH_ARWKV7,           "arwkv7"           },
95
    { LLM_ARCH_GRANITE,          "granite"          },
96
    { LLM_ARCH_GRANITE_MOE,      "granitemoe"       },
97
    { LLM_ARCH_GRANITE_HYBRID,   "granitehybrid"    },
98
    { LLM_ARCH_CHAMELEON,        "chameleon"        },
99
    { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
100
    { LLM_ARCH_PLM,              "plm"              },
101
    { LLM_ARCH_BAILINGMOE,       "bailingmoe"       },
102
    { LLM_ARCH_BAILINGMOE2,      "bailingmoe2"      },
103
    { LLM_ARCH_DOTS1,            "dots1"            },
104
    { LLM_ARCH_ARCEE,            "arcee"            },
105
    { LLM_ARCH_AFMOE,            "afmoe"            },
106
    { LLM_ARCH_ERNIE4_5,         "ernie4_5"         },
107
    { LLM_ARCH_ERNIE4_5_MOE,     "ernie4_5-moe"     },
108
    { LLM_ARCH_HUNYUAN_MOE,      "hunyuan-moe"      },
109
    { LLM_ARCH_HUNYUAN_DENSE,    "hunyuan-dense"    },
110
    { LLM_ARCH_SMOLLM3,          "smollm3"          },
111
    { LLM_ARCH_OPENAI_MOE,       "gpt-oss"          },
112
    { LLM_ARCH_LFM2,             "lfm2"             },
113
    { LLM_ARCH_LFM2MOE,          "lfm2moe"          },
114
    { LLM_ARCH_DREAM,            "dream"            },
115
    { LLM_ARCH_SMALLTHINKER,     "smallthinker"     },
116
    { LLM_ARCH_LLADA,            "llada"            },
117
    { LLM_ARCH_LLADA_MOE,        "llada-moe"        },
118
    { LLM_ARCH_SEED_OSS,         "seed_oss"         },
119
    { LLM_ARCH_GROVEMOE,         "grovemoe"         },
120
    { LLM_ARCH_APERTUS,          "apertus"          },
121
    { LLM_ARCH_MINIMAX_M2,       "minimax-m2"       },
122
    { LLM_ARCH_COGVLM,           "cogvlm"           },
123
    { LLM_ARCH_RND1,             "rnd1"             },
124
    { LLM_ARCH_PANGU_EMBED,      "pangu-embedded"   },
125
    { LLM_ARCH_MISTRAL3,         "mistral3"         },
126
    { LLM_ARCH_MISTRAL4,         "mistral4"         },
127
    { LLM_ARCH_PADDLEOCR,        "paddleocr"        },
128
    { LLM_ARCH_MIMO2,            "mimo2"            },
129
    { LLM_ARCH_STEP35,           "step35"           },
130
    { LLM_ARCH_LLAMA_EMBED,      "llama-embed"      },
131
    { LLM_ARCH_MAINCODER,        "maincoder"        },
132
    { LLM_ARCH_KIMI_LINEAR,      "kimi-linear"      },
133
    { LLM_ARCH_UNKNOWN,          "(unknown)"        },
134
};
135
136
static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
137
    { LLM_KV_GENERAL_TYPE,                     "general.type"                          },
138
    { LLM_KV_GENERAL_ARCHITECTURE,             "general.architecture"                  },
139
    { LLM_KV_GENERAL_QUANTIZATION_VERSION,     "general.quantization_version"          },
140
    { LLM_KV_GENERAL_ALIGNMENT,                "general.alignment"                     },
141
    { LLM_KV_GENERAL_FILE_TYPE,                "general.file_type"                     },
142
    { LLM_KV_GENERAL_SAMPLING_SEQUENCE,        "general.sampling.sequence"             },
143
    { LLM_KV_GENERAL_SAMPLING_TOP_K,           "general.sampling.top_k"                },
144
    { LLM_KV_GENERAL_SAMPLING_TOP_P,           "general.sampling.top_p"                },
145
    { LLM_KV_GENERAL_SAMPLING_MIN_P,           "general.sampling.min_p"                },
146
    { LLM_KV_GENERAL_SAMPLING_XTC_PROBABILITY, "general.sampling.xtc_probability"      },
147
    { LLM_KV_GENERAL_SAMPLING_XTC_THRESHOLD,   "general.sampling.xtc_threshold"        },
148
    { LLM_KV_GENERAL_SAMPLING_TEMP,            "general.sampling.temp"                 },
149
    { LLM_KV_GENERAL_SAMPLING_PENALTY_LAST_N,  "general.sampling.penalty_last_n"       },
150
    { LLM_KV_GENERAL_SAMPLING_PENALTY_REPEAT,  "general.sampling.penalty_repeat"       },
151
    { LLM_KV_GENERAL_SAMPLING_MIROSTAT,        "general.sampling.mirostat"             },
152
    { LLM_KV_GENERAL_SAMPLING_MIROSTAT_TAU,    "general.sampling.mirostat_tau"         },
153
    { LLM_KV_GENERAL_SAMPLING_MIROSTAT_ETA,    "general.sampling.mirostat_eta"         },
154
    { LLM_KV_GENERAL_NAME,                     "general.name"                          },
155
    { LLM_KV_GENERAL_AUTHOR,                   "general.author"                        },
156
    { LLM_KV_GENERAL_VERSION,                  "general.version"                       },
157
    { LLM_KV_GENERAL_URL,                      "general.url"                           },
158
    { LLM_KV_GENERAL_DESCRIPTION,              "general.description"                   },
159
    { LLM_KV_GENERAL_LICENSE,                  "general.license"                       },
160
    { LLM_KV_GENERAL_SOURCE_URL,               "general.source.url"                    },
161
    { LLM_KV_GENERAL_SOURCE_HF_REPO,           "general.source.huggingface.repository" },
162
163
    { LLM_KV_VOCAB_SIZE,                        "%s.vocab_size"                        },
164
    { LLM_KV_CONTEXT_LENGTH,                    "%s.context_length"                    },
165
    { LLM_KV_EMBEDDING_LENGTH,                  "%s.embedding_length"                  },
166
    { LLM_KV_EMBEDDING_LENGTH_OUT,              "%s.embedding_length_out"              },
167
    { LLM_KV_FEATURES_LENGTH,                   "%s.features_length"                   },
168
    { LLM_KV_BLOCK_COUNT,                       "%s.block_count"                       },
169
    { LLM_KV_LEADING_DENSE_BLOCK_COUNT,         "%s.leading_dense_block_count"         },
170
    { LLM_KV_FEED_FORWARD_LENGTH,               "%s.feed_forward_length"               },
171
    { LLM_KV_EXPERT_FEED_FORWARD_LENGTH,        "%s.expert_feed_forward_length"        },
172
    { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
173
    { LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH,  "%s.expert_chunk_feed_forward_length"  },
174
    { LLM_KV_SWIGLU_CLAMP_EXP,                  "%s.swiglu_clamp_exp"                  },
175
    { LLM_KV_SWIGLU_CLAMP_SHEXP,                "%s.swiglu_clamp_shexp"                },
176
    { LLM_KV_USE_PARALLEL_RESIDUAL,             "%s.use_parallel_residual"             },
177
    { LLM_KV_TENSOR_DATA_LAYOUT,                "%s.tensor_data_layout"                },
178
    { LLM_KV_EXPERT_COUNT,                      "%s.expert_count"                      },
179
    { LLM_KV_EXPERT_USED_COUNT,                 "%s.expert_used_count"                 },
180
    { LLM_KV_EXPERT_SHARED_COUNT,               "%s.expert_shared_count"               },
181
    { LLM_KV_EXPERT_GROUP_COUNT,                "%s.expert_group_count"                },
182
    { LLM_KV_EXPERT_GROUP_USED_COUNT,           "%s.expert_group_used_count"           },
183
    { LLM_KV_EXPERT_WEIGHTS_SCALE,              "%s.expert_weights_scale"              },
184
    { LLM_KV_EXPERT_WEIGHTS_NORM,               "%s.expert_weights_norm"               },
185
    { LLM_KV_EXPERT_GATING_FUNC,                "%s.expert_gating_func"                },
186
    { LLM_KV_EXPERT_GROUP_SCALE,                "%s.expert_group_scale"                },
187
    { LLM_KV_EXPERTS_PER_GROUP,                 "%s.experts_per_group"                 },
188
    { LLM_KV_MOE_EVERY_N_LAYERS,                "%s.moe_every_n_layers"                },
189
    { LLM_KV_MOE_LATENT_SIZE,                   "%s.moe_latent_size"                   },
190
    { LLM_KV_NEXTN_PREDICT_LAYERS,              "%s.nextn_predict_layers"              },
191
    { LLM_KV_NUM_DEEPSTACK_LAYERS,              "%s.n_deepstack_layers"                },
192
    { LLM_KV_POOLING_TYPE,                      "%s.pooling_type"                      },
193
    { LLM_KV_LOGIT_SCALE,                       "%s.logit_scale"                       },
194
    { LLM_KV_DECODER_START_TOKEN_ID,            "%s.decoder_start_token_id"            },
195
    { LLM_KV_DECODER_BLOCK_COUNT,               "%s.decoder_block_count"               },
196
    { LLM_KV_ATTN_LOGIT_SOFTCAPPING,            "%s.attn_logit_softcapping"            },
197
    { LLM_KV_ROUTER_LOGIT_SOFTCAPPING,          "%s.router_logit_softcapping"          },
198
    { LLM_KV_FINAL_LOGIT_SOFTCAPPING,           "%s.final_logit_softcapping"           },
199
    { LLM_KV_SWIN_NORM,                         "%s.swin_norm"                         },
200
    { LLM_KV_RESCALE_EVERY_N_LAYERS,            "%s.rescale_every_n_layers"            },
201
    { LLM_KV_TIME_MIX_EXTRA_DIM,                "%s.time_mix_extra_dim"                },
202
    { LLM_KV_TIME_DECAY_EXTRA_DIM,              "%s.time_decay_extra_dim"              },
203
    { LLM_KV_RESIDUAL_SCALE,                    "%s.residual_scale"                    },
204
    { LLM_KV_EMBEDDING_SCALE,                   "%s.embedding_scale"                   },
205
    { LLM_KV_TOKEN_SHIFT_COUNT,                 "%s.token_shift_count"                 },
206
    { LLM_KV_INTERLEAVE_MOE_LAYER_STEP,         "%s.interleave_moe_layer_step"         },
207
    { LLM_KV_FULL_ATTENTION_INTERVAL,           "%s.full_attention_interval"           },
208
209
    { LLM_KV_ATTENTION_HEAD_COUNT,                   "%s.attention.head_count"                   },
210
    { LLM_KV_ATTENTION_HEAD_COUNT_KV,                "%s.attention.head_count_kv"                },
211
    { LLM_KV_ATTENTION_MAX_ALIBI_BIAS,               "%s.attention.max_alibi_bias"               },
212
    { LLM_KV_ATTENTION_CLAMP_KQV,                    "%s.attention.clamp_kqv"                    },
213
    { LLM_KV_ATTENTION_KEY_LENGTH,                   "%s.attention.key_length"                   },
214
    { LLM_KV_ATTENTION_VALUE_LENGTH,                 "%s.attention.value_length"                 },
215
    { LLM_KV_ATTENTION_LAYERNORM_EPS,                "%s.attention.layer_norm_epsilon"           },
216
    { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,            "%s.attention.layer_norm_rms_epsilon"       },
217
    { LLM_KV_ATTENTION_GROUPNORM_EPS,                "%s.attention.group_norm_epsilon"           },
218
    { LLM_KV_ATTENTION_GROUPNORM_GROUPS,             "%s.attention.group_norm_groups"            },
219
    { LLM_KV_ATTENTION_CAUSAL,                       "%s.attention.causal"                       },
220
    { LLM_KV_ATTENTION_Q_LORA_RANK,                  "%s.attention.q_lora_rank"                  },
221
    { LLM_KV_ATTENTION_KV_LORA_RANK,                 "%s.attention.kv_lora_rank"                 },
222
    { LLM_KV_ATTENTION_DECAY_LORA_RANK,              "%s.attention.decay_lora_rank"              },
223
    { LLM_KV_ATTENTION_ICLR_LORA_RANK,               "%s.attention.iclr_lora_rank"               },
224
    { LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, "%s.attention.value_residual_mix_lora_rank" },
225
    { LLM_KV_ATTENTION_GATE_LORA_RANK,               "%s.attention.gate_lora_rank"               },
226
    { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,       "%s.attention.relative_buckets_count"       },
227
    { LLM_KV_ATTENTION_SLIDING_WINDOW,               "%s.attention.sliding_window"               },
228
    { LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN,       "%s.attention.sliding_window_pattern"       },
229
    { LLM_KV_ATTENTION_SCALE,                        "%s.attention.scale"                        },
230
    { LLM_KV_ATTENTION_OUTPUT_SCALE,                 "%s.attention.output_scale"                 },
231
    { LLM_KV_ATTENTION_TEMPERATURE_LENGTH,           "%s.attention.temperature_length"           },
232
    { LLM_KV_ATTENTION_TEMPERATURE_SCALE,            "%s.attention.temperature_scale"            },
233
    { LLM_KV_ATTENTION_KEY_LENGTH_MLA,               "%s.attention.key_length_mla"               },
234
    { LLM_KV_ATTENTION_VALUE_LENGTH_MLA,             "%s.attention.value_length_mla"             },
235
    { LLM_KV_ATTENTION_KEY_LENGTH_SWA,               "%s.attention.key_length_swa"               },
236
    { LLM_KV_ATTENTION_VALUE_LENGTH_SWA,             "%s.attention.value_length_swa"             },
237
    { LLM_KV_ATTENTION_INDEXER_HEAD_COUNT,           "%s.attention.indexer.head_count"           },
238
    { LLM_KV_ATTENTION_INDEXER_KEY_LENGTH,           "%s.attention.indexer.key_length"           },
239
    { LLM_KV_ATTENTION_INDEXER_TOP_K,                "%s.attention.indexer.top_k"                },
240
241
    { LLM_KV_ROPE_DIMENSION_COUNT,           "%s.rope.dimension_count"                 },
242
    { LLM_KV_ROPE_DIMENSION_COUNT_SWA,       "%s.rope.dimension_count_swa"             },
243
    { LLM_KV_ROPE_DIMENSION_SECTIONS,        "%s.rope.dimension_sections"              },
244
    { LLM_KV_ROPE_FREQ_BASE,                 "%s.rope.freq_base"                       },
245
    { LLM_KV_ROPE_FREQ_BASE_SWA,             "%s.rope.freq_base_swa"                   },
246
    { LLM_KV_ROPE_SCALE_LINEAR,              "%s.rope.scale_linear"                    },
247
    { LLM_KV_ROPE_SCALING_TYPE,              "%s.rope.scaling.type"                    },
248
    { LLM_KV_ROPE_SCALING_FACTOR,            "%s.rope.scaling.factor"                  },
249
    { LLM_KV_ROPE_SCALING_ATTN_FACTOR,       "%s.rope.scaling.attn_factor"             },
250
    { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,      "%s.rope.scaling.original_context_length" },
251
    { LLM_KV_ROPE_SCALING_FINETUNED,         "%s.rope.scaling.finetuned"               },
252
    { LLM_KV_ROPE_SCALING_YARN_LOG_MUL,      "%s.rope.scaling.yarn_log_multiplier"     },
253
    { LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR,   "%s.rope.scaling.yarn_ext_factor"         },
254
    { LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR,  "%s.rope.scaling.yarn_attn_factor"        },
255
    { LLM_KV_ROPE_SCALING_YARN_BETA_FAST,    "%s.rope.scaling.yarn_beta_fast"          },
256
    { LLM_KV_ROPE_SCALING_YARN_BETA_SLOW,    "%s.rope.scaling.yarn_beta_slow"          },
257
258
    { LLM_KV_SPLIT_NO,            "split.no"            },
259
    { LLM_KV_SPLIT_COUNT,         "split.count"         },
260
    { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },
261
262
    { LLM_KV_SSM_CONV_KERNEL,    "%s.ssm.conv_kernel"    },
263
    { LLM_KV_SSM_INNER_SIZE,     "%s.ssm.inner_size"     },
264
    { LLM_KV_SSM_STATE_SIZE,     "%s.ssm.state_size"     },
265
    { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
266
    { LLM_KV_SSM_GROUP_COUNT,    "%s.ssm.group_count"    },
267
    { LLM_KV_SSM_DT_B_C_RMS,     "%s.ssm.dt_b_c_rms"     },
268
269
    { LLM_KV_KDA_HEAD_DIM, "%s.kda.head_dim" },
270
271
    { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
272
273
    { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
274
    { LLM_KV_POSNET_BLOCK_COUNT,      "%s.posnet.block_count"      },
275
276
    { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
277
    { LLM_KV_CONVNEXT_BLOCK_COUNT,      "%s.convnext.block_count"      },
278
279
    { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
280
281
    { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
282
    // sentence-transformers dense modules feature dims
283
    { LLM_KV_DENSE_2_FEAT_IN,        "%s.dense_2_feat_in"  },
284
    { LLM_KV_DENSE_2_FEAT_OUT,       "%s.dense_2_feat_out"  },
285
    { LLM_KV_DENSE_3_FEAT_IN,        "%s.dense_3_feat_in"   },
286
    { LLM_KV_DENSE_3_FEAT_OUT,       "%s.dense_3_feat_out"  },
287
288
    { LLM_KV_TOKENIZER_MODEL,                "tokenizer.ggml.model"                    },
289
    { LLM_KV_TOKENIZER_PRE,                  "tokenizer.ggml.pre"                      },
290
    { LLM_KV_TOKENIZER_LIST,                 "tokenizer.ggml.tokens"                   },
291
    { LLM_KV_TOKENIZER_TOKEN_TYPE,           "tokenizer.ggml.token_type"               },
292
    { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,     "tokenizer.ggml.token_type_count"         },
293
    { LLM_KV_TOKENIZER_SCORES,               "tokenizer.ggml.scores"                   },
294
    { LLM_KV_TOKENIZER_MERGES,               "tokenizer.ggml.merges"                   },
295
    { LLM_KV_TOKENIZER_BOS_ID,               "tokenizer.ggml.bos_token_id"             },
296
    { LLM_KV_TOKENIZER_EOS_ID,               "tokenizer.ggml.eos_token_id"             },
297
    { LLM_KV_TOKENIZER_EOT_ID,               "tokenizer.ggml.eot_token_id"             },
298
    { LLM_KV_TOKENIZER_EOM_ID,               "tokenizer.ggml.eom_token_id"             },
299
    { LLM_KV_TOKENIZER_UNK_ID,               "tokenizer.ggml.unknown_token_id"         },
300
    { LLM_KV_TOKENIZER_SEP_ID,               "tokenizer.ggml.seperator_token_id"       },
301
    { LLM_KV_TOKENIZER_PAD_ID,               "tokenizer.ggml.padding_token_id"         },
302
    { LLM_KV_TOKENIZER_CLS_ID,               "tokenizer.ggml.cls_token_id"             },
303
    { LLM_KV_TOKENIZER_MASK_ID,              "tokenizer.ggml.mask_token_id"            },
304
    { LLM_KV_TOKENIZER_ADD_BOS,              "tokenizer.ggml.add_bos_token"            },
305
    { LLM_KV_TOKENIZER_ADD_EOS,              "tokenizer.ggml.add_eos_token"            },
306
    { LLM_KV_TOKENIZER_ADD_SEP,              "tokenizer.ggml.add_sep_token"            },
307
    { LLM_KV_TOKENIZER_ADD_PREFIX,           "tokenizer.ggml.add_space_prefix"         },
308
    { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,      "tokenizer.ggml.remove_extra_whitespaces" },
309
    { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap"     },
310
    { LLM_KV_TOKENIZER_HF_JSON,              "tokenizer.huggingface.json"              },
311
    { LLM_KV_TOKENIZER_RWKV,                 "tokenizer.rwkv.world"                    },
312
    { LLM_KV_TOKENIZER_CHAT_TEMPLATE,        "tokenizer.chat_template"                 },
313
    { LLM_KV_TOKENIZER_FIM_PRE_ID,           "tokenizer.ggml.fim_pre_token_id"         },
314
    { LLM_KV_TOKENIZER_FIM_SUF_ID,           "tokenizer.ggml.fim_suf_token_id"         },
315
    { LLM_KV_TOKENIZER_FIM_MID_ID,           "tokenizer.ggml.fim_mid_token_id"         },
316
    { LLM_KV_TOKENIZER_FIM_PAD_ID,           "tokenizer.ggml.fim_pad_token_id"         },
317
    { LLM_KV_TOKENIZER_FIM_REP_ID,           "tokenizer.ggml.fim_rep_token_id"         },
318
    { LLM_KV_TOKENIZER_FIM_SEP_ID,           "tokenizer.ggml.fim_sep_token_id"         },
319
320
    { LLM_KV_ADAPTER_TYPE,                    "adapter.type"               },
321
    { LLM_KV_ADAPTER_LORA_ALPHA,              "adapter.lora.alpha"         },
322
    { LLM_KV_ADAPTER_LORA_TASK_NAME,          "adapter.lora.task_name"     },
323
    { LLM_KV_ADAPTER_LORA_PROMPT_PREFIX,      "adapter.lora.prompt_prefix" },
324
    { LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS, "adapter.alora.invocation_tokens" },
325
326
    { LLM_KV_XIELU_ALPHA_N,         "xielu.alpha_n"         },
327
    { LLM_KV_XIELU_ALPHA_P,         "xielu.alpha_p"         },
328
    { LLM_KV_XIELU_BETA,            "xielu.beta"            },
329
    { LLM_KV_XIELU_EPS,             "xielu.eps"             },
330
331
    // deprecated
332
    { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
333
    { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
334
    { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
335
};
336
337
static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
338
    { LLM_TENSOR_TOKEN_EMBD,                             "token_embd" },
339
    { LLM_TENSOR_OUTPUT_NORM,                            "output_norm" },
340
    { LLM_TENSOR_OUTPUT_NORM_LFM2,                       "token_embd_norm" }, // fix for wrong tensor name
341
    { LLM_TENSOR_OUTPUT,                                 "output" },
342
    { LLM_TENSOR_ROPE_FREQS,                             "rope_freqs" },
343
    { LLM_TENSOR_ATTN_NORM,                              "blk.%d.attn_norm" },
344
    { LLM_TENSOR_ATTN_Q,                                 "blk.%d.attn_q" },
345
    { LLM_TENSOR_ATTN_K,                                 "blk.%d.attn_k" },
346
    { LLM_TENSOR_ATTN_V,                                 "blk.%d.attn_v" },
347
    { LLM_TENSOR_ATTN_OUT,                               "blk.%d.attn_output" },
348
    { LLM_TENSOR_ATTN_ROT_EMBD,                          "blk.%d.attn_rot_embd" },
349
    { LLM_TENSOR_FFN_GATE_INP,                           "blk.%d.ffn_gate_inp" },
350
    { LLM_TENSOR_FFN_NORM,                               "blk.%d.ffn_norm" },
351
    { LLM_TENSOR_FFN_GATE,                               "blk.%d.ffn_gate" },
352
    { LLM_TENSOR_FFN_DOWN,                               "blk.%d.ffn_down" },
353
    { LLM_TENSOR_FFN_UP,                                 "blk.%d.ffn_up" },
354
    { LLM_TENSOR_FFN_GATE_EXP,                           "blk.%d.ffn_gate.%d" },
355
    { LLM_TENSOR_FFN_DOWN_EXP,                           "blk.%d.ffn_down.%d" },
356
    { LLM_TENSOR_FFN_UP_EXP,                             "blk.%d.ffn_up.%d" },
357
    { LLM_TENSOR_FFN_GATE_EXPS,                          "blk.%d.ffn_gate_exps" },
358
    { LLM_TENSOR_FFN_GATE_UP_EXPS,                       "blk.%d.ffn_gate_up_exps" },
359
    { LLM_TENSOR_FFN_DOWN_EXPS,                          "blk.%d.ffn_down_exps" },
360
    { LLM_TENSOR_FFN_UP_EXPS,                            "blk.%d.ffn_up_exps" },
361
    { LLM_TENSOR_ATTN_POST_NORM,                         "blk.%d.post_attention_norm" },
362
    { LLM_TENSOR_ATTN_Q_NORM,                            "blk.%d.attn_q_norm" },
363
    { LLM_TENSOR_ATTN_K_NORM,                            "blk.%d.attn_k_norm" },
364
    { LLM_TENSOR_ATTN_GATE,                              "blk.%d.attn_gate" },
365
    { LLM_TENSOR_FFN_POST_NORM,                          "blk.%d.post_ffw_norm" },
366
    { LLM_TENSOR_FFN_GATE_SHEXP,                         "blk.%d.ffn_gate_shexp" },
367
    { LLM_TENSOR_FFN_UP_SHEXP,                           "blk.%d.ffn_up_shexp" },
368
    { LLM_TENSOR_FFN_DOWN_SHEXP,                         "blk.%d.ffn_down_shexp" },
369
    { LLM_TENSOR_FFN_EXP_PROBS_B,                        "blk.%d.exp_probs_b" },
370
    { LLM_TENSOR_FFN_LATENT_DOWN,                        "blk.%d.ffn_latent_down" },
371
    { LLM_TENSOR_FFN_LATENT_UP,                          "blk.%d.ffn_latent_up" },
372
    { LLM_TENSOR_ATTN_NORM_2,                            "blk.%d.attn_norm_2" },
373
    { LLM_TENSOR_ATTN_QKV,                               "blk.%d.attn_qkv" },
374
    { LLM_TENSOR_LAYER_OUT_NORM,                         "blk.%d.layer_output_norm" },
375
    { LLM_TENSOR_ATTN_OUT_NORM,                          "blk.%d.attn_output_norm" },
376
    { LLM_TENSOR_POS_EMBD,                               "position_embd" },
377
    { LLM_TENSOR_FFN_ACT,                                "blk.%d.ffn.act" },
378
    { LLM_TENSOR_TOKEN_EMBD_NORM,                        "token_embd_norm" },
379
    { LLM_TENSOR_TOKEN_TYPES,                            "token_types" },
380
    { LLM_TENSOR_CLS,                                    "cls" },
381
    { LLM_TENSOR_CLS_OUT,                                "cls.output" },
382
    { LLM_TENSOR_CLS_NORM,                               "cls.norm" },
383
    { LLM_TENSOR_ENC_OUTPUT_NORM,                        "enc.output_norm" },
384
    { LLM_TENSOR_FFN_GATE_INP_SHEXP,                     "blk.%d.ffn_gate_inp_shexp" },
385
    { LLM_TENSOR_SSM_A_NOSCAN,                           "blk.%d.ssm_a" },
386
    { LLM_TENSOR_SSM_CONV1D,                             "blk.%d.ssm_conv1d" },
387
    { LLM_TENSOR_SSM_DT,                                 "blk.%d.ssm_dt" },
388
    { LLM_TENSOR_SSM_BETA_ALPHA,                         "blk.%d.ssm_ba" },
389
    { LLM_TENSOR_SSM_ALPHA,                              "blk.%d.ssm_alpha" },
390
    { LLM_TENSOR_SSM_IN,                                 "blk.%d.ssm_in" },
391
    { LLM_TENSOR_SSM_NORM,                               "blk.%d.ssm_norm" },
392
    { LLM_TENSOR_SSM_OUT,                                "blk.%d.ssm_out" },
393
    { LLM_TENSOR_ROPE_FACTORS_LONG,                      "rope_factors_long" },
394
    { LLM_TENSOR_ROPE_FACTORS_SHORT,                     "rope_factors_short" },
395
    { LLM_TENSOR_SSM_X,                                  "blk.%d.ssm_x" },
396
    { LLM_TENSOR_SSM_A,                                  "blk.%d.ssm_a" },
397
    { LLM_TENSOR_SSM_D,                                  "blk.%d.ssm_d" },
398
    { LLM_TENSOR_SSM_DT_NORM,                            "blk.%d.ssm_dt_norm" },
399
    { LLM_TENSOR_SSM_B_NORM,                             "blk.%d.ssm_b_norm" },
400
    { LLM_TENSOR_SSM_C_NORM,                             "blk.%d.ssm_c_norm" },
401
    { LLM_TENSOR_SSM_CONV1D_Q,                           "blk.%d.ssm_conv1d_q" },
402
    { LLM_TENSOR_SSM_CONV1D_K,                           "blk.%d.ssm_conv1d_k" },
403
    { LLM_TENSOR_SSM_CONV1D_V,                           "blk.%d.ssm_conv1d_v" },
404
    { LLM_TENSOR_SSM_F_A,                                "blk.%d.ssm_f_a" },
405
    { LLM_TENSOR_SSM_F_B,                                "blk.%d.ssm_f_b" },
406
    { LLM_TENSOR_SSM_BETA,                               "blk.%d.ssm_beta" },
407
    { LLM_TENSOR_SSM_G_A,                                "blk.%d.ssm_g_a" },
408
    { LLM_TENSOR_SSM_G_B,                                "blk.%d.ssm_g_b" },
409
    { LLM_TENSOR_SSM_NORM,                               "blk.%d.ssm_norm" },
410
    { LLM_TENSOR_ATTN_Q_A_NORM,                          "blk.%d.attn_q_a_norm" },
411
    { LLM_TENSOR_ATTN_KV_A_NORM,                         "blk.%d.attn_kv_a_norm" },
412
    { LLM_TENSOR_ATTN_Q_A,                               "blk.%d.attn_q_a" },
413
    { LLM_TENSOR_ATTN_Q_B,                               "blk.%d.attn_q_b" },
414
    { LLM_TENSOR_ATTN_KV_A_MQA,                          "blk.%d.attn_kv_a_mqa" },
415
    { LLM_TENSOR_ATTN_KV_B,                              "blk.%d.attn_kv_b" },
416
    { LLM_TENSOR_PER_LAYER_TOKEN_EMBD,                   "per_layer_token_embd" },
417
    { LLM_TENSOR_PER_LAYER_MODEL_PROJ,                   "per_layer_model_proj" },
418
    { LLM_TENSOR_PER_LAYER_PROJ_NORM,                    "per_layer_proj_norm" },
419
    { LLM_TENSOR_ALTUP_UNEMBD_PROJ,                      "altup_unembd_proj" },
420
    { LLM_TENSOR_ALTUP_PROJ,                             "altup_proj" },
421
    { LLM_TENSOR_PER_LAYER_INP_GATE,                     "blk.%d.inp_gate" },
422
    { LLM_TENSOR_PER_LAYER_PROJ,                         "blk.%d.proj" },
423
    { LLM_TENSOR_PER_LAYER_POST_NORM,                    "blk.%d.post_norm" },
424
    { LLM_TENSOR_ALTUP_CORRECT_COEF,                     "blk.%d.altup_correct_coef" },
425
    { LLM_TENSOR_ALTUP_CORRECT_SCALE,                    "blk.%d.altup_correct_scale" },
426
    { LLM_TENSOR_ALTUP_PREDICT_COEF,                     "blk.%d.altup_predict_coef" },
427
    { LLM_TENSOR_ALTUP_ROUTER,                           "blk.%d.altup_router" },
428
    { LLM_TENSOR_ALTUP_ROUTER_NORM,                      "blk.%d.altup_router_norm" },
429
    { LLM_TENSOR_LAUREL_L,                               "blk.%d.laurel_l" },
430
    { LLM_TENSOR_LAUREL_R,                               "blk.%d.laurel_r" },
431
    { LLM_TENSOR_LAUREL_POST_NORM,                       "blk.%d.laurel_post_norm" },
432
    { LLM_TENSOR_DENSE_2_OUT,                            "dense_2" },
433
    { LLM_TENSOR_DENSE_3_OUT,                            "dense_3" },
434
    { LLM_TENSOR_FFN_NORM_EXPS,                          "blk.%d.ffn_norm_exps" },
435
    { LLM_TENSOR_ATTN_K_B,                               "blk.%d.attn_k_b" },
436
    { LLM_TENSOR_ATTN_V_B,                               "blk.%d.attn_v_b" },
437
    { LLM_TENSOR_NEXTN_EH_PROJ,                          "blk.%d.nextn.eh_proj" },
438
    { LLM_TENSOR_NEXTN_EMBED_TOKENS,                     "blk.%d.nextn.embed_tokens" },
439
    { LLM_TENSOR_NEXTN_ENORM,                            "blk.%d.nextn.enorm" },
440
    { LLM_TENSOR_NEXTN_HNORM,                            "blk.%d.nextn.hnorm" },
441
    { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,                 "blk.%d.nextn.shared_head_head" },
442
    { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,                 "blk.%d.nextn.shared_head_norm" },
443
    { LLM_TENSOR_ATTN_SUB_NORM,                          "blk.%d.attn_sub_norm" },
444
    { LLM_TENSOR_FFN_SUB_NORM,                           "blk.%d.ffn_sub_norm" },
445
    { LLM_TENSOR_DEC_OUTPUT_NORM,                        "dec.output_norm" },
446
    { LLM_TENSOR_DEC_ATTN_NORM,                          "dec.blk.%d.attn_norm" },
447
    { LLM_TENSOR_DEC_ATTN_Q,                             "dec.blk.%d.attn_q" },
448
    { LLM_TENSOR_DEC_ATTN_K,                             "dec.blk.%d.attn_k" },
449
    { LLM_TENSOR_DEC_ATTN_V,                             "dec.blk.%d.attn_v" },
450
    { LLM_TENSOR_DEC_ATTN_OUT,                           "dec.blk.%d.attn_o" },
451
    { LLM_TENSOR_DEC_ATTN_REL_B,                         "dec.blk.%d.attn_rel_b" },
452
    { LLM_TENSOR_DEC_CROSS_ATTN_NORM,                    "dec.blk.%d.cross_attn_norm" },
453
    { LLM_TENSOR_DEC_CROSS_ATTN_Q,                       "dec.blk.%d.cross_attn_q" },
454
    { LLM_TENSOR_DEC_CROSS_ATTN_K,                       "dec.blk.%d.cross_attn_k" },
455
    { LLM_TENSOR_DEC_CROSS_ATTN_V,                       "dec.blk.%d.cross_attn_v" },
456
    { LLM_TENSOR_DEC_CROSS_ATTN_OUT,                     "dec.blk.%d.cross_attn_o" },
457
    { LLM_TENSOR_DEC_CROSS_ATTN_REL_B,                   "dec.blk.%d.cross_attn_rel_b" },
458
    { LLM_TENSOR_DEC_FFN_NORM,                           "dec.blk.%d.ffn_norm" },
459
    { LLM_TENSOR_DEC_FFN_GATE,                           "dec.blk.%d.ffn_gate" },
460
    { LLM_TENSOR_DEC_FFN_DOWN,                           "dec.blk.%d.ffn_down" },
461
    { LLM_TENSOR_DEC_FFN_UP,                             "dec.blk.%d.ffn_up" },
462
    { LLM_TENSOR_ENC_ATTN_NORM,                          "enc.blk.%d.attn_norm" },
463
    { LLM_TENSOR_ENC_ATTN_Q,                             "enc.blk.%d.attn_q" },
464
    { LLM_TENSOR_ENC_ATTN_K,                             "enc.blk.%d.attn_k" },
465
    { LLM_TENSOR_ENC_ATTN_V,                             "enc.blk.%d.attn_v" },
466
    { LLM_TENSOR_ENC_ATTN_OUT,                           "enc.blk.%d.attn_o" },
467
    { LLM_TENSOR_ENC_ATTN_REL_B,                         "enc.blk.%d.attn_rel_b" },
468
    { LLM_TENSOR_ENC_FFN_NORM,                           "enc.blk.%d.ffn_norm" },
469
    { LLM_TENSOR_ENC_FFN_GATE,                           "enc.blk.%d.ffn_gate" },
470
    { LLM_TENSOR_ENC_FFN_DOWN,                           "enc.blk.%d.ffn_down" },
471
    { LLM_TENSOR_ENC_FFN_UP,                             "enc.blk.%d.ffn_up" },
472
    { LLM_TENSOR_TIME_MIX_W1,                            "blk.%d.time_mix_w1" },
473
    { LLM_TENSOR_TIME_MIX_W2,                            "blk.%d.time_mix_w2" },
474
    { LLM_TENSOR_TIME_MIX_LERP_X,                        "blk.%d.time_mix_lerp_x" },
475
    { LLM_TENSOR_TIME_MIX_LERP_W,                        "blk.%d.time_mix_lerp_w" },
476
    { LLM_TENSOR_TIME_MIX_LERP_K,                        "blk.%d.time_mix_lerp_k" },
477
    { LLM_TENSOR_TIME_MIX_LERP_V,                        "blk.%d.time_mix_lerp_v" },
478
    { LLM_TENSOR_TIME_MIX_LERP_R,                        "blk.%d.time_mix_lerp_r" },
479
    { LLM_TENSOR_TIME_MIX_LERP_G,                        "blk.%d.time_mix_lerp_g" },
480
    { LLM_TENSOR_TIME_MIX_LERP_FUSED,                    "blk.%d.time_mix_lerp_fused" },
481
    { LLM_TENSOR_TIME_MIX_FIRST,                         "blk.%d.time_mix_first" },
482
    { LLM_TENSOR_TIME_MIX_DECAY,                         "blk.%d.time_mix_decay" },
483
    { LLM_TENSOR_TIME_MIX_DECAY_W1,                      "blk.%d.time_mix_decay_w1" },
484
    { LLM_TENSOR_TIME_MIX_DECAY_W2,                      "blk.%d.time_mix_decay_w2" },
485
    { LLM_TENSOR_TIME_MIX_KEY,                           "blk.%d.time_mix_key" },
486
    { LLM_TENSOR_TIME_MIX_VALUE,                         "blk.%d.time_mix_value" },
487
    { LLM_TENSOR_TIME_MIX_RECEPTANCE,                    "blk.%d.time_mix_receptance" },
488
    { LLM_TENSOR_TIME_MIX_GATE,                          "blk.%d.time_mix_gate" },
489
    { LLM_TENSOR_TIME_MIX_LN,                            "blk.%d.time_mix_ln" },
490
    { LLM_TENSOR_TIME_MIX_OUTPUT,                        "blk.%d.time_mix_output" },
491
    { LLM_TENSOR_CHANNEL_MIX_LERP_K,                     "blk.%d.channel_mix_lerp_k" },
492
    { LLM_TENSOR_CHANNEL_MIX_LERP_R,                     "blk.%d.channel_mix_lerp_r" },
493
    { LLM_TENSOR_CHANNEL_MIX_KEY,                        "blk.%d.channel_mix_key" },
494
    { LLM_TENSOR_CHANNEL_MIX_VALUE,                      "blk.%d.channel_mix_value" },
495
    { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,                 "blk.%d.channel_mix_receptance" },
496
    { LLM_TENSOR_TIME_MIX_W0,                            "blk.%d.time_mix_w0" },
497
    { LLM_TENSOR_TIME_MIX_A0,                            "blk.%d.time_mix_a0" },
498
    { LLM_TENSOR_TIME_MIX_A1,                            "blk.%d.time_mix_a1" },
499
    { LLM_TENSOR_TIME_MIX_A2,                            "blk.%d.time_mix_a2" },
500
    { LLM_TENSOR_TIME_MIX_V0,                            "blk.%d.time_mix_v0" },
501
    { LLM_TENSOR_TIME_MIX_V1,                            "blk.%d.time_mix_v1" },
502
    { LLM_TENSOR_TIME_MIX_V2,                            "blk.%d.time_mix_v2" },
503
    { LLM_TENSOR_TIME_MIX_G1,                            "blk.%d.time_mix_g1" },
504
    { LLM_TENSOR_TIME_MIX_G2,                            "blk.%d.time_mix_g2" },
505
    { LLM_TENSOR_TIME_MIX_K_K,                           "blk.%d.time_mix_k_k" },
506
    { LLM_TENSOR_TIME_MIX_K_A,                           "blk.%d.time_mix_k_a" },
507
    { LLM_TENSOR_TIME_MIX_R_K,                           "blk.%d.time_mix_r_k" },
508
    { LLM_TENSOR_CONV1D,                                 "conv1d" },
509
    { LLM_TENSOR_CONVNEXT_DW,                            "convnext.%d.dw" },
510
    { LLM_TENSOR_CONVNEXT_NORM,                          "convnext.%d.norm" },
511
    { LLM_TENSOR_CONVNEXT_PW1,                           "convnext.%d.pw1" },
512
    { LLM_TENSOR_CONVNEXT_PW2,                           "convnext.%d.pw2" },
513
    { LLM_TENSOR_CONVNEXT_GAMMA,                         "convnext.%d.gamma" },
514
    { LLM_TENSOR_POS_NET_CONV1,                          "posnet.%d.conv1" },
515
    { LLM_TENSOR_POS_NET_CONV2,                          "posnet.%d.conv2" },
516
    { LLM_TENSOR_POS_NET_NORM,                           "posnet.%d.norm" },
517
    { LLM_TENSOR_POS_NET_NORM1,                          "posnet.%d.norm1" },
518
    { LLM_TENSOR_POS_NET_NORM2,                          "posnet.%d.norm2" },
519
    { LLM_TENSOR_POS_NET_ATTN_NORM,                      "posnet.%d.attn_norm" },
520
    { LLM_TENSOR_POS_NET_ATTN_Q,                         "posnet.%d.attn_q" },
521
    { LLM_TENSOR_POS_NET_ATTN_K,                         "posnet.%d.attn_k" },
522
    { LLM_TENSOR_POS_NET_ATTN_V,                         "posnet.%d.attn_v" },
523
    { LLM_TENSOR_POS_NET_ATTN_OUT,                       "posnet.%d.attn_output" },
524
    { LLM_TENSOR_ATTN_SINKS,                             "blk.%d.attn_sinks" },
525
    { LLM_TENSOR_SHORTCONV_CONV,                         "blk.%d.shortconv.conv" },
526
    { LLM_TENSOR_SHORTCONV_INPROJ,                       "blk.%d.shortconv.in_proj" },
527
    { LLM_TENSOR_SHORTCONV_OUTPROJ,                      "blk.%d.shortconv.out_proj" },
528
    { LLM_TENSOR_FFN_GATE_CHEXPS,                        "blk.%d.ffn_gate_chexps" },
529
    { LLM_TENSOR_FFN_DOWN_CHEXPS,                        "blk.%d.ffn_down_chexps" },
530
    { LLM_TENSOR_FFN_UP_CHEXPS,                          "blk.%d.ffn_up_chexps" },
531
    { LLM_TENSOR_VISEXP_ATTN_QKV,                        "blk.%d.vis_attn_qkv" },
532
    { LLM_TENSOR_VISEXP_ATTN_OUT,                        "blk.%d.vis_attn_output" },
533
    { LLM_TENSOR_VISEXP_FFN_GATE,                        "blk.%d.vis_gate" },
534
    { LLM_TENSOR_VISEXP_FFN_DOWN,                        "blk.%d.vis_down" },
535
    { LLM_TENSOR_VISEXP_FFN_UP,                          "blk.%d.vis_up" },
536
    { LLM_TENSOR_INDEXER_K_NORM,                         "blk.%d.indexer.k_norm" },
537
    { LLM_TENSOR_INDEXER_PROJ,                           "blk.%d.indexer.proj" },
538
    { LLM_TENSOR_INDEXER_ATTN_K,                         "blk.%d.indexer.attn_k" },
539
    { LLM_TENSOR_INDEXER_ATTN_Q_B,                       "blk.%d.indexer.attn_q_b" },
540
};
541
542
0
static std::set<llm_tensor> llm_get_tensor_names(llm_arch arch) {
543
0
    switch (arch) {
544
0
        case LLM_ARCH_CLIP:
545
0
            return {};
546
0
        case LLM_ARCH_LLAMA:
547
0
        case LLM_ARCH_DECI:
548
0
        case LLM_ARCH_MISTRAL3:
549
0
        case LLM_ARCH_LLAMA_EMBED:
550
0
            return {
551
0
                LLM_TENSOR_TOKEN_EMBD,
552
0
                LLM_TENSOR_OUTPUT_NORM,
553
0
                LLM_TENSOR_OUTPUT,
554
0
                LLM_TENSOR_ROPE_FREQS,
555
0
                LLM_TENSOR_ATTN_NORM,
556
0
                LLM_TENSOR_ATTN_Q,
557
0
                LLM_TENSOR_ATTN_K,
558
0
                LLM_TENSOR_ATTN_V,
559
0
                LLM_TENSOR_ATTN_OUT,
560
0
                LLM_TENSOR_ATTN_ROT_EMBD,
561
0
                LLM_TENSOR_FFN_GATE_INP,
562
0
                LLM_TENSOR_FFN_NORM,
563
0
                LLM_TENSOR_FFN_GATE,
564
0
                LLM_TENSOR_FFN_DOWN,
565
0
                LLM_TENSOR_FFN_UP,
566
0
                LLM_TENSOR_FFN_GATE_EXP,
567
0
                LLM_TENSOR_FFN_DOWN_EXP,
568
0
                LLM_TENSOR_FFN_UP_EXP,
569
0
                LLM_TENSOR_FFN_GATE_EXPS,
570
0
                LLM_TENSOR_FFN_DOWN_EXPS,
571
0
                LLM_TENSOR_FFN_UP_EXPS,
572
0
            };
573
0
        case LLM_ARCH_ARCEE:
574
0
        case LLM_ARCH_STARCODER2:
575
0
        case LLM_ARCH_NEMOTRON:
576
0
            return {
577
0
                LLM_TENSOR_TOKEN_EMBD,
578
0
                LLM_TENSOR_OUTPUT_NORM,
579
0
                LLM_TENSOR_OUTPUT,
580
0
                LLM_TENSOR_ROPE_FREQS,
581
0
                LLM_TENSOR_ATTN_NORM,
582
0
                LLM_TENSOR_ATTN_Q,
583
0
                LLM_TENSOR_ATTN_K,
584
0
                LLM_TENSOR_ATTN_V,
585
0
                LLM_TENSOR_ATTN_OUT,
586
0
                LLM_TENSOR_ATTN_ROT_EMBD,
587
0
                LLM_TENSOR_FFN_NORM,
588
0
                LLM_TENSOR_FFN_DOWN,
589
0
                LLM_TENSOR_FFN_UP,
590
0
            };
591
0
        case LLM_ARCH_AFMOE:
592
0
            return {
593
0
                LLM_TENSOR_TOKEN_EMBD,
594
0
                LLM_TENSOR_OUTPUT_NORM,
595
0
                LLM_TENSOR_OUTPUT,
596
0
                LLM_TENSOR_ATTN_NORM,
597
0
                LLM_TENSOR_ATTN_POST_NORM,
598
0
                LLM_TENSOR_ATTN_Q,
599
0
                LLM_TENSOR_ATTN_K,
600
0
                LLM_TENSOR_ATTN_V,
601
0
                LLM_TENSOR_ATTN_OUT,
602
0
                LLM_TENSOR_ATTN_Q_NORM,
603
0
                LLM_TENSOR_ATTN_K_NORM,
604
0
                LLM_TENSOR_ATTN_GATE,
605
0
                LLM_TENSOR_FFN_NORM,
606
0
                LLM_TENSOR_FFN_POST_NORM,
607
0
                LLM_TENSOR_FFN_GATE_INP,
608
0
                LLM_TENSOR_FFN_GATE,
609
0
                LLM_TENSOR_FFN_DOWN,
610
0
                LLM_TENSOR_FFN_UP,
611
0
                LLM_TENSOR_FFN_GATE_EXPS,
612
0
                LLM_TENSOR_FFN_DOWN_EXPS,
613
0
                LLM_TENSOR_FFN_UP_EXPS,
614
0
                LLM_TENSOR_FFN_GATE_SHEXP,
615
0
                LLM_TENSOR_FFN_UP_SHEXP,
616
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
617
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
618
0
            };
619
0
        case LLM_ARCH_LLAMA4:
620
0
            return {
621
0
                LLM_TENSOR_TOKEN_EMBD,
622
0
                LLM_TENSOR_OUTPUT_NORM,
623
0
                LLM_TENSOR_OUTPUT,
624
0
                LLM_TENSOR_ROPE_FREQS,
625
0
                LLM_TENSOR_ATTN_NORM,
626
0
                LLM_TENSOR_ATTN_Q,
627
0
                LLM_TENSOR_ATTN_K,
628
0
                LLM_TENSOR_ATTN_V,
629
0
                LLM_TENSOR_ATTN_OUT,
630
0
                LLM_TENSOR_ATTN_ROT_EMBD,
631
0
                LLM_TENSOR_FFN_GATE_INP,
632
0
                LLM_TENSOR_FFN_NORM,
633
0
                LLM_TENSOR_FFN_GATE,
634
0
                LLM_TENSOR_FFN_DOWN,
635
0
                LLM_TENSOR_FFN_UP,
636
0
                LLM_TENSOR_FFN_GATE_EXP,
637
0
                LLM_TENSOR_FFN_DOWN_EXP,
638
0
                LLM_TENSOR_FFN_UP_EXP,
639
0
                LLM_TENSOR_FFN_GATE_EXPS,
640
0
                LLM_TENSOR_FFN_DOWN_EXPS,
641
0
                LLM_TENSOR_FFN_UP_EXPS,
642
0
                LLM_TENSOR_FFN_GATE_SHEXP,
643
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
644
0
                LLM_TENSOR_FFN_UP_SHEXP,
645
0
            };
646
0
        case LLM_ARCH_BAICHUAN:
647
0
        case LLM_ARCH_ORION:
648
0
        case LLM_ARCH_XVERSE:
649
0
        case LLM_ARCH_EXAONE:
650
0
            return {
651
0
                LLM_TENSOR_TOKEN_EMBD,
652
0
                LLM_TENSOR_OUTPUT_NORM,
653
0
                LLM_TENSOR_OUTPUT,
654
0
                LLM_TENSOR_ROPE_FREQS,
655
0
                LLM_TENSOR_ATTN_NORM,
656
0
                LLM_TENSOR_ATTN_Q,
657
0
                LLM_TENSOR_ATTN_K,
658
0
                LLM_TENSOR_ATTN_V,
659
0
                LLM_TENSOR_ATTN_OUT,
660
0
                LLM_TENSOR_ATTN_ROT_EMBD,
661
0
                LLM_TENSOR_FFN_NORM,
662
0
                LLM_TENSOR_FFN_GATE,
663
0
                LLM_TENSOR_FFN_DOWN,
664
0
                LLM_TENSOR_FFN_UP,
665
0
            };
666
0
        case LLM_ARCH_FALCON:
667
0
            return {
668
0
                LLM_TENSOR_TOKEN_EMBD,
669
0
                LLM_TENSOR_OUTPUT_NORM,
670
0
                LLM_TENSOR_OUTPUT,
671
0
                LLM_TENSOR_ATTN_NORM,
672
0
                LLM_TENSOR_ATTN_NORM_2,
673
0
                LLM_TENSOR_ATTN_QKV,
674
0
                LLM_TENSOR_ATTN_OUT,
675
0
                LLM_TENSOR_FFN_DOWN,
676
0
                LLM_TENSOR_FFN_UP,
677
0
            };
678
0
        case LLM_ARCH_GROK:
679
0
            return {
680
0
                LLM_TENSOR_TOKEN_EMBD,
681
0
                LLM_TENSOR_OUTPUT_NORM,
682
0
                LLM_TENSOR_OUTPUT,
683
0
                LLM_TENSOR_ROPE_FREQS,
684
0
                LLM_TENSOR_ATTN_NORM,
685
0
                LLM_TENSOR_ATTN_Q,
686
0
                LLM_TENSOR_ATTN_K,
687
0
                LLM_TENSOR_ATTN_V,
688
0
                LLM_TENSOR_ATTN_OUT,
689
0
                LLM_TENSOR_ATTN_ROT_EMBD,
690
0
                LLM_TENSOR_FFN_GATE_INP,
691
0
                LLM_TENSOR_FFN_NORM,
692
0
                LLM_TENSOR_FFN_GATE,
693
0
                LLM_TENSOR_FFN_DOWN,
694
0
                LLM_TENSOR_FFN_UP,
695
0
                LLM_TENSOR_FFN_GATE_EXP,
696
0
                LLM_TENSOR_FFN_DOWN_EXP,
697
0
                LLM_TENSOR_FFN_UP_EXP,
698
0
                LLM_TENSOR_FFN_GATE_EXPS,
699
0
                LLM_TENSOR_FFN_DOWN_EXPS,
700
0
                LLM_TENSOR_FFN_UP_EXPS,
701
0
                LLM_TENSOR_FFN_POST_NORM,
702
0
                LLM_TENSOR_LAYER_OUT_NORM,
703
0
                LLM_TENSOR_ATTN_OUT_NORM,
704
0
            };
705
0
        case LLM_ARCH_GPT2:
706
0
        case LLM_ARCH_STARCODER:
707
0
            return {
708
0
                LLM_TENSOR_TOKEN_EMBD,
709
0
                LLM_TENSOR_POS_EMBD,
710
0
                LLM_TENSOR_OUTPUT_NORM,
711
0
                LLM_TENSOR_OUTPUT,
712
0
                LLM_TENSOR_ATTN_NORM,
713
0
                LLM_TENSOR_ATTN_QKV,
714
0
                LLM_TENSOR_ATTN_OUT,
715
0
                LLM_TENSOR_FFN_NORM,
716
0
                LLM_TENSOR_FFN_UP,
717
0
                LLM_TENSOR_FFN_DOWN,
718
0
            };
719
0
        case LLM_ARCH_GPTNEOX:
720
0
            return {
721
0
                LLM_TENSOR_TOKEN_EMBD,
722
0
                LLM_TENSOR_OUTPUT_NORM,
723
0
                LLM_TENSOR_OUTPUT,
724
0
                LLM_TENSOR_ATTN_NORM,
725
0
                LLM_TENSOR_ATTN_QKV,
726
0
                LLM_TENSOR_ATTN_OUT,
727
0
                LLM_TENSOR_FFN_NORM,
728
0
                LLM_TENSOR_FFN_DOWN,
729
0
                LLM_TENSOR_FFN_UP,
730
0
            };
731
0
        case LLM_ARCH_MPT:
732
0
            return {
733
0
                LLM_TENSOR_TOKEN_EMBD,
734
0
                LLM_TENSOR_OUTPUT_NORM,
735
0
                LLM_TENSOR_OUTPUT,
736
0
                LLM_TENSOR_ATTN_NORM,
737
0
                LLM_TENSOR_FFN_NORM,
738
0
                LLM_TENSOR_ATTN_QKV,
739
0
                LLM_TENSOR_ATTN_OUT,
740
0
                LLM_TENSOR_FFN_DOWN,
741
0
                LLM_TENSOR_FFN_UP,
742
0
                LLM_TENSOR_FFN_ACT,
743
0
                LLM_TENSOR_POS_EMBD,
744
0
                LLM_TENSOR_ATTN_Q_NORM,
745
0
                LLM_TENSOR_ATTN_K_NORM,
746
0
            };
747
0
        case LLM_ARCH_REFACT:
748
0
        case LLM_ARCH_QWEN2:
749
0
        case LLM_ARCH_QWEN2VL:
750
0
        case LLM_ARCH_INTERNLM2:
751
0
        case LLM_ARCH_GRANITE:
752
0
        case LLM_ARCH_ERNIE4_5:
753
0
        case LLM_ARCH_PADDLEOCR:
754
0
        case LLM_ARCH_SMOLLM3:
755
0
        case LLM_ARCH_DREAM:
756
0
        case LLM_ARCH_LLADA:
757
0
        case LLM_ARCH_PANGU_EMBED:
758
0
            return {
759
0
                LLM_TENSOR_TOKEN_EMBD,
760
0
                LLM_TENSOR_OUTPUT_NORM,
761
0
                LLM_TENSOR_OUTPUT,
762
0
                LLM_TENSOR_ATTN_NORM,
763
0
                LLM_TENSOR_ATTN_Q,
764
0
                LLM_TENSOR_ATTN_K,
765
0
                LLM_TENSOR_ATTN_V,
766
0
                LLM_TENSOR_ATTN_OUT,
767
0
                LLM_TENSOR_FFN_NORM,
768
0
                LLM_TENSOR_FFN_GATE,
769
0
                LLM_TENSOR_FFN_DOWN,
770
0
                LLM_TENSOR_FFN_UP,
771
0
            };
772
0
        case LLM_ARCH_BERT:
773
0
            return {
774
0
                LLM_TENSOR_TOKEN_EMBD,
775
0
                LLM_TENSOR_TOKEN_EMBD_NORM,
776
0
                LLM_TENSOR_TOKEN_TYPES,
777
0
                LLM_TENSOR_POS_EMBD,
778
0
                LLM_TENSOR_ATTN_OUT_NORM,
779
0
                LLM_TENSOR_ATTN_QKV,
780
0
                LLM_TENSOR_ATTN_Q,
781
0
                LLM_TENSOR_ATTN_K,
782
0
                LLM_TENSOR_ATTN_V,
783
0
                LLM_TENSOR_ATTN_OUT,
784
0
                LLM_TENSOR_LAYER_OUT_NORM,
785
0
                LLM_TENSOR_FFN_DOWN,
786
0
                LLM_TENSOR_FFN_UP,
787
0
                LLM_TENSOR_CLS,
788
0
                LLM_TENSOR_CLS_OUT,
789
0
            };
790
0
        case LLM_ARCH_NOMIC_BERT:
791
0
            return {
792
0
                LLM_TENSOR_TOKEN_EMBD,
793
0
                LLM_TENSOR_TOKEN_EMBD_NORM,
794
0
                LLM_TENSOR_TOKEN_TYPES,
795
0
                LLM_TENSOR_ATTN_OUT_NORM,
796
0
                LLM_TENSOR_ATTN_QKV,
797
0
                LLM_TENSOR_ATTN_OUT,
798
0
                LLM_TENSOR_LAYER_OUT_NORM,
799
0
                LLM_TENSOR_FFN_GATE,
800
0
                LLM_TENSOR_FFN_DOWN,
801
0
                LLM_TENSOR_FFN_UP,
802
0
            };
803
0
        case LLM_ARCH_NOMIC_BERT_MOE:
804
0
            return {
805
0
                LLM_TENSOR_TOKEN_EMBD,
806
0
                LLM_TENSOR_TOKEN_EMBD_NORM,
807
0
                LLM_TENSOR_TOKEN_TYPES,
808
0
                LLM_TENSOR_ATTN_OUT_NORM,
809
0
                LLM_TENSOR_ATTN_QKV,
810
0
                LLM_TENSOR_ATTN_OUT,
811
0
                LLM_TENSOR_LAYER_OUT_NORM,
812
0
                LLM_TENSOR_FFN_GATE,
813
0
                LLM_TENSOR_FFN_DOWN,
814
0
                LLM_TENSOR_FFN_UP,
815
0
                LLM_TENSOR_FFN_GATE_INP,
816
0
                LLM_TENSOR_FFN_DOWN_EXPS,
817
0
                LLM_TENSOR_FFN_UP_EXPS,
818
0
            };
819
0
        case LLM_ARCH_NEO_BERT:
820
0
            return {
821
0
                LLM_TENSOR_TOKEN_EMBD,
822
0
                LLM_TENSOR_ATTN_NORM,
823
0
                LLM_TENSOR_ATTN_QKV,
824
0
                LLM_TENSOR_ATTN_OUT,
825
0
                LLM_TENSOR_FFN_NORM,
826
0
                LLM_TENSOR_FFN_DOWN,
827
0
                LLM_TENSOR_FFN_UP,
828
0
                LLM_TENSOR_ENC_OUTPUT_NORM,
829
0
                LLM_TENSOR_CLS,
830
0
                LLM_TENSOR_CLS_OUT,
831
0
            };
832
0
        case LLM_ARCH_EUROBERT:
833
0
            return {
834
0
                LLM_TENSOR_TOKEN_EMBD,
835
0
                LLM_TENSOR_OUTPUT_NORM,
836
0
                LLM_TENSOR_ATTN_NORM,
837
0
                LLM_TENSOR_ATTN_Q,
838
0
                LLM_TENSOR_ATTN_K,
839
0
                LLM_TENSOR_ATTN_V,
840
0
                LLM_TENSOR_ATTN_OUT,
841
0
                LLM_TENSOR_FFN_NORM,
842
0
                LLM_TENSOR_FFN_GATE,
843
0
                LLM_TENSOR_FFN_UP,
844
0
                LLM_TENSOR_FFN_DOWN,
845
0
            };
846
0
        case LLM_ARCH_MODERN_BERT:
847
0
            return {
848
0
                LLM_TENSOR_TOKEN_EMBD,
849
0
                LLM_TENSOR_TOKEN_EMBD_NORM,
850
0
                LLM_TENSOR_OUTPUT_NORM,
851
0
                LLM_TENSOR_ATTN_NORM,
852
0
                LLM_TENSOR_ATTN_OUT,
853
0
                LLM_TENSOR_ATTN_QKV,
854
0
                LLM_TENSOR_FFN_DOWN,
855
0
                LLM_TENSOR_FFN_UP,
856
0
                LLM_TENSOR_FFN_NORM,
857
0
                LLM_TENSOR_CLS,
858
0
                LLM_TENSOR_CLS_OUT,
859
0
                LLM_TENSOR_CLS_NORM,
860
0
            };
861
0
        case LLM_ARCH_JINA_BERT_V2:
862
0
            return {
863
0
                LLM_TENSOR_TOKEN_EMBD,
864
0
                LLM_TENSOR_TOKEN_EMBD_NORM,
865
0
                LLM_TENSOR_TOKEN_TYPES,
866
0
                LLM_TENSOR_ATTN_NORM_2,
867
0
                LLM_TENSOR_ATTN_OUT_NORM,
868
0
                LLM_TENSOR_ATTN_Q,
869
0
                LLM_TENSOR_ATTN_Q_NORM,
870
0
                LLM_TENSOR_ATTN_K,
871
0
                LLM_TENSOR_ATTN_K_NORM,
872
0
                LLM_TENSOR_ATTN_V,
873
0
                LLM_TENSOR_ATTN_OUT,
874
0
                LLM_TENSOR_LAYER_OUT_NORM,
875
0
                LLM_TENSOR_FFN_DOWN,
876
0
                LLM_TENSOR_FFN_GATE,
877
0
                LLM_TENSOR_FFN_UP,
878
0
                LLM_TENSOR_CLS,
879
0
            };
880
0
        case LLM_ARCH_JINA_BERT_V3:
881
0
            return {
882
0
                LLM_TENSOR_TOKEN_EMBD,
883
0
                LLM_TENSOR_TOKEN_EMBD_NORM,
884
0
                LLM_TENSOR_TOKEN_TYPES,
885
0
                LLM_TENSOR_ATTN_OUT_NORM,
886
0
                LLM_TENSOR_ATTN_QKV,
887
0
                LLM_TENSOR_ATTN_OUT,
888
0
                LLM_TENSOR_FFN_DOWN,
889
0
                LLM_TENSOR_FFN_UP,
890
0
                LLM_TENSOR_LAYER_OUT_NORM,
891
0
            };
892
0
        case LLM_ARCH_BLOOM:
893
0
            return {
894
0
                LLM_TENSOR_TOKEN_EMBD,
895
0
                LLM_TENSOR_TOKEN_EMBD_NORM,
896
0
                LLM_TENSOR_OUTPUT_NORM,
897
0
                LLM_TENSOR_OUTPUT,
898
0
                LLM_TENSOR_ATTN_NORM,
899
0
                LLM_TENSOR_ATTN_QKV,
900
0
                LLM_TENSOR_ATTN_OUT,
901
0
                LLM_TENSOR_FFN_NORM,
902
0
                LLM_TENSOR_FFN_UP,
903
0
                LLM_TENSOR_FFN_DOWN,
904
0
            };
905
0
        case LLM_ARCH_STABLELM:
906
0
            return {
907
0
                LLM_TENSOR_TOKEN_EMBD,
908
0
                LLM_TENSOR_OUTPUT_NORM,
909
0
                LLM_TENSOR_OUTPUT,
910
0
                LLM_TENSOR_ROPE_FREQS,
911
0
                LLM_TENSOR_ATTN_NORM,
912
0
                LLM_TENSOR_ATTN_Q,
913
0
                LLM_TENSOR_ATTN_K,
914
0
                LLM_TENSOR_ATTN_V,
915
0
                LLM_TENSOR_ATTN_OUT,
916
0
                LLM_TENSOR_FFN_NORM,
917
0
                LLM_TENSOR_FFN_GATE,
918
0
                LLM_TENSOR_FFN_DOWN,
919
0
                LLM_TENSOR_FFN_UP,
920
0
                LLM_TENSOR_ATTN_Q_NORM,
921
0
                LLM_TENSOR_ATTN_K_NORM,
922
0
            };
923
0
        case LLM_ARCH_QWEN:
924
0
            return {
925
0
                LLM_TENSOR_TOKEN_EMBD,
926
0
                LLM_TENSOR_OUTPUT_NORM,
927
0
                LLM_TENSOR_OUTPUT,
928
0
                LLM_TENSOR_ROPE_FREQS,
929
0
                LLM_TENSOR_ATTN_NORM,
930
0
                LLM_TENSOR_ATTN_QKV,
931
0
                LLM_TENSOR_ATTN_OUT,
932
0
                LLM_TENSOR_FFN_NORM,
933
0
                LLM_TENSOR_FFN_GATE,
934
0
                LLM_TENSOR_FFN_DOWN,
935
0
                LLM_TENSOR_FFN_UP,
936
0
            };
937
0
        case LLM_ARCH_QWEN2MOE:
938
0
            return {
939
0
                LLM_TENSOR_TOKEN_EMBD,
940
0
                LLM_TENSOR_OUTPUT_NORM,
941
0
                LLM_TENSOR_OUTPUT,
942
0
                LLM_TENSOR_ATTN_NORM,
943
0
                LLM_TENSOR_ATTN_Q,
944
0
                LLM_TENSOR_ATTN_K,
945
0
                LLM_TENSOR_ATTN_V,
946
0
                LLM_TENSOR_ATTN_OUT,
947
0
                LLM_TENSOR_FFN_NORM,
948
0
                LLM_TENSOR_FFN_GATE_INP,
949
0
                LLM_TENSOR_FFN_GATE_EXPS,
950
0
                LLM_TENSOR_FFN_DOWN_EXPS,
951
0
                LLM_TENSOR_FFN_UP_EXPS,
952
0
                LLM_TENSOR_FFN_GATE_INP_SHEXP,
953
0
                LLM_TENSOR_FFN_GATE_SHEXP,
954
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
955
0
                LLM_TENSOR_FFN_UP_SHEXP,
956
0
            };
957
0
        case LLM_ARCH_QWEN3:
958
0
            return {
959
0
                LLM_TENSOR_TOKEN_EMBD,
960
0
                LLM_TENSOR_OUTPUT_NORM,
961
0
                LLM_TENSOR_OUTPUT,
962
0
                LLM_TENSOR_CLS_OUT,
963
0
                LLM_TENSOR_ATTN_NORM,
964
0
                LLM_TENSOR_ATTN_Q,
965
0
                LLM_TENSOR_ATTN_Q_NORM,
966
0
                LLM_TENSOR_ATTN_K,
967
0
                LLM_TENSOR_ATTN_K_NORM,
968
0
                LLM_TENSOR_ATTN_V,
969
0
                LLM_TENSOR_ATTN_OUT,
970
0
                LLM_TENSOR_FFN_NORM,
971
0
                LLM_TENSOR_FFN_GATE,
972
0
                LLM_TENSOR_FFN_DOWN,
973
0
                LLM_TENSOR_FFN_UP,
974
0
            };
975
0
        case LLM_ARCH_QWEN3MOE:
976
0
        case LLM_ARCH_QWEN3VLMOE:
977
0
        case LLM_ARCH_OLMOE:
978
0
        case LLM_ARCH_LLADA_MOE:
979
0
        case LLM_ARCH_RND1:
980
0
            return {
981
0
                LLM_TENSOR_TOKEN_EMBD,
982
0
                LLM_TENSOR_OUTPUT_NORM,
983
0
                LLM_TENSOR_OUTPUT,
984
0
                LLM_TENSOR_ATTN_NORM,
985
0
                LLM_TENSOR_ATTN_Q,
986
0
                LLM_TENSOR_ATTN_Q_NORM,
987
0
                LLM_TENSOR_ATTN_K,
988
0
                LLM_TENSOR_ATTN_K_NORM,
989
0
                LLM_TENSOR_ATTN_V,
990
0
                LLM_TENSOR_ATTN_OUT,
991
0
                LLM_TENSOR_FFN_NORM,
992
0
                LLM_TENSOR_FFN_GATE_INP,
993
0
                LLM_TENSOR_FFN_GATE_EXPS,
994
0
                LLM_TENSOR_FFN_DOWN_EXPS,
995
0
                LLM_TENSOR_FFN_UP_EXPS,
996
0
            };
997
0
        case LLM_ARCH_QWEN3NEXT:
998
0
            return {
999
0
                LLM_TENSOR_TOKEN_EMBD,
1000
0
                LLM_TENSOR_OUTPUT_NORM,
1001
0
                LLM_TENSOR_OUTPUT,
1002
0
                LLM_TENSOR_ATTN_NORM,
1003
0
                LLM_TENSOR_ATTN_POST_NORM,
1004
0
                LLM_TENSOR_ATTN_Q,
1005
0
                LLM_TENSOR_ATTN_Q_NORM,
1006
0
                LLM_TENSOR_ATTN_K,
1007
0
                LLM_TENSOR_ATTN_K_NORM,
1008
0
                LLM_TENSOR_ATTN_V,
1009
0
                LLM_TENSOR_ATTN_OUT,
1010
0
                LLM_TENSOR_ATTN_QKV,
1011
0
                LLM_TENSOR_ATTN_GATE,
1012
0
                LLM_TENSOR_FFN_GATE_INP,
1013
0
                LLM_TENSOR_FFN_GATE_EXPS,
1014
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1015
0
                LLM_TENSOR_FFN_UP_EXPS,
1016
0
                LLM_TENSOR_FFN_GATE_UP_EXPS,
1017
0
                LLM_TENSOR_FFN_GATE_INP_SHEXP,
1018
0
                LLM_TENSOR_FFN_GATE_SHEXP,
1019
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
1020
0
                LLM_TENSOR_FFN_UP_SHEXP,
1021
0
                LLM_TENSOR_SSM_A_NOSCAN,
1022
0
                LLM_TENSOR_SSM_CONV1D,
1023
0
                LLM_TENSOR_SSM_DT,
1024
0
                LLM_TENSOR_SSM_BETA_ALPHA,
1025
0
                LLM_TENSOR_SSM_IN,
1026
0
                LLM_TENSOR_SSM_NORM,
1027
0
                LLM_TENSOR_SSM_OUT,
1028
0
            };
1029
0
        case LLM_ARCH_QWEN35:
1030
0
            return {
1031
0
                LLM_TENSOR_TOKEN_EMBD,
1032
0
                LLM_TENSOR_OUTPUT_NORM,
1033
0
                LLM_TENSOR_OUTPUT,
1034
0
                LLM_TENSOR_ATTN_NORM,
1035
0
                LLM_TENSOR_ATTN_POST_NORM,
1036
0
                LLM_TENSOR_ATTN_Q,
1037
0
                LLM_TENSOR_ATTN_Q_NORM,
1038
0
                LLM_TENSOR_ATTN_K,
1039
0
                LLM_TENSOR_ATTN_K_NORM,
1040
0
                LLM_TENSOR_ATTN_V,
1041
0
                LLM_TENSOR_ATTN_OUT,
1042
0
                LLM_TENSOR_ATTN_QKV,
1043
0
                LLM_TENSOR_ATTN_GATE,
1044
0
                LLM_TENSOR_FFN_GATE,
1045
0
                LLM_TENSOR_FFN_DOWN,
1046
0
                LLM_TENSOR_FFN_UP,
1047
0
                LLM_TENSOR_SSM_A_NOSCAN,
1048
0
                LLM_TENSOR_SSM_CONV1D,
1049
0
                LLM_TENSOR_SSM_DT,
1050
0
                LLM_TENSOR_SSM_BETA,
1051
0
                LLM_TENSOR_SSM_ALPHA,
1052
0
                LLM_TENSOR_SSM_NORM,
1053
0
                LLM_TENSOR_SSM_OUT,
1054
0
            };
1055
0
        case LLM_ARCH_QWEN35MOE:
1056
0
            return {
1057
0
                LLM_TENSOR_TOKEN_EMBD,
1058
0
                LLM_TENSOR_OUTPUT_NORM,
1059
0
                LLM_TENSOR_OUTPUT,
1060
0
                LLM_TENSOR_ATTN_NORM,
1061
0
                LLM_TENSOR_ATTN_POST_NORM,
1062
0
                LLM_TENSOR_ATTN_Q,
1063
0
                LLM_TENSOR_ATTN_Q_NORM,
1064
0
                LLM_TENSOR_ATTN_K,
1065
0
                LLM_TENSOR_ATTN_K_NORM,
1066
0
                LLM_TENSOR_ATTN_V,
1067
0
                LLM_TENSOR_ATTN_OUT,
1068
0
                LLM_TENSOR_ATTN_QKV,
1069
0
                LLM_TENSOR_ATTN_GATE,
1070
0
                LLM_TENSOR_FFN_GATE_INP,
1071
0
                LLM_TENSOR_FFN_GATE_EXPS,
1072
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1073
0
                LLM_TENSOR_FFN_UP_EXPS,
1074
0
                LLM_TENSOR_FFN_GATE_UP_EXPS,
1075
0
                LLM_TENSOR_FFN_GATE_INP_SHEXP,
1076
0
                LLM_TENSOR_FFN_GATE_SHEXP,
1077
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
1078
0
                LLM_TENSOR_FFN_UP_SHEXP,
1079
0
                LLM_TENSOR_SSM_A_NOSCAN,
1080
0
                LLM_TENSOR_SSM_CONV1D,
1081
0
                LLM_TENSOR_SSM_DT,
1082
0
                LLM_TENSOR_SSM_BETA,
1083
0
                LLM_TENSOR_SSM_ALPHA,
1084
0
                LLM_TENSOR_SSM_NORM,
1085
0
                LLM_TENSOR_SSM_OUT,
1086
0
            };
1087
0
        case LLM_ARCH_QWEN3VL:
1088
0
        case LLM_ARCH_CHAMELEON:
1089
0
        case LLM_ARCH_HUNYUAN_DENSE:
1090
0
            return {
1091
0
                LLM_TENSOR_TOKEN_EMBD,
1092
0
                LLM_TENSOR_OUTPUT_NORM,
1093
0
                LLM_TENSOR_OUTPUT,
1094
0
                LLM_TENSOR_CLS_OUT,
1095
0
                LLM_TENSOR_ATTN_NORM,
1096
0
                LLM_TENSOR_ATTN_Q,
1097
0
                LLM_TENSOR_ATTN_Q_NORM,
1098
0
                LLM_TENSOR_ATTN_K,
1099
0
                LLM_TENSOR_ATTN_K_NORM,
1100
0
                LLM_TENSOR_ATTN_V,
1101
0
                LLM_TENSOR_ATTN_OUT,
1102
0
                LLM_TENSOR_FFN_NORM,
1103
0
                LLM_TENSOR_FFN_GATE,
1104
0
                LLM_TENSOR_FFN_DOWN,
1105
0
                LLM_TENSOR_FFN_UP,
1106
0
            };
1107
0
        case LLM_ARCH_PHI2:
1108
0
            return {
1109
0
                LLM_TENSOR_TOKEN_EMBD,
1110
0
                LLM_TENSOR_OUTPUT_NORM,
1111
0
                LLM_TENSOR_OUTPUT,
1112
0
                LLM_TENSOR_ATTN_NORM,
1113
0
                LLM_TENSOR_ATTN_QKV,
1114
0
                LLM_TENSOR_ATTN_Q,
1115
0
                LLM_TENSOR_ATTN_K,
1116
0
                LLM_TENSOR_ATTN_V,
1117
0
                LLM_TENSOR_ATTN_OUT,
1118
0
                LLM_TENSOR_FFN_DOWN,
1119
0
                LLM_TENSOR_FFN_UP,
1120
0
            };
1121
0
        case LLM_ARCH_PHI3:
1122
0
            return {
1123
0
                LLM_TENSOR_TOKEN_EMBD,
1124
0
                LLM_TENSOR_OUTPUT_NORM,
1125
0
                LLM_TENSOR_OUTPUT,
1126
0
                LLM_TENSOR_ROPE_FACTORS_LONG,
1127
0
                LLM_TENSOR_ROPE_FACTORS_SHORT,
1128
0
                LLM_TENSOR_ATTN_NORM,
1129
0
                LLM_TENSOR_ATTN_QKV,
1130
0
                LLM_TENSOR_ATTN_Q,
1131
0
                LLM_TENSOR_ATTN_K,
1132
0
                LLM_TENSOR_ATTN_V,
1133
0
                LLM_TENSOR_ATTN_OUT,
1134
0
                LLM_TENSOR_FFN_NORM,
1135
0
                LLM_TENSOR_FFN_DOWN,
1136
0
                LLM_TENSOR_FFN_UP,
1137
0
            };
1138
0
        case LLM_ARCH_PHIMOE:
1139
0
            return {
1140
0
                LLM_TENSOR_TOKEN_EMBD,
1141
0
                LLM_TENSOR_OUTPUT_NORM,
1142
0
                LLM_TENSOR_OUTPUT,
1143
0
                LLM_TENSOR_ROPE_FACTORS_LONG,
1144
0
                LLM_TENSOR_ROPE_FACTORS_SHORT,
1145
0
                LLM_TENSOR_ATTN_NORM,
1146
0
                LLM_TENSOR_ATTN_QKV,
1147
0
                LLM_TENSOR_ATTN_Q,
1148
0
                LLM_TENSOR_ATTN_K,
1149
0
                LLM_TENSOR_ATTN_V,
1150
0
                LLM_TENSOR_ATTN_OUT,
1151
0
                LLM_TENSOR_FFN_NORM,
1152
0
                LLM_TENSOR_FFN_GATE_INP,
1153
0
                LLM_TENSOR_FFN_GATE_EXPS,
1154
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1155
0
                LLM_TENSOR_FFN_UP_EXPS,
1156
0
            };
1157
0
        case LLM_ARCH_PLAMO:
1158
0
            return {
1159
0
                LLM_TENSOR_TOKEN_EMBD,
1160
0
                LLM_TENSOR_OUTPUT_NORM,
1161
0
                LLM_TENSOR_OUTPUT,
1162
0
                LLM_TENSOR_ROPE_FREQS,
1163
0
                LLM_TENSOR_ATTN_NORM,
1164
0
                LLM_TENSOR_ATTN_Q,
1165
0
                LLM_TENSOR_ATTN_K,
1166
0
                LLM_TENSOR_ATTN_V,
1167
0
                LLM_TENSOR_ATTN_OUT,
1168
0
                LLM_TENSOR_ATTN_ROT_EMBD,
1169
0
                LLM_TENSOR_FFN_GATE,
1170
0
                LLM_TENSOR_FFN_DOWN,
1171
0
                LLM_TENSOR_FFN_UP,
1172
0
            };
1173
0
        case LLM_ARCH_PLAMO2:
1174
0
            return {
1175
0
                LLM_TENSOR_TOKEN_EMBD,
1176
0
                LLM_TENSOR_OUTPUT_NORM,
1177
0
                LLM_TENSOR_OUTPUT,
1178
0
                LLM_TENSOR_ROPE_FREQS,
1179
0
                LLM_TENSOR_ATTN_NORM,
1180
0
                LLM_TENSOR_ATTN_QKV,
1181
0
                LLM_TENSOR_ATTN_Q_NORM,
1182
0
                LLM_TENSOR_ATTN_K_NORM,
1183
0
                LLM_TENSOR_ATTN_OUT,
1184
0
                LLM_TENSOR_ATTN_ROT_EMBD,
1185
0
                LLM_TENSOR_FFN_NORM,
1186
0
                LLM_TENSOR_FFN_DOWN,
1187
0
                LLM_TENSOR_FFN_UP,
1188
0
                LLM_TENSOR_SSM_IN,
1189
0
                LLM_TENSOR_SSM_CONV1D,
1190
0
                LLM_TENSOR_SSM_X,
1191
0
                LLM_TENSOR_SSM_DT,
1192
0
                LLM_TENSOR_SSM_A,
1193
0
                LLM_TENSOR_SSM_D,
1194
0
                LLM_TENSOR_SSM_OUT,
1195
0
                LLM_TENSOR_SSM_DT_NORM,
1196
0
                LLM_TENSOR_SSM_B_NORM,
1197
0
                LLM_TENSOR_SSM_C_NORM,
1198
0
                LLM_TENSOR_ATTN_POST_NORM,
1199
0
                LLM_TENSOR_FFN_POST_NORM,
1200
0
            };
1201
0
        case LLM_ARCH_PLAMO3:
1202
0
            return {
1203
0
                LLM_TENSOR_TOKEN_EMBD,
1204
0
                LLM_TENSOR_OUTPUT_NORM,
1205
0
                LLM_TENSOR_OUTPUT,
1206
0
                LLM_TENSOR_ATTN_NORM,
1207
0
                LLM_TENSOR_ATTN_QKV,
1208
0
                LLM_TENSOR_ATTN_Q_NORM,
1209
0
                LLM_TENSOR_ATTN_K_NORM,
1210
0
                LLM_TENSOR_ATTN_OUT,
1211
0
                LLM_TENSOR_ATTN_POST_NORM,
1212
0
                LLM_TENSOR_FFN_NORM,
1213
0
                LLM_TENSOR_FFN_POST_NORM,
1214
0
                LLM_TENSOR_FFN_DOWN,
1215
0
                LLM_TENSOR_FFN_UP,
1216
0
            };
1217
0
        case LLM_ARCH_CODESHELL:
1218
0
            return {
1219
0
                LLM_TENSOR_TOKEN_EMBD,
1220
0
                LLM_TENSOR_OUTPUT_NORM,
1221
0
                LLM_TENSOR_OUTPUT,
1222
0
                LLM_TENSOR_ROPE_FREQS,
1223
0
                LLM_TENSOR_ATTN_NORM,
1224
0
                LLM_TENSOR_ATTN_Q,
1225
0
                LLM_TENSOR_ATTN_K,
1226
0
                LLM_TENSOR_ATTN_V,
1227
0
                LLM_TENSOR_ATTN_QKV,
1228
0
                LLM_TENSOR_ATTN_OUT,
1229
0
                LLM_TENSOR_ATTN_ROT_EMBD,
1230
0
                LLM_TENSOR_FFN_NORM,
1231
0
                LLM_TENSOR_FFN_GATE,
1232
0
                LLM_TENSOR_FFN_DOWN,
1233
0
                LLM_TENSOR_FFN_UP,
1234
0
            };
1235
0
        case LLM_ARCH_MINICPM:
1236
0
            return {
1237
0
                LLM_TENSOR_TOKEN_EMBD,
1238
0
                LLM_TENSOR_OUTPUT_NORM,
1239
0
                LLM_TENSOR_OUTPUT,
1240
0
                LLM_TENSOR_ROPE_FREQS,
1241
0
                LLM_TENSOR_ROPE_FACTORS_LONG,
1242
0
                LLM_TENSOR_ROPE_FACTORS_SHORT,
1243
0
                LLM_TENSOR_ATTN_NORM,
1244
0
                LLM_TENSOR_ATTN_Q,
1245
0
                LLM_TENSOR_ATTN_K,
1246
0
                LLM_TENSOR_ATTN_V,
1247
0
                LLM_TENSOR_ATTN_OUT,
1248
0
                LLM_TENSOR_ATTN_ROT_EMBD,
1249
0
                LLM_TENSOR_FFN_GATE_INP,
1250
0
                LLM_TENSOR_FFN_NORM,
1251
0
                LLM_TENSOR_FFN_GATE,
1252
0
                LLM_TENSOR_FFN_DOWN,
1253
0
                LLM_TENSOR_FFN_UP,
1254
0
                LLM_TENSOR_FFN_GATE_EXP,
1255
0
                LLM_TENSOR_FFN_DOWN_EXP,
1256
0
                LLM_TENSOR_FFN_UP_EXP,
1257
0
            };
1258
0
        case LLM_ARCH_MINICPM3:
1259
0
            return {
1260
0
                LLM_TENSOR_TOKEN_EMBD,
1261
0
                LLM_TENSOR_OUTPUT_NORM,
1262
0
                LLM_TENSOR_OUTPUT,
1263
0
                LLM_TENSOR_ROPE_FACTORS_LONG,
1264
0
                LLM_TENSOR_ROPE_FACTORS_SHORT,
1265
0
                LLM_TENSOR_ATTN_NORM,
1266
0
                LLM_TENSOR_ATTN_Q_A_NORM,
1267
0
                LLM_TENSOR_ATTN_KV_A_NORM,
1268
0
                LLM_TENSOR_ATTN_Q,
1269
0
                LLM_TENSOR_ATTN_Q_A,
1270
0
                LLM_TENSOR_ATTN_Q_B,
1271
0
                LLM_TENSOR_ATTN_KV_A_MQA,
1272
0
                LLM_TENSOR_ATTN_KV_B,
1273
0
                LLM_TENSOR_ATTN_OUT,
1274
0
                LLM_TENSOR_FFN_NORM,
1275
0
                LLM_TENSOR_FFN_GATE,
1276
0
                LLM_TENSOR_FFN_UP,
1277
0
                LLM_TENSOR_FFN_DOWN,
1278
0
            };
1279
0
        case LLM_ARCH_GEMMA:
1280
0
            return {
1281
0
                LLM_TENSOR_TOKEN_EMBD,
1282
0
                LLM_TENSOR_OUTPUT_NORM,
1283
0
                LLM_TENSOR_ATTN_NORM,
1284
0
                LLM_TENSOR_ATTN_Q,
1285
0
                LLM_TENSOR_ATTN_K,
1286
0
                LLM_TENSOR_ATTN_V,
1287
0
                LLM_TENSOR_ATTN_OUT,
1288
0
                LLM_TENSOR_FFN_NORM,
1289
0
                LLM_TENSOR_FFN_GATE,
1290
0
                LLM_TENSOR_FFN_DOWN,
1291
0
                LLM_TENSOR_FFN_UP,
1292
0
            };
1293
0
        case LLM_ARCH_GEMMA2:
1294
0
            return {
1295
0
                LLM_TENSOR_TOKEN_EMBD,
1296
0
                LLM_TENSOR_OUTPUT_NORM,
1297
0
                LLM_TENSOR_ATTN_NORM,
1298
0
                LLM_TENSOR_ATTN_Q,
1299
0
                LLM_TENSOR_ATTN_K,
1300
0
                LLM_TENSOR_ATTN_V,
1301
0
                LLM_TENSOR_ATTN_OUT,
1302
0
                LLM_TENSOR_ATTN_POST_NORM,
1303
0
                LLM_TENSOR_FFN_NORM,
1304
0
                LLM_TENSOR_FFN_GATE,
1305
0
                LLM_TENSOR_FFN_DOWN,
1306
0
                LLM_TENSOR_FFN_UP,
1307
0
                LLM_TENSOR_FFN_POST_NORM,
1308
0
            };
1309
0
        case LLM_ARCH_GEMMA3:
1310
0
            return {
1311
0
                LLM_TENSOR_TOKEN_EMBD,
1312
0
                LLM_TENSOR_OUTPUT_NORM,
1313
0
                LLM_TENSOR_OUTPUT,
1314
0
                LLM_TENSOR_ATTN_NORM,
1315
0
                LLM_TENSOR_ATTN_Q,
1316
0
                LLM_TENSOR_ATTN_Q_NORM,
1317
0
                LLM_TENSOR_ATTN_K,
1318
0
                LLM_TENSOR_ATTN_K_NORM,
1319
0
                LLM_TENSOR_ATTN_V,
1320
0
                LLM_TENSOR_ATTN_OUT,
1321
0
                LLM_TENSOR_ATTN_POST_NORM,
1322
0
                LLM_TENSOR_FFN_NORM,
1323
0
                LLM_TENSOR_FFN_GATE,
1324
0
                LLM_TENSOR_FFN_DOWN,
1325
0
                LLM_TENSOR_FFN_UP,
1326
0
                LLM_TENSOR_FFN_POST_NORM,
1327
0
            };
1328
0
        case LLM_ARCH_GEMMA3N:
1329
0
            return {
1330
0
                LLM_TENSOR_TOKEN_EMBD,
1331
0
                LLM_TENSOR_OUTPUT_NORM,
1332
0
                LLM_TENSOR_ATTN_NORM,
1333
0
                LLM_TENSOR_ATTN_Q,
1334
0
                LLM_TENSOR_ATTN_Q_NORM,
1335
0
                LLM_TENSOR_ATTN_K,
1336
0
                LLM_TENSOR_ATTN_K_NORM,
1337
0
                LLM_TENSOR_ATTN_V,
1338
0
                LLM_TENSOR_ATTN_OUT,
1339
0
                LLM_TENSOR_ATTN_POST_NORM,
1340
0
                LLM_TENSOR_FFN_NORM,
1341
0
                LLM_TENSOR_FFN_GATE,
1342
0
                LLM_TENSOR_FFN_DOWN,
1343
0
                LLM_TENSOR_FFN_UP,
1344
0
                LLM_TENSOR_FFN_POST_NORM,
1345
0
                LLM_TENSOR_PER_LAYER_TOKEN_EMBD,
1346
0
                LLM_TENSOR_PER_LAYER_MODEL_PROJ,
1347
0
                LLM_TENSOR_PER_LAYER_PROJ_NORM,
1348
0
                LLM_TENSOR_ALTUP_UNEMBD_PROJ,
1349
0
                LLM_TENSOR_ALTUP_PROJ,
1350
0
                LLM_TENSOR_PER_LAYER_INP_GATE,
1351
0
                LLM_TENSOR_PER_LAYER_PROJ,
1352
0
                LLM_TENSOR_PER_LAYER_POST_NORM,
1353
0
                LLM_TENSOR_ALTUP_CORRECT_COEF,
1354
0
                LLM_TENSOR_ALTUP_CORRECT_SCALE,
1355
0
                LLM_TENSOR_ALTUP_PREDICT_COEF,
1356
0
                LLM_TENSOR_ALTUP_ROUTER,
1357
0
                LLM_TENSOR_ALTUP_ROUTER_NORM,
1358
0
                LLM_TENSOR_LAUREL_L,
1359
0
                LLM_TENSOR_LAUREL_R,
1360
0
                LLM_TENSOR_LAUREL_POST_NORM,
1361
0
            };
1362
0
        case LLM_ARCH_GEMMA_EMBEDDING:
1363
0
            return {
1364
0
                LLM_TENSOR_TOKEN_EMBD,
1365
0
                LLM_TENSOR_OUTPUT_NORM,
1366
0
                LLM_TENSOR_OUTPUT,
1367
0
                LLM_TENSOR_DENSE_2_OUT,
1368
0
                LLM_TENSOR_DENSE_3_OUT,
1369
0
                LLM_TENSOR_ATTN_NORM,
1370
0
                LLM_TENSOR_ATTN_Q,
1371
0
                LLM_TENSOR_ATTN_Q_NORM,
1372
0
                LLM_TENSOR_ATTN_K,
1373
0
                LLM_TENSOR_ATTN_K_NORM,
1374
0
                LLM_TENSOR_ATTN_V,
1375
0
                LLM_TENSOR_ATTN_OUT,
1376
0
                LLM_TENSOR_ATTN_POST_NORM,
1377
0
                LLM_TENSOR_FFN_NORM,
1378
0
                LLM_TENSOR_FFN_GATE,
1379
0
                LLM_TENSOR_FFN_DOWN,
1380
0
                LLM_TENSOR_FFN_UP,
1381
0
                LLM_TENSOR_FFN_POST_NORM,
1382
0
            };
1383
0
        case LLM_ARCH_MAMBA:
1384
0
            return {
1385
0
                LLM_TENSOR_TOKEN_EMBD,
1386
0
                LLM_TENSOR_OUTPUT_NORM,
1387
0
                LLM_TENSOR_OUTPUT,
1388
0
                LLM_TENSOR_ATTN_NORM,
1389
0
                LLM_TENSOR_SSM_IN,
1390
0
                LLM_TENSOR_SSM_CONV1D,
1391
0
                LLM_TENSOR_SSM_X,
1392
0
                LLM_TENSOR_SSM_DT,
1393
0
                LLM_TENSOR_SSM_A,
1394
0
                LLM_TENSOR_SSM_D,
1395
0
                LLM_TENSOR_SSM_OUT,
1396
0
            };
1397
0
        case LLM_ARCH_MAMBA2:
1398
0
            return {
1399
0
                LLM_TENSOR_TOKEN_EMBD,
1400
0
                LLM_TENSOR_OUTPUT_NORM,
1401
0
                LLM_TENSOR_OUTPUT,
1402
0
                LLM_TENSOR_ATTN_NORM,
1403
0
                LLM_TENSOR_SSM_IN,
1404
0
                LLM_TENSOR_SSM_CONV1D,
1405
0
                LLM_TENSOR_SSM_DT,
1406
0
                LLM_TENSOR_SSM_A,
1407
0
                LLM_TENSOR_SSM_D,
1408
0
                LLM_TENSOR_SSM_NORM,
1409
0
                LLM_TENSOR_SSM_OUT,
1410
0
            };
1411
0
        case LLM_ARCH_JAMBA:
1412
0
            return {
1413
0
                LLM_TENSOR_TOKEN_EMBD,
1414
0
                LLM_TENSOR_OUTPUT_NORM,
1415
0
                LLM_TENSOR_OUTPUT,
1416
0
                LLM_TENSOR_ATTN_NORM,
1417
0
                LLM_TENSOR_SSM_IN,
1418
0
                LLM_TENSOR_SSM_CONV1D,
1419
0
                LLM_TENSOR_SSM_X,
1420
0
                LLM_TENSOR_SSM_DT,
1421
0
                LLM_TENSOR_SSM_DT_NORM,
1422
0
                LLM_TENSOR_SSM_A,
1423
0
                LLM_TENSOR_SSM_B_NORM,
1424
0
                LLM_TENSOR_SSM_C_NORM,
1425
0
                LLM_TENSOR_SSM_D,
1426
0
                LLM_TENSOR_SSM_OUT,
1427
0
                LLM_TENSOR_ATTN_Q,
1428
0
                LLM_TENSOR_ATTN_K,
1429
0
                LLM_TENSOR_ATTN_V,
1430
0
                LLM_TENSOR_ATTN_OUT,
1431
0
                LLM_TENSOR_FFN_GATE_INP,
1432
0
                LLM_TENSOR_FFN_NORM,
1433
0
                LLM_TENSOR_FFN_GATE,
1434
0
                LLM_TENSOR_FFN_DOWN,
1435
0
                LLM_TENSOR_FFN_UP,
1436
0
                LLM_TENSOR_FFN_GATE_EXPS,
1437
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1438
0
                LLM_TENSOR_FFN_UP_EXPS,
1439
0
            };
1440
0
        case LLM_ARCH_FALCON_H1:
1441
0
            return {
1442
0
                LLM_TENSOR_TOKEN_EMBD,
1443
0
                LLM_TENSOR_OUTPUT,
1444
0
                LLM_TENSOR_OUTPUT_NORM,
1445
0
                LLM_TENSOR_ATTN_NORM,
1446
0
                LLM_TENSOR_ATTN_Q,
1447
0
                LLM_TENSOR_ATTN_K,
1448
0
                LLM_TENSOR_ATTN_V,
1449
0
                LLM_TENSOR_ATTN_OUT,
1450
0
                LLM_TENSOR_SSM_IN,
1451
0
                LLM_TENSOR_SSM_CONV1D,
1452
0
                LLM_TENSOR_SSM_DT,
1453
0
                LLM_TENSOR_SSM_A,
1454
0
                LLM_TENSOR_SSM_D,
1455
0
                LLM_TENSOR_SSM_NORM,
1456
0
                LLM_TENSOR_SSM_OUT,
1457
0
                LLM_TENSOR_FFN_NORM,
1458
0
                LLM_TENSOR_FFN_GATE,
1459
0
                LLM_TENSOR_FFN_DOWN,
1460
0
                LLM_TENSOR_FFN_UP,
1461
0
            };
1462
0
        case LLM_ARCH_COMMAND_R:
1463
0
            return {
1464
0
                LLM_TENSOR_TOKEN_EMBD,
1465
0
                LLM_TENSOR_OUTPUT_NORM,
1466
0
                LLM_TENSOR_ATTN_NORM,
1467
0
                LLM_TENSOR_ATTN_Q,
1468
0
                LLM_TENSOR_ATTN_K,
1469
0
                LLM_TENSOR_ATTN_V,
1470
0
                LLM_TENSOR_ATTN_OUT,
1471
0
                LLM_TENSOR_FFN_GATE,
1472
0
                LLM_TENSOR_FFN_DOWN,
1473
0
                LLM_TENSOR_FFN_UP,
1474
0
                LLM_TENSOR_ATTN_Q_NORM,
1475
0
                LLM_TENSOR_ATTN_K_NORM,
1476
0
            };
1477
0
        case LLM_ARCH_COHERE2:
1478
0
            return {
1479
0
                LLM_TENSOR_TOKEN_EMBD,
1480
0
                LLM_TENSOR_OUTPUT_NORM,
1481
0
                LLM_TENSOR_ATTN_NORM,
1482
0
                LLM_TENSOR_ATTN_Q,
1483
0
                LLM_TENSOR_ATTN_K,
1484
0
                LLM_TENSOR_ATTN_V,
1485
0
                LLM_TENSOR_ATTN_OUT,
1486
0
                LLM_TENSOR_FFN_GATE,
1487
0
                LLM_TENSOR_FFN_DOWN,
1488
0
                LLM_TENSOR_FFN_UP,
1489
0
            };
1490
0
        case LLM_ARCH_DBRX:
1491
0
            return {
1492
0
                LLM_TENSOR_TOKEN_EMBD,
1493
0
                LLM_TENSOR_OUTPUT_NORM,
1494
0
                LLM_TENSOR_OUTPUT,
1495
0
                LLM_TENSOR_ATTN_QKV,
1496
0
                LLM_TENSOR_ATTN_NORM,
1497
0
                LLM_TENSOR_ATTN_OUT,
1498
0
                LLM_TENSOR_ATTN_OUT_NORM,
1499
0
                LLM_TENSOR_FFN_GATE_INP,
1500
0
                LLM_TENSOR_FFN_GATE_EXPS,
1501
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1502
0
                LLM_TENSOR_FFN_UP_EXPS,
1503
0
            };
1504
0
        case LLM_ARCH_OLMO:
1505
0
            return {
1506
0
                LLM_TENSOR_TOKEN_EMBD,
1507
0
                LLM_TENSOR_OUTPUT,
1508
0
                LLM_TENSOR_ATTN_Q,
1509
0
                LLM_TENSOR_ATTN_K,
1510
0
                LLM_TENSOR_ATTN_V,
1511
0
                LLM_TENSOR_ATTN_OUT,
1512
0
                LLM_TENSOR_FFN_GATE,
1513
0
                LLM_TENSOR_FFN_DOWN,
1514
0
                LLM_TENSOR_FFN_UP,
1515
0
            };
1516
0
        case LLM_ARCH_OLMO2:
1517
0
            return {
1518
0
                LLM_TENSOR_TOKEN_EMBD,
1519
0
                LLM_TENSOR_OUTPUT_NORM,
1520
0
                LLM_TENSOR_OUTPUT,
1521
0
                LLM_TENSOR_ATTN_Q,
1522
0
                LLM_TENSOR_ATTN_K,
1523
0
                LLM_TENSOR_ATTN_V,
1524
0
                LLM_TENSOR_ATTN_OUT,
1525
0
                LLM_TENSOR_ATTN_POST_NORM,
1526
0
                LLM_TENSOR_ATTN_Q_NORM,
1527
0
                LLM_TENSOR_ATTN_K_NORM,
1528
0
                LLM_TENSOR_FFN_POST_NORM,
1529
0
                LLM_TENSOR_FFN_GATE,
1530
0
                LLM_TENSOR_FFN_DOWN,
1531
0
                LLM_TENSOR_FFN_UP,
1532
0
            };
1533
0
        case LLM_ARCH_OPENELM:
1534
0
            return {
1535
0
                LLM_TENSOR_TOKEN_EMBD,
1536
0
                LLM_TENSOR_OUTPUT_NORM,
1537
0
                LLM_TENSOR_ATTN_NORM,
1538
0
                LLM_TENSOR_ATTN_QKV,
1539
0
                LLM_TENSOR_ATTN_Q_NORM,
1540
0
                LLM_TENSOR_ATTN_K_NORM,
1541
0
                LLM_TENSOR_ATTN_OUT,
1542
0
                LLM_TENSOR_FFN_NORM,
1543
0
                LLM_TENSOR_FFN_GATE,
1544
0
                LLM_TENSOR_FFN_DOWN,
1545
0
                LLM_TENSOR_FFN_UP,
1546
0
            };
1547
0
        case LLM_ARCH_ARCTIC:
1548
0
            return {
1549
0
                LLM_TENSOR_TOKEN_EMBD,
1550
0
                LLM_TENSOR_OUTPUT_NORM,
1551
0
                LLM_TENSOR_OUTPUT,
1552
0
                LLM_TENSOR_ATTN_NORM,
1553
0
                LLM_TENSOR_ATTN_Q,
1554
0
                LLM_TENSOR_ATTN_K,
1555
0
                LLM_TENSOR_ATTN_V,
1556
0
                LLM_TENSOR_ATTN_OUT,
1557
0
                LLM_TENSOR_FFN_GATE_INP,
1558
0
                LLM_TENSOR_FFN_NORM,
1559
0
                LLM_TENSOR_FFN_GATE,
1560
0
                LLM_TENSOR_FFN_DOWN,
1561
0
                LLM_TENSOR_FFN_UP,
1562
0
                LLM_TENSOR_FFN_NORM_EXPS,
1563
0
                LLM_TENSOR_FFN_GATE_EXPS,
1564
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1565
0
                LLM_TENSOR_FFN_UP_EXPS,
1566
0
            };
1567
0
        case LLM_ARCH_DEEPSEEK:
1568
0
            return {
1569
0
                LLM_TENSOR_TOKEN_EMBD,
1570
0
                LLM_TENSOR_OUTPUT_NORM,
1571
0
                LLM_TENSOR_OUTPUT,
1572
0
                LLM_TENSOR_ROPE_FREQS,
1573
0
                LLM_TENSOR_ATTN_NORM,
1574
0
                LLM_TENSOR_ATTN_Q,
1575
0
                LLM_TENSOR_ATTN_K,
1576
0
                LLM_TENSOR_ATTN_V,
1577
0
                LLM_TENSOR_ATTN_OUT,
1578
0
                LLM_TENSOR_ATTN_ROT_EMBD,
1579
0
                LLM_TENSOR_FFN_GATE_INP,
1580
0
                LLM_TENSOR_FFN_NORM,
1581
0
                LLM_TENSOR_FFN_GATE,
1582
0
                LLM_TENSOR_FFN_DOWN,
1583
0
                LLM_TENSOR_FFN_UP,
1584
0
                LLM_TENSOR_FFN_GATE_EXPS,
1585
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1586
0
                LLM_TENSOR_FFN_UP_EXPS,
1587
0
                LLM_TENSOR_FFN_GATE_INP_SHEXP,
1588
0
                LLM_TENSOR_FFN_GATE_SHEXP,
1589
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
1590
0
                LLM_TENSOR_FFN_UP_SHEXP,
1591
0
            };
1592
0
        case LLM_ARCH_DEEPSEEK2:
1593
0
        case LLM_ARCH_MISTRAL4:
1594
0
            return {
1595
0
                LLM_TENSOR_TOKEN_EMBD,
1596
0
                LLM_TENSOR_OUTPUT_NORM,
1597
0
                LLM_TENSOR_OUTPUT,
1598
0
                LLM_TENSOR_ATTN_NORM,
1599
0
                LLM_TENSOR_ATTN_Q_A_NORM,
1600
0
                LLM_TENSOR_ATTN_KV_A_NORM,
1601
0
                LLM_TENSOR_ATTN_Q,
1602
0
                LLM_TENSOR_ATTN_Q_A,
1603
0
                LLM_TENSOR_ATTN_Q_B,
1604
0
                LLM_TENSOR_ATTN_KV_A_MQA,
1605
0
                LLM_TENSOR_ATTN_KV_B,
1606
0
                LLM_TENSOR_ATTN_K_B,
1607
0
                LLM_TENSOR_ATTN_V_B,
1608
0
                LLM_TENSOR_ATTN_OUT,
1609
0
                LLM_TENSOR_FFN_NORM,
1610
0
                LLM_TENSOR_FFN_GATE,
1611
0
                LLM_TENSOR_FFN_UP,
1612
0
                LLM_TENSOR_FFN_DOWN,
1613
0
                LLM_TENSOR_FFN_GATE_INP,
1614
0
                LLM_TENSOR_FFN_GATE_EXPS,
1615
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1616
0
                LLM_TENSOR_FFN_UP_EXPS,
1617
0
                LLM_TENSOR_FFN_GATE_UP_EXPS,
1618
0
                LLM_TENSOR_FFN_GATE_INP_SHEXP,
1619
0
                LLM_TENSOR_FFN_GATE_SHEXP,
1620
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
1621
0
                LLM_TENSOR_FFN_UP_SHEXP,
1622
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
1623
0
            };
1624
0
        case LLM_ARCH_PLM:
1625
0
            return {
1626
0
                LLM_TENSOR_TOKEN_EMBD,
1627
0
                LLM_TENSOR_OUTPUT_NORM,
1628
0
                LLM_TENSOR_ATTN_NORM,
1629
0
                LLM_TENSOR_ATTN_Q,
1630
0
                LLM_TENSOR_ATTN_KV_A_MQA,
1631
0
                LLM_TENSOR_ATTN_KV_A_NORM,
1632
0
                LLM_TENSOR_ATTN_KV_B,
1633
0
                LLM_TENSOR_ATTN_OUT,
1634
0
                LLM_TENSOR_FFN_NORM,
1635
0
                LLM_TENSOR_FFN_DOWN,
1636
0
                LLM_TENSOR_FFN_UP,
1637
0
            };
1638
0
        case LLM_ARCH_CHATGLM:
1639
0
            return {
1640
0
                LLM_TENSOR_TOKEN_EMBD,
1641
0
                LLM_TENSOR_ROPE_FREQS,
1642
0
                LLM_TENSOR_OUTPUT_NORM,
1643
0
                LLM_TENSOR_OUTPUT,
1644
0
                LLM_TENSOR_ATTN_NORM,
1645
0
                LLM_TENSOR_ATTN_QKV,
1646
0
                LLM_TENSOR_ATTN_Q,
1647
0
                LLM_TENSOR_ATTN_K,
1648
0
                LLM_TENSOR_ATTN_V,
1649
0
                LLM_TENSOR_ATTN_OUT,
1650
0
                LLM_TENSOR_FFN_NORM,
1651
0
                LLM_TENSOR_FFN_UP,
1652
0
                LLM_TENSOR_FFN_DOWN,
1653
0
            };
1654
0
        case LLM_ARCH_GLM4:
1655
0
            return {
1656
0
                LLM_TENSOR_TOKEN_EMBD,
1657
0
                LLM_TENSOR_ROPE_FREQS,
1658
0
                LLM_TENSOR_OUTPUT_NORM,
1659
0
                LLM_TENSOR_OUTPUT,
1660
0
                LLM_TENSOR_ATTN_NORM,
1661
0
                LLM_TENSOR_ATTN_Q,
1662
0
                LLM_TENSOR_ATTN_K,
1663
0
                LLM_TENSOR_ATTN_V,
1664
0
                LLM_TENSOR_ATTN_OUT,
1665
0
                LLM_TENSOR_FFN_NORM,
1666
0
                LLM_TENSOR_FFN_UP,
1667
0
                LLM_TENSOR_FFN_DOWN,
1668
0
                LLM_TENSOR_ATTN_POST_NORM,
1669
0
                LLM_TENSOR_FFN_POST_NORM,
1670
0
                LLM_TENSOR_NEXTN_EH_PROJ,
1671
0
                LLM_TENSOR_NEXTN_EMBED_TOKENS,
1672
0
                LLM_TENSOR_NEXTN_ENORM,
1673
0
                LLM_TENSOR_NEXTN_HNORM,
1674
0
                LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
1675
0
                LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
1676
0
            };
1677
0
        case LLM_ARCH_GLM4_MOE:
1678
0
            return {
1679
0
                LLM_TENSOR_TOKEN_EMBD,
1680
0
                LLM_TENSOR_OUTPUT_NORM,
1681
0
                LLM_TENSOR_OUTPUT,
1682
0
                LLM_TENSOR_ATTN_NORM,
1683
0
                LLM_TENSOR_ATTN_POST_NORM,
1684
0
                LLM_TENSOR_ATTN_Q,
1685
0
                LLM_TENSOR_ATTN_K,
1686
0
                LLM_TENSOR_ATTN_V,
1687
0
                LLM_TENSOR_ATTN_OUT,
1688
0
                LLM_TENSOR_ATTN_Q_NORM,
1689
0
                LLM_TENSOR_ATTN_K_NORM,
1690
0
                LLM_TENSOR_FFN_GATE,
1691
0
                LLM_TENSOR_FFN_DOWN,
1692
0
                LLM_TENSOR_FFN_UP,
1693
0
                LLM_TENSOR_FFN_GATE_INP,
1694
0
                LLM_TENSOR_FFN_GATE_EXPS,
1695
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1696
0
                LLM_TENSOR_FFN_UP_EXPS,
1697
0
                LLM_TENSOR_FFN_GATE_SHEXP,
1698
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
1699
0
                LLM_TENSOR_FFN_UP_SHEXP,
1700
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
1701
0
                LLM_TENSOR_NEXTN_EH_PROJ,
1702
0
                LLM_TENSOR_NEXTN_EMBED_TOKENS,
1703
0
                LLM_TENSOR_NEXTN_ENORM,
1704
0
                LLM_TENSOR_NEXTN_HNORM,
1705
0
                LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
1706
0
                LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
1707
0
            };
1708
0
        case LLM_ARCH_GLM_DSA:
1709
0
            return {
1710
0
                LLM_TENSOR_TOKEN_EMBD,
1711
0
                LLM_TENSOR_OUTPUT_NORM,
1712
0
                LLM_TENSOR_OUTPUT,
1713
0
                LLM_TENSOR_ATTN_NORM,
1714
0
                LLM_TENSOR_ATTN_Q_A_NORM,
1715
0
                LLM_TENSOR_ATTN_KV_A_NORM,
1716
0
                LLM_TENSOR_ATTN_Q,
1717
0
                LLM_TENSOR_ATTN_Q_A,
1718
0
                LLM_TENSOR_ATTN_Q_B,
1719
0
                LLM_TENSOR_ATTN_KV_A_MQA,
1720
0
                LLM_TENSOR_ATTN_KV_B,
1721
0
                LLM_TENSOR_ATTN_K_B,
1722
0
                LLM_TENSOR_ATTN_V_B,
1723
0
                LLM_TENSOR_ATTN_OUT,
1724
0
                LLM_TENSOR_FFN_NORM,
1725
0
                LLM_TENSOR_FFN_GATE,
1726
0
                LLM_TENSOR_FFN_UP,
1727
0
                LLM_TENSOR_FFN_DOWN,
1728
0
                LLM_TENSOR_FFN_GATE_INP,
1729
0
                LLM_TENSOR_FFN_GATE_EXPS,
1730
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1731
0
                LLM_TENSOR_FFN_UP_EXPS,
1732
0
                LLM_TENSOR_FFN_GATE_INP_SHEXP,
1733
0
                LLM_TENSOR_FFN_GATE_SHEXP,
1734
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
1735
0
                LLM_TENSOR_FFN_UP_SHEXP,
1736
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
1737
0
                LLM_TENSOR_INDEXER_K_NORM,
1738
0
                LLM_TENSOR_INDEXER_PROJ,
1739
0
                LLM_TENSOR_INDEXER_ATTN_K,
1740
0
                LLM_TENSOR_INDEXER_ATTN_Q_B,
1741
0
                LLM_TENSOR_NEXTN_EH_PROJ,
1742
0
                LLM_TENSOR_NEXTN_EMBED_TOKENS,
1743
0
                LLM_TENSOR_NEXTN_ENORM,
1744
0
                LLM_TENSOR_NEXTN_HNORM,
1745
0
                LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
1746
0
                LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
1747
0
            };
1748
0
        case LLM_ARCH_BITNET:
1749
0
            return {
1750
0
                LLM_TENSOR_TOKEN_EMBD,
1751
0
                LLM_TENSOR_OUTPUT_NORM,
1752
0
                LLM_TENSOR_ATTN_Q,
1753
0
                LLM_TENSOR_ATTN_K,
1754
0
                LLM_TENSOR_ATTN_V,
1755
0
                LLM_TENSOR_ATTN_OUT,
1756
0
                LLM_TENSOR_ATTN_NORM,
1757
0
                LLM_TENSOR_ATTN_SUB_NORM,
1758
0
                LLM_TENSOR_FFN_GATE,
1759
0
                LLM_TENSOR_FFN_DOWN,
1760
0
                LLM_TENSOR_FFN_UP,
1761
0
                LLM_TENSOR_FFN_NORM,
1762
0
                LLM_TENSOR_FFN_SUB_NORM,
1763
0
            };
1764
0
        case LLM_ARCH_T5:
1765
0
            return {
1766
0
                LLM_TENSOR_TOKEN_EMBD,
1767
0
                LLM_TENSOR_OUTPUT,
1768
0
                LLM_TENSOR_DEC_OUTPUT_NORM,
1769
0
                LLM_TENSOR_DEC_ATTN_NORM,
1770
0
                LLM_TENSOR_DEC_ATTN_Q,
1771
0
                LLM_TENSOR_DEC_ATTN_K,
1772
0
                LLM_TENSOR_DEC_ATTN_V,
1773
0
                LLM_TENSOR_DEC_ATTN_OUT,
1774
0
                LLM_TENSOR_DEC_ATTN_REL_B,
1775
0
                LLM_TENSOR_DEC_CROSS_ATTN_NORM,
1776
0
                LLM_TENSOR_DEC_CROSS_ATTN_Q,
1777
0
                LLM_TENSOR_DEC_CROSS_ATTN_K,
1778
0
                LLM_TENSOR_DEC_CROSS_ATTN_V,
1779
0
                LLM_TENSOR_DEC_CROSS_ATTN_OUT,
1780
0
                LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
1781
0
                LLM_TENSOR_DEC_FFN_NORM,
1782
0
                LLM_TENSOR_DEC_FFN_GATE,
1783
0
                LLM_TENSOR_DEC_FFN_DOWN,
1784
0
                LLM_TENSOR_DEC_FFN_UP,
1785
0
                LLM_TENSOR_ENC_OUTPUT_NORM,
1786
0
                LLM_TENSOR_ENC_ATTN_NORM,
1787
0
                LLM_TENSOR_ENC_ATTN_Q,
1788
0
                LLM_TENSOR_ENC_ATTN_K,
1789
0
                LLM_TENSOR_ENC_ATTN_V,
1790
0
                LLM_TENSOR_ENC_ATTN_OUT,
1791
0
                LLM_TENSOR_ENC_ATTN_REL_B,
1792
0
                LLM_TENSOR_ENC_FFN_NORM,
1793
0
                LLM_TENSOR_ENC_FFN_GATE,
1794
0
                LLM_TENSOR_ENC_FFN_DOWN,
1795
0
                LLM_TENSOR_ENC_FFN_UP,
1796
0
            };
1797
0
        case LLM_ARCH_T5ENCODER:
1798
0
            return {
1799
0
                LLM_TENSOR_TOKEN_EMBD,
1800
0
                LLM_TENSOR_OUTPUT,
1801
0
                LLM_TENSOR_ENC_OUTPUT_NORM,
1802
0
                LLM_TENSOR_ENC_ATTN_NORM,
1803
0
                LLM_TENSOR_ENC_ATTN_Q,
1804
0
                LLM_TENSOR_ENC_ATTN_K,
1805
0
                LLM_TENSOR_ENC_ATTN_V,
1806
0
                LLM_TENSOR_ENC_ATTN_OUT,
1807
0
                LLM_TENSOR_ENC_ATTN_REL_B,
1808
0
                LLM_TENSOR_ENC_FFN_NORM,
1809
0
                LLM_TENSOR_ENC_FFN_GATE,
1810
0
                LLM_TENSOR_ENC_FFN_DOWN,
1811
0
                LLM_TENSOR_ENC_FFN_UP,
1812
0
            };
1813
0
        case LLM_ARCH_JAIS:
1814
0
            return {
1815
0
                LLM_TENSOR_TOKEN_EMBD,
1816
0
                LLM_TENSOR_OUTPUT_NORM,
1817
0
                LLM_TENSOR_OUTPUT,
1818
0
                LLM_TENSOR_ATTN_NORM,
1819
0
                LLM_TENSOR_ATTN_QKV,
1820
0
                LLM_TENSOR_ATTN_OUT,
1821
0
                LLM_TENSOR_FFN_NORM,
1822
0
                LLM_TENSOR_FFN_UP,
1823
0
                LLM_TENSOR_FFN_GATE,
1824
0
                LLM_TENSOR_FFN_DOWN,
1825
0
            };
1826
0
        case LLM_ARCH_JAIS2:
1827
0
            return {
1828
0
                LLM_TENSOR_TOKEN_EMBD,
1829
0
                LLM_TENSOR_OUTPUT_NORM,
1830
0
                LLM_TENSOR_OUTPUT,
1831
0
                LLM_TENSOR_ATTN_NORM,
1832
0
                LLM_TENSOR_ATTN_Q,
1833
0
                LLM_TENSOR_ATTN_K,
1834
0
                LLM_TENSOR_ATTN_V,
1835
0
                LLM_TENSOR_ATTN_OUT,
1836
0
                LLM_TENSOR_FFN_NORM,
1837
0
                LLM_TENSOR_FFN_UP,
1838
0
                LLM_TENSOR_FFN_DOWN,
1839
0
            };
1840
0
        case LLM_ARCH_NEMOTRON_H:
1841
0
            return {
1842
0
                LLM_TENSOR_TOKEN_EMBD,
1843
0
                LLM_TENSOR_OUTPUT_NORM,
1844
0
                LLM_TENSOR_OUTPUT,
1845
0
                LLM_TENSOR_ATTN_NORM,
1846
0
                LLM_TENSOR_SSM_IN,
1847
0
                LLM_TENSOR_SSM_CONV1D,
1848
0
                LLM_TENSOR_SSM_DT,
1849
0
                LLM_TENSOR_SSM_A,
1850
0
                LLM_TENSOR_SSM_D,
1851
0
                LLM_TENSOR_SSM_NORM,
1852
0
                LLM_TENSOR_SSM_OUT,
1853
0
                LLM_TENSOR_ATTN_Q,
1854
0
                LLM_TENSOR_ATTN_K,
1855
0
                LLM_TENSOR_ATTN_V,
1856
0
                LLM_TENSOR_ATTN_OUT,
1857
0
                LLM_TENSOR_FFN_DOWN,
1858
0
                LLM_TENSOR_FFN_UP,
1859
0
            };
1860
0
        case LLM_ARCH_NEMOTRON_H_MOE:
1861
0
            return {
1862
0
                LLM_TENSOR_TOKEN_EMBD,
1863
0
                LLM_TENSOR_OUTPUT_NORM,
1864
0
                LLM_TENSOR_OUTPUT,
1865
0
                LLM_TENSOR_ATTN_NORM,
1866
                // mamba(2) ssm layers
1867
0
                LLM_TENSOR_SSM_IN,
1868
0
                LLM_TENSOR_SSM_CONV1D,
1869
0
                LLM_TENSOR_SSM_DT,
1870
0
                LLM_TENSOR_SSM_A,
1871
0
                LLM_TENSOR_SSM_D,
1872
0
                LLM_TENSOR_SSM_NORM,
1873
0
                LLM_TENSOR_SSM_OUT,
1874
                // attention layers
1875
0
                LLM_TENSOR_ATTN_Q,
1876
0
                LLM_TENSOR_ATTN_K,
1877
0
                LLM_TENSOR_ATTN_V,
1878
0
                LLM_TENSOR_ATTN_OUT,
1879
                // dense FFN
1880
0
                LLM_TENSOR_FFN_DOWN,
1881
0
                LLM_TENSOR_FFN_UP,
1882
                // MoE FFN (for MoE layers)
1883
0
                LLM_TENSOR_FFN_GATE_INP,
1884
0
                LLM_TENSOR_FFN_UP_EXPS,
1885
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1886
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
1887
0
                LLM_TENSOR_FFN_LATENT_DOWN,
1888
0
                LLM_TENSOR_FFN_LATENT_UP,
1889
                // MoE shared expert layer
1890
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
1891
0
                LLM_TENSOR_FFN_UP_SHEXP,
1892
0
            };
1893
0
        case LLM_ARCH_EXAONE4:
1894
0
            return {
1895
0
                LLM_TENSOR_TOKEN_EMBD,
1896
0
                LLM_TENSOR_OUTPUT_NORM,
1897
0
                LLM_TENSOR_OUTPUT,
1898
0
                LLM_TENSOR_ROPE_FREQS,
1899
0
                LLM_TENSOR_ATTN_Q,
1900
0
                LLM_TENSOR_ATTN_Q_NORM,
1901
0
                LLM_TENSOR_ATTN_K,
1902
0
                LLM_TENSOR_ATTN_K_NORM,
1903
0
                LLM_TENSOR_ATTN_V,
1904
0
                LLM_TENSOR_ATTN_OUT,
1905
0
                LLM_TENSOR_ATTN_POST_NORM,
1906
0
                LLM_TENSOR_FFN_GATE,
1907
0
                LLM_TENSOR_FFN_DOWN,
1908
0
                LLM_TENSOR_FFN_UP,
1909
0
                LLM_TENSOR_FFN_POST_NORM,
1910
0
            };
1911
0
        case LLM_ARCH_EXAONE_MOE:
1912
0
            return {
1913
0
                LLM_TENSOR_TOKEN_EMBD,
1914
0
                LLM_TENSOR_OUTPUT_NORM,
1915
0
                LLM_TENSOR_OUTPUT,
1916
0
                LLM_TENSOR_ROPE_FREQS,
1917
0
                LLM_TENSOR_ATTN_NORM,
1918
0
                LLM_TENSOR_ATTN_Q,
1919
0
                LLM_TENSOR_ATTN_Q_NORM,
1920
0
                LLM_TENSOR_ATTN_K,
1921
0
                LLM_TENSOR_ATTN_K_NORM,
1922
0
                LLM_TENSOR_ATTN_V,
1923
0
                LLM_TENSOR_ATTN_OUT,
1924
0
                LLM_TENSOR_FFN_NORM,
1925
0
                LLM_TENSOR_FFN_GATE,
1926
0
                LLM_TENSOR_FFN_DOWN,
1927
0
                LLM_TENSOR_FFN_UP,
1928
0
                LLM_TENSOR_FFN_GATE_INP,
1929
0
                LLM_TENSOR_FFN_GATE_EXPS,
1930
0
                LLM_TENSOR_FFN_DOWN_EXPS,
1931
0
                LLM_TENSOR_FFN_UP_EXPS,
1932
0
                LLM_TENSOR_FFN_GATE_SHEXP,
1933
0
                LLM_TENSOR_FFN_UP_SHEXP,
1934
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
1935
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
1936
0
                LLM_TENSOR_NEXTN_EH_PROJ,
1937
0
                LLM_TENSOR_NEXTN_EMBED_TOKENS,
1938
0
                LLM_TENSOR_NEXTN_ENORM,
1939
0
                LLM_TENSOR_NEXTN_HNORM,
1940
0
                LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
1941
0
                LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
1942
0
            };
1943
0
        case LLM_ARCH_RWKV6:
1944
0
            return {
1945
0
                LLM_TENSOR_TOKEN_EMBD,
1946
0
                LLM_TENSOR_TOKEN_EMBD_NORM,
1947
0
                LLM_TENSOR_OUTPUT_NORM,
1948
0
                LLM_TENSOR_OUTPUT,
1949
0
                LLM_TENSOR_ATTN_NORM,
1950
0
                LLM_TENSOR_ATTN_NORM_2,
1951
0
                LLM_TENSOR_TIME_MIX_W1,
1952
0
                LLM_TENSOR_TIME_MIX_W2,
1953
0
                LLM_TENSOR_TIME_MIX_LERP_X,
1954
0
                LLM_TENSOR_TIME_MIX_LERP_W,
1955
0
                LLM_TENSOR_TIME_MIX_LERP_K,
1956
0
                LLM_TENSOR_TIME_MIX_LERP_V,
1957
0
                LLM_TENSOR_TIME_MIX_LERP_R,
1958
0
                LLM_TENSOR_TIME_MIX_LERP_G,
1959
0
                LLM_TENSOR_TIME_MIX_LERP_FUSED,
1960
0
                LLM_TENSOR_TIME_MIX_FIRST,
1961
0
                LLM_TENSOR_TIME_MIX_DECAY,
1962
0
                LLM_TENSOR_TIME_MIX_DECAY_W1,
1963
0
                LLM_TENSOR_TIME_MIX_DECAY_W2,
1964
0
                LLM_TENSOR_TIME_MIX_KEY,
1965
0
                LLM_TENSOR_TIME_MIX_VALUE,
1966
0
                LLM_TENSOR_TIME_MIX_RECEPTANCE,
1967
0
                LLM_TENSOR_TIME_MIX_GATE,
1968
0
                LLM_TENSOR_TIME_MIX_LN,
1969
0
                LLM_TENSOR_TIME_MIX_OUTPUT,
1970
0
                LLM_TENSOR_CHANNEL_MIX_LERP_K,
1971
0
                LLM_TENSOR_CHANNEL_MIX_LERP_R,
1972
0
                LLM_TENSOR_CHANNEL_MIX_KEY,
1973
0
                LLM_TENSOR_CHANNEL_MIX_VALUE,
1974
0
                LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
1975
0
            };
1976
0
        case LLM_ARCH_RWKV6QWEN2:
1977
0
            return {
1978
0
                LLM_TENSOR_TOKEN_EMBD,
1979
0
                LLM_TENSOR_OUTPUT_NORM,
1980
0
                LLM_TENSOR_OUTPUT,
1981
0
                LLM_TENSOR_ATTN_NORM,
1982
0
                LLM_TENSOR_TIME_MIX_W1,
1983
0
                LLM_TENSOR_TIME_MIX_W2,
1984
0
                LLM_TENSOR_TIME_MIX_LERP_X,
1985
0
                LLM_TENSOR_TIME_MIX_LERP_FUSED,
1986
0
                LLM_TENSOR_TIME_MIX_FIRST,
1987
0
                LLM_TENSOR_TIME_MIX_DECAY,
1988
0
                LLM_TENSOR_TIME_MIX_DECAY_W1,
1989
0
                LLM_TENSOR_TIME_MIX_DECAY_W2,
1990
0
                LLM_TENSOR_TIME_MIX_KEY,
1991
0
                LLM_TENSOR_TIME_MIX_VALUE,
1992
0
                LLM_TENSOR_TIME_MIX_RECEPTANCE,
1993
0
                LLM_TENSOR_TIME_MIX_GATE,
1994
0
                LLM_TENSOR_TIME_MIX_OUTPUT,
1995
0
                LLM_TENSOR_FFN_NORM,
1996
0
                LLM_TENSOR_FFN_GATE,
1997
0
                LLM_TENSOR_FFN_DOWN,
1998
0
                LLM_TENSOR_FFN_UP,
1999
0
            };
2000
0
        case LLM_ARCH_RWKV7:
2001
0
            return {
2002
0
                LLM_TENSOR_TOKEN_EMBD,
2003
0
                LLM_TENSOR_TOKEN_EMBD_NORM,
2004
0
                LLM_TENSOR_OUTPUT_NORM,
2005
0
                LLM_TENSOR_OUTPUT,
2006
0
                LLM_TENSOR_ATTN_NORM,
2007
0
                LLM_TENSOR_ATTN_NORM_2,
2008
0
                LLM_TENSOR_TIME_MIX_W0,
2009
0
                LLM_TENSOR_TIME_MIX_W1,
2010
0
                LLM_TENSOR_TIME_MIX_W2,
2011
0
                LLM_TENSOR_TIME_MIX_A0,
2012
0
                LLM_TENSOR_TIME_MIX_A1,
2013
0
                LLM_TENSOR_TIME_MIX_A2,
2014
0
                LLM_TENSOR_TIME_MIX_V0,
2015
0
                LLM_TENSOR_TIME_MIX_V1,
2016
0
                LLM_TENSOR_TIME_MIX_V2,
2017
0
                LLM_TENSOR_TIME_MIX_G1,
2018
0
                LLM_TENSOR_TIME_MIX_G2,
2019
0
                LLM_TENSOR_TIME_MIX_K_K,
2020
0
                LLM_TENSOR_TIME_MIX_K_A,
2021
0
                LLM_TENSOR_TIME_MIX_R_K,
2022
0
                LLM_TENSOR_TIME_MIX_LERP_FUSED,
2023
0
                LLM_TENSOR_TIME_MIX_KEY,
2024
0
                LLM_TENSOR_TIME_MIX_VALUE,
2025
0
                LLM_TENSOR_TIME_MIX_RECEPTANCE,
2026
0
                LLM_TENSOR_TIME_MIX_LN,
2027
0
                LLM_TENSOR_TIME_MIX_OUTPUT,
2028
0
                LLM_TENSOR_CHANNEL_MIX_LERP_K,
2029
0
                LLM_TENSOR_CHANNEL_MIX_KEY,
2030
0
                LLM_TENSOR_CHANNEL_MIX_VALUE,
2031
0
            };
2032
0
        case LLM_ARCH_ARWKV7:
2033
0
            return {
2034
0
                LLM_TENSOR_TOKEN_EMBD,
2035
0
                LLM_TENSOR_TOKEN_EMBD_NORM,
2036
0
                LLM_TENSOR_OUTPUT_NORM,
2037
0
                LLM_TENSOR_OUTPUT,
2038
0
                LLM_TENSOR_ATTN_NORM,
2039
0
                LLM_TENSOR_TIME_MIX_W0,
2040
0
                LLM_TENSOR_TIME_MIX_W1,
2041
0
                LLM_TENSOR_TIME_MIX_W2,
2042
0
                LLM_TENSOR_TIME_MIX_A0,
2043
0
                LLM_TENSOR_TIME_MIX_A1,
2044
0
                LLM_TENSOR_TIME_MIX_A2,
2045
0
                LLM_TENSOR_TIME_MIX_V0,
2046
0
                LLM_TENSOR_TIME_MIX_V1,
2047
0
                LLM_TENSOR_TIME_MIX_V2,
2048
0
                LLM_TENSOR_TIME_MIX_G1,
2049
0
                LLM_TENSOR_TIME_MIX_G2,
2050
0
                LLM_TENSOR_TIME_MIX_K_K,
2051
0
                LLM_TENSOR_TIME_MIX_K_A,
2052
0
                LLM_TENSOR_TIME_MIX_R_K,
2053
0
                LLM_TENSOR_TIME_MIX_LERP_FUSED,
2054
0
                LLM_TENSOR_TIME_MIX_KEY,
2055
0
                LLM_TENSOR_TIME_MIX_VALUE,
2056
0
                LLM_TENSOR_TIME_MIX_RECEPTANCE,
2057
0
                LLM_TENSOR_TIME_MIX_LN,
2058
0
                LLM_TENSOR_TIME_MIX_OUTPUT,
2059
0
                LLM_TENSOR_FFN_NORM,
2060
0
                LLM_TENSOR_FFN_GATE,
2061
0
                LLM_TENSOR_FFN_DOWN,
2062
0
                LLM_TENSOR_FFN_UP,
2063
0
            };
2064
0
        case LLM_ARCH_GRANITE_MOE:
2065
0
            return {
2066
0
                LLM_TENSOR_TOKEN_EMBD,
2067
0
                LLM_TENSOR_OUTPUT_NORM,
2068
0
                LLM_TENSOR_OUTPUT,
2069
0
                LLM_TENSOR_ATTN_NORM,
2070
0
                LLM_TENSOR_ATTN_Q,
2071
0
                LLM_TENSOR_ATTN_K,
2072
0
                LLM_TENSOR_ATTN_V,
2073
0
                LLM_TENSOR_ATTN_OUT,
2074
0
                LLM_TENSOR_FFN_NORM,
2075
0
                LLM_TENSOR_FFN_GATE_INP,
2076
0
                LLM_TENSOR_FFN_GATE_EXPS,
2077
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2078
0
                LLM_TENSOR_FFN_UP_EXPS,
2079
0
                LLM_TENSOR_FFN_GATE_SHEXP,
2080
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
2081
0
                LLM_TENSOR_FFN_UP_SHEXP,
2082
0
            };
2083
0
        case LLM_ARCH_GRANITE_HYBRID:
2084
0
            return {
2085
0
                LLM_TENSOR_TOKEN_EMBD,
2086
0
                LLM_TENSOR_OUTPUT_NORM,
2087
0
                LLM_TENSOR_OUTPUT,
2088
0
                LLM_TENSOR_ATTN_NORM,
2089
0
                LLM_TENSOR_SSM_IN,
2090
0
                LLM_TENSOR_SSM_CONV1D,
2091
0
                LLM_TENSOR_SSM_DT,
2092
0
                LLM_TENSOR_SSM_A,
2093
0
                LLM_TENSOR_SSM_D,
2094
0
                LLM_TENSOR_SSM_NORM,
2095
0
                LLM_TENSOR_SSM_OUT,
2096
0
                LLM_TENSOR_ATTN_Q,
2097
0
                LLM_TENSOR_ATTN_K,
2098
0
                LLM_TENSOR_ATTN_V,
2099
0
                LLM_TENSOR_ATTN_OUT,
2100
0
                LLM_TENSOR_FFN_NORM,
2101
0
                LLM_TENSOR_FFN_GATE,
2102
0
                LLM_TENSOR_FFN_DOWN,
2103
0
                LLM_TENSOR_FFN_UP,
2104
0
                LLM_TENSOR_FFN_NORM,
2105
0
                LLM_TENSOR_FFN_GATE_INP,
2106
0
                LLM_TENSOR_FFN_GATE_EXPS,
2107
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2108
0
                LLM_TENSOR_FFN_UP_EXPS,
2109
0
                LLM_TENSOR_FFN_GATE_SHEXP,
2110
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
2111
0
                LLM_TENSOR_FFN_UP_SHEXP,
2112
0
            };
2113
0
        case LLM_ARCH_WAVTOKENIZER_DEC:
2114
0
            return {
2115
0
                LLM_TENSOR_TOKEN_EMBD,
2116
0
                LLM_TENSOR_TOKEN_EMBD_NORM,
2117
0
                LLM_TENSOR_CONV1D,
2118
0
                LLM_TENSOR_CONVNEXT_DW,
2119
0
                LLM_TENSOR_CONVNEXT_NORM,
2120
0
                LLM_TENSOR_CONVNEXT_PW1,
2121
0
                LLM_TENSOR_CONVNEXT_PW2,
2122
0
                LLM_TENSOR_CONVNEXT_GAMMA,
2123
0
                LLM_TENSOR_OUTPUT_NORM,
2124
0
                LLM_TENSOR_OUTPUT,
2125
0
                LLM_TENSOR_POS_NET_CONV1,
2126
0
                LLM_TENSOR_POS_NET_CONV2,
2127
0
                LLM_TENSOR_POS_NET_NORM,
2128
0
                LLM_TENSOR_POS_NET_NORM1,
2129
0
                LLM_TENSOR_POS_NET_NORM2,
2130
0
                LLM_TENSOR_POS_NET_ATTN_NORM,
2131
0
                LLM_TENSOR_POS_NET_ATTN_Q,
2132
0
                LLM_TENSOR_POS_NET_ATTN_K,
2133
0
                LLM_TENSOR_POS_NET_ATTN_V,
2134
0
                LLM_TENSOR_POS_NET_ATTN_OUT,
2135
0
            };
2136
0
        case LLM_ARCH_BAILINGMOE:
2137
0
            return {
2138
0
                LLM_TENSOR_TOKEN_EMBD,
2139
0
                LLM_TENSOR_OUTPUT_NORM,
2140
0
                LLM_TENSOR_OUTPUT,
2141
0
                LLM_TENSOR_ROPE_FREQS,
2142
0
                LLM_TENSOR_ATTN_NORM,
2143
0
                LLM_TENSOR_ATTN_Q,
2144
0
                LLM_TENSOR_ATTN_K,
2145
0
                LLM_TENSOR_ATTN_V,
2146
0
                LLM_TENSOR_ATTN_OUT,
2147
0
                LLM_TENSOR_FFN_GATE_INP,
2148
0
                LLM_TENSOR_FFN_NORM,
2149
0
                LLM_TENSOR_FFN_GATE_EXPS,
2150
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2151
0
                LLM_TENSOR_FFN_UP_EXPS,
2152
0
                LLM_TENSOR_FFN_GATE_INP_SHEXP,
2153
0
                LLM_TENSOR_FFN_GATE_SHEXP,
2154
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
2155
0
                LLM_TENSOR_FFN_UP_SHEXP,
2156
0
            };
2157
0
        case LLM_ARCH_BAILINGMOE2:
2158
0
            return {
2159
0
                LLM_TENSOR_TOKEN_EMBD,
2160
0
                LLM_TENSOR_OUTPUT_NORM,
2161
0
                LLM_TENSOR_OUTPUT,
2162
0
                LLM_TENSOR_ATTN_NORM,
2163
0
                LLM_TENSOR_ATTN_Q_NORM,
2164
0
                LLM_TENSOR_ATTN_K_NORM,
2165
0
                LLM_TENSOR_ATTN_QKV,
2166
0
                LLM_TENSOR_ATTN_OUT,
2167
0
                LLM_TENSOR_FFN_GATE_INP,
2168
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
2169
0
                LLM_TENSOR_FFN_NORM,
2170
0
                LLM_TENSOR_FFN_GATE,
2171
0
                LLM_TENSOR_FFN_DOWN,
2172
0
                LLM_TENSOR_FFN_UP,
2173
0
                LLM_TENSOR_FFN_GATE_EXPS,
2174
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2175
0
                LLM_TENSOR_FFN_UP_EXPS,
2176
0
                LLM_TENSOR_FFN_GATE_SHEXP,
2177
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
2178
0
                LLM_TENSOR_FFN_UP_SHEXP,
2179
0
                LLM_TENSOR_NEXTN_EH_PROJ,
2180
0
                LLM_TENSOR_NEXTN_EMBED_TOKENS,
2181
0
                LLM_TENSOR_NEXTN_ENORM,
2182
0
                LLM_TENSOR_NEXTN_HNORM,
2183
0
                LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
2184
0
                LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
2185
0
                LLM_TENSOR_LAYER_OUT_NORM,
2186
0
            };
2187
0
        case LLM_ARCH_DOTS1:
2188
0
            return {
2189
0
                LLM_TENSOR_TOKEN_EMBD,
2190
0
                LLM_TENSOR_OUTPUT_NORM,
2191
0
                LLM_TENSOR_OUTPUT,
2192
0
                LLM_TENSOR_ATTN_NORM,
2193
0
                LLM_TENSOR_ATTN_Q,
2194
0
                LLM_TENSOR_ATTN_Q_NORM,
2195
0
                LLM_TENSOR_ATTN_K,
2196
0
                LLM_TENSOR_ATTN_K_NORM,
2197
0
                LLM_TENSOR_ATTN_V,
2198
0
                LLM_TENSOR_ATTN_OUT,
2199
0
                LLM_TENSOR_FFN_NORM,
2200
0
                LLM_TENSOR_FFN_GATE,
2201
0
                LLM_TENSOR_FFN_UP,
2202
0
                LLM_TENSOR_FFN_DOWN,
2203
0
                LLM_TENSOR_FFN_GATE_INP,
2204
0
                LLM_TENSOR_FFN_GATE_EXPS,
2205
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2206
0
                LLM_TENSOR_FFN_UP_EXPS,
2207
0
                LLM_TENSOR_FFN_GATE_INP_SHEXP,
2208
0
                LLM_TENSOR_FFN_GATE_SHEXP,
2209
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
2210
0
                LLM_TENSOR_FFN_UP_SHEXP,
2211
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
2212
0
            };
2213
0
        case LLM_ARCH_ERNIE4_5_MOE:
2214
0
            return {
2215
0
                LLM_TENSOR_TOKEN_EMBD,
2216
0
                LLM_TENSOR_OUTPUT_NORM,
2217
0
                LLM_TENSOR_OUTPUT,
2218
0
                LLM_TENSOR_ATTN_NORM,
2219
0
                LLM_TENSOR_ATTN_Q,
2220
0
                LLM_TENSOR_ATTN_K,
2221
0
                LLM_TENSOR_ATTN_V,
2222
0
                LLM_TENSOR_ATTN_OUT,
2223
0
                LLM_TENSOR_FFN_NORM,
2224
0
                LLM_TENSOR_FFN_GATE,
2225
0
                LLM_TENSOR_FFN_DOWN,
2226
0
                LLM_TENSOR_FFN_UP,
2227
0
                LLM_TENSOR_FFN_GATE_INP,
2228
0
                LLM_TENSOR_FFN_GATE_SHEXP,
2229
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
2230
0
                LLM_TENSOR_FFN_UP_SHEXP,
2231
0
                LLM_TENSOR_FFN_GATE_EXPS,
2232
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2233
0
                LLM_TENSOR_FFN_UP_EXPS,
2234
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
2235
0
            };
2236
0
        case LLM_ARCH_HUNYUAN_MOE:
2237
0
            return {
2238
0
                LLM_TENSOR_TOKEN_EMBD,
2239
0
                LLM_TENSOR_OUTPUT_NORM,
2240
0
                LLM_TENSOR_OUTPUT,
2241
0
                LLM_TENSOR_ATTN_NORM,
2242
0
                LLM_TENSOR_ATTN_Q,
2243
0
                LLM_TENSOR_ATTN_Q_NORM,
2244
0
                LLM_TENSOR_ATTN_K,
2245
0
                LLM_TENSOR_ATTN_K_NORM,
2246
0
                LLM_TENSOR_ATTN_V,
2247
0
                LLM_TENSOR_ATTN_OUT,
2248
0
                LLM_TENSOR_FFN_GATE_INP,
2249
0
                LLM_TENSOR_FFN_NORM,
2250
0
                LLM_TENSOR_FFN_GATE_SHEXP,
2251
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
2252
0
                LLM_TENSOR_FFN_UP_SHEXP,
2253
0
                LLM_TENSOR_FFN_GATE_EXPS,
2254
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2255
0
                LLM_TENSOR_FFN_UP_EXPS,
2256
0
            };
2257
0
        case LLM_ARCH_OPENAI_MOE:
2258
0
            return {
2259
0
                LLM_TENSOR_TOKEN_EMBD,
2260
0
                LLM_TENSOR_OUTPUT_NORM,
2261
0
                LLM_TENSOR_OUTPUT,
2262
0
                LLM_TENSOR_ATTN_NORM,
2263
0
                LLM_TENSOR_ATTN_POST_NORM,
2264
0
                LLM_TENSOR_ATTN_Q,
2265
0
                LLM_TENSOR_ATTN_K,
2266
0
                LLM_TENSOR_ATTN_V,
2267
0
                LLM_TENSOR_ATTN_OUT,
2268
0
                LLM_TENSOR_ATTN_SINKS,
2269
0
                LLM_TENSOR_FFN_GATE_INP,
2270
0
                LLM_TENSOR_FFN_GATE_EXPS,
2271
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2272
0
                LLM_TENSOR_FFN_UP_EXPS,
2273
0
            };
2274
0
        case LLM_ARCH_LFM2:
2275
0
            return {
2276
0
                LLM_TENSOR_ATTN_NORM,
2277
0
                LLM_TENSOR_ATTN_Q,
2278
0
                LLM_TENSOR_ATTN_K,
2279
0
                LLM_TENSOR_ATTN_V,
2280
0
                LLM_TENSOR_ATTN_OUT,
2281
0
                LLM_TENSOR_ATTN_K_NORM,
2282
0
                LLM_TENSOR_ATTN_Q_NORM,
2283
0
                LLM_TENSOR_FFN_DOWN,
2284
0
                LLM_TENSOR_FFN_GATE,
2285
0
                LLM_TENSOR_FFN_NORM,
2286
0
                LLM_TENSOR_FFN_UP,
2287
0
                LLM_TENSOR_SHORTCONV_CONV,
2288
0
                LLM_TENSOR_SHORTCONV_INPROJ,
2289
0
                LLM_TENSOR_SHORTCONV_OUTPROJ,
2290
0
                LLM_TENSOR_TOKEN_EMBD,
2291
0
                LLM_TENSOR_OUTPUT_NORM_LFM2,
2292
0
                LLM_TENSOR_OUTPUT,
2293
0
                LLM_TENSOR_DENSE_2_OUT,
2294
0
            };
2295
0
        case LLM_ARCH_LFM2MOE:
2296
0
            return {
2297
0
                LLM_TENSOR_ATTN_NORM,
2298
0
                LLM_TENSOR_ATTN_Q,
2299
0
                LLM_TENSOR_ATTN_K,
2300
0
                LLM_TENSOR_ATTN_V,
2301
0
                LLM_TENSOR_ATTN_OUT,
2302
0
                LLM_TENSOR_ATTN_K_NORM,
2303
0
                LLM_TENSOR_ATTN_Q_NORM,
2304
0
                LLM_TENSOR_FFN_DOWN,
2305
0
                LLM_TENSOR_FFN_GATE,
2306
0
                LLM_TENSOR_FFN_NORM,
2307
0
                LLM_TENSOR_FFN_UP,
2308
0
                LLM_TENSOR_SHORTCONV_CONV,
2309
0
                LLM_TENSOR_SHORTCONV_INPROJ,
2310
0
                LLM_TENSOR_SHORTCONV_OUTPROJ,
2311
0
                LLM_TENSOR_TOKEN_EMBD,
2312
0
                LLM_TENSOR_OUTPUT_NORM_LFM2,
2313
0
                LLM_TENSOR_FFN_GATE_INP,
2314
0
                LLM_TENSOR_FFN_GATE_EXPS,
2315
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2316
0
                LLM_TENSOR_FFN_UP_EXPS,
2317
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
2318
0
            };
2319
0
        case LLM_ARCH_SMALLTHINKER:
2320
0
            return {
2321
0
                LLM_TENSOR_TOKEN_EMBD,
2322
0
                LLM_TENSOR_OUTPUT_NORM,
2323
0
                LLM_TENSOR_OUTPUT,
2324
0
                LLM_TENSOR_ATTN_NORM,
2325
0
                LLM_TENSOR_ATTN_Q,
2326
0
                LLM_TENSOR_ATTN_K,
2327
0
                LLM_TENSOR_ATTN_V,
2328
0
                LLM_TENSOR_ATTN_OUT,
2329
0
                LLM_TENSOR_FFN_NORM,
2330
0
                LLM_TENSOR_FFN_GATE,
2331
0
                LLM_TENSOR_FFN_DOWN,
2332
0
                LLM_TENSOR_FFN_UP,
2333
0
                LLM_TENSOR_FFN_GATE_INP,
2334
0
                LLM_TENSOR_FFN_GATE_EXPS,
2335
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2336
0
                LLM_TENSOR_FFN_UP_EXPS,
2337
0
            };
2338
0
        case LLM_ARCH_APERTUS:
2339
0
            return {
2340
0
                LLM_TENSOR_TOKEN_EMBD,
2341
0
                LLM_TENSOR_OUTPUT_NORM,
2342
0
                LLM_TENSOR_OUTPUT,
2343
0
                LLM_TENSOR_ROPE_FREQS,
2344
0
                LLM_TENSOR_ATTN_NORM,
2345
0
                LLM_TENSOR_ATTN_Q,
2346
0
                LLM_TENSOR_ATTN_K,
2347
0
                LLM_TENSOR_ATTN_V,
2348
0
                LLM_TENSOR_ATTN_OUT,
2349
0
                LLM_TENSOR_ATTN_Q_NORM,
2350
0
                LLM_TENSOR_ATTN_K_NORM,
2351
0
                LLM_TENSOR_FFN_NORM,
2352
0
                LLM_TENSOR_FFN_DOWN,
2353
0
                LLM_TENSOR_FFN_UP,
2354
0
            };
2355
0
        case LLM_ARCH_SEED_OSS:
2356
0
            return {
2357
0
                LLM_TENSOR_TOKEN_EMBD,
2358
0
                LLM_TENSOR_OUTPUT_NORM,
2359
0
                LLM_TENSOR_OUTPUT,
2360
0
                LLM_TENSOR_ATTN_NORM,
2361
0
                LLM_TENSOR_ATTN_Q,
2362
0
                LLM_TENSOR_ATTN_K,
2363
0
                LLM_TENSOR_ATTN_V,
2364
0
                LLM_TENSOR_ATTN_OUT,
2365
0
                LLM_TENSOR_ATTN_POST_NORM,
2366
0
                LLM_TENSOR_FFN_GATE,
2367
0
                LLM_TENSOR_FFN_DOWN,
2368
0
                LLM_TENSOR_FFN_UP,
2369
0
            };
2370
0
        case LLM_ARCH_GROVEMOE:
2371
0
            return {
2372
0
                LLM_TENSOR_TOKEN_EMBD,
2373
0
                LLM_TENSOR_OUTPUT_NORM,
2374
0
                LLM_TENSOR_OUTPUT,
2375
0
                LLM_TENSOR_ATTN_NORM,
2376
0
                LLM_TENSOR_ATTN_Q,
2377
0
                LLM_TENSOR_ATTN_Q_NORM,
2378
0
                LLM_TENSOR_ATTN_K,
2379
0
                LLM_TENSOR_ATTN_K_NORM,
2380
0
                LLM_TENSOR_ATTN_V,
2381
0
                LLM_TENSOR_ATTN_OUT,
2382
0
                LLM_TENSOR_FFN_NORM,
2383
0
                LLM_TENSOR_FFN_GATE_INP,
2384
0
                LLM_TENSOR_FFN_GATE_EXPS,
2385
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2386
0
                LLM_TENSOR_FFN_UP_EXPS,
2387
0
                LLM_TENSOR_FFN_GATE_CHEXPS,
2388
0
                LLM_TENSOR_FFN_DOWN_CHEXPS,
2389
0
                LLM_TENSOR_FFN_UP_CHEXPS,
2390
0
            };
2391
0
        case LLM_ARCH_MINIMAX_M2:
2392
0
            return {
2393
0
                LLM_TENSOR_TOKEN_EMBD,
2394
0
                LLM_TENSOR_OUTPUT_NORM,
2395
0
                LLM_TENSOR_OUTPUT,
2396
0
                LLM_TENSOR_ATTN_NORM,
2397
0
                LLM_TENSOR_ATTN_Q,
2398
0
                LLM_TENSOR_ATTN_K,
2399
0
                LLM_TENSOR_ATTN_V,
2400
0
                LLM_TENSOR_ATTN_OUT,
2401
0
                LLM_TENSOR_ATTN_Q_NORM,
2402
0
                LLM_TENSOR_ATTN_K_NORM,
2403
0
                LLM_TENSOR_FFN_NORM,
2404
0
                LLM_TENSOR_FFN_GATE_INP,
2405
0
                LLM_TENSOR_FFN_GATE_EXPS,
2406
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2407
0
                LLM_TENSOR_FFN_UP_EXPS,
2408
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
2409
0
            };
2410
0
        case LLM_ARCH_COGVLM:
2411
0
            return {
2412
0
                LLM_TENSOR_TOKEN_EMBD,
2413
0
                LLM_TENSOR_OUTPUT_NORM,
2414
0
                LLM_TENSOR_OUTPUT,
2415
0
                LLM_TENSOR_ATTN_NORM,
2416
0
                LLM_TENSOR_ATTN_QKV,
2417
0
                LLM_TENSOR_ATTN_OUT,
2418
0
                LLM_TENSOR_FFN_NORM,
2419
0
                LLM_TENSOR_FFN_GATE,
2420
0
                LLM_TENSOR_FFN_DOWN,
2421
0
                LLM_TENSOR_FFN_UP,
2422
0
                LLM_TENSOR_VISEXP_ATTN_QKV,
2423
0
                LLM_TENSOR_VISEXP_ATTN_OUT,
2424
0
                LLM_TENSOR_VISEXP_FFN_GATE,
2425
0
                LLM_TENSOR_VISEXP_FFN_DOWN,
2426
0
                LLM_TENSOR_VISEXP_FFN_UP,
2427
0
            };
2428
0
        case LLM_ARCH_MIMO2:
2429
0
            return {
2430
0
                LLM_TENSOR_TOKEN_EMBD,
2431
0
                LLM_TENSOR_OUTPUT_NORM,
2432
0
                LLM_TENSOR_OUTPUT,
2433
0
                LLM_TENSOR_ATTN_NORM,
2434
0
                LLM_TENSOR_ATTN_Q,
2435
0
                LLM_TENSOR_ATTN_K,
2436
0
                LLM_TENSOR_ATTN_V,
2437
0
                LLM_TENSOR_ATTN_SINKS,
2438
0
                LLM_TENSOR_ATTN_OUT,
2439
0
                LLM_TENSOR_FFN_NORM,
2440
0
                LLM_TENSOR_FFN_GATE,
2441
0
                LLM_TENSOR_FFN_DOWN,
2442
0
                LLM_TENSOR_FFN_UP,
2443
0
                LLM_TENSOR_FFN_GATE_INP,
2444
0
                LLM_TENSOR_FFN_GATE_EXPS,
2445
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2446
0
                LLM_TENSOR_FFN_UP_EXPS,
2447
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
2448
0
            };
2449
0
        case LLM_ARCH_STEP35:
2450
0
            return {
2451
0
                LLM_TENSOR_TOKEN_EMBD,
2452
0
                LLM_TENSOR_OUTPUT_NORM,
2453
0
                LLM_TENSOR_OUTPUT,
2454
0
                LLM_TENSOR_ROPE_FREQS,
2455
0
                LLM_TENSOR_ROPE_FACTORS_LONG,
2456
0
                LLM_TENSOR_ROPE_FACTORS_SHORT,
2457
0
                LLM_TENSOR_ATTN_NORM,
2458
0
                LLM_TENSOR_ATTN_Q,
2459
0
                LLM_TENSOR_ATTN_Q_NORM,
2460
0
                LLM_TENSOR_ATTN_K,
2461
0
                LLM_TENSOR_ATTN_K_NORM,
2462
0
                LLM_TENSOR_ATTN_V,
2463
0
                LLM_TENSOR_ATTN_GATE,
2464
0
                LLM_TENSOR_ATTN_OUT,
2465
0
                LLM_TENSOR_FFN_NORM,
2466
0
                LLM_TENSOR_FFN_GATE,
2467
0
                LLM_TENSOR_FFN_DOWN,
2468
0
                LLM_TENSOR_FFN_UP,
2469
0
                LLM_TENSOR_FFN_GATE_INP,
2470
0
                LLM_TENSOR_FFN_GATE_EXPS,
2471
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2472
0
                LLM_TENSOR_FFN_UP_EXPS,
2473
0
                LLM_TENSOR_FFN_GATE_SHEXP,
2474
0
                LLM_TENSOR_FFN_UP_SHEXP,
2475
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
2476
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
2477
0
            };
2478
0
        case LLM_ARCH_GPTJ:
2479
0
        case LLM_ARCH_UNKNOWN:
2480
0
            return {
2481
0
                LLM_TENSOR_TOKEN_EMBD,
2482
0
            };
2483
0
        case LLM_ARCH_MAINCODER:
2484
0
            return {
2485
0
                LLM_TENSOR_TOKEN_EMBD,
2486
0
                LLM_TENSOR_OUTPUT_NORM,
2487
0
                LLM_TENSOR_OUTPUT,
2488
0
                LLM_TENSOR_ATTN_NORM,
2489
0
                LLM_TENSOR_ATTN_Q,
2490
0
                LLM_TENSOR_ATTN_Q_NORM,
2491
0
                LLM_TENSOR_ATTN_K,
2492
0
                LLM_TENSOR_ATTN_K_NORM,
2493
0
                LLM_TENSOR_ATTN_V,
2494
0
                LLM_TENSOR_ATTN_OUT,
2495
0
                LLM_TENSOR_FFN_NORM,
2496
0
                LLM_TENSOR_FFN_GATE,
2497
0
                LLM_TENSOR_FFN_DOWN,
2498
0
                LLM_TENSOR_FFN_UP,
2499
0
            };
2500
0
        case LLM_ARCH_KIMI_LINEAR:
2501
0
            return {
2502
0
                LLM_TENSOR_TOKEN_EMBD,
2503
0
                LLM_TENSOR_OUTPUT_NORM,
2504
0
                LLM_TENSOR_OUTPUT,
2505
0
                LLM_TENSOR_ROPE_FREQS,
2506
0
                LLM_TENSOR_ATTN_NORM,
2507
0
                LLM_TENSOR_ATTN_Q,
2508
0
                LLM_TENSOR_ATTN_K,
2509
0
                LLM_TENSOR_ATTN_V,
2510
0
                LLM_TENSOR_ATTN_OUT,
2511
0
                LLM_TENSOR_FFN_NORM,
2512
                // Dense FFN (layer 0 only)
2513
0
                LLM_TENSOR_FFN_GATE,
2514
0
                LLM_TENSOR_FFN_DOWN,
2515
0
                LLM_TENSOR_FFN_UP,
2516
                // MoE FFN (layers 1+)
2517
0
                LLM_TENSOR_FFN_GATE_INP,
2518
0
                LLM_TENSOR_FFN_GATE_EXPS,
2519
0
                LLM_TENSOR_FFN_DOWN_EXPS,
2520
0
                LLM_TENSOR_FFN_UP_EXPS,
2521
0
                LLM_TENSOR_FFN_EXP_PROBS_B,
2522
                // Shared experts
2523
0
                LLM_TENSOR_FFN_GATE_SHEXP,
2524
0
                LLM_TENSOR_FFN_DOWN_SHEXP,
2525
0
                LLM_TENSOR_FFN_UP_SHEXP,
2526
                // KDA (using SSM_ enum prefix, keeping GGUF names for backward compat)
2527
0
                LLM_TENSOR_SSM_CONV1D_Q,
2528
0
                LLM_TENSOR_SSM_CONV1D_K,
2529
0
                LLM_TENSOR_SSM_CONV1D_V,
2530
0
                LLM_TENSOR_SSM_F_A,
2531
0
                LLM_TENSOR_SSM_F_B,
2532
0
                LLM_TENSOR_SSM_BETA,
2533
0
                LLM_TENSOR_SSM_A,
2534
0
                LLM_TENSOR_SSM_G_A,
2535
0
                LLM_TENSOR_SSM_G_B,
2536
0
                LLM_TENSOR_SSM_DT,
2537
0
                LLM_TENSOR_SSM_NORM,
2538
                // MLA
2539
0
                LLM_TENSOR_ATTN_Q_A,
2540
0
                LLM_TENSOR_ATTN_Q_B,
2541
0
                LLM_TENSOR_ATTN_Q_A_NORM,
2542
0
                LLM_TENSOR_ATTN_KV_A_MQA,
2543
0
                LLM_TENSOR_ATTN_KV_B,
2544
0
                LLM_TENSOR_ATTN_K_B,
2545
0
                LLM_TENSOR_ATTN_V_B,
2546
0
                LLM_TENSOR_ATTN_KV_A_NORM,
2547
0
            };
2548
0
        default:
2549
0
            GGML_ABORT("unknown architecture for tensor mapping");
2550
0
    }
2551
0
}
2552
2553
// declare information about the model weight tensors:
2554
// - the layer in which the tensor is going to be used. this is needed in order to assign the correct buffer type for the weight
2555
// - the operator which is going to use the weight. this is needed to determine if the respective backend supports the operator
2556
//
2557
// for example, input layers are usually assigned to CPU/host buffer types
2558
//
2559
// a mismatch between the declared information and the actual layer/op in which the tensor is used can lead to sub-optimal
2560
//   assignment of the buffer types and extra overhead during computation
2561
// example: https://github.com/ggml-org/llama.cpp/pull/17548
2562
//
2563
static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
2564
    {LLM_TENSOR_TOKEN_EMBD,                 {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
2565
    {LLM_TENSOR_POS_EMBD,                   {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
2566
    {LLM_TENSOR_TOKEN_TYPES,                {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
2567
    {LLM_TENSOR_TOKEN_EMBD_NORM,            {LLM_TENSOR_LAYER_INPUT, GGML_OP_MUL}},
2568
    {LLM_TENSOR_OUTPUT,                     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
2569
    {LLM_TENSOR_CLS,                        {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
2570
    {LLM_TENSOR_CLS_OUT,                    {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
2571
    {LLM_TENSOR_CLS_NORM,                   {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2572
    {LLM_TENSOR_DENSE_2_OUT,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
2573
    {LLM_TENSOR_DENSE_3_OUT,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
2574
    {LLM_TENSOR_OUTPUT_NORM,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2575
    {LLM_TENSOR_OUTPUT_NORM_LFM2,           {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2576
    {LLM_TENSOR_DEC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2577
    {LLM_TENSOR_ENC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2578
    {LLM_TENSOR_ROPE_FREQS,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
2579
    {LLM_TENSOR_ROPE_FACTORS_LONG,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
2580
    {LLM_TENSOR_ROPE_FACTORS_SHORT,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
2581
    {LLM_TENSOR_ATTN_Q,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2582
    {LLM_TENSOR_ATTN_K,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2583
    {LLM_TENSOR_ATTN_V,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2584
    {LLM_TENSOR_ATTN_QKV,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2585
    {LLM_TENSOR_ATTN_OUT,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2586
    {LLM_TENSOR_ATTN_GATE,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2587
    {LLM_TENSOR_FFN_GATE,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2588
    {LLM_TENSOR_FFN_DOWN,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2589
    {LLM_TENSOR_FFN_UP,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2590
    {LLM_TENSOR_FFN_DOWN_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2591
    {LLM_TENSOR_FFN_GATE_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2592
    {LLM_TENSOR_FFN_UP_SHEXP,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2593
    {LLM_TENSOR_ATTN_Q_A,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2594
    {LLM_TENSOR_ATTN_Q_B,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2595
    {LLM_TENSOR_ATTN_KV_A_MQA,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2596
    {LLM_TENSOR_ATTN_KV_B,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2597
    {LLM_TENSOR_ATTN_K_B,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2598
    {LLM_TENSOR_ATTN_V_B,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2599
    {LLM_TENSOR_ATTN_SINKS,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SCALE}},
2600
    {LLM_TENSOR_DEC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2601
    {LLM_TENSOR_DEC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2602
    {LLM_TENSOR_DEC_ATTN_V,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2603
    {LLM_TENSOR_DEC_ATTN_OUT,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2604
    {LLM_TENSOR_DEC_CROSS_ATTN_Q,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2605
    {LLM_TENSOR_DEC_CROSS_ATTN_K,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2606
    {LLM_TENSOR_DEC_CROSS_ATTN_V,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2607
    {LLM_TENSOR_DEC_CROSS_ATTN_OUT,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2608
    {LLM_TENSOR_DEC_FFN_GATE,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2609
    {LLM_TENSOR_DEC_FFN_DOWN,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2610
    {LLM_TENSOR_DEC_FFN_UP,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2611
    {LLM_TENSOR_ENC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2612
    {LLM_TENSOR_ENC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2613
    {LLM_TENSOR_ENC_ATTN_V,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2614
    {LLM_TENSOR_ENC_ATTN_OUT,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2615
    {LLM_TENSOR_ENC_FFN_GATE,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2616
    {LLM_TENSOR_ENC_FFN_DOWN,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2617
    {LLM_TENSOR_ENC_FFN_UP,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2618
    {LLM_TENSOR_FFN_GATE_INP_SHEXP,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2619
    {LLM_TENSOR_FFN_GATE_INP,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2620
    {LLM_TENSOR_SSM_IN,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2621
    {LLM_TENSOR_SSM_X,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2622
    {LLM_TENSOR_SSM_DT,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2623
    {LLM_TENSOR_SSM_OUT,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2624
    {LLM_TENSOR_SSM_ALPHA,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2625
    {LLM_TENSOR_SSM_BETA_ALPHA,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2626
    {LLM_TENSOR_TIME_MIX_W1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2627
    {LLM_TENSOR_TIME_MIX_W2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2628
    {LLM_TENSOR_TIME_MIX_A1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2629
    {LLM_TENSOR_TIME_MIX_A2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2630
    {LLM_TENSOR_TIME_MIX_V1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2631
    {LLM_TENSOR_TIME_MIX_V2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2632
    {LLM_TENSOR_TIME_MIX_G1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2633
    {LLM_TENSOR_TIME_MIX_G2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2634
    {LLM_TENSOR_TIME_MIX_DECAY_W1,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2635
    {LLM_TENSOR_TIME_MIX_DECAY_W2,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2636
    {LLM_TENSOR_TIME_MIX_KEY,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2637
    {LLM_TENSOR_TIME_MIX_VALUE,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2638
    {LLM_TENSOR_TIME_MIX_RECEPTANCE,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2639
    {LLM_TENSOR_TIME_MIX_GATE,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2640
    {LLM_TENSOR_TIME_MIX_OUTPUT,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2641
    {LLM_TENSOR_CHANNEL_MIX_KEY,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2642
    {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2643
    {LLM_TENSOR_CHANNEL_MIX_VALUE,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2644
    {LLM_TENSOR_FFN_ACT,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
2645
    {LLM_TENSOR_SSM_CONV1D,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
2646
    {LLM_TENSOR_SSM_A,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
2647
    {LLM_TENSOR_SSM_A_NOSCAN,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, // a version of SSM_A used for MUL instead of SSM_SCAN
2648
    {LLM_TENSOR_SSM_DT_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2649
    {LLM_TENSOR_SSM_B_NORM,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2650
    {LLM_TENSOR_SSM_C_NORM,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2651
    {LLM_TENSOR_SSM_D,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2652
    {LLM_TENSOR_SSM_NORM,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2653
    // Kimi KDA - Conv tensors are 4D [d_conv, 1, d_inner, 1], reshaped to 2D at runtime
2654
    {LLM_TENSOR_SSM_CONV1D_Q,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2655
    {LLM_TENSOR_SSM_CONV1D_K,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2656
    {LLM_TENSOR_SSM_CONV1D_V,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2657
    {LLM_TENSOR_SSM_F_A,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2658
    {LLM_TENSOR_SSM_F_B,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2659
    {LLM_TENSOR_SSM_BETA,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2660
    {LLM_TENSOR_SSM_G_A,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2661
    {LLM_TENSOR_SSM_G_B,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2662
    {LLM_TENSOR_TIME_MIX_LERP_X,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2663
    {LLM_TENSOR_TIME_MIX_LN,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2664
    {LLM_TENSOR_CHANNEL_MIX_LERP_K,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2665
    {LLM_TENSOR_CHANNEL_MIX_LERP_R,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2666
    {LLM_TENSOR_TIME_MIX_K_K,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2667
    {LLM_TENSOR_TIME_MIX_K_A,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2668
    {LLM_TENSOR_TIME_MIX_R_K,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2669
    {LLM_TENSOR_TIME_MIX_LERP_W,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2670
    {LLM_TENSOR_TIME_MIX_LERP_K,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2671
    {LLM_TENSOR_TIME_MIX_LERP_V,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2672
    {LLM_TENSOR_TIME_MIX_LERP_R,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2673
    {LLM_TENSOR_TIME_MIX_LERP_G,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2674
    {LLM_TENSOR_TIME_MIX_LERP_FUSED,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2675
    {LLM_TENSOR_TIME_MIX_DECAY,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2676
    {LLM_TENSOR_TIME_MIX_W0,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2677
    {LLM_TENSOR_TIME_MIX_A0,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2678
    {LLM_TENSOR_TIME_MIX_V0,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2679
    {LLM_TENSOR_TIME_MIX_FIRST,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_RWKV_WKV6}},
2680
    {LLM_TENSOR_ATTN_NORM,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2681
    {LLM_TENSOR_ATTN_NORM_2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2682
    {LLM_TENSOR_ATTN_OUT_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2683
    {LLM_TENSOR_ATTN_POST_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2684
    {LLM_TENSOR_FFN_NORM,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2685
    {LLM_TENSOR_FFN_POST_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2686
    {LLM_TENSOR_FFN_NORM_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2687
    {LLM_TENSOR_ATTN_Q_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2688
    {LLM_TENSOR_ATTN_K_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2689
    {LLM_TENSOR_LAYER_OUT_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2690
    {LLM_TENSOR_ATTN_Q_A_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2691
    {LLM_TENSOR_ATTN_KV_A_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2692
    {LLM_TENSOR_ATTN_SUB_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2693
    {LLM_TENSOR_FFN_SUB_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2694
    {LLM_TENSOR_DEC_ATTN_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2695
    {LLM_TENSOR_DEC_CROSS_ATTN_NORM,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2696
    {LLM_TENSOR_DEC_FFN_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2697
    {LLM_TENSOR_ENC_ATTN_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2698
    {LLM_TENSOR_ENC_FFN_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2699
    {LLM_TENSOR_DEC_ATTN_REL_B,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
2700
    {LLM_TENSOR_ENC_ATTN_REL_B,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
2701
    {LLM_TENSOR_FFN_DOWN_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2702
    {LLM_TENSOR_FFN_GATE_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2703
    {LLM_TENSOR_FFN_UP_EXPS,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2704
    {LLM_TENSOR_FFN_GATE_UP_EXPS,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2705
    {LLM_TENSOR_FFN_DOWN_CHEXPS,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2706
    {LLM_TENSOR_FFN_GATE_CHEXPS,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2707
    {LLM_TENSOR_FFN_UP_CHEXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2708
    {LLM_TENSOR_FFN_EXP_PROBS_B,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2709
    // altup / laurel (gemma 3n)
2710
    {LLM_TENSOR_PER_LAYER_TOKEN_EMBD,       {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_GET_ROWS}},
2711
    {LLM_TENSOR_PER_LAYER_MODEL_PROJ,       {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
2712
    {LLM_TENSOR_PER_LAYER_PROJ_NORM,        {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL}},
2713
    {LLM_TENSOR_ALTUP_PROJ,                 {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
2714
    {LLM_TENSOR_ALTUP_UNEMBD_PROJ,          {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
2715
    {LLM_TENSOR_PER_LAYER_INP_GATE,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2716
    {LLM_TENSOR_PER_LAYER_PROJ,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2717
    {LLM_TENSOR_PER_LAYER_POST_NORM,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2718
    {LLM_TENSOR_ALTUP_CORRECT_COEF,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2719
    {LLM_TENSOR_ALTUP_CORRECT_SCALE,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2720
    {LLM_TENSOR_ALTUP_PREDICT_COEF,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2721
    {LLM_TENSOR_ALTUP_ROUTER,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2722
    {LLM_TENSOR_ALTUP_ROUTER_NORM,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2723
    {LLM_TENSOR_LAUREL_L,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2724
    {LLM_TENSOR_LAUREL_R,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2725
    {LLM_TENSOR_LAUREL_POST_NORM,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2726
    // this tensor is loaded for T5, but never used
2727
    {LLM_TENSOR_DEC_CROSS_ATTN_REL_B,       {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
2728
    {LLM_TENSOR_CONV1D,                     {LLM_TENSOR_LAYER_INPUT,     GGML_OP_IM2COL}},
2729
    {LLM_TENSOR_POS_NET_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2730
    {LLM_TENSOR_POS_NET_NORM1,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2731
    {LLM_TENSOR_POS_NET_NORM2,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2732
    {LLM_TENSOR_POS_NET_CONV1,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
2733
    {LLM_TENSOR_POS_NET_CONV2,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
2734
    {LLM_TENSOR_POS_NET_ATTN_NORM,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2735
    {LLM_TENSOR_POS_NET_ATTN_Q,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2736
    {LLM_TENSOR_POS_NET_ATTN_K,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2737
    {LLM_TENSOR_POS_NET_ATTN_V,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2738
    {LLM_TENSOR_POS_NET_ATTN_OUT,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2739
    {LLM_TENSOR_CONVNEXT_DW,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
2740
    {LLM_TENSOR_CONVNEXT_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2741
    {LLM_TENSOR_CONVNEXT_PW1,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2742
    {LLM_TENSOR_CONVNEXT_PW2,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2743
    {LLM_TENSOR_CONVNEXT_GAMMA,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2744
    {LLM_TENSOR_SHORTCONV_CONV,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
2745
    {LLM_TENSOR_SHORTCONV_INPROJ,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2746
    {LLM_TENSOR_SHORTCONV_OUTPROJ,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2747
    {LLM_TENSOR_VISEXP_ATTN_QKV,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2748
    {LLM_TENSOR_VISEXP_ATTN_OUT,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2749
    {LLM_TENSOR_VISEXP_FFN_GATE,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2750
    {LLM_TENSOR_VISEXP_FFN_DOWN,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2751
    {LLM_TENSOR_VISEXP_FFN_UP,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2752
    {LLM_TENSOR_INDEXER_K_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2753
    {LLM_TENSOR_INDEXER_PROJ,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2754
    {LLM_TENSOR_INDEXER_ATTN_K,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2755
    {LLM_TENSOR_INDEXER_ATTN_Q_B,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2756
    // NextN/MTP tensors are currently ignored (reserved for future MTP support)
2757
    // These tensors only exist in the last layer(s) and are treated as output tensors
2758
    {LLM_TENSOR_NEXTN_EH_PROJ,              {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
2759
    {LLM_TENSOR_NEXTN_EMBED_TOKENS,         {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
2760
    {LLM_TENSOR_NEXTN_ENORM,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
2761
    {LLM_TENSOR_NEXTN_HNORM,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2762
    {LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
2763
    {LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2764
    // Nemotron 3 Super
2765
    {LLM_TENSOR_FFN_LATENT_DOWN,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2766
    {LLM_TENSOR_FFN_LATENT_UP,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2767
};
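
The map above pairs every tensor with the layer class and the ggml operator that will consume it; as the comment preceding it explains, this is what drives buffer-type assignment. A minimal sketch of how that pairing could be consulted (illustrative only, not part of llama-arch.cpp; it assumes the two fields of llm_tensor_info are named `layer` and `op`, matching the {layer, op} initializers above):

    // sketch only: prefer a CPU/host buffer for weights declared as input-layer tensors
    static bool tensor_prefers_host_buffer(llm_tensor tensor) {
        const llm_tensor_info & info = llm_tensor_info_for(tensor);
        // input-layer weights (e.g. LLM_TENSOR_TOKEN_EMBD, consumed via GGML_OP_GET_ROWS)
        // are usually placed in CPU/host buffers, as noted in the comment above
        return info.layer == LLM_TENSOR_LAYER_INPUT;
    }
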
2768
2769
5.97k
LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
2770
2771
2.89k
std::string LLM_KV::operator()(llm_kv kv) const {
2772
2.89k
    std::string name = ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
2773
2774
2.89k
    if (suffix != nullptr) {
2775
0
        name += ".";
2776
0
        name += suffix;
2777
0
    }
2778
2779
2.89k
    return name;
2780
2.89k
}
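
A hypothetical usage sketch for LLM_KV above: the functor substitutes the architecture name into the format string registered for the key in LLM_KV_NAMES (defined earlier in this file) and appends the optional suffix. The key enum and the resulting string below are assumptions based on the usual "<arch>.<key>" convention:

    // sketch only: build a GGUF metadata key for the "llama" architecture
    const LLM_KV kv(LLM_ARCH_LLAMA, nullptr);
    std::string key = kv(LLM_KV_CONTEXT_LENGTH); // assumed result: "llama.context_length"
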
2781
2782
LLM_TN_IMPL::LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid)
2783
0
    : arch(arch), tensor(tensor), suffix(suffix), bid(bid), xid(xid),
2784
0
      model_tensors(llm_get_tensor_names(arch)) {}
2785
2786
0
std::string LLM_TN_IMPL::str() const {
2787
0
    if (LLM_TENSOR_NAMES.find(tensor) == LLM_TENSOR_NAMES.end()) {
2788
0
        GGML_ABORT("unknown tensor name for tensor id %d", static_cast<int>(tensor));
2789
0
    }
2790
2791
0
    if (model_tensors.find(tensor) == model_tensors.end()) {
2792
0
        return LLM_TENSOR_NAMES.at(tensor);
2793
0
    }
2794
2795
0
    std::string name = ::format(LLM_TENSOR_NAMES.at(tensor), bid, xid);
2796
0
    if (suffix != nullptr) {
2797
0
        name += ".";
2798
0
        name += suffix;
2799
0
    }
2800
2801
0
    return name;
2802
0
}
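
Likewise, a hypothetical sketch of resolving a per-layer tensor name via LLM_TN_IMPL::str(); the expected output assumes a "blk.%d.attn_q"-style format string in LLM_TENSOR_NAMES (defined elsewhere in this file):

    // sketch only: name of the layer-0 attention Q projection weight
    LLM_TN_IMPL tn(LLM_ARCH_LLAMA, LLM_TENSOR_ATTN_Q, "weight", /*bid=*/0, /*xid=*/-1);
    std::string name = tn.str(); // assumed result: "blk.0.attn_q.weight"
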
2803
2804
0
std::vector<llm_arch> llm_arch_all() {
2805
0
    std::vector<llm_arch> ret;
2806
0
    ret.reserve(LLM_ARCH_NAMES.size());
2807
0
    for (const auto & [arch, _] : LLM_ARCH_NAMES) {
2808
0
        ret.push_back(arch);
2809
0
    }
2810
0
    return ret;
2811
0
}
2812
2813
0
const char * llm_arch_name(llm_arch arch) {
2814
0
    auto it = LLM_ARCH_NAMES.find(arch);
2815
0
    if (it == LLM_ARCH_NAMES.end()) {
2816
0
        return "unknown";
2817
0
    }
2818
0
    return it->second;
2819
0
}
2820
2821
1.06k
llm_arch llm_arch_from_string(const std::string & name) {
2822
121k
    for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
2823
121k
        if (kv.second == name) {
2824
186
            return kv.first;
2825
186
        }
2826
121k
    }
2827
2828
878
    return LLM_ARCH_UNKNOWN;
2829
1.06k
}
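
For completeness, a short usage sketch of the reverse lookup above, using the "llama" entry from LLM_ARCH_NAMES:

    // sketch only: map an architecture string back to its enum value
    llm_arch a = llm_arch_from_string("llama");        // LLM_ARCH_LLAMA
    llm_arch b = llm_arch_from_string("not-an-arch");  // LLM_ARCH_UNKNOWN
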
2830
2831
0
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
2832
0
    return LLM_TENSOR_INFOS.at(tensor);
2833
0
}
2834
2835
0
bool llm_arch_is_recurrent(const llm_arch & arch) {
2836
0
    switch (arch) {
2837
0
        case LLM_ARCH_MAMBA:
2838
0
        case LLM_ARCH_MAMBA2:
2839
0
        case LLM_ARCH_RWKV6:
2840
0
        case LLM_ARCH_RWKV6QWEN2:
2841
0
        case LLM_ARCH_RWKV7:
2842
0
        case LLM_ARCH_ARWKV7:
2843
0
            return true;
2844
0
        default:
2845
0
            return false;
2846
0
    }
2847
0
}
2848
2849
0
bool llm_arch_is_hybrid(const llm_arch & arch) {
2850
0
    switch (arch) {
2851
0
        case LLM_ARCH_JAMBA:
2852
0
        case LLM_ARCH_FALCON_H1:
2853
0
        case LLM_ARCH_PLAMO2:
2854
0
        case LLM_ARCH_GRANITE_HYBRID:
2855
0
        case LLM_ARCH_LFM2:
2856
0
        case LLM_ARCH_LFM2MOE:
2857
0
        case LLM_ARCH_NEMOTRON_H:
2858
0
        case LLM_ARCH_NEMOTRON_H_MOE:
2859
0
        case LLM_ARCH_QWEN3NEXT:
2860
0
        case LLM_ARCH_KIMI_LINEAR:
2861
0
        case LLM_ARCH_QWEN35:
2862
0
        case LLM_ARCH_QWEN35MOE:
2863
0
            return true;
2864
0
        default:
2865
0
            return false;
2866
0
    }
2867
0
}
2868
2869
0
bool llm_arch_is_diffusion(const llm_arch & arch) {
2870
0
    switch (arch) {
2871
0
        case LLM_ARCH_DREAM:
2872
0
        case LLM_ARCH_LLADA:
2873
0
        case LLM_ARCH_LLADA_MOE:
2874
0
        case LLM_ARCH_RND1:
2875
0
            return true;
2876
0
        default:
2877
0
            return false;
2878
0
    }
2879
0
}