Coverage Report

Created: 2025-12-14 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/src/llama-arch.cpp
Line
Count
Source
1
#include "llama-arch.h"
2
3
#include "llama-impl.h"
4
5
#include <map>
6
7
static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
8
    { LLM_ARCH_CLIP,             "clip"             }, // dummy, only used by llama-quantize
9
    { LLM_ARCH_LLAMA,            "llama"            },
10
    { LLM_ARCH_LLAMA4,           "llama4"           },
11
    { LLM_ARCH_DECI,             "deci"             },
12
    { LLM_ARCH_FALCON,           "falcon"           },
13
    { LLM_ARCH_GROK,             "grok"             },
14
    { LLM_ARCH_GPT2,             "gpt2"             },
15
    { LLM_ARCH_GPTJ,             "gptj"             },
16
    { LLM_ARCH_GPTNEOX,          "gptneox"          },
17
    { LLM_ARCH_MPT,              "mpt"              },
18
    { LLM_ARCH_BAICHUAN,         "baichuan"         },
19
    { LLM_ARCH_STARCODER,        "starcoder"        },
20
    { LLM_ARCH_REFACT,           "refact"           },
21
    { LLM_ARCH_BERT,             "bert"             },
22
    { LLM_ARCH_NOMIC_BERT,       "nomic-bert"       },
23
    { LLM_ARCH_NOMIC_BERT_MOE,   "nomic-bert-moe"   },
24
    { LLM_ARCH_NEO_BERT,         "neo-bert"         },
25
    { LLM_ARCH_JINA_BERT_V2,     "jina-bert-v2"     },
26
    { LLM_ARCH_JINA_BERT_V3,     "jina-bert-v3"     },
27
    { LLM_ARCH_BLOOM,            "bloom"            },
28
    { LLM_ARCH_STABLELM,         "stablelm"         },
29
    { LLM_ARCH_QWEN,             "qwen"             },
30
    { LLM_ARCH_QWEN2,            "qwen2"            },
31
    { LLM_ARCH_QWEN2MOE,         "qwen2moe"         },
32
    { LLM_ARCH_QWEN2VL,          "qwen2vl"          },
33
    { LLM_ARCH_QWEN3,            "qwen3"            },
34
    { LLM_ARCH_QWEN3MOE,         "qwen3moe"         },
35
    { LLM_ARCH_QWEN3NEXT,        "qwen3next"        },
36
    { LLM_ARCH_QWEN3VL,          "qwen3vl"          },
37
    { LLM_ARCH_QWEN3VLMOE,       "qwen3vlmoe"       },
38
    { LLM_ARCH_PHI2,             "phi2"             },
39
    { LLM_ARCH_PHI3,             "phi3"             },
40
    { LLM_ARCH_PHIMOE,           "phimoe"           },
41
    { LLM_ARCH_PLAMO,            "plamo"            },
42
    { LLM_ARCH_PLAMO2,           "plamo2"           },
43
    { LLM_ARCH_CODESHELL,        "codeshell"        },
44
    { LLM_ARCH_ORION,            "orion"            },
45
    { LLM_ARCH_INTERNLM2,        "internlm2"        },
46
    { LLM_ARCH_MINICPM,          "minicpm"          },
47
    { LLM_ARCH_MINICPM3,         "minicpm3"         },
48
    { LLM_ARCH_GEMMA,            "gemma"            },
49
    { LLM_ARCH_GEMMA2,           "gemma2"           },
50
    { LLM_ARCH_GEMMA3,           "gemma3"           },
51
    { LLM_ARCH_GEMMA3N,          "gemma3n"          },
52
    { LLM_ARCH_GEMMA_EMBEDDING,  "gemma-embedding"  },
53
    { LLM_ARCH_STARCODER2,       "starcoder2"       },
54
    { LLM_ARCH_MAMBA,            "mamba"            },
55
    { LLM_ARCH_MAMBA2,           "mamba2"           },
56
    { LLM_ARCH_JAMBA,            "jamba"            },
57
    { LLM_ARCH_FALCON_H1,        "falcon-h1"        },
58
    { LLM_ARCH_XVERSE,           "xverse"           },
59
    { LLM_ARCH_COMMAND_R,        "command-r"        },
60
    { LLM_ARCH_COHERE2,          "cohere2"          },
61
    { LLM_ARCH_DBRX,             "dbrx"             },
62
    { LLM_ARCH_OLMO,             "olmo"             },
63
    { LLM_ARCH_OLMO2,            "olmo2"            },
64
    { LLM_ARCH_OLMOE,            "olmoe"            },
65
    { LLM_ARCH_OPENELM,          "openelm"          },
66
    { LLM_ARCH_ARCTIC,           "arctic"           },
67
    { LLM_ARCH_DEEPSEEK,         "deepseek"         },
68
    { LLM_ARCH_DEEPSEEK2,        "deepseek2"        },
69
    { LLM_ARCH_CHATGLM,          "chatglm"          },
70
    { LLM_ARCH_GLM4,             "glm4"             },
71
    { LLM_ARCH_GLM4_MOE,         "glm4moe"          },
72
    { LLM_ARCH_BITNET,           "bitnet"           },
73
    { LLM_ARCH_T5,               "t5"               },
74
    { LLM_ARCH_T5ENCODER,        "t5encoder"        },
75
    { LLM_ARCH_JAIS,             "jais"             },
76
    { LLM_ARCH_NEMOTRON,         "nemotron"         },
77
    { LLM_ARCH_NEMOTRON_H,       "nemotron_h"       },
78
    { LLM_ARCH_EXAONE,           "exaone"           },
79
    { LLM_ARCH_EXAONE4,          "exaone4"          },
80
    { LLM_ARCH_RWKV6,            "rwkv6"            },
81
    { LLM_ARCH_RWKV6QWEN2,       "rwkv6qwen2"       },
82
    { LLM_ARCH_RWKV7,            "rwkv7"            },
83
    { LLM_ARCH_ARWKV7,           "arwkv7"           },
84
    { LLM_ARCH_GRANITE,          "granite"          },
85
    { LLM_ARCH_GRANITE_MOE,      "granitemoe"       },
86
    { LLM_ARCH_GRANITE_HYBRID,   "granitehybrid"    },
87
    { LLM_ARCH_CHAMELEON,        "chameleon"        },
88
    { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
89
    { LLM_ARCH_PLM,              "plm"              },
90
    { LLM_ARCH_BAILINGMOE,       "bailingmoe"       },
91
    { LLM_ARCH_BAILINGMOE2,      "bailingmoe2"      },
92
    { LLM_ARCH_DOTS1,            "dots1"            },
93
    { LLM_ARCH_ARCEE,            "arcee"            },
94
    { LLM_ARCH_AFMOE,            "afmoe"            },
95
    { LLM_ARCH_ERNIE4_5,         "ernie4_5"         },
96
    { LLM_ARCH_ERNIE4_5_MOE,     "ernie4_5-moe"     },
97
    { LLM_ARCH_HUNYUAN_MOE,      "hunyuan-moe"      },
98
    { LLM_ARCH_HUNYUAN_DENSE,    "hunyuan-dense"    },
99
    { LLM_ARCH_SMOLLM3,          "smollm3"          },
100
    { LLM_ARCH_OPENAI_MOE,       "gpt-oss"          },
101
    { LLM_ARCH_LFM2,             "lfm2"             },
102
    { LLM_ARCH_LFM2MOE,          "lfm2moe"          },
103
    { LLM_ARCH_DREAM,            "dream"            },
104
    { LLM_ARCH_SMALLTHINKER,     "smallthinker"     },
105
    { LLM_ARCH_LLADA,            "llada"            },
106
    { LLM_ARCH_LLADA_MOE,        "llada-moe"        },
107
    { LLM_ARCH_SEED_OSS,         "seed_oss"         },
108
    { LLM_ARCH_GROVEMOE,         "grovemoe"         },
109
    { LLM_ARCH_APERTUS,          "apertus"          },
110
    { LLM_ARCH_MINIMAX_M2,       "minimax-m2"       },
111
    { LLM_ARCH_COGVLM,           "cogvlm"           },
112
    { LLM_ARCH_RND1,             "rnd1"             },
113
    { LLM_ARCH_PANGU_EMBED,      "pangu-embedded"   },
114
    { LLM_ARCH_MISTRAL3,         "mistral3"         },
115
    { LLM_ARCH_UNKNOWN,          "(unknown)"        },
116
};
117
118
static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
119
    { LLM_KV_GENERAL_TYPE,                     "general.type"                          },
120
    { LLM_KV_GENERAL_ARCHITECTURE,             "general.architecture"                  },
121
    { LLM_KV_GENERAL_QUANTIZATION_VERSION,     "general.quantization_version"          },
122
    { LLM_KV_GENERAL_ALIGNMENT,                "general.alignment"                     },
123
    { LLM_KV_GENERAL_FILE_TYPE,                "general.file_type"                     },
124
    { LLM_KV_GENERAL_SAMPLING_SEQUENCE,        "general.sampling.sequence"             },
125
    { LLM_KV_GENERAL_SAMPLING_TOP_K,           "general.sampling.top_k"                },
126
    { LLM_KV_GENERAL_SAMPLING_TOP_P,           "general.sampling.top_p"                },
127
    { LLM_KV_GENERAL_SAMPLING_MIN_P,           "general.sampling.min_p"                },
128
    { LLM_KV_GENERAL_SAMPLING_XTC_PROBABILITY, "general.sampling.xtc_probability"      },
129
    { LLM_KV_GENERAL_SAMPLING_XTC_THRESHOLD,   "general.sampling.xtc_threshold"        },
130
    { LLM_KV_GENERAL_SAMPLING_TEMP,            "general.sampling.temp"                 },
131
    { LLM_KV_GENERAL_SAMPLING_PENALTY_LAST_N,  "general.sampling.penalty_last_n"       },
132
    { LLM_KV_GENERAL_SAMPLING_PENALTY_REPEAT,  "general.sampling.penalty_repeat"       },
133
    { LLM_KV_GENERAL_SAMPLING_MIROSTAT,        "general.sampling.mirostat"             },
134
    { LLM_KV_GENERAL_SAMPLING_MIROSTAT_TAU,    "general.sampling.mirostat_tau"         },
135
    { LLM_KV_GENERAL_SAMPLING_MIROSTAT_ETA,    "general.sampling.mirostat_eta"         },
136
    { LLM_KV_GENERAL_NAME,                     "general.name"                          },
137
    { LLM_KV_GENERAL_AUTHOR,                   "general.author"                        },
138
    { LLM_KV_GENERAL_VERSION,                  "general.version"                       },
139
    { LLM_KV_GENERAL_URL,                      "general.url"                           },
140
    { LLM_KV_GENERAL_DESCRIPTION,              "general.description"                   },
141
    { LLM_KV_GENERAL_LICENSE,                  "general.license"                       },
142
    { LLM_KV_GENERAL_SOURCE_URL,               "general.source.url"                    },
143
    { LLM_KV_GENERAL_SOURCE_HF_REPO,           "general.source.huggingface.repository" },
144
145
    { LLM_KV_VOCAB_SIZE,                        "%s.vocab_size"                        },
146
    { LLM_KV_CONTEXT_LENGTH,                    "%s.context_length"                    },
147
    { LLM_KV_EMBEDDING_LENGTH,                  "%s.embedding_length"                  },
148
    { LLM_KV_FEATURES_LENGTH,                   "%s.features_length"                   },
149
    { LLM_KV_BLOCK_COUNT,                       "%s.block_count"                       },
150
    { LLM_KV_LEADING_DENSE_BLOCK_COUNT,         "%s.leading_dense_block_count"         },
151
    { LLM_KV_FEED_FORWARD_LENGTH,               "%s.feed_forward_length"               },
152
    { LLM_KV_EXPERT_FEED_FORWARD_LENGTH,        "%s.expert_feed_forward_length"        },
153
    { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
154
    { LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH,  "%s.expert_chunk_feed_forward_length"  },
155
    { LLM_KV_USE_PARALLEL_RESIDUAL,             "%s.use_parallel_residual"             },
156
    { LLM_KV_TENSOR_DATA_LAYOUT,                "%s.tensor_data_layout"                },
157
    { LLM_KV_EXPERT_COUNT,                      "%s.expert_count"                      },
158
    { LLM_KV_EXPERT_USED_COUNT,                 "%s.expert_used_count"                 },
159
    { LLM_KV_EXPERT_SHARED_COUNT,               "%s.expert_shared_count"               },
160
    { LLM_KV_EXPERT_GROUP_COUNT,                "%s.expert_group_count"                },
161
    { LLM_KV_EXPERT_GROUP_USED_COUNT,           "%s.expert_group_used_count"           },
162
    { LLM_KV_EXPERT_WEIGHTS_SCALE,              "%s.expert_weights_scale"              },
163
    { LLM_KV_EXPERT_WEIGHTS_NORM,               "%s.expert_weights_norm"               },
164
    { LLM_KV_EXPERT_GATING_FUNC,                "%s.expert_gating_func"                },
165
    { LLM_KV_EXPERT_GROUP_SCALE,                "%s.expert_group_scale"                },
166
    { LLM_KV_EXPERTS_PER_GROUP,                 "%s.experts_per_group"                 },
167
    { LLM_KV_MOE_EVERY_N_LAYERS,                "%s.moe_every_n_layers"                },
168
    { LLM_KV_NEXTN_PREDICT_LAYERS,              "%s.nextn_predict_layers"              },
169
    { LLM_KV_NUM_DEEPSTACK_LAYERS,              "%s.n_deepstack_layers"                },
170
    { LLM_KV_POOLING_TYPE,                      "%s.pooling_type"                      },
171
    { LLM_KV_LOGIT_SCALE,                       "%s.logit_scale"                       },
172
    { LLM_KV_DECODER_START_TOKEN_ID,            "%s.decoder_start_token_id"            },
173
    { LLM_KV_DECODER_BLOCK_COUNT,               "%s.decoder_block_count"               },
174
    { LLM_KV_ATTN_LOGIT_SOFTCAPPING,            "%s.attn_logit_softcapping"            },
175
    { LLM_KV_ROUTER_LOGIT_SOFTCAPPING,          "%s.router_logit_softcapping"          },
176
    { LLM_KV_FINAL_LOGIT_SOFTCAPPING,           "%s.final_logit_softcapping"           },
177
    { LLM_KV_SWIN_NORM,                         "%s.swin_norm"                         },
178
    { LLM_KV_RESCALE_EVERY_N_LAYERS,            "%s.rescale_every_n_layers"            },
179
    { LLM_KV_TIME_MIX_EXTRA_DIM,                "%s.time_mix_extra_dim"                },
180
    { LLM_KV_TIME_DECAY_EXTRA_DIM,              "%s.time_decay_extra_dim"              },
181
    { LLM_KV_RESIDUAL_SCALE,                    "%s.residual_scale"                    },
182
    { LLM_KV_EMBEDDING_SCALE,                   "%s.embedding_scale"                   },
183
    { LLM_KV_TOKEN_SHIFT_COUNT,                 "%s.token_shift_count"                 },
184
    { LLM_KV_INTERLEAVE_MOE_LAYER_STEP,         "%s.interleave_moe_layer_step"         },
185
186
    { LLM_KV_ATTENTION_HEAD_COUNT,                   "%s.attention.head_count"                   },
187
    { LLM_KV_ATTENTION_HEAD_COUNT_KV,                "%s.attention.head_count_kv"                },
188
    { LLM_KV_ATTENTION_MAX_ALIBI_BIAS,               "%s.attention.max_alibi_bias"               },
189
    { LLM_KV_ATTENTION_CLAMP_KQV,                    "%s.attention.clamp_kqv"                    },
190
    { LLM_KV_ATTENTION_KEY_LENGTH,                   "%s.attention.key_length"                   },
191
    { LLM_KV_ATTENTION_VALUE_LENGTH,                 "%s.attention.value_length"                 },
192
    { LLM_KV_ATTENTION_LAYERNORM_EPS,                "%s.attention.layer_norm_epsilon"           },
193
    { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,            "%s.attention.layer_norm_rms_epsilon"       },
194
    { LLM_KV_ATTENTION_GROUPNORM_EPS,                "%s.attention.group_norm_epsilon"           },
195
    { LLM_KV_ATTENTION_GROUPNORM_GROUPS,             "%s.attention.group_norm_groups"            },
196
    { LLM_KV_ATTENTION_CAUSAL,                       "%s.attention.causal"                       },
197
    { LLM_KV_ATTENTION_Q_LORA_RANK,                  "%s.attention.q_lora_rank"                  },
198
    { LLM_KV_ATTENTION_KV_LORA_RANK,                 "%s.attention.kv_lora_rank"                 },
199
    { LLM_KV_ATTENTION_DECAY_LORA_RANK,              "%s.attention.decay_lora_rank"              },
200
    { LLM_KV_ATTENTION_ICLR_LORA_RANK,               "%s.attention.iclr_lora_rank"               },
201
    { LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, "%s.attention.value_residual_mix_lora_rank" },
202
    { LLM_KV_ATTENTION_GATE_LORA_RANK,               "%s.attention.gate_lora_rank"               },
203
    { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,       "%s.attention.relative_buckets_count"       },
204
    { LLM_KV_ATTENTION_SLIDING_WINDOW,               "%s.attention.sliding_window"               },
205
    { LLM_KV_ATTENTION_SCALE,                        "%s.attention.scale"                        },
206
    { LLM_KV_ATTENTION_OUTPUT_SCALE,                 "%s.attention.output_scale"                 },
207
    { LLM_KV_ATTENTION_TEMPERATURE_LENGTH,           "%s.attention.temperature_length"           },
208
    { LLM_KV_ATTENTION_TEMPERATURE_SCALE,            "%s.attention.temperature_scale"            },
209
    { LLM_KV_ATTENTION_KEY_LENGTH_MLA,               "%s.attention.key_length_mla"               },
210
    { LLM_KV_ATTENTION_VALUE_LENGTH_MLA,             "%s.attention.value_length_mla"             },
211
212
    { LLM_KV_ROPE_DIMENSION_COUNT,          "%s.rope.dimension_count"                 },
213
    { LLM_KV_ROPE_DIMENSION_SECTIONS,       "%s.rope.dimension_sections"              },
214
    { LLM_KV_ROPE_FREQ_BASE,                "%s.rope.freq_base"                       },
215
    { LLM_KV_ROPE_SCALE_LINEAR,             "%s.rope.scale_linear"                    },
216
    { LLM_KV_ROPE_SCALING_TYPE,             "%s.rope.scaling.type"                    },
217
    { LLM_KV_ROPE_SCALING_FACTOR,           "%s.rope.scaling.factor"                  },
218
    { LLM_KV_ROPE_SCALING_ATTN_FACTOR,      "%s.rope.scaling.attn_factor"             },
219
    { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,     "%s.rope.scaling.original_context_length" },
220
    { LLM_KV_ROPE_SCALING_FINETUNED,        "%s.rope.scaling.finetuned"               },
221
    { LLM_KV_ROPE_SCALING_YARN_LOG_MUL,     "%s.rope.scaling.yarn_log_multiplier"     },
222
    { LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR,  "%s.rope.scaling.yarn_ext_factor"         },
223
    { LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR, "%s.rope.scaling.yarn_attn_factor"        },
224
    { LLM_KV_ROPE_SCALING_YARN_BETA_FAST,   "%s.rope.scaling.yarn_beta_fast"          },
225
    { LLM_KV_ROPE_SCALING_YARN_BETA_SLOW,   "%s.rope.scaling.yarn_beta_slow"          },
226
227
    { LLM_KV_SPLIT_NO,            "split.no"            },
228
    { LLM_KV_SPLIT_COUNT,         "split.count"         },
229
    { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },
230
231
    { LLM_KV_SSM_CONV_KERNEL,    "%s.ssm.conv_kernel"    },
232
    { LLM_KV_SSM_INNER_SIZE,     "%s.ssm.inner_size"     },
233
    { LLM_KV_SSM_STATE_SIZE,     "%s.ssm.state_size"     },
234
    { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
235
    { LLM_KV_SSM_GROUP_COUNT,    "%s.ssm.group_count"    },
236
    { LLM_KV_SSM_DT_B_C_RMS,     "%s.ssm.dt_b_c_rms"     },
237
238
    { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },
239
240
    { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
241
    { LLM_KV_POSNET_BLOCK_COUNT,      "%s.posnet.block_count"      },
242
243
    { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
244
    { LLM_KV_CONVNEXT_BLOCK_COUNT,      "%s.convnext.block_count"      },
245
246
    { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
247
248
    { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
249
    // sentence-transformers dense modules feature dims
250
    { LLM_KV_DENSE_2_FEAT_IN,        "%s.dense_2_feat_in"  },
251
    { LLM_KV_DENSE_2_FEAT_OUT,       "%s.dense_2_feat_out"  },
252
    { LLM_KV_DENSE_3_FEAT_IN,        "%s.dense_3_feat_in"   },
253
    { LLM_KV_DENSE_3_FEAT_OUT,       "%s.dense_3_feat_out"  },
254
255
    { LLM_KV_TOKENIZER_MODEL,                "tokenizer.ggml.model"                    },
256
    { LLM_KV_TOKENIZER_PRE,                  "tokenizer.ggml.pre"                      },
257
    { LLM_KV_TOKENIZER_LIST,                 "tokenizer.ggml.tokens"                   },
258
    { LLM_KV_TOKENIZER_TOKEN_TYPE,           "tokenizer.ggml.token_type"               },
259
    { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,     "tokenizer.ggml.token_type_count"         },
260
    { LLM_KV_TOKENIZER_SCORES,               "tokenizer.ggml.scores"                   },
261
    { LLM_KV_TOKENIZER_MERGES,               "tokenizer.ggml.merges"                   },
262
    { LLM_KV_TOKENIZER_BOS_ID,               "tokenizer.ggml.bos_token_id"             },
263
    { LLM_KV_TOKENIZER_EOS_ID,               "tokenizer.ggml.eos_token_id"             },
264
    { LLM_KV_TOKENIZER_EOT_ID,               "tokenizer.ggml.eot_token_id"             },
265
    { LLM_KV_TOKENIZER_EOM_ID,               "tokenizer.ggml.eom_token_id"             },
266
    { LLM_KV_TOKENIZER_UNK_ID,               "tokenizer.ggml.unknown_token_id"         },
267
    { LLM_KV_TOKENIZER_SEP_ID,               "tokenizer.ggml.seperator_token_id"       },
268
    { LLM_KV_TOKENIZER_PAD_ID,               "tokenizer.ggml.padding_token_id"         },
269
    { LLM_KV_TOKENIZER_CLS_ID,               "tokenizer.ggml.cls_token_id"             },
270
    { LLM_KV_TOKENIZER_MASK_ID,              "tokenizer.ggml.mask_token_id"            },
271
    { LLM_KV_TOKENIZER_ADD_BOS,              "tokenizer.ggml.add_bos_token"            },
272
    { LLM_KV_TOKENIZER_ADD_EOS,              "tokenizer.ggml.add_eos_token"            },
273
    { LLM_KV_TOKENIZER_ADD_SEP,              "tokenizer.ggml.add_sep_token"            },
274
    { LLM_KV_TOKENIZER_ADD_PREFIX,           "tokenizer.ggml.add_space_prefix"         },
275
    { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,      "tokenizer.ggml.remove_extra_whitespaces" },
276
    { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap"     },
277
    { LLM_KV_TOKENIZER_HF_JSON,              "tokenizer.huggingface.json"              },
278
    { LLM_KV_TOKENIZER_RWKV,                 "tokenizer.rwkv.world"                    },
279
    { LLM_KV_TOKENIZER_CHAT_TEMPLATE,        "tokenizer.chat_template"                 },
280
    { LLM_KV_TOKENIZER_FIM_PRE_ID,           "tokenizer.ggml.fim_pre_token_id"         },
281
    { LLM_KV_TOKENIZER_FIM_SUF_ID,           "tokenizer.ggml.fim_suf_token_id"         },
282
    { LLM_KV_TOKENIZER_FIM_MID_ID,           "tokenizer.ggml.fim_mid_token_id"         },
283
    { LLM_KV_TOKENIZER_FIM_PAD_ID,           "tokenizer.ggml.fim_pad_token_id"         },
284
    { LLM_KV_TOKENIZER_FIM_REP_ID,           "tokenizer.ggml.fim_rep_token_id"         },
285
    { LLM_KV_TOKENIZER_FIM_SEP_ID,           "tokenizer.ggml.fim_sep_token_id"         },
286
287
    { LLM_KV_ADAPTER_TYPE,                    "adapter.type"               },
288
    { LLM_KV_ADAPTER_LORA_ALPHA,              "adapter.lora.alpha"         },
289
    { LLM_KV_ADAPTER_LORA_TASK_NAME,          "adapter.lora.task_name"     },
290
    { LLM_KV_ADAPTER_LORA_PROMPT_PREFIX,      "adapter.lora.prompt_prefix" },
291
    { LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS, "adapter.alora.invocation_tokens" },
292
293
    { LLM_KV_XIELU_ALPHA_N,         "xielu.alpha_n"         },
294
    { LLM_KV_XIELU_ALPHA_P,         "xielu.alpha_p"         },
295
    { LLM_KV_XIELU_BETA,            "xielu.beta"            },
296
    { LLM_KV_XIELU_EPS,             "xielu.eps"             },
297
298
    // deprecated
299
    { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
300
    { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
301
    { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
302
};
303
304
static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
305
    {
306
        LLM_ARCH_CLIP,
307
        {},
308
    },
309
    {
310
        LLM_ARCH_LLAMA,
311
        {
312
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
313
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
314
            { LLM_TENSOR_OUTPUT,          "output" },
315
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
316
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
317
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
318
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
319
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
320
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
321
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
322
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
323
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
324
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
325
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
326
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
327
            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
328
            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
329
            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
330
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
331
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
332
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
333
        },
334
    },
335
    {
336
        LLM_ARCH_ARCEE,
337
        {
338
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
339
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
340
            { LLM_TENSOR_OUTPUT,          "output" },
341
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
342
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
343
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
344
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
345
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
346
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
347
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
348
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
349
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
350
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
351
        },
352
    },
353
    {
354
        LLM_ARCH_AFMOE,
355
        {
356
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
357
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
358
            { LLM_TENSOR_OUTPUT,          "output" },
359
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
360
            { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
361
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
362
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
363
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
364
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
365
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
366
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
367
            { LLM_TENSOR_ATTN_GATE,       "blk.%d.attn_gate" },
368
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
369
            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
370
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
371
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
372
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
373
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
374
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
375
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
376
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
377
            { LLM_TENSOR_FFN_GATE_SHEXP,  "blk.%d.ffn_gate_shexp" },
378
            { LLM_TENSOR_FFN_UP_SHEXP,    "blk.%d.ffn_up_shexp" },
379
            { LLM_TENSOR_FFN_DOWN_SHEXP,  "blk.%d.ffn_down_shexp" },
380
            { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
381
        },
382
    },
383
    {
384
        LLM_ARCH_LLAMA4,
385
        {
386
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
387
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
388
            { LLM_TENSOR_OUTPUT,          "output" },
389
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
390
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
391
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
392
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
393
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
394
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
395
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
396
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
397
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
398
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
399
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
400
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
401
            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
402
            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
403
            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
404
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
405
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
406
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
407
            { LLM_TENSOR_FFN_GATE_SHEXP,  "blk.%d.ffn_gate_shexp" },
408
            { LLM_TENSOR_FFN_DOWN_SHEXP,  "blk.%d.ffn_down_shexp" },
409
            { LLM_TENSOR_FFN_UP_SHEXP,    "blk.%d.ffn_up_shexp" },
410
        },
411
    },
412
    {
413
        LLM_ARCH_DECI,
414
        {
415
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
416
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
417
            { LLM_TENSOR_OUTPUT,          "output" },
418
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
419
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
420
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
421
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
422
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
423
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
424
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
425
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
426
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
427
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
428
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
429
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
430
            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
431
            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
432
            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
433
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
434
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
435
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
436
        },
437
    },
438
    {
439
        LLM_ARCH_BAICHUAN,
440
        {
441
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
442
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
443
            { LLM_TENSOR_OUTPUT,          "output" },
444
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
445
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
446
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
447
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
448
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
449
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
450
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
451
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
452
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
453
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
454
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
455
        },
456
    },
457
    {
458
        LLM_ARCH_FALCON,
459
        {
460
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
461
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
462
            { LLM_TENSOR_OUTPUT,          "output" },
463
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
464
            { LLM_TENSOR_ATTN_NORM_2,     "blk.%d.attn_norm_2" },
465
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
466
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
467
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
468
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
469
        },
470
    },
471
    {
472
        LLM_ARCH_GROK,
473
        {
474
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
475
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
476
            { LLM_TENSOR_OUTPUT,          "output" },
477
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
478
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
479
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
480
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
481
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
482
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
483
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
484
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
485
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
486
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
487
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
488
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
489
            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
490
            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
491
            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
492
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
493
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
494
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
495
            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
496
            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
497
            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
498
        },
499
    },
500
    {
501
        LLM_ARCH_GPT2,
502
        {
503
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
504
            { LLM_TENSOR_POS_EMBD,        "position_embd" },
505
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
506
            { LLM_TENSOR_OUTPUT,          "output" },
507
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
508
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
509
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
510
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
511
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
512
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
513
        },
514
    },
515
    {
516
        LLM_ARCH_GPTJ,
517
        {
518
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
519
        },
520
    },
521
    {
522
        LLM_ARCH_GPTNEOX,
523
        {
524
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
525
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
526
            { LLM_TENSOR_OUTPUT,          "output" },
527
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
528
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
529
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
530
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
531
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
532
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
533
        },
534
    },
535
    {
536
        LLM_ARCH_MPT,
537
        {
538
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
539
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
540
            { LLM_TENSOR_OUTPUT,          "output"},
541
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
542
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
543
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
544
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
545
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
546
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
547
            { LLM_TENSOR_FFN_ACT,         "blk.%d.ffn.act" },
548
            { LLM_TENSOR_POS_EMBD,        "position_embd" },
549
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm"},
550
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm"},
551
        },
552
    },
553
    {
554
        LLM_ARCH_STARCODER,
555
        {
556
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
557
            { LLM_TENSOR_POS_EMBD,        "position_embd" },
558
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
559
            { LLM_TENSOR_OUTPUT,          "output" },
560
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
561
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
562
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
563
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
564
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
565
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
566
        },
567
    },
568
    {
569
        LLM_ARCH_REFACT,
570
        {
571
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
572
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
573
            { LLM_TENSOR_OUTPUT,          "output" },
574
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
575
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
576
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
577
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
578
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
579
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
580
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
581
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
582
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
583
        },
584
    },
585
    {
586
        LLM_ARCH_BERT,
587
        {
588
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
589
            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
590
            { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
591
            { LLM_TENSOR_POS_EMBD,        "position_embd" },
592
            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
593
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
594
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
595
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
596
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
597
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
598
            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
599
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
600
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
601
            { LLM_TENSOR_CLS,             "cls" },
602
            { LLM_TENSOR_CLS_OUT,         "cls.output" },
603
        },
604
    },
605
    {
606
        LLM_ARCH_NOMIC_BERT,
607
        {
608
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
609
            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
610
            { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
611
            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
612
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
613
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
614
            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
615
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
616
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
617
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
618
        },
619
    },
620
    {
621
        LLM_ARCH_NOMIC_BERT_MOE,
622
        {
623
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
624
            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
625
            { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
626
            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
627
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
628
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
629
            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
630
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
631
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
632
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
633
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
634
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
635
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
636
        },
637
    },
638
    {
639
        LLM_ARCH_NEO_BERT,
640
        {
641
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
642
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
643
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
644
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
645
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
646
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
647
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
648
            { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
649
            { LLM_TENSOR_CLS,             "cls" },
650
            { LLM_TENSOR_CLS_OUT,         "cls.output" },
651
        },
652
    },
653
    {
654
        LLM_ARCH_JINA_BERT_V2,
655
        {
656
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
657
            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
658
            { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
659
            { LLM_TENSOR_ATTN_NORM_2,     "blk.%d.attn_norm_2" },
660
            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
661
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
662
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
663
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
664
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
665
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
666
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
667
            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
668
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
669
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
670
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
671
            { LLM_TENSOR_CLS,             "cls" },
672
        },
673
    },
674
    {
675
        LLM_ARCH_JINA_BERT_V3,
676
        {
677
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
678
            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
679
            { LLM_TENSOR_TOKEN_TYPES,     "token_types" },
680
            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
681
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
682
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
683
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
684
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
685
            { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
686
        },
687
    },
688
    {
689
        LLM_ARCH_BLOOM,
690
        {
691
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
692
            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
693
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
694
            { LLM_TENSOR_OUTPUT,          "output" },
695
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
696
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
697
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
698
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
699
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
700
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
701
        },
702
    },
703
    {
704
        LLM_ARCH_STABLELM,
705
        {
706
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
707
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
708
            { LLM_TENSOR_OUTPUT,          "output" },
709
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
710
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
711
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
712
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
713
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
714
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
715
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
716
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
717
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
718
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
719
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
720
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
721
        },
722
    },
723
    {
724
        LLM_ARCH_QWEN,
725
        {
726
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
727
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
728
            { LLM_TENSOR_OUTPUT,          "output" },
729
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
730
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
731
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
732
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
733
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
734
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
735
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
736
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
737
        },
738
    },
739
    {
740
        LLM_ARCH_QWEN2,
741
        {
742
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
743
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
744
            { LLM_TENSOR_OUTPUT,          "output" },
745
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
746
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
747
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
748
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
749
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
750
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
751
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
752
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
753
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
754
        },
755
    },
756
    {
757
        LLM_ARCH_QWEN2VL,
758
        {
759
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
760
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
761
            { LLM_TENSOR_OUTPUT,          "output" },
762
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
763
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
764
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
765
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
766
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
767
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
768
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
769
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
770
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
771
        },
772
    },
773
    {
774
        LLM_ARCH_QWEN2MOE,
775
        {
776
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
777
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
778
            { LLM_TENSOR_OUTPUT,             "output" },
779
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
780
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
781
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
782
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
783
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
784
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
785
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
786
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
787
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
788
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
789
            { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
790
            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
791
            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
792
            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
793
        },
794
    },
795
    {
796
        LLM_ARCH_QWEN3,
797
        {
798
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
799
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
800
            { LLM_TENSOR_OUTPUT,          "output" },
801
            { LLM_TENSOR_CLS_OUT,         "cls.output" },
802
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
803
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
804
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
805
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
806
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
807
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
808
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
809
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
810
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
811
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
812
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
813
        },
814
    },
815
    {
816
        LLM_ARCH_QWEN3MOE,
817
        {
818
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
819
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
820
            { LLM_TENSOR_OUTPUT,             "output" },
821
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
822
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
823
            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
824
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
825
            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
826
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
827
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
828
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
829
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
830
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
831
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
832
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
833
        },
834
    },
835
    {
836
        LLM_ARCH_QWEN3NEXT,
837
        {
838
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
839
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
840
            { LLM_TENSOR_OUTPUT,             "output" },
841
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
842
            { LLM_TENSOR_ATTN_POST_NORM,     "blk.%d.post_attention_norm" },
843
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
844
            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
845
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
846
            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
847
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
848
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
849
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
850
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
851
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
852
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
853
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
854
            { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
855
            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
856
            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
857
            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
858
            { LLM_TENSOR_SSM_A_NOSCAN,       "blk.%d.ssm_a" },
859
            { LLM_TENSOR_SSM_CONV1D,         "blk.%d.ssm_conv1d" },
860
            { LLM_TENSOR_SSM_DT,             "blk.%d.ssm_dt" },
861
            { LLM_TENSOR_SSM_BETA_ALPHA,     "blk.%d.ssm_ba" },
862
            { LLM_TENSOR_SSM_IN,             "blk.%d.ssm_in" },
863
            { LLM_TENSOR_SSM_NORM,           "blk.%d.ssm_norm" },
864
            { LLM_TENSOR_SSM_OUT,            "blk.%d.ssm_out" },
865
        },
866
    },
867
    {
868
        LLM_ARCH_QWEN3VL,
869
        {
870
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
871
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
872
            { LLM_TENSOR_OUTPUT,          "output" },
873
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
874
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
875
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
876
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
877
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
878
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
879
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
880
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
881
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
882
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
883
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
884
        },
885
    },
886
    {
887
        LLM_ARCH_QWEN3VLMOE,
888
        {
889
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
890
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
891
            { LLM_TENSOR_OUTPUT,             "output" },
892
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
893
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
894
            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
895
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
896
            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
897
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
898
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
899
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
900
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
901
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
902
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
903
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
904
        },
905
    },
906
    {
907
        LLM_ARCH_PHI2,
908
        {
909
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
910
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
911
            { LLM_TENSOR_OUTPUT,          "output" },
912
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
913
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
914
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
915
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
916
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
917
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
918
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
919
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
920
        },
921
    },
922
    {
923
        LLM_ARCH_PHI3,
924
        {
925
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
926
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
927
            { LLM_TENSOR_OUTPUT,             "output" },
928
            { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
929
            { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
930
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
931
            { LLM_TENSOR_ATTN_QKV,           "blk.%d.attn_qkv" },
932
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
933
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
934
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
935
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
936
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
937
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
938
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
939
        },
940
    },
941
    {
942
        LLM_ARCH_PHIMOE,
943
        {
944
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
945
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
946
            { LLM_TENSOR_OUTPUT,             "output" },
947
            { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
948
            { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
949
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
950
            { LLM_TENSOR_ATTN_QKV,           "blk.%d.attn_qkv" },
951
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
952
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
953
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
954
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
955
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
956
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
957
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
958
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
959
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
960
        },
961
    },
962
    {
963
        LLM_ARCH_PLAMO,
964
        {
965
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
966
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
967
            { LLM_TENSOR_OUTPUT,          "output" },
968
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
969
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
970
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
971
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
972
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
973
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
974
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
975
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
976
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
977
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
978
        },
979
    },
980
    {
981
        LLM_ARCH_PLAMO2,
982
        {
983
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
984
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
985
            { LLM_TENSOR_OUTPUT,          "output" },
986
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
987
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
988
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
989
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
990
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
991
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
992
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
993
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
994
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
995
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
996
            { LLM_TENSOR_SSM_IN,          "blk.%d.ssm_in" },
997
            { LLM_TENSOR_SSM_CONV1D,      "blk.%d.ssm_conv1d" },
998
            { LLM_TENSOR_SSM_X,           "blk.%d.ssm_x" },
999
            { LLM_TENSOR_SSM_DT,          "blk.%d.ssm_dt" },
1000
            { LLM_TENSOR_SSM_A,           "blk.%d.ssm_a" },
1001
            { LLM_TENSOR_SSM_D,           "blk.%d.ssm_d" },
1002
            { LLM_TENSOR_SSM_OUT,         "blk.%d.ssm_out" },
1003
            { LLM_TENSOR_SSM_DT_NORM,     "blk.%d.ssm_dt_norm" },
1004
            { LLM_TENSOR_SSM_B_NORM,      "blk.%d.ssm_b_norm" },
1005
            { LLM_TENSOR_SSM_C_NORM,      "blk.%d.ssm_c_norm" },
1006
            { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
1007
            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
1008
        },
1009
    },
1010
    {
1011
        LLM_ARCH_CODESHELL,
1012
        {
1013
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1014
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1015
            { LLM_TENSOR_OUTPUT,          "output" },
1016
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
1017
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1018
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1019
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1020
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1021
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
1022
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1023
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
1024
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1025
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1026
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1027
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1028
        },
1029
    },
1030
    {
1031
        LLM_ARCH_ORION,
1032
        {
1033
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1034
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1035
            { LLM_TENSOR_OUTPUT,          "output" },
1036
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
1037
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1038
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1039
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1040
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1041
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1042
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
1043
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1044
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1045
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1046
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1047
        },
1048
    },
1049
    {
1050
        LLM_ARCH_INTERNLM2,
1051
        {
1052
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1053
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1054
            { LLM_TENSOR_OUTPUT,          "output" },
1055
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1056
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1057
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1058
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1059
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1060
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1061
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1062
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1063
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1064
        },
1065
    },
1066
    {
1067
        LLM_ARCH_MINICPM,
1068
        {
1069
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1070
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1071
            { LLM_TENSOR_OUTPUT,          "output" },
1072
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
1073
            { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
1074
            { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
1075
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1076
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1077
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1078
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1079
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1080
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
1081
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
1082
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1083
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1084
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1085
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1086
            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
1087
            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
1088
            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
1089
        },
1090
    },
1091
    {
1092
        LLM_ARCH_MINICPM3,
1093
        {
1094
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
1095
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
1096
            { LLM_TENSOR_OUTPUT,             "output" },
1097
            { LLM_TENSOR_ROPE_FACTORS_LONG,  "rope_factors_long" },
1098
            { LLM_TENSOR_ROPE_FACTORS_SHORT, "rope_factors_short" },
1099
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
1100
            { LLM_TENSOR_ATTN_Q_A_NORM,      "blk.%d.attn_q_a_norm" },
1101
            { LLM_TENSOR_ATTN_KV_A_NORM,     "blk.%d.attn_kv_a_norm" },
1102
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
1103
            { LLM_TENSOR_ATTN_Q_A,           "blk.%d.attn_q_a" },
1104
            { LLM_TENSOR_ATTN_Q_B,           "blk.%d.attn_q_b" },
1105
            { LLM_TENSOR_ATTN_KV_A_MQA,      "blk.%d.attn_kv_a_mqa" },
1106
            { LLM_TENSOR_ATTN_KV_B,          "blk.%d.attn_kv_b" },
1107
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
1108
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
1109
            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
1110
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
1111
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
1112
        },
1113
    },
1114
    {
1115
        LLM_ARCH_GEMMA,
1116
        {
1117
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1118
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1119
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1120
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1121
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1122
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1123
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1124
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1125
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1126
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1127
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1128
        },
1129
    },
1130
    {
1131
        LLM_ARCH_GEMMA2,
1132
        {
1133
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1134
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1135
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1136
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1137
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1138
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1139
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1140
            { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
1141
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1142
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1143
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1144
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1145
            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
1146
        },
1147
    },
1148
    {
1149
        LLM_ARCH_GEMMA3,
1150
        {
1151
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1152
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1153
            { LLM_TENSOR_OUTPUT,          "output" },
1154
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1155
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1156
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
1157
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1158
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
1159
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1160
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1161
            { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
1162
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1163
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1164
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1165
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1166
            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
1167
        },
1168
    },
1169
    {
1170
        LLM_ARCH_GEMMA3N,
1171
        {
1172
            { LLM_TENSOR_TOKEN_EMBD,           "token_embd" },
1173
            { LLM_TENSOR_OUTPUT_NORM,          "output_norm" },
1174
            { LLM_TENSOR_ATTN_NORM,            "blk.%d.attn_norm" },
1175
            { LLM_TENSOR_ATTN_Q,               "blk.%d.attn_q" },
1176
            { LLM_TENSOR_ATTN_Q_NORM,          "blk.%d.attn_q_norm" },
1177
            { LLM_TENSOR_ATTN_K,               "blk.%d.attn_k" },
1178
            { LLM_TENSOR_ATTN_K_NORM,          "blk.%d.attn_k_norm" },
1179
            { LLM_TENSOR_ATTN_V,               "blk.%d.attn_v" },
1180
            { LLM_TENSOR_ATTN_OUT,             "blk.%d.attn_output" },
1181
            { LLM_TENSOR_ATTN_POST_NORM,       "blk.%d.post_attention_norm" },
1182
            { LLM_TENSOR_FFN_NORM,             "blk.%d.ffn_norm" },
1183
            { LLM_TENSOR_FFN_GATE,             "blk.%d.ffn_gate" },
1184
            { LLM_TENSOR_FFN_DOWN,             "blk.%d.ffn_down" },
1185
            { LLM_TENSOR_FFN_UP,               "blk.%d.ffn_up" },
1186
            { LLM_TENSOR_FFN_POST_NORM,        "blk.%d.post_ffw_norm" },
1187
            { LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" },
1188
            { LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" },
1189
            { LLM_TENSOR_PER_LAYER_PROJ_NORM,  "per_layer_proj_norm" },
1190
            { LLM_TENSOR_ALTUP_UNEMBD_PROJ,    "altup_unembd_proj" },
1191
            { LLM_TENSOR_ALTUP_PROJ,           "altup_proj" },
1192
            { LLM_TENSOR_PER_LAYER_INP_GATE,   "blk.%d.inp_gate" },
1193
            { LLM_TENSOR_PER_LAYER_PROJ,       "blk.%d.proj" },
1194
            { LLM_TENSOR_PER_LAYER_POST_NORM,  "blk.%d.post_norm" },
1195
            { LLM_TENSOR_ALTUP_CORRECT_COEF,   "blk.%d.altup_correct_coef" },
1196
            { LLM_TENSOR_ALTUP_CORRECT_SCALE,  "blk.%d.altup_correct_scale" },
1197
            { LLM_TENSOR_ALTUP_PREDICT_COEF,   "blk.%d.altup_predict_coef" },
1198
            { LLM_TENSOR_ALTUP_ROUTER,         "blk.%d.altup_router" },
1199
            { LLM_TENSOR_ALTUP_ROUTER_NORM,    "blk.%d.altup_router_norm" },
1200
            { LLM_TENSOR_LAUREL_L,             "blk.%d.laurel_l" },
1201
            { LLM_TENSOR_LAUREL_R,             "blk.%d.laurel_r" },
1202
            { LLM_TENSOR_LAUREL_POST_NORM,     "blk.%d.laurel_post_norm" },
1203
        },
1204
    },
1205
    {
1206
        LLM_ARCH_GEMMA_EMBEDDING,
1207
        {
1208
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1209
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1210
            { LLM_TENSOR_OUTPUT,          "output" },
1211
            { LLM_TENSOR_DENSE_2_OUT,     "dense_2" },
1212
            { LLM_TENSOR_DENSE_3_OUT,     "dense_3" },
1213
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1214
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1215
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
1216
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1217
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
1218
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1219
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1220
            { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
1221
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1222
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1223
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1224
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1225
            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
1226
        },
1227
    },
1228
    {
1229
        LLM_ARCH_STARCODER2,
1230
        {
1231
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1232
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1233
            { LLM_TENSOR_OUTPUT,          "output" },
1234
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
1235
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1236
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1237
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1238
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1239
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1240
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
1241
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1242
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1243
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1244
        },
1245
    },
1246
    {
1247
        LLM_ARCH_MAMBA,
1248
        {
1249
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1250
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1251
            { LLM_TENSOR_OUTPUT,          "output" },
1252
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1253
            { LLM_TENSOR_SSM_IN,          "blk.%d.ssm_in" },
1254
            { LLM_TENSOR_SSM_CONV1D,      "blk.%d.ssm_conv1d" },
1255
            { LLM_TENSOR_SSM_X,           "blk.%d.ssm_x" },
1256
            { LLM_TENSOR_SSM_DT,          "blk.%d.ssm_dt" },
1257
            { LLM_TENSOR_SSM_A,           "blk.%d.ssm_a" },
1258
            { LLM_TENSOR_SSM_D,           "blk.%d.ssm_d" },
1259
            { LLM_TENSOR_SSM_OUT,         "blk.%d.ssm_out" },
1260
        },
1261
    },
1262
    {
1263
        LLM_ARCH_MAMBA2,
1264
        {
1265
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1266
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1267
            { LLM_TENSOR_OUTPUT,          "output" },
1268
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1269
            { LLM_TENSOR_SSM_IN,          "blk.%d.ssm_in" },
1270
            { LLM_TENSOR_SSM_CONV1D,      "blk.%d.ssm_conv1d" },
1271
            { LLM_TENSOR_SSM_DT,          "blk.%d.ssm_dt" },
1272
            { LLM_TENSOR_SSM_A,           "blk.%d.ssm_a" },
1273
            { LLM_TENSOR_SSM_D,           "blk.%d.ssm_d" },
1274
            { LLM_TENSOR_SSM_NORM,        "blk.%d.ssm_norm" },
1275
            { LLM_TENSOR_SSM_OUT,         "blk.%d.ssm_out" },
1276
        },
1277
    },
1278
    {
1279
        LLM_ARCH_JAMBA,
1280
        {
1281
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1282
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1283
            { LLM_TENSOR_OUTPUT,          "output" },
1284
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1285
            { LLM_TENSOR_SSM_IN,          "blk.%d.ssm_in" },
1286
            { LLM_TENSOR_SSM_CONV1D,      "blk.%d.ssm_conv1d" },
1287
            { LLM_TENSOR_SSM_X,           "blk.%d.ssm_x" },
1288
            { LLM_TENSOR_SSM_DT,          "blk.%d.ssm_dt" },
1289
            { LLM_TENSOR_SSM_DT_NORM,     "blk.%d.ssm_dt_norm" },
1290
            { LLM_TENSOR_SSM_A,           "blk.%d.ssm_a" },
1291
            { LLM_TENSOR_SSM_B_NORM,      "blk.%d.ssm_b_norm" },
1292
            { LLM_TENSOR_SSM_C_NORM,      "blk.%d.ssm_c_norm" },
1293
            { LLM_TENSOR_SSM_D,           "blk.%d.ssm_d" },
1294
            { LLM_TENSOR_SSM_OUT,         "blk.%d.ssm_out" },
1295
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1296
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1297
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1298
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1299
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
1300
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1301
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1302
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1303
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1304
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
1305
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
1306
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
1307
        },
1308
    },
1309
    {
1310
        LLM_ARCH_FALCON_H1,
1311
        {
1312
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1313
            { LLM_TENSOR_OUTPUT,          "output" },
1314
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1315
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1316
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1317
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1318
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1319
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1320
            { LLM_TENSOR_SSM_IN,          "blk.%d.ssm_in" },
1321
            { LLM_TENSOR_SSM_CONV1D,      "blk.%d.ssm_conv1d" },
1322
            { LLM_TENSOR_SSM_DT,          "blk.%d.ssm_dt" },
1323
            { LLM_TENSOR_SSM_A,           "blk.%d.ssm_a" },
1324
            { LLM_TENSOR_SSM_D,           "blk.%d.ssm_d" },
1325
            { LLM_TENSOR_SSM_NORM,        "blk.%d.ssm_norm" },
1326
            { LLM_TENSOR_SSM_OUT,         "blk.%d.ssm_out" },
1327
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1328
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1329
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1330
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1331
        },
1332
    },
1333
    {
1334
        LLM_ARCH_XVERSE,
1335
        {
1336
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1337
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1338
            { LLM_TENSOR_OUTPUT,          "output" },
1339
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
1340
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1341
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1342
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1343
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1344
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1345
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
1346
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1347
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1348
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1349
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1350
        },
1351
    },
1352
    {
1353
        LLM_ARCH_COMMAND_R,
1354
        {
1355
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1356
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1357
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1358
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1359
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1360
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1361
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1362
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1363
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1364
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1365
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
1366
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
1367
        },
1368
    },
1369
    {
1370
        LLM_ARCH_COHERE2,
1371
        {
1372
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1373
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1374
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1375
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1376
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1377
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1378
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1379
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1380
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1381
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1382
        },
1383
    },
1384
    {
1385
        LLM_ARCH_DBRX,
1386
        {
1387
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1388
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1389
            { LLM_TENSOR_OUTPUT,          "output" },
1390
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
1391
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1392
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1393
            { LLM_TENSOR_ATTN_OUT_NORM,   "blk.%d.attn_output_norm" },
1394
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
1395
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
1396
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
1397
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
1398
        },
1399
    },
1400
    {
1401
        LLM_ARCH_OLMO,
1402
        {
1403
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1404
            { LLM_TENSOR_OUTPUT,          "output" },
1405
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1406
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1407
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1408
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1409
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1410
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1411
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1412
        },
1413
    },
1414
    {
1415
        LLM_ARCH_OLMO2,
1416
        {
1417
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1418
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1419
            { LLM_TENSOR_OUTPUT,          "output" },
1420
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1421
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1422
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1423
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1424
            { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
1425
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
1426
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
1427
            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
1428
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1429
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1430
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1431
        },
1432
    },
1433
    {
1434
        LLM_ARCH_OLMOE,
1435
        {
1436
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
1437
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
1438
            { LLM_TENSOR_OUTPUT,             "output" },
1439
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
1440
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
1441
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
1442
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
1443
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
1444
            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
1445
            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
1446
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
1447
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
1448
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
1449
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
1450
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
1451
        },
1452
    },
1453
    {
1454
        LLM_ARCH_OPENELM,
1455
        {
1456
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1457
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1458
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1459
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
1460
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
1461
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
1462
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1463
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1464
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1465
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1466
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1467
        },
1468
    },
1469
    {
1470
        LLM_ARCH_ARCTIC,
1471
        {
1472
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1473
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1474
            { LLM_TENSOR_OUTPUT,          "output" },
1475
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1476
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1477
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1478
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1479
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1480
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
1481
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1482
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1483
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1484
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1485
            { LLM_TENSOR_FFN_NORM_EXPS,   "blk.%d.ffn_norm_exps" },
1486
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
1487
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
1488
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
1489
        },
1490
    },
1491
    {
1492
        LLM_ARCH_DEEPSEEK,
1493
        {
1494
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
1495
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
1496
            { LLM_TENSOR_OUTPUT,             "output" },
1497
            { LLM_TENSOR_ROPE_FREQS,         "rope_freqs" },
1498
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
1499
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
1500
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
1501
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
1502
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
1503
            { LLM_TENSOR_ATTN_ROT_EMBD,      "blk.%d.attn_rot_embd" },
1504
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
1505
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
1506
            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
1507
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
1508
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
1509
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
1510
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
1511
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
1512
            { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1513
            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
1514
            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
1515
            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
1516
        },
1517
    },
1518
    {
1519
        LLM_ARCH_DEEPSEEK2,
1520
        {
1521
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
1522
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
1523
            { LLM_TENSOR_OUTPUT,             "output" },
1524
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
1525
            { LLM_TENSOR_ATTN_Q_A_NORM,      "blk.%d.attn_q_a_norm" },
1526
            { LLM_TENSOR_ATTN_KV_A_NORM,     "blk.%d.attn_kv_a_norm" },
1527
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
1528
            { LLM_TENSOR_ATTN_Q_A,           "blk.%d.attn_q_a" },
1529
            { LLM_TENSOR_ATTN_Q_B,           "blk.%d.attn_q_b" },
1530
            { LLM_TENSOR_ATTN_KV_A_MQA,      "blk.%d.attn_kv_a_mqa" },
1531
            { LLM_TENSOR_ATTN_KV_B,          "blk.%d.attn_kv_b" },
1532
            { LLM_TENSOR_ATTN_K_B,           "blk.%d.attn_k_b" },
1533
            { LLM_TENSOR_ATTN_V_B,           "blk.%d.attn_v_b" },
1534
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
1535
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
1536
            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
1537
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
1538
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
1539
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
1540
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
1541
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
1542
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
1543
            { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1544
            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
1545
            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
1546
            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
1547
            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
1548
        },
1549
    },
1550
    {
1551
        LLM_ARCH_PLM,
1552
        {
1553
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
1554
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
1555
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
1556
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
1557
            { LLM_TENSOR_ATTN_KV_A_MQA,      "blk.%d.attn_kv_a_mqa" },
1558
            { LLM_TENSOR_ATTN_KV_A_NORM,     "blk.%d.attn_kv_a_norm" },
1559
            { LLM_TENSOR_ATTN_KV_B,          "blk.%d.attn_kv_b" },
1560
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
1561
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
1562
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
1563
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
1564
        },
1565
    },
1566
    {
1567
        LLM_ARCH_CHATGLM,
1568
        {
1569
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1570
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
1571
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1572
            { LLM_TENSOR_OUTPUT,          "output" },
1573
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1574
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
1575
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1576
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1577
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1578
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1579
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1580
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1581
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1582
        },
1583
    },
1584
    {
1585
        LLM_ARCH_GLM4,
1586
        {
1587
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1588
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
1589
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1590
            { LLM_TENSOR_OUTPUT,          "output" },
1591
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1592
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1593
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1594
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1595
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1596
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1597
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1598
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1599
            { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
1600
            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
1601
        },
1602
    },
1603
    {
1604
        LLM_ARCH_GLM4_MOE,
1605
        {
1606
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
1607
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
1608
            { LLM_TENSOR_OUTPUT,             "output" },
1609
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
1610
            { LLM_TENSOR_ATTN_POST_NORM,     "blk.%d.post_attention_norm" },
1611
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
1612
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
1613
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
1614
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
1615
            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
1616
            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
1617
            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
1618
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
1619
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
1620
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
1621
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
1622
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
1623
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
1624
            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
1625
            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
1626
            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
1627
            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
1628
            // NextN/MTP tensors - preserved but unused (in final layer, dynamic layer number)
1629
            { LLM_TENSOR_NEXTN_EH_PROJ,      "blk.%d.nextn.eh_proj" },
1630
            { LLM_TENSOR_NEXTN_EMBED_TOKENS, "blk.%d.nextn.embed_tokens" },
1631
            { LLM_TENSOR_NEXTN_ENORM,        "blk.%d.nextn.enorm" },
1632
            { LLM_TENSOR_NEXTN_HNORM,        "blk.%d.nextn.hnorm" },
1633
            { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.nextn.shared_head_head" },
1634
            { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.nextn.shared_head_norm" },
1635
        },
1636
    },
1637
    {
1638
        LLM_ARCH_BITNET,
1639
        {
1640
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
1641
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
1642
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
1643
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
1644
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
1645
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
1646
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
1647
            { LLM_TENSOR_ATTN_SUB_NORM,      "blk.%d.attn_sub_norm" },
1648
            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
1649
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
1650
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
1651
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
1652
            { LLM_TENSOR_FFN_SUB_NORM,       "blk.%d.ffn_sub_norm" },
1653
        },
1654
    },
1655
    {
1656
        LLM_ARCH_T5,
1657
        {
1658
            { LLM_TENSOR_TOKEN_EMBD,           "token_embd" },
1659
            { LLM_TENSOR_OUTPUT,               "output" },
1660
            { LLM_TENSOR_DEC_OUTPUT_NORM,      "dec.output_norm" },
1661
            { LLM_TENSOR_DEC_ATTN_NORM,        "dec.blk.%d.attn_norm" },
1662
            { LLM_TENSOR_DEC_ATTN_Q,           "dec.blk.%d.attn_q" },
1663
            { LLM_TENSOR_DEC_ATTN_K,           "dec.blk.%d.attn_k" },
1664
            { LLM_TENSOR_DEC_ATTN_V,           "dec.blk.%d.attn_v" },
1665
            { LLM_TENSOR_DEC_ATTN_OUT,         "dec.blk.%d.attn_o" },
1666
            { LLM_TENSOR_DEC_ATTN_REL_B,       "dec.blk.%d.attn_rel_b" },
1667
            { LLM_TENSOR_DEC_CROSS_ATTN_NORM,  "dec.blk.%d.cross_attn_norm" },
1668
            { LLM_TENSOR_DEC_CROSS_ATTN_Q,     "dec.blk.%d.cross_attn_q" },
1669
            { LLM_TENSOR_DEC_CROSS_ATTN_K,     "dec.blk.%d.cross_attn_k" },
1670
            { LLM_TENSOR_DEC_CROSS_ATTN_V,     "dec.blk.%d.cross_attn_v" },
1671
            { LLM_TENSOR_DEC_CROSS_ATTN_OUT,   "dec.blk.%d.cross_attn_o" },
1672
            { LLM_TENSOR_DEC_CROSS_ATTN_REL_B, "dec.blk.%d.cross_attn_rel_b" },
1673
            { LLM_TENSOR_DEC_FFN_NORM,         "dec.blk.%d.ffn_norm" },
1674
            { LLM_TENSOR_DEC_FFN_GATE,         "dec.blk.%d.ffn_gate" },
1675
            { LLM_TENSOR_DEC_FFN_DOWN,         "dec.blk.%d.ffn_down" },
1676
            { LLM_TENSOR_DEC_FFN_UP,           "dec.blk.%d.ffn_up" },
1677
            { LLM_TENSOR_ENC_OUTPUT_NORM,      "enc.output_norm" },
1678
            { LLM_TENSOR_ENC_ATTN_NORM,        "enc.blk.%d.attn_norm" },
1679
            { LLM_TENSOR_ENC_ATTN_Q,           "enc.blk.%d.attn_q" },
1680
            { LLM_TENSOR_ENC_ATTN_K,           "enc.blk.%d.attn_k" },
1681
            { LLM_TENSOR_ENC_ATTN_V,           "enc.blk.%d.attn_v" },
1682
            { LLM_TENSOR_ENC_ATTN_OUT,         "enc.blk.%d.attn_o" },
1683
            { LLM_TENSOR_ENC_ATTN_REL_B,       "enc.blk.%d.attn_rel_b" },
1684
            { LLM_TENSOR_ENC_FFN_NORM,         "enc.blk.%d.ffn_norm" },
1685
            { LLM_TENSOR_ENC_FFN_GATE,         "enc.blk.%d.ffn_gate" },
1686
            { LLM_TENSOR_ENC_FFN_DOWN,         "enc.blk.%d.ffn_down" },
1687
            { LLM_TENSOR_ENC_FFN_UP,           "enc.blk.%d.ffn_up" },
1688
        },
1689
    },
1690
    {
1691
        LLM_ARCH_T5ENCODER,
1692
        {
1693
            { LLM_TENSOR_TOKEN_EMBD,           "token_embd" },
1694
            { LLM_TENSOR_OUTPUT,               "output" },
1695
            { LLM_TENSOR_ENC_OUTPUT_NORM,      "enc.output_norm" },
1696
            { LLM_TENSOR_ENC_ATTN_NORM,        "enc.blk.%d.attn_norm" },
1697
            { LLM_TENSOR_ENC_ATTN_Q,           "enc.blk.%d.attn_q" },
1698
            { LLM_TENSOR_ENC_ATTN_K,           "enc.blk.%d.attn_k" },
1699
            { LLM_TENSOR_ENC_ATTN_V,           "enc.blk.%d.attn_v" },
1700
            { LLM_TENSOR_ENC_ATTN_OUT,         "enc.blk.%d.attn_o" },
1701
            { LLM_TENSOR_ENC_ATTN_REL_B,       "enc.blk.%d.attn_rel_b" },
1702
            { LLM_TENSOR_ENC_FFN_NORM,         "enc.blk.%d.ffn_norm" },
1703
            { LLM_TENSOR_ENC_FFN_GATE,         "enc.blk.%d.ffn_gate" },
1704
            { LLM_TENSOR_ENC_FFN_DOWN,         "enc.blk.%d.ffn_down" },
1705
            { LLM_TENSOR_ENC_FFN_UP,           "enc.blk.%d.ffn_up" },
1706
        },
1707
    },
1708
    {
1709
        LLM_ARCH_JAIS,
1710
        {
1711
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1712
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1713
            { LLM_TENSOR_OUTPUT,          "output" },
1714
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1715
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
1716
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1717
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1718
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1719
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1720
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1721
        },
1722
    },
1723
    {
1724
        LLM_ARCH_NEMOTRON,
1725
        {
1726
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1727
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1728
            { LLM_TENSOR_OUTPUT,          "output" },
1729
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
1730
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1731
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1732
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1733
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1734
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1735
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
1736
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1737
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1738
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1739
        },
1740
    },
1741
    {
1742
        LLM_ARCH_NEMOTRON_H,
1743
        {
1744
            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
1745
            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
1746
            { LLM_TENSOR_OUTPUT,         "output" },
1747
            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
1748
            // mamba(2) ssm layers
1749
            { LLM_TENSOR_SSM_IN,         "blk.%d.ssm_in" },
1750
            { LLM_TENSOR_SSM_CONV1D,     "blk.%d.ssm_conv1d" },
1751
            { LLM_TENSOR_SSM_DT,         "blk.%d.ssm_dt" },
1752
            { LLM_TENSOR_SSM_A,          "blk.%d.ssm_a" },
1753
            { LLM_TENSOR_SSM_D,          "blk.%d.ssm_d" },
1754
            { LLM_TENSOR_SSM_NORM,       "blk.%d.ssm_norm" },
1755
            { LLM_TENSOR_SSM_OUT,        "blk.%d.ssm_out" },
1756
            // attention layers
1757
            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
1758
            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
1759
            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
1760
            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
1761
            // dense FFN
1762
            { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
1763
            { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
1764
        },
1765
    },
1766
    {
1767
        LLM_ARCH_EXAONE,
1768
        {
1769
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1770
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1771
            { LLM_TENSOR_OUTPUT,          "output" },
1772
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
1773
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1774
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1775
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1776
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1777
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1778
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
1779
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1780
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1781
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1782
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1783
        },
1784
    },
1785
    {
1786
        LLM_ARCH_EXAONE4,
1787
        {
1788
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1789
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1790
            { LLM_TENSOR_OUTPUT,          "output" },
1791
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
1792
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1793
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
1794
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1795
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
1796
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1797
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1798
            { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
1799
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1800
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1801
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1802
            { LLM_TENSOR_FFN_POST_NORM,   "blk.%d.post_ffw_norm" },
1803
        }
1804
    },
1805
    {
1806
        LLM_ARCH_RWKV6,
1807
        {
1808
            { LLM_TENSOR_TOKEN_EMBD,                "token_embd" },
1809
            { LLM_TENSOR_TOKEN_EMBD_NORM,           "token_embd_norm" },
1810
            { LLM_TENSOR_OUTPUT_NORM,               "output_norm" },
1811
            { LLM_TENSOR_OUTPUT,                    "output" },
1812
            { LLM_TENSOR_ATTN_NORM,                 "blk.%d.attn_norm" },
1813
            { LLM_TENSOR_ATTN_NORM_2,               "blk.%d.attn_norm_2" },
1814
            { LLM_TENSOR_TIME_MIX_W1,               "blk.%d.time_mix_w1" },
1815
            { LLM_TENSOR_TIME_MIX_W2,               "blk.%d.time_mix_w2" },
1816
            { LLM_TENSOR_TIME_MIX_LERP_X,           "blk.%d.time_mix_lerp_x" },
1817
            { LLM_TENSOR_TIME_MIX_LERP_W,           "blk.%d.time_mix_lerp_w" },
1818
            { LLM_TENSOR_TIME_MIX_LERP_K,           "blk.%d.time_mix_lerp_k" },
1819
            { LLM_TENSOR_TIME_MIX_LERP_V,           "blk.%d.time_mix_lerp_v" },
1820
            { LLM_TENSOR_TIME_MIX_LERP_R,           "blk.%d.time_mix_lerp_r" },
1821
            { LLM_TENSOR_TIME_MIX_LERP_G,           "blk.%d.time_mix_lerp_g" },
1822
            { LLM_TENSOR_TIME_MIX_LERP_FUSED,       "blk.%d.time_mix_lerp_fused" },
1823
            { LLM_TENSOR_TIME_MIX_FIRST,            "blk.%d.time_mix_first" },
1824
            { LLM_TENSOR_TIME_MIX_DECAY,            "blk.%d.time_mix_decay" },
1825
            { LLM_TENSOR_TIME_MIX_DECAY_W1,         "blk.%d.time_mix_decay_w1" },
1826
            { LLM_TENSOR_TIME_MIX_DECAY_W2,         "blk.%d.time_mix_decay_w2" },
1827
            { LLM_TENSOR_TIME_MIX_KEY,              "blk.%d.time_mix_key" },
1828
            { LLM_TENSOR_TIME_MIX_VALUE,            "blk.%d.time_mix_value" },
1829
            { LLM_TENSOR_TIME_MIX_RECEPTANCE,       "blk.%d.time_mix_receptance" },
1830
            { LLM_TENSOR_TIME_MIX_GATE,             "blk.%d.time_mix_gate" },
1831
            { LLM_TENSOR_TIME_MIX_LN,               "blk.%d.time_mix_ln" },
1832
            { LLM_TENSOR_TIME_MIX_OUTPUT,           "blk.%d.time_mix_output" },
1833
            { LLM_TENSOR_CHANNEL_MIX_LERP_K,        "blk.%d.channel_mix_lerp_k" },
1834
            { LLM_TENSOR_CHANNEL_MIX_LERP_R,        "blk.%d.channel_mix_lerp_r" },
1835
            { LLM_TENSOR_CHANNEL_MIX_KEY,           "blk.%d.channel_mix_key" },
1836
            { LLM_TENSOR_CHANNEL_MIX_VALUE,         "blk.%d.channel_mix_value" },
1837
            { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,    "blk.%d.channel_mix_receptance" },
1838
        },
1839
    },
1840
    {
1841
        LLM_ARCH_RWKV6QWEN2,
1842
        {
1843
            { LLM_TENSOR_TOKEN_EMBD,                "token_embd" },
1844
            { LLM_TENSOR_OUTPUT_NORM,               "output_norm" },
1845
            { LLM_TENSOR_OUTPUT,                    "output" },
1846
            { LLM_TENSOR_ATTN_NORM,                 "blk.%d.attn_norm" },
1847
            { LLM_TENSOR_TIME_MIX_W1,               "blk.%d.time_mix_w1" },
1848
            { LLM_TENSOR_TIME_MIX_W2,               "blk.%d.time_mix_w2" },
1849
            { LLM_TENSOR_TIME_MIX_LERP_X,           "blk.%d.time_mix_lerp_x" },
1850
            { LLM_TENSOR_TIME_MIX_LERP_FUSED,       "blk.%d.time_mix_lerp_fused" },
1851
            { LLM_TENSOR_TIME_MIX_FIRST,            "blk.%d.time_mix_first" },
1852
            { LLM_TENSOR_TIME_MIX_DECAY,            "blk.%d.time_mix_decay" },
1853
            { LLM_TENSOR_TIME_MIX_DECAY_W1,         "blk.%d.time_mix_decay_w1" },
1854
            { LLM_TENSOR_TIME_MIX_DECAY_W2,         "blk.%d.time_mix_decay_w2" },
1855
            { LLM_TENSOR_TIME_MIX_KEY,              "blk.%d.time_mix_key" },
1856
            { LLM_TENSOR_TIME_MIX_VALUE,            "blk.%d.time_mix_value" },
1857
            { LLM_TENSOR_TIME_MIX_RECEPTANCE,       "blk.%d.time_mix_receptance" },
1858
            { LLM_TENSOR_TIME_MIX_GATE,             "blk.%d.time_mix_gate" },
1859
            { LLM_TENSOR_TIME_MIX_OUTPUT,           "blk.%d.time_mix_output" },
1860
            { LLM_TENSOR_FFN_NORM,                  "blk.%d.ffn_norm" },
1861
            { LLM_TENSOR_FFN_GATE,                  "blk.%d.ffn_gate" },
1862
            { LLM_TENSOR_FFN_DOWN,                  "blk.%d.ffn_down" },
1863
            { LLM_TENSOR_FFN_UP,                    "blk.%d.ffn_up" },
1864
        },
1865
    },
1866
    {
1867
        LLM_ARCH_RWKV7,
1868
        {
1869
            { LLM_TENSOR_TOKEN_EMBD,                "token_embd" },
1870
            { LLM_TENSOR_TOKEN_EMBD_NORM,           "token_embd_norm" },
1871
            { LLM_TENSOR_OUTPUT_NORM,               "output_norm" },
1872
            { LLM_TENSOR_OUTPUT,                    "output" },
1873
            { LLM_TENSOR_ATTN_NORM,                 "blk.%d.attn_norm" },
1874
            { LLM_TENSOR_ATTN_NORM_2,               "blk.%d.attn_norm_2" },
1875
            { LLM_TENSOR_TIME_MIX_W0,               "blk.%d.time_mix_w0" },
1876
            { LLM_TENSOR_TIME_MIX_W1,               "blk.%d.time_mix_w1" },
1877
            { LLM_TENSOR_TIME_MIX_W2,               "blk.%d.time_mix_w2" },
1878
            { LLM_TENSOR_TIME_MIX_A0,               "blk.%d.time_mix_a0" },
1879
            { LLM_TENSOR_TIME_MIX_A1,               "blk.%d.time_mix_a1" },
1880
            { LLM_TENSOR_TIME_MIX_A2,               "blk.%d.time_mix_a2" },
1881
            { LLM_TENSOR_TIME_MIX_V0,               "blk.%d.time_mix_v0" },
1882
            { LLM_TENSOR_TIME_MIX_V1,               "blk.%d.time_mix_v1" },
1883
            { LLM_TENSOR_TIME_MIX_V2,               "blk.%d.time_mix_v2" },
1884
            { LLM_TENSOR_TIME_MIX_G1,               "blk.%d.time_mix_g1" },
1885
            { LLM_TENSOR_TIME_MIX_G2,               "blk.%d.time_mix_g2" },
1886
            { LLM_TENSOR_TIME_MIX_K_K,              "blk.%d.time_mix_k_k" },
1887
            { LLM_TENSOR_TIME_MIX_K_A,              "blk.%d.time_mix_k_a" },
1888
            { LLM_TENSOR_TIME_MIX_R_K,              "blk.%d.time_mix_r_k" },
1889
            { LLM_TENSOR_TIME_MIX_LERP_FUSED,       "blk.%d.time_mix_lerp_fused" },
1890
            { LLM_TENSOR_TIME_MIX_KEY,              "blk.%d.time_mix_key" },
1891
            { LLM_TENSOR_TIME_MIX_VALUE,            "blk.%d.time_mix_value" },
1892
            { LLM_TENSOR_TIME_MIX_RECEPTANCE,       "blk.%d.time_mix_receptance" },
1893
            { LLM_TENSOR_TIME_MIX_LN,               "blk.%d.time_mix_ln" },
1894
            { LLM_TENSOR_TIME_MIX_OUTPUT,           "blk.%d.time_mix_output" },
1895
            { LLM_TENSOR_CHANNEL_MIX_LERP_K,        "blk.%d.channel_mix_lerp_k" },
1896
            { LLM_TENSOR_CHANNEL_MIX_KEY,           "blk.%d.channel_mix_key" },
1897
            { LLM_TENSOR_CHANNEL_MIX_VALUE,         "blk.%d.channel_mix_value" },
1898
        },
1899
    },
1900
    {
1901
        LLM_ARCH_ARWKV7,
1902
        {
1903
            { LLM_TENSOR_TOKEN_EMBD,                "token_embd" },
1904
            { LLM_TENSOR_TOKEN_EMBD_NORM,           "token_embd_norm" },
1905
            { LLM_TENSOR_OUTPUT_NORM,               "output_norm" },
1906
            { LLM_TENSOR_OUTPUT,                    "output" },
1907
            { LLM_TENSOR_ATTN_NORM,                 "blk.%d.attn_norm" },
1908
            { LLM_TENSOR_TIME_MIX_W0,               "blk.%d.time_mix_w0" },
1909
            { LLM_TENSOR_TIME_MIX_W1,               "blk.%d.time_mix_w1" },
1910
            { LLM_TENSOR_TIME_MIX_W2,               "blk.%d.time_mix_w2" },
1911
            { LLM_TENSOR_TIME_MIX_A0,               "blk.%d.time_mix_a0" },
1912
            { LLM_TENSOR_TIME_MIX_A1,               "blk.%d.time_mix_a1" },
1913
            { LLM_TENSOR_TIME_MIX_A2,               "blk.%d.time_mix_a2" },
1914
            { LLM_TENSOR_TIME_MIX_V0,               "blk.%d.time_mix_v0" },
1915
            { LLM_TENSOR_TIME_MIX_V1,               "blk.%d.time_mix_v1" },
1916
            { LLM_TENSOR_TIME_MIX_V2,               "blk.%d.time_mix_v2" },
1917
            { LLM_TENSOR_TIME_MIX_G1,               "blk.%d.time_mix_g1" },
1918
            { LLM_TENSOR_TIME_MIX_G2,               "blk.%d.time_mix_g2" },
1919
            { LLM_TENSOR_TIME_MIX_K_K,              "blk.%d.time_mix_k_k" },
1920
            { LLM_TENSOR_TIME_MIX_K_A,              "blk.%d.time_mix_k_a" },
1921
            { LLM_TENSOR_TIME_MIX_R_K,              "blk.%d.time_mix_r_k" },
1922
            { LLM_TENSOR_TIME_MIX_LERP_FUSED,       "blk.%d.time_mix_lerp_fused" },
1923
            { LLM_TENSOR_TIME_MIX_KEY,              "blk.%d.time_mix_key" },
1924
            { LLM_TENSOR_TIME_MIX_VALUE,            "blk.%d.time_mix_value" },
1925
            { LLM_TENSOR_TIME_MIX_RECEPTANCE,       "blk.%d.time_mix_receptance" },
1926
            { LLM_TENSOR_TIME_MIX_LN,               "blk.%d.time_mix_ln" },
1927
            { LLM_TENSOR_TIME_MIX_OUTPUT,           "blk.%d.time_mix_output" },
1928
            { LLM_TENSOR_FFN_NORM,                  "blk.%d.ffn_norm" },
1929
            { LLM_TENSOR_FFN_GATE,                  "blk.%d.ffn_gate" },
1930
            { LLM_TENSOR_FFN_DOWN,                  "blk.%d.ffn_down" },
1931
            { LLM_TENSOR_FFN_UP,                    "blk.%d.ffn_up" },
1932
        },
1933
    },
1934
    {
1935
        LLM_ARCH_GRANITE,
1936
        {
1937
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1938
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1939
            { LLM_TENSOR_OUTPUT,          "output" },
1940
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1941
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1942
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1943
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1944
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1945
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1946
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
1947
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
1948
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
1949
        },
1950
    },
1951
    {
1952
        LLM_ARCH_GRANITE_MOE,
1953
        {
1954
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
1955
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
1956
            { LLM_TENSOR_OUTPUT,          "output" },
1957
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
1958
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
1959
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
1960
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
1961
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
1962
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
1963
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
1964
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
1965
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
1966
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
1967
            { LLM_TENSOR_FFN_GATE_SHEXP,  "blk.%d.ffn_gate_shexp" },
1968
            { LLM_TENSOR_FFN_DOWN_SHEXP,  "blk.%d.ffn_down_shexp" },
1969
            { LLM_TENSOR_FFN_UP_SHEXP,    "blk.%d.ffn_up_shexp" },
1970
        },
1971
    },
1972
    {
1973
        LLM_ARCH_GRANITE_HYBRID,
1974
        {
1975
            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
1976
            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
1977
            { LLM_TENSOR_OUTPUT,         "output" },
1978
            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
1979
            // mamba(2) ssm layers
1980
            { LLM_TENSOR_SSM_IN,         "blk.%d.ssm_in" },
1981
            { LLM_TENSOR_SSM_CONV1D,     "blk.%d.ssm_conv1d" },
1982
            { LLM_TENSOR_SSM_DT,         "blk.%d.ssm_dt" },
1983
            { LLM_TENSOR_SSM_A,          "blk.%d.ssm_a" },
1984
            { LLM_TENSOR_SSM_D,          "blk.%d.ssm_d" },
1985
            { LLM_TENSOR_SSM_NORM,       "blk.%d.ssm_norm" },
1986
            { LLM_TENSOR_SSM_OUT,        "blk.%d.ssm_out" },
1987
            // attention layers
1988
            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
1989
            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
1990
            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
1991
            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
1992
            // dense FFN
1993
            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
1994
            { LLM_TENSOR_FFN_GATE,       "blk.%d.ffn_gate" },
1995
            { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
1996
            { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
1997
            // moe FFN
1998
            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
1999
            { LLM_TENSOR_FFN_GATE_INP,   "blk.%d.ffn_gate_inp" },
2000
            { LLM_TENSOR_FFN_GATE_EXPS,  "blk.%d.ffn_gate_exps" },
2001
            { LLM_TENSOR_FFN_DOWN_EXPS,  "blk.%d.ffn_down_exps" },
2002
            { LLM_TENSOR_FFN_UP_EXPS,    "blk.%d.ffn_up_exps" },
2003
            // shared expert
2004
            { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
2005
            { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
2006
            { LLM_TENSOR_FFN_UP_SHEXP,   "blk.%d.ffn_up_shexp" },
2007
        },
2008
    },
2009
    {
2010
        LLM_ARCH_CHAMELEON,
2011
        {
2012
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
2013
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
2014
            { LLM_TENSOR_OUTPUT,          "output" },
2015
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
2016
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
2017
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
2018
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
2019
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
2020
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
2021
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
2022
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
2023
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
2024
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
2025
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
2026
        },
2027
    },
2028
    {
2029
        LLM_ARCH_WAVTOKENIZER_DEC,
2030
        {
2031
            { LLM_TENSOR_TOKEN_EMBD,        "token_embd" },
2032
            { LLM_TENSOR_TOKEN_EMBD_NORM,   "token_embd_norm" },
2033
            { LLM_TENSOR_CONV1D,            "conv1d" },
2034
            { LLM_TENSOR_CONVNEXT_DW,       "convnext.%d.dw" },
2035
            { LLM_TENSOR_CONVNEXT_NORM,     "convnext.%d.norm" },
2036
            { LLM_TENSOR_CONVNEXT_PW1,      "convnext.%d.pw1" },
2037
            { LLM_TENSOR_CONVNEXT_PW2,      "convnext.%d.pw2" },
2038
            { LLM_TENSOR_CONVNEXT_GAMMA,    "convnext.%d.gamma" },
2039
            { LLM_TENSOR_OUTPUT_NORM,       "output_norm" },
2040
            { LLM_TENSOR_OUTPUT,            "output" },
2041
            { LLM_TENSOR_POS_NET_CONV1,     "posnet.%d.conv1" },
2042
            { LLM_TENSOR_POS_NET_CONV2,     "posnet.%d.conv2" },
2043
            { LLM_TENSOR_POS_NET_NORM,      "posnet.%d.norm" },
2044
            { LLM_TENSOR_POS_NET_NORM1,     "posnet.%d.norm1" },
2045
            { LLM_TENSOR_POS_NET_NORM2,     "posnet.%d.norm2" },
2046
            { LLM_TENSOR_POS_NET_ATTN_NORM, "posnet.%d.attn_norm" },
2047
            { LLM_TENSOR_POS_NET_ATTN_Q,    "posnet.%d.attn_q" },
2048
            { LLM_TENSOR_POS_NET_ATTN_K,    "posnet.%d.attn_k" },
2049
            { LLM_TENSOR_POS_NET_ATTN_V,    "posnet.%d.attn_v" },
2050
            { LLM_TENSOR_POS_NET_ATTN_OUT,  "posnet.%d.attn_output" },
2051
        },
2052
    },
2053
    {
2054
        LLM_ARCH_BAILINGMOE,
2055
        {
2056
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
2057
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
2058
            { LLM_TENSOR_OUTPUT,             "output" },
2059
            { LLM_TENSOR_ROPE_FREQS,         "rope_freqs" },
2060
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
2061
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
2062
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
2063
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
2064
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
2065
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
2066
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
2067
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
2068
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
2069
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
2070
            { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
2071
            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
2072
            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
2073
            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
2074
        },
2075
    },
2076
    {
2077
        LLM_ARCH_BAILINGMOE2,
2078
        {
2079
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
2080
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
2081
            { LLM_TENSOR_OUTPUT,             "output" },
2082
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
2083
            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
2084
            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
2085
            { LLM_TENSOR_ATTN_QKV,           "blk.%d.attn_qkv" },
2086
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
2087
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
2088
            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
2089
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
2090
            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
2091
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
2092
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
2093
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
2094
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
2095
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
2096
            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
2097
            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
2098
            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
2099
            { LLM_TENSOR_NEXTN_EH_PROJ,      "blk.%d.nextn.eh_proj" },
2100
            { LLM_TENSOR_NEXTN_EMBED_TOKENS, "blk.%d.nextn.embed_tokens" },
2101
            { LLM_TENSOR_NEXTN_ENORM,        "blk.%d.nextn.enorm" },
2102
            { LLM_TENSOR_NEXTN_HNORM,        "blk.%d.nextn.hnorm" },
2103
            { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.nextn.shared_head_head" },
2104
            { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.nextn.shared_head_norm" },
2105
            { LLM_TENSOR_LAYER_OUT_NORM,     "blk.%d.layer_output_norm" },
2106
        },
2107
    },
2108
    {
2109
        LLM_ARCH_DOTS1,
2110
        {
2111
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
2112
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
2113
            { LLM_TENSOR_OUTPUT,             "output" },
2114
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
2115
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
2116
            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
2117
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
2118
            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
2119
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
2120
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
2121
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
2122
            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
2123
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
2124
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
2125
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
2126
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
2127
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
2128
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
2129
            { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
2130
            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
2131
            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
2132
            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
2133
            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
2134
        }
2135
    },
2136
    {
2137
        LLM_ARCH_ERNIE4_5,
2138
        {
2139
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
2140
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
2141
            { LLM_TENSOR_OUTPUT,             "output" },
2142
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
2143
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
2144
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
2145
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
2146
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
2147
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
2148
            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
2149
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
2150
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
2151
        },
2152
    },
2153
    {
2154
        LLM_ARCH_ERNIE4_5_MOE,
2155
        {
2156
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
2157
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
2158
            { LLM_TENSOR_OUTPUT,             "output" },
2159
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
2160
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
2161
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
2162
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
2163
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
2164
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
2165
            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
2166
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
2167
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
2168
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
2169
            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
2170
            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
2171
            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
2172
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
2173
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
2174
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
2175
            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
2176
        },
2177
    },
2178
    {
2179
        LLM_ARCH_HUNYUAN_MOE,
2180
        {
2181
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
2182
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
2183
            { LLM_TENSOR_OUTPUT,          "output" },
2184
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
2185
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
2186
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
2187
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
2188
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
2189
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
2190
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
2191
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
2192
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
2193
            { LLM_TENSOR_FFN_GATE_SHEXP,  "blk.%d.ffn_gate_shexp" },
2194
            { LLM_TENSOR_FFN_DOWN_SHEXP,  "blk.%d.ffn_down_shexp" },
2195
            { LLM_TENSOR_FFN_UP_SHEXP,    "blk.%d.ffn_up_shexp" },
2196
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
2197
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
2198
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
2199
        },
2200
    },
2201
    {
2202
        LLM_ARCH_HUNYUAN_DENSE,
2203
        {
2204
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
2205
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
2206
            { LLM_TENSOR_OUTPUT,          "output" },
2207
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
2208
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
2209
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
2210
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
2211
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
2212
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
2213
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
2214
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
2215
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
2216
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
2217
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
2218
2219
        },
2220
    },
2221
    {
2222
        LLM_ARCH_SMOLLM3,
2223
        {
2224
            { LLM_TENSOR_TOKEN_EMBD,     "token_embd" },
2225
            { LLM_TENSOR_OUTPUT_NORM,    "output_norm" },
2226
            { LLM_TENSOR_OUTPUT,         "output" },
2227
            { LLM_TENSOR_ATTN_NORM,      "blk.%d.attn_norm" },
2228
            { LLM_TENSOR_ATTN_Q,         "blk.%d.attn_q" },
2229
            { LLM_TENSOR_ATTN_K,         "blk.%d.attn_k" },
2230
            { LLM_TENSOR_ATTN_V,         "blk.%d.attn_v" },
2231
            { LLM_TENSOR_ATTN_OUT,       "blk.%d.attn_output" },
2232
            { LLM_TENSOR_FFN_NORM,       "blk.%d.ffn_norm" },
2233
            { LLM_TENSOR_FFN_GATE,       "blk.%d.ffn_gate" },
2234
            { LLM_TENSOR_FFN_DOWN,       "blk.%d.ffn_down" },
2235
            { LLM_TENSOR_FFN_UP,         "blk.%d.ffn_up" },
2236
        },
2237
    },
2238
    {
2239
        LLM_ARCH_OPENAI_MOE,
2240
        {
2241
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
2242
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
2243
            { LLM_TENSOR_OUTPUT,             "output" },
2244
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
2245
            { LLM_TENSOR_ATTN_POST_NORM,     "blk.%d.post_attention_norm" },
2246
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
2247
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
2248
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
2249
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
2250
            { LLM_TENSOR_ATTN_SINKS,         "blk.%d.attn_sinks" },
2251
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
2252
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
2253
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
2254
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
2255
        },
2256
    },
2257
    {
2258
        LLM_ARCH_LFM2,
2259
        {
2260
            { LLM_TENSOR_ATTN_NORM,         "blk.%d.attn_norm" },
2261
            { LLM_TENSOR_ATTN_Q,            "blk.%d.attn_q" },
2262
            { LLM_TENSOR_ATTN_K,            "blk.%d.attn_k" },
2263
            { LLM_TENSOR_ATTN_V,            "blk.%d.attn_v" },
2264
            { LLM_TENSOR_ATTN_OUT,          "blk.%d.attn_output" },
2265
            { LLM_TENSOR_ATTN_K_NORM,       "blk.%d.attn_k_norm" },
2266
            { LLM_TENSOR_ATTN_Q_NORM,       "blk.%d.attn_q_norm" },
2267
            { LLM_TENSOR_FFN_DOWN,          "blk.%d.ffn_down" },
2268
            { LLM_TENSOR_FFN_GATE,          "blk.%d.ffn_gate" },
2269
            { LLM_TENSOR_FFN_NORM,          "blk.%d.ffn_norm" },
2270
            { LLM_TENSOR_FFN_UP,            "blk.%d.ffn_up" },
2271
            { LLM_TENSOR_SHORTCONV_CONV,    "blk.%d.shortconv.conv" },
2272
            { LLM_TENSOR_SHORTCONV_INPROJ,  "blk.%d.shortconv.in_proj" },
2273
            { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
2274
            { LLM_TENSOR_TOKEN_EMBD,        "token_embd" },
2275
            { LLM_TENSOR_OUTPUT_NORM,       "token_embd_norm" }, // note: wrong tensor name
2276
            { LLM_TENSOR_OUTPUT,            "output" },
2277
        }
2278
    },
2279
    {
2280
        LLM_ARCH_LFM2MOE,
2281
        {
2282
            { LLM_TENSOR_ATTN_NORM,         "blk.%d.attn_norm" },
2283
            { LLM_TENSOR_ATTN_Q,            "blk.%d.attn_q" },
2284
            { LLM_TENSOR_ATTN_K,            "blk.%d.attn_k" },
2285
            { LLM_TENSOR_ATTN_V,            "blk.%d.attn_v" },
2286
            { LLM_TENSOR_ATTN_OUT,          "blk.%d.attn_output" },
2287
            { LLM_TENSOR_ATTN_K_NORM,       "blk.%d.attn_k_norm" },
2288
            { LLM_TENSOR_ATTN_Q_NORM,       "blk.%d.attn_q_norm" },
2289
            { LLM_TENSOR_FFN_DOWN,          "blk.%d.ffn_down" },
2290
            { LLM_TENSOR_FFN_GATE,          "blk.%d.ffn_gate" },
2291
            { LLM_TENSOR_FFN_NORM,          "blk.%d.ffn_norm" },
2292
            { LLM_TENSOR_FFN_UP,            "blk.%d.ffn_up" },
2293
            { LLM_TENSOR_SHORTCONV_CONV,    "blk.%d.shortconv.conv" },
2294
            { LLM_TENSOR_SHORTCONV_INPROJ,  "blk.%d.shortconv.in_proj" },
2295
            { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
2296
            { LLM_TENSOR_TOKEN_EMBD,        "token_embd" },
2297
            { LLM_TENSOR_OUTPUT_NORM,       "token_embd_norm" }, // note: wrong tensor name
2298
            { LLM_TENSOR_FFN_GATE_INP,      "blk.%d.ffn_gate_inp" },
2299
            { LLM_TENSOR_FFN_GATE_EXPS,     "blk.%d.ffn_gate_exps" },
2300
            { LLM_TENSOR_FFN_DOWN_EXPS,     "blk.%d.ffn_down_exps" },
2301
            { LLM_TENSOR_FFN_UP_EXPS,       "blk.%d.ffn_up_exps" },
2302
            { LLM_TENSOR_FFN_EXP_PROBS_B,   "blk.%d.exp_probs_b" },
2303
        }
2304
    },
2305
    {
2306
        LLM_ARCH_SMALLTHINKER,
2307
        {
2308
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
2309
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
2310
            { LLM_TENSOR_OUTPUT,             "output" },
2311
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
2312
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
2313
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
2314
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
2315
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
2316
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
2317
            { LLM_TENSOR_FFN_GATE,           "blk.%d.ffn_gate" },
2318
            { LLM_TENSOR_FFN_DOWN,           "blk.%d.ffn_down" },
2319
            { LLM_TENSOR_FFN_UP,             "blk.%d.ffn_up" },
2320
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
2321
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
2322
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
2323
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" }
2324
        },
2325
    },
2326
    {
2327
        LLM_ARCH_APERTUS,
2328
        {
2329
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
2330
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
2331
            { LLM_TENSOR_OUTPUT,          "output" },
2332
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
2333
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
2334
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
2335
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
2336
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
2337
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
2338
            { LLM_TENSOR_ATTN_Q_NORM,     "blk.%d.attn_q_norm" },
2339
            { LLM_TENSOR_ATTN_K_NORM,     "blk.%d.attn_k_norm" },
2340
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
2341
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
2342
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
2343
        },
2344
    },
2345
    {
2346
        LLM_ARCH_DREAM,
2347
        {
2348
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
2349
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
2350
            { LLM_TENSOR_OUTPUT,          "output" },
2351
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
2352
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
2353
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
2354
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
2355
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
2356
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
2357
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
2358
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
2359
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
2360
        },
2361
    },
2362
    {
2363
        LLM_ARCH_LLADA,
2364
        {
2365
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
2366
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
2367
            { LLM_TENSOR_OUTPUT,          "output" },
2368
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
2369
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
2370
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
2371
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
2372
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
2373
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
2374
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
2375
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
2376
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
2377
        },
2378
    },
2379
    {
2380
        LLM_ARCH_LLADA_MOE,
2381
        {
2382
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
2383
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
2384
            { LLM_TENSOR_OUTPUT,             "output" },
2385
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
2386
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
2387
            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
2388
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
2389
            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
2390
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
2391
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
2392
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
2393
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
2394
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
2395
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
2396
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
2397
        },
2398
    },
2399
    {
2400
        LLM_ARCH_SEED_OSS,
2401
        {
2402
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
2403
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
2404
            { LLM_TENSOR_OUTPUT,          "output" },
2405
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
2406
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
2407
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
2408
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
2409
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
2410
            { LLM_TENSOR_ATTN_POST_NORM,  "blk.%d.post_attention_norm" },
2411
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
2412
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
2413
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
2414
        },
2415
    },
2416
    {
2417
        LLM_ARCH_GROVEMOE,
2418
        {
2419
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
2420
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
2421
            { LLM_TENSOR_OUTPUT,             "output" },
2422
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
2423
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
2424
            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
2425
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
2426
            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
2427
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
2428
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
2429
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
2430
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
2431
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
2432
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
2433
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
2434
            { LLM_TENSOR_FFN_GATE_CHEXPS,    "blk.%d.ffn_gate_chexps" },
2435
            { LLM_TENSOR_FFN_DOWN_CHEXPS,    "blk.%d.ffn_down_chexps" },
2436
            { LLM_TENSOR_FFN_UP_CHEXPS,      "blk.%d.ffn_up_chexps" },
2437
        },
2438
    },
2439
    {
2440
        LLM_ARCH_MINIMAX_M2,
2441
        {
2442
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
2443
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
2444
            { LLM_TENSOR_OUTPUT,             "output" },
2445
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
2446
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
2447
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
2448
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
2449
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
2450
            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
2451
            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
2452
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
2453
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
2454
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
2455
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
2456
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
2457
            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
2458
        },
2459
    },
2460
    {
2461
        LLM_ARCH_PANGU_EMBED,
2462
        {
2463
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
2464
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
2465
            { LLM_TENSOR_OUTPUT,          "output" },
2466
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
2467
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
2468
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
2469
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
2470
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
2471
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
2472
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
2473
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
2474
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
2475
        },
2476
    },
2477
    {
2478
        LLM_ARCH_COGVLM,
2479
        {
2480
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
2481
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
2482
            { LLM_TENSOR_OUTPUT,          "output" },
2483
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
2484
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
2485
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
2486
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
2487
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
2488
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
2489
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
2490
            { LLM_TENSOR_VISEXP_ATTN_QKV, "blk.%d.vis_attn_qkv" },
2491
            { LLM_TENSOR_VISEXP_ATTN_OUT, "blk.%d.vis_attn_output" },
2492
            { LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
2493
            { LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
2494
            { LLM_TENSOR_VISEXP_FFN_UP,   "blk.%d.vis_up" },
2495
        },
2496
    },
2497
    {
2498
        LLM_ARCH_RND1,
2499
        {
2500
            { LLM_TENSOR_TOKEN_EMBD,         "token_embd" },
2501
            { LLM_TENSOR_OUTPUT_NORM,        "output_norm" },
2502
            { LLM_TENSOR_OUTPUT,             "output" },
2503
            { LLM_TENSOR_ATTN_NORM,          "blk.%d.attn_norm" },
2504
            { LLM_TENSOR_ATTN_Q,             "blk.%d.attn_q" },
2505
            { LLM_TENSOR_ATTN_Q_NORM,        "blk.%d.attn_q_norm" },
2506
            { LLM_TENSOR_ATTN_K,             "blk.%d.attn_k" },
2507
            { LLM_TENSOR_ATTN_K_NORM,        "blk.%d.attn_k_norm" },
2508
            { LLM_TENSOR_ATTN_V,             "blk.%d.attn_v" },
2509
            { LLM_TENSOR_ATTN_OUT,           "blk.%d.attn_output" },
2510
            { LLM_TENSOR_FFN_NORM,           "blk.%d.ffn_norm" },
2511
            { LLM_TENSOR_FFN_GATE_INP,       "blk.%d.ffn_gate_inp" },
2512
            { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
2513
            { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
2514
            { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
2515
        },
2516
    },
2517
    {
2518
        LLM_ARCH_MISTRAL3,
2519
        {
2520
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
2521
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
2522
            { LLM_TENSOR_OUTPUT,          "output" },
2523
            { LLM_TENSOR_ROPE_FREQS,      "rope_freqs" },
2524
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
2525
            { LLM_TENSOR_ATTN_Q,          "blk.%d.attn_q" },
2526
            { LLM_TENSOR_ATTN_K,          "blk.%d.attn_k" },
2527
            { LLM_TENSOR_ATTN_V,          "blk.%d.attn_v" },
2528
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
2529
            { LLM_TENSOR_ATTN_ROT_EMBD,   "blk.%d.attn_rot_embd" },
2530
            { LLM_TENSOR_FFN_GATE_INP,    "blk.%d.ffn_gate_inp" },
2531
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
2532
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
2533
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
2534
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
2535
            { LLM_TENSOR_FFN_GATE_EXP,    "blk.%d.ffn_gate.%d" },
2536
            { LLM_TENSOR_FFN_DOWN_EXP,    "blk.%d.ffn_down.%d" },
2537
            { LLM_TENSOR_FFN_UP_EXP,      "blk.%d.ffn_up.%d" },
2538
            { LLM_TENSOR_FFN_GATE_EXPS,   "blk.%d.ffn_gate_exps" },
2539
            { LLM_TENSOR_FFN_DOWN_EXPS,   "blk.%d.ffn_down_exps" },
2540
            { LLM_TENSOR_FFN_UP_EXPS,     "blk.%d.ffn_up_exps" },
2541
        },
2542
    },
2543
    {
2544
        LLM_ARCH_UNKNOWN,
2545
        {
2546
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
2547
        },
2548
    },
2549
};
2550
2551
// declares information about the model weight tensors:
2552
// - the layer in which the tensor is going to be used. this is needed in order to assign the correct buffer type for the weight
2553
// - the operator which is going to use the weight. this is needed to determine if the respective backend supports the operator
2554
//
2555
// for example, input layers are usually assigned to CPU/host buffer types
2556
//
2557
// a mismatch between the declared information and the actual layer/op in which the tensor is used can lead to sub-optimal
2558
//   assignment of the buffer types and extra overhead during computation
2559
// example: https://github.com/ggml-org/llama.cpp/pull/17548
2560
//
2561
static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
2562
    {LLM_TENSOR_TOKEN_EMBD,                 {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
2563
    {LLM_TENSOR_POS_EMBD,                   {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
2564
    {LLM_TENSOR_TOKEN_TYPES,                {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
2565
    {LLM_TENSOR_TOKEN_EMBD_NORM,            {LLM_TENSOR_LAYER_INPUT, GGML_OP_MUL}},
2566
    {LLM_TENSOR_OUTPUT,                     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
2567
    {LLM_TENSOR_CLS,                        {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
2568
    {LLM_TENSOR_CLS_OUT,                    {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
2569
    {LLM_TENSOR_DENSE_2_OUT,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
2570
    {LLM_TENSOR_DENSE_3_OUT,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, // Dense layer output
2571
    {LLM_TENSOR_OUTPUT_NORM,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2572
    {LLM_TENSOR_DEC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2573
    {LLM_TENSOR_ENC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2574
    {LLM_TENSOR_ROPE_FREQS,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
2575
    {LLM_TENSOR_ROPE_FACTORS_LONG,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
2576
    {LLM_TENSOR_ROPE_FACTORS_SHORT,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
2577
    {LLM_TENSOR_ATTN_Q,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2578
    {LLM_TENSOR_ATTN_K,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2579
    {LLM_TENSOR_ATTN_V,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2580
    {LLM_TENSOR_ATTN_QKV,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2581
    {LLM_TENSOR_ATTN_OUT,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2582
    {LLM_TENSOR_ATTN_GATE,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2583
    {LLM_TENSOR_FFN_GATE,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2584
    {LLM_TENSOR_FFN_DOWN,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2585
    {LLM_TENSOR_FFN_UP,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2586
    {LLM_TENSOR_FFN_DOWN_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2587
    {LLM_TENSOR_FFN_GATE_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2588
    {LLM_TENSOR_FFN_UP_SHEXP,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2589
    {LLM_TENSOR_ATTN_Q_A,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2590
    {LLM_TENSOR_ATTN_Q_B,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2591
    {LLM_TENSOR_ATTN_KV_A_MQA,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2592
    {LLM_TENSOR_ATTN_KV_B,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2593
    {LLM_TENSOR_ATTN_K_B,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2594
    {LLM_TENSOR_ATTN_V_B,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2595
    {LLM_TENSOR_ATTN_SINKS,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SCALE}},
2596
    {LLM_TENSOR_DEC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2597
    {LLM_TENSOR_DEC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2598
    {LLM_TENSOR_DEC_ATTN_V,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2599
    {LLM_TENSOR_DEC_ATTN_OUT,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2600
    {LLM_TENSOR_DEC_CROSS_ATTN_Q,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2601
    {LLM_TENSOR_DEC_CROSS_ATTN_K,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2602
    {LLM_TENSOR_DEC_CROSS_ATTN_V,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2603
    {LLM_TENSOR_DEC_CROSS_ATTN_OUT,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2604
    {LLM_TENSOR_DEC_FFN_GATE,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2605
    {LLM_TENSOR_DEC_FFN_DOWN,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2606
    {LLM_TENSOR_DEC_FFN_UP,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2607
    {LLM_TENSOR_ENC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2608
    {LLM_TENSOR_ENC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2609
    {LLM_TENSOR_ENC_ATTN_V,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2610
    {LLM_TENSOR_ENC_ATTN_OUT,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2611
    {LLM_TENSOR_ENC_FFN_GATE,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2612
    {LLM_TENSOR_ENC_FFN_DOWN,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2613
    {LLM_TENSOR_ENC_FFN_UP,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2614
    {LLM_TENSOR_FFN_GATE_INP_SHEXP,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2615
    {LLM_TENSOR_FFN_GATE_INP,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2616
    {LLM_TENSOR_SSM_IN,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2617
    {LLM_TENSOR_SSM_X,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2618
    {LLM_TENSOR_SSM_DT,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2619
    {LLM_TENSOR_SSM_OUT,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2620
    {LLM_TENSOR_SSM_BETA_ALPHA,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2621
    {LLM_TENSOR_TIME_MIX_W1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2622
    {LLM_TENSOR_TIME_MIX_W2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2623
    {LLM_TENSOR_TIME_MIX_A1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2624
    {LLM_TENSOR_TIME_MIX_A2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2625
    {LLM_TENSOR_TIME_MIX_V1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2626
    {LLM_TENSOR_TIME_MIX_V2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2627
    {LLM_TENSOR_TIME_MIX_G1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2628
    {LLM_TENSOR_TIME_MIX_G2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2629
    {LLM_TENSOR_TIME_MIX_DECAY_W1,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2630
    {LLM_TENSOR_TIME_MIX_DECAY_W2,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2631
    {LLM_TENSOR_TIME_MIX_KEY,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2632
    {LLM_TENSOR_TIME_MIX_VALUE,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2633
    {LLM_TENSOR_TIME_MIX_RECEPTANCE,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2634
    {LLM_TENSOR_TIME_MIX_GATE,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2635
    {LLM_TENSOR_TIME_MIX_OUTPUT,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2636
    {LLM_TENSOR_CHANNEL_MIX_KEY,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2637
    {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2638
    {LLM_TENSOR_CHANNEL_MIX_VALUE,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2639
    {LLM_TENSOR_FFN_ACT,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
2640
    {LLM_TENSOR_SSM_CONV1D,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
2641
    {LLM_TENSOR_SSM_A,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
2642
    {LLM_TENSOR_SSM_A_NOSCAN,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, // a version of SSM_A used for MUL instead of SSM_SCAN
2643
    {LLM_TENSOR_SSM_DT_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2644
    {LLM_TENSOR_SSM_B_NORM,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2645
    {LLM_TENSOR_SSM_C_NORM,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2646
    {LLM_TENSOR_SSM_D,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2647
    {LLM_TENSOR_SSM_NORM,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2648
    {LLM_TENSOR_TIME_MIX_LERP_X,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2649
    {LLM_TENSOR_TIME_MIX_LN,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2650
    {LLM_TENSOR_CHANNEL_MIX_LERP_K,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2651
    {LLM_TENSOR_CHANNEL_MIX_LERP_R,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2652
    {LLM_TENSOR_TIME_MIX_K_K,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2653
    {LLM_TENSOR_TIME_MIX_K_A,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2654
    {LLM_TENSOR_TIME_MIX_R_K,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2655
    {LLM_TENSOR_TIME_MIX_LERP_W,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2656
    {LLM_TENSOR_TIME_MIX_LERP_K,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2657
    {LLM_TENSOR_TIME_MIX_LERP_V,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2658
    {LLM_TENSOR_TIME_MIX_LERP_R,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2659
    {LLM_TENSOR_TIME_MIX_LERP_G,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2660
    {LLM_TENSOR_TIME_MIX_LERP_FUSED,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2661
    {LLM_TENSOR_TIME_MIX_DECAY,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2662
    {LLM_TENSOR_TIME_MIX_W0,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2663
    {LLM_TENSOR_TIME_MIX_A0,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2664
    {LLM_TENSOR_TIME_MIX_V0,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2665
    {LLM_TENSOR_TIME_MIX_FIRST,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_RWKV_WKV6}},
2666
    {LLM_TENSOR_ATTN_NORM,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2667
    {LLM_TENSOR_ATTN_NORM_2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2668
    {LLM_TENSOR_ATTN_OUT_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2669
    {LLM_TENSOR_ATTN_POST_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2670
    {LLM_TENSOR_FFN_NORM,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2671
    {LLM_TENSOR_FFN_POST_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2672
    {LLM_TENSOR_FFN_NORM_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2673
    {LLM_TENSOR_ATTN_Q_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2674
    {LLM_TENSOR_ATTN_K_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2675
    {LLM_TENSOR_LAYER_OUT_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2676
    {LLM_TENSOR_ATTN_Q_A_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2677
    {LLM_TENSOR_ATTN_KV_A_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2678
    {LLM_TENSOR_ATTN_SUB_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2679
    {LLM_TENSOR_FFN_SUB_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2680
    {LLM_TENSOR_DEC_ATTN_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2681
    {LLM_TENSOR_DEC_CROSS_ATTN_NORM,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2682
    {LLM_TENSOR_DEC_FFN_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2683
    {LLM_TENSOR_ENC_ATTN_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2684
    {LLM_TENSOR_ENC_FFN_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2685
    {LLM_TENSOR_DEC_ATTN_REL_B,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
2686
    {LLM_TENSOR_ENC_ATTN_REL_B,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
2687
    {LLM_TENSOR_FFN_DOWN_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2688
    {LLM_TENSOR_FFN_GATE_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2689
    {LLM_TENSOR_FFN_UP_EXPS,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2690
    {LLM_TENSOR_FFN_DOWN_CHEXPS,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2691
    {LLM_TENSOR_FFN_GATE_CHEXPS,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2692
    {LLM_TENSOR_FFN_UP_CHEXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2693
    {LLM_TENSOR_FFN_EXP_PROBS_B,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
2694
    // altup / laurel (gemma 3n)
2695
    {LLM_TENSOR_PER_LAYER_TOKEN_EMBD,       {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_GET_ROWS}},
2696
    {LLM_TENSOR_PER_LAYER_MODEL_PROJ,       {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
2697
    {LLM_TENSOR_PER_LAYER_PROJ_NORM,        {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL}},
2698
    {LLM_TENSOR_ALTUP_PROJ,                 {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
2699
    {LLM_TENSOR_ALTUP_UNEMBD_PROJ,          {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
2700
    {LLM_TENSOR_PER_LAYER_INP_GATE,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2701
    {LLM_TENSOR_PER_LAYER_PROJ,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2702
    {LLM_TENSOR_PER_LAYER_POST_NORM,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2703
    {LLM_TENSOR_ALTUP_CORRECT_COEF,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2704
    {LLM_TENSOR_ALTUP_CORRECT_SCALE,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2705
    {LLM_TENSOR_ALTUP_PREDICT_COEF,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2706
    {LLM_TENSOR_ALTUP_ROUTER,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2707
    {LLM_TENSOR_ALTUP_ROUTER_NORM,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2708
    {LLM_TENSOR_LAUREL_L,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2709
    {LLM_TENSOR_LAUREL_R,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2710
    {LLM_TENSOR_LAUREL_POST_NORM,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2711
    // this tensor is loaded for T5, but never used
2712
    {LLM_TENSOR_DEC_CROSS_ATTN_REL_B,       {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
2713
    {LLM_TENSOR_CONV1D,                     {LLM_TENSOR_LAYER_INPUT,     GGML_OP_IM2COL}},
2714
    {LLM_TENSOR_POS_NET_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2715
    {LLM_TENSOR_POS_NET_NORM1,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2716
    {LLM_TENSOR_POS_NET_NORM2,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2717
    {LLM_TENSOR_POS_NET_CONV1,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
2718
    {LLM_TENSOR_POS_NET_CONV2,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
2719
    {LLM_TENSOR_POS_NET_ATTN_NORM,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2720
    {LLM_TENSOR_POS_NET_ATTN_Q,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2721
    {LLM_TENSOR_POS_NET_ATTN_K,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2722
    {LLM_TENSOR_POS_NET_ATTN_V,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2723
    {LLM_TENSOR_POS_NET_ATTN_OUT,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2724
    {LLM_TENSOR_CONVNEXT_DW,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
2725
    {LLM_TENSOR_CONVNEXT_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2726
    {LLM_TENSOR_CONVNEXT_PW1,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2727
    {LLM_TENSOR_CONVNEXT_PW2,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2728
    {LLM_TENSOR_CONVNEXT_GAMMA,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
2729
    {LLM_TENSOR_SHORTCONV_CONV,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
2730
    {LLM_TENSOR_SHORTCONV_INPROJ,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2731
    {LLM_TENSOR_SHORTCONV_OUTPROJ,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2732
    {LLM_TENSOR_VISEXP_ATTN_QKV,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2733
    {LLM_TENSOR_VISEXP_ATTN_OUT,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2734
    {LLM_TENSOR_VISEXP_FFN_GATE,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2735
    {LLM_TENSOR_VISEXP_FFN_DOWN,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2736
    {LLM_TENSOR_VISEXP_FFN_UP,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
2737
    // NextN/MTP tensors are currently ignored (reserved for future MTP support)
2738
    // These tensors only exist in the last layer(s) and are treated as output tensors
2739
    {LLM_TENSOR_NEXTN_EH_PROJ,              {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
2740
    {LLM_TENSOR_NEXTN_EMBED_TOKENS,         {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
2741
    {LLM_TENSOR_NEXTN_ENORM,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
2742
    {LLM_TENSOR_NEXTN_HNORM,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2743
    {LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
2744
    {LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
2745
};
2746
2747
0
LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
2748
2749
0
std::string LLM_KV::operator()(llm_kv kv) const {
2750
0
    std::string name = ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
2751
2752
0
    if (suffix != nullptr) {
2753
0
        name += ".";
2754
0
        name += suffix;
2755
0
    }
2756
2757
0
    return name;
2758
0
}
2759
2760
0
std::string LLM_TN_IMPL::str() const {
2761
0
    if (LLM_TENSOR_NAMES.at(arch).find(tensor) == LLM_TENSOR_NAMES.at(arch).end()) {
2762
0
        return "__missing__";
2763
0
    }
2764
2765
0
    std::string name = ::format(LLM_TENSOR_NAMES.at(arch).at(tensor), bid, xid);
2766
2767
0
    if (suffix != nullptr) {
2768
0
        name += ".";
2769
0
        name += suffix;
2770
0
    }
2771
2772
0
    return name;
2773
0
}
2774
2775
0
const char * llm_arch_name(llm_arch arch) {
2776
0
    auto it = LLM_ARCH_NAMES.find(arch);
2777
0
    if (it == LLM_ARCH_NAMES.end()) {
2778
0
        return "unknown";
2779
0
    }
2780
0
    return it->second;
2781
0
}
2782
2783
0
llm_arch llm_arch_from_string(const std::string & name) {
2784
0
    for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
2785
0
        if (kv.second == name) {
2786
0
            return kv.first;
2787
0
        }
2788
0
    }
2789
2790
0
    return LLM_ARCH_UNKNOWN;
2791
0
}
2792
2793
0
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
2794
0
    return LLM_TENSOR_INFOS.at(tensor);
2795
0
}
2796
2797
0
bool llm_arch_is_recurrent(const llm_arch & arch) {
2798
0
    switch (arch) {
2799
0
        case LLM_ARCH_MAMBA:
2800
0
        case LLM_ARCH_MAMBA2:
2801
0
        case LLM_ARCH_RWKV6:
2802
0
        case LLM_ARCH_RWKV6QWEN2:
2803
0
        case LLM_ARCH_RWKV7:
2804
0
        case LLM_ARCH_ARWKV7:
2805
0
            return true;
2806
0
        default:
2807
0
            return false;
2808
0
    }
2809
0
}
2810
2811
0
bool llm_arch_is_hybrid(const llm_arch & arch) {
2812
0
    switch (arch) {
2813
0
        case LLM_ARCH_JAMBA:
2814
0
        case LLM_ARCH_FALCON_H1:
2815
0
        case LLM_ARCH_PLAMO2:
2816
0
        case LLM_ARCH_GRANITE_HYBRID:
2817
0
        case LLM_ARCH_LFM2:
2818
0
        case LLM_ARCH_LFM2MOE:
2819
0
        case LLM_ARCH_NEMOTRON_H:
2820
0
        case LLM_ARCH_QWEN3NEXT:
2821
0
            return true;
2822
0
        default:
2823
0
            return false;
2824
0
    }
2825
0
}
2826
2827
0
bool llm_arch_is_diffusion(const llm_arch & arch) {
2828
0
    switch (arch) {
2829
0
        case LLM_ARCH_DREAM:
2830
0
        case LLM_ARCH_LLADA:
2831
0
        case LLM_ARCH_LLADA_MOE:
2832
0
        case LLM_ARCH_RND1:
2833
0
            return true;
2834
0
        default:
2835
0
            return false;
2836
0
    }
2837
0
}