Coverage Report

Created: 2026-04-12 06:40

/src/llama.cpp/src/llama-arch.cpp
#include "llama-arch.h"

#include "llama-impl.h"

#include <map>
#include <set>
#include <vector>

static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_CLIP,             "clip"             }, // dummy, only used by llama-quantize
    { LLM_ARCH_LLAMA,            "llama"            },
    { LLM_ARCH_LLAMA4,           "llama4"           },
    { LLM_ARCH_DECI,             "deci"             },
    { LLM_ARCH_FALCON,           "falcon"           },
    { LLM_ARCH_GROK,             "grok"             },
    { LLM_ARCH_GPT2,             "gpt2"             },
    { LLM_ARCH_GPTJ,             "gptj"             },
    { LLM_ARCH_GPTNEOX,          "gptneox"          },
    { LLM_ARCH_MPT,              "mpt"              },
    { LLM_ARCH_BAICHUAN,         "baichuan"         },
    { LLM_ARCH_STARCODER,        "starcoder"        },
    { LLM_ARCH_REFACT,           "refact"           },
    { LLM_ARCH_BERT,             "bert"             },
    { LLM_ARCH_MODERN_BERT,      "modern-bert"      },
    { LLM_ARCH_NOMIC_BERT,       "nomic-bert"       },
    { LLM_ARCH_NOMIC_BERT_MOE,   "nomic-bert-moe"   },
    { LLM_ARCH_NEO_BERT,         "neo-bert"         },
    { LLM_ARCH_JINA_BERT_V2,     "jina-bert-v2"     },
    { LLM_ARCH_JINA_BERT_V3,     "jina-bert-v3"     },
    { LLM_ARCH_EUROBERT,         "eurobert"         },
    { LLM_ARCH_BLOOM,            "bloom"            },
    { LLM_ARCH_STABLELM,         "stablelm"         },
    { LLM_ARCH_QWEN,             "qwen"             },
    { LLM_ARCH_QWEN2,            "qwen2"            },
    { LLM_ARCH_QWEN2MOE,         "qwen2moe"         },
    { LLM_ARCH_QWEN2VL,          "qwen2vl"          },
    { LLM_ARCH_QWEN3,            "qwen3"            },
    { LLM_ARCH_QWEN3MOE,         "qwen3moe"         },
    { LLM_ARCH_QWEN3NEXT,        "qwen3next"        },
    { LLM_ARCH_QWEN3VL,          "qwen3vl"          },
    { LLM_ARCH_QWEN3VLMOE,       "qwen3vlmoe"       },
    { LLM_ARCH_QWEN35,           "qwen35"           },
    { LLM_ARCH_QWEN35MOE,        "qwen35moe"        },
    { LLM_ARCH_PHI2,             "phi2"             },
    { LLM_ARCH_PHI3,             "phi3"             },
    { LLM_ARCH_PHIMOE,           "phimoe"           },
    { LLM_ARCH_PLAMO,            "plamo"            },
    { LLM_ARCH_PLAMO2,           "plamo2"           },
    { LLM_ARCH_PLAMO3,           "plamo3"           },
    { LLM_ARCH_CODESHELL,        "codeshell"        },
    { LLM_ARCH_ORION,            "orion"            },
    { LLM_ARCH_INTERNLM2,        "internlm2"        },
    { LLM_ARCH_MINICPM,          "minicpm"          },
    { LLM_ARCH_MINICPM3,         "minicpm3"         },
    { LLM_ARCH_GEMMA,            "gemma"            },
    { LLM_ARCH_GEMMA2,           "gemma2"           },
    { LLM_ARCH_GEMMA3,           "gemma3"           },
    { LLM_ARCH_GEMMA3N,          "gemma3n"          },
    { LLM_ARCH_GEMMA4,           "gemma4"           },
    { LLM_ARCH_GEMMA_EMBEDDING,  "gemma-embedding"  },
    { LLM_ARCH_STARCODER2,       "starcoder2"       },
    { LLM_ARCH_MAMBA,            "mamba"            },
    { LLM_ARCH_MAMBA2,           "mamba2"           },
    { LLM_ARCH_JAMBA,            "jamba"            },
    { LLM_ARCH_FALCON_H1,        "falcon-h1"        },
    { LLM_ARCH_XVERSE,           "xverse"           },
    { LLM_ARCH_COMMAND_R,        "command-r"        },
    { LLM_ARCH_COHERE2,          "cohere2"          },
    { LLM_ARCH_DBRX,             "dbrx"             },
    { LLM_ARCH_OLMO,             "olmo"             },
    { LLM_ARCH_OLMO2,            "olmo2"            },
    { LLM_ARCH_OLMOE,            "olmoe"            },
    { LLM_ARCH_OPENELM,          "openelm"          },
    { LLM_ARCH_ARCTIC,           "arctic"           },
    { LLM_ARCH_DEEPSEEK,         "deepseek"         },
    { LLM_ARCH_DEEPSEEK2,        "deepseek2"        },
    { LLM_ARCH_DEEPSEEK2OCR,     "deepseek2-ocr"    },
    { LLM_ARCH_CHATGLM,          "chatglm"          },
    { LLM_ARCH_GLM4,             "glm4"             },
    { LLM_ARCH_GLM4_MOE,         "glm4moe"          },
    { LLM_ARCH_GLM_DSA,          "glm-dsa"          },
    { LLM_ARCH_BITNET,           "bitnet"           },
    { LLM_ARCH_T5,               "t5"               },
    { LLM_ARCH_T5ENCODER,        "t5encoder"        },
    { LLM_ARCH_JAIS,             "jais"             },
    { LLM_ARCH_JAIS2,            "jais2"            },
    { LLM_ARCH_NEMOTRON,         "nemotron"         },
    { LLM_ARCH_NEMOTRON_H,       "nemotron_h"       },
    { LLM_ARCH_NEMOTRON_H_MOE,   "nemotron_h_moe"   },
    { LLM_ARCH_EXAONE,           "exaone"           },
    { LLM_ARCH_EXAONE4,          "exaone4"          },
    { LLM_ARCH_EXAONE_MOE,       "exaone-moe"       },
    { LLM_ARCH_RWKV6,            "rwkv6"            },
    { LLM_ARCH_RWKV6QWEN2,       "rwkv6qwen2"       },
    { LLM_ARCH_RWKV7,            "rwkv7"            },
    { LLM_ARCH_ARWKV7,           "arwkv7"           },
    { LLM_ARCH_GRANITE,          "granite"          },
    { LLM_ARCH_GRANITE_MOE,      "granitemoe"       },
    { LLM_ARCH_GRANITE_HYBRID,   "granitehybrid"    },
    { LLM_ARCH_CHAMELEON,        "chameleon"        },
    { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
    { LLM_ARCH_PLM,              "plm"              },
    { LLM_ARCH_BAILINGMOE,       "bailingmoe"       },
    { LLM_ARCH_BAILINGMOE2,      "bailingmoe2"      },
    { LLM_ARCH_DOTS1,            "dots1"            },
    { LLM_ARCH_ARCEE,            "arcee"            },
    { LLM_ARCH_AFMOE,            "afmoe"            },
    { LLM_ARCH_ERNIE4_5,         "ernie4_5"         },
    { LLM_ARCH_ERNIE4_5_MOE,     "ernie4_5-moe"     },
    { LLM_ARCH_HUNYUAN_MOE,      "hunyuan-moe"      },
    { LLM_ARCH_HUNYUAN_DENSE,    "hunyuan-dense"    },
    { LLM_ARCH_SMOLLM3,          "smollm3"          },
    { LLM_ARCH_OPENAI_MOE,       "gpt-oss"          },
    { LLM_ARCH_LFM2,             "lfm2"             },
    { LLM_ARCH_LFM2MOE,          "lfm2moe"          },
    { LLM_ARCH_DREAM,            "dream"            },
    { LLM_ARCH_SMALLTHINKER,     "smallthinker"     },
    { LLM_ARCH_LLADA,            "llada"            },
    { LLM_ARCH_LLADA_MOE,        "llada-moe"        },
    { LLM_ARCH_SEED_OSS,         "seed_oss"         },
    { LLM_ARCH_GROVEMOE,         "grovemoe"         },
    { LLM_ARCH_APERTUS,          "apertus"          },
    { LLM_ARCH_MINIMAX_M2,       "minimax-m2"       },
    { LLM_ARCH_COGVLM,           "cogvlm"           },
    { LLM_ARCH_RND1,             "rnd1"             },
    { LLM_ARCH_PANGU_EMBED,      "pangu-embedded"   },
    { LLM_ARCH_MISTRAL3,         "mistral3"         },
    { LLM_ARCH_MISTRAL4,         "mistral4"         },
    { LLM_ARCH_PADDLEOCR,        "paddleocr"        },
    { LLM_ARCH_MIMO2,            "mimo2"            },
    { LLM_ARCH_STEP35,           "step35"           },
    { LLM_ARCH_LLAMA_EMBED,      "llama-embed"      },
    { LLM_ARCH_MAINCODER,        "maincoder"        },
    { LLM_ARCH_KIMI_LINEAR,      "kimi-linear"      },
    { LLM_ARCH_UNKNOWN,          "(unknown)"        },
};
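
The map above is the canonical pairing between llm_arch enum values and the
architecture strings stored in GGUF metadata under general.architecture. As an
illustration only (a sketch, not the helper the file itself defines), an
architecture string read from a model file can be resolved back to its enum by
a reverse lookup over this map, falling back to LLM_ARCH_UNKNOWN:

    // illustrative sketch; assumes <string> is available
    static llm_arch llm_arch_from_name_sketch(const std::string & name) {
        for (const auto & it : LLM_ARCH_NAMES) {
            if (name == it.second) {
                return it.first;       // names are unique, first match wins
            }
        }
        return LLM_ARCH_UNKNOWN;       // unrecognized architecture string
    }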

static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    { LLM_KV_GENERAL_TYPE,                     "general.type"                          },
    { LLM_KV_GENERAL_ARCHITECTURE,             "general.architecture"                  },
    { LLM_KV_GENERAL_QUANTIZATION_VERSION,     "general.quantization_version"          },
    { LLM_KV_GENERAL_ALIGNMENT,                "general.alignment"                     },
    { LLM_KV_GENERAL_FILE_TYPE,                "general.file_type"                     },
    { LLM_KV_GENERAL_SAMPLING_SEQUENCE,        "general.sampling.sequence"             },
    { LLM_KV_GENERAL_SAMPLING_TOP_K,           "general.sampling.top_k"                },
    { LLM_KV_GENERAL_SAMPLING_TOP_P,           "general.sampling.top_p"                },
    { LLM_KV_GENERAL_SAMPLING_MIN_P,           "general.sampling.min_p"                },
    { LLM_KV_GENERAL_SAMPLING_XTC_PROBABILITY, "general.sampling.xtc_probability"      },
    { LLM_KV_GENERAL_SAMPLING_XTC_THRESHOLD,   "general.sampling.xtc_threshold"        },
    { LLM_KV_GENERAL_SAMPLING_TEMP,            "general.sampling.temp"                 },
    { LLM_KV_GENERAL_SAMPLING_PENALTY_LAST_N,  "general.sampling.penalty_last_n"       },
    { LLM_KV_GENERAL_SAMPLING_PENALTY_REPEAT,  "general.sampling.penalty_repeat"       },
    { LLM_KV_GENERAL_SAMPLING_MIROSTAT,        "general.sampling.mirostat"             },
    { LLM_KV_GENERAL_SAMPLING_MIROSTAT_TAU,    "general.sampling.mirostat_tau"         },
    { LLM_KV_GENERAL_SAMPLING_MIROSTAT_ETA,    "general.sampling.mirostat_eta"         },
    { LLM_KV_GENERAL_NAME,                     "general.name"                          },
    { LLM_KV_GENERAL_AUTHOR,                   "general.author"                        },
    { LLM_KV_GENERAL_VERSION,                  "general.version"                       },
    { LLM_KV_GENERAL_URL,                      "general.url"                           },
    { LLM_KV_GENERAL_DESCRIPTION,              "general.description"                   },
    { LLM_KV_GENERAL_LICENSE,                  "general.license"                       },
    { LLM_KV_GENERAL_SOURCE_URL,               "general.source.url"                    },
    { LLM_KV_GENERAL_SOURCE_HF_REPO,           "general.source.huggingface.repository" },

    { LLM_KV_VOCAB_SIZE,                        "%s.vocab_size"                        },
    { LLM_KV_CONTEXT_LENGTH,                    "%s.context_length"                    },
    { LLM_KV_EMBEDDING_LENGTH,                  "%s.embedding_length"                  },
    { LLM_KV_EMBEDDING_LENGTH_OUT,              "%s.embedding_length_out"              },
    { LLM_KV_EMBEDDING_LENGTH_PER_LAYER,        "%s.embedding_length_per_layer_input"  },
    { LLM_KV_FEATURES_LENGTH,                   "%s.features_length"                   },
    { LLM_KV_BLOCK_COUNT,                       "%s.block_count"                       },
    { LLM_KV_LEADING_DENSE_BLOCK_COUNT,         "%s.leading_dense_block_count"         },
    { LLM_KV_FEED_FORWARD_LENGTH,               "%s.feed_forward_length"               },
    { LLM_KV_EXPERT_FEED_FORWARD_LENGTH,        "%s.expert_feed_forward_length"        },
    { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, "%s.expert_shared_feed_forward_length" },
    { LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH,  "%s.expert_chunk_feed_forward_length"  },
    { LLM_KV_SWIGLU_CLAMP_EXP,                  "%s.swiglu_clamp_exp"                  },
    { LLM_KV_SWIGLU_CLAMP_SHEXP,                "%s.swiglu_clamp_shexp"                },
    { LLM_KV_USE_PARALLEL_RESIDUAL,             "%s.use_parallel_residual"             },
    { LLM_KV_TENSOR_DATA_LAYOUT,                "%s.tensor_data_layout"                },
    { LLM_KV_EXPERT_COUNT,                      "%s.expert_count"                      },
    { LLM_KV_EXPERT_USED_COUNT,                 "%s.expert_used_count"                 },
    { LLM_KV_EXPERT_SHARED_COUNT,               "%s.expert_shared_count"               },
    { LLM_KV_EXPERT_GROUP_COUNT,                "%s.expert_group_count"                },
    { LLM_KV_EXPERT_GROUP_USED_COUNT,           "%s.expert_group_used_count"           },
    { LLM_KV_EXPERT_WEIGHTS_SCALE,              "%s.expert_weights_scale"              },
    { LLM_KV_EXPERT_WEIGHTS_NORM,               "%s.expert_weights_norm"               },
    { LLM_KV_EXPERT_GATING_FUNC,                "%s.expert_gating_func"                },
    { LLM_KV_EXPERT_GROUP_SCALE,                "%s.expert_group_scale"                },
    { LLM_KV_EXPERTS_PER_GROUP,                 "%s.experts_per_group"                 },
    { LLM_KV_MOE_EVERY_N_LAYERS,                "%s.moe_every_n_layers"                },
    { LLM_KV_MOE_LATENT_SIZE,                   "%s.moe_latent_size"                   },
    { LLM_KV_NEXTN_PREDICT_LAYERS,              "%s.nextn_predict_layers"              },
    { LLM_KV_NUM_DEEPSTACK_LAYERS,              "%s.n_deepstack_layers"                },
    { LLM_KV_POOLING_TYPE,                      "%s.pooling_type"                      },
    { LLM_KV_LOGIT_SCALE,                       "%s.logit_scale"                       },
    { LLM_KV_DECODER_START_TOKEN_ID,            "%s.decoder_start_token_id"            },
    { LLM_KV_DECODER_BLOCK_COUNT,               "%s.decoder_block_count"               },
    { LLM_KV_ATTN_LOGIT_SOFTCAPPING,            "%s.attn_logit_softcapping"            },
    { LLM_KV_ROUTER_LOGIT_SOFTCAPPING,          "%s.router_logit_softcapping"          },
    { LLM_KV_FINAL_LOGIT_SOFTCAPPING,           "%s.final_logit_softcapping"           },
    { LLM_KV_SWIN_NORM,                         "%s.swin_norm"                         },
    { LLM_KV_RESCALE_EVERY_N_LAYERS,            "%s.rescale_every_n_layers"            },
    { LLM_KV_TIME_MIX_EXTRA_DIM,                "%s.time_mix_extra_dim"                },
    { LLM_KV_TIME_DECAY_EXTRA_DIM,              "%s.time_decay_extra_dim"              },
    { LLM_KV_RESIDUAL_SCALE,                    "%s.residual_scale"                    },
    { LLM_KV_EMBEDDING_SCALE,                   "%s.embedding_scale"                   },
    { LLM_KV_TOKEN_SHIFT_COUNT,                 "%s.token_shift_count"                 },
    { LLM_KV_INTERLEAVE_MOE_LAYER_STEP,         "%s.interleave_moe_layer_step"         },
    { LLM_KV_FULL_ATTENTION_INTERVAL,           "%s.full_attention_interval"           },

    { LLM_KV_ATTENTION_HEAD_COUNT,                   "%s.attention.head_count"                   },
    { LLM_KV_ATTENTION_HEAD_COUNT_KV,                "%s.attention.head_count_kv"                },
    { LLM_KV_ATTENTION_MAX_ALIBI_BIAS,               "%s.attention.max_alibi_bias"               },
    { LLM_KV_ATTENTION_CLAMP_KQV,                    "%s.attention.clamp_kqv"                    },
    { LLM_KV_ATTENTION_KEY_LENGTH,                   "%s.attention.key_length"                   },
    { LLM_KV_ATTENTION_VALUE_LENGTH,                 "%s.attention.value_length"                 },
    { LLM_KV_ATTENTION_LAYERNORM_EPS,                "%s.attention.layer_norm_epsilon"           },
    { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,            "%s.attention.layer_norm_rms_epsilon"       },
    { LLM_KV_ATTENTION_GROUPNORM_EPS,                "%s.attention.group_norm_epsilon"           },
    { LLM_KV_ATTENTION_GROUPNORM_GROUPS,             "%s.attention.group_norm_groups"            },
    { LLM_KV_ATTENTION_CAUSAL,                       "%s.attention.causal"                       },
    { LLM_KV_ATTENTION_Q_LORA_RANK,                  "%s.attention.q_lora_rank"                  },
    { LLM_KV_ATTENTION_KV_LORA_RANK,                 "%s.attention.kv_lora_rank"                 },
    { LLM_KV_ATTENTION_DECAY_LORA_RANK,              "%s.attention.decay_lora_rank"              },
    { LLM_KV_ATTENTION_ICLR_LORA_RANK,               "%s.attention.iclr_lora_rank"               },
    { LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, "%s.attention.value_residual_mix_lora_rank" },
    { LLM_KV_ATTENTION_GATE_LORA_RANK,               "%s.attention.gate_lora_rank"               },
    { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,       "%s.attention.relative_buckets_count"       },
    { LLM_KV_ATTENTION_SLIDING_WINDOW,               "%s.attention.sliding_window"               },
    { LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN,       "%s.attention.sliding_window_pattern"       },
    { LLM_KV_ATTENTION_SCALE,                        "%s.attention.scale"                        },
    { LLM_KV_ATTENTION_OUTPUT_SCALE,                 "%s.attention.output_scale"                 },
    { LLM_KV_ATTENTION_TEMPERATURE_LENGTH,           "%s.attention.temperature_length"           },
    { LLM_KV_ATTENTION_TEMPERATURE_SCALE,            "%s.attention.temperature_scale"            },
    { LLM_KV_ATTENTION_KEY_LENGTH_MLA,               "%s.attention.key_length_mla"               },
    { LLM_KV_ATTENTION_VALUE_LENGTH_MLA,             "%s.attention.value_length_mla"             },
    { LLM_KV_ATTENTION_KEY_LENGTH_SWA,               "%s.attention.key_length_swa"               },
    { LLM_KV_ATTENTION_VALUE_LENGTH_SWA,             "%s.attention.value_length_swa"             },
    { LLM_KV_ATTENTION_INDEXER_HEAD_COUNT,           "%s.attention.indexer.head_count"           },
    { LLM_KV_ATTENTION_INDEXER_KEY_LENGTH,           "%s.attention.indexer.key_length"           },
    { LLM_KV_ATTENTION_INDEXER_TOP_K,                "%s.attention.indexer.top_k"                },
    { LLM_KV_ATTENTION_SHARED_KV_LAYERS,             "%s.attention.shared_kv_layers"             },

    { LLM_KV_ROPE_DIMENSION_COUNT,           "%s.rope.dimension_count"                 },
    { LLM_KV_ROPE_DIMENSION_COUNT_SWA,       "%s.rope.dimension_count_swa"             },
    { LLM_KV_ROPE_DIMENSION_SECTIONS,        "%s.rope.dimension_sections"              },
    { LLM_KV_ROPE_FREQ_BASE,                 "%s.rope.freq_base"                       },
    { LLM_KV_ROPE_FREQ_BASE_SWA,             "%s.rope.freq_base_swa"                   },
    { LLM_KV_ROPE_SCALE_LINEAR,              "%s.rope.scale_linear"                    },
    { LLM_KV_ROPE_SCALING_TYPE,              "%s.rope.scaling.type"                    },
    { LLM_KV_ROPE_SCALING_FACTOR,            "%s.rope.scaling.factor"                  },
    { LLM_KV_ROPE_SCALING_ATTN_FACTOR,       "%s.rope.scaling.attn_factor"             },
    { LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,      "%s.rope.scaling.original_context_length" },
    { LLM_KV_ROPE_SCALING_FINETUNED,         "%s.rope.scaling.finetuned"               },
    { LLM_KV_ROPE_SCALING_YARN_LOG_MUL,      "%s.rope.scaling.yarn_log_multiplier"     },
    { LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR,   "%s.rope.scaling.yarn_ext_factor"         },
    { LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR,  "%s.rope.scaling.yarn_attn_factor"        },
    { LLM_KV_ROPE_SCALING_YARN_BETA_FAST,    "%s.rope.scaling.yarn_beta_fast"          },
    { LLM_KV_ROPE_SCALING_YARN_BETA_SLOW,    "%s.rope.scaling.yarn_beta_slow"          },

    { LLM_KV_SPLIT_NO,            "split.no"            },
    { LLM_KV_SPLIT_COUNT,         "split.count"         },
    { LLM_KV_SPLIT_TENSORS_COUNT, "split.tensors.count" },

    { LLM_KV_SSM_CONV_KERNEL,    "%s.ssm.conv_kernel"    },
    { LLM_KV_SSM_INNER_SIZE,     "%s.ssm.inner_size"     },
    { LLM_KV_SSM_STATE_SIZE,     "%s.ssm.state_size"     },
    { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
    { LLM_KV_SSM_GROUP_COUNT,    "%s.ssm.group_count"    },
    { LLM_KV_SSM_DT_B_C_RMS,     "%s.ssm.dt_b_c_rms"     },

    { LLM_KV_KDA_HEAD_DIM, "%s.kda.head_dim" },

    { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" },

    { LLM_KV_POSNET_EMBEDDING_LENGTH, "%s.posnet.embedding_length" },
    { LLM_KV_POSNET_BLOCK_COUNT,      "%s.posnet.block_count"      },

    { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
    { LLM_KV_CONVNEXT_BLOCK_COUNT,      "%s.convnext.block_count"      },

    { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },

    { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
    // sentence-transformers dense modules feature dims
    { LLM_KV_DENSE_2_FEAT_IN,        "%s.dense_2_feat_in"  },
    { LLM_KV_DENSE_2_FEAT_OUT,       "%s.dense_2_feat_out"  },
    { LLM_KV_DENSE_3_FEAT_IN,        "%s.dense_3_feat_in"   },
    { LLM_KV_DENSE_3_FEAT_OUT,       "%s.dense_3_feat_out"  },

    { LLM_KV_TOKENIZER_MODEL,                "tokenizer.ggml.model"                    },
    { LLM_KV_TOKENIZER_PRE,                  "tokenizer.ggml.pre"                      },
    { LLM_KV_TOKENIZER_LIST,                 "tokenizer.ggml.tokens"                   },
    { LLM_KV_TOKENIZER_TOKEN_TYPE,           "tokenizer.ggml.token_type"               },
    { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,     "tokenizer.ggml.token_type_count"         },
    { LLM_KV_TOKENIZER_SCORES,               "tokenizer.ggml.scores"                   },
    { LLM_KV_TOKENIZER_MERGES,               "tokenizer.ggml.merges"                   },
    { LLM_KV_TOKENIZER_BOS_ID,               "tokenizer.ggml.bos_token_id"             },
    { LLM_KV_TOKENIZER_EOS_ID,               "tokenizer.ggml.eos_token_id"             },
    { LLM_KV_TOKENIZER_EOT_ID,               "tokenizer.ggml.eot_token_id"             },
    { LLM_KV_TOKENIZER_EOM_ID,               "tokenizer.ggml.eom_token_id"             },
    { LLM_KV_TOKENIZER_UNK_ID,               "tokenizer.ggml.unknown_token_id"         },
    { LLM_KV_TOKENIZER_SEP_ID,               "tokenizer.ggml.seperator_token_id"       },
    { LLM_KV_TOKENIZER_PAD_ID,               "tokenizer.ggml.padding_token_id"         },
    { LLM_KV_TOKENIZER_CLS_ID,               "tokenizer.ggml.cls_token_id"             },
    { LLM_KV_TOKENIZER_MASK_ID,              "tokenizer.ggml.mask_token_id"            },
    { LLM_KV_TOKENIZER_ADD_BOS,              "tokenizer.ggml.add_bos_token"            },
    { LLM_KV_TOKENIZER_ADD_EOS,              "tokenizer.ggml.add_eos_token"            },
    { LLM_KV_TOKENIZER_ADD_SEP,              "tokenizer.ggml.add_sep_token"            },
    { LLM_KV_TOKENIZER_ADD_PREFIX,           "tokenizer.ggml.add_space_prefix"         },
    { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,      "tokenizer.ggml.remove_extra_whitespaces" },
    { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap"     },
    { LLM_KV_TOKENIZER_HF_JSON,              "tokenizer.huggingface.json"              },
    { LLM_KV_TOKENIZER_RWKV,                 "tokenizer.rwkv.world"                    },
    { LLM_KV_TOKENIZER_CHAT_TEMPLATE,        "tokenizer.chat_template"                 },
    { LLM_KV_TOKENIZER_FIM_PRE_ID,           "tokenizer.ggml.fim_pre_token_id"         },
    { LLM_KV_TOKENIZER_FIM_SUF_ID,           "tokenizer.ggml.fim_suf_token_id"         },
    { LLM_KV_TOKENIZER_FIM_MID_ID,           "tokenizer.ggml.fim_mid_token_id"         },
    { LLM_KV_TOKENIZER_FIM_PAD_ID,           "tokenizer.ggml.fim_pad_token_id"         },
    { LLM_KV_TOKENIZER_FIM_REP_ID,           "tokenizer.ggml.fim_rep_token_id"         },
    { LLM_KV_TOKENIZER_FIM_SEP_ID,           "tokenizer.ggml.fim_sep_token_id"         },

    { LLM_KV_ADAPTER_TYPE,                    "adapter.type"               },
    { LLM_KV_ADAPTER_LORA_ALPHA,              "adapter.lora.alpha"         },
    { LLM_KV_ADAPTER_LORA_TASK_NAME,          "adapter.lora.task_name"     },
    { LLM_KV_ADAPTER_LORA_PROMPT_PREFIX,      "adapter.lora.prompt_prefix" },
    { LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS, "adapter.alora.invocation_tokens" },

    { LLM_KV_XIELU_ALPHA_N,         "xielu.alpha_n"         },
    { LLM_KV_XIELU_ALPHA_P,         "xielu.alpha_p"         },
    { LLM_KV_XIELU_BETA,            "xielu.beta"            },
    { LLM_KV_XIELU_EPS,             "xielu.eps"             },

    // deprecated
    { LLM_KV_TOKENIZER_PREFIX_ID, "tokenizer.ggml.prefix_token_id" },
    { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
    { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
};
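
Keys in the map above that contain a "%s" placeholder are expanded with the
architecture name from LLM_ARCH_NAMES, so the same llm_kv value yields
"llama.context_length" for LLM_ARCH_LLAMA and "qwen2.context_length" for
LLM_ARCH_QWEN2, while keys without a placeholder (general.*, tokenizer.*,
split.*) are used verbatim. A minimal sketch of that expansion (illustrative
only, not the file's own implementation):

    // illustrative sketch; assumes <cstdio> and <string> are available
    static std::string llm_kv_name_sketch(llm_kv kv, llm_arch arch) {
        char buf[256];
        // the extra argument is simply ignored for keys without "%s"
        snprintf(buf, sizeof(buf), LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
        return buf;
    }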

static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
    { LLM_TENSOR_TOKEN_EMBD,                             "token_embd" },
    { LLM_TENSOR_OUTPUT_NORM,                            "output_norm" },
    { LLM_TENSOR_OUTPUT_NORM_LFM2,                       "token_embd_norm" }, // fix for wrong tensor name
    { LLM_TENSOR_OUTPUT,                                 "output" },
    { LLM_TENSOR_ROPE_FREQS,                             "rope_freqs" },
    { LLM_TENSOR_ATTN_NORM,                              "blk.%d.attn_norm" },
    { LLM_TENSOR_ATTN_Q,                                 "blk.%d.attn_q" },
    { LLM_TENSOR_ATTN_K,                                 "blk.%d.attn_k" },
    { LLM_TENSOR_ATTN_V,                                 "blk.%d.attn_v" },
    { LLM_TENSOR_ATTN_OUT,                               "blk.%d.attn_output" },
    { LLM_TENSOR_ATTN_ROT_EMBD,                          "blk.%d.attn_rot_embd" },
    { LLM_TENSOR_FFN_GATE_INP,                           "blk.%d.ffn_gate_inp" },
    { LLM_TENSOR_FFN_NORM,                               "blk.%d.ffn_norm" },
    { LLM_TENSOR_FFN_GATE,                               "blk.%d.ffn_gate" },
    { LLM_TENSOR_FFN_DOWN,                               "blk.%d.ffn_down" },
    { LLM_TENSOR_FFN_UP,                                 "blk.%d.ffn_up" },
    { LLM_TENSOR_FFN_GATE_EXP,                           "blk.%d.ffn_gate.%d" },
    { LLM_TENSOR_FFN_DOWN_EXP,                           "blk.%d.ffn_down.%d" },
    { LLM_TENSOR_FFN_UP_EXP,                             "blk.%d.ffn_up.%d" },
    { LLM_TENSOR_FFN_GATE_EXPS,                          "blk.%d.ffn_gate_exps" },
    { LLM_TENSOR_FFN_GATE_UP_EXPS,                       "blk.%d.ffn_gate_up_exps" },
    { LLM_TENSOR_FFN_DOWN_EXPS,                          "blk.%d.ffn_down_exps" },
    { LLM_TENSOR_FFN_UP_EXPS,                            "blk.%d.ffn_up_exps" },
    { LLM_TENSOR_ATTN_POST_NORM,                         "blk.%d.post_attention_norm" },
    { LLM_TENSOR_ATTN_Q_NORM,                            "blk.%d.attn_q_norm" },
    { LLM_TENSOR_ATTN_K_NORM,                            "blk.%d.attn_k_norm" },
    { LLM_TENSOR_ATTN_GATE,                              "blk.%d.attn_gate" },
    { LLM_TENSOR_FFN_POST_NORM,                          "blk.%d.post_ffw_norm" },
    { LLM_TENSOR_FFN_POST_NORM_1,                        "blk.%d.post_ffw_norm_1" },
    { LLM_TENSOR_FFN_POST_NORM_2,                        "blk.%d.post_ffw_norm_2" },
    { LLM_TENSOR_FFN_PRE_NORM_2,                         "blk.%d.pre_ffw_norm_2" },
    { LLM_TENSOR_FFN_GATE_SHEXP,                         "blk.%d.ffn_gate_shexp" },
    { LLM_TENSOR_FFN_UP_SHEXP,                           "blk.%d.ffn_up_shexp" },
    { LLM_TENSOR_FFN_DOWN_SHEXP,                         "blk.%d.ffn_down_shexp" },
    { LLM_TENSOR_FFN_EXP_PROBS_B,                        "blk.%d.exp_probs_b" },
    { LLM_TENSOR_FFN_LATENT_DOWN,                        "blk.%d.ffn_latent_down" },
    { LLM_TENSOR_FFN_LATENT_UP,                          "blk.%d.ffn_latent_up" },
    { LLM_TENSOR_ATTN_NORM_2,                            "blk.%d.attn_norm_2" },
    { LLM_TENSOR_ATTN_QKV,                               "blk.%d.attn_qkv" },
    { LLM_TENSOR_LAYER_OUT_NORM,                         "blk.%d.layer_output_norm" },
    { LLM_TENSOR_LAYER_OUT_SCALE,                        "blk.%d.layer_output_scale" },
    { LLM_TENSOR_ATTN_OUT_NORM,                          "blk.%d.attn_output_norm" },
    { LLM_TENSOR_POS_EMBD,                               "position_embd" },
    { LLM_TENSOR_FFN_ACT,                                "blk.%d.ffn.act" },
    { LLM_TENSOR_TOKEN_EMBD_NORM,                        "token_embd_norm" },
    { LLM_TENSOR_TOKEN_TYPES,                            "token_types" },
    { LLM_TENSOR_CLS,                                    "cls" },
    { LLM_TENSOR_CLS_OUT,                                "cls.output" },
    { LLM_TENSOR_CLS_NORM,                               "cls.norm" },
    { LLM_TENSOR_ENC_OUTPUT_NORM,                        "enc.output_norm" },
    { LLM_TENSOR_FFN_GATE_INP_SHEXP,                     "blk.%d.ffn_gate_inp_shexp" },
    { LLM_TENSOR_SSM_A_NOSCAN,                           "blk.%d.ssm_a" },
    { LLM_TENSOR_SSM_CONV1D,                             "blk.%d.ssm_conv1d" },
    { LLM_TENSOR_SSM_DT,                                 "blk.%d.ssm_dt" },
    { LLM_TENSOR_SSM_BETA_ALPHA,                         "blk.%d.ssm_ba" },
    { LLM_TENSOR_SSM_ALPHA,                              "blk.%d.ssm_alpha" },
    { LLM_TENSOR_SSM_IN,                                 "blk.%d.ssm_in" },
    { LLM_TENSOR_SSM_NORM,                               "blk.%d.ssm_norm" },
    { LLM_TENSOR_SSM_OUT,                                "blk.%d.ssm_out" },
    { LLM_TENSOR_ROPE_FACTORS_LONG,                      "rope_factors_long" },
    { LLM_TENSOR_ROPE_FACTORS_SHORT,                     "rope_factors_short" },
    { LLM_TENSOR_SSM_X,                                  "blk.%d.ssm_x" },
    { LLM_TENSOR_SSM_A,                                  "blk.%d.ssm_a" },
    { LLM_TENSOR_SSM_D,                                  "blk.%d.ssm_d" },
    { LLM_TENSOR_SSM_DT_NORM,                            "blk.%d.ssm_dt_norm" },
    { LLM_TENSOR_SSM_B_NORM,                             "blk.%d.ssm_b_norm" },
    { LLM_TENSOR_SSM_C_NORM,                             "blk.%d.ssm_c_norm" },
    { LLM_TENSOR_SSM_CONV1D_Q,                           "blk.%d.ssm_conv1d_q" },
    { LLM_TENSOR_SSM_CONV1D_K,                           "blk.%d.ssm_conv1d_k" },
    { LLM_TENSOR_SSM_CONV1D_V,                           "blk.%d.ssm_conv1d_v" },
    { LLM_TENSOR_SSM_F_A,                                "blk.%d.ssm_f_a" },
    { LLM_TENSOR_SSM_F_B,                                "blk.%d.ssm_f_b" },
    { LLM_TENSOR_SSM_BETA,                               "blk.%d.ssm_beta" },
    { LLM_TENSOR_SSM_G_A,                                "blk.%d.ssm_g_a" },
    { LLM_TENSOR_SSM_G_B,                                "blk.%d.ssm_g_b" },
    { LLM_TENSOR_SSM_NORM,                               "blk.%d.ssm_norm" },
    { LLM_TENSOR_ATTN_Q_A_NORM,                          "blk.%d.attn_q_a_norm" },
    { LLM_TENSOR_ATTN_KV_A_NORM,                         "blk.%d.attn_kv_a_norm" },
    { LLM_TENSOR_ATTN_Q_A,                               "blk.%d.attn_q_a" },
    { LLM_TENSOR_ATTN_Q_B,                               "blk.%d.attn_q_b" },
    { LLM_TENSOR_ATTN_KV_A_MQA,                          "blk.%d.attn_kv_a_mqa" },
    { LLM_TENSOR_ATTN_KV_B,                              "blk.%d.attn_kv_b" },
    { LLM_TENSOR_PER_LAYER_TOKEN_EMBD,                   "per_layer_token_embd" },
    { LLM_TENSOR_PER_LAYER_MODEL_PROJ,                   "per_layer_model_proj" },
    { LLM_TENSOR_PER_LAYER_PROJ_NORM,                    "per_layer_proj_norm" },
    { LLM_TENSOR_ALTUP_UNEMBD_PROJ,                      "altup_unembd_proj" },
    { LLM_TENSOR_ALTUP_PROJ,                             "altup_proj" },
    { LLM_TENSOR_PER_LAYER_INP_GATE,                     "blk.%d.inp_gate" },
    { LLM_TENSOR_PER_LAYER_PROJ,                         "blk.%d.proj" },
    { LLM_TENSOR_PER_LAYER_POST_NORM,                    "blk.%d.post_norm" },
    { LLM_TENSOR_ALTUP_CORRECT_COEF,                     "blk.%d.altup_correct_coef" },
    { LLM_TENSOR_ALTUP_CORRECT_SCALE,                    "blk.%d.altup_correct_scale" },
    { LLM_TENSOR_ALTUP_PREDICT_COEF,                     "blk.%d.altup_predict_coef" },
    { LLM_TENSOR_ALTUP_ROUTER,                           "blk.%d.altup_router" },
    { LLM_TENSOR_ALTUP_ROUTER_NORM,                      "blk.%d.altup_router_norm" },
    { LLM_TENSOR_LAUREL_L,                               "blk.%d.laurel_l" },
    { LLM_TENSOR_LAUREL_R,                               "blk.%d.laurel_r" },
    { LLM_TENSOR_LAUREL_POST_NORM,                       "blk.%d.laurel_post_norm" },
    { LLM_TENSOR_DENSE_2_OUT,                            "dense_2" },
    { LLM_TENSOR_DENSE_3_OUT,                            "dense_3" },
    { LLM_TENSOR_FFN_NORM_EXPS,                          "blk.%d.ffn_norm_exps" },
    { LLM_TENSOR_ATTN_K_B,                               "blk.%d.attn_k_b" },
    { LLM_TENSOR_ATTN_V_B,                               "blk.%d.attn_v_b" },
    { LLM_TENSOR_NEXTN_EH_PROJ,                          "blk.%d.nextn.eh_proj" },
    { LLM_TENSOR_NEXTN_EMBED_TOKENS,                     "blk.%d.nextn.embed_tokens" },
    { LLM_TENSOR_NEXTN_ENORM,                            "blk.%d.nextn.enorm" },
    { LLM_TENSOR_NEXTN_HNORM,                            "blk.%d.nextn.hnorm" },
    { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,                 "blk.%d.nextn.shared_head_head" },
    { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,                 "blk.%d.nextn.shared_head_norm" },
    { LLM_TENSOR_ATTN_SUB_NORM,                          "blk.%d.attn_sub_norm" },
    { LLM_TENSOR_FFN_SUB_NORM,                           "blk.%d.ffn_sub_norm" },
    { LLM_TENSOR_DEC_OUTPUT_NORM,                        "dec.output_norm" },
    { LLM_TENSOR_DEC_ATTN_NORM,                          "dec.blk.%d.attn_norm" },
    { LLM_TENSOR_DEC_ATTN_Q,                             "dec.blk.%d.attn_q" },
    { LLM_TENSOR_DEC_ATTN_K,                             "dec.blk.%d.attn_k" },
    { LLM_TENSOR_DEC_ATTN_V,                             "dec.blk.%d.attn_v" },
    { LLM_TENSOR_DEC_ATTN_OUT,                           "dec.blk.%d.attn_o" },
    { LLM_TENSOR_DEC_ATTN_REL_B,                         "dec.blk.%d.attn_rel_b" },
    { LLM_TENSOR_DEC_CROSS_ATTN_NORM,                    "dec.blk.%d.cross_attn_norm" },
    { LLM_TENSOR_DEC_CROSS_ATTN_Q,                       "dec.blk.%d.cross_attn_q" },
    { LLM_TENSOR_DEC_CROSS_ATTN_K,                       "dec.blk.%d.cross_attn_k" },
    { LLM_TENSOR_DEC_CROSS_ATTN_V,                       "dec.blk.%d.cross_attn_v" },
    { LLM_TENSOR_DEC_CROSS_ATTN_OUT,                     "dec.blk.%d.cross_attn_o" },
    { LLM_TENSOR_DEC_CROSS_ATTN_REL_B,                   "dec.blk.%d.cross_attn_rel_b" },
    { LLM_TENSOR_DEC_FFN_NORM,                           "dec.blk.%d.ffn_norm" },
    { LLM_TENSOR_DEC_FFN_GATE,                           "dec.blk.%d.ffn_gate" },
    { LLM_TENSOR_DEC_FFN_DOWN,                           "dec.blk.%d.ffn_down" },
    { LLM_TENSOR_DEC_FFN_UP,                             "dec.blk.%d.ffn_up" },
    { LLM_TENSOR_ENC_ATTN_NORM,                          "enc.blk.%d.attn_norm" },
    { LLM_TENSOR_ENC_ATTN_Q,                             "enc.blk.%d.attn_q" },
    { LLM_TENSOR_ENC_ATTN_K,                             "enc.blk.%d.attn_k" },
    { LLM_TENSOR_ENC_ATTN_V,                             "enc.blk.%d.attn_v" },
    { LLM_TENSOR_ENC_ATTN_OUT,                           "enc.blk.%d.attn_o" },
    { LLM_TENSOR_ENC_ATTN_REL_B,                         "enc.blk.%d.attn_rel_b" },
    { LLM_TENSOR_ENC_FFN_NORM,                           "enc.blk.%d.ffn_norm" },
    { LLM_TENSOR_ENC_FFN_GATE,                           "enc.blk.%d.ffn_gate" },
    { LLM_TENSOR_ENC_FFN_DOWN,                           "enc.blk.%d.ffn_down" },
    { LLM_TENSOR_ENC_FFN_UP,                             "enc.blk.%d.ffn_up" },
    { LLM_TENSOR_TIME_MIX_W1,                            "blk.%d.time_mix_w1" },
    { LLM_TENSOR_TIME_MIX_W2,                            "blk.%d.time_mix_w2" },
    { LLM_TENSOR_TIME_MIX_LERP_X,                        "blk.%d.time_mix_lerp_x" },
    { LLM_TENSOR_TIME_MIX_LERP_W,                        "blk.%d.time_mix_lerp_w" },
    { LLM_TENSOR_TIME_MIX_LERP_K,                        "blk.%d.time_mix_lerp_k" },
    { LLM_TENSOR_TIME_MIX_LERP_V,                        "blk.%d.time_mix_lerp_v" },
    { LLM_TENSOR_TIME_MIX_LERP_R,                        "blk.%d.time_mix_lerp_r" },
    { LLM_TENSOR_TIME_MIX_LERP_G,                        "blk.%d.time_mix_lerp_g" },
    { LLM_TENSOR_TIME_MIX_LERP_FUSED,                    "blk.%d.time_mix_lerp_fused" },
    { LLM_TENSOR_TIME_MIX_FIRST,                         "blk.%d.time_mix_first" },
    { LLM_TENSOR_TIME_MIX_DECAY,                         "blk.%d.time_mix_decay" },
    { LLM_TENSOR_TIME_MIX_DECAY_W1,                      "blk.%d.time_mix_decay_w1" },
    { LLM_TENSOR_TIME_MIX_DECAY_W2,                      "blk.%d.time_mix_decay_w2" },
    { LLM_TENSOR_TIME_MIX_KEY,                           "blk.%d.time_mix_key" },
    { LLM_TENSOR_TIME_MIX_VALUE,                         "blk.%d.time_mix_value" },
    { LLM_TENSOR_TIME_MIX_RECEPTANCE,                    "blk.%d.time_mix_receptance" },
    { LLM_TENSOR_TIME_MIX_GATE,                          "blk.%d.time_mix_gate" },
    { LLM_TENSOR_TIME_MIX_LN,                            "blk.%d.time_mix_ln" },
    { LLM_TENSOR_TIME_MIX_OUTPUT,                        "blk.%d.time_mix_output" },
    { LLM_TENSOR_CHANNEL_MIX_LERP_K,                     "blk.%d.channel_mix_lerp_k" },
    { LLM_TENSOR_CHANNEL_MIX_LERP_R,                     "blk.%d.channel_mix_lerp_r" },
    { LLM_TENSOR_CHANNEL_MIX_KEY,                        "blk.%d.channel_mix_key" },
    { LLM_TENSOR_CHANNEL_MIX_VALUE,                      "blk.%d.channel_mix_value" },
    { LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,                 "blk.%d.channel_mix_receptance" },
    { LLM_TENSOR_TIME_MIX_W0,                            "blk.%d.time_mix_w0" },
    { LLM_TENSOR_TIME_MIX_A0,                            "blk.%d.time_mix_a0" },
    { LLM_TENSOR_TIME_MIX_A1,                            "blk.%d.time_mix_a1" },
    { LLM_TENSOR_TIME_MIX_A2,                            "blk.%d.time_mix_a2" },
    { LLM_TENSOR_TIME_MIX_V0,                            "blk.%d.time_mix_v0" },
    { LLM_TENSOR_TIME_MIX_V1,                            "blk.%d.time_mix_v1" },
    { LLM_TENSOR_TIME_MIX_V2,                            "blk.%d.time_mix_v2" },
    { LLM_TENSOR_TIME_MIX_G1,                            "blk.%d.time_mix_g1" },
    { LLM_TENSOR_TIME_MIX_G2,                            "blk.%d.time_mix_g2" },
    { LLM_TENSOR_TIME_MIX_K_K,                           "blk.%d.time_mix_k_k" },
    { LLM_TENSOR_TIME_MIX_K_A,                           "blk.%d.time_mix_k_a" },
    { LLM_TENSOR_TIME_MIX_R_K,                           "blk.%d.time_mix_r_k" },
    { LLM_TENSOR_CONV1D,                                 "conv1d" },
    { LLM_TENSOR_CONVNEXT_DW,                            "convnext.%d.dw" },
    { LLM_TENSOR_CONVNEXT_NORM,                          "convnext.%d.norm" },
    { LLM_TENSOR_CONVNEXT_PW1,                           "convnext.%d.pw1" },
    { LLM_TENSOR_CONVNEXT_PW2,                           "convnext.%d.pw2" },
    { LLM_TENSOR_CONVNEXT_GAMMA,                         "convnext.%d.gamma" },
    { LLM_TENSOR_POS_NET_CONV1,                          "posnet.%d.conv1" },
    { LLM_TENSOR_POS_NET_CONV2,                          "posnet.%d.conv2" },
    { LLM_TENSOR_POS_NET_NORM,                           "posnet.%d.norm" },
    { LLM_TENSOR_POS_NET_NORM1,                          "posnet.%d.norm1" },
    { LLM_TENSOR_POS_NET_NORM2,                          "posnet.%d.norm2" },
    { LLM_TENSOR_POS_NET_ATTN_NORM,                      "posnet.%d.attn_norm" },
    { LLM_TENSOR_POS_NET_ATTN_Q,                         "posnet.%d.attn_q" },
    { LLM_TENSOR_POS_NET_ATTN_K,                         "posnet.%d.attn_k" },
    { LLM_TENSOR_POS_NET_ATTN_V,                         "posnet.%d.attn_v" },
    { LLM_TENSOR_POS_NET_ATTN_OUT,                       "posnet.%d.attn_output" },
    { LLM_TENSOR_ATTN_SINKS,                             "blk.%d.attn_sinks" },
    { LLM_TENSOR_SHORTCONV_CONV,                         "blk.%d.shortconv.conv" },
    { LLM_TENSOR_SHORTCONV_INPROJ,                       "blk.%d.shortconv.in_proj" },
    { LLM_TENSOR_SHORTCONV_OUTPROJ,                      "blk.%d.shortconv.out_proj" },
    { LLM_TENSOR_FFN_GATE_CHEXPS,                        "blk.%d.ffn_gate_chexps" },
    { LLM_TENSOR_FFN_DOWN_CHEXPS,                        "blk.%d.ffn_down_chexps" },
    { LLM_TENSOR_FFN_UP_CHEXPS,                          "blk.%d.ffn_up_chexps" },
    { LLM_TENSOR_VISEXP_ATTN_QKV,                        "blk.%d.vis_attn_qkv" },
    { LLM_TENSOR_VISEXP_ATTN_OUT,                        "blk.%d.vis_attn_output" },
    { LLM_TENSOR_VISEXP_FFN_GATE,                        "blk.%d.vis_gate" },
    { LLM_TENSOR_VISEXP_FFN_DOWN,                        "blk.%d.vis_down" },
    { LLM_TENSOR_VISEXP_FFN_UP,                          "blk.%d.vis_up" },
    { LLM_TENSOR_INDEXER_K_NORM,                         "blk.%d.indexer.k_norm" },
    { LLM_TENSOR_INDEXER_PROJ,                           "blk.%d.indexer.proj" },
    { LLM_TENSOR_INDEXER_ATTN_K,                         "blk.%d.indexer.attn_k" },
    { LLM_TENSOR_INDEXER_ATTN_Q_B,                       "blk.%d.indexer.attn_q_b" },
};
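
Per-layer tensor names in the map above carry a "blk.%d" (or "dec.blk.%d",
"enc.blk.%d", "posnet.%d", "convnext.%d") prefix whose "%d" is expanded with
the layer index; the final tensor name in a GGUF file also carries a suffix
such as ".weight" or ".bias". A rough illustration (a sketch, not the file's
own implementation): LLM_TENSOR_ATTN_Q for layer 3 expands to "blk.3.attn_q".

    // illustrative sketch; assumes <cstdio> and <string> are available
    static std::string llm_tensor_name_sketch(llm_tensor t, int layer, const char * suffix) {
        char buf[256];
        snprintf(buf, sizeof(buf), LLM_TENSOR_NAMES.at(t), layer);
        return std::string(buf) + suffix;  // e.g. "blk.3.attn_q" + ".weight"
    }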

// declare information about the model weight tensors:
// - the layer in which the tensor is going to be used. this is needed in order to assign the correct buffer type for the weight
// - the operator which is going to use the weight. this is needed to determine if the respective backend supports the operator
//
// for example, input layers are usually assigned to CPU/host buffer types
//
// a mismatch between the declared information and the actual layer/op in which the tensor is used can lead to sub-optimal
//   assignment of the buffer types and extra overhead during computation
// example: https://github.com/ggml-org/llama.cpp/pull/17548
//
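// for instance, LLM_TENSOR_TOKEN_EMBD below is declared as {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}:
//   an input-layer tensor consumed by GGML_OP_GET_ROWS, which is typically kept in a CPU/host buffer,
//   while LLM_TENSOR_ATTN_Q is a repeating-layer tensor consumed by GGML_OP_MUL_MAT
//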
static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
    {LLM_TENSOR_TOKEN_EMBD,                 {LLM_TENSOR_LAYER_INPUT,     GGML_OP_GET_ROWS}},
    {LLM_TENSOR_POS_EMBD,                   {LLM_TENSOR_LAYER_INPUT,     GGML_OP_GET_ROWS}},
    {LLM_TENSOR_TOKEN_TYPES,                {LLM_TENSOR_LAYER_INPUT,     GGML_OP_GET_ROWS}},
    {LLM_TENSOR_TOKEN_EMBD_NORM,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},  // do the norms on the first layer (not the input layer)
    {LLM_TENSOR_OUTPUT,                     {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CLS,                        {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CLS_OUT,                    {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CLS_NORM,                   {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL}},
    {LLM_TENSOR_DENSE_2_OUT,                {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}}, // Dense layer output
    {LLM_TENSOR_DENSE_3_OUT,                {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}}, // Dense layer output
    {LLM_TENSOR_OUTPUT_NORM,                {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL}},
    {LLM_TENSOR_OUTPUT_NORM_LFM2,           {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL}},
    {LLM_TENSOR_DEC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL}},
    {LLM_TENSOR_ENC_OUTPUT_NORM,            {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL}},
    {LLM_TENSOR_ROPE_FREQS,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
    {LLM_TENSOR_ROPE_FACTORS_LONG,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
    {LLM_TENSOR_ROPE_FACTORS_SHORT,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ROPE}},
    {LLM_TENSOR_ATTN_Q,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_K,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_V,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_QKV,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_OUT,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_GATE,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_GATE,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_DOWN,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_UP,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_DOWN_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_GATE_SHEXP,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_UP_SHEXP,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_Q_A,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_Q_B,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_KV_A_MQA,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_KV_B,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_K_B,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_V_B,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ATTN_SINKS,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SCALE}},
    {LLM_TENSOR_DEC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_ATTN_V,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_ATTN_OUT,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_CROSS_ATTN_Q,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_CROSS_ATTN_K,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_CROSS_ATTN_V,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_CROSS_ATTN_OUT,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_FFN_GATE,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_FFN_DOWN,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_DEC_FFN_UP,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_ATTN_Q,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_ATTN_K,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_ATTN_V,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_ATTN_OUT,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_FFN_GATE,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_FFN_DOWN,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ENC_FFN_UP,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_GATE_INP_SHEXP,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_GATE_INP,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_IN,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_X,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_DT,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_OUT,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_ALPHA,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_BETA_ALPHA,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_W1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_W2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_A1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_A2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_V1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_V2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_G1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_G2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_DECAY_W1,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_DECAY_W2,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_KEY,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_VALUE,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_RECEPTANCE,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_GATE,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_OUTPUT,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CHANNEL_MIX_KEY,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CHANNEL_MIX_VALUE,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_FFN_ACT,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}},
    {LLM_TENSOR_SSM_CONV1D,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
    {LLM_TENSOR_SSM_A,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}},
    {LLM_TENSOR_SSM_A_NOSCAN,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, // a version of SSM_A used for MUL instead of SSM_SCAN
    {LLM_TENSOR_SSM_DT_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_B_NORM,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_C_NORM,                 {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_D,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_NORM,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    // Kimi KDA - Conv tensors are 4D [d_conv, 1, d_inner, 1], reshaped to 2D at runtime
    {LLM_TENSOR_SSM_CONV1D_Q,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_CONV1D_K,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_CONV1D_V,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_SSM_F_A,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_F_B,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_BETA,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_G_A,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_SSM_G_B,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_TIME_MIX_LERP_X,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_TIME_MIX_LN,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_CHANNEL_MIX_LERP_K,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_CHANNEL_MIX_LERP_R,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_TIME_MIX_K_K,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_TIME_MIX_K_A,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_TIME_MIX_R_K,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_TIME_MIX_LERP_W,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_LERP_K,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_LERP_V,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_LERP_R,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_LERP_G,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_LERP_FUSED,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_DECAY,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_W0,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_A0,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_V0,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    {LLM_TENSOR_TIME_MIX_FIRST,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_RWKV_WKV6}},
    {LLM_TENSOR_ATTN_NORM,                  {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_NORM_2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_OUT_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_POST_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_FFN_NORM,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_FFN_PRE_NORM_2,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_FFN_POST_NORM_1,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_FFN_POST_NORM_2,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_FFN_POST_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_FFN_NORM_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_Q_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_K_NORM,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_LAYER_OUT_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_LAYER_OUT_SCALE,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_Q_A_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_KV_A_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_SUB_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_FFN_SUB_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_DEC_ATTN_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_DEC_CROSS_ATTN_NORM,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_DEC_FFN_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ENC_ATTN_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ENC_FFN_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_DEC_ATTN_REL_B,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
    {LLM_TENSOR_ENC_ATTN_REL_B,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_GET_ROWS}},
    {LLM_TENSOR_FFN_DOWN_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_GATE_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_UP_EXPS,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_GATE_UP_EXPS,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_DOWN_CHEXPS,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_GATE_CHEXPS,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_UP_CHEXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_EXP_PROBS_B,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    // altup / laurel (gemma 3n)
    {LLM_TENSOR_PER_LAYER_TOKEN_EMBD,       {LLM_TENSOR_LAYER_INPUT,     GGML_OP_GET_ROWS}},
    {LLM_TENSOR_PER_LAYER_MODEL_PROJ,       {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_PER_LAYER_PROJ_NORM,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ALTUP_PROJ,                 {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ALTUP_UNEMBD_PROJ,          {LLM_TENSOR_LAYER_OUTPUT,    GGML_OP_MUL_MAT}},
    {LLM_TENSOR_PER_LAYER_INP_GATE,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_PER_LAYER_PROJ,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_PER_LAYER_POST_NORM,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ALTUP_CORRECT_COEF,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ALTUP_CORRECT_SCALE,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ALTUP_PREDICT_COEF,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ALTUP_ROUTER,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_ALTUP_ROUTER_NORM,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_LAUREL_L,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_LAUREL_R,                   {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_LAUREL_POST_NORM,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    // this tensor is loaded for T5, but never used
    {LLM_TENSOR_DEC_CROSS_ATTN_REL_B,       {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
    {LLM_TENSOR_CONV1D,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
    {LLM_TENSOR_POS_NET_NORM,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_POS_NET_NORM1,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_POS_NET_NORM2,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_POS_NET_CONV1,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
    {LLM_TENSOR_POS_NET_CONV2,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
    {LLM_TENSOR_POS_NET_ATTN_NORM,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_POS_NET_ATTN_Q,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_POS_NET_ATTN_K,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_POS_NET_ATTN_V,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_POS_NET_ATTN_OUT,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CONVNEXT_DW,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_IM2COL}},
    {LLM_TENSOR_CONVNEXT_NORM,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_CONVNEXT_PW1,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CONVNEXT_PW2,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
    {LLM_TENSOR_CONVNEXT_GAMMA,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
745
    {LLM_TENSOR_SHORTCONV_CONV,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
746
    {LLM_TENSOR_SHORTCONV_INPROJ,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
747
    {LLM_TENSOR_SHORTCONV_OUTPROJ,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
748
    {LLM_TENSOR_VISEXP_ATTN_QKV,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
749
    {LLM_TENSOR_VISEXP_ATTN_OUT,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
750
    {LLM_TENSOR_VISEXP_FFN_GATE,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
751
    {LLM_TENSOR_VISEXP_FFN_DOWN,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
752
    {LLM_TENSOR_VISEXP_FFN_UP,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
753
    {LLM_TENSOR_INDEXER_K_NORM,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
754
    {LLM_TENSOR_INDEXER_PROJ,               {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
755
    {LLM_TENSOR_INDEXER_ATTN_K,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
756
    {LLM_TENSOR_INDEXER_ATTN_Q_B,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
757
    // NextN/MTP tensors are currently ignored (reserved for future MTP support)
758
    // These tensors only exist in the last layer(s) and are treated as output tensors
759
    {LLM_TENSOR_NEXTN_EH_PROJ,              {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
760
    {LLM_TENSOR_NEXTN_EMBED_TOKENS,         {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
761
    {LLM_TENSOR_NEXTN_ENORM,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
762
    {LLM_TENSOR_NEXTN_HNORM,                {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
763
    {LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
764
    {LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,     {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
765
    // Nemotron 3 Super
766
    {LLM_TENSOR_FFN_LATENT_DOWN,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
767
    {LLM_TENSOR_FFN_LATENT_UP,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
768
};
769
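Each row above pairs a tensor id with its layer class (input, repeating, or output) and the ggml op that tensor primarily feeds. A minimal sketch of telling the three layer classes apart, assuming the enum is named llm_tensor_layer in llama-arch.h (only the three enumerator values actually appear in this listing):

    #include "llama-arch.h" // internal llama.cpp header; assumed available in-tree

    // maps a layer class from the table above to a readable label
    static const char * tensor_layer_name(llm_tensor_layer layer) {
        switch (layer) {
            case LLM_TENSOR_LAYER_INPUT:     return "input";     // e.g. token embeddings
            case LLM_TENSOR_LAYER_REPEATING: return "repeating"; // per-block tensors
            case LLM_TENSOR_LAYER_OUTPUT:    return "output";    // e.g. output head / final norm
            default:                         return "unknown";
        }
    }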
770 | 0 | LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
771 |   |
772 | 0 | std::string LLM_KV::operator()(llm_kv kv) const {
773 | 0 |     std::string name = ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
774 |   |
775 | 0 |     if (suffix != nullptr) {
776 | 0 |         name += ".";
777 | 0 |         name += suffix;
778 | 0 |     }
779 |   |
780 | 0 |     return name;
781 | 0 | }
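LLM_KV::operator() above is entirely uncovered; it expands the per-architecture key template for a given llm_kv id and appends an optional suffix. A minimal sketch of a call pattern that would exercise it, assuming the enumerators LLM_KV_CONTEXT_LENGTH and LLM_ARCH_LLAMA declared elsewhere in llama-arch.h (the "extra" suffix is purely illustrative):

    #include "llama-arch.h" // internal llama.cpp header

    #include <cstdio>

    int main() {
        // bind the helper to one architecture, then format a metadata key;
        // for the llama arch this yields something like "llama.context_length"
        LLM_KV kv(LLM_ARCH_LLAMA, nullptr);
        std::printf("%s\n", kv(LLM_KV_CONTEXT_LENGTH).c_str());

        // a non-null suffix is appended as ".<suffix>" by the code above
        LLM_KV kv_suffixed(LLM_ARCH_LLAMA, "extra");
        std::printf("%s\n", kv_suffixed(LLM_KV_CONTEXT_LENGTH).c_str());
        return 0;
    }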
782 |   |
783 |   | LLM_TN_IMPL::LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid)
784 | 0 |     : arch(arch), tensor(tensor), suffix(suffix), bid(bid), xid(xid) {}
785 |   |
786 | 0 | std::string LLM_TN_IMPL::str() const {
787 | 0 |     if (LLM_TENSOR_NAMES.find(tensor) == LLM_TENSOR_NAMES.end()) {
788 | 0 |         GGML_ABORT("unknown tensor name for tensor id %d", static_cast<int>(tensor));
789 | 0 |     }
790 |   |
791 | 0 |     std::string name = ::format(LLM_TENSOR_NAMES.at(tensor), bid, xid);
792 | 0 |     if (suffix != nullptr) {
793 | 0 |         name += ".";
794 | 0 |         name += suffix;
795 | 0 |     }
796 |   |
797 | 0 |     return name;
798 | 0 | }
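LLM_TN_IMPL::str() composes a tensor name from the per-tensor template, the block index bid, the extra index xid, and an optional suffix. A minimal sketch of driving it through the constructor shown above; LLM_TENSOR_ATTN_Q_NORM is taken from the tensor table earlier in this file, while the "weight" suffix and the indices are illustrative:

    #include "llama-arch.h" // internal llama.cpp header

    #include <cstdio>

    int main() {
        // block 0, no extra index (-1), with a ".weight" suffix appended by str()
        LLM_TN_IMPL tn(LLM_ARCH_LLAMA, LLM_TENSOR_ATTN_Q_NORM, "weight", 0, -1);
        std::printf("%s\n", tn.str().c_str());
        return 0;
    }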
799 |   |
800 | 0 | std::vector<llm_arch> llm_arch_all() {
801 | 0 |     std::vector<llm_arch> ret;
802 | 0 |     ret.reserve(LLM_ARCH_NAMES.size());
803 | 0 |     for (const auto & [arch, _] : LLM_ARCH_NAMES) {
804 | 0 |         ret.push_back(arch);
805 | 0 |     }
806 | 0 |     return ret;
807 | 0 | }
808 |   |
809 | 0 | const char * llm_arch_name(llm_arch arch) {
810 | 0 |     auto it = LLM_ARCH_NAMES.find(arch);
811 | 0 |     if (it == LLM_ARCH_NAMES.end()) {
812 | 0 |         return "unknown";
813 | 0 |     }
814 | 0 |     return it->second;
815 | 0 | }
816 |   |
817 | 0 | llm_arch llm_arch_from_string(const std::string & name) {
818 | 0 |     for (const auto & kv : LLM_ARCH_NAMES) { // NOLINT
819 | 0 |         if (kv.second == name) {
820 | 0 |             return kv.first;
821 | 0 |         }
822 | 0 |     }
823 |   |
824 | 0 |     return LLM_ARCH_UNKNOWN;
825 | 0 | }
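The three architecture helpers above (llm_arch_all, llm_arch_name, llm_arch_from_string) show no coverage. A minimal sketch of a round-trip check that would exercise them, assuming only the internal header is available to the test:

    #include "llama-arch.h" // internal llama.cpp header

    #include <cstdio>

    int main() {
        // enumerate every registered architecture, print its canonical name,
        // and verify the name parses back to the same enumerator
        for (llm_arch arch : llm_arch_all()) {
            const char * name = llm_arch_name(arch);
            if (llm_arch_from_string(name) != arch) {
                std::printf("round-trip failed for %s\n", name);
                return 1;
            }
        }

        // unrecognized names fall through to LLM_ARCH_UNKNOWN
        return llm_arch_from_string("no-such-arch") == LLM_ARCH_UNKNOWN ? 0 : 1;
    }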
826 |   |
827 | 0 | const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) {
828 | 0 |     return LLM_TENSOR_INFOS.at(tensor);
829 | 0 | }
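llm_tensor_info_for is the lookup into the LLM_TENSOR_INFOS table above; because it uses .at(), a tensor id without an entry throws std::out_of_range. A minimal sketch of a lookup, assuming the llm_tensor_info fields are named layer and op as the initializers in the table suggest:

    #include "llama-arch.h" // internal llama.cpp header

    #include <cstdio>

    int main() {
        // the table registers FFN_DOWN_EXPS as a repeating-layer tensor
        // consumed by GGML_OP_MUL_MAT_ID
        const llm_tensor_info & info = llm_tensor_info_for(LLM_TENSOR_FFN_DOWN_EXPS);

        std::printf("repeating:  %d\n", info.layer == LLM_TENSOR_LAYER_REPEATING); // 1
        std::printf("mul_mat_id: %d\n", info.op == GGML_OP_MUL_MAT_ID);            // 1
        return 0;
    }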
830 |   |
831 | 0 | bool llm_arch_is_recurrent(const llm_arch & arch) {
832 | 0 |     switch (arch) {
833 | 0 |         case LLM_ARCH_MAMBA:
834 | 0 |         case LLM_ARCH_MAMBA2:
835 | 0 |         case LLM_ARCH_RWKV6:
836 | 0 |         case LLM_ARCH_RWKV6QWEN2:
837 | 0 |         case LLM_ARCH_RWKV7:
838 | 0 |         case LLM_ARCH_ARWKV7:
839 | 0 |             return true;
840 | 0 |         default:
841 | 0 |             return false;
842 | 0 |     }
843 | 0 | }
844 |   |
845 | 0 | bool llm_arch_is_hybrid(const llm_arch & arch) {
846 | 0 |     switch (arch) {
847 | 0 |         case LLM_ARCH_JAMBA:
848 | 0 |         case LLM_ARCH_FALCON_H1:
849 | 0 |         case LLM_ARCH_PLAMO2:
850 | 0 |         case LLM_ARCH_GRANITE_HYBRID:
851 | 0 |         case LLM_ARCH_LFM2:
852 | 0 |         case LLM_ARCH_LFM2MOE:
853 | 0 |         case LLM_ARCH_NEMOTRON_H:
854 | 0 |         case LLM_ARCH_NEMOTRON_H_MOE:
855 | 0 |         case LLM_ARCH_QWEN3NEXT:
856 | 0 |         case LLM_ARCH_KIMI_LINEAR:
857 | 0 |         case LLM_ARCH_QWEN35:
858 | 0 |         case LLM_ARCH_QWEN35MOE:
859 | 0 |             return true;
860 | 0 |         default:
861 | 0 |             return false;
862 | 0 |     }
863 | 0 | }
864 |   |
865 | 0 | bool llm_arch_is_diffusion(const llm_arch & arch) {
866 | 0 |     switch (arch) {
867 | 0 |         case LLM_ARCH_DREAM:
868 | 0 |         case LLM_ARCH_LLADA:
869 | 0 |         case LLM_ARCH_LLADA_MOE:
870 | 0 |         case LLM_ARCH_RND1:
871 | 0 |             return true;
872 | 0 |         default:
873 | 0 |             return false;
874 | 0 |     }
875 | 0 | }
876 |   |
877 | 0 | bool llm_arch_supports_sm_tensor(const llm_arch & arch) {
878 | 0 |     switch (arch) {
879 | 0 |         case LLM_ARCH_GROK:
880 | 0 |         case LLM_ARCH_MPT:
881 | 0 |         case LLM_ARCH_PLAMO2:
882 | 0 |         case LLM_ARCH_MINICPM3:
883 | 0 |         case LLM_ARCH_GEMMA3N:
884 | 0 |         case LLM_ARCH_MAMBA:
885 | 0 |         case LLM_ARCH_MAMBA2:
886 | 0 |         case LLM_ARCH_JAMBA:
887 | 0 |         case LLM_ARCH_FALCON_H1:
888 | 0 |         case LLM_ARCH_OLMO2:
889 | 0 |         case LLM_ARCH_OLMOE:
890 | 0 |         case LLM_ARCH_DEEPSEEK2:
891 | 0 |         case LLM_ARCH_GLM_DSA:
892 | 0 |         case LLM_ARCH_BITNET:
893 | 0 |         case LLM_ARCH_T5:
894 | 0 |         case LLM_ARCH_NEMOTRON_H:
895 | 0 |         case LLM_ARCH_NEMOTRON_H_MOE:
896 | 0 |         case LLM_ARCH_GRANITE_HYBRID:
897 | 0 |         case LLM_ARCH_LFM2:
898 | 0 |         case LLM_ARCH_LFM2MOE:
899 | 0 |         case LLM_ARCH_MINIMAX_M2:
900 | 0 |         case LLM_ARCH_MISTRAL4:
901 | 0 |         case LLM_ARCH_KIMI_LINEAR:
902 | 0 |             return false;
903 | 0 |         default:
904 | 0 |             return true;
905 | 0 |     }
906 | 0 | }
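The four classification predicates above are likewise uncovered. A minimal sketch of spot-checking them against cases that appear in their switch statements: Mamba is listed in llm_arch_is_recurrent, Jamba in llm_arch_is_hybrid, Dream in llm_arch_is_diffusion, and Grok in the exclusion list of llm_arch_supports_sm_tensor.

    #include "llama-arch.h" // internal llama.cpp header

    #include <cstdio>

    int main() {
        std::printf("mamba recurrent: %d\n", llm_arch_is_recurrent(LLM_ARCH_MAMBA));      // 1
        std::printf("jamba hybrid:    %d\n", llm_arch_is_hybrid(LLM_ARCH_JAMBA));         // 1
        std::printf("dream diffusion: %d\n", llm_arch_is_diffusion(LLM_ARCH_DREAM));      // 1
        std::printf("grok sm tensor:  %d\n", llm_arch_supports_sm_tensor(LLM_ARCH_GROK)); // 0
        return 0;
    }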