/src/llama.cpp/src/llama-arch.h
Line | Count | Source |
1 | | #pragma once |
2 | | |
3 | | #include "ggml.h" // ggml_op |
4 | | |
5 | | #include <string> |
6 | | #include <set> |
7 | | #include <vector> |
8 | | |
9 | | // |
10 | | // gguf constants (sync with gguf.py) |
11 | | // |
12 | | |
// Identifiers for the model architectures this header knows about.
// No explicit values are assigned, so enumerators are numbered sequentially
// from 0 in declaration order; inserting or reordering entries changes the
// numeric value of every entry that follows.
// LLM_ARCH_UNKNOWN is declared last.
// NOTE(review): presumably the fallback for unrecognised names in
// llm_arch_from_string() - confirm in the implementation file.
13 | | enum llm_arch { |
14 | | LLM_ARCH_CLIP, |
15 | | LLM_ARCH_LLAMA, |
16 | | LLM_ARCH_LLAMA4, |
17 | | LLM_ARCH_DECI, |
18 | | LLM_ARCH_FALCON, |
19 | | LLM_ARCH_BAICHUAN, |
20 | | LLM_ARCH_GROK, |
21 | | LLM_ARCH_GPT2, |
22 | | LLM_ARCH_GPTJ, |
23 | | LLM_ARCH_GPTNEOX, |
24 | | LLM_ARCH_MPT, |
25 | | LLM_ARCH_STARCODER, |
26 | | LLM_ARCH_REFACT, |
27 | | LLM_ARCH_BERT, |
28 | | LLM_ARCH_MODERN_BERT, |
29 | | LLM_ARCH_NOMIC_BERT, |
30 | | LLM_ARCH_NOMIC_BERT_MOE, |
31 | | LLM_ARCH_NEO_BERT, |
32 | | LLM_ARCH_JINA_BERT_V2, |
33 | | LLM_ARCH_JINA_BERT_V3, |
34 | | LLM_ARCH_EUROBERT, |
35 | | LLM_ARCH_BLOOM, |
36 | | LLM_ARCH_STABLELM, |
37 | | LLM_ARCH_QWEN, |
38 | | LLM_ARCH_QWEN2, |
39 | | LLM_ARCH_QWEN2MOE, |
40 | | LLM_ARCH_QWEN2VL, |
41 | | LLM_ARCH_QWEN3, |
42 | | LLM_ARCH_QWEN3MOE, |
43 | | LLM_ARCH_QWEN3NEXT, |
44 | | LLM_ARCH_QWEN3VL, |
45 | | LLM_ARCH_QWEN3VLMOE, |
46 | | LLM_ARCH_QWEN35, |
47 | | LLM_ARCH_QWEN35MOE, |
48 | | LLM_ARCH_PHI2, |
49 | | LLM_ARCH_PHI3, |
50 | | LLM_ARCH_PHIMOE, |
51 | | LLM_ARCH_PLAMO, |
52 | | LLM_ARCH_PLAMO2, |
53 | | LLM_ARCH_PLAMO3, |
54 | | LLM_ARCH_CODESHELL, |
55 | | LLM_ARCH_ORION, |
56 | | LLM_ARCH_INTERNLM2, |
57 | | LLM_ARCH_MINICPM, |
58 | | LLM_ARCH_MINICPM3, |
59 | | LLM_ARCH_GEMMA, |
60 | | LLM_ARCH_GEMMA2, |
61 | | LLM_ARCH_GEMMA3, |
62 | | LLM_ARCH_GEMMA3N, |
63 | | LLM_ARCH_GEMMA4, |
64 | | LLM_ARCH_GEMMA4_ASSISTANT, |
65 | | LLM_ARCH_GEMMA_EMBEDDING, |
66 | | LLM_ARCH_STARCODER2, |
67 | | LLM_ARCH_MAMBA, |
68 | | LLM_ARCH_MAMBA2, |
69 | | LLM_ARCH_JAMBA, |
70 | | LLM_ARCH_FALCON_H1, |
71 | | LLM_ARCH_XVERSE, |
72 | | LLM_ARCH_COMMAND_R, |
73 | | LLM_ARCH_COHERE2, |
74 | | LLM_ARCH_DBRX, |
75 | | LLM_ARCH_OLMO, |
76 | | LLM_ARCH_OLMO2, |
77 | | LLM_ARCH_OLMOE, |
78 | | LLM_ARCH_OPENELM, |
79 | | LLM_ARCH_ARCTIC, |
80 | | LLM_ARCH_DEEPSEEK, |
81 | | LLM_ARCH_DEEPSEEK2, |
82 | | LLM_ARCH_DEEPSEEK2OCR, |
83 | | LLM_ARCH_DEEPSEEK32, |
84 | | LLM_ARCH_CHATGLM, |
85 | | LLM_ARCH_GLM4, |
86 | | LLM_ARCH_GLM4_MOE, |
87 | | LLM_ARCH_GLM_DSA, |
88 | | LLM_ARCH_BITNET, |
89 | | LLM_ARCH_T5, |
90 | | LLM_ARCH_T5ENCODER, |
91 | | LLM_ARCH_JAIS, |
92 | | LLM_ARCH_JAIS2, |
93 | | LLM_ARCH_NEMOTRON, |
94 | | LLM_ARCH_NEMOTRON_H, |
95 | | LLM_ARCH_NEMOTRON_H_MOE, |
96 | | LLM_ARCH_EXAONE, |
97 | | LLM_ARCH_EXAONE4, |
98 | | LLM_ARCH_EXAONE_MOE, |
99 | | LLM_ARCH_RWKV6, |
100 | | LLM_ARCH_RWKV6QWEN2, |
101 | | LLM_ARCH_RWKV7, |
102 | | LLM_ARCH_ARWKV7, |
103 | | LLM_ARCH_GRANITE, |
104 | | LLM_ARCH_GRANITE_MOE, |
105 | | LLM_ARCH_GRANITE_HYBRID, |
106 | | LLM_ARCH_CHAMELEON, |
107 | | LLM_ARCH_WAVTOKENIZER_DEC, |
108 | | LLM_ARCH_PLM, |
109 | | LLM_ARCH_BAILINGMOE, |
110 | | LLM_ARCH_BAILINGMOE2, |
111 | | LLM_ARCH_DOTS1, |
112 | | LLM_ARCH_ARCEE, |
113 | | LLM_ARCH_AFMOE, |
114 | | LLM_ARCH_ERNIE4_5, |
115 | | LLM_ARCH_ERNIE4_5_MOE, |
116 | | LLM_ARCH_HUNYUAN_MOE, |
117 | | LLM_ARCH_HUNYUAN_DENSE, |
118 | | LLM_ARCH_HUNYUAN_VL, |
119 | | LLM_ARCH_SMOLLM3, |
120 | | LLM_ARCH_OPENAI_MOE, |
121 | | LLM_ARCH_LFM2, |
122 | | LLM_ARCH_LFM2MOE, |
123 | | LLM_ARCH_DREAM, |
124 | | LLM_ARCH_SMALLTHINKER, |
125 | | LLM_ARCH_LLADA, |
126 | | LLM_ARCH_LLADA_MOE, |
127 | | LLM_ARCH_SEED_OSS, |
128 | | LLM_ARCH_GROVEMOE, |
129 | | LLM_ARCH_APERTUS, |
130 | | LLM_ARCH_MINIMAX_M2, |
131 | | LLM_ARCH_COGVLM, |
132 | | LLM_ARCH_RND1, |
133 | | LLM_ARCH_PANGU_EMBED, |
134 | | LLM_ARCH_MISTRAL3, |
135 | | LLM_ARCH_MISTRAL4, |
136 | | LLM_ARCH_PADDLEOCR, |
137 | | LLM_ARCH_MIMO2, |
138 | | LLM_ARCH_STEP35, |
139 | | LLM_ARCH_LLAMA_EMBED, |
140 | | LLM_ARCH_MAINCODER, |
141 | | LLM_ARCH_KIMI_LINEAR, |
142 | | LLM_ARCH_TALKIE, |
143 | | LLM_ARCH_MELLUM, |
144 | | LLM_ARCH_EAGLE3, |
145 | | LLM_ARCH_UNKNOWN, |
146 | | }; |
147 | | |
// Identifiers for model metadata keys. The file-level comment above states
// that these gguf constants are kept in sync with gguf.py.
// An enumerator is turned into a std::string by LLM_KV::operator()(llm_kv),
// declared further down in this header.
// Values are implicit and sequential from 0, so declaration order matters.
// The blank rows below separate groups that share an enumerator name prefix.
148 | | enum llm_kv { |
// LLM_KV_GENERAL_* entries
149 | | LLM_KV_GENERAL_TYPE, |
150 | | LLM_KV_GENERAL_ARCHITECTURE, |
151 | | LLM_KV_GENERAL_QUANTIZATION_VERSION, |
152 | | LLM_KV_GENERAL_ALIGNMENT, |
153 | | LLM_KV_GENERAL_FILE_TYPE, |
154 | | LLM_KV_GENERAL_SAMPLING_SEQUENCE, |
155 | | LLM_KV_GENERAL_SAMPLING_TOP_K, |
156 | | LLM_KV_GENERAL_SAMPLING_TOP_P, |
157 | | LLM_KV_GENERAL_SAMPLING_MIN_P, |
158 | | LLM_KV_GENERAL_SAMPLING_XTC_PROBABILITY, |
159 | | LLM_KV_GENERAL_SAMPLING_XTC_THRESHOLD, |
160 | | LLM_KV_GENERAL_SAMPLING_TEMP, |
161 | | LLM_KV_GENERAL_SAMPLING_PENALTY_LAST_N, |
162 | | LLM_KV_GENERAL_SAMPLING_PENALTY_REPEAT, |
163 | | LLM_KV_GENERAL_SAMPLING_MIROSTAT, |
164 | | LLM_KV_GENERAL_SAMPLING_MIROSTAT_TAU, |
165 | | LLM_KV_GENERAL_SAMPLING_MIROSTAT_ETA, |
166 | | LLM_KV_GENERAL_NAME, |
167 | | LLM_KV_GENERAL_AUTHOR, |
168 | | LLM_KV_GENERAL_VERSION, |
169 | | LLM_KV_GENERAL_URL, |
170 | | LLM_KV_GENERAL_DESCRIPTION, |
171 | | LLM_KV_GENERAL_LICENSE, |
172 | | LLM_KV_GENERAL_SOURCE_URL, |
173 | | LLM_KV_GENERAL_SOURCE_HF_REPO, |
174 | | |
// entries without a common sub-prefix (sizes, counts, expert/MoE settings, scales, ...)
175 | | LLM_KV_VOCAB_SIZE, |
176 | | LLM_KV_CONTEXT_LENGTH, |
177 | | LLM_KV_EMBEDDING_LENGTH, |
178 | | LLM_KV_EMBEDDING_LENGTH_OUT, |
179 | | LLM_KV_EMBEDDING_LENGTH_PER_LAYER, |
180 | | LLM_KV_FEATURES_LENGTH, |
181 | | LLM_KV_BLOCK_COUNT, |
182 | | LLM_KV_LEADING_DENSE_BLOCK_COUNT, |
183 | | LLM_KV_FEED_FORWARD_LENGTH, |
184 | | LLM_KV_EXPERT_FEED_FORWARD_LENGTH, |
185 | | LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, |
186 | | LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH, |
187 | | LLM_KV_SWIGLU_CLAMP_EXP, |
188 | | LLM_KV_SWIGLU_CLAMP_SHEXP, |
189 | | LLM_KV_USE_PARALLEL_RESIDUAL, |
190 | | LLM_KV_TENSOR_DATA_LAYOUT, |
191 | | LLM_KV_EXPERT_COUNT, |
192 | | LLM_KV_EXPERT_USED_COUNT, |
193 | | LLM_KV_EXPERT_SHARED_COUNT, |
194 | | LLM_KV_EXPERT_GROUP_COUNT, |
195 | | LLM_KV_EXPERT_GROUP_USED_COUNT, |
196 | | LLM_KV_EXPERT_WEIGHTS_SCALE, |
197 | | LLM_KV_EXPERT_WEIGHTS_NORM, |
198 | | LLM_KV_EXPERT_GATING_FUNC, |
199 | | LLM_KV_EXPERT_GROUP_SCALE, |
200 | | LLM_KV_EXPERTS_PER_GROUP, |
201 | | LLM_KV_MOE_EVERY_N_LAYERS, |
202 | | LLM_KV_MOE_LATENT_SIZE, |
203 | | LLM_KV_NEXTN_PREDICT_LAYERS, |
204 | | LLM_KV_NUM_DEEPSTACK_LAYERS, |
205 | | LLM_KV_DEEPSTACK_MAPPING, |
206 | | LLM_KV_HIDDEN_ACT, |
207 | | LLM_KV_POOLING_TYPE, |
208 | | LLM_KV_LOGIT_SCALE, |
209 | | LLM_KV_DECODER_START_TOKEN_ID, |
210 | | LLM_KV_DECODER_BLOCK_COUNT, |
211 | | LLM_KV_ATTN_LOGIT_SOFTCAPPING, |
212 | | LLM_KV_ROUTER_LOGIT_SOFTCAPPING, |
213 | | LLM_KV_FINAL_LOGIT_SOFTCAPPING, |
214 | | LLM_KV_SWIN_NORM, |
215 | | LLM_KV_RESCALE_EVERY_N_LAYERS, |
216 | | LLM_KV_TIME_MIX_EXTRA_DIM, |
217 | | LLM_KV_TIME_DECAY_EXTRA_DIM, |
218 | | LLM_KV_RESIDUAL_SCALE, |
219 | | LLM_KV_EMBEDDING_SCALE, |
220 | | LLM_KV_TOKEN_SHIFT_COUNT, |
221 | | LLM_KV_INTERLEAVE_MOE_LAYER_STEP, |
222 | | LLM_KV_FULL_ATTENTION_INTERVAL, |
223 | | |
// LLM_KV_ATTENTION_* entries
224 | | LLM_KV_ATTENTION_HEAD_COUNT, |
225 | | LLM_KV_ATTENTION_HEAD_COUNT_KV, |
226 | | LLM_KV_ATTENTION_MAX_ALIBI_BIAS, |
227 | | LLM_KV_ATTENTION_CLAMP_KQV, |
228 | | LLM_KV_ATTENTION_KEY_LENGTH, |
229 | | LLM_KV_ATTENTION_VALUE_LENGTH, |
230 | | LLM_KV_ATTENTION_LAYERNORM_EPS, |
231 | | LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, |
232 | | LLM_KV_ATTENTION_GROUPNORM_EPS, |
233 | | LLM_KV_ATTENTION_GROUPNORM_GROUPS, |
234 | | LLM_KV_ATTENTION_CAUSAL, |
235 | | LLM_KV_ATTENTION_Q_LORA_RANK, |
236 | | LLM_KV_ATTENTION_KV_LORA_RANK, |
237 | | LLM_KV_ATTENTION_DECAY_LORA_RANK, |
238 | | LLM_KV_ATTENTION_ICLR_LORA_RANK, |
239 | | LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, |
240 | | LLM_KV_ATTENTION_GATE_LORA_RANK, |
241 | | LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, |
242 | | LLM_KV_ATTENTION_SLIDING_WINDOW, |
243 | | LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN, |
244 | | LLM_KV_ATTENTION_SCALE, |
245 | | LLM_KV_ATTENTION_OUTPUT_SCALE, |
246 | | LLM_KV_ATTENTION_VALUE_SCALE, |
247 | | LLM_KV_ATTENTION_TEMPERATURE_LENGTH, |
248 | | LLM_KV_ATTENTION_TEMPERATURE_SCALE, |
249 | | LLM_KV_ATTENTION_KEY_LENGTH_MLA, |
250 | | LLM_KV_ATTENTION_VALUE_LENGTH_MLA, |
251 | | LLM_KV_ATTENTION_KEY_LENGTH_SWA, |
252 | | LLM_KV_ATTENTION_VALUE_LENGTH_SWA, |
253 | | LLM_KV_ATTENTION_INDEXER_HEAD_COUNT, |
254 | | LLM_KV_ATTENTION_INDEXER_KEY_LENGTH, |
255 | | LLM_KV_ATTENTION_INDEXER_TOP_K, |
256 | | LLM_KV_ATTENTION_SHARED_KV_LAYERS, |
257 | | LLM_KV_ATTENTION_RECURRENT_LAYERS, |
258 | | |
// LLM_KV_ROPE_* entries
259 | | LLM_KV_ROPE_DIMENSION_COUNT, |
260 | | LLM_KV_ROPE_DIMENSION_COUNT_SWA, |
261 | | LLM_KV_ROPE_DIMENSION_SECTIONS, |
262 | | LLM_KV_ROPE_FREQ_BASE, |
263 | | LLM_KV_ROPE_FREQ_BASE_SWA, |
264 | | LLM_KV_ROPE_SCALE_LINEAR, |
265 | | LLM_KV_ROPE_SCALING_TYPE, |
266 | | LLM_KV_ROPE_SCALING_FACTOR, |
267 | | LLM_KV_ROPE_SCALING_ALPHA, |
268 | | LLM_KV_ROPE_SCALING_ATTN_FACTOR, |
269 | | LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, |
270 | | LLM_KV_ROPE_SCALING_FINETUNED, |
271 | | LLM_KV_ROPE_SCALING_YARN_LOG_MUL, |
272 | | LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR, |
273 | | LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR, |
274 | | LLM_KV_ROPE_SCALING_YARN_BETA_FAST, |
275 | | LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, |
276 | | |
// LLM_KV_SPLIT_* entries
277 | | LLM_KV_SPLIT_NO, |
278 | | LLM_KV_SPLIT_COUNT, |
279 | | LLM_KV_SPLIT_TENSORS_COUNT, |
280 | | |
// LLM_KV_SSM_* entries
281 | | LLM_KV_SSM_INNER_SIZE, |
282 | | LLM_KV_SSM_CONV_KERNEL, |
283 | | LLM_KV_SSM_STATE_SIZE, |
284 | | LLM_KV_SSM_TIME_STEP_RANK, |
285 | | LLM_KV_SSM_GROUP_COUNT, |
286 | | LLM_KV_SSM_DT_B_C_RMS, |
287 | | |
288 | | LLM_KV_KDA_HEAD_DIM, |
289 | | |
290 | | LLM_KV_WKV_HEAD_SIZE, |
291 | | |
// LLM_KV_TOKENIZER_* entries (three deprecated tokenizer ids are listed separately near the end)
292 | | LLM_KV_TOKENIZER_MODEL, |
293 | | LLM_KV_TOKENIZER_PRE, |
294 | | LLM_KV_TOKENIZER_LIST, |
295 | | LLM_KV_TOKENIZER_TOKEN_TYPE, |
296 | | LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, |
297 | | LLM_KV_TOKENIZER_SCORES, |
298 | | LLM_KV_TOKENIZER_MERGES, |
299 | | LLM_KV_TOKENIZER_BOS_ID, |
300 | | LLM_KV_TOKENIZER_EOS_ID, |
301 | | LLM_KV_TOKENIZER_EOT_ID, |
302 | | LLM_KV_TOKENIZER_EOM_ID, |
303 | | LLM_KV_TOKENIZER_UNK_ID, |
304 | | LLM_KV_TOKENIZER_SEP_ID, |
305 | | LLM_KV_TOKENIZER_PAD_ID, |
306 | | LLM_KV_TOKENIZER_CLS_ID, |
307 | | LLM_KV_TOKENIZER_MASK_ID, |
308 | | LLM_KV_TOKENIZER_ADD_BOS, |
309 | | LLM_KV_TOKENIZER_ADD_EOS, |
310 | | LLM_KV_TOKENIZER_ADD_SEP, |
311 | | LLM_KV_TOKENIZER_ADD_PREFIX, |
312 | | LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, |
313 | | LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, |
314 | | LLM_KV_TOKENIZER_HF_JSON, |
315 | | LLM_KV_TOKENIZER_RWKV, |
316 | | LLM_KV_TOKENIZER_CHAT_TEMPLATE, |
317 | | LLM_KV_TOKENIZER_NORMALIZER_LOWERCASE, |
318 | | LLM_KV_TOKENIZER_NORMALIZER_STRIP_ACCENTS, |
319 | | LLM_KV_TOKENIZER_FIM_PRE_ID, |
320 | | LLM_KV_TOKENIZER_FIM_SUF_ID, |
321 | | LLM_KV_TOKENIZER_FIM_MID_ID, |
322 | | LLM_KV_TOKENIZER_FIM_PAD_ID, |
323 | | LLM_KV_TOKENIZER_FIM_REP_ID, |
324 | | LLM_KV_TOKENIZER_FIM_SEP_ID, |
325 | | LLM_KV_TOKENIZER_SUPPRESS_TOKENS, |
326 | | |
// LLM_KV_ADAPTER_* entries
327 | | LLM_KV_ADAPTER_TYPE, |
328 | | LLM_KV_ADAPTER_LORA_ALPHA, |
329 | | LLM_KV_ADAPTER_LORA_TASK_NAME, |
330 | | LLM_KV_ADAPTER_LORA_PROMPT_PREFIX, |
331 | | LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS, |
332 | | |
333 | | LLM_KV_POSNET_EMBEDDING_LENGTH, |
334 | | LLM_KV_POSNET_BLOCK_COUNT, |
335 | | |
336 | | LLM_KV_CONVNEXT_EMBEDDING_LENGTH, |
337 | | LLM_KV_CONVNEXT_BLOCK_COUNT, |
338 | | |
339 | | LLM_KV_CLASSIFIER_OUTPUT_LABELS, |
340 | | |
341 | | LLM_KV_TARGET_LAYERS, |
342 | | LLM_KV_TARGET_HIDDEN_SIZE, |
343 | | LLM_KV_NORM_BEFORE_RESIDUAL, |
344 | | |
345 | | LLM_KV_SHORTCONV_L_CACHE, |
346 | | |
347 | | LLM_KV_XIELU_ALPHA_N, |
348 | | LLM_KV_XIELU_ALPHA_P, |
349 | | LLM_KV_XIELU_BETA, |
350 | | LLM_KV_XIELU_EPS, |
351 | | |
352 | | // deprecated: |
353 | | LLM_KV_TOKENIZER_PREFIX_ID, |
354 | | LLM_KV_TOKENIZER_SUFFIX_ID, |
355 | | LLM_KV_TOKENIZER_MIDDLE_ID, |
356 | | |
357 | | // sentence-transformers dense layers in and out features |
358 | | LLM_KV_DENSE_2_FEAT_IN, |
359 | | LLM_KV_DENSE_2_FEAT_OUT, |
360 | | LLM_KV_DENSE_3_FEAT_IN, |
361 | | LLM_KV_DENSE_3_FEAT_OUT, |
362 | | }; |
363 | | |
// Identifiers for the tensors a model can contain.
// An enumerator is combined with an architecture, an optional suffix and
// optional indices by LLM_TN / LLM_TN_IMPL (declared below) to build the
// tensor's name string, and is the lookup key of llm_tensor_info_for().
// Values are implicit and sequential from 0, so declaration order matters.
// Trailing comments on individual rows name the model family an entry was
// added for (gemma3n, qwen3next, qwen3.5, kimi).
364 | | enum llm_tensor { |
365 | | LLM_TENSOR_TOKEN_EMBD, |
366 | | LLM_TENSOR_TOKEN_EMBD_NORM, |
367 | | LLM_TENSOR_TOKEN_TYPES, |
368 | | LLM_TENSOR_POS_EMBD, |
369 | | LLM_TENSOR_DENSE_2_OUT, |
370 | | LLM_TENSOR_DENSE_3_OUT, |
371 | | LLM_TENSOR_OUTPUT, |
372 | | LLM_TENSOR_OUTPUT_NORM, |
373 | | LLM_TENSOR_OUTPUT_NORM_LFM2, // fix for wrong tensor name |
374 | | LLM_TENSOR_ROPE_FREQS, |
375 | | LLM_TENSOR_ROPE_FACTORS_LONG, |
376 | | LLM_TENSOR_ROPE_FACTORS_SHORT, |
377 | | LLM_TENSOR_ATTN_Q, |
378 | | LLM_TENSOR_ATTN_K, |
379 | | LLM_TENSOR_ATTN_V, |
380 | | LLM_TENSOR_ATTN_QKV, |
381 | | LLM_TENSOR_ATTN_OUT, |
382 | | LLM_TENSOR_ATTN_NORM, |
383 | | LLM_TENSOR_ATTN_NORM_2, |
384 | | LLM_TENSOR_ATTN_OUT_NORM, |
385 | | LLM_TENSOR_ATTN_POST_NORM, |
386 | | LLM_TENSOR_ATTN_ROT_EMBD, |
387 | | LLM_TENSOR_ATTN_SINKS, |
388 | | LLM_TENSOR_ATTN_GATE, |
389 | | LLM_TENSOR_FFN_GATE_INP, |
390 | | LLM_TENSOR_FFN_GATE_INP_SHEXP, |
391 | | LLM_TENSOR_FFN_NORM, |
392 | | LLM_TENSOR_FFN_POST_NORM, |
393 | | LLM_TENSOR_FFN_POST_NORM_1, |
394 | | LLM_TENSOR_FFN_POST_NORM_2, |
395 | | LLM_TENSOR_FFN_PRE_NORM_2, |
396 | | LLM_TENSOR_FFN_GATE, |
397 | | LLM_TENSOR_FFN_DOWN, |
398 | | LLM_TENSOR_FFN_UP, |
399 | | LLM_TENSOR_FFN_ACT, |
400 | | LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility |
401 | | LLM_TENSOR_FFN_GATE_EXP, |
402 | | LLM_TENSOR_FFN_UP_EXP, |
403 | | LLM_TENSOR_FFN_NORM_EXPS, |
404 | | LLM_TENSOR_FFN_DOWN_EXPS, // merged experts |
405 | | LLM_TENSOR_FFN_GATE_EXPS, |
406 | | LLM_TENSOR_FFN_UP_EXPS, |
407 | | LLM_TENSOR_FFN_GATE_UP_EXPS, |
408 | | LLM_TENSOR_FFN_DOWN_SHEXP, |
409 | | LLM_TENSOR_FFN_GATE_SHEXP, |
410 | | LLM_TENSOR_FFN_UP_SHEXP, |
411 | | LLM_TENSOR_FFN_DOWN_CHEXPS, |
412 | | LLM_TENSOR_FFN_GATE_CHEXPS, |
413 | | LLM_TENSOR_FFN_UP_CHEXPS, |
414 | | LLM_TENSOR_FFN_EXP_PROBS_B, |
415 | | LLM_TENSOR_FFN_LATENT_DOWN, |
416 | | LLM_TENSOR_FFN_LATENT_UP, |
417 | | LLM_TENSOR_ATTN_Q_NORM, |
418 | | LLM_TENSOR_ATTN_K_NORM, |
419 | | LLM_TENSOR_LAYER_OUT_NORM, |
420 | | LLM_TENSOR_LAYER_OUT_SCALE, |
421 | | LLM_TENSOR_POST_ATTN_NORM, |
422 | | LLM_TENSOR_POST_MLP_NORM, |
423 | | LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n |
424 | | LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n |
425 | | LLM_TENSOR_PER_LAYER_INP_GATE, // gemma3n |
426 | | LLM_TENSOR_PER_LAYER_PROJ, // gemma3n |
427 | | LLM_TENSOR_PER_LAYER_PROJ_NORM, // gemma3n |
428 | | LLM_TENSOR_PER_LAYER_POST_NORM, // gemma3n |
429 | | LLM_TENSOR_ALTUP_PROJ, // gemma3n |
430 | | LLM_TENSOR_ALTUP_UNEMBD_PROJ, // gemma3n |
431 | | LLM_TENSOR_ALTUP_CORRECT_COEF, // gemma3n |
432 | | LLM_TENSOR_ALTUP_CORRECT_SCALE, // gemma3n |
433 | | LLM_TENSOR_ALTUP_PREDICT_COEF, // gemma3n |
434 | | LLM_TENSOR_ALTUP_ROUTER, // gemma3n |
435 | | LLM_TENSOR_ALTUP_ROUTER_NORM, // gemma3n |
436 | | LLM_TENSOR_LAUREL_L, // gemma3n |
437 | | LLM_TENSOR_LAUREL_R, // gemma3n |
438 | | LLM_TENSOR_LAUREL_POST_NORM, // gemma3n |
439 | | LLM_TENSOR_SSM_IN, |
440 | | LLM_TENSOR_SSM_CONV1D, |
441 | | LLM_TENSOR_SSM_X, |
442 | | LLM_TENSOR_SSM_DT, |
443 | | LLM_TENSOR_SSM_DT_NORM, |
444 | | LLM_TENSOR_SSM_A, |
445 | | LLM_TENSOR_SSM_A_NOSCAN, // qwen3next special case with MUL instead of SSM_SCAN |
446 | | LLM_TENSOR_SSM_B_NORM, |
447 | | LLM_TENSOR_SSM_C_NORM, |
448 | | LLM_TENSOR_SSM_D, |
449 | | LLM_TENSOR_SSM_NORM, |
450 | | LLM_TENSOR_SSM_OUT, |
451 | | LLM_TENSOR_SSM_BETA_ALPHA, // qwen3next |
452 | | LLM_TENSOR_SSM_ALPHA, // qwen3.5 |
453 | | // Kimi Linear KDA (using SSM_ prefix for consistency) |
454 | | LLM_TENSOR_SSM_CONV1D_Q, // kimi: Q conv1d weight |
455 | | LLM_TENSOR_SSM_CONV1D_K, // kimi: K conv1d weight |
456 | | LLM_TENSOR_SSM_CONV1D_V, // kimi: V conv1d weight |
457 | | LLM_TENSOR_SSM_F_A, // kimi: forget gate projection A |
458 | | LLM_TENSOR_SSM_F_B, // kimi: forget gate projection B |
459 | | LLM_TENSOR_SSM_BETA, // kimi: beta mixing coefficient and qwen3.5 |
460 | | LLM_TENSOR_SSM_G_A, // kimi: output gate projection A |
461 | | LLM_TENSOR_SSM_G_B, // kimi: output gate projection B |
462 | | LLM_TENSOR_TIME_MIX_W0, |
463 | | LLM_TENSOR_TIME_MIX_W1, |
464 | | LLM_TENSOR_TIME_MIX_W2, |
465 | | LLM_TENSOR_TIME_MIX_A0, |
466 | | LLM_TENSOR_TIME_MIX_A1, |
467 | | LLM_TENSOR_TIME_MIX_A2, |
468 | | LLM_TENSOR_TIME_MIX_V0, |
469 | | LLM_TENSOR_TIME_MIX_V1, |
470 | | LLM_TENSOR_TIME_MIX_V2, |
471 | | LLM_TENSOR_TIME_MIX_G1, |
472 | | LLM_TENSOR_TIME_MIX_G2, |
473 | | LLM_TENSOR_TIME_MIX_K_K, |
474 | | LLM_TENSOR_TIME_MIX_K_A, |
475 | | LLM_TENSOR_TIME_MIX_R_K, |
476 | | LLM_TENSOR_TIME_MIX_LERP_X, |
477 | | LLM_TENSOR_TIME_MIX_LERP_W, |
478 | | LLM_TENSOR_TIME_MIX_LERP_K, |
479 | | LLM_TENSOR_TIME_MIX_LERP_V, |
480 | | LLM_TENSOR_TIME_MIX_LERP_R, |
481 | | LLM_TENSOR_TIME_MIX_LERP_G, |
482 | | LLM_TENSOR_TIME_MIX_LERP_FUSED, |
483 | | LLM_TENSOR_TIME_MIX_FIRST, |
484 | | LLM_TENSOR_TIME_MIX_DECAY, |
485 | | LLM_TENSOR_TIME_MIX_DECAY_W1, |
486 | | LLM_TENSOR_TIME_MIX_DECAY_W2, |
487 | | LLM_TENSOR_TIME_MIX_KEY, |
488 | | LLM_TENSOR_TIME_MIX_VALUE, |
489 | | LLM_TENSOR_TIME_MIX_RECEPTANCE, |
490 | | LLM_TENSOR_TIME_MIX_GATE, |
491 | | LLM_TENSOR_TIME_MIX_LN, |
492 | | LLM_TENSOR_TIME_MIX_OUTPUT, |
493 | | LLM_TENSOR_CHANNEL_MIX_LERP_K, |
494 | | LLM_TENSOR_CHANNEL_MIX_LERP_R, |
495 | | LLM_TENSOR_CHANNEL_MIX_KEY, |
496 | | LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, |
497 | | LLM_TENSOR_CHANNEL_MIX_VALUE, |
498 | | LLM_TENSOR_ATTN_Q_A, |
499 | | LLM_TENSOR_ATTN_Q_B, |
500 | | LLM_TENSOR_ATTN_KV_A_MQA, |
501 | | LLM_TENSOR_ATTN_KV_B, |
502 | | LLM_TENSOR_ATTN_K_B, |
503 | | LLM_TENSOR_ATTN_V_B, |
504 | | LLM_TENSOR_ATTN_Q_A_NORM, |
505 | | LLM_TENSOR_ATTN_KV_A_NORM, |
506 | | LLM_TENSOR_ATTN_SUB_NORM, |
507 | | LLM_TENSOR_FFN_SUB_NORM, |
508 | | LLM_TENSOR_DEC_ATTN_NORM, |
509 | | LLM_TENSOR_DEC_ATTN_Q, |
510 | | LLM_TENSOR_DEC_ATTN_K, |
511 | | LLM_TENSOR_DEC_ATTN_V, |
512 | | LLM_TENSOR_DEC_ATTN_OUT, |
513 | | LLM_TENSOR_DEC_ATTN_REL_B, |
514 | | LLM_TENSOR_DEC_CROSS_ATTN_NORM, |
515 | | LLM_TENSOR_DEC_CROSS_ATTN_Q, |
516 | | LLM_TENSOR_DEC_CROSS_ATTN_K, |
517 | | LLM_TENSOR_DEC_CROSS_ATTN_V, |
518 | | LLM_TENSOR_DEC_CROSS_ATTN_OUT, |
519 | | LLM_TENSOR_DEC_CROSS_ATTN_REL_B, |
520 | | LLM_TENSOR_DEC_FFN_NORM, |
521 | | LLM_TENSOR_DEC_FFN_GATE, |
522 | | LLM_TENSOR_DEC_FFN_DOWN, |
523 | | LLM_TENSOR_DEC_FFN_UP, |
524 | | LLM_TENSOR_DEC_OUTPUT_NORM, |
525 | | LLM_TENSOR_ENC_ATTN_NORM, |
526 | | LLM_TENSOR_ENC_ATTN_Q, |
527 | | LLM_TENSOR_ENC_ATTN_K, |
528 | | LLM_TENSOR_ENC_ATTN_V, |
529 | | LLM_TENSOR_ENC_ATTN_OUT, |
530 | | LLM_TENSOR_ENC_ATTN_REL_B, |
531 | | LLM_TENSOR_ENC_FFN_NORM, |
532 | | LLM_TENSOR_ENC_FFN_GATE, |
533 | | LLM_TENSOR_ENC_FFN_DOWN, |
534 | | LLM_TENSOR_ENC_FFN_UP, |
535 | | LLM_TENSOR_ENC_OUTPUT_NORM, |
536 | | LLM_TENSOR_CLS, |
537 | | LLM_TENSOR_CLS_OUT, |
538 | | LLM_TENSOR_CLS_NORM, |
539 | | LLM_TENSOR_CONV1D, |
540 | | LLM_TENSOR_CONVNEXT_DW, |
541 | | LLM_TENSOR_CONVNEXT_NORM, |
542 | | LLM_TENSOR_CONVNEXT_PW1, |
543 | | LLM_TENSOR_CONVNEXT_PW2, |
544 | | LLM_TENSOR_CONVNEXT_GAMMA, |
545 | | LLM_TENSOR_POS_NET_CONV1, |
546 | | LLM_TENSOR_POS_NET_CONV2, |
547 | | LLM_TENSOR_POS_NET_NORM, |
548 | | LLM_TENSOR_POS_NET_NORM1, |
549 | | LLM_TENSOR_POS_NET_NORM2, |
550 | | LLM_TENSOR_POS_NET_ATTN_NORM, |
551 | | LLM_TENSOR_POS_NET_ATTN_Q, |
552 | | LLM_TENSOR_POS_NET_ATTN_K, |
553 | | LLM_TENSOR_POS_NET_ATTN_V, |
554 | | LLM_TENSOR_POS_NET_ATTN_OUT, |
555 | | LLM_TENSOR_SHORTCONV_CONV, |
556 | | LLM_TENSOR_SHORTCONV_INPROJ, |
557 | | LLM_TENSOR_SHORTCONV_OUTPROJ, |
558 | | LLM_TENSOR_VISEXP_ATTN_QKV, |
559 | | LLM_TENSOR_VISEXP_ATTN_OUT, |
560 | | LLM_TENSOR_VISEXP_FFN_GATE, |
561 | | LLM_TENSOR_VISEXP_FFN_DOWN, |
562 | | LLM_TENSOR_VISEXP_FFN_UP, |
563 | | LLM_TENSOR_INDEXER_K_NORM, |
564 | | LLM_TENSOR_INDEXER_PROJ, |
565 | | LLM_TENSOR_INDEXER_ATTN_K, |
566 | | LLM_TENSOR_INDEXER_ATTN_Q_B, |
567 | | LLM_TENSOR_NEXTN_PROJ_PRE, |
568 | | LLM_TENSOR_NEXTN_PROJ_POST, |
569 | | LLM_TENSOR_NEXTN_EH_PROJ, |
570 | | LLM_TENSOR_NEXTN_EMBED_TOKENS, |
571 | | LLM_TENSOR_NEXTN_ENORM, |
572 | | LLM_TENSOR_NEXTN_HNORM, |
573 | | LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, |
574 | | LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, |
575 | | LLM_TENSOR_MASKED_EMBD_CENTROIDS, |
576 | | LLM_TENSOR_MASKED_EMBD_ORDERING, |
577 | | LLM_TENSOR_FC, |
578 | | LLM_TENSOR_D2T, |
579 | | }; |
580 | | |
581 | | |
// Coarse classification of a tensor: input, repeating, or output.
// Stored per tensor in llm_tensor_info::layer (declared below).
// NOTE(review): "repeating" presumably means the per-block tensors that carry
// a block index in their name - confirm against the tensor info table.
582 | | enum llm_tensor_layer { |
583 | | LLM_TENSOR_LAYER_INPUT, |
584 | | LLM_TENSOR_LAYER_REPEATING, |
585 | | LLM_TENSOR_LAYER_OUTPUT, |
586 | | }; |
587 | | |
// Function object that maps an llm_kv identifier to a std::string for one
// architecture. Only declarations are visible here; the constructor and the
// call operator are defined out of line.
//
// The single-argument form LLM_KV(arch) is usable because suffix defaults to
// nullptr. The constructor is not explicit, so an llm_arch converts to LLM_KV
// implicitly.
// suffix is held as a raw, non-owning pointer: the pointed-to string must
// outlive this object.
// NOTE(review): operator() presumably formats the key name using arch and
// appends suffix when it is non-null - confirm in the implementation file.
588 | | struct LLM_KV { |
589 | | LLM_KV(llm_arch arch, const char * suffix = nullptr); |
590 | | |
591 | | llm_arch arch; |
592 | | const char * suffix; |
593 | | |
594 | | std::string operator()(llm_kv kv) const; |
595 | | }; |
596 | | |
597 | | // helper to handle gguf constants |
598 | | // usage: |
599 | | // |
600 | | // const auto tn = LLM_TN(LLM_ARCH_LLAMA); |
601 | | // |
602 | | // std::string name = tn(LLM_TENSOR_OUTPUT); -> "output" |
603 | | // std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias" |
604 | | // std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight" |
605 | | // |
// Value object produced by LLM_TN::operator(): it captures everything needed
// to build one tensor name (architecture, tensor id, optional suffix, and the
// two indices bid / xid) and renders it through str().
//
// Every data member is const, so the implicit copy-assignment operator is
// deleted; instances can be copy-constructed but not reassigned.
// suffix is a raw, non-owning pointer (LLM_TN passes nullptr when no suffix
// is requested); the pointed-to string must outlive this object.
// LLM_TN passes -1 for bid / xid when the caller omits them. In the usage
// example above, bid = 3 yields the "blk.3." prefix.
// NOTE(review): the meaning of xid is not visible in this header - confirm in
// the implementation of str().
606 | | struct LLM_TN_IMPL { |
607 | | const llm_arch arch; |
608 | | const llm_tensor tensor; |
609 | | const char * const suffix; |
610 | | const int bid; |
611 | | const int xid; |
612 | | |
613 | | LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid); |
614 | | |
// Builds the tensor name; defined out of line.
615 | | std::string str() const; |
616 | | |
// Implicit conversion so the object can be used wherever a std::string is
// expected (as in the usage example above). Each conversion calls str().
617 | 0 | operator std::string() const { |
618 | 0 | return str(); |
619 | 0 | } |
620 | | |
// Comparisons are provided only with the std::string on the left-hand side;
// each one calls tn.str() and compares the result.
621 | 0 | friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) { |
622 | 0 | return str == tn.str(); |
623 | 0 | } |
624 | | |
625 | 0 | friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) { |
626 | 0 | return str != tn.str(); |
627 | 0 | } |
628 | | }; |
629 | | |
// Factory for tensor names bound to one architecture (see the usage example
// above LLM_TN_IMPL). Calling the object returns an LLM_TN_IMPL, which then
// converts to std::string on demand.
//
// The constructor is not explicit, so an llm_arch converts to LLM_TN
// implicitly.
630 | | struct LLM_TN { |
631 | 0 | LLM_TN(llm_arch arch) : arch(arch) {} |
632 | | |
633 | | llm_arch arch; |
634 | | |
// Name with a suffix, e.g. "weight" or "bias" in the usage example.
// bid and xid default to -1. The suffix pointer is stored as-is in the
// returned object, so it must stay valid for as long as that object is used.
635 | 0 | LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const { |
636 | 0 | return LLM_TN_IMPL(arch, tensor, suffix, bid, xid); |
637 | 0 | } |
638 | | |
// Name without a suffix: forwards nullptr as the suffix.
639 | 0 | LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const { |
640 | 0 | return LLM_TN_IMPL(arch, tensor, nullptr, bid, xid); |
641 | 0 | } |
642 | | }; |
643 | | |
644 | | |
// Per-tensor record returned by llm_tensor_info_for(): a layer category plus
// a ggml operation (ggml_op comes from "ggml.h", included at the top).
// NOTE(review): op is presumably the ggml operation the tensor is consumed
// by - confirm where the info table is defined.
645 | | struct llm_tensor_info { |
646 | | llm_tensor_layer layer; |
647 | | ggml_op op; |
648 | | }; |
649 | | |
// Free helper functions for architectures and tensors. Only the declarations
// are visible here; all of them are defined out of line.

// Returns a list of llm_arch values.
// NOTE(review): whether LLM_ARCH_UNKNOWN is included is not visible here.
650 | | std::vector<llm_arch> llm_arch_all(); |
651 | | |
// Returns a C string for the given architecture.
// NOTE(review): presumably a pointer into static storage that the caller must
// not free - confirm in the implementation.
652 | | const char * llm_arch_name(llm_arch arch); |
653 | | |
// Maps a name string to an llm_arch.
// NOTE(review): presumably the inverse of llm_arch_name(), returning
// LLM_ARCH_UNKNOWN when nothing matches - confirm in the implementation.
654 | | llm_arch llm_arch_from_string(const std::string & name); |
655 | | |
// Returns a reference to the llm_tensor_info record for the given tensor.
// NOTE(review): behaviour for a tensor with no record is not visible here.
656 | | const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor); |
657 | | |
// Boolean predicates on an architecture. The criteria behind each one live in
// the implementation file and are not visible in this header.
658 | | bool llm_arch_is_recurrent (const llm_arch & arch); |
659 | | bool llm_arch_is_hybrid (const llm_arch & arch); |
660 | | bool llm_arch_is_diffusion (const llm_arch & arch); |
661 | | bool llm_arch_supports_sm_tensor(const llm_arch & arch); |
662 | | bool llm_arch_supports_rs_rollback(const llm_arch & arch); |