/src/llama.cpp/src/llama-arch.h
Line | Count | Source |
1 | | #pragma once |
2 | | |
3 | | #include "ggml.h" // ggml_op |
4 | | |
5 | | #include <string> |
6 | | #include <set> |
7 | | |
8 | | // |
9 | | // gguf constants (sync with gguf.py) |
10 | | // |
11 | | |
// Supported model architectures.
// The integer value of each entry is defined by its position, so the order is
// load-bearing: never reorder or remove entries — append new architectures
// before LLM_ARCH_UNKNOWN (names are kept in sync with gguf.py, per the file header).
enum llm_arch {
    LLM_ARCH_CLIP,
    LLM_ARCH_LLAMA,
    LLM_ARCH_LLAMA4,
    LLM_ARCH_DECI,
    LLM_ARCH_FALCON,
    LLM_ARCH_BAICHUAN,
    LLM_ARCH_GROK,
    LLM_ARCH_GPT2,
    LLM_ARCH_GPTJ,
    LLM_ARCH_GPTNEOX,
    LLM_ARCH_MPT,
    LLM_ARCH_STARCODER,
    LLM_ARCH_REFACT,
    LLM_ARCH_BERT,
    LLM_ARCH_MODERN_BERT,
    LLM_ARCH_NOMIC_BERT,
    LLM_ARCH_NOMIC_BERT_MOE,
    LLM_ARCH_NEO_BERT,
    LLM_ARCH_JINA_BERT_V2,
    LLM_ARCH_JINA_BERT_V3,
    LLM_ARCH_BLOOM,
    LLM_ARCH_STABLELM,
    LLM_ARCH_QWEN,
    LLM_ARCH_QWEN2,
    LLM_ARCH_QWEN2MOE,
    LLM_ARCH_QWEN2VL,
    LLM_ARCH_QWEN3,
    LLM_ARCH_QWEN3MOE,
    LLM_ARCH_QWEN3NEXT,
    LLM_ARCH_QWEN3VL,
    LLM_ARCH_QWEN3VLMOE,
    LLM_ARCH_PHI2,
    LLM_ARCH_PHI3,
    LLM_ARCH_PHIMOE,
    LLM_ARCH_PLAMO,
    LLM_ARCH_PLAMO2,
    LLM_ARCH_PLAMO3,
    LLM_ARCH_CODESHELL,
    LLM_ARCH_ORION,
    LLM_ARCH_INTERNLM2,
    LLM_ARCH_MINICPM,
    LLM_ARCH_MINICPM3,
    LLM_ARCH_GEMMA,
    LLM_ARCH_GEMMA2,
    LLM_ARCH_GEMMA3,
    LLM_ARCH_GEMMA3N,
    LLM_ARCH_GEMMA_EMBEDDING,
    LLM_ARCH_STARCODER2,
    LLM_ARCH_MAMBA,
    LLM_ARCH_MAMBA2,
    LLM_ARCH_JAMBA,
    LLM_ARCH_FALCON_H1,
    LLM_ARCH_XVERSE,
    LLM_ARCH_COMMAND_R,
    LLM_ARCH_COHERE2,
    LLM_ARCH_DBRX,
    LLM_ARCH_OLMO,
    LLM_ARCH_OLMO2,
    LLM_ARCH_OLMOE,
    LLM_ARCH_OPENELM,
    LLM_ARCH_ARCTIC,
    LLM_ARCH_DEEPSEEK,
    LLM_ARCH_DEEPSEEK2,
    LLM_ARCH_CHATGLM,
    LLM_ARCH_GLM4,
    LLM_ARCH_GLM4_MOE,
    LLM_ARCH_BITNET,
    LLM_ARCH_T5,
    LLM_ARCH_T5ENCODER,
    LLM_ARCH_JAIS,
    LLM_ARCH_NEMOTRON,
    LLM_ARCH_NEMOTRON_H,
    LLM_ARCH_NEMOTRON_H_MOE,
    LLM_ARCH_EXAONE,
    LLM_ARCH_EXAONE4,
    LLM_ARCH_RWKV6,
    LLM_ARCH_RWKV6QWEN2,
    LLM_ARCH_RWKV7,
    LLM_ARCH_ARWKV7,
    LLM_ARCH_GRANITE,
    LLM_ARCH_GRANITE_MOE,
    LLM_ARCH_GRANITE_HYBRID,
    LLM_ARCH_CHAMELEON,
    LLM_ARCH_WAVTOKENIZER_DEC,
    LLM_ARCH_PLM,
    LLM_ARCH_BAILINGMOE,
    LLM_ARCH_BAILINGMOE2,
    LLM_ARCH_DOTS1,
    LLM_ARCH_ARCEE,
    LLM_ARCH_AFMOE,
    LLM_ARCH_ERNIE4_5,
    LLM_ARCH_ERNIE4_5_MOE,
    LLM_ARCH_HUNYUAN_MOE,
    LLM_ARCH_HUNYUAN_DENSE,
    LLM_ARCH_SMOLLM3,
    LLM_ARCH_OPENAI_MOE,
    LLM_ARCH_LFM2,
    LLM_ARCH_LFM2MOE,
    LLM_ARCH_DREAM,
    LLM_ARCH_SMALLTHINKER,
    LLM_ARCH_LLADA,
    LLM_ARCH_LLADA_MOE,
    LLM_ARCH_SEED_OSS,
    LLM_ARCH_GROVEMOE,
    LLM_ARCH_APERTUS,
    LLM_ARCH_MINIMAX_M2,
    LLM_ARCH_COGVLM,
    LLM_ARCH_RND1,
    LLM_ARCH_PANGU_EMBED,
    LLM_ARCH_MISTRAL3,
    LLM_ARCH_MIMO2,
    LLM_ARCH_LLAMA_EMBED,
    LLM_ARCH_MAINCODER,
    LLM_ARCH_UNKNOWN, // fallback for unrecognized architecture strings — keep last
};
128 | | |
// Identifiers for GGUF metadata key-value pairs.
// The string form of each key is produced elsewhere via LLM_KV::operator().
// Entry order is load-bearing (position defines the value) — append only,
// except where the grouping comments below indicate otherwise.
enum llm_kv {
    // general.* keys: model identity, provenance and default sampling parameters
    LLM_KV_GENERAL_TYPE,
    LLM_KV_GENERAL_ARCHITECTURE,
    LLM_KV_GENERAL_QUANTIZATION_VERSION,
    LLM_KV_GENERAL_ALIGNMENT,
    LLM_KV_GENERAL_FILE_TYPE,
    LLM_KV_GENERAL_SAMPLING_SEQUENCE,
    LLM_KV_GENERAL_SAMPLING_TOP_K,
    LLM_KV_GENERAL_SAMPLING_TOP_P,
    LLM_KV_GENERAL_SAMPLING_MIN_P,
    LLM_KV_GENERAL_SAMPLING_XTC_PROBABILITY,
    LLM_KV_GENERAL_SAMPLING_XTC_THRESHOLD,
    LLM_KV_GENERAL_SAMPLING_TEMP,
    LLM_KV_GENERAL_SAMPLING_PENALTY_LAST_N,
    LLM_KV_GENERAL_SAMPLING_PENALTY_REPEAT,
    LLM_KV_GENERAL_SAMPLING_MIROSTAT,
    LLM_KV_GENERAL_SAMPLING_MIROSTAT_TAU,
    LLM_KV_GENERAL_SAMPLING_MIROSTAT_ETA,
    LLM_KV_GENERAL_NAME,
    LLM_KV_GENERAL_AUTHOR,
    LLM_KV_GENERAL_VERSION,
    LLM_KV_GENERAL_URL,
    LLM_KV_GENERAL_DESCRIPTION,
    LLM_KV_GENERAL_LICENSE,
    LLM_KV_GENERAL_SOURCE_URL,
    LLM_KV_GENERAL_SOURCE_HF_REPO,

    // model hyperparameters (dimensions, block counts, MoE configuration, ...)
    LLM_KV_VOCAB_SIZE,
    LLM_KV_CONTEXT_LENGTH,
    LLM_KV_EMBEDDING_LENGTH,
    LLM_KV_EMBEDDING_LENGTH_OUT,
    LLM_KV_FEATURES_LENGTH,
    LLM_KV_BLOCK_COUNT,
    LLM_KV_LEADING_DENSE_BLOCK_COUNT,
    LLM_KV_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
    LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH,
    LLM_KV_USE_PARALLEL_RESIDUAL,
    LLM_KV_TENSOR_DATA_LAYOUT,
    LLM_KV_EXPERT_COUNT,
    LLM_KV_EXPERT_USED_COUNT,
    LLM_KV_EXPERT_SHARED_COUNT,
    LLM_KV_EXPERT_GROUP_COUNT,
    LLM_KV_EXPERT_GROUP_USED_COUNT,
    LLM_KV_EXPERT_WEIGHTS_SCALE,
    LLM_KV_EXPERT_WEIGHTS_NORM,
    LLM_KV_EXPERT_GATING_FUNC,
    LLM_KV_EXPERT_GROUP_SCALE,
    LLM_KV_EXPERTS_PER_GROUP,
    LLM_KV_MOE_EVERY_N_LAYERS,
    LLM_KV_NEXTN_PREDICT_LAYERS,
    LLM_KV_NUM_DEEPSTACK_LAYERS,
    LLM_KV_POOLING_TYPE,
    LLM_KV_LOGIT_SCALE,
    LLM_KV_DECODER_START_TOKEN_ID,
    LLM_KV_DECODER_BLOCK_COUNT,
    LLM_KV_ATTN_LOGIT_SOFTCAPPING,
    LLM_KV_ROUTER_LOGIT_SOFTCAPPING,
    LLM_KV_FINAL_LOGIT_SOFTCAPPING,
    LLM_KV_SWIN_NORM,
    LLM_KV_RESCALE_EVERY_N_LAYERS,
    LLM_KV_TIME_MIX_EXTRA_DIM,
    LLM_KV_TIME_DECAY_EXTRA_DIM,
    LLM_KV_RESIDUAL_SCALE,
    LLM_KV_EMBEDDING_SCALE,
    LLM_KV_TOKEN_SHIFT_COUNT,
    LLM_KV_INTERLEAVE_MOE_LAYER_STEP,

    // attention.* keys
    LLM_KV_ATTENTION_HEAD_COUNT,
    LLM_KV_ATTENTION_HEAD_COUNT_KV,
    LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
    LLM_KV_ATTENTION_CLAMP_KQV,
    LLM_KV_ATTENTION_KEY_LENGTH,
    LLM_KV_ATTENTION_VALUE_LENGTH,
    LLM_KV_ATTENTION_LAYERNORM_EPS,
    LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
    LLM_KV_ATTENTION_GROUPNORM_EPS,
    LLM_KV_ATTENTION_GROUPNORM_GROUPS,
    LLM_KV_ATTENTION_CAUSAL,
    LLM_KV_ATTENTION_Q_LORA_RANK,
    LLM_KV_ATTENTION_KV_LORA_RANK,
    LLM_KV_ATTENTION_DECAY_LORA_RANK,
    LLM_KV_ATTENTION_ICLR_LORA_RANK,
    LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
    LLM_KV_ATTENTION_GATE_LORA_RANK,
    LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
    LLM_KV_ATTENTION_SLIDING_WINDOW,
    LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN,
    LLM_KV_ATTENTION_SCALE,
    LLM_KV_ATTENTION_OUTPUT_SCALE,
    LLM_KV_ATTENTION_TEMPERATURE_LENGTH,
    LLM_KV_ATTENTION_TEMPERATURE_SCALE,
    LLM_KV_ATTENTION_KEY_LENGTH_MLA,
    LLM_KV_ATTENTION_VALUE_LENGTH_MLA,

    // rope.* keys (frequency base and scaling configuration)
    LLM_KV_ROPE_DIMENSION_COUNT,
    LLM_KV_ROPE_DIMENSION_SECTIONS,
    LLM_KV_ROPE_FREQ_BASE,
    LLM_KV_ROPE_FREQ_BASE_SWA,
    LLM_KV_ROPE_SCALE_LINEAR,
    LLM_KV_ROPE_SCALING_TYPE,
    LLM_KV_ROPE_SCALING_FACTOR,
    LLM_KV_ROPE_SCALING_ATTN_FACTOR,
    LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
    LLM_KV_ROPE_SCALING_FINETUNED,
    LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
    LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR,
    LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR,
    LLM_KV_ROPE_SCALING_YARN_BETA_FAST,
    LLM_KV_ROPE_SCALING_YARN_BETA_SLOW,

    // split.* keys (multi-file GGUF shards)
    LLM_KV_SPLIT_NO,
    LLM_KV_SPLIT_COUNT,
    LLM_KV_SPLIT_TENSORS_COUNT,

    // ssm.* keys (state-space / Mamba-style models)
    LLM_KV_SSM_INNER_SIZE,
    LLM_KV_SSM_CONV_KERNEL,
    LLM_KV_SSM_STATE_SIZE,
    LLM_KV_SSM_TIME_STEP_RANK,
    LLM_KV_SSM_GROUP_COUNT,
    LLM_KV_SSM_DT_B_C_RMS,

    // wkv.* keys (RWKV-style models)
    LLM_KV_WKV_HEAD_SIZE,

    // tokenizer.* keys
    LLM_KV_TOKENIZER_MODEL,
    LLM_KV_TOKENIZER_PRE,
    LLM_KV_TOKENIZER_LIST,
    LLM_KV_TOKENIZER_TOKEN_TYPE,
    LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
    LLM_KV_TOKENIZER_SCORES,
    LLM_KV_TOKENIZER_MERGES,
    LLM_KV_TOKENIZER_BOS_ID,
    LLM_KV_TOKENIZER_EOS_ID,
    LLM_KV_TOKENIZER_EOT_ID,
    LLM_KV_TOKENIZER_EOM_ID,
    LLM_KV_TOKENIZER_UNK_ID,
    LLM_KV_TOKENIZER_SEP_ID,
    LLM_KV_TOKENIZER_PAD_ID,
    LLM_KV_TOKENIZER_CLS_ID,
    LLM_KV_TOKENIZER_MASK_ID,
    LLM_KV_TOKENIZER_ADD_BOS,
    LLM_KV_TOKENIZER_ADD_EOS,
    LLM_KV_TOKENIZER_ADD_SEP,
    LLM_KV_TOKENIZER_ADD_PREFIX,
    LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
    LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
    LLM_KV_TOKENIZER_HF_JSON,
    LLM_KV_TOKENIZER_RWKV,
    LLM_KV_TOKENIZER_CHAT_TEMPLATE,
    LLM_KV_TOKENIZER_FIM_PRE_ID,
    LLM_KV_TOKENIZER_FIM_SUF_ID,
    LLM_KV_TOKENIZER_FIM_MID_ID,
    LLM_KV_TOKENIZER_FIM_PAD_ID,
    LLM_KV_TOKENIZER_FIM_REP_ID,
    LLM_KV_TOKENIZER_FIM_SEP_ID,

    // adapter.* keys (LoRA / control adapters)
    LLM_KV_ADAPTER_TYPE,
    LLM_KV_ADAPTER_LORA_ALPHA,
    LLM_KV_ADAPTER_LORA_TASK_NAME,
    LLM_KV_ADAPTER_LORA_PROMPT_PREFIX,
    LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS,

    // posnet.* keys
    LLM_KV_POSNET_EMBEDDING_LENGTH,
    LLM_KV_POSNET_BLOCK_COUNT,

    // convnext.* keys
    LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
    LLM_KV_CONVNEXT_BLOCK_COUNT,

    LLM_KV_CLASSIFIER_OUTPUT_LABELS,

    LLM_KV_SHORTCONV_L_CACHE,

    // xIELU activation parameters
    LLM_KV_XIELU_ALPHA_N,
    LLM_KV_XIELU_ALPHA_P,
    LLM_KV_XIELU_BETA,
    LLM_KV_XIELU_EPS,

    // deprecated:
    LLM_KV_TOKENIZER_PREFIX_ID,
    LLM_KV_TOKENIZER_SUFFIX_ID,
    LLM_KV_TOKENIZER_MIDDLE_ID,

    // sentence-transformers dense layers in and out features
    LLM_KV_DENSE_2_FEAT_IN,
    LLM_KV_DENSE_2_FEAT_OUT,
    LLM_KV_DENSE_3_FEAT_IN,
    LLM_KV_DENSE_3_FEAT_OUT,
};
318 | | |
// Identifiers for every tensor kind a model may contain.
// String names are resolved per-architecture via LLM_TN / LLM_TN_IMPL.
// Position defines the value — append new tensors, do not reorder.
enum llm_tensor {
    LLM_TENSOR_TOKEN_EMBD,
    LLM_TENSOR_TOKEN_EMBD_NORM,
    LLM_TENSOR_TOKEN_TYPES,
    LLM_TENSOR_POS_EMBD,
    LLM_TENSOR_DENSE_2_OUT,             // sentence-transformers dense projections
    LLM_TENSOR_DENSE_3_OUT,
    LLM_TENSOR_OUTPUT,
    LLM_TENSOR_OUTPUT_NORM,
    LLM_TENSOR_OUTPUT_NORM_LFM2,        // fix for wrong tensor name
    LLM_TENSOR_ROPE_FREQS,
    LLM_TENSOR_ROPE_FACTORS_LONG,
    LLM_TENSOR_ROPE_FACTORS_SHORT,
    // attention
    LLM_TENSOR_ATTN_Q,
    LLM_TENSOR_ATTN_K,
    LLM_TENSOR_ATTN_V,
    LLM_TENSOR_ATTN_QKV,
    LLM_TENSOR_ATTN_OUT,
    LLM_TENSOR_ATTN_NORM,
    LLM_TENSOR_ATTN_NORM_2,
    LLM_TENSOR_ATTN_OUT_NORM,
    LLM_TENSOR_ATTN_POST_NORM,
    LLM_TENSOR_ATTN_ROT_EMBD,
    LLM_TENSOR_ATTN_SINKS,
    LLM_TENSOR_ATTN_GATE,
    // feed-forward / mixture-of-experts
    LLM_TENSOR_FFN_GATE_INP,
    LLM_TENSOR_FFN_GATE_INP_SHEXP,
    LLM_TENSOR_FFN_NORM,
    LLM_TENSOR_FFN_POST_NORM,
    LLM_TENSOR_FFN_GATE,
    LLM_TENSOR_FFN_DOWN,
    LLM_TENSOR_FFN_UP,
    LLM_TENSOR_FFN_ACT,
    LLM_TENSOR_FFN_DOWN_EXP,  // split experts for backward compatibility
    LLM_TENSOR_FFN_GATE_EXP,
    LLM_TENSOR_FFN_UP_EXP,
    LLM_TENSOR_FFN_NORM_EXPS,
    LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
    LLM_TENSOR_FFN_GATE_EXPS,
    LLM_TENSOR_FFN_UP_EXPS,
    LLM_TENSOR_FFN_DOWN_SHEXP,
    LLM_TENSOR_FFN_GATE_SHEXP,
    LLM_TENSOR_FFN_UP_SHEXP,
    LLM_TENSOR_FFN_DOWN_CHEXPS,
    LLM_TENSOR_FFN_GATE_CHEXPS,
    LLM_TENSOR_FFN_UP_CHEXPS,
    LLM_TENSOR_FFN_EXP_PROBS_B,
    LLM_TENSOR_ATTN_Q_NORM,
    LLM_TENSOR_ATTN_K_NORM,
    LLM_TENSOR_LAYER_OUT_NORM,
    LLM_TENSOR_POST_ATTN_NORM,
    LLM_TENSOR_POST_MLP_NORM,
    LLM_TENSOR_PER_LAYER_TOKEN_EMBD,    // gemma3n
    LLM_TENSOR_PER_LAYER_MODEL_PROJ,    // gemma3n
    LLM_TENSOR_PER_LAYER_INP_GATE,      // gemma3n
    LLM_TENSOR_PER_LAYER_PROJ,          // gemma3n
    LLM_TENSOR_PER_LAYER_PROJ_NORM,     // gemma3n
    LLM_TENSOR_PER_LAYER_POST_NORM,     // gemma3n
    LLM_TENSOR_ALTUP_PROJ,              // gemma3n
    LLM_TENSOR_ALTUP_UNEMBD_PROJ,       // gemma3n
    LLM_TENSOR_ALTUP_CORRECT_COEF,      // gemma3n
    LLM_TENSOR_ALTUP_CORRECT_SCALE,     // gemma3n
    LLM_TENSOR_ALTUP_PREDICT_COEF,      // gemma3n
    LLM_TENSOR_ALTUP_ROUTER,            // gemma3n
    LLM_TENSOR_ALTUP_ROUTER_NORM,       // gemma3n
    LLM_TENSOR_LAUREL_L,                // gemma3n
    LLM_TENSOR_LAUREL_R,                // gemma3n
    LLM_TENSOR_LAUREL_POST_NORM,        // gemma3n
    // state-space (Mamba-style) layers
    LLM_TENSOR_SSM_IN,
    LLM_TENSOR_SSM_CONV1D,
    LLM_TENSOR_SSM_X,
    LLM_TENSOR_SSM_DT,
    LLM_TENSOR_SSM_DT_NORM,
    LLM_TENSOR_SSM_A,
    LLM_TENSOR_SSM_A_NOSCAN,  // qwen3next special case with MUL instead of SSM_SCAN
    LLM_TENSOR_SSM_B_NORM,
    LLM_TENSOR_SSM_C_NORM,
    LLM_TENSOR_SSM_D,
    LLM_TENSOR_SSM_NORM,
    LLM_TENSOR_SSM_OUT,
    LLM_TENSOR_SSM_BETA_ALPHA,          // qwen3next
    // RWKV time-mix / channel-mix layers
    LLM_TENSOR_TIME_MIX_W0,
    LLM_TENSOR_TIME_MIX_W1,
    LLM_TENSOR_TIME_MIX_W2,
    LLM_TENSOR_TIME_MIX_A0,
    LLM_TENSOR_TIME_MIX_A1,
    LLM_TENSOR_TIME_MIX_A2,
    LLM_TENSOR_TIME_MIX_V0,
    LLM_TENSOR_TIME_MIX_V1,
    LLM_TENSOR_TIME_MIX_V2,
    LLM_TENSOR_TIME_MIX_G1,
    LLM_TENSOR_TIME_MIX_G2,
    LLM_TENSOR_TIME_MIX_K_K,
    LLM_TENSOR_TIME_MIX_K_A,
    LLM_TENSOR_TIME_MIX_R_K,
    LLM_TENSOR_TIME_MIX_LERP_X,
    LLM_TENSOR_TIME_MIX_LERP_W,
    LLM_TENSOR_TIME_MIX_LERP_K,
    LLM_TENSOR_TIME_MIX_LERP_V,
    LLM_TENSOR_TIME_MIX_LERP_R,
    LLM_TENSOR_TIME_MIX_LERP_G,
    LLM_TENSOR_TIME_MIX_LERP_FUSED,
    LLM_TENSOR_TIME_MIX_FIRST,
    LLM_TENSOR_TIME_MIX_DECAY,
    LLM_TENSOR_TIME_MIX_DECAY_W1,
    LLM_TENSOR_TIME_MIX_DECAY_W2,
    LLM_TENSOR_TIME_MIX_KEY,
    LLM_TENSOR_TIME_MIX_VALUE,
    LLM_TENSOR_TIME_MIX_RECEPTANCE,
    LLM_TENSOR_TIME_MIX_GATE,
    LLM_TENSOR_TIME_MIX_LN,
    LLM_TENSOR_TIME_MIX_OUTPUT,
    LLM_TENSOR_CHANNEL_MIX_LERP_K,
    LLM_TENSOR_CHANNEL_MIX_LERP_R,
    LLM_TENSOR_CHANNEL_MIX_KEY,
    LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
    LLM_TENSOR_CHANNEL_MIX_VALUE,
    // multi-head latent attention (low-rank Q/KV projections)
    LLM_TENSOR_ATTN_Q_A,
    LLM_TENSOR_ATTN_Q_B,
    LLM_TENSOR_ATTN_KV_A_MQA,
    LLM_TENSOR_ATTN_KV_B,
    LLM_TENSOR_ATTN_K_B,
    LLM_TENSOR_ATTN_V_B,
    LLM_TENSOR_ATTN_Q_A_NORM,
    LLM_TENSOR_ATTN_KV_A_NORM,
    LLM_TENSOR_ATTN_SUB_NORM,
    LLM_TENSOR_FFN_SUB_NORM,
    // T5-style encoder/decoder tensors
    LLM_TENSOR_DEC_ATTN_NORM,
    LLM_TENSOR_DEC_ATTN_Q,
    LLM_TENSOR_DEC_ATTN_K,
    LLM_TENSOR_DEC_ATTN_V,
    LLM_TENSOR_DEC_ATTN_OUT,
    LLM_TENSOR_DEC_ATTN_REL_B,
    LLM_TENSOR_DEC_CROSS_ATTN_NORM,
    LLM_TENSOR_DEC_CROSS_ATTN_Q,
    LLM_TENSOR_DEC_CROSS_ATTN_K,
    LLM_TENSOR_DEC_CROSS_ATTN_V,
    LLM_TENSOR_DEC_CROSS_ATTN_OUT,
    LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
    LLM_TENSOR_DEC_FFN_NORM,
    LLM_TENSOR_DEC_FFN_GATE,
    LLM_TENSOR_DEC_FFN_DOWN,
    LLM_TENSOR_DEC_FFN_UP,
    LLM_TENSOR_DEC_OUTPUT_NORM,
    LLM_TENSOR_ENC_ATTN_NORM,
    LLM_TENSOR_ENC_ATTN_Q,
    LLM_TENSOR_ENC_ATTN_K,
    LLM_TENSOR_ENC_ATTN_V,
    LLM_TENSOR_ENC_ATTN_OUT,
    LLM_TENSOR_ENC_ATTN_REL_B,
    LLM_TENSOR_ENC_FFN_NORM,
    LLM_TENSOR_ENC_FFN_GATE,
    LLM_TENSOR_ENC_FFN_DOWN,
    LLM_TENSOR_ENC_FFN_UP,
    LLM_TENSOR_ENC_OUTPUT_NORM,
    // classifier heads
    LLM_TENSOR_CLS,
    LLM_TENSOR_CLS_OUT,
    // audio / codec tensors
    LLM_TENSOR_CONV1D,
    LLM_TENSOR_CONVNEXT_DW,
    LLM_TENSOR_CONVNEXT_NORM,
    LLM_TENSOR_CONVNEXT_PW1,
    LLM_TENSOR_CONVNEXT_PW2,
    LLM_TENSOR_CONVNEXT_GAMMA,
    LLM_TENSOR_POS_NET_CONV1,
    LLM_TENSOR_POS_NET_CONV2,
    LLM_TENSOR_POS_NET_NORM,
    LLM_TENSOR_POS_NET_NORM1,
    LLM_TENSOR_POS_NET_NORM2,
    LLM_TENSOR_POS_NET_ATTN_NORM,
    LLM_TENSOR_POS_NET_ATTN_Q,
    LLM_TENSOR_POS_NET_ATTN_K,
    LLM_TENSOR_POS_NET_ATTN_V,
    LLM_TENSOR_POS_NET_ATTN_OUT,
    LLM_TENSOR_SHORTCONV_CONV,
    LLM_TENSOR_SHORTCONV_INPROJ,
    LLM_TENSOR_SHORTCONV_OUTPROJ,
    // vision-expert tensors
    LLM_TENSOR_VISEXP_ATTN_QKV,
    LLM_TENSOR_VISEXP_ATTN_OUT,
    LLM_TENSOR_VISEXP_FFN_GATE,
    LLM_TENSOR_VISEXP_FFN_DOWN,
    LLM_TENSOR_VISEXP_FFN_UP,
    // next-token-prediction (MTP) head tensors
    LLM_TENSOR_NEXTN_EH_PROJ,
    LLM_TENSOR_NEXTN_EMBED_TOKENS,
    LLM_TENSOR_NEXTN_ENORM,
    LLM_TENSOR_NEXTN_HNORM,
    LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
    LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
};
507 | | |
// Coarse position of a tensor within the model graph, used when deciding
// buffer/backend placement for input, per-layer (repeating) and output tensors.
enum llm_tensor_layer {
    LLM_TENSOR_LAYER_INPUT,     // e.g. token embeddings
    LLM_TENSOR_LAYER_REPEATING, // tensors that exist once per transformer block
    LLM_TENSOR_LAYER_OUTPUT,    // e.g. output head / final norm
};
513 | | |
// Functor that maps an llm_kv identifier to its architecture-specific GGUF
// key string; an optional suffix is appended when set (ctor defined out-of-line).
struct LLM_KV {
    LLM_KV(llm_arch arch, const char * suffix = nullptr);

    llm_arch arch;         // architecture whose key names are produced
    const char * suffix;   // optional suffix appended to every generated key (may be nullptr)

    // returns the full key string for the given kv identifier
    std::string operator()(llm_kv kv) const;
};
522 | | |
523 | | // helper to handle gguf constants |
524 | | // usage: |
525 | | // |
526 | | // const auto tn = LLM_TN(LLM_ARCH_LLAMA); |
527 | | // |
528 | | // std::string name = tn(LLM_TENSOR_OUTPUT); -> "output" |
529 | | // std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias" |
530 | | // std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight" |
531 | | // |
532 | | struct LLM_TN_IMPL { |
533 | | const llm_arch arch; |
534 | | const llm_tensor tensor; |
535 | | const char * const suffix; |
536 | | const int bid; |
537 | | const int xid; |
538 | | |
539 | | const std::set<llm_tensor> model_tensors; |
540 | | |
541 | | LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid); |
542 | | |
543 | | std::string str() const; |
544 | | |
545 | 0 | operator std::string() const { |
546 | 0 | return str(); |
547 | 0 | } |
548 | | |
549 | 0 | friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) { |
550 | 0 | return str == tn.str(); |
551 | 0 | } |
552 | | |
553 | 0 | friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) { |
554 | 0 | return str != tn.str(); |
555 | 0 | } |
556 | | }; |
557 | | |
558 | | struct LLM_TN { |
559 | 0 | LLM_TN(llm_arch arch) : arch(arch) {} |
560 | | |
561 | | llm_arch arch; |
562 | | |
563 | 0 | LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const { |
564 | 0 | return LLM_TN_IMPL(arch, tensor, suffix, bid, xid); |
565 | 0 | } |
566 | | |
567 | 0 | LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const { |
568 | 0 | return LLM_TN_IMPL(arch, tensor, nullptr, bid, xid); |
569 | 0 | } |
570 | | }; |
571 | | |
572 | | |
// Static metadata about a tensor kind: where it lives in the graph and the
// ggml op it participates in (looked up via llm_tensor_info_for).
struct llm_tensor_info {
    llm_tensor_layer layer; // placement category (input / repeating / output)
    ggml_op op;             // associated ggml operation
};
577 | | |
// returns the canonical string name for an architecture
const char * llm_arch_name(llm_arch arch);

// parses an architecture name; counterpart of llm_arch_name
llm_arch llm_arch_from_string(const std::string & name);

// static metadata (layer placement + ggml op) for a tensor kind
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);

// architecture capability queries
bool llm_arch_is_recurrent(const llm_arch & arch);
bool llm_arch_is_hybrid (const llm_arch & arch);
bool llm_arch_is_diffusion(const llm_arch & arch);