/src/llama.cpp/src/llama-arch.h
Line | Count | Source |
1 | | #pragma once |
2 | | |
3 | | #include "ggml.h" // ggml_op |
4 | | |
5 | | #include <string> |
6 | | |
7 | | // |
8 | | // gguf constants (sync with gguf.py) |
9 | | // |
10 | | |
// Identifies a model architecture.
//
// The enumerators carry no explicit values, so each one is numbered by its
// position in the list (LLM_ARCH_CLIP == 0). Inserting or reordering entries
// therefore renumbers every enumerator that follows.
//
// Values are converted to and from text by llm_arch_name() and
// llm_arch_from_string(), both declared at the bottom of this header.
// LLM_ARCH_UNKNOWN is the last enumerator; presumably it is what
// llm_arch_from_string() yields for an unrecognized name -- TODO confirm in
// the implementation file.
11 | | enum llm_arch { |
12 | | LLM_ARCH_CLIP, |
13 | | LLM_ARCH_LLAMA, |
14 | | LLM_ARCH_LLAMA4, |
15 | | LLM_ARCH_DECI, |
16 | | LLM_ARCH_FALCON, |
17 | | LLM_ARCH_BAICHUAN, |
18 | | LLM_ARCH_GROK, |
19 | | LLM_ARCH_GPT2, |
20 | | LLM_ARCH_GPTJ, |
21 | | LLM_ARCH_GPTNEOX, |
22 | | LLM_ARCH_MPT, |
23 | | LLM_ARCH_STARCODER, |
24 | | LLM_ARCH_REFACT, |
25 | | LLM_ARCH_BERT, |
26 | | LLM_ARCH_NOMIC_BERT, |
27 | | LLM_ARCH_NOMIC_BERT_MOE, |
28 | | LLM_ARCH_NEO_BERT, |
29 | | LLM_ARCH_JINA_BERT_V2, |
30 | | LLM_ARCH_JINA_BERT_V3, |
31 | | LLM_ARCH_BLOOM, |
32 | | LLM_ARCH_STABLELM, |
33 | | LLM_ARCH_QWEN, |
34 | | LLM_ARCH_QWEN2, |
35 | | LLM_ARCH_QWEN2MOE, |
36 | | LLM_ARCH_QWEN2VL, |
37 | | LLM_ARCH_QWEN3, |
38 | | LLM_ARCH_QWEN3MOE, |
39 | | LLM_ARCH_QWEN3VL, |
40 | | LLM_ARCH_QWEN3VLMOE, |
41 | | LLM_ARCH_PHI2, |
42 | | LLM_ARCH_PHI3, |
43 | | LLM_ARCH_PHIMOE, |
44 | | LLM_ARCH_PLAMO, |
45 | | LLM_ARCH_PLAMO2, |
46 | | LLM_ARCH_CODESHELL, |
47 | | LLM_ARCH_ORION, |
48 | | LLM_ARCH_INTERNLM2, |
49 | | LLM_ARCH_MINICPM, |
50 | | LLM_ARCH_MINICPM3, |
51 | | LLM_ARCH_GEMMA, |
52 | | LLM_ARCH_GEMMA2, |
53 | | LLM_ARCH_GEMMA3, |
54 | | LLM_ARCH_GEMMA3N, |
55 | | LLM_ARCH_GEMMA_EMBEDDING, |
56 | | LLM_ARCH_STARCODER2, |
57 | | LLM_ARCH_MAMBA, |
58 | | LLM_ARCH_MAMBA2, |
59 | | LLM_ARCH_JAMBA, |
60 | | LLM_ARCH_FALCON_H1, |
61 | | LLM_ARCH_XVERSE, |
62 | | LLM_ARCH_COMMAND_R, |
63 | | LLM_ARCH_COHERE2, |
64 | | LLM_ARCH_DBRX, |
65 | | LLM_ARCH_OLMO, |
66 | | LLM_ARCH_OLMO2, |
67 | | LLM_ARCH_OLMOE, |
68 | | LLM_ARCH_OPENELM, |
69 | | LLM_ARCH_ARCTIC, |
70 | | LLM_ARCH_DEEPSEEK, |
71 | | LLM_ARCH_DEEPSEEK2, |
72 | | LLM_ARCH_CHATGLM, |
73 | | LLM_ARCH_GLM4, |
74 | | LLM_ARCH_GLM4_MOE, |
75 | | LLM_ARCH_BITNET, |
76 | | LLM_ARCH_T5, |
77 | | LLM_ARCH_T5ENCODER, |
78 | | LLM_ARCH_JAIS, |
79 | | LLM_ARCH_NEMOTRON, |
80 | | LLM_ARCH_NEMOTRON_H, |
81 | | LLM_ARCH_EXAONE, |
82 | | LLM_ARCH_EXAONE4, |
83 | | LLM_ARCH_RWKV6, |
84 | | LLM_ARCH_RWKV6QWEN2, |
85 | | LLM_ARCH_RWKV7, |
86 | | LLM_ARCH_ARWKV7, |
87 | | LLM_ARCH_GRANITE, |
88 | | LLM_ARCH_GRANITE_MOE, |
89 | | LLM_ARCH_GRANITE_HYBRID, |
90 | | LLM_ARCH_CHAMELEON, |
91 | | LLM_ARCH_WAVTOKENIZER_DEC, |
92 | | LLM_ARCH_PLM, |
93 | | LLM_ARCH_BAILINGMOE, |
94 | | LLM_ARCH_BAILINGMOE2, |
95 | | LLM_ARCH_DOTS1, |
96 | | LLM_ARCH_ARCEE, |
97 | | LLM_ARCH_AFMOE, |
98 | | LLM_ARCH_ERNIE4_5, |
99 | | LLM_ARCH_ERNIE4_5_MOE, |
100 | | LLM_ARCH_HUNYUAN_MOE, |
101 | | LLM_ARCH_HUNYUAN_DENSE, |
102 | | LLM_ARCH_SMOLLM3, |
103 | | LLM_ARCH_OPENAI_MOE, |
104 | | LLM_ARCH_LFM2, |
105 | | LLM_ARCH_LFM2MOE, |
106 | | LLM_ARCH_DREAM, |
107 | | LLM_ARCH_SMALLTHINKER, |
108 | | LLM_ARCH_LLADA, |
109 | | LLM_ARCH_LLADA_MOE, |
110 | | LLM_ARCH_SEED_OSS, |
111 | | LLM_ARCH_GROVEMOE, |
112 | | LLM_ARCH_APERTUS, |
113 | | LLM_ARCH_MINIMAX_M2, |
114 | | LLM_ARCH_COGVLM, |
115 | | LLM_ARCH_PANGU_EMBED, |
116 | | LLM_ARCH_UNKNOWN, |
117 | | }; |
118 | | |
// Identifies a metadata key.
//
// A value of this enum is passed to LLM_KV::operator()(llm_kv), which returns
// the key as a std::string; the enum-to-string mapping is defined outside this
// header. Enumerators carry no explicit values and are numbered by position.
//
// Blank rows separate groups of related keys, most of which share a name
// prefix (GENERAL, ATTENTION, ROPE, SPLIT, SSM, TOKENIZER, ADAPTER, ...).
//
// NOTE(review): the "deprecated:" marker near the end appears to cover only
// the three LLM_KV_TOKENIZER_{PREFIX,SUFFIX,MIDDLE}_ID entries that follow it;
// presumably they are superseded by LLM_KV_TOKENIZER_FIM_{PRE,SUF,MID}_ID --
// TODO confirm.
119 | | enum llm_kv { |
120 | | LLM_KV_GENERAL_TYPE, |
121 | | LLM_KV_GENERAL_ARCHITECTURE, |
122 | | LLM_KV_GENERAL_QUANTIZATION_VERSION, |
123 | | LLM_KV_GENERAL_ALIGNMENT, |
124 | | LLM_KV_GENERAL_FILE_TYPE, |
125 | | LLM_KV_GENERAL_NAME, |
126 | | LLM_KV_GENERAL_AUTHOR, |
127 | | LLM_KV_GENERAL_VERSION, |
128 | | LLM_KV_GENERAL_URL, |
129 | | LLM_KV_GENERAL_DESCRIPTION, |
130 | | LLM_KV_GENERAL_LICENSE, |
131 | | LLM_KV_GENERAL_SOURCE_URL, |
132 | | LLM_KV_GENERAL_SOURCE_HF_REPO, |
133 | | |
134 | | LLM_KV_VOCAB_SIZE, |
135 | | LLM_KV_CONTEXT_LENGTH, |
136 | | LLM_KV_EMBEDDING_LENGTH, |
137 | | LLM_KV_FEATURES_LENGTH, |
138 | | LLM_KV_BLOCK_COUNT, |
139 | | LLM_KV_LEADING_DENSE_BLOCK_COUNT, |
140 | | LLM_KV_FEED_FORWARD_LENGTH, |
141 | | LLM_KV_EXPERT_FEED_FORWARD_LENGTH, |
142 | | LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, |
143 | | LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH, |
144 | | LLM_KV_USE_PARALLEL_RESIDUAL, |
145 | | LLM_KV_TENSOR_DATA_LAYOUT, |
146 | | LLM_KV_EXPERT_COUNT, |
147 | | LLM_KV_EXPERT_USED_COUNT, |
148 | | LLM_KV_EXPERT_SHARED_COUNT, |
149 | | LLM_KV_EXPERT_GROUP_COUNT, |
150 | | LLM_KV_EXPERT_GROUP_USED_COUNT, |
151 | | LLM_KV_EXPERT_WEIGHTS_SCALE, |
152 | | LLM_KV_EXPERT_WEIGHTS_NORM, |
153 | | LLM_KV_EXPERT_GATING_FUNC, |
154 | | LLM_KV_EXPERT_GROUP_SCALE, |
155 | | LLM_KV_EXPERTS_PER_GROUP, |
156 | | LLM_KV_MOE_EVERY_N_LAYERS, |
157 | | LLM_KV_NEXTN_PREDICT_LAYERS, |
158 | | LLM_KV_NUM_DEEPSTACK_LAYERS, |
159 | | LLM_KV_POOLING_TYPE, |
160 | | LLM_KV_LOGIT_SCALE, |
161 | | LLM_KV_DECODER_START_TOKEN_ID, |
162 | | LLM_KV_DECODER_BLOCK_COUNT, |
163 | | LLM_KV_ATTN_LOGIT_SOFTCAPPING, |
164 | | LLM_KV_ROUTER_LOGIT_SOFTCAPPING, |
165 | | LLM_KV_FINAL_LOGIT_SOFTCAPPING, |
166 | | LLM_KV_SWIN_NORM, |
167 | | LLM_KV_RESCALE_EVERY_N_LAYERS, |
168 | | LLM_KV_TIME_MIX_EXTRA_DIM, |
169 | | LLM_KV_TIME_DECAY_EXTRA_DIM, |
170 | | LLM_KV_RESIDUAL_SCALE, |
171 | | LLM_KV_EMBEDDING_SCALE, |
172 | | LLM_KV_TOKEN_SHIFT_COUNT, |
173 | | LLM_KV_INTERLEAVE_MOE_LAYER_STEP, |
174 | | |
175 | | LLM_KV_ATTENTION_HEAD_COUNT, |
176 | | LLM_KV_ATTENTION_HEAD_COUNT_KV, |
177 | | LLM_KV_ATTENTION_MAX_ALIBI_BIAS, |
178 | | LLM_KV_ATTENTION_CLAMP_KQV, |
179 | | LLM_KV_ATTENTION_KEY_LENGTH, |
180 | | LLM_KV_ATTENTION_VALUE_LENGTH, |
181 | | LLM_KV_ATTENTION_LAYERNORM_EPS, |
182 | | LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, |
183 | | LLM_KV_ATTENTION_GROUPNORM_EPS, |
184 | | LLM_KV_ATTENTION_GROUPNORM_GROUPS, |
185 | | LLM_KV_ATTENTION_CAUSAL, |
186 | | LLM_KV_ATTENTION_Q_LORA_RANK, |
187 | | LLM_KV_ATTENTION_KV_LORA_RANK, |
188 | | LLM_KV_ATTENTION_DECAY_LORA_RANK, |
189 | | LLM_KV_ATTENTION_ICLR_LORA_RANK, |
190 | | LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, |
191 | | LLM_KV_ATTENTION_GATE_LORA_RANK, |
192 | | LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, |
193 | | LLM_KV_ATTENTION_SLIDING_WINDOW, |
194 | | LLM_KV_ATTENTION_SCALE, |
195 | | LLM_KV_ATTENTION_OUTPUT_SCALE, |
196 | | LLM_KV_ATTENTION_TEMPERATURE_LENGTH, |
197 | | LLM_KV_ATTENTION_KEY_LENGTH_MLA, |
198 | | LLM_KV_ATTENTION_VALUE_LENGTH_MLA, |
199 | | |
200 | | LLM_KV_ROPE_DIMENSION_COUNT, |
201 | | LLM_KV_ROPE_DIMENSION_SECTIONS, |
202 | | LLM_KV_ROPE_FREQ_BASE, |
203 | | LLM_KV_ROPE_SCALE_LINEAR, |
204 | | LLM_KV_ROPE_SCALING_TYPE, |
205 | | LLM_KV_ROPE_SCALING_FACTOR, |
206 | | LLM_KV_ROPE_SCALING_ATTN_FACTOR, |
207 | | LLM_KV_ROPE_SCALING_ORIG_CTX_LEN, |
208 | | LLM_KV_ROPE_SCALING_FINETUNED, |
209 | | LLM_KV_ROPE_SCALING_YARN_LOG_MUL, |
210 | | LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR, |
211 | | LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR, |
212 | | LLM_KV_ROPE_SCALING_YARN_BETA_FAST, |
213 | | LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, |
214 | | |
215 | | LLM_KV_SPLIT_NO, |
216 | | LLM_KV_SPLIT_COUNT, |
217 | | LLM_KV_SPLIT_TENSORS_COUNT, |
218 | | |
219 | | LLM_KV_SSM_INNER_SIZE, |
220 | | LLM_KV_SSM_CONV_KERNEL, |
221 | | LLM_KV_SSM_STATE_SIZE, |
222 | | LLM_KV_SSM_TIME_STEP_RANK, |
223 | | LLM_KV_SSM_GROUP_COUNT, |
224 | | LLM_KV_SSM_DT_B_C_RMS, |
225 | | |
226 | | LLM_KV_WKV_HEAD_SIZE, |
227 | | |
228 | | LLM_KV_TOKENIZER_MODEL, |
229 | | LLM_KV_TOKENIZER_PRE, |
230 | | LLM_KV_TOKENIZER_LIST, |
231 | | LLM_KV_TOKENIZER_TOKEN_TYPE, |
232 | | LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, |
233 | | LLM_KV_TOKENIZER_SCORES, |
234 | | LLM_KV_TOKENIZER_MERGES, |
235 | | LLM_KV_TOKENIZER_BOS_ID, |
236 | | LLM_KV_TOKENIZER_EOS_ID, |
237 | | LLM_KV_TOKENIZER_EOT_ID, |
238 | | LLM_KV_TOKENIZER_EOM_ID, |
239 | | LLM_KV_TOKENIZER_UNK_ID, |
240 | | LLM_KV_TOKENIZER_SEP_ID, |
241 | | LLM_KV_TOKENIZER_PAD_ID, |
242 | | LLM_KV_TOKENIZER_CLS_ID, |
243 | | LLM_KV_TOKENIZER_MASK_ID, |
244 | | LLM_KV_TOKENIZER_ADD_BOS, |
245 | | LLM_KV_TOKENIZER_ADD_EOS, |
246 | | LLM_KV_TOKENIZER_ADD_SEP, |
247 | | LLM_KV_TOKENIZER_ADD_PREFIX, |
248 | | LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, |
249 | | LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, |
250 | | LLM_KV_TOKENIZER_HF_JSON, |
251 | | LLM_KV_TOKENIZER_RWKV, |
252 | | LLM_KV_TOKENIZER_CHAT_TEMPLATE, |
253 | | LLM_KV_TOKENIZER_FIM_PRE_ID, |
254 | | LLM_KV_TOKENIZER_FIM_SUF_ID, |
255 | | LLM_KV_TOKENIZER_FIM_MID_ID, |
256 | | LLM_KV_TOKENIZER_FIM_PAD_ID, |
257 | | LLM_KV_TOKENIZER_FIM_REP_ID, |
258 | | LLM_KV_TOKENIZER_FIM_SEP_ID, |
259 | | |
260 | | LLM_KV_ADAPTER_TYPE, |
261 | | LLM_KV_ADAPTER_LORA_ALPHA, |
262 | | LLM_KV_ADAPTER_LORA_TASK_NAME, |
263 | | LLM_KV_ADAPTER_LORA_PROMPT_PREFIX, |
264 | | LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS, |
265 | | |
266 | | LLM_KV_POSNET_EMBEDDING_LENGTH, |
267 | | LLM_KV_POSNET_BLOCK_COUNT, |
268 | | |
269 | | LLM_KV_CONVNEXT_EMBEDDING_LENGTH, |
270 | | LLM_KV_CONVNEXT_BLOCK_COUNT, |
271 | | |
272 | | LLM_KV_CLASSIFIER_OUTPUT_LABELS, |
273 | | |
274 | | LLM_KV_SHORTCONV_L_CACHE, |
275 | | |
276 | | LLM_KV_XIELU_ALPHA_N, |
277 | | LLM_KV_XIELU_ALPHA_P, |
278 | | LLM_KV_XIELU_BETA, |
279 | | LLM_KV_XIELU_EPS, |
280 | | |
281 | | // deprecated: |
282 | | LLM_KV_TOKENIZER_PREFIX_ID, |
283 | | LLM_KV_TOKENIZER_SUFFIX_ID, |
284 | | LLM_KV_TOKENIZER_MIDDLE_ID, |
285 | | |
286 | | // sentence-transformers dense layers in and out features |
287 | | LLM_KV_DENSE_2_FEAT_IN, |
288 | | LLM_KV_DENSE_2_FEAT_OUT, |
289 | | LLM_KV_DENSE_3_FEAT_IN, |
290 | | LLM_KV_DENSE_3_FEAT_OUT, |
291 | | }; |
292 | | |
// Identifies a kind of model tensor.
//
// Values of this enum are consumed in two places declared in this header:
//   - LLM_TN::operator()(...) packs one into an LLM_TN_IMPL, whose str()
//     produces the tensor name (e.g. LLM_TENSOR_OUTPUT -> "output" for
//     LLM_ARCH_LLAMA, per the usage comment above LLM_TN_IMPL);
//   - llm_tensor_info_for() returns the llm_tensor_info for one.
//
// Enumerators carry no explicit values and are numbered by position, so the
// order of this list is significant. Entries tagged "// gemma3n" are marked by
// the original authors as belonging to that architecture.
293 | | enum llm_tensor { |
294 | | LLM_TENSOR_TOKEN_EMBD, |
295 | | LLM_TENSOR_TOKEN_EMBD_NORM, |
296 | | LLM_TENSOR_TOKEN_TYPES, |
297 | | LLM_TENSOR_POS_EMBD, |
298 | | LLM_TENSOR_DENSE_2_OUT, |
299 | | LLM_TENSOR_DENSE_3_OUT, |
300 | | LLM_TENSOR_OUTPUT, |
301 | | LLM_TENSOR_OUTPUT_NORM, |
302 | | LLM_TENSOR_ROPE_FREQS, |
303 | | LLM_TENSOR_ROPE_FACTORS_LONG, |
304 | | LLM_TENSOR_ROPE_FACTORS_SHORT, |
305 | | LLM_TENSOR_ATTN_Q, |
306 | | LLM_TENSOR_ATTN_K, |
307 | | LLM_TENSOR_ATTN_V, |
308 | | LLM_TENSOR_ATTN_QKV, |
309 | | LLM_TENSOR_ATTN_OUT, |
310 | | LLM_TENSOR_ATTN_NORM, |
311 | | LLM_TENSOR_ATTN_NORM_2, |
312 | | LLM_TENSOR_ATTN_OUT_NORM, |
313 | | LLM_TENSOR_ATTN_POST_NORM, |
314 | | LLM_TENSOR_ATTN_ROT_EMBD, |
315 | | LLM_TENSOR_ATTN_SINKS, |
316 | | LLM_TENSOR_ATTN_GATE, |
317 | | LLM_TENSOR_FFN_GATE_INP, |
318 | | LLM_TENSOR_FFN_GATE_INP_SHEXP, |
319 | | LLM_TENSOR_FFN_NORM, |
320 | | LLM_TENSOR_FFN_POST_NORM, |
321 | | LLM_TENSOR_FFN_GATE, |
322 | | LLM_TENSOR_FFN_DOWN, |
323 | | LLM_TENSOR_FFN_UP, |
324 | | LLM_TENSOR_FFN_ACT, |
325 | | LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility |
326 | | LLM_TENSOR_FFN_GATE_EXP, |
327 | | LLM_TENSOR_FFN_UP_EXP, |
328 | | LLM_TENSOR_FFN_NORM_EXPS, |
329 | | LLM_TENSOR_FFN_DOWN_EXPS, // merged experts |
330 | | LLM_TENSOR_FFN_GATE_EXPS, |
331 | | LLM_TENSOR_FFN_UP_EXPS, |
332 | | LLM_TENSOR_FFN_DOWN_SHEXP, |
333 | | LLM_TENSOR_FFN_GATE_SHEXP, |
334 | | LLM_TENSOR_FFN_UP_SHEXP, |
335 | | LLM_TENSOR_FFN_DOWN_CHEXPS, |
336 | | LLM_TENSOR_FFN_GATE_CHEXPS, |
337 | | LLM_TENSOR_FFN_UP_CHEXPS, |
338 | | LLM_TENSOR_FFN_EXP_PROBS_B, |
339 | | LLM_TENSOR_ATTN_Q_NORM, |
340 | | LLM_TENSOR_ATTN_K_NORM, |
341 | | LLM_TENSOR_LAYER_OUT_NORM, |
342 | | LLM_TENSOR_POST_ATTN_NORM, |
343 | | LLM_TENSOR_POST_MLP_NORM, |
344 | | LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n |
345 | | LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n |
346 | | LLM_TENSOR_PER_LAYER_INP_GATE, // gemma3n |
347 | | LLM_TENSOR_PER_LAYER_PROJ, // gemma3n |
348 | | LLM_TENSOR_PER_LAYER_PROJ_NORM, // gemma3n |
349 | | LLM_TENSOR_PER_LAYER_POST_NORM, // gemma3n |
350 | | LLM_TENSOR_ALTUP_PROJ, // gemma3n |
351 | | LLM_TENSOR_ALTUP_UNEMBD_PROJ, // gemma3n |
352 | | LLM_TENSOR_ALTUP_CORRECT_COEF, // gemma3n |
353 | | LLM_TENSOR_ALTUP_CORRECT_SCALE, // gemma3n |
354 | | LLM_TENSOR_ALTUP_PREDICT_COEF, // gemma3n |
355 | | LLM_TENSOR_ALTUP_ROUTER, // gemma3n |
356 | | LLM_TENSOR_ALTUP_ROUTER_NORM, // gemma3n |
357 | | LLM_TENSOR_LAUREL_L, // gemma3n |
358 | | LLM_TENSOR_LAUREL_R, // gemma3n |
359 | | LLM_TENSOR_LAUREL_POST_NORM, // gemma3n |
360 | | LLM_TENSOR_SSM_IN, |
361 | | LLM_TENSOR_SSM_CONV1D, |
362 | | LLM_TENSOR_SSM_X, |
363 | | LLM_TENSOR_SSM_DT, |
364 | | LLM_TENSOR_SSM_DT_NORM, |
365 | | LLM_TENSOR_SSM_A, |
366 | | LLM_TENSOR_SSM_B_NORM, |
367 | | LLM_TENSOR_SSM_C_NORM, |
368 | | LLM_TENSOR_SSM_D, |
369 | | LLM_TENSOR_SSM_NORM, |
370 | | LLM_TENSOR_SSM_OUT, |
371 | | LLM_TENSOR_TIME_MIX_W0, |
372 | | LLM_TENSOR_TIME_MIX_W1, |
373 | | LLM_TENSOR_TIME_MIX_W2, |
374 | | LLM_TENSOR_TIME_MIX_A0, |
375 | | LLM_TENSOR_TIME_MIX_A1, |
376 | | LLM_TENSOR_TIME_MIX_A2, |
377 | | LLM_TENSOR_TIME_MIX_V0, |
378 | | LLM_TENSOR_TIME_MIX_V1, |
379 | | LLM_TENSOR_TIME_MIX_V2, |
380 | | LLM_TENSOR_TIME_MIX_G1, |
381 | | LLM_TENSOR_TIME_MIX_G2, |
382 | | LLM_TENSOR_TIME_MIX_K_K, |
383 | | LLM_TENSOR_TIME_MIX_K_A, |
384 | | LLM_TENSOR_TIME_MIX_R_K, |
385 | | LLM_TENSOR_TIME_MIX_LERP_X, |
386 | | LLM_TENSOR_TIME_MIX_LERP_W, |
387 | | LLM_TENSOR_TIME_MIX_LERP_K, |
388 | | LLM_TENSOR_TIME_MIX_LERP_V, |
389 | | LLM_TENSOR_TIME_MIX_LERP_R, |
390 | | LLM_TENSOR_TIME_MIX_LERP_G, |
391 | | LLM_TENSOR_TIME_MIX_LERP_FUSED, |
392 | | LLM_TENSOR_TIME_MIX_FIRST, |
393 | | LLM_TENSOR_TIME_MIX_DECAY, |
394 | | LLM_TENSOR_TIME_MIX_DECAY_W1, |
395 | | LLM_TENSOR_TIME_MIX_DECAY_W2, |
396 | | LLM_TENSOR_TIME_MIX_KEY, |
397 | | LLM_TENSOR_TIME_MIX_VALUE, |
398 | | LLM_TENSOR_TIME_MIX_RECEPTANCE, |
399 | | LLM_TENSOR_TIME_MIX_GATE, |
400 | | LLM_TENSOR_TIME_MIX_LN, |
401 | | LLM_TENSOR_TIME_MIX_OUTPUT, |
402 | | LLM_TENSOR_CHANNEL_MIX_LERP_K, |
403 | | LLM_TENSOR_CHANNEL_MIX_LERP_R, |
404 | | LLM_TENSOR_CHANNEL_MIX_KEY, |
405 | | LLM_TENSOR_CHANNEL_MIX_RECEPTANCE, |
406 | | LLM_TENSOR_CHANNEL_MIX_VALUE, |
407 | | LLM_TENSOR_ATTN_Q_A, |
408 | | LLM_TENSOR_ATTN_Q_B, |
409 | | LLM_TENSOR_ATTN_KV_A_MQA, |
410 | | LLM_TENSOR_ATTN_KV_B, |
411 | | LLM_TENSOR_ATTN_K_B, |
412 | | LLM_TENSOR_ATTN_V_B, |
413 | | LLM_TENSOR_ATTN_Q_A_NORM, |
414 | | LLM_TENSOR_ATTN_KV_A_NORM, |
415 | | LLM_TENSOR_ATTN_SUB_NORM, |
416 | | LLM_TENSOR_FFN_SUB_NORM, |
417 | | LLM_TENSOR_DEC_ATTN_NORM, |
418 | | LLM_TENSOR_DEC_ATTN_Q, |
419 | | LLM_TENSOR_DEC_ATTN_K, |
420 | | LLM_TENSOR_DEC_ATTN_V, |
421 | | LLM_TENSOR_DEC_ATTN_OUT, |
422 | | LLM_TENSOR_DEC_ATTN_REL_B, |
423 | | LLM_TENSOR_DEC_CROSS_ATTN_NORM, |
424 | | LLM_TENSOR_DEC_CROSS_ATTN_Q, |
425 | | LLM_TENSOR_DEC_CROSS_ATTN_K, |
426 | | LLM_TENSOR_DEC_CROSS_ATTN_V, |
427 | | LLM_TENSOR_DEC_CROSS_ATTN_OUT, |
428 | | LLM_TENSOR_DEC_CROSS_ATTN_REL_B, |
429 | | LLM_TENSOR_DEC_FFN_NORM, |
430 | | LLM_TENSOR_DEC_FFN_GATE, |
431 | | LLM_TENSOR_DEC_FFN_DOWN, |
432 | | LLM_TENSOR_DEC_FFN_UP, |
433 | | LLM_TENSOR_DEC_OUTPUT_NORM, |
434 | | LLM_TENSOR_ENC_ATTN_NORM, |
435 | | LLM_TENSOR_ENC_ATTN_Q, |
436 | | LLM_TENSOR_ENC_ATTN_K, |
437 | | LLM_TENSOR_ENC_ATTN_V, |
438 | | LLM_TENSOR_ENC_ATTN_OUT, |
439 | | LLM_TENSOR_ENC_ATTN_REL_B, |
440 | | LLM_TENSOR_ENC_FFN_NORM, |
441 | | LLM_TENSOR_ENC_FFN_GATE, |
442 | | LLM_TENSOR_ENC_FFN_DOWN, |
443 | | LLM_TENSOR_ENC_FFN_UP, |
444 | | LLM_TENSOR_ENC_OUTPUT_NORM, |
445 | | LLM_TENSOR_CLS, |
446 | | LLM_TENSOR_CLS_OUT, |
447 | | LLM_TENSOR_CONV1D, |
448 | | LLM_TENSOR_CONVNEXT_DW, |
449 | | LLM_TENSOR_CONVNEXT_NORM, |
450 | | LLM_TENSOR_CONVNEXT_PW1, |
451 | | LLM_TENSOR_CONVNEXT_PW2, |
452 | | LLM_TENSOR_CONVNEXT_GAMMA, |
453 | | LLM_TENSOR_POS_NET_CONV1, |
454 | | LLM_TENSOR_POS_NET_CONV2, |
455 | | LLM_TENSOR_POS_NET_NORM, |
456 | | LLM_TENSOR_POS_NET_NORM1, |
457 | | LLM_TENSOR_POS_NET_NORM2, |
458 | | LLM_TENSOR_POS_NET_ATTN_NORM, |
459 | | LLM_TENSOR_POS_NET_ATTN_Q, |
460 | | LLM_TENSOR_POS_NET_ATTN_K, |
461 | | LLM_TENSOR_POS_NET_ATTN_V, |
462 | | LLM_TENSOR_POS_NET_ATTN_OUT, |
463 | | LLM_TENSOR_SHORTCONV_CONV, |
464 | | LLM_TENSOR_SHORTCONV_INPROJ, |
465 | | LLM_TENSOR_SHORTCONV_OUTPROJ, |
466 | | LLM_TENSOR_VISEXP_ATTN_QKV, |
467 | | LLM_TENSOR_VISEXP_ATTN_OUT, |
468 | | LLM_TENSOR_VISEXP_FFN_GATE, |
469 | | LLM_TENSOR_VISEXP_FFN_DOWN, |
470 | | LLM_TENSOR_VISEXP_FFN_UP, |
471 | | LLM_TENSOR_NEXTN_EH_PROJ, |
472 | | LLM_TENSOR_NEXTN_EMBED_TOKENS, |
473 | | LLM_TENSOR_NEXTN_ENORM, |
474 | | LLM_TENSOR_NEXTN_HNORM, |
475 | | LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, |
476 | | LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, |
477 | | }; |
478 | | |
// Three-way classification stored in llm_tensor_info::layer.
//
// NOTE(review): presumably INPUT means tensors used before the repeated
// blocks, REPEATING means per-block tensors (the "blk.N." names shown in the
// LLM_TN usage comment), and OUTPUT means tensors used after the blocks --
// TODO confirm against the table in the implementation file.
479 | | enum llm_tensor_layer { |
480 | | LLM_TENSOR_LAYER_INPUT, |
481 | | LLM_TENSOR_LAYER_REPEATING, |
482 | | LLM_TENSOR_LAYER_OUTPUT, |
483 | | }; |
484 | | |
// Callable helper that turns an llm_kv enumerator into a std::string for a
// given architecture.
//
// The constructor and operator() are only declared here; their definitions
// live outside this header.
//
// NOTE(review): the constructor is not `explicit` and `suffix` has a default,
// so a bare llm_arch converts implicitly to LLM_KV. Callers may rely on that,
// so it is documented rather than changed.
485 | | struct LLM_KV { |
// arch:   architecture the key is built for.
// suffix: optional C string, nullptr when omitted. Only the pointer is held
//         by the `suffix` member below (it is a `const char *`), so the
//         string presumably must outlive this object -- TODO confirm how the
//         out-of-line constructor and operator() use it.
486 | | LLM_KV(llm_arch arch, const char * suffix = nullptr); |
487 | | |
488 | | llm_arch arch; |
489 | | const char * suffix; |
490 | | |
// Returns the key string for `kv`; the mapping is defined elsewhere.
491 | | std::string operator()(llm_kv kv) const; |
492 | | }; |
493 | | |
494 | | // helper to handle gguf constants |
495 | | // usage: |
496 | | // |
497 | | // const auto tn = LLM_TN(LLM_ARCH_LLAMA); |
498 | | // |
499 | | // std::string name = tn(LLM_TENSOR_OUTPUT); -> "output" |
500 | | // std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias" |
501 | | // std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight" |
502 | | // |
// Result object produced by LLM_TN::operator(): it bundles everything needed
// to render one tensor name and renders it on demand through str().
//
// All data members are const, so an instance is fixed once it has been
// brace-initialized (see LLM_TN) and cannot be assigned to afterwards.
503 | | struct LLM_TN_IMPL { |
// Architecture and tensor kind the name is built for.
504 | | const llm_arch arch; |
505 | | const llm_tensor tensor; |
// Optional name suffix ("bias", "weight" in the usage comment above); nullptr
// when LLM_TN's suffix-less overload is used. Only the pointer is stored.
506 | | const char * const suffix; |
// Block index: LLM_TN defaults it to -1, and the usage comment above shows
// bid == 3 producing "blk.3.attn_norm.weight".
507 | | const int bid; |
// Second index, also defaulted to -1 by LLM_TN. Its meaning is not visible in
// this header -- presumably an expert index, TODO confirm.
508 | | const int xid; |
509 | | |
// Builds the tensor name; defined outside this header.
510 | | std::string str() const; |
511 | | |
// Implicit conversion so the object can be used where a std::string is
// expected (e.g. `std::string name = tn(...)` in the usage comment above).
512 | 0 | operator std::string() const { |
513 | 0 | return str(); |
514 | 0 | } |
515 | | |
// Comparisons are declared with the std::string as the LEFT operand only
// (`name == tn`); each call renders the name again through str().
516 | 0 | friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) { |
517 | 0 | return str == tn.str(); |
518 | 0 | } |
519 | | |
520 | 0 | friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) { |
521 | 0 | return str != tn.str(); |
522 | 0 | } |
523 | | }; |
524 | | |
// Per-architecture factory for tensor names: construct it once with an
// architecture, then call it with a tensor kind (and optionally a suffix,
// block index and second index) to obtain an LLM_TN_IMPL.
//
// NOTE(review): the single-argument constructor is not `explicit`, so an
// llm_arch converts implicitly to LLM_TN. Callers may rely on that, so it is
// documented rather than changed.
525 | | struct LLM_TN { |
526 | 0 | LLM_TN(llm_arch arch) : arch(arch) {} |
527 | | |
528 | | llm_arch arch; |
529 | | |
// Overload with a suffix. The `suffix` pointer is copied as-is into the
// returned LLM_TN_IMPL (no string copy is made here), so the pointed-to text
// must stay alive for as long as the result is used. bid and xid default
// to -1.
530 | 0 | LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const { |
531 | 0 | return { arch, tensor, suffix, bid, xid }; |
532 | 0 | } |
533 | | |
// Overload without a suffix: stores nullptr as the suffix.
534 | 0 | LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const { |
535 | 0 | return { arch, tensor, nullptr, bid, xid }; |
536 | 0 | } |
537 | | }; |
538 | | |
539 | | |
// Per-tensor-kind record returned by llm_tensor_info_for(): pairs a layer
// classification with a ggml_op (ggml_op comes from "ggml.h", per the include
// comment at the top of this header).
//
// NOTE(review): `op` is presumably the ggml operation the tensor is consumed
// by -- TODO confirm where the table is populated.
540 | | struct llm_tensor_info { |
541 | | llm_tensor_layer layer; |
542 | | ggml_op op; |
543 | | }; |
544 | | |
// Free functions over llm_arch / llm_tensor. All are declarations only; the
// definitions live outside this header.

// Returns the name of `arch` as a C string. Ownership and lifetime of the
// returned pointer are not visible here -- presumably it points at static
// storage, TODO confirm.
545 | | const char * llm_arch_name(llm_arch arch); |
546 | | |
// Inverse lookup: maps a name to an llm_arch. The result for an unrecognized
// name is not visible here -- presumably LLM_ARCH_UNKNOWN, TODO confirm.
547 | | llm_arch llm_arch_from_string(const std::string & name); |
548 | | |
// Returns a reference to the llm_tensor_info for `tensor`. The referenced
// object is owned by the implementation; behavior for a tensor without an
// entry is not visible here.
549 | | const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor); |
550 | | |
// Architecture category predicates. They take the enum by const reference
// rather than by value; the out-of-line definitions must use the same
// signature.
551 | | bool llm_arch_is_recurrent(const llm_arch & arch); |
552 | | bool llm_arch_is_hybrid (const llm_arch & arch); |
553 | | bool llm_arch_is_diffusion(const llm_arch & arch); |