Coverage Report

Created: 2026-06-13 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/src/llama-arch.h
Line
Count
Source
1
#pragma once
2
3
#include "ggml.h" // ggml_op
4
5
#include <string>
6
#include <set>
7
#include <vector>
8
9
//
10
// gguf constants (sync with gguf.py)
11
//
12
13
// Identifiers for the model architectures known to this header.
// No enumerator carries an explicit value, so they are numbered consecutively
// from 0 in declaration order; inserting or reordering entries changes the
// numeric value of everything that follows.
// NOTE(review): LLM_ARCH_UNKNOWN (last entry) is presumably what
// llm_arch_from_string() yields for an unrecognized name -- confirm in the
// implementation file.
enum llm_arch {
14
    LLM_ARCH_CLIP,
15
    LLM_ARCH_LLAMA,
16
    LLM_ARCH_LLAMA4,
17
    LLM_ARCH_DECI,
18
    LLM_ARCH_FALCON,
19
    LLM_ARCH_BAICHUAN,
20
    LLM_ARCH_GROK,
21
    LLM_ARCH_GPT2,
22
    LLM_ARCH_GPTJ,
23
    LLM_ARCH_GPTNEOX,
24
    LLM_ARCH_MPT,
25
    LLM_ARCH_STARCODER,
26
    LLM_ARCH_REFACT,
27
    LLM_ARCH_BERT,
28
    LLM_ARCH_MODERN_BERT,
29
    LLM_ARCH_NOMIC_BERT,
30
    LLM_ARCH_NOMIC_BERT_MOE,
31
    LLM_ARCH_NEO_BERT,
32
    LLM_ARCH_JINA_BERT_V2,
33
    LLM_ARCH_JINA_BERT_V3,
34
    LLM_ARCH_EUROBERT,
35
    LLM_ARCH_BLOOM,
36
    LLM_ARCH_STABLELM,
37
    LLM_ARCH_QWEN,
38
    LLM_ARCH_QWEN2,
39
    LLM_ARCH_QWEN2MOE,
40
    LLM_ARCH_QWEN2VL,
41
    LLM_ARCH_QWEN3,
42
    LLM_ARCH_QWEN3MOE,
43
    LLM_ARCH_QWEN3NEXT,
44
    LLM_ARCH_QWEN3VL,
45
    LLM_ARCH_QWEN3VLMOE,
46
    LLM_ARCH_QWEN35,
47
    LLM_ARCH_QWEN35MOE,
48
    LLM_ARCH_PHI2,
49
    LLM_ARCH_PHI3,
50
    LLM_ARCH_PHIMOE,
51
    LLM_ARCH_PLAMO,
52
    LLM_ARCH_PLAMO2,
53
    LLM_ARCH_PLAMO3,
54
    LLM_ARCH_CODESHELL,
55
    LLM_ARCH_ORION,
56
    LLM_ARCH_INTERNLM2,
57
    LLM_ARCH_MINICPM,
58
    LLM_ARCH_MINICPM3,
59
    LLM_ARCH_GEMMA,
60
    LLM_ARCH_GEMMA2,
61
    LLM_ARCH_GEMMA3,
62
    LLM_ARCH_GEMMA3N,
63
    LLM_ARCH_GEMMA4,
64
    LLM_ARCH_GEMMA4_ASSISTANT,
65
    LLM_ARCH_GEMMA_EMBEDDING,
66
    LLM_ARCH_STARCODER2,
67
    LLM_ARCH_MAMBA,
68
    LLM_ARCH_MAMBA2,
69
    LLM_ARCH_JAMBA,
70
    LLM_ARCH_FALCON_H1,
71
    LLM_ARCH_XVERSE,
72
    LLM_ARCH_COMMAND_R,
73
    LLM_ARCH_COHERE2,
74
    LLM_ARCH_DBRX,
75
    LLM_ARCH_OLMO,
76
    LLM_ARCH_OLMO2,
77
    LLM_ARCH_OLMOE,
78
    LLM_ARCH_OPENELM,
79
    LLM_ARCH_ARCTIC,
80
    LLM_ARCH_DEEPSEEK,
81
    LLM_ARCH_DEEPSEEK2,
82
    LLM_ARCH_DEEPSEEK2OCR,
83
    LLM_ARCH_DEEPSEEK32,
84
    LLM_ARCH_CHATGLM,
85
    LLM_ARCH_GLM4,
86
    LLM_ARCH_GLM4_MOE,
87
    LLM_ARCH_GLM_DSA,
88
    LLM_ARCH_BITNET,
89
    LLM_ARCH_T5,
90
    LLM_ARCH_T5ENCODER,
91
    LLM_ARCH_JAIS,
92
    LLM_ARCH_JAIS2,
93
    LLM_ARCH_NEMOTRON,
94
    LLM_ARCH_NEMOTRON_H,
95
    LLM_ARCH_NEMOTRON_H_MOE,
96
    LLM_ARCH_EXAONE,
97
    LLM_ARCH_EXAONE4,
98
    LLM_ARCH_EXAONE_MOE,
99
    LLM_ARCH_RWKV6,
100
    LLM_ARCH_RWKV6QWEN2,
101
    LLM_ARCH_RWKV7,
102
    LLM_ARCH_ARWKV7,
103
    LLM_ARCH_GRANITE,
104
    LLM_ARCH_GRANITE_MOE,
105
    LLM_ARCH_GRANITE_HYBRID,
106
    LLM_ARCH_CHAMELEON,
107
    LLM_ARCH_WAVTOKENIZER_DEC,
108
    LLM_ARCH_PLM,
109
    LLM_ARCH_BAILINGMOE,
110
    LLM_ARCH_BAILINGMOE2,
111
    LLM_ARCH_DOTS1,
112
    LLM_ARCH_ARCEE,
113
    LLM_ARCH_AFMOE,
114
    LLM_ARCH_ERNIE4_5,
115
    LLM_ARCH_ERNIE4_5_MOE,
116
    LLM_ARCH_HUNYUAN_MOE,
117
    LLM_ARCH_HUNYUAN_DENSE,
118
    LLM_ARCH_HUNYUAN_VL,
119
    LLM_ARCH_SMOLLM3,
120
    LLM_ARCH_OPENAI_MOE,
121
    LLM_ARCH_LFM2,
122
    LLM_ARCH_LFM2MOE,
123
    LLM_ARCH_DREAM,
124
    LLM_ARCH_SMALLTHINKER,
125
    LLM_ARCH_LLADA,
126
    LLM_ARCH_LLADA_MOE,
127
    LLM_ARCH_SEED_OSS,
128
    LLM_ARCH_GROVEMOE,
129
    LLM_ARCH_APERTUS,
130
    LLM_ARCH_MINIMAX_M2,
131
    LLM_ARCH_COGVLM,
132
    LLM_ARCH_RND1,
133
    LLM_ARCH_PANGU_EMBED,
134
    LLM_ARCH_MISTRAL3,
135
    LLM_ARCH_MISTRAL4,
136
    LLM_ARCH_PADDLEOCR,
137
    LLM_ARCH_MIMO2,
138
    LLM_ARCH_STEP35,
139
    LLM_ARCH_LLAMA_EMBED,
140
    LLM_ARCH_MAINCODER,
141
    LLM_ARCH_KIMI_LINEAR,
142
    LLM_ARCH_TALKIE,
143
    LLM_ARCH_MELLUM,
144
    LLM_ARCH_EAGLE3,
145
    LLM_ARCH_UNKNOWN,
146
};
147
148
// Identifiers for model metadata keys (the "gguf constants" this header is
// meant to stay in sync with). LLM_KV::operator() takes one of these and
// returns the key as a std::string.
// Values are implicit and consecutive from 0, so ordering is significant for
// any table indexed by this enum.
enum llm_kv {
149
    // general keys (LLM_KV_GENERAL_*, including LLM_KV_GENERAL_SAMPLING_*)
    LLM_KV_GENERAL_TYPE,
150
    LLM_KV_GENERAL_ARCHITECTURE,
151
    LLM_KV_GENERAL_QUANTIZATION_VERSION,
152
    LLM_KV_GENERAL_ALIGNMENT,
153
    LLM_KV_GENERAL_FILE_TYPE,
154
    LLM_KV_GENERAL_SAMPLING_SEQUENCE,
155
    LLM_KV_GENERAL_SAMPLING_TOP_K,
156
    LLM_KV_GENERAL_SAMPLING_TOP_P,
157
    LLM_KV_GENERAL_SAMPLING_MIN_P,
158
    LLM_KV_GENERAL_SAMPLING_XTC_PROBABILITY,
159
    LLM_KV_GENERAL_SAMPLING_XTC_THRESHOLD,
160
    LLM_KV_GENERAL_SAMPLING_TEMP,
161
    LLM_KV_GENERAL_SAMPLING_PENALTY_LAST_N,
162
    LLM_KV_GENERAL_SAMPLING_PENALTY_REPEAT,
163
    LLM_KV_GENERAL_SAMPLING_MIROSTAT,
164
    LLM_KV_GENERAL_SAMPLING_MIROSTAT_TAU,
165
    LLM_KV_GENERAL_SAMPLING_MIROSTAT_ETA,
166
    LLM_KV_GENERAL_NAME,
167
    LLM_KV_GENERAL_AUTHOR,
168
    LLM_KV_GENERAL_VERSION,
169
    LLM_KV_GENERAL_URL,
170
    LLM_KV_GENERAL_DESCRIPTION,
171
    LLM_KV_GENERAL_LICENSE,
172
    LLM_KV_GENERAL_SOURCE_URL,
173
    LLM_KV_GENERAL_SOURCE_HF_REPO,
174
175
    LLM_KV_VOCAB_SIZE,
176
    LLM_KV_CONTEXT_LENGTH,
177
    LLM_KV_EMBEDDING_LENGTH,
178
    LLM_KV_EMBEDDING_LENGTH_OUT,
179
    LLM_KV_EMBEDDING_LENGTH_PER_LAYER,
180
    LLM_KV_FEATURES_LENGTH,
181
    LLM_KV_BLOCK_COUNT,
182
    LLM_KV_LEADING_DENSE_BLOCK_COUNT,
183
    LLM_KV_FEED_FORWARD_LENGTH,
184
    LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
185
    LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
186
    LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH,
187
    LLM_KV_SWIGLU_CLAMP_EXP,
188
    LLM_KV_SWIGLU_CLAMP_SHEXP,
189
    LLM_KV_USE_PARALLEL_RESIDUAL,
190
    LLM_KV_TENSOR_DATA_LAYOUT,
191
    LLM_KV_EXPERT_COUNT,
192
    LLM_KV_EXPERT_USED_COUNT,
193
    LLM_KV_EXPERT_SHARED_COUNT,
194
    LLM_KV_EXPERT_GROUP_COUNT,
195
    LLM_KV_EXPERT_GROUP_USED_COUNT,
196
    LLM_KV_EXPERT_WEIGHTS_SCALE,
197
    LLM_KV_EXPERT_WEIGHTS_NORM,
198
    LLM_KV_EXPERT_GATING_FUNC,
199
    LLM_KV_EXPERT_GROUP_SCALE,
200
    LLM_KV_EXPERTS_PER_GROUP,
201
    LLM_KV_MOE_EVERY_N_LAYERS,
202
    LLM_KV_MOE_LATENT_SIZE,
203
    LLM_KV_NEXTN_PREDICT_LAYERS,
204
    LLM_KV_NUM_DEEPSTACK_LAYERS,
205
    LLM_KV_DEEPSTACK_MAPPING,
206
    LLM_KV_HIDDEN_ACT,
207
    LLM_KV_POOLING_TYPE,
208
    LLM_KV_LOGIT_SCALE,
209
    LLM_KV_DECODER_START_TOKEN_ID,
210
    LLM_KV_DECODER_BLOCK_COUNT,
211
    LLM_KV_ATTN_LOGIT_SOFTCAPPING,
212
    LLM_KV_ROUTER_LOGIT_SOFTCAPPING,
213
    LLM_KV_FINAL_LOGIT_SOFTCAPPING,
214
    LLM_KV_SWIN_NORM,
215
    LLM_KV_RESCALE_EVERY_N_LAYERS,
216
    LLM_KV_TIME_MIX_EXTRA_DIM,
217
    LLM_KV_TIME_DECAY_EXTRA_DIM,
218
    LLM_KV_RESIDUAL_SCALE,
219
    LLM_KV_EMBEDDING_SCALE,
220
    LLM_KV_TOKEN_SHIFT_COUNT,
221
    LLM_KV_INTERLEAVE_MOE_LAYER_STEP,
222
    LLM_KV_FULL_ATTENTION_INTERVAL,
223
224
    // attention keys (LLM_KV_ATTENTION_*)
    LLM_KV_ATTENTION_HEAD_COUNT,
225
    LLM_KV_ATTENTION_HEAD_COUNT_KV,
226
    LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
227
    LLM_KV_ATTENTION_CLAMP_KQV,
228
    LLM_KV_ATTENTION_KEY_LENGTH,
229
    LLM_KV_ATTENTION_VALUE_LENGTH,
230
    LLM_KV_ATTENTION_LAYERNORM_EPS,
231
    LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
232
    LLM_KV_ATTENTION_GROUPNORM_EPS,
233
    LLM_KV_ATTENTION_GROUPNORM_GROUPS,
234
    LLM_KV_ATTENTION_CAUSAL,
235
    LLM_KV_ATTENTION_Q_LORA_RANK,
236
    LLM_KV_ATTENTION_KV_LORA_RANK,
237
    LLM_KV_ATTENTION_DECAY_LORA_RANK,
238
    LLM_KV_ATTENTION_ICLR_LORA_RANK,
239
    LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
240
    LLM_KV_ATTENTION_GATE_LORA_RANK,
241
    LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
242
    LLM_KV_ATTENTION_SLIDING_WINDOW,
243
    LLM_KV_ATTENTION_SLIDING_WINDOW_PATTERN,
244
    LLM_KV_ATTENTION_SCALE,
245
    LLM_KV_ATTENTION_OUTPUT_SCALE,
246
    LLM_KV_ATTENTION_VALUE_SCALE,
247
    LLM_KV_ATTENTION_TEMPERATURE_LENGTH,
248
    LLM_KV_ATTENTION_TEMPERATURE_SCALE,
249
    LLM_KV_ATTENTION_KEY_LENGTH_MLA,
250
    LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
251
    LLM_KV_ATTENTION_KEY_LENGTH_SWA,
252
    LLM_KV_ATTENTION_VALUE_LENGTH_SWA,
253
    LLM_KV_ATTENTION_INDEXER_HEAD_COUNT,
254
    LLM_KV_ATTENTION_INDEXER_KEY_LENGTH,
255
    LLM_KV_ATTENTION_INDEXER_TOP_K,
256
    LLM_KV_ATTENTION_SHARED_KV_LAYERS,
257
    LLM_KV_ATTENTION_RECURRENT_LAYERS,
258
259
    // RoPE keys (LLM_KV_ROPE_*)
    LLM_KV_ROPE_DIMENSION_COUNT,
260
    LLM_KV_ROPE_DIMENSION_COUNT_SWA,
261
    LLM_KV_ROPE_DIMENSION_SECTIONS,
262
    LLM_KV_ROPE_FREQ_BASE,
263
    LLM_KV_ROPE_FREQ_BASE_SWA,
264
    LLM_KV_ROPE_SCALE_LINEAR,
265
    LLM_KV_ROPE_SCALING_TYPE,
266
    LLM_KV_ROPE_SCALING_FACTOR,
267
    LLM_KV_ROPE_SCALING_ALPHA,
268
    LLM_KV_ROPE_SCALING_ATTN_FACTOR,
269
    LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
270
    LLM_KV_ROPE_SCALING_FINETUNED,
271
    LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
272
    LLM_KV_ROPE_SCALING_YARN_EXT_FACTOR,
273
    LLM_KV_ROPE_SCALING_YARN_ATTN_FACTOR,
274
    LLM_KV_ROPE_SCALING_YARN_BETA_FAST,
275
    LLM_KV_ROPE_SCALING_YARN_BETA_SLOW,
276
277
    // split keys (LLM_KV_SPLIT_*)
    LLM_KV_SPLIT_NO,
278
    LLM_KV_SPLIT_COUNT,
279
    LLM_KV_SPLIT_TENSORS_COUNT,
280
281
    // SSM keys (LLM_KV_SSM_*)
    LLM_KV_SSM_INNER_SIZE,
282
    LLM_KV_SSM_CONV_KERNEL,
283
    LLM_KV_SSM_STATE_SIZE,
284
    LLM_KV_SSM_TIME_STEP_RANK,
285
    LLM_KV_SSM_GROUP_COUNT,
286
    LLM_KV_SSM_DT_B_C_RMS,
287
288
    LLM_KV_KDA_HEAD_DIM,
289
290
    LLM_KV_WKV_HEAD_SIZE,
291
292
    // tokenizer keys (LLM_KV_TOKENIZER_*)
    LLM_KV_TOKENIZER_MODEL,
293
    LLM_KV_TOKENIZER_PRE,
294
    LLM_KV_TOKENIZER_LIST,
295
    LLM_KV_TOKENIZER_TOKEN_TYPE,
296
    LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
297
    LLM_KV_TOKENIZER_SCORES,
298
    LLM_KV_TOKENIZER_MERGES,
299
    LLM_KV_TOKENIZER_BOS_ID,
300
    LLM_KV_TOKENIZER_EOS_ID,
301
    LLM_KV_TOKENIZER_EOT_ID,
302
    LLM_KV_TOKENIZER_EOM_ID,
303
    LLM_KV_TOKENIZER_UNK_ID,
304
    LLM_KV_TOKENIZER_SEP_ID,
305
    LLM_KV_TOKENIZER_PAD_ID,
306
    LLM_KV_TOKENIZER_CLS_ID,
307
    LLM_KV_TOKENIZER_MASK_ID,
308
    LLM_KV_TOKENIZER_ADD_BOS,
309
    LLM_KV_TOKENIZER_ADD_EOS,
310
    LLM_KV_TOKENIZER_ADD_SEP,
311
    LLM_KV_TOKENIZER_ADD_PREFIX,
312
    LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
313
    LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
314
    LLM_KV_TOKENIZER_HF_JSON,
315
    LLM_KV_TOKENIZER_RWKV,
316
    LLM_KV_TOKENIZER_CHAT_TEMPLATE,
317
    LLM_KV_TOKENIZER_NORMALIZER_LOWERCASE,
318
    LLM_KV_TOKENIZER_NORMALIZER_STRIP_ACCENTS,
319
    LLM_KV_TOKENIZER_FIM_PRE_ID,
320
    LLM_KV_TOKENIZER_FIM_SUF_ID,
321
    LLM_KV_TOKENIZER_FIM_MID_ID,
322
    LLM_KV_TOKENIZER_FIM_PAD_ID,
323
    LLM_KV_TOKENIZER_FIM_REP_ID,
324
    LLM_KV_TOKENIZER_FIM_SEP_ID,
325
    LLM_KV_TOKENIZER_SUPPRESS_TOKENS,
326
327
    // adapter keys (LLM_KV_ADAPTER_*)
    LLM_KV_ADAPTER_TYPE,
328
    LLM_KV_ADAPTER_LORA_ALPHA,
329
    LLM_KV_ADAPTER_LORA_TASK_NAME,
330
    LLM_KV_ADAPTER_LORA_PROMPT_PREFIX,
331
    LLM_KV_ADAPTER_ALORA_INVOCATION_TOKENS,
332
333
    LLM_KV_POSNET_EMBEDDING_LENGTH,
334
    LLM_KV_POSNET_BLOCK_COUNT,
335
336
    LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
337
    LLM_KV_CONVNEXT_BLOCK_COUNT,
338
339
    LLM_KV_CLASSIFIER_OUTPUT_LABELS,
340
341
    LLM_KV_TARGET_LAYERS,
342
    LLM_KV_TARGET_HIDDEN_SIZE,
343
    LLM_KV_NORM_BEFORE_RESIDUAL,
344
345
    LLM_KV_SHORTCONV_L_CACHE,
346
347
    LLM_KV_XIELU_ALPHA_N,
348
    LLM_KV_XIELU_ALPHA_P,
349
    LLM_KV_XIELU_BETA,
350
    LLM_KV_XIELU_EPS,
351
352
    // deprecated:
353
    LLM_KV_TOKENIZER_PREFIX_ID,
354
    LLM_KV_TOKENIZER_SUFFIX_ID,
355
    LLM_KV_TOKENIZER_MIDDLE_ID,
356
357
    // sentence-transformers dense layers in and out features
358
    LLM_KV_DENSE_2_FEAT_IN,
359
    LLM_KV_DENSE_2_FEAT_OUT,
360
    LLM_KV_DENSE_3_FEAT_IN,
361
    LLM_KV_DENSE_3_FEAT_OUT,
362
};
363
364
// Identifiers for model tensors.
// LLM_TN::operator() pairs one of these with an architecture (plus optional
// suffix and indices) to form a tensor name, and llm_tensor_info_for() returns
// the llm_tensor_info record for one.
// Values are implicit and consecutive from 0 in declaration order.
enum llm_tensor {
365
    LLM_TENSOR_TOKEN_EMBD,
366
    LLM_TENSOR_TOKEN_EMBD_NORM,
367
    LLM_TENSOR_TOKEN_TYPES,
368
    LLM_TENSOR_POS_EMBD,
369
    LLM_TENSOR_DENSE_2_OUT,
370
    LLM_TENSOR_DENSE_3_OUT,
371
    LLM_TENSOR_OUTPUT,
372
    LLM_TENSOR_OUTPUT_NORM,
373
    LLM_TENSOR_OUTPUT_NORM_LFM2, // fix for wrong tensor name
374
    LLM_TENSOR_ROPE_FREQS,
375
    LLM_TENSOR_ROPE_FACTORS_LONG,
376
    LLM_TENSOR_ROPE_FACTORS_SHORT,
377
    LLM_TENSOR_ATTN_Q,
378
    LLM_TENSOR_ATTN_K,
379
    LLM_TENSOR_ATTN_V,
380
    LLM_TENSOR_ATTN_QKV,
381
    LLM_TENSOR_ATTN_OUT,
382
    LLM_TENSOR_ATTN_NORM,
383
    LLM_TENSOR_ATTN_NORM_2,
384
    LLM_TENSOR_ATTN_OUT_NORM,
385
    LLM_TENSOR_ATTN_POST_NORM,
386
    LLM_TENSOR_ATTN_ROT_EMBD,
387
    LLM_TENSOR_ATTN_SINKS,
388
    LLM_TENSOR_ATTN_GATE,
389
    LLM_TENSOR_FFN_GATE_INP,
390
    LLM_TENSOR_FFN_GATE_INP_SHEXP,
391
    LLM_TENSOR_FFN_NORM,
392
    LLM_TENSOR_FFN_POST_NORM,
393
    LLM_TENSOR_FFN_POST_NORM_1,
394
    LLM_TENSOR_FFN_POST_NORM_2,
395
    LLM_TENSOR_FFN_PRE_NORM_2,
396
    LLM_TENSOR_FFN_GATE,
397
    LLM_TENSOR_FFN_DOWN,
398
    LLM_TENSOR_FFN_UP,
399
    LLM_TENSOR_FFN_ACT,
400
    LLM_TENSOR_FFN_DOWN_EXP,  // split experts for backward compatibility
401
    LLM_TENSOR_FFN_GATE_EXP,
402
    LLM_TENSOR_FFN_UP_EXP,
403
    LLM_TENSOR_FFN_NORM_EXPS,
404
    LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
405
    LLM_TENSOR_FFN_GATE_EXPS,
406
    LLM_TENSOR_FFN_UP_EXPS,
407
    LLM_TENSOR_FFN_GATE_UP_EXPS,
408
    LLM_TENSOR_FFN_DOWN_SHEXP,
409
    LLM_TENSOR_FFN_GATE_SHEXP,
410
    LLM_TENSOR_FFN_UP_SHEXP,
411
    LLM_TENSOR_FFN_DOWN_CHEXPS,
412
    LLM_TENSOR_FFN_GATE_CHEXPS,
413
    LLM_TENSOR_FFN_UP_CHEXPS,
414
    LLM_TENSOR_FFN_EXP_PROBS_B,
415
    LLM_TENSOR_FFN_LATENT_DOWN,
416
    LLM_TENSOR_FFN_LATENT_UP,
417
    LLM_TENSOR_ATTN_Q_NORM,
418
    LLM_TENSOR_ATTN_K_NORM,
419
    LLM_TENSOR_LAYER_OUT_NORM,
420
    LLM_TENSOR_LAYER_OUT_SCALE,
421
    LLM_TENSOR_POST_ATTN_NORM,
422
    LLM_TENSOR_POST_MLP_NORM,
423
    LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n
424
    LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n
425
    LLM_TENSOR_PER_LAYER_INP_GATE,   // gemma3n
426
    LLM_TENSOR_PER_LAYER_PROJ,       // gemma3n
427
    LLM_TENSOR_PER_LAYER_PROJ_NORM,  // gemma3n
428
    LLM_TENSOR_PER_LAYER_POST_NORM,  // gemma3n
429
    LLM_TENSOR_ALTUP_PROJ,           // gemma3n
430
    LLM_TENSOR_ALTUP_UNEMBD_PROJ,    // gemma3n
431
    LLM_TENSOR_ALTUP_CORRECT_COEF,   // gemma3n
432
    LLM_TENSOR_ALTUP_CORRECT_SCALE,  // gemma3n
433
    LLM_TENSOR_ALTUP_PREDICT_COEF,   // gemma3n
434
    LLM_TENSOR_ALTUP_ROUTER,         // gemma3n
435
    LLM_TENSOR_ALTUP_ROUTER_NORM,    // gemma3n
436
    LLM_TENSOR_LAUREL_L,             // gemma3n
437
    LLM_TENSOR_LAUREL_R,             // gemma3n
438
    LLM_TENSOR_LAUREL_POST_NORM,     // gemma3n
439
    LLM_TENSOR_SSM_IN,
440
    LLM_TENSOR_SSM_CONV1D,
441
    LLM_TENSOR_SSM_X,
442
    LLM_TENSOR_SSM_DT,
443
    LLM_TENSOR_SSM_DT_NORM,
444
    LLM_TENSOR_SSM_A,
445
    LLM_TENSOR_SSM_A_NOSCAN,        // qwen3next special case with MUL instead of SSM_SCAN
446
    LLM_TENSOR_SSM_B_NORM,
447
    LLM_TENSOR_SSM_C_NORM,
448
    LLM_TENSOR_SSM_D,
449
    LLM_TENSOR_SSM_NORM,
450
    LLM_TENSOR_SSM_OUT,
451
    LLM_TENSOR_SSM_BETA_ALPHA,      // qwen3next
452
    LLM_TENSOR_SSM_ALPHA,           // qwen3.5
453
    // Kimi Linear KDA (using SSM_ prefix for consistency)
454
    LLM_TENSOR_SSM_CONV1D_Q,        // kimi: Q conv1d weight
455
    LLM_TENSOR_SSM_CONV1D_K,        // kimi: K conv1d weight
456
    LLM_TENSOR_SSM_CONV1D_V,        // kimi: V conv1d weight
457
    LLM_TENSOR_SSM_F_A,             // kimi: forget gate projection A
458
    LLM_TENSOR_SSM_F_B,             // kimi: forget gate projection B
459
    LLM_TENSOR_SSM_BETA,            // kimi: beta mixing coefficient and qwen3.5
460
    LLM_TENSOR_SSM_G_A,             // kimi: output gate projection A
461
    LLM_TENSOR_SSM_G_B,             // kimi: output gate projection B
462
    LLM_TENSOR_TIME_MIX_W0,
463
    LLM_TENSOR_TIME_MIX_W1,
464
    LLM_TENSOR_TIME_MIX_W2,
465
    LLM_TENSOR_TIME_MIX_A0,
466
    LLM_TENSOR_TIME_MIX_A1,
467
    LLM_TENSOR_TIME_MIX_A2,
468
    LLM_TENSOR_TIME_MIX_V0,
469
    LLM_TENSOR_TIME_MIX_V1,
470
    LLM_TENSOR_TIME_MIX_V2,
471
    LLM_TENSOR_TIME_MIX_G1,
472
    LLM_TENSOR_TIME_MIX_G2,
473
    LLM_TENSOR_TIME_MIX_K_K,
474
    LLM_TENSOR_TIME_MIX_K_A,
475
    LLM_TENSOR_TIME_MIX_R_K,
476
    LLM_TENSOR_TIME_MIX_LERP_X,
477
    LLM_TENSOR_TIME_MIX_LERP_W,
478
    LLM_TENSOR_TIME_MIX_LERP_K,
479
    LLM_TENSOR_TIME_MIX_LERP_V,
480
    LLM_TENSOR_TIME_MIX_LERP_R,
481
    LLM_TENSOR_TIME_MIX_LERP_G,
482
    LLM_TENSOR_TIME_MIX_LERP_FUSED,
483
    LLM_TENSOR_TIME_MIX_FIRST,
484
    LLM_TENSOR_TIME_MIX_DECAY,
485
    LLM_TENSOR_TIME_MIX_DECAY_W1,
486
    LLM_TENSOR_TIME_MIX_DECAY_W2,
487
    LLM_TENSOR_TIME_MIX_KEY,
488
    LLM_TENSOR_TIME_MIX_VALUE,
489
    LLM_TENSOR_TIME_MIX_RECEPTANCE,
490
    LLM_TENSOR_TIME_MIX_GATE,
491
    LLM_TENSOR_TIME_MIX_LN,
492
    LLM_TENSOR_TIME_MIX_OUTPUT,
493
    LLM_TENSOR_CHANNEL_MIX_LERP_K,
494
    LLM_TENSOR_CHANNEL_MIX_LERP_R,
495
    LLM_TENSOR_CHANNEL_MIX_KEY,
496
    LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
497
    LLM_TENSOR_CHANNEL_MIX_VALUE,
498
    LLM_TENSOR_ATTN_Q_A,
499
    LLM_TENSOR_ATTN_Q_B,
500
    LLM_TENSOR_ATTN_KV_A_MQA,
501
    LLM_TENSOR_ATTN_KV_B,
502
    LLM_TENSOR_ATTN_K_B,
503
    LLM_TENSOR_ATTN_V_B,
504
    LLM_TENSOR_ATTN_Q_A_NORM,
505
    LLM_TENSOR_ATTN_KV_A_NORM,
506
    LLM_TENSOR_ATTN_SUB_NORM,
507
    LLM_TENSOR_FFN_SUB_NORM,
508
    LLM_TENSOR_DEC_ATTN_NORM,
509
    LLM_TENSOR_DEC_ATTN_Q,
510
    LLM_TENSOR_DEC_ATTN_K,
511
    LLM_TENSOR_DEC_ATTN_V,
512
    LLM_TENSOR_DEC_ATTN_OUT,
513
    LLM_TENSOR_DEC_ATTN_REL_B,
514
    LLM_TENSOR_DEC_CROSS_ATTN_NORM,
515
    LLM_TENSOR_DEC_CROSS_ATTN_Q,
516
    LLM_TENSOR_DEC_CROSS_ATTN_K,
517
    LLM_TENSOR_DEC_CROSS_ATTN_V,
518
    LLM_TENSOR_DEC_CROSS_ATTN_OUT,
519
    LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
520
    LLM_TENSOR_DEC_FFN_NORM,
521
    LLM_TENSOR_DEC_FFN_GATE,
522
    LLM_TENSOR_DEC_FFN_DOWN,
523
    LLM_TENSOR_DEC_FFN_UP,
524
    LLM_TENSOR_DEC_OUTPUT_NORM,
525
    LLM_TENSOR_ENC_ATTN_NORM,
526
    LLM_TENSOR_ENC_ATTN_Q,
527
    LLM_TENSOR_ENC_ATTN_K,
528
    LLM_TENSOR_ENC_ATTN_V,
529
    LLM_TENSOR_ENC_ATTN_OUT,
530
    LLM_TENSOR_ENC_ATTN_REL_B,
531
    LLM_TENSOR_ENC_FFN_NORM,
532
    LLM_TENSOR_ENC_FFN_GATE,
533
    LLM_TENSOR_ENC_FFN_DOWN,
534
    LLM_TENSOR_ENC_FFN_UP,
535
    LLM_TENSOR_ENC_OUTPUT_NORM,
536
    LLM_TENSOR_CLS,
537
    LLM_TENSOR_CLS_OUT,
538
    LLM_TENSOR_CLS_NORM,
539
    LLM_TENSOR_CONV1D,
540
    LLM_TENSOR_CONVNEXT_DW,
541
    LLM_TENSOR_CONVNEXT_NORM,
542
    LLM_TENSOR_CONVNEXT_PW1,
543
    LLM_TENSOR_CONVNEXT_PW2,
544
    LLM_TENSOR_CONVNEXT_GAMMA,
545
    LLM_TENSOR_POS_NET_CONV1,
546
    LLM_TENSOR_POS_NET_CONV2,
547
    LLM_TENSOR_POS_NET_NORM,
548
    LLM_TENSOR_POS_NET_NORM1,
549
    LLM_TENSOR_POS_NET_NORM2,
550
    LLM_TENSOR_POS_NET_ATTN_NORM,
551
    LLM_TENSOR_POS_NET_ATTN_Q,
552
    LLM_TENSOR_POS_NET_ATTN_K,
553
    LLM_TENSOR_POS_NET_ATTN_V,
554
    LLM_TENSOR_POS_NET_ATTN_OUT,
555
    LLM_TENSOR_SHORTCONV_CONV,
556
    LLM_TENSOR_SHORTCONV_INPROJ,
557
    LLM_TENSOR_SHORTCONV_OUTPROJ,
558
    LLM_TENSOR_VISEXP_ATTN_QKV,
559
    LLM_TENSOR_VISEXP_ATTN_OUT,
560
    LLM_TENSOR_VISEXP_FFN_GATE,
561
    LLM_TENSOR_VISEXP_FFN_DOWN,
562
    LLM_TENSOR_VISEXP_FFN_UP,
563
    LLM_TENSOR_INDEXER_K_NORM,
564
    LLM_TENSOR_INDEXER_PROJ,
565
    LLM_TENSOR_INDEXER_ATTN_K,
566
    LLM_TENSOR_INDEXER_ATTN_Q_B,
567
    LLM_TENSOR_NEXTN_PROJ_PRE,
568
    LLM_TENSOR_NEXTN_PROJ_POST,
569
    LLM_TENSOR_NEXTN_EH_PROJ,
570
    LLM_TENSOR_NEXTN_EMBED_TOKENS,
571
    LLM_TENSOR_NEXTN_ENORM,
572
    LLM_TENSOR_NEXTN_HNORM,
573
    LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
574
    LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
575
    LLM_TENSOR_MASKED_EMBD_CENTROIDS,
576
    LLM_TENSOR_MASKED_EMBD_ORDERING,
577
    LLM_TENSOR_FC,
578
    LLM_TENSOR_D2T,
579
};
580
581
582
// Coarse category of a tensor: input, repeating, or output.
// Stored in llm_tensor_info::layer.
// NOTE(review): REPEATING presumably marks tensors that exist once per block
// (the "blk.N." names) -- confirm against the tensor info table.
enum llm_tensor_layer {
583
    LLM_TENSOR_LAYER_INPUT,
584
    LLM_TENSOR_LAYER_REPEATING,
585
    LLM_TENSOR_LAYER_OUTPUT,
586
};
587
588
// Callable that turns an llm_kv identifier into its std::string key for one
// architecture. Only declarations are visible in this header; the constructor
// and operator() are defined elsewhere.
struct LLM_KV {
589
    // suffix is optional and defaults to nullptr
    LLM_KV(llm_arch arch, const char * suffix = nullptr);
590
591
    llm_arch arch;
592
    // raw pointer, no ownership expressed here
    // NOTE(review): presumably must outlive this object -- confirm at call sites
    const char * suffix;
593
594
    // returns the key string for `kv`
    // NOTE(review): presumably combines arch and suffix into the result -- confirm in the definition
    std::string operator()(llm_kv kv) const;
595
};
596
597
// helper to handle gguf constants
598
// usage:
599
//
600
//   const auto tn = LLM_TN(LLM_ARCH_LLAMA);
601
//
602
//   std::string name = tn(LLM_TENSOR_OUTPUT);                     -> "output"
603
//   std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias");         -> "token_embd.bias"
604
//   std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3);     -> "blk.3.attn_norm.weight"
605
//
606
struct LLM_TN_IMPL {
607
    const llm_arch arch;
608
    const llm_tensor tensor;
609
    const char * const suffix;
610
    const int bid;
611
    const int xid;
612
613
    LLM_TN_IMPL(llm_arch arch, llm_tensor tensor, const char * suffix, int bid, int xid);
614
615
    std::string str() const;
616
617
0
    operator std::string() const {
618
0
        return str();
619
0
    }
620
621
0
    friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
622
0
        return str == tn.str();
623
0
    }
624
625
0
    friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
626
0
        return str != tn.str();
627
0
    }
628
};
629
630
struct LLM_TN {
631
0
    LLM_TN(llm_arch arch) : arch(arch) {}
632
633
    llm_arch arch;
634
635
0
    LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
636
0
        return LLM_TN_IMPL(arch, tensor, suffix, bid, xid);
637
0
    }
638
639
0
    LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
640
0
        return LLM_TN_IMPL(arch, tensor, nullptr, bid, xid);
641
0
    }
642
};
643
644
645
// Record returned by llm_tensor_info_for(): a layer category plus a ggml op.
// NOTE(review): `op` is presumably the ggml operation the tensor is used with
// -- confirm against the table that fills these in.
struct llm_tensor_info {
646
    llm_tensor_layer layer;
647
    ggml_op op;
648
};
649
650
// Free-function API over llm_arch / llm_tensor. Only declarations are visible
// here; behavior notes below are limited to what the signatures show.

// returns a list of llm_arch values
// NOTE(review): presumably every known architecture -- confirm whether LLM_ARCH_UNKNOWN is included
std::vector<llm_arch> llm_arch_all();
651
652
// returns a C string name for `arch`
// NOTE(review): lifetime of the returned pointer is not visible here -- presumably static storage; confirm
const char * llm_arch_name(llm_arch arch);
653
654
// maps a name string to an llm_arch value
// NOTE(review): result for an unrecognized name is not visible here -- presumably LLM_ARCH_UNKNOWN; confirm
llm_arch llm_arch_from_string(const std::string & name);
655
656
// returns a reference to the llm_tensor_info record for `tensor`
// NOTE(review): behavior for a tensor with no record is not visible here -- confirm
const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
657
658
// boolean predicates on an architecture (definitions not visible here)
bool llm_arch_is_recurrent      (const llm_arch & arch);
659
bool llm_arch_is_hybrid         (const llm_arch & arch);
660
bool llm_arch_is_diffusion      (const llm_arch & arch);
661
bool llm_arch_supports_sm_tensor(const llm_arch & arch);
662
bool llm_arch_supports_rs_rollback(const llm_arch & arch);