Coverage Report

Created: 2026-06-22 06:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/src/models/models.h
Line
Count
Source
1
#pragma once
2
3
#include "llama-model.h"
4
#include "llama-graph.h"
5
#include "llama-model-loader.h"
6
7
// note: almost all graphs require at least sqrtf, so include cmath globally
8
#include <cmath>
9
10
//
11
// base classes
12
//
13
14
// Base class (an llm_graph_context) that declares the Mamba layer-building helpers.
// Only declarations live in this header; the bodies are defined elsewhere.
struct llm_build_mamba_base : public llm_graph_context {
15
    // constructed from the graph params (constructor body not visible here)
    llm_build_mamba_base(const llm_graph_params & params);
16
17
    // virtual destructor: derived builders can be destroyed through a base pointer
    virtual ~llm_build_mamba_base() = default;
18
19
    // both helpers take the llm_graph_input_rs input `inp`, the tensor `cur`, the model, the ubatch
    // and `il` (presumably the layer index - confirm), and return a ggml_tensor pointer.
    // NOTE(review): build_mamba_layer is non-const while build_mamba2_layer is const - confirm the asymmetry is intended
    ggml_tensor * build_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il);
20
    ggml_tensor * build_mamba2_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il) const;
21
22
};
23
24
// Base class (an llm_graph_context) that declares the delta-net building helpers:
// three concrete builders, a dispatcher, and two helpers for conv state and recurrent attention.
// Only declarations live in this header; the bodies are defined elsewhere.
struct llm_build_delta_net_base : public llm_graph_context {
25
    // constructed from the graph params (constructor body not visible here)
    llm_build_delta_net_base(const llm_graph_params & params);
26
27
    // virtual destructor: derived builders can be destroyed through a base pointer
    virtual ~llm_build_delta_net_base() = default;
28
29
    // returns pair of output and new state
30
    std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_chunking(
31
                ggml_tensor * q,
32
                ggml_tensor * k,
33
                ggml_tensor * v,
34
                ggml_tensor * g,
35
                ggml_tensor * b,
36
                ggml_tensor * s,
37
                        int   il);
38
39
    // returns pair of output and new state
40
    std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_autoregressive(
41
                ggml_tensor * q,
42
                ggml_tensor * k,
43
                ggml_tensor * v,
44
                ggml_tensor * g,
45
                ggml_tensor * b,
46
                ggml_tensor * s,
47
                int           il);
48
49
    // use the ggml_gated_delta_net fused operator (K=1; state has shape [S_v, S_v, H_v, n_seqs])
    // same parameter list and return type as the two builders above
50
    std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_fused(
51
                ggml_tensor * q,
52
                ggml_tensor * k,
53
                ggml_tensor * v,
54
                ggml_tensor * g,
55
                ggml_tensor * b,
56
                ggml_tensor * s,
57
                        int   il);
58
59
    // choose one of the implementations above
    // NOTE(review): this note used to say "one of two ... based on the number of tokens", but three builders
    // (chunking, autoregressive, fused) are declared above - confirm the selection rule in the definition
60
    std::pair<ggml_tensor *, ggml_tensor *> build_delta_net(
61
                ggml_tensor * q,
62
                ggml_tensor * k,
63
                ggml_tensor * v,
64
                ggml_tensor * g,
65
                ggml_tensor * b,
66
                ggml_tensor * s,
67
                        int   il);
68
69
    // read conv state from cache, concat with qkv_mixed, write back (single slot or per-token)
70
    // qkv_mixed: (qkv_dim, n_seq_tokens, n_seqs); returns conv_input: (kernel_size + n_seq_tokens - 1, channels, n_seqs)
71
    ggml_tensor * build_conv_state(
72
            llm_graph_input_rs * inp,
73
            ggml_tensor *        conv_states_all,
74
            ggml_tensor *        qkv_mixed,
75
            int64_t              conv_kernel_size,
76
            int64_t              conv_channels,
77
            int                  il);
78
79
    // run delta-net attention and write the new recurrent state(s) back to ssm_states_all
80
    // s: (head_v_dim, head_v_dim, num_v_heads, n_seqs); returns output: (head_v_dim, num_v_heads, n_seq_tokens, n_seqs)
81
    ggml_tensor * build_recurrent_attn(
82
            llm_graph_input_rs * inp,
83
            ggml_tensor *        ssm_states_all,
84
            ggml_tensor *        q,
85
            ggml_tensor *        k,
86
            ggml_tensor *        v,
87
            ggml_tensor *        g,
88
            ggml_tensor *        b,
89
            ggml_tensor *        s,
90
            int                  il);
91
};
92
93
// Base class (an llm_graph_context) that declares the RWKV6 channel-mix / time-mix helpers.
// Only declarations live in this header; the bodies are defined elsewhere.
struct llm_build_rwkv6_base : public llm_graph_context {
94
    // reference member: the referenced llama_model must outlive this object
    const llama_model & model;
95
96
    // takes the model in addition to the graph params (constructor body not visible here)
    llm_build_rwkv6_base(const llama_model & model, const llm_graph_params & params);
97
98
    // virtual destructor: derived builders can be destroyed through a base pointer
    virtual ~llm_build_rwkv6_base() = default;
99
100
    // const helper: takes a layer pointer, the tensors `cur` and `x_prev`, and an llm_arch value;
    // returns a ggml_tensor pointer
    ggml_tensor * build_rwkv6_channel_mix(const llama_layer * layer,
101
                                          ggml_tensor *       cur,
102
                                          ggml_tensor *       x_prev,
103
                                          llm_arch            arch) const;
104
105
    // const helper: takes the llm_graph_input_rs input, the tensors `cur` and `x_prev`, the ubatch
    // and `il` (presumably the layer index - confirm); returns a ggml_tensor pointer
    ggml_tensor * build_rwkv6_time_mix(llm_graph_input_rs * inp,
106
                                       ggml_tensor *        cur,
107
                                       ggml_tensor *        x_prev,
108
                                       const llama_ubatch & ubatch,
109
                                       int                  il) const;
110
};
111
112
// Base class for RWKV7-related models
113
// An llm_graph_context that declares the RWKV7 channel-mix / time-mix helpers.
// Only declarations live in this header; the bodies are defined elsewhere.
struct llm_build_rwkv7_base : public llm_graph_context {
114
    // reference member: the referenced llama_model must outlive this object
    const llama_model & model;
115
116
    // takes the model in addition to the graph params (constructor body not visible here)
    llm_build_rwkv7_base(const llama_model & model, const llm_graph_params & params);
117
118
    // virtual destructor: derived builders can be destroyed through a base pointer
    virtual ~llm_build_rwkv7_base() = default;
119
120
    // RWKV7-specific graph building methods
121
    // channel mix: takes a layer pointer, the tensors `cur` and `x_prev`, and an llm_arch value
    ggml_tensor * build_rwkv7_channel_mix(const llama_layer * layer,
122
                                          ggml_tensor *       cur,
123
                                          ggml_tensor *       x_prev,
124
                                          llm_arch            arch) const;
125
    // time mix: `first_layer_value` is a reference to a pointer, so the callee is able to
    // repoint the caller's tensor pointer (whether and when it does is not visible here)
    ggml_tensor * build_rwkv7_time_mix(llm_graph_input_rs * inp,
126
                                       ggml_tensor *        cur,
127
                                       ggml_tensor *        x_prev,
128
                                       ggml_tensor *&       first_layer_value,
129
                                       const llama_ubatch & ubatch,
130
                                       int                  il) const;
131
};
132
133
//
134
// models
135
//
136
137
// llama_model_llama: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_llama : public llama_model_base {
138
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_llama(const struct llama_model_params & params) : llama_model_base(params) {}
139
    void load_arch_hparams(llama_model_loader & ml) override;
140
    void load_arch_tensors(llama_model_loader & ml) override;
141
142
    // graph context type, parameterized on the compile-time flag `embed` (the flag is not interpreted in this header)
    template <bool embed>
143
    struct graph : public llm_graph_context {
144
        graph(const llama_model & model, const llm_graph_params & params);
145
    };
146
147
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
148
};
149
150
151
// llama_model_llama4: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_llama4 : public llama_model_base {
152
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_llama4(const struct llama_model_params & params) : llama_model_base(params) {}
153
    void load_arch_hparams(llama_model_loader & ml) override;
154
    void load_arch_tensors(llama_model_loader & ml) override;
155
156
    // graph context type, parameterized on the compile-time flag `iswa` (the flag is not interpreted in this header)
    template <bool iswa>
157
    struct graph : public llm_graph_context {
158
        graph(const llama_model & model, const llm_graph_params & params);
159
    };
160
161
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
162
};
163
164
165
// llama_model_llama_embed: llama_model_llama subclass. Declares no loader hooks of its own
// and overrides only build_arch_graph().
struct llama_model_llama_embed : public llama_model_llama {
166
0
    // forwards params to the llama_model_llama constructor; no other initialization
    llama_model_llama_embed(const struct llama_model_params & params) : llama_model_llama(params) {}
167
    // reuse load_arch_hparams and load_arch_tensors from llama_model_llama
168
169
    // alias template: graph<embed> names llama_model_llama::graph<embed>
    template <bool embed>
170
    using graph = llama_model_llama::graph<embed>;
171
172
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
173
};
174
175
176
// llama_model_maincoder: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_maincoder : public llama_model_base {
177
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_maincoder(const struct llama_model_params & params) : llama_model_base(params) {}
178
    void load_arch_hparams(llama_model_loader & ml) override;
179
    void load_arch_tensors(llama_model_loader & ml) override;
180
181
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
182
        graph(const llama_model & model, const llm_graph_params & params);
183
    };
184
185
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
186
};
187
188
189
// llama_model_talkie: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_talkie : public llama_model_base {
190
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_talkie(const struct llama_model_params & params) : llama_model_base(params) {}
191
    void load_arch_hparams(llama_model_loader & ml) override;
192
    void load_arch_tensors(llama_model_loader & ml) override;
193
194
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
195
        graph(const llama_model & model, const llm_graph_params & params);
196
    };
197
198
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
199
};
200
201
202
// llama_model_deci: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_deci : public llama_model_base {
203
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_deci(const struct llama_model_params & params) : llama_model_base(params) {}
204
    void load_arch_hparams(llama_model_loader & ml) override;
205
    void load_arch_tensors(llama_model_loader & ml) override;
206
207
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
208
        graph(const llama_model & model, const llm_graph_params & params);
209
    };
210
211
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
212
};
213
214
215
// llama_model_baichuan: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_baichuan : public llama_model_base {
216
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_baichuan(const struct llama_model_params & params) : llama_model_base(params) {}
217
    void load_arch_hparams(llama_model_loader & ml) override;
218
    void load_arch_tensors(llama_model_loader & ml) override;
219
220
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
221
        graph(const llama_model & model, const llm_graph_params & params);
222
    };
223
224
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
225
};
226
227
228
// llama_model_falcon: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_falcon : public llama_model_base {
229
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_falcon(const struct llama_model_params & params) : llama_model_base(params) {}
230
    void load_arch_hparams(llama_model_loader & ml) override;
231
    void load_arch_tensors(llama_model_loader & ml) override;
232
233
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
234
        graph(const llama_model & model, const llm_graph_params & params);
235
    };
236
237
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
238
};
239
240
241
// llama_model_grok: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_grok : public llama_model_base {
242
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_grok(const struct llama_model_params & params) : llama_model_base(params) {}
243
    void load_arch_hparams(llama_model_loader & ml) override;
244
    void load_arch_tensors(llama_model_loader & ml) override;
245
246
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
247
        graph(const llama_model & model, const llm_graph_params & params);
248
    };
249
250
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
251
};
252
253
254
// llama_model_starcoder: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_starcoder : public llama_model_base {
255
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_starcoder(const struct llama_model_params & params) : llama_model_base(params) {}
256
    void load_arch_hparams(llama_model_loader & ml) override;
257
    void load_arch_tensors(llama_model_loader & ml) override;
258
259
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
260
        graph(const llama_model & model, const llm_graph_params & params);
261
    };
262
263
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
264
};
265
266
267
// llama_model_refact: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_refact : public llama_model_base {
268
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_refact(const struct llama_model_params & params) : llama_model_base(params) {}
269
    void load_arch_hparams(llama_model_loader & ml) override;
270
    void load_arch_tensors(llama_model_loader & ml) override;
271
272
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
273
        graph(const llama_model & model, const llm_graph_params & params);
274
    };
275
276
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
277
};
278
279
280
// llama_model_bert: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_bert : public llama_model_base {
281
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_bert(const struct llama_model_params & params) : llama_model_base(params) {}
282
    void load_arch_hparams(llama_model_loader & ml) override;
283
    void load_arch_tensors(llama_model_loader & ml) override;
284
285
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
286
        graph(const llama_model & model, const llm_graph_params & params);
287
    };
288
289
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
290
};
291
292
293
// llama_model_jina_bert_v2: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); declares no graph type of its own.
struct llama_model_jina_bert_v2 : public llama_model_base {
294
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_jina_bert_v2(const struct llama_model_params & params) : llama_model_base(params) {}
295
    void load_arch_hparams(llama_model_loader & ml) override;
296
    void load_arch_tensors(llama_model_loader & ml) override;
297
298
    // graph is an alias for llama_model_bert::graph
    using graph = llama_model_bert::graph;
299
300
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
301
};
302
303
304
// llama_model_jina_bert_v3: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); declares no graph type of its own.
struct llama_model_jina_bert_v3 : public llama_model_base {
305
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_jina_bert_v3(const struct llama_model_params & params) : llama_model_base(params) {}
306
    void load_arch_hparams(llama_model_loader & ml) override;
307
    void load_arch_tensors(llama_model_loader & ml) override;
308
309
    // graph is an alias for llama_model_bert::graph
    using graph = llama_model_bert::graph;
310
311
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
312
};
313
314
315
// llama_model_nomic_bert: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); declares no graph type of its own.
struct llama_model_nomic_bert : public llama_model_base {
316
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_nomic_bert(const struct llama_model_params & params) : llama_model_base(params) {}
317
    void load_arch_hparams(llama_model_loader & ml) override;
318
    void load_arch_tensors(llama_model_loader & ml) override;
319
320
    // graph is an alias for llama_model_bert::graph
    using graph = llama_model_bert::graph;
321
322
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
323
};
324
325
326
// llama_model_nomic_bert_moe: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); declares no graph type of its own.
struct llama_model_nomic_bert_moe : public llama_model_base {
327
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_nomic_bert_moe(const struct llama_model_params & params) : llama_model_base(params) {}
328
    void load_arch_hparams(llama_model_loader & ml) override;
329
    void load_arch_tensors(llama_model_loader & ml) override;
330
331
    // graph is an alias for llama_model_bert::graph
    using graph = llama_model_bert::graph;
332
333
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
334
};
335
336
337
// llama_model_modern_bert: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_modern_bert : public llama_model_base {
338
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_modern_bert(const struct llama_model_params & params) : llama_model_base(params) {}
339
    void load_arch_hparams(llama_model_loader & ml) override;
340
    void load_arch_tensors(llama_model_loader & ml) override;
341
342
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
343
        graph(const llama_model & model, const llm_graph_params & params);
344
    };
345
346
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
347
};
348
349
350
// llama_model_neo_bert: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_neo_bert : public llama_model_base {
351
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_neo_bert(const struct llama_model_params & params) : llama_model_base(params) {}
352
    void load_arch_hparams(llama_model_loader & ml) override;
353
    void load_arch_tensors(llama_model_loader & ml) override;
354
355
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
356
        graph(const llama_model & model, const llm_graph_params & params);
357
    };
358
359
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
360
};
361
362
363
// llama_model_eurobert: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_eurobert : public llama_model_base {
364
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_eurobert(const struct llama_model_params & params) : llama_model_base(params) {}
365
    void load_arch_hparams(llama_model_loader & ml) override;
366
    void load_arch_tensors(llama_model_loader & ml) override;
367
368
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
369
        graph(const llama_model & model, const llm_graph_params & params);
370
    };
371
372
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
373
};
374
375
376
// llama_model_bloom: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_bloom : public llama_model_base {
377
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_bloom(const struct llama_model_params & params) : llama_model_base(params) {}
378
    void load_arch_hparams(llama_model_loader & ml) override;
379
    void load_arch_tensors(llama_model_loader & ml) override;
380
381
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
382
        graph(const llama_model & model, const llm_graph_params & params);
383
    };
384
385
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
386
};
387
388
389
// llama_model_mpt: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_mpt : public llama_model_base {
390
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_mpt(const struct llama_model_params & params) : llama_model_base(params) {}
391
    void load_arch_hparams(llama_model_loader & ml) override;
392
    void load_arch_tensors(llama_model_loader & ml) override;
393
394
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
395
        graph(const llama_model & model, const llm_graph_params & params);
396
    };
397
398
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
399
};
400
401
402
// llama_model_stablelm: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_stablelm : public llama_model_base {
403
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_stablelm(const struct llama_model_params & params) : llama_model_base(params) {}
404
    void load_arch_hparams(llama_model_loader & ml) override;
405
    void load_arch_tensors(llama_model_loader & ml) override;
406
407
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
408
        graph(const llama_model & model, const llm_graph_params & params);
409
    };
410
411
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
412
};
413
414
// llama_model_mellum: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_mellum : public llama_model_base {
415
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_mellum(const struct llama_model_params & params) : llama_model_base(params) {}
416
    void load_arch_hparams(llama_model_loader & ml) override;
417
    void load_arch_tensors(llama_model_loader & ml) override;
418
419
    // graph context type, parameterized on the compile-time flag `iswa` (the flag is not interpreted in this header)
    template <bool iswa>
420
    struct graph : public llm_graph_context {
421
        graph(const llama_model & model, const llm_graph_params & params);
422
    };
423
424
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
425
};
426
427
// llama_model_qwen: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_qwen : public llama_model_base {
428
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_qwen(const struct llama_model_params & params) : llama_model_base(params) {}
429
    void load_arch_hparams(llama_model_loader & ml) override;
430
    void load_arch_tensors(llama_model_loader & ml) override;
431
432
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
433
        graph(const llama_model & model, const llm_graph_params & params);
434
    };
435
436
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
437
};
438
439
440
// llama_model_qwen2: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_qwen2 : public llama_model_base {
441
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_qwen2(const struct llama_model_params & params) : llama_model_base(params) {}
442
    void load_arch_hparams(llama_model_loader & ml) override;
443
    void load_arch_tensors(llama_model_loader & ml) override;
444
445
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
446
        graph(const llama_model & model, const llm_graph_params & params);
447
    };
448
449
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
450
};
451
452
453
// llama_model_dream: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_dream : public llama_model_base {
454
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_dream(const struct llama_model_params & params) : llama_model_base(params) {}
455
    void load_arch_hparams(llama_model_loader & ml) override;
456
    void load_arch_tensors(llama_model_loader & ml) override;
457
458
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
459
        graph(const llama_model & model, const llm_graph_params & params);
460
    };
461
462
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
463
};
464
465
466
// llama_model_llada: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_llada : public llama_model_base {
467
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_llada(const struct llama_model_params & params) : llama_model_base(params) {}
468
    void load_arch_hparams(llama_model_loader & ml) override;
469
    void load_arch_tensors(llama_model_loader & ml) override;
470
471
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
472
        graph(const llama_model & model, const llm_graph_params & params);
473
    };
474
475
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
476
};
477
478
479
// llama_model_llada_moe: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_llada_moe : public llama_model_base {
480
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_llada_moe(const struct llama_model_params & params) : llama_model_base(params) {}
481
    void load_arch_hparams(llama_model_loader & ml) override;
482
    void load_arch_tensors(llama_model_loader & ml) override;
483
484
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
485
        graph(const llama_model & model, const llm_graph_params & params);
486
    };
487
488
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
489
};
490
491
492
// llama_model_rnd1: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_rnd1 : public llama_model_base {
493
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_rnd1(const struct llama_model_params & params) : llama_model_base(params) {}
494
    void load_arch_hparams(llama_model_loader & ml) override;
495
    void load_arch_tensors(llama_model_loader & ml) override;
496
497
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
498
        graph(const llama_model & model, const llm_graph_params & params);
499
    };
500
501
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
502
};
503
504
505
// llama_model_qwen2vl: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_qwen2vl : public llama_model_base {
506
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_qwen2vl(const struct llama_model_params & params) : llama_model_base(params) {}
507
    void load_arch_hparams(llama_model_loader & ml) override;
508
    void load_arch_tensors(llama_model_loader & ml) override;
509
510
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
511
        graph(const llama_model & model, const llm_graph_params & params);
512
    };
513
514
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
515
};
516
517
518
// llama_model_qwen2moe: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_qwen2moe : public llama_model_base {
519
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_qwen2moe(const struct llama_model_params & params) : llama_model_base(params) {}
520
    void load_arch_hparams(llama_model_loader & ml) override;
521
    void load_arch_tensors(llama_model_loader & ml) override;
522
523
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
524
        graph(const llama_model & model, const llm_graph_params & params);
525
    };
526
527
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
528
};
529
530
531
// llama_model_qwen3: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_qwen3 : public llama_model_base {
532
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_qwen3(const struct llama_model_params & params) : llama_model_base(params) {}
533
    void load_arch_hparams(llama_model_loader & ml) override;
534
    void load_arch_tensors(llama_model_loader & ml) override;
535
536
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
537
        graph(const llama_model & model, const llm_graph_params & params);
538
    };
539
540
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
541
};
542
543
544
// llama_model_qwen3moe: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_qwen3moe : public llama_model_base {
545
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_qwen3moe(const struct llama_model_params & params) : llama_model_base(params) {}
546
    void load_arch_hparams(llama_model_loader & ml) override;
547
    void load_arch_tensors(llama_model_loader & ml) override;
548
549
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
550
        graph(const llama_model & model, const llm_graph_params & params);
551
    };
552
553
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
554
};
555
556
557
// llama_model_qwen3vl: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_qwen3vl : public llama_model_base {
558
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_qwen3vl(const struct llama_model_params & params) : llama_model_base(params) {}
559
    void load_arch_hparams(llama_model_loader & ml) override;
560
    void load_arch_tensors(llama_model_loader & ml) override;
561
562
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
563
        graph(const llama_model & model, const llm_graph_params & params);
564
    };
565
566
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
567
};
568
569
570
// llama_model_qwen3vlmoe: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_qwen3vlmoe : public llama_model_base {
571
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_qwen3vlmoe(const struct llama_model_params & params) : llama_model_base(params) {}
572
    void load_arch_hparams(llama_model_loader & ml) override;
573
    void load_arch_tensors(llama_model_loader & ml) override;
574
575
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
576
        graph(const llama_model & model, const llm_graph_params & params);
577
    };
578
579
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
580
};
581
582
583
// llama_model_phi2: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_phi2 : public llama_model_base {
584
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_phi2(const struct llama_model_params & params) : llama_model_base(params) {}
585
    void load_arch_hparams(llama_model_loader & ml) override;
586
    void load_arch_tensors(llama_model_loader & ml) override;
587
588
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
589
        graph(const llama_model & model, const llm_graph_params & params);
590
    };
591
592
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
593
};
594
595
596
// llama_model_phi3: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_phi3 : public llama_model_base {
597
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_phi3(const struct llama_model_params & params) : llama_model_base(params) {}
598
    void load_arch_hparams(llama_model_loader & ml) override;
599
    void load_arch_tensors(llama_model_loader & ml) override;
600
601
    // graph context type, parameterized on the compile-time flag `iswa` (the flag is not interpreted in this header)
    template <bool iswa>
602
    struct graph : public llm_graph_context {
603
        graph(const llama_model & model, const llm_graph_params & params);
604
    };
605
606
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
607
};
608
609
610
// llama_model_phimoe: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); declares no graph type of its own.
struct llama_model_phimoe : public llama_model_base {
611
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_phimoe(const struct llama_model_params & params) : llama_model_base(params) {}
612
    void load_arch_hparams(llama_model_loader & ml) override;
613
    void load_arch_tensors(llama_model_loader & ml) override;
614
615
    // alias template: graph<iswa> names llama_model_phi3::graph<iswa>
    template <bool iswa>
616
    using graph = llama_model_phi3::graph<iswa>;
617
618
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
619
};
620
621
622
// llama_model_plamo: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_plamo : public llama_model_base {
623
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_plamo(const struct llama_model_params & params) : llama_model_base(params) {}
624
    void load_arch_hparams(llama_model_loader & ml) override;
625
    void load_arch_tensors(llama_model_loader & ml) override;
626
627
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
628
        graph(const llama_model & model, const llm_graph_params & params);
629
    };
630
631
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
632
};
633
634
635
// llama_model_plamo2: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_plamo2 : public llama_model_base {
636
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_plamo2(const struct llama_model_params & params) : llama_model_base(params) {}
637
    void load_arch_hparams(llama_model_loader & ml) override;
638
    void load_arch_tensors(llama_model_loader & ml) override;
639
640
    // graph type for this model; derives from llm_build_mamba_base rather than directly from
    // llm_graph_context, so the build_mamba_layer / build_mamba2_layer helpers are inherited
    struct graph : public llm_build_mamba_base {
641
        graph(const llama_model & model, const llm_graph_params & params);
642
        private:
643
            // private per-layer helpers (declared only); each returns a ggml_tensor pointer
            ggml_tensor * build_plamo2_mamba_layer(llm_graph_input_rs * inp, ggml_tensor * cur, const llama_model & model, const llama_ubatch & ubatch, int il);
644
            ggml_tensor * build_plamo2_attn_layer(llm_graph_input_attn_kv * inp, ggml_tensor * inp_pos, ggml_tensor * cur,
645
                                                    const llama_model & model, int il);
646
    };
647
648
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
649
};
650
651
652
// llama_model_plamo3: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_plamo3 : public llama_model_base {
653
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_plamo3(const struct llama_model_params & params) : llama_model_base(params) {}
654
    void load_arch_hparams(llama_model_loader & ml) override;
655
    void load_arch_tensors(llama_model_loader & ml) override;
656
657
    // graph context type, parameterized on the compile-time flag `iswa` (the flag is not interpreted in this header)
    template <bool iswa>
658
    struct graph : public llm_graph_context {
659
        graph(const llama_model & model, const llm_graph_params & params);
660
    };
661
662
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
663
};
664
665
666
// llama_model_gpt2: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_gpt2 : public llama_model_base {
667
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_gpt2(const struct llama_model_params & params) : llama_model_base(params) {}
668
    void load_arch_hparams(llama_model_loader & ml) override;
669
    void load_arch_tensors(llama_model_loader & ml) override;
670
671
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
672
        graph(const llama_model & model, const llm_graph_params & params);
673
    };
674
675
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
676
};
677
678
679
// llama_model_codeshell: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_codeshell : public llama_model_base {
680
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_codeshell(const struct llama_model_params & params) : llama_model_base(params) {}
681
    void load_arch_hparams(llama_model_loader & ml) override;
682
    void load_arch_tensors(llama_model_loader & ml) override;
683
684
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
685
        graph(const llama_model & model, const llm_graph_params & params);
686
    };
687
688
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
689
};
690
691
692
// llama_model_orion: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_orion : public llama_model_base {
693
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_orion(const struct llama_model_params & params) : llama_model_base(params) {}
694
    void load_arch_hparams(llama_model_loader & ml) override;
695
    void load_arch_tensors(llama_model_loader & ml) override;
696
697
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
698
        graph(const llama_model & model, const llm_graph_params & params);
699
    };
700
701
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
702
};
703
704
705
// llama_model_internlm2: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_internlm2 : public llama_model_base {
706
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_internlm2(const struct llama_model_params & params) : llama_model_base(params) {}
707
    void load_arch_hparams(llama_model_loader & ml) override;
708
    void load_arch_tensors(llama_model_loader & ml) override;
709
710
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
711
        graph(const llama_model & model, const llm_graph_params & params);
712
    };
713
714
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
715
};
716
717
718
// llama_model_minicpm3: llama_model_base subclass. Overrides load_arch_hparams(), load_arch_tensors()
// and build_arch_graph(); only the constructor is defined inline in this header.
struct llama_model_minicpm3 : public llama_model_base {
719
0
    // forwards params to the llama_model_base constructor; no other initialization
    llama_model_minicpm3(const struct llama_model_params & params) : llama_model_base(params) {}
720
    void load_arch_hparams(llama_model_loader & ml) override;
721
    void load_arch_tensors(llama_model_loader & ml) override;
722
723
    // graph context type for this model; its constructor takes the model and the graph params (declared only)
    struct graph : public llm_graph_context {
724
        graph(const llama_model & model, const llm_graph_params & params);
725
    };
726
727
    // returns an owning llm_graph_context pointer for the given graph params (declared only)
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
728
};
729
730
731
// llama_model_gemma: llama_model_base subclass (by its name, the model type for the
// "gemma" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_gemma : public llama_model_base {
732
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_gemma(const struct llama_model_params & params) : llama_model_base(params) {}
733
    void load_arch_hparams(llama_model_loader & ml) override;
734
    void load_arch_tensors(llama_model_loader & ml) override;
735
736
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
737
        graph(const llama_model & model, const llm_graph_params & params);
738
    };
739
740
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
741
};
742
743
744
// llama_model_gemma2: llama_model_base subclass (by its name, the model type for the
// "gemma2" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_gemma2 : public llama_model_base {
745
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_gemma2(const struct llama_model_params & params) : llama_model_base(params) {}
746
    void load_arch_hparams(llama_model_loader & ml) override;
747
    void load_arch_tensors(llama_model_loader & ml) override;
748
749
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
750
        graph(const llama_model & model, const llm_graph_params & params);
751
    };
752
753
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
754
};
755
756
757
// llama_model_gemma3: llama_model_base subclass (by its name, the model type for the
// "gemma3" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_gemma3 : public llama_model_base {
758
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_gemma3(const struct llama_model_params & params) : llama_model_base(params) {}
759
    void load_arch_hparams(llama_model_loader & ml) override;
760
    void load_arch_tensors(llama_model_loader & ml) override;
761
762
    // Graph type templated on the compile-time flag iswa; extends llm_graph_context.
    // NOTE(review): iswa presumably selects a sliding-window-attention variant - confirm in the implementation.
    template <bool iswa>
763
    struct graph : public llm_graph_context {
764
        graph(const llama_model & model, const llm_graph_params & params);
765
    };
766
767
    // Const override returning an owning llm_graph_context pointer (presumably one of the graph<iswa> instantiations - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
768
};
769
770
771
// llama_model_gemma3n: llama_model_base subclass (by its name, the model type for the
// "gemma3n" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_gemma3n : public llama_model_base {
772
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_gemma3n(const struct llama_model_params & params) : llama_model_base(params) {}
773
    void load_arch_hparams(llama_model_loader & ml) override;
774
    void load_arch_tensors(llama_model_loader & ml) override;
775
776
    // Graph type for this model: extends llm_graph_context and carries per-graph constants
    // plus helper methods that are declared here and defined elsewhere.
    struct graph : public llm_graph_context {
777
        // Stored by reference, so the graph does not own the model.
        const llama_model & model;
778
779
        // The next four const members have no default initializer here, so the constructor must set them.
        const int64_t n_embd_head;
780
        const int64_t n_embd_altup;
781
        const int64_t n_altup;
782
        const int     i_altup_act;
783
        const int     n_layer_sparsity = 10; // number of layers using activation sparsity
784
        const float   f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95)
785
786
        graph(const llama_model & model, const llm_graph_params & params);
787
        ggml_tensor * calc_magnitude(ggml_tensor * x);
788
789
        // TODO: refactor in common "per-layer" functionality [TAG_PER_LAYER]
790
        ggml_tensor * build_inp_per_layer();
791
        ggml_tensor * project_per_layer_inputs(ggml_tensor * inp_batch, ggml_tensor * inp_per_layer);
792
793
        // Remaining helpers each return a ggml_tensor pointer; il is presumably the layer index - TODO confirm.
        ggml_tensor * gaussian_topk(ggml_tensor * x);
794
        ggml_tensor * altup_compute_router_modalities(ggml_tensor * x, int il);
795
        ggml_tensor * altup_predict(ggml_tensor * cur, int il);
796
        ggml_tensor * laurel(ggml_tensor * cur, int il);
797
        ggml_tensor * altup_correct(ggml_tensor * predictions, ggml_tensor * activated, int il);
798
    };
799
800
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
801
};
802
803
804
// llama_model_gemma4: llama_model_base subclass (by its name, the model type for the
// "gemma4" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_gemma4 : public llama_model_base {
805
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_gemma4(const struct llama_model_params & params) : llama_model_base(params) {}
806
    void load_arch_hparams(llama_model_loader & ml) override;
807
    void load_arch_tensors(llama_model_loader & ml) override;
808
809
    // Graph type for this model: extends llm_graph_context and adds per-layer input helpers.
    struct graph : public llm_graph_context {
810
        // Stored by reference, so the graph does not own the model.
        const llama_model & model;
811
812
        // Const member without a default initializer here, so the constructor must set it.
        const int64_t n_embd_per_layer;
813
814
        graph(const llama_model & model, const llm_graph_params & params);
815
816
        // TODO: refactor in common "per-layer" functionality [TAG_PER_LAYER]
817
        ggml_tensor * build_inp_per_layer();
818
        ggml_tensor * project_per_layer_inputs(ggml_tensor * inp_batch, ggml_tensor * inp_per_layer);
819
    };
820
821
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
822
};
823
824
825
// llama_model_gemma4_assistant: llama_model_base subclass (by its name, the model type for the
// "gemma4_assistant" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_gemma4_assistant : public llama_model_base {
826
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_gemma4_assistant(const struct llama_model_params & params) : llama_model_base(params) {}
827
    void load_arch_hparams(llama_model_loader & ml) override;
828
    void load_arch_tensors(llama_model_loader & ml) override;
829
830
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
831
        graph(const llama_model & model, const llm_graph_params & params);
832
    };
833
834
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
835
};
836
837
838
// llama_model_gemma_embedding: llama_model_base subclass (by its name, the model type for the
// "gemma_embedding" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_gemma_embedding : public llama_model_base {
839
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_gemma_embedding(const struct llama_model_params & params) : llama_model_base(params) {}
840
    void load_arch_hparams(llama_model_loader & ml) override;
841
    void load_arch_tensors(llama_model_loader & ml) override;
842
843
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
844
        graph(const llama_model & model, const llm_graph_params & params);
845
    };
846
847
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
848
};
849
850
851
// llama_model_starcoder2: llama_model_base subclass (by its name, the model type for the
// "starcoder2" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_starcoder2 : public llama_model_base {
852
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_starcoder2(const struct llama_model_params & params) : llama_model_base(params) {}
853
    void load_arch_hparams(llama_model_loader & ml) override;
854
    void load_arch_tensors(llama_model_loader & ml) override;
855
856
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
857
        graph(const llama_model & model, const llm_graph_params & params);
858
    };
859
860
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
861
};
862
863
864
// llama_model_mamba: llama_model_base subclass (by its name, the model type for the
// "mamba" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_mamba : public llama_model_base {
865
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_mamba(const struct llama_model_params & params) : llama_model_base(params) {}
866
    void load_arch_hparams(llama_model_loader & ml) override;
867
    void load_arch_tensors(llama_model_loader & ml) override;
868
869
    // Graph type for this model: extends llm_build_mamba_base, which declares
    // build_mamba_layer() and build_mamba2_layer() on top of llm_graph_context.
    struct graph : public llm_build_mamba_base {
870
        graph(const llama_model & model, const llm_graph_params & params);
871
    };
872
873
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
874
};
875
876
877
// llama_model_mamba2: llama_model_base subclass (by its name, the model type for the
// "mamba2" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_mamba2 : public llama_model_base {
878
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_mamba2(const struct llama_model_params & params) : llama_model_base(params) {}
879
    void load_arch_hparams(llama_model_loader & ml) override;
880
    void load_arch_tensors(llama_model_loader & ml) override;
881
882
    // No graph type of its own: the name is an alias for llama_model_mamba's graph.
    using graph = llama_model_mamba::graph;
883
884
    // Const override returning an owning llm_graph_context pointer (presumably the aliased graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
885
};
886
887
888
// llama_model_jamba: llama_model_base subclass (by its name, the model type for the
// "jamba" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_jamba : public llama_model_base {
889
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_jamba(const struct llama_model_params & params) : llama_model_base(params) {}
890
    void load_arch_hparams(llama_model_loader & ml) override;
891
    void load_arch_tensors(llama_model_loader & ml) override;
892
893
    // Graph type for this model: extends llm_build_mamba_base, which declares
    // build_mamba_layer() and build_mamba2_layer() on top of llm_graph_context.
    struct graph : public llm_build_mamba_base {
894
        graph(const llama_model & model, const llm_graph_params & params);
895
    };
896
897
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
898
};
899
900
901
// llama_model_xverse: llama_model_base subclass (by its name, the model type for the
// "xverse" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_xverse : public llama_model_base {
902
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_xverse(const struct llama_model_params & params) : llama_model_base(params) {}
903
    void load_arch_hparams(llama_model_loader & ml) override;
904
    void load_arch_tensors(llama_model_loader & ml) override;
905
906
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
907
        graph(const llama_model & model, const llm_graph_params & params);
908
    };
909
910
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
911
};
912
913
914
// llama_model_command_r: llama_model_base subclass (by its name, the model type for the
// "command_r" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_command_r : public llama_model_base {
915
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_command_r(const struct llama_model_params & params) : llama_model_base(params) {}
916
    void load_arch_hparams(llama_model_loader & ml) override;
917
    void load_arch_tensors(llama_model_loader & ml) override;
918
919
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
920
        graph(const llama_model & model, const llm_graph_params & params);
921
    };
922
923
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
924
};
925
926
927
// llama_model_cohere2: llama_model_base subclass (by its name, the model type for the
// "cohere2" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_cohere2 : public llama_model_base {
928
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_cohere2(const struct llama_model_params & params) : llama_model_base(params) {}
929
    void load_arch_hparams(llama_model_loader & ml) override;
930
    void load_arch_tensors(llama_model_loader & ml) override;
931
932
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
933
        graph(const llama_model & model, const llm_graph_params & params);
934
    };
935
936
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
937
};
938
939
940
// llama_model_cohere2moe: llama_model_base subclass (by its name, the model type for the
// "cohere2moe" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
// Unlike most sibling model types it declares two graph types.
struct llama_model_cohere2moe : public llama_model_base {
941
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_cohere2moe(const struct llama_model_params & params) : llama_model_base(params) {}
942
    void load_arch_hparams(llama_model_loader & ml) override;
943
    void load_arch_tensors(llama_model_loader & ml) override;
944
945
    // Primary graph type: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
946
        graph(const llama_model & model, const llm_graph_params & params);
947
    };
948
949
    // Second graph type with the same base and constructor signature.
    // NOTE(review): "mtp" presumably stands for multi-token prediction - confirm in the implementation.
    struct graph_mtp : public llm_graph_context {
950
        graph_mtp(const llama_model & model, const llm_graph_params & params);
951
    };
952
953
    // Const override returning an owning llm_graph_context pointer; how it chooses between
    // graph and graph_mtp is not visible here.
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
954
};
955
956
957
// llama_model_dbrx: llama_model_base subclass (by its name, the model type for the
// "dbrx" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_dbrx : public llama_model_base {
958
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_dbrx(const struct llama_model_params & params) : llama_model_base(params) {}
959
    void load_arch_hparams(llama_model_loader & ml) override;
960
    void load_arch_tensors(llama_model_loader & ml) override;
961
962
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
963
        graph(const llama_model & model, const llm_graph_params & params);
964
    };
965
966
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
967
};
968
969
970
// llama_model_olmo: llama_model_base subclass (by its name, the model type for the
// "olmo" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_olmo : public llama_model_base {
971
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_olmo(const struct llama_model_params & params) : llama_model_base(params) {}
972
    void load_arch_hparams(llama_model_loader & ml) override;
973
    void load_arch_tensors(llama_model_loader & ml) override;
974
975
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
976
        graph(const llama_model & model, const llm_graph_params & params);
977
    };
978
979
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
980
};
981
982
983
// llama_model_olmo2: llama_model_base subclass (by its name, the model type for the
// "olmo2" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_olmo2 : public llama_model_base {
984
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_olmo2(const struct llama_model_params & params) : llama_model_base(params) {}
985
    void load_arch_hparams(llama_model_loader & ml) override;
986
    void load_arch_tensors(llama_model_loader & ml) override;
987
988
    // Graph type templated on the compile-time flag iswa; extends llm_graph_context.
    // NOTE(review): iswa presumably selects a sliding-window-attention variant - confirm in the implementation.
    template <bool iswa>
989
    struct graph : public llm_graph_context {
990
        graph(const llama_model & model, const llm_graph_params & params);
991
    };
992
993
    // Const override returning an owning llm_graph_context pointer (presumably one of the graph<iswa> instantiations - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
994
};
995
996
997
// llama_model_olmoe: llama_model_base subclass (by its name, the model type for the
// "olmoe" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_olmoe : public llama_model_base {
998
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_olmoe(const struct llama_model_params & params) : llama_model_base(params) {}
999
    void load_arch_hparams(llama_model_loader & ml) override;
1000
    void load_arch_tensors(llama_model_loader & ml) override;
1001
1002
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1003
        graph(const llama_model & model, const llm_graph_params & params);
1004
    };
1005
1006
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1007
};
1008
1009
1010
// llama_model_openelm: llama_model_base subclass (by its name, the model type for the
// "openelm" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_openelm : public llama_model_base {
1011
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_openelm(const struct llama_model_params & params) : llama_model_base(params) {}
1012
    void load_arch_hparams(llama_model_loader & ml) override;
1013
    void load_arch_tensors(llama_model_loader & ml) override;
1014
1015
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1016
        graph(const llama_model & model, const llm_graph_params & params);
1017
    };
1018
1019
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1020
};
1021
1022
1023
// llama_model_gptneox: llama_model_base subclass (by its name, the model type for the
// "gptneox" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_gptneox : public llama_model_base {
1024
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_gptneox(const struct llama_model_params & params) : llama_model_base(params) {}
1025
    void load_arch_hparams(llama_model_loader & ml) override;
1026
    void load_arch_tensors(llama_model_loader & ml) override;
1027
1028
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1029
        graph(const llama_model & model, const llm_graph_params & params);
1030
    };
1031
1032
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1033
};
1034
1035
1036
// llama_model_arctic: llama_model_base subclass (by its name, the model type for the
// "arctic" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_arctic : public llama_model_base {
1037
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_arctic(const struct llama_model_params & params) : llama_model_base(params) {}
1038
    void load_arch_hparams(llama_model_loader & ml) override;
1039
    void load_arch_tensors(llama_model_loader & ml) override;
1040
1041
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1042
        graph(const llama_model & model, const llm_graph_params & params);
1043
    };
1044
1045
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1046
};
1047
1048
1049
// llama_model_deepseek: llama_model_base subclass (by its name, the model type for the
// "deepseek" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_deepseek : public llama_model_base {
1050
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_deepseek(const struct llama_model_params & params) : llama_model_base(params) {}
1051
    void load_arch_hparams(llama_model_loader & ml) override;
1052
    void load_arch_tensors(llama_model_loader & ml) override;
1053
1054
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1055
        graph(const llama_model & model, const llm_graph_params & params);
1056
    };
1057
1058
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1059
};
1060
1061
1062
// llama_model_deepseek2: llama_model_base subclass (by its name, the model type for the
// "deepseek2" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
// Its nested graph type is also reused by other model types through `using` aliases.
struct llama_model_deepseek2 : public llama_model_base {
1063
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_deepseek2(const struct llama_model_params & params) : llama_model_base(params) {}
1064
    void load_arch_hparams(llama_model_loader & ml) override;
1065
    void load_arch_tensors(llama_model_loader & ml) override;
1066
1067
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1068
        graph(const llama_model & model, const llm_graph_params & params);
1069
    };
1070
1071
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1072
};
1073
1074
1075
// llama_model_deepseek32: llama_model_base subclass (by its name, the model type for the
// "deepseek32" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_deepseek32 : public llama_model_base {
1076
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_deepseek32(const struct llama_model_params & params) : llama_model_base(params) {}
1077
    void load_arch_hparams(llama_model_loader & ml) override;
1078
    void load_arch_tensors(llama_model_loader & ml) override;
1079
1080
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1081
        graph(const llama_model & model, const llm_graph_params & params);
1082
    };
1083
1084
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1085
};
1086
1087
1088
// llama_model_deepseek2ocr: llama_model_base subclass (by its name, the model type for the
// "deepseek2ocr" architecture). It derives from llama_model_base directly, not from
// llama_model_deepseek2, so it declares its own loader hooks; only the graph type is shared.
struct llama_model_deepseek2ocr : public llama_model_base {
1089
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_deepseek2ocr(const struct llama_model_params & params) : llama_model_base(params) {}
1090
    void load_arch_hparams(llama_model_loader & ml) override;
1091
    void load_arch_tensors(llama_model_loader & ml) override;
1092
1093
    // No graph type of its own: the name is an alias for llama_model_deepseek2's graph.
    using graph = llama_model_deepseek2::graph;
1094
1095
    // Const override returning an owning llm_graph_context pointer (presumably the aliased graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1096
};
1097
1098
1099
// llama_model_glm_dsa: llama_model_base subclass (by its name, the model type for the
// "glm_dsa" architecture). It derives from llama_model_base directly, not from
// llama_model_deepseek2, so it declares its own loader hooks; only the graph type is shared.
struct llama_model_glm_dsa : public llama_model_base {
1100
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_glm_dsa(const struct llama_model_params & params) : llama_model_base(params) {}
1101
    void load_arch_hparams(llama_model_loader & ml) override;
1102
    void load_arch_tensors(llama_model_loader & ml) override;
1103
1104
    // No graph type of its own: the name is an alias for llama_model_deepseek2's graph.
    using graph = llama_model_deepseek2::graph;
1105
1106
    // Const override returning an owning llm_graph_context pointer (presumably the aliased graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1107
};
1108
1109
// llama_model_eagle3: llama_model_base subclass (by its name, the model type for the
// "eagle3" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_eagle3 : public llama_model_base {
1110
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_eagle3(const struct llama_model_params & params) : llama_model_base(params) {}
1111
    void load_arch_hparams(llama_model_loader & ml) override;
1112
    void load_arch_tensors(llama_model_loader & ml) override;
1113
1114
    // Graph type templated on the compile-time flag is_enc; extends llm_graph_context.
    // NOTE(review): is_enc presumably selects an encoder vs. decoder graph - confirm in the implementation.
    template <bool is_enc>
1115
    struct graph : public llm_graph_context {
1116
        graph(const llama_model & model, const llm_graph_params & params);
1117
1118
        // Const helper returning a ggml_tensor pointer; going by the name it builds the
        // encoder embedding input, but the body is not visible here.
        ggml_tensor * build_inp_embd_enc() const;
1119
    };
1120
1121
    // Const override returning an owning llm_graph_context pointer (presumably one of the graph<is_enc> instantiations - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1122
};
1123
1124
1125
// llama_model_mistral4: derives from llama_model_deepseek2 rather than llama_model_base,
// inheriting its loader hooks and overriding only build_arch_graph().
struct llama_model_mistral4 : public llama_model_deepseek2 {
1126
0
    // Forwards params to the llama_model_deepseek2 constructor; the body is empty.
    llama_model_mistral4(const struct llama_model_params & params) : llama_model_deepseek2(params) {}
1127
    // reuse load_arch_hparams and load_arch_tensors from llama_model_deepseek2
1128
1129
    // Re-exposes the base class's graph type under this class's own scope.
    using graph = llama_model_deepseek2::graph;
1130
1131
    // Const override returning an owning llm_graph_context pointer (presumably the aliased graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1132
};
1133
1134
1135
// llama_model_chatglm: llama_model_base subclass (by its name, the model type for the
// "chatglm" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_chatglm : public llama_model_base {
1136
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_chatglm(const struct llama_model_params & params) : llama_model_base(params) {}
1137
    void load_arch_hparams(llama_model_loader & ml) override;
1138
    void load_arch_tensors(llama_model_loader & ml) override;
1139
1140
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1141
        graph(const llama_model & model, const llm_graph_params & params);
1142
    };
1143
1144
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1145
};
1146
1147
1148
// llama_model_glm4: llama_model_base subclass (by its name, the model type for the
// "glm4" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_glm4 : public llama_model_base {
1149
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_glm4(const struct llama_model_params & params) : llama_model_base(params) {}
1150
    void load_arch_hparams(llama_model_loader & ml) override;
1151
    void load_arch_tensors(llama_model_loader & ml) override;
1152
1153
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1154
        graph(const llama_model & model, const llm_graph_params & params);
1155
    };
1156
1157
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1158
};
1159
1160
1161
// llama_model_glm4_moe: llama_model_base subclass (by its name, the model type for the
// "glm4_moe" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_glm4_moe : public llama_model_base {
1162
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_glm4_moe(const struct llama_model_params & params) : llama_model_base(params) {}
1163
    void load_arch_hparams(llama_model_loader & ml) override;
1164
    void load_arch_tensors(llama_model_loader & ml) override;
1165
1166
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1167
        graph(const llama_model & model, const llm_graph_params & params);
1168
    };
1169
1170
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1171
};
1172
1173
1174
// llama_model_bitnet: llama_model_base subclass (by its name, the model type for the
// "bitnet" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_bitnet : public llama_model_base {
1175
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_bitnet(const struct llama_model_params & params) : llama_model_base(params) {}
1176
    void load_arch_hparams(llama_model_loader & ml) override;
1177
    void load_arch_tensors(llama_model_loader & ml) override;
1178
1179
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1180
        graph(const llama_model & model, const llm_graph_params & params);
1181
    };
1182
1183
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1184
};
1185
1186
1187
// llama_model_t5: llama_model_base subclass (by its name, the model type for the
// "t5" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_t5 : public llama_model_base {
1188
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_t5(const struct llama_model_params & params) : llama_model_base(params) {}
1189
    void load_arch_hparams(llama_model_loader & ml) override;
1190
    void load_arch_tensors(llama_model_loader & ml) override;
1191
1192
    // Graph type templated on the compile-time flag is_enc; extends llm_graph_context.
    // NOTE(review): is_enc presumably selects the encoder vs. decoder graph - confirm in the implementation.
    template <bool is_enc>
1193
    struct graph : public llm_graph_context {
1194
        graph(const llama_model & model, const llm_graph_params & params);
1195
    };
1196
1197
    // Const override returning an owning llm_graph_context pointer (presumably one of the graph<is_enc> instantiations - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1198
};
1199
1200
1201
// llama_model_t5encoder: llama_model_base subclass (by its name, the encoder-only
// counterpart of the T5 model type). It declares its own loader hooks and
// build_arch_graph() override; only the graph type is shared with llama_model_t5.
struct llama_model_t5encoder : public llama_model_base {
1202
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_t5encoder(const struct llama_model_params & params) : llama_model_base(params) {}
1203
    void load_arch_hparams(llama_model_loader & ml) override;
1204
    void load_arch_tensors(llama_model_loader & ml) override;
1205
1206
    // Alias for llama_model_t5's graph template with its bool argument fixed to true.
    using graph = llama_model_t5::graph<true>;
1207
1208
    // Const override returning an owning llm_graph_context pointer (presumably the aliased graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1209
};
1210
1211
1212
// llama_model_jais: llama_model_base subclass (by its name, the model type for the
// "jais" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_jais : public llama_model_base {
1213
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_jais(const struct llama_model_params & params) : llama_model_base(params) {}
1214
    void load_arch_hparams(llama_model_loader & ml) override;
1215
    void load_arch_tensors(llama_model_loader & ml) override;
1216
1217
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1218
        graph(const llama_model & model, const llm_graph_params & params);
1219
    };
1220
1221
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1222
};
1223
1224
1225
// llama_model_jais2: llama_model_base subclass (by its name, the model type for the
// "jais2" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_jais2 : public llama_model_base {
1226
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_jais2(const struct llama_model_params & params) : llama_model_base(params) {}
1227
    void load_arch_hparams(llama_model_loader & ml) override;
1228
    void load_arch_tensors(llama_model_loader & ml) override;
1229
1230
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1231
        graph(const llama_model & model, const llm_graph_params & params);
1232
    };
1233
1234
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1235
};
1236
1237
1238
// llama_model_nemotron: llama_model_base subclass (by its name, the model type for the
// "nemotron" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_nemotron : public llama_model_base {
1239
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_nemotron(const struct llama_model_params & params) : llama_model_base(params) {}
1240
    void load_arch_hparams(llama_model_loader & ml) override;
1241
    void load_arch_tensors(llama_model_loader & ml) override;
1242
1243
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1244
        graph(const llama_model & model, const llm_graph_params & params);
1245
    };
1246
1247
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1248
};
1249
1250
1251
// llama_model_nemotron_h: llama_model_base subclass (by its name, the model type for the
// "nemotron_h" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_nemotron_h : public llama_model_base {
1252
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_nemotron_h(const struct llama_model_params & params) : llama_model_base(params) {}
1253
    void load_arch_hparams(llama_model_loader & ml) override;
1254
    void load_arch_tensors(llama_model_loader & ml) override;
1255
1256
    // Graph type for this model: extends llm_build_mamba_base (so the mamba layer builders
    // are available) and adds public FFN and attention layer helpers.
    struct graph : public llm_build_mamba_base {
1257
        graph(const llama_model & model, const llm_graph_params & params);
1258
        // Both helpers take the current tensor and return a ggml_tensor pointer;
        // il is presumably the layer index - TODO confirm.
        ggml_tensor * build_ffn_layer(ggml_tensor * cur, const llama_model & model, int il);
1259
        ggml_tensor * build_attention_layer(ggml_tensor * cur, llm_graph_input_attn_kv * inp_attn,
1260
            const llama_model & model, int64_t n_embd_head, int il);
1261
    };
1262
1263
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1264
};
1265
1266
1267
// llama_model_nemotron_h_moe: derives from llama_model_nemotron_h rather than
// llama_model_base, inheriting its loader hooks and overriding only build_arch_graph().
struct llama_model_nemotron_h_moe : public llama_model_nemotron_h {
1268
0
    // Forwards params to the llama_model_nemotron_h constructor; the body is empty.
    llama_model_nemotron_h_moe(const struct llama_model_params & params) : llama_model_nemotron_h(params) {}
1269
    // reuse load_arch_hparams and load_arch_tensors from llama_model_nemotron_h
1270
1271
    // Re-exposes the base class's graph type under this class's own scope.
    using graph = llama_model_nemotron_h::graph;
1272
1273
    // Const override returning an owning llm_graph_context pointer (presumably the aliased graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1274
};
1275
1276
1277
// llama_model_exaone: llama_model_base subclass (by its name, the model type for the
// "exaone" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_exaone : public llama_model_base {
1278
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_exaone(const struct llama_model_params & params) : llama_model_base(params) {}
1279
    void load_arch_hparams(llama_model_loader & ml) override;
1280
    void load_arch_tensors(llama_model_loader & ml) override;
1281
1282
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1283
        graph(const llama_model & model, const llm_graph_params & params);
1284
    };
1285
1286
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1287
};
1288
1289
1290
// llama_model_exaone4: llama_model_base subclass (by its name, the model type for the
// "exaone4" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_exaone4 : public llama_model_base {
1291
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_exaone4(const struct llama_model_params & params) : llama_model_base(params) {}
1292
    void load_arch_hparams(llama_model_loader & ml) override;
1293
    void load_arch_tensors(llama_model_loader & ml) override;
1294
1295
    // Graph type templated on the compile-time flag iswa; extends llm_graph_context.
    // NOTE(review): iswa presumably selects a sliding-window-attention variant - confirm in the implementation.
    template <bool iswa>
1296
    struct graph : public llm_graph_context {
1297
        graph(const llama_model & model, const llm_graph_params & params);
1298
    };
1299
1300
    // Const override returning an owning llm_graph_context pointer (presumably one of the graph<iswa> instantiations - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1301
};
1302
1303
1304
// llama_model_exaone_moe: llama_model_base subclass (by its name, the model type for the
// "exaone_moe" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_exaone_moe : public llama_model_base {
1305
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_exaone_moe(const struct llama_model_params & params) : llama_model_base(params) {}
1306
    void load_arch_hparams(llama_model_loader & ml) override;
1307
    void load_arch_tensors(llama_model_loader & ml) override;
1308
1309
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1310
        graph(const llama_model & model, const llm_graph_params & params);
1311
    };
1312
1313
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1314
};
1315
1316
1317
// llama_model_rwkv6: llama_model_base subclass (by its name, the model type for the
// "rwkv6" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_rwkv6 : public llama_model_base {
1318
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_rwkv6(const struct llama_model_params & params) : llama_model_base(params) {}
1319
    void load_arch_hparams(llama_model_loader & ml) override;
1320
    void load_arch_tensors(llama_model_loader & ml) override;
1321
1322
    // Graph type for this model: extends llm_build_rwkv6_base (declared elsewhere in this header).
    struct graph : public llm_build_rwkv6_base {
1323
        graph(const llama_model & model, const llm_graph_params & params);
1324
    };
1325
1326
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1327
};
1328
1329
1330
// llama_model_rwkv6qwen2: llama_model_base subclass (by its name, the model type for the
// "rwkv6qwen2" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_rwkv6qwen2 : public llama_model_base {
1331
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_rwkv6qwen2(const struct llama_model_params & params) : llama_model_base(params) {}
1332
    void load_arch_hparams(llama_model_loader & ml) override;
1333
    void load_arch_tensors(llama_model_loader & ml) override;
1334
1335
    // Graph type for this model: extends llm_build_rwkv6_base (declared elsewhere in this header).
    struct graph : public llm_build_rwkv6_base {
1336
        graph(const llama_model & model, const llm_graph_params & params);
1337
    };
1338
1339
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1340
};
1341
1342
1343
// llama_model_rwkv7: llama_model_base subclass (by its name, the model type for the
// "rwkv7" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_rwkv7 : public llama_model_base {
1344
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_rwkv7(const struct llama_model_params & params) : llama_model_base(params) {}
1345
    void load_arch_hparams(llama_model_loader & ml) override;
1346
    void load_arch_tensors(llama_model_loader & ml) override;
1347
1348
    // Graph type for this model: extends llm_build_rwkv7_base (declared elsewhere in this header).
    struct graph : public llm_build_rwkv7_base {
1349
        graph(const llama_model & model, const llm_graph_params & params);
1350
    };
1351
1352
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1353
};
1354
1355
1356
// llama_model_arwkv7: llama_model_base subclass (by its name, the model type for the
// "arwkv7" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_arwkv7 : public llama_model_base {
1357
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_arwkv7(const struct llama_model_params & params) : llama_model_base(params) {}
1358
    void load_arch_hparams(llama_model_loader & ml) override;
1359
    void load_arch_tensors(llama_model_loader & ml) override;
1360
1361
    // Graph type for this model: extends llm_build_rwkv7_base (declared elsewhere in this header).
    struct graph : public llm_build_rwkv7_base {
1362
        graph(const llama_model & model, const llm_graph_params & params);
1363
    };
1364
1365
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1366
};
1367
1368
1369
// llama_model_granite: llama_model_base subclass (by its name, the model type for the
// "granite" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_granite : public llama_model_base {
1370
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_granite(const struct llama_model_params & params) : llama_model_base(params) {}
1371
    void load_arch_hparams(llama_model_loader & ml) override;
1372
    void load_arch_tensors(llama_model_loader & ml) override;
1373
1374
    // Graph type for this model: extends llm_graph_context. Only the constructor is public;
    // the two layer builders below are private implementation helpers.
    struct graph : public llm_graph_context {
1375
        graph(const llama_model & model, const llm_graph_params & params);
1376
1377
    private:
1378
        // Attention helper: takes the current tensor, a position input and the attention
        // input, and returns a ggml_tensor pointer. il is presumably the layer index - TODO confirm.
        ggml_tensor * build_attention_layer(
1379
                  ggml_tensor             * cur,
1380
                  ggml_tensor             * inp_pos,
1381
                  llm_graph_input_attn_kv * inp_attn,
1382
            const llama_model             & model,
1383
            const int64_t                 n_embd_head,
1384
            const int                     il);
1385
1386
        // FFN helper: takes the current tensor plus inpSA (presumably the pre-attention
        // input kept for a residual connection - TODO confirm) and returns a ggml_tensor pointer.
        ggml_tensor * build_layer_ffn(
1387
                  ggml_tensor       * cur,
1388
                  ggml_tensor       * inpSA,
1389
            const llama_model       & model,
1390
            const int                 il);
1391
    };
1392
1393
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1394
};
1395
1396
1397
// llama_model_granite_moe: llama_model_base subclass (by its name, the model type for the
// "granite_moe" architecture). It derives from llama_model_base directly, not from
// llama_model_granite, so it declares its own loader hooks; only the graph type is shared.
struct llama_model_granite_moe : public llama_model_base {
1398
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_granite_moe(const struct llama_model_params & params) : llama_model_base(params) {}
1399
    void load_arch_hparams(llama_model_loader & ml) override;
1400
    void load_arch_tensors(llama_model_loader & ml) override;
1401
1402
    // No graph type of its own: the name is an alias for llama_model_granite's graph.
    using graph = llama_model_granite::graph;
1403
1404
    // Const override returning an owning llm_graph_context pointer (presumably the aliased graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1405
};
1406
1407
1408
// llama_model_minicpm: llama_model_base subclass (by its name, the model type for the
// "minicpm" architecture). It derives from llama_model_base directly, not from
// llama_model_granite, so it declares its own loader hooks; only the graph type is shared.
struct llama_model_minicpm : public llama_model_base {
1409
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_minicpm(const struct llama_model_params & params) : llama_model_base(params) {}
1410
    void load_arch_hparams(llama_model_loader & ml) override;
1411
    void load_arch_tensors(llama_model_loader & ml) override;
1412
1413
    // No graph type of its own: the name is an alias for llama_model_granite's graph.
    using graph = llama_model_granite::graph;
1414
1415
    // Const override returning an owning llm_graph_context pointer (presumably the aliased graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1416
};
1417
1418
1419
// llama_model_granite_hybrid: llama_model_base subclass (by its name, the model type for the
// "granite_hybrid" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_granite_hybrid : public llama_model_base {
1420
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_granite_hybrid(const struct llama_model_params & params) : llama_model_base(params) {}
1421
    void load_arch_hparams(llama_model_loader & ml) override;
1422
    void load_arch_tensors(llama_model_loader & ml) override;
1423
1424
    // Graph type for this model: extends llm_build_mamba_base (so the mamba layer builders
    // are available) and adds public FFN and attention layer helpers.
    struct graph : public llm_build_mamba_base {
1425
        graph(const llama_model & model, const llm_graph_params & params);
1426
        // Both helpers take the current tensor and return a ggml_tensor pointer;
        // il is presumably the layer index - TODO confirm.
        ggml_tensor * build_layer_ffn(ggml_tensor * cur, ggml_tensor * inpSA, const llama_model & model, const int il);
1427
        ggml_tensor * build_attention_layer(ggml_tensor * cur, ggml_tensor * inp_pos, llm_graph_input_attn_kv * inp_attn,
1428
            const llama_model & model,const int64_t n_embd_head, const int il);
1429
    };
1430
1431
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1432
};
1433
1434
1435
// llama_model_chameleon: llama_model_base subclass (by its name, the model type for the
// "chameleon" architecture). The two loader hooks and build_arch_graph() are overrides
// that are only declared here; their definitions are not visible in this declaration.
struct llama_model_chameleon : public llama_model_base {
1436
0
    // Forwards params to the llama_model_base constructor; the body is empty.
    llama_model_chameleon(const struct llama_model_params & params) : llama_model_base(params) {}
1437
    void load_arch_hparams(llama_model_loader & ml) override;
1438
    void load_arch_tensors(llama_model_loader & ml) override;
1439
1440
    // Graph type for this model: extends llm_graph_context, constructed from the model and graph params.
    struct graph : public llm_graph_context {
1441
        graph(const llama_model & model, const llm_graph_params & params);
1442
    };
1443
1444
    // Const override returning an owning llm_graph_context pointer (presumably a graph built from params - TODO confirm).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1445
};
1446
1447
1448
// Model type for the `wavtokenizer_dec` architecture (per the type name),
// derived from llama_model_base. Everything except the constructor is declared
// only.
struct llama_model_wavtokenizer_dec : public llama_model_base {
1449
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_wavtokenizer_dec(const struct llama_model_params & params) : llama_model_base(params) {}
1450
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1451
    void load_arch_tensors(llama_model_loader & ml) override;
1452
1453
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1454
        graph(const llama_model & model, const llm_graph_params & params);
1455
    };
1456
1457
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1458
};
1459
1460
1461
// Model type for the `plm` architecture (per the type name), derived from
// llama_model_base. Everything except the constructor is declared only.
struct llama_model_plm : public llama_model_base {
1462
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_plm(const struct llama_model_params & params) : llama_model_base(params) {}
1463
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1464
    void load_arch_tensors(llama_model_loader & ml) override;
1465
1466
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1467
        graph(const llama_model & model, const llm_graph_params & params);
1468
    };
1469
1470
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1471
};
1472
1473
1474
// Model type for the `bailingmoe` architecture (per the type name), derived
// from llama_model_base. Everything except the constructor is declared only.
struct llama_model_bailingmoe : public llama_model_base {
1475
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_bailingmoe(const struct llama_model_params & params) : llama_model_base(params) {}
1476
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1477
    void load_arch_tensors(llama_model_loader & ml) override;
1478
1479
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1480
        graph(const llama_model & model, const llm_graph_params & params);
1481
    };
1482
1483
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1484
};
1485
1486
1487
// Model type for the `bailingmoe2` architecture (per the type name), derived
// directly from llama_model_base (not from llama_model_bailingmoe), so it
// declares its own loader overrides and graph type.
struct llama_model_bailingmoe2 : public llama_model_base {
1488
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_bailingmoe2(const struct llama_model_params & params) : llama_model_base(params) {}
1489
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1490
    void load_arch_tensors(llama_model_loader & ml) override;
1491
1492
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1493
        graph(const llama_model & model, const llm_graph_params & params);
1494
    };
1495
1496
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1497
};
1498
1499
1500
// Model type for the `seed_oss` architecture (per the type name), derived from
// llama_model_base. Everything except the constructor is declared only.
struct llama_model_seed_oss : public llama_model_base {
1501
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_seed_oss(const struct llama_model_params & params) : llama_model_base(params) {}
1502
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1503
    void load_arch_tensors(llama_model_loader & ml) override;
1504
1505
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1506
        graph(const llama_model & model, const llm_graph_params & params);
1507
    };
1508
1509
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1510
};
1511
1512
1513
// Model type for the `dots1` architecture (per the type name), derived from
// llama_model_base. Everything except the constructor is declared only.
struct llama_model_dots1 : public llama_model_base {
1514
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_dots1(const struct llama_model_params & params) : llama_model_base(params) {}
1515
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1516
    void load_arch_tensors(llama_model_loader & ml) override;
1517
1518
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1519
        graph(const llama_model & model, const llm_graph_params & params);
1520
    };
1521
1522
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1523
};
1524
1525
1526
// Model type for the `arcee` architecture (per the type name), derived from
// llama_model_base. Everything except the constructor is declared only.
struct llama_model_arcee : public llama_model_base {
1527
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_arcee(const struct llama_model_params & params) : llama_model_base(params) {}
1528
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1529
    void load_arch_tensors(llama_model_loader & ml) override;
1530
1531
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1532
        graph(const llama_model & model, const llm_graph_params & params);
1533
    };
1534
1535
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1536
};
1537
1538
1539
// Model type for the `afmoe` architecture (per the type name), derived from
// llama_model_base. Everything except the constructor is declared only.
struct llama_model_afmoe : public llama_model_base {
1540
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_afmoe(const struct llama_model_params & params) : llama_model_base(params) {}
1541
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1542
    void load_arch_tensors(llama_model_loader & ml) override;
1543
1544
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1545
        graph(const llama_model & model, const llm_graph_params & params);
1546
    };
1547
1548
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1549
};
1550
1551
1552
// Model type for the `ernie4_5` architecture (per the type name), derived from
// llama_model_base. It also serves as the base class of
// llama_model_ernie4_5_moe and llama_model_paddleocr, which inherit its loader
// overrides.
struct llama_model_ernie4_5 : public llama_model_base {
1553
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_ernie4_5(const struct llama_model_params & params) : llama_model_base(params) {}
1554
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1555
    void load_arch_tensors(llama_model_loader & ml) override;
1556
1557
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1558
        graph(const llama_model & model, const llm_graph_params & params);
1559
    };
1560
1561
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1562
};
1563
1564
1565
// Model type for the `ernie4_5_moe` architecture (per the type name). Derives
// from llama_model_ernie4_5 rather than llama_model_base, so the loader
// overrides are inherited; only the graph type and build_arch_graph are
// redeclared here.
struct llama_model_ernie4_5_moe : public llama_model_ernie4_5 {
1566
0
    // Forwards `params` to the llama_model_ernie4_5 constructor; the body is empty.
    llama_model_ernie4_5_moe(const struct llama_model_params & params) : llama_model_ernie4_5(params) {}
1567
    // reuse load_arch_hparams and load_arch_tensors from llama_model_ernie4_5
1568
1569
    // Own graph type; this nested name hides llama_model_ernie4_5::graph
    // inside this class.
    struct graph : public llm_graph_context {
1570
        graph(const llama_model & model, const llm_graph_params & params);
1571
    };
1572
1573
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1574
};
1575
1576
1577
// Model type for the `paddleocr` architecture (per the type name). Derives from
// llama_model_ernie4_5 rather than llama_model_base, so the loader overrides
// are inherited; only the graph type and build_arch_graph are redeclared here.
struct llama_model_paddleocr : public llama_model_ernie4_5 {
1578
0
    // Forwards `params` to the llama_model_ernie4_5 constructor; the body is empty.
    llama_model_paddleocr(const struct llama_model_params & params) : llama_model_ernie4_5(params) {}
1579
    // reuse load_arch_hparams and load_arch_tensors from llama_model_ernie4_5
1580
1581
    // Own graph type; this nested name hides llama_model_ernie4_5::graph
    // inside this class.
    struct graph : public llm_graph_context {
1582
        graph(const llama_model & model, const llm_graph_params & params);
1583
    };
1584
1585
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1586
};
1587
1588
1589
// Model type for the `hunyuan_moe` architecture (per the type name), derived
// from llama_model_base. Everything except the constructor is declared only.
struct llama_model_hunyuan_moe : public llama_model_base {
1590
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_hunyuan_moe(const struct llama_model_params & params) : llama_model_base(params) {}
1591
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1592
    void load_arch_tensors(llama_model_loader & ml) override;
1593
1594
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1595
        graph(const llama_model & model, const llm_graph_params & params);
1596
    };
1597
1598
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1599
};
1600
1601
1602
// Model type for the `hunyuan_vl` architecture (per the type name), derived
// from llama_model_base. It is also the base class of
// llama_model_hunyuan_dense, which reuses its loader overrides and graph type.
struct llama_model_hunyuan_vl : public llama_model_base {
1603
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_hunyuan_vl(const struct llama_model_params & params) : llama_model_base(params) {}
1604
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1605
    void load_arch_tensors(llama_model_loader & ml) override;
1606
1607
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1608
        graph(const llama_model & model, const llm_graph_params & params);
1609
    };
1610
1611
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1612
};
1613
1614
1615
// Model type for the `hunyuan_dense` architecture (per the type name). Derives
// from llama_model_hunyuan_vl, inheriting its loader overrides and aliasing its
// graph type; only build_arch_graph is overridden again.
struct llama_model_hunyuan_dense : public llama_model_hunyuan_vl {
1616
0
    // Forwards `params` to the llama_model_hunyuan_vl constructor; the body is empty.
    llama_model_hunyuan_dense(const struct llama_model_params & params) : llama_model_hunyuan_vl(params) {}
1617
    // reuse load_arch_hparams and load_arch_tensors from llama_model_hunyuan_vl
1618
1619
    // Explicit alias to the parent's graph type (no new graph struct here).
    using graph = llama_model_hunyuan_vl::graph;
1620
1621
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1622
};
1623
1624
1625
// Model type for the `smollm3` architecture (per the type name), derived from
// llama_model_base. Everything except the constructor is declared only.
struct llama_model_smollm3 : public llama_model_base {
1626
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_smollm3(const struct llama_model_params & params) : llama_model_base(params) {}
1627
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1628
    void load_arch_tensors(llama_model_loader & ml) override;
1629
1630
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1631
        graph(const llama_model & model, const llm_graph_params & params);
1632
    };
1633
1634
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1635
};
1636
1637
1638
// Model type for the `openai_moe` architecture (per the type name), derived
// from llama_model_base. Everything except the constructor is declared only.
struct llama_model_openai_moe : public llama_model_base {
1639
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_openai_moe(const struct llama_model_params & params) : llama_model_base(params) {}
1640
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1641
    void load_arch_tensors(llama_model_loader & ml) override;
1642
1643
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1644
        graph(const llama_model & model, const llm_graph_params & params);
1645
    };
1646
1647
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1648
};
1649
1650
1651
// Model type for the `falcon_h1` architecture (per the type name), derived from
// llama_model_base. Its graph type builds on llm_build_mamba_base, so it
// inherits the build_mamba_layer / build_mamba2_layer helpers declared there.
struct llama_model_falcon_h1 : public llama_model_base {
1652
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_falcon_h1(const struct llama_model_params & params) : llama_model_base(params) {}
1653
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1654
    void load_arch_tensors(llama_model_loader & ml) override;
1655
1656
    // Graph type for this architecture, constructed from the model and the
    // graph params; adds no members beyond the constructor.
    struct graph : public llm_build_mamba_base {
1657
        graph(const llama_model & model, const llm_graph_params & params);
1658
    };
1659
1660
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1661
};
1662
1663
1664
// Model type for the `lfm2` architecture (per the type name), derived from
// llama_model_base. Its graph type is a class template on a bool, and is
// reused by llama_model_lfm2moe through an alias template.
struct llama_model_lfm2 : public llama_model_base {
1665
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_lfm2(const struct llama_model_params & params) : llama_model_base(params) {}
1666
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1667
    void load_arch_tensors(llama_model_loader & ml) override;
1668
1669
    // Graph type parameterised at compile time by `iswa`.
    // NOTE(review): `iswa` presumably selects a sliding-window-attention
    // variant - confirm in the constructor definition.
    template <bool iswa>
1670
    struct graph : public llm_graph_context {
1671
        graph(const llama_model & model, const llm_graph_params & params);
1672
    };
1673
1674
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably one of the `graph<iswa>` instantiations - confirm in the
    // definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1675
};
1676
1677
1678
// Model type for the `lfm2moe` architecture (per the type name), derived from
// llama_model_base. It has its own loader overrides but no graph struct:
// `graph<iswa>` is an alias template for llama_model_lfm2::graph<iswa>.
struct llama_model_lfm2moe : public llama_model_base {
1679
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_lfm2moe(const struct llama_model_params & params) : llama_model_base(params) {}
1680
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1681
    void load_arch_tensors(llama_model_loader & ml) override;
1682
1683
    // Alias template: graph<X> here names the same type as
    // llama_model_lfm2::graph<X>.
    template <bool iswa>
1684
    using graph = llama_model_lfm2::graph<iswa>;
1685
1686
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably one of the `graph<iswa>` instantiations - confirm in the
    // definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1687
};
1688
1689
1690
// Model type for the `smallthinker` architecture (per the type name), derived
// from llama_model_base. Its graph type is a class template on a bool.
struct llama_model_smallthinker : public llama_model_base {
1691
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_smallthinker(const struct llama_model_params & params) : llama_model_base(params) {}
1692
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1693
    void load_arch_tensors(llama_model_loader & ml) override;
1694
1695
    // Graph type parameterised at compile time by `iswa`.
    // NOTE(review): `iswa` presumably selects a sliding-window-attention
    // variant - confirm in the constructor definition.
    template <bool iswa>
1696
    struct graph : public llm_graph_context {
1697
        graph(const llama_model & model, const llm_graph_params & params);
1698
    };
1699
1700
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably one of the `graph<iswa>` instantiations - confirm in the
    // definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1701
};
1702
1703
1704
// Model type for the `grovemoe` architecture (per the type name), derived from
// llama_model_base. Everything except the constructor is declared only.
struct llama_model_grovemoe : public llama_model_base {
1705
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_grovemoe(const struct llama_model_params & params) : llama_model_base(params) {}
1706
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1707
    void load_arch_tensors(llama_model_loader & ml) override;
1708
1709
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1710
        graph(const llama_model & model, const llm_graph_params & params);
1711
    };
1712
1713
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1714
};
1715
1716
1717
// Model type for the `apertus` architecture (per the type name), derived from
// llama_model_base. Everything except the constructor is declared only.
struct llama_model_apertus : public llama_model_base {
1718
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_apertus(const struct llama_model_params & params) : llama_model_base(params) {}
1719
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1720
    void load_arch_tensors(llama_model_loader & ml) override;
1721
1722
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1723
        graph(const llama_model & model, const llm_graph_params & params);
1724
    };
1725
1726
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1727
};
1728
1729
1730
// Model type for the `minimax_m2` architecture (per the type name), derived
// from llama_model_base. Everything except the constructor is declared only.
struct llama_model_minimax_m2 : public llama_model_base {
1731
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_minimax_m2(const struct llama_model_params & params) : llama_model_base(params) {}
1732
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1733
    void load_arch_tensors(llama_model_loader & ml) override;
1734
1735
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1736
        graph(const llama_model & model, const llm_graph_params & params);
1737
    };
1738
1739
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1740
};
1741
1742
1743
// Model type for the `cogvlm` architecture (per the type name), derived from
// llama_model_base. Everything except the constructor is declared only.
struct llama_model_cogvlm : public llama_model_base {
1744
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_cogvlm(const struct llama_model_params & params) : llama_model_base(params) {}
1745
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1746
    void load_arch_tensors(llama_model_loader & ml) override;
1747
1748
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1749
        graph(const llama_model & model, const llm_graph_params & params);
1750
    };
1751
1752
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1753
};
1754
1755
1756
// Model type for the `pangu_embed` architecture (per the type name), derived
// from llama_model_base. Everything except the constructor is declared only.
struct llama_model_pangu_embed : public llama_model_base {
1757
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_pangu_embed(const struct llama_model_params & params) : llama_model_base(params) {}
1758
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1759
    void load_arch_tensors(llama_model_loader & ml) override;
1760
1761
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1762
        graph(const llama_model & model, const llm_graph_params & params);
1763
    };
1764
1765
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1766
};
1767
1768
1769
// Model type for the `qwen3next` architecture (per the type name), derived from
// llama_model_base. Its graph type builds on llm_build_delta_net_base and adds
// private per-layer builder helpers plus a stored reference to the model.
struct llama_model_qwen3next : public llama_model_base {
1770
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_qwen3next(const struct llama_model_params & params) : llama_model_base(params) {}
1771
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1772
    void load_arch_tensors(llama_model_loader & ml) override;
1773
1774
    // Graph type for this architecture. Only the constructor is public; every
    // helper below and the `model` reference are private to the graph.
    struct graph : public llm_build_delta_net_base {
1775
        graph(const llama_model & model, const llm_graph_params & params);
1776
    private:
1777
        // Per-layer builder taking the KV-attention graph input, the current
        // tensor and the position input; returns a ggml_tensor *.
        // NOTE(review): `il` is presumably the layer index - confirm.
        ggml_tensor * build_layer_attn(
1778
        llm_graph_input_attn_kv * inp_attn,
1779
                    ggml_tensor * cur,
1780
                    ggml_tensor * inp_pos,
1781
                            int   il);
1782
1783
        // Per-layer builder taking a llm_graph_input_rs input instead of the
        // KV-attention input; returns a ggml_tensor *.
        ggml_tensor * build_layer_attn_linear(
1784
             llm_graph_input_rs * inp,
1785
                    ggml_tensor * cur,
1786
                            int   il);
1787
1788
        // Per-layer builder taking only the current tensor and `il`.
        ggml_tensor * build_layer_ffn(
1789
                    ggml_tensor * cur,
1790
                            int   il);
1791
1792
        // Takes an input tensor, a weights tensor and a gate tensor.
        // NOTE(review): presumably a normalisation followed by gating, as the
        // name suggests - confirm in the definition.
        ggml_tensor * build_norm_gated(
1793
                    ggml_tensor * input,
1794
                    ggml_tensor * weights,
1795
                    ggml_tensor * gate,
1796
                            int   layer);
1797
1798
        // returns pair of qkv, z
1799
        std::pair<ggml_tensor *, ggml_tensor *> build_qkvz(
1800
                    ggml_tensor * input,
1801
                            int   il);
1802
1803
        // Non-owning reference member: the referenced model must outlive this
        // graph object (presumably bound from the constructor argument).
        const llama_model & model;
1804
    };
1805
1806
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1807
};
1808
1809
1810
// Model type for the `qwen35` architecture (per the type name), derived from
// llama_model_base. Declares two graph types: `graph` (built on
// llm_build_delta_net_base, with private per-layer helpers) and `graph_mtp`
// (a plain llm_graph_context).
struct llama_model_qwen35 : public llama_model_base {
1811
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_qwen35(const struct llama_model_params & params) : llama_model_base(params) {}
1812
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1813
    void load_arch_tensors(llama_model_loader & ml) override;
1814
1815
    // Main graph type. Only the constructor is public; every helper below and
    // the `model` reference are private to the graph.
    struct graph : public llm_build_delta_net_base {
1816
        graph(const llama_model & model, const llm_graph_params & params);
1817
    private:
1818
        // Per-layer builder taking the KV-attention graph input, the current
        // tensor, the position input and a mutable int pointer `sections`.
        // NOTE(review): `sections` is presumably an array of rope section
        // sizes and `il` the layer index - confirm in the definition.
        ggml_tensor * build_layer_attn(
1819
        llm_graph_input_attn_kv * inp_attn,
1820
                    ggml_tensor * cur,
1821
                    ggml_tensor * inp_pos,
1822
                            int * sections,
1823
                            int   il);
1824
1825
        // Per-layer builder taking a llm_graph_input_rs input instead of the
        // KV-attention input; returns a ggml_tensor *.
        ggml_tensor * build_layer_attn_linear(
1826
             llm_graph_input_rs * inp,
1827
                    ggml_tensor * cur,
1828
                            int   il);
1829
1830
        // Per-layer builder taking only the current tensor and `il`.
        ggml_tensor * build_layer_ffn(
1831
                    ggml_tensor * cur,
1832
                            int   il);
1833
1834
        // Takes an input tensor, a weights tensor and a gate tensor.
        // NOTE(review): presumably a normalisation followed by gating, as the
        // name suggests - confirm in the definition.
        ggml_tensor * build_norm_gated(
1835
                    ggml_tensor * input,
1836
                    ggml_tensor * weights,
1837
                    ggml_tensor * gate,
1838
                            int   layer);
1839
1840
        // returns pair of qkv, z
1841
        std::pair<ggml_tensor *, ggml_tensor *> build_qkvz(
1842
                    ggml_tensor * input,
1843
                            int   il);
1844
1845
        // Non-owning reference member: the referenced model must outlive this
        // graph object (presumably bound from the constructor argument).
        const llama_model & model;
1846
    };
1847
1848
    // Second graph type with the same constructor signature as `graph`.
    // NOTE(review): "mtp" presumably stands for multi-token prediction -
    // confirm against the implementation.
    struct graph_mtp : public llm_graph_context {
1849
        graph_mtp(const llama_model & model, const llm_graph_params & params);
1850
    };
1851
1852
    // Returns an owning pointer to a llm_graph_context for `params`; which of
    // the two graph types is built is decided in the definition, not here.
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1853
};
1854
1855
1856
// Model type for the `qwen35moe` architecture (per the type name), derived
// directly from llama_model_base (not from llama_model_qwen35). Declares two
// graph types: `graph` (built on llm_build_delta_net_base, with private
// per-layer helpers) and `graph_mtp` (a plain llm_graph_context).
struct llama_model_qwen35moe : public llama_model_base {
1857
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_qwen35moe(const struct llama_model_params & params) : llama_model_base(params) {}
1858
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1859
    void load_arch_tensors(llama_model_loader & ml) override;
1860
1861
    // Main graph type. Only the constructor is public; every helper below and
    // the `model` reference are private to the graph.
    struct graph : public llm_build_delta_net_base {
1862
        graph(const llama_model & model, const llm_graph_params & params);
1863
    private:
1864
        // Per-layer builder taking the KV-attention graph input, the current
        // tensor, the position input and a mutable int pointer `sections`.
        // NOTE(review): `sections` is presumably an array of rope section
        // sizes and `il` the layer index - confirm in the definition.
        ggml_tensor * build_layer_attn(
1865
        llm_graph_input_attn_kv * inp_attn,
1866
                    ggml_tensor * cur,
1867
                    ggml_tensor * inp_pos,
1868
                            int * sections,
1869
                            int   il);
1870
1871
        // Per-layer builder taking a llm_graph_input_rs input instead of the
        // KV-attention input; returns a ggml_tensor *.
        ggml_tensor * build_layer_attn_linear(
1872
             llm_graph_input_rs * inp,
1873
                    ggml_tensor * cur,
1874
                            int   il);
1875
1876
        // Per-layer builder taking only the current tensor and `il`.
        ggml_tensor * build_layer_ffn(
1877
                    ggml_tensor * cur,
1878
                            int   il);
1879
1880
        // Takes an input tensor, a weights tensor and a gate tensor.
        // NOTE(review): presumably a normalisation followed by gating, as the
        // name suggests - confirm in the definition.
        ggml_tensor * build_norm_gated(
1881
                    ggml_tensor * input,
1882
                    ggml_tensor * weights,
1883
                    ggml_tensor * gate,
1884
                            int   layer);
1885
1886
        // returns pair of qkv, z
1887
        std::pair<ggml_tensor *, ggml_tensor *> build_qkvz(
1888
                    ggml_tensor * input,
1889
                            int   il);
1890
1891
        // Non-owning reference member: the referenced model must outlive this
        // graph object (presumably bound from the constructor argument).
        const llama_model & model;
1892
    };
1893
1894
    // Second graph type with the same constructor signature as `graph`.
    // NOTE(review): "mtp" presumably stands for multi-token prediction -
    // confirm against the implementation.
    struct graph_mtp : public llm_graph_context {
1895
        graph_mtp(const llama_model & model, const llm_graph_params & params);
1896
    };
1897
1898
    // Returns an owning pointer to a llm_graph_context for `params`; which of
    // the two graph types is built is decided in the definition, not here.
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1899
};
1900
1901
1902
// Model type for the `mistral3` architecture (per the type name), derived from
// llama_model_base. Everything except the constructor is declared only.
struct llama_model_mistral3 : public llama_model_base {
1903
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_mistral3(const struct llama_model_params & params) : llama_model_base(params) {}
1904
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1905
    void load_arch_tensors(llama_model_loader & ml) override;
1906
1907
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1908
        graph(const llama_model & model, const llm_graph_params & params);
1909
    };
1910
1911
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1912
};
1913
1914
1915
// Model type for the `mimo2` architecture (per the type name), derived from
// llama_model_base. Everything except the constructor is declared only.
struct llama_model_mimo2 : public llama_model_base {
1916
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_mimo2(const struct llama_model_params & params) : llama_model_base(params) {}
1917
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1918
    void load_arch_tensors(llama_model_loader & ml) override;
1919
1920
    // Graph type for this architecture: a llm_graph_context constructed from
    // the model and the graph params.
    struct graph : public llm_graph_context {
1921
        graph(const llama_model & model, const llm_graph_params & params);
1922
    };
1923
1924
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1925
};
1926
1927
1928
// Model type for the `kimi_linear` architecture (per the type name), derived
// from llama_model_base. Its graph type builds on llm_build_delta_net_base and
// adds two "kda" builders; unlike the qwen3next/qwen35 graphs, all graph
// members here are public (no `private:` section).
struct llama_model_kimi_linear : public llama_model_base {
1929
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_kimi_linear(const struct llama_model_params & params) : llama_model_base(params) {}
1930
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1931
    void load_arch_tensors(llama_model_loader & ml) override;
1932
1933
    // Graph type for this architecture, constructed from the model and the
    // graph params.
    struct graph : public llm_build_delta_net_base {
1934
        graph(const llama_model & model, const llm_graph_params & params);
1935
1936
        // Takes q/k/v, `gk`, `beta` and `state` tensors and returns a pair of
        // tensors. NOTE(review): presumably (output, new state), mirroring the
        // base-class delta-net builders - confirm in the definition.
        std::pair<ggml_tensor *, ggml_tensor *> build_kda_autoregressive(
1937
                    ggml_tensor * q,
1938
                    ggml_tensor * k,
1939
                    ggml_tensor * v,
1940
                    ggml_tensor * gk,
1941
                    ggml_tensor * beta,
1942
                    ggml_tensor * state,
1943
                            int   il);
1944
1945
        // Same leading parameters as build_kda_autoregressive, plus three
        // caller-supplied tensors: causal_mask, identity and diag_mask.
        // NOTE(review): return pair presumably (output, new state) - confirm.
        std::pair<ggml_tensor *, ggml_tensor *> build_kda_chunking(
1946
                    ggml_tensor * q,
1947
                    ggml_tensor * k,
1948
                    ggml_tensor * v,
1949
                    ggml_tensor * gk,
1950
                    ggml_tensor * beta,
1951
                    ggml_tensor * state,
1952
                    ggml_tensor * causal_mask,
1953
                    ggml_tensor * identity,
1954
                    ggml_tensor * diag_mask,
1955
                            int   il);
1956
1957
        // Non-owning reference member: the referenced model must outlive this
        // graph object (presumably bound from the constructor argument).
        const llama_model & model;
1958
    };
1959
1960
    // Returns an owning pointer to a llm_graph_context for `params`
    // (presumably a `graph` instance - confirm in the definition).
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1961
};
1962
1963
1964
// Model type for the `step35` architecture (per the type name), derived from
// llama_model_base. Declares two graph types, `graph` and `graph_mtp`, both
// plain llm_graph_context subclasses with identical constructor signatures.
struct llama_model_step35 : public llama_model_base {
1965
0
    // Forwards `params` to the llama_model_base constructor; the body is empty.
    llama_model_step35(const struct llama_model_params & params) : llama_model_base(params) {}
1966
    // Architecture-specific loader overrides; both receive the model loader.
    void load_arch_hparams(llama_model_loader & ml) override;
1967
    void load_arch_tensors(llama_model_loader & ml) override;
1968
1969
    // Main graph type, constructed from the model and the graph params.
    struct graph : public llm_graph_context {
1970
        graph(const llama_model & model, const llm_graph_params & params);
1971
    };
1972
1973
    // Second graph type with the same constructor signature as `graph`.
    // NOTE(review): "mtp" presumably stands for multi-token prediction -
    // confirm against the implementation.
    struct graph_mtp : public llm_graph_context {
1974
        graph_mtp(const llama_model & model, const llm_graph_params & params);
1975
    };
1976
1977
    // Returns an owning pointer to a llm_graph_context for `params`; which of
    // the two graph types is built is decided in the definition, not here.
    std::unique_ptr<llm_graph_context> build_arch_graph(const llm_graph_params & params) const override;
1978
};