Coverage Report

Created: 2026-06-13 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/src/models/mamba2.cpp
Line
Count
Source
1
#include "models.h"
2
3
0
void llama_model_mamba2::load_arch_hparams(llama_model_loader & ml) {
4
0
    ml.get_key(LLM_KV_SSM_CONV_KERNEL,    hparams.ssm_d_conv);
5
0
    ml.get_key(LLM_KV_SSM_INNER_SIZE,     hparams.ssm_d_inner);
6
0
    ml.get_key(LLM_KV_SSM_STATE_SIZE,     hparams.ssm_d_state);
7
0
    ml.get_key(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank);
8
0
    ml.get_key(LLM_KV_SSM_GROUP_COUNT,    hparams.ssm_n_group);
9
10
0
    ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
11
12
0
    switch (hparams.n_layer()) {
13
0
        case 24:
14
0
            switch (hparams.n_embd) {
15
0
                case 768: type = LLM_TYPE_SMALL; break;
16
0
                default: type = LLM_TYPE_UNKNOWN;
17
0
            } break;
18
0
        case 48:
19
0
            switch (hparams.n_embd) {
20
0
                case 1024: type = LLM_TYPE_MEDIUM; break;
21
0
                case 1536: type = LLM_TYPE_LARGE; break;
22
0
                case 2048: type = LLM_TYPE_XL; break;
23
0
                default: type = LLM_TYPE_UNKNOWN;
24
0
            } break;
25
0
        case 64:
26
0
            switch (hparams.n_embd) {
27
0
                case 2560: type = LLM_TYPE_3B; break;
28
0
                case 4096: type = LLM_TYPE_7B; break;
29
0
                default: type = LLM_TYPE_UNKNOWN;
30
0
            } break;
31
0
        default: type = LLM_TYPE_UNKNOWN;
32
0
    }
33
0
}
34
35
0
void llama_model_mamba2::load_arch_tensors(llama_model_loader &) {
36
0
    LLAMA_LOAD_LOCALS;
37
38
0
    const int64_t d_conv  = hparams.ssm_d_conv;
39
0
    const int64_t d_inner = hparams.ssm_d_inner;
40
0
    const int64_t d_state = hparams.ssm_d_state;
41
0
    const int64_t n_group = hparams.ssm_n_group;
42
0
    const int64_t d_in_proj = 2*d_inner + 2*n_group*d_state + n_head;
43
44
    // only an expansion factor of 2 is supported for now
45
0
    GGML_ASSERT(2 * n_embd == d_inner);
46
47
0
    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
48
49
    // output
50
0
    {
51
0
        output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
52
53
0
        output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
54
        // if output is NULL, init from the input tok embed, duplicated to allow offloading
55
0
        if (output == NULL) {
56
0
            output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
57
0
        }
58
0
    }
59
60
0
    for (int i = 0; i < n_layer; ++i) {
61
0
        auto & layer = layers[i];
62
63
        // norm
64
0
        layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
65
66
0
        layer.ssm_in = create_tensor(tn(LLM_TENSOR_SSM_IN, "weight", i), {n_embd, d_in_proj}, 0);
67
68
0
        layer.ssm_conv1d = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {d_conv, d_inner + 2*n_group*d_state}, 0);
69
0
        layer.ssm_conv1d_b = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "bias", i), {d_inner + 2*n_group*d_state}, 0);
70
71
0
        layer.ssm_dt_b = create_tensor(tn(LLM_TENSOR_SSM_DT, "bias", i), {n_head}, 0);
72
73
        // no "weight" suffix for these
74
0
        layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), {1, n_head}, 0);
75
0
        layer.ssm_d = create_tensor(tn(LLM_TENSOR_SSM_D, i), {1, n_head}, 0);
76
77
0
        layer.ssm_norm = create_tensor(tn(LLM_TENSOR_SSM_NORM, "weight", i), {d_inner / n_group, n_group}, 0);
78
79
        // out_proj
80
0
        layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), {d_inner, n_embd}, 0);
81
0
    }
82
0
}
83
84
0
std::unique_ptr<llm_graph_context> llama_model_mamba2::build_arch_graph(const llm_graph_params & params) const {
85
0
    return std::make_unique<graph>(*this, params);
86
0
}
87