Coverage Report

Created: 2025-11-24 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/src/llama-hparams.cpp
Line
Count
Source
1
#include "llama-hparams.h"
2
3
#include "ggml.h"
4
#include <cassert>
5
6
0
void llama_hparams::set_swa_pattern(uint32_t n_pattern, bool dense_first) {
7
0
    if (dense_first) {
8
0
        for (uint32_t il = 0; il < n_layer; ++il) {
9
0
            swa_layers[il] = n_pattern == 0 || (il % n_pattern != 0);
10
0
        }
11
0
    } else {
12
0
        for (uint32_t il = 0; il < n_layer; ++il) {
13
0
            swa_layers[il] = n_pattern == 0 || (il % n_pattern < (n_pattern - 1));
14
0
        }
15
0
    }
16
0
}
17
18
0
bool llama_hparams::is_swa_any() const {
19
0
    for (uint32_t il = 0; il < n_layer; ++il) {
20
0
        if (swa_layers[il]) {
21
0
            return true;
22
0
        }
23
0
    }
24
25
0
    return false;
26
0
}
27
28
0
uint32_t llama_hparams::n_head(uint32_t il) const {
29
0
    if (il < n_layer) {
30
0
        return n_head_arr[il];
31
0
    }
32
33
0
    GGML_ABORT("fatal error");
34
0
}
35
36
0
uint32_t llama_hparams::n_head_kv(uint32_t il) const {
37
0
    if (il < n_layer) {
38
0
        return n_head_kv_arr[il];
39
0
    }
40
41
0
    GGML_ABORT("fatal error");
42
0
}
43
44
0
uint32_t llama_hparams::n_ff(uint32_t il) const {
45
0
    if (il < n_layer) {
46
0
        return n_ff_arr[il];
47
0
    }
48
49
0
    GGML_ABORT("fatal error");
50
0
}
51
52
0
uint32_t llama_hparams::n_gqa(uint32_t il) const {
53
0
    const uint32_t n_head    = this->n_head(il);
54
0
    const uint32_t n_head_kv = this->n_head_kv(il);
55
56
0
    if (n_head_kv == 0) {
57
0
        return 0;
58
0
    }
59
60
0
    return n_head/n_head_kv;
61
0
}
62
63
0
uint32_t llama_hparams::n_embd_inp() const {
64
0
    uint32_t n_embd_inp = n_embd;
65
66
0
    if (n_deepstack_layers > 0) {
67
0
        n_embd_inp += n_embd * n_deepstack_layers;
68
0
    }
69
70
0
    return n_embd_inp;
71
0
}
72
73
0
uint32_t llama_hparams::n_embd_k_gqa(uint32_t il) const {
74
0
    const uint32_t n_head_kv = this->n_head_kv(il);
75
76
0
    return n_embd_head_k * n_head_kv;
77
0
}
78
79
0
uint32_t llama_hparams::n_embd_v_gqa(uint32_t il) const {
80
0
    const uint32_t n_head_kv = this->n_head_kv(il);
81
82
0
    return n_embd_head_v * n_head_kv;
83
0
}
84
85
0
bool llama_hparams::is_n_embd_k_gqa_variable() const {
86
0
    const uint32_t val = n_embd_k_gqa();
87
0
    for (uint32_t il = 0; il < n_layer; ++il) {
88
0
        if (val != n_embd_k_gqa(il)) {
89
0
            return true;
90
0
        }
91
0
    }
92
93
0
    return false;
94
0
}
95
96
0
bool llama_hparams::is_n_embd_v_gqa_variable() const {
97
0
    const uint32_t val = n_embd_v_gqa();
98
0
    for (uint32_t il = 0; il < n_layer; ++il) {
99
0
        if (val != n_embd_v_gqa(il)) {
100
0
            return true;
101
0
        }
102
0
    }
103
104
0
    return false;
105
0
}
106
107
0
uint32_t llama_hparams::n_embd_k_gqa_max() const {
108
0
    uint32_t val = n_embd_k_gqa();
109
0
    for (uint32_t il = 0; il < n_layer; ++il) {
110
0
        val = std::max(val, n_embd_k_gqa(il));
111
0
    }
112
113
0
    return val;
114
0
}
115
116
0
uint32_t llama_hparams::n_embd_v_gqa_max() const {
117
0
    uint32_t val = n_embd_v_gqa();
118
0
    for (uint32_t il = 0; il < n_layer; ++il) {
119
0
        val = std::max(val, n_embd_v_gqa(il));
120
0
    }
121
122
0
    return val;
123
0
}
124
125
0
uint32_t llama_hparams::n_embd_r() const {
126
0
    if (wkv_head_size != 0) {
127
        // for RWKV models
128
0
        return token_shift_count * n_embd;
129
0
    }
130
131
0
    if (n_shortconv_l_cache != 0) {
132
        // for LFM2 models
133
0
        return n_embd * (n_shortconv_l_cache - 1);
134
0
    }
135
136
    // TODO: maybe support other convolution strides than 1
137
    // NOTE: since the first column of the conv_state is shifted out each time, it's not actually needed
138
    // Corresponds to Mamba's conv_states size
139
0
    return (ssm_d_conv > 0 ? ssm_d_conv - 1 : 0) * (ssm_d_inner + 2*ssm_n_group*ssm_d_state);
140
0
}
141
142
0
uint32_t llama_hparams::n_embd_s() const {
143
0
    if (wkv_head_size != 0) {
144
        // corresponds to RWKV's wkv_states size
145
0
        return n_embd * wkv_head_size;
146
0
    }
147
148
    // corresponds to Mamba's ssm_states size
149
0
    return ssm_d_state * ssm_d_inner;
150
0
}
151
152
0
bool llama_hparams::is_recurrent(uint32_t il) const {
153
0
    if (il < n_layer) {
154
0
        return recurrent_layer_arr[il];
155
0
    }
156
157
0
    GGML_ABORT("%s: il (%u) out of bounds (n_layer: %u)\n", __func__, il, n_layer);
158
0
}
159
160
0
uint32_t llama_hparams::n_pos_per_embd() const {
161
0
    return rope_type == LLAMA_ROPE_TYPE_MROPE || rope_type == LLAMA_ROPE_TYPE_IMROPE ? 4 : 1;
162
0
}
163
164
0
bool llama_hparams::is_swa(uint32_t il) const {
165
0
    if (il < n_layer) {
166
0
        return swa_layers[il];
167
0
    }
168
169
0
    GGML_ABORT("fatal error");
170
0
}
171
172
0
bool llama_hparams::has_kv(uint32_t il) const {
173
0
    if (n_layer_kv_from_start >= 0) {
174
0
        if (il < (uint32_t) n_layer_kv_from_start) {
175
0
            return true;
176
0
        }
177
178
0
        return false;
179
0
    }
180
181
    // by default, all layers have kv
182
0
    return true;
183
0
}
184
185
0
uint32_t llama_hparams::n_layer_kv() const {
186
0
    uint32_t res = 0;
187
188
0
    for (uint32_t il = 0; il < n_layer; ++il) {
189
0
        if (has_kv(il)) {
190
0
            res++;
191
0
        }
192
0
    }
193
194
0
    return res;
195
0
}
196
197
0
bool llama_hparams::is_masked_swa(uint32_t n_swa, llama_swa_type swa_type, llama_pos p0, llama_pos p1) {
198
0
    assert(p0 >= 0 && p1 >= 0);
199
200
0
    switch (swa_type) {
201
0
        case LLAMA_SWA_TYPE_NONE:
202
0
            {
203
0
            } break;
204
0
        case LLAMA_SWA_TYPE_STANDARD:
205
0
            {
206
0
                if (p1 - p0 >= (int32_t) n_swa) {
207
0
                    return true;
208
0
                }
209
0
            } break;
210
0
        case LLAMA_SWA_TYPE_CHUNKED:
211
0
            {
212
0
                const llama_pos pos_chunk_start = (p1 / n_swa) * n_swa;
213
214
0
                if (p0 < pos_chunk_start) {
215
0
                    return true;
216
0
                }
217
0
            } break;
218
0
        case LLAMA_SWA_TYPE_SYMMETRIC:
219
0
            {
220
0
                const int32_t half_n_swa = (int32_t) n_swa / 2;
221
0
                const int32_t pos_diff = p1 - p0;
222
223
                // Mask if outside the symmetric window
224
0
                if (pos_diff < -half_n_swa || pos_diff > half_n_swa) {
225
0
                    return true;
226
0
                }
227
0
            } break;
228
0
    }
229
230
0
    return false;
231
0
}