Coverage Report

Created: 2026-06-22 06:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/src/llama-hparams.cpp
Line
Count
Source
1
#include "llama-hparams.h"
2
3
#include "ggml.h"
4
5
#include <algorithm>
6
#include <cassert>
7
8
0
void llama_hparams::set_swa_pattern(uint32_t n_pattern, bool dense_first) {
9
0
    if (dense_first) {
10
0
        for (uint32_t il = 0; il < n_layer(); ++il) {
11
0
            is_swa_impl[il] = n_pattern == 0 || (il % n_pattern != 0);
12
0
        }
13
0
    } else {
14
0
        for (uint32_t il = 0; il < n_layer(); ++il) {
15
0
            is_swa_impl[il] = n_pattern == 0 || (il % n_pattern < (n_pattern - 1));
16
0
        }
17
0
    }
18
19
0
    for (uint32_t il = n_layer(); il < n_layer_all; ++il) {
20
0
        is_swa_impl[il] = false;
21
0
    }
22
0
}
23
24
0
void llama_hparams::set_recr_pattern(uint32_t n_pattern, bool dense_first) {
25
0
    if (dense_first) {
26
0
        for (uint32_t il = 0; il < n_layer(); ++il) {
27
0
            is_recr_impl[il] = n_pattern == 0 || (il % n_pattern != 0);
28
0
        }
29
0
    } else {
30
0
        for (uint32_t il = 0; il < n_layer(); ++il) {
31
0
            is_recr_impl[il] = n_pattern == 0 || (il % n_pattern < (n_pattern - 1));
32
0
        }
33
0
    }
34
35
0
    for (uint32_t il = n_layer(); il < n_layer_all; ++il) {
36
0
        is_recr_impl[il] = false;
37
0
    }
38
0
}
39
40
0
bool llama_hparams::is_swa_any() const {
41
0
    for (uint32_t il = 0; il < n_layer_all; ++il) {
42
0
        if (is_swa_impl[il]) {
43
0
            return true;
44
0
        }
45
0
    }
46
47
0
    return false;
48
0
}
49
50
0
uint32_t llama_hparams::n_head(uint32_t il) const {
51
0
    if (il < n_layer_all) {
52
0
        return n_head_arr[il];
53
0
    }
54
55
0
    GGML_ABORT("fatal error");
56
0
}
57
58
0
uint32_t llama_hparams::n_head_kv(uint32_t il) const {
59
0
    if (il < n_layer_all) {
60
0
        return n_head_kv_arr[il];
61
0
    }
62
63
0
    GGML_ABORT("fatal error");
64
0
}
65
66
0
uint32_t llama_hparams::n_ff(uint32_t il) const {
67
0
    if (il < n_layer_all) {
68
0
        return n_ff_arr[il];
69
0
    }
70
71
0
    GGML_ABORT("fatal error");
72
0
}
73
74
0
uint32_t llama_hparams::n_gqa(uint32_t il) const {
75
0
    const uint32_t n_head    = this->n_head(il);
76
0
    const uint32_t n_head_kv = this->n_head_kv(il);
77
78
0
    if (n_head_kv == 0) {
79
0
        return 0;
80
0
    }
81
82
0
    return n_head/n_head_kv;
83
0
}
84
85
0
uint32_t llama_hparams::n_rot(uint32_t il) const {
86
0
    if (il < n_layer_all) {
87
0
        return is_swa(il) ? n_rot_swa : n_rot_full;
88
0
    }
89
90
0
    GGML_ABORT("fatal error");
91
0
}
92
93
0
uint32_t llama_hparams::n_embd_inp() const {
94
0
    if (n_embd_inp_impl > 0) {
95
0
        return n_embd_inp_impl;
96
0
    }
97
98
0
    uint32_t n_embd_inp = n_embd;
99
100
0
    if (n_deepstack_layers > 0) {
101
0
        n_embd_inp += n_embd * n_deepstack_layers;
102
0
    }
103
104
0
    return n_embd_inp;
105
0
}
106
107
0
uint32_t llama_hparams::n_embd_inp_enc() const {
108
0
    return n_embd_inp_enc_impl > 0 ? n_embd_inp_enc_impl : n_embd_inp();
109
0
}
110
111
0
uint32_t llama_hparams::n_embd_out() const {
112
0
    return n_embd_out_impl > 0 ? n_embd_out_impl : n_embd;
113
0
}
114
115
0
uint32_t llama_hparams::n_embd_head_k(uint32_t il) const {
116
0
    if (il < n_layer_all) {
117
0
        return is_swa(il) ? n_embd_head_k_swa : n_embd_head_k_full;
118
0
    }
119
120
0
    GGML_ABORT("fatal error");
121
0
}
122
123
0
uint32_t llama_hparams::n_embd_head_v(uint32_t il) const {
124
0
    if (il < n_layer_all) {
125
0
        return is_swa(il) ? n_embd_head_v_swa : n_embd_head_v_full;
126
0
    }
127
128
0
    GGML_ABORT("fatal error");
129
0
}
130
131
0
uint32_t llama_hparams::n_embd_k_gqa(uint32_t il) const {
132
0
    const uint32_t n_head_kv = this->n_head_kv(il);
133
134
0
    return n_embd_head_k(il) * n_head_kv;
135
0
}
136
137
0
uint32_t llama_hparams::n_embd_v_gqa(uint32_t il) const {
138
0
    const uint32_t n_head_kv = this->n_head_kv(il);
139
140
0
    return n_embd_head_v(il) * n_head_kv;
141
0
}
142
143
0
bool llama_hparams::is_n_embd_k_gqa_variable() const {
144
0
    const uint32_t val = n_embd_k_gqa();
145
0
    for (uint32_t il = 0; il < n_layer_all; ++il) {
146
0
        if (val != n_embd_k_gqa(il)) {
147
0
            return true;
148
0
        }
149
0
    }
150
151
0
    return false;
152
0
}
153
154
0
bool llama_hparams::is_n_embd_v_gqa_variable() const {
155
0
    const uint32_t val = n_embd_v_gqa();
156
0
    for (uint32_t il = 0; il < n_layer_all; ++il) {
157
0
        if (val != n_embd_v_gqa(il)) {
158
0
            return true;
159
0
        }
160
0
    }
161
162
0
    return false;
163
0
}
164
165
0
uint32_t llama_hparams::n_embd_k_gqa_max() const {
166
0
    uint32_t val = n_embd_k_gqa();
167
0
    for (uint32_t il = 0; il < n_layer_all; ++il) {
168
0
        val = std::max(val, n_embd_k_gqa(il));
169
0
    }
170
171
0
    return val;
172
0
}
173
174
0
uint32_t llama_hparams::n_embd_v_gqa_max() const {
175
0
    uint32_t val = n_embd_v_gqa();
176
0
    for (uint32_t il = 0; il < n_layer_all; ++il) {
177
0
        val = std::max(val, n_embd_v_gqa(il));
178
0
    }
179
180
0
    return val;
181
0
}
182
183
0
uint32_t llama_hparams::n_embd_r() const {
184
0
    if (wkv_head_size != 0) {
185
        // for RWKV models
186
0
        return token_shift_count * n_embd;
187
0
    }
188
189
0
    if (n_shortconv_l_cache != 0) {
190
        // for LFM2 models
191
0
        return n_embd * (n_shortconv_l_cache - 1);
192
0
    }
193
194
0
    if (n_embd_head_kda != 0) {
195
        // for Kimi KDA layers
196
        // Conv state for Q, K, V: 3 * (d_conv - 1) * n_head * head_dim
197
0
        const uint32_t d_inner = n_head() * n_embd_head_kda;  // 32 * 128 = 4096
198
0
        return 3 * (ssm_d_conv > 0 ? ssm_d_conv - 1 : 3) * d_inner;
199
0
    }
200
201
    // TODO: maybe support other convolution strides than 1
202
    // NOTE: since the first column of the conv_state is shifted out each time, it's not actually needed
203
    // Corresponds to Mamba's conv_states size
204
0
    return (ssm_d_conv > 0 ? ssm_d_conv - 1 : 0) * (ssm_d_inner + 2*ssm_n_group*ssm_d_state);
205
0
}
206
207
0
uint32_t llama_hparams::n_embd_s() const {
208
0
    if (wkv_head_size != 0) {
209
        // corresponds to RWKV's wkv_states size
210
0
        return n_embd * wkv_head_size;
211
0
    }
212
213
0
    if (n_embd_head_kda != 0) {
214
        // for Kimi KDA layers
215
        // Full recurrent state: head_dim * head_dim * n_head
216
        // h tensor shape for delta attention: [head_dim, head_dim, n_head]
217
0
        return n_embd_head_kda * n_embd_head_kda * n_head();  // 128 * 128 * 32 = 524288
218
0
    }
219
220
    // corresponds to Mamba's ssm_states size
221
0
    return ssm_d_state * ssm_d_inner;
222
0
}
223
224
0
bool llama_hparams::is_recr(uint32_t il) const {
225
0
    if (il < n_layer_all) {
226
0
        return is_recr_impl[il];
227
0
    }
228
229
0
    GGML_ABORT("%s: il (%u) out of bounds (n_layer_all: %u)\n", __func__, il, n_layer_all);
230
0
}
231
232
0
uint32_t llama_hparams::n_pos_per_embd() const {
233
0
    return rope_type == LLAMA_ROPE_TYPE_MROPE || rope_type == LLAMA_ROPE_TYPE_IMROPE ? 4 : 1;
234
0
}
235
236
0
bool llama_hparams::is_swa(uint32_t il) const {
237
0
    if (il < n_layer_all) {
238
0
        return is_swa_impl[il];
239
0
    }
240
241
0
    GGML_ABORT("%s: il (%u) out of bounds (n_layer_all: %u)\n", __func__, il, n_layer_all);
242
0
}
243
244
0
bool llama_hparams::is_mla() const {
245
0
    assert((n_embd_head_k_mla_impl == 0 && n_embd_head_v_mla_impl == 0) ||
246
0
           (n_embd_head_k_mla_impl != 0 && n_embd_head_v_mla_impl != 0));
247
248
0
    return n_embd_head_k_mla_impl != 0 && n_embd_head_v_mla_impl != 0;
249
0
}
250
251
0
uint32_t llama_hparams::n_embd_head_k_mla() const {
252
0
    return is_mla() ? n_embd_head_k_mla_impl : n_embd_head_k();
253
0
}
254
255
0
uint32_t llama_hparams::n_embd_head_v_mla() const {
256
0
    return is_mla() ? n_embd_head_v_mla_impl : n_embd_head_v();
257
0
}
258
259
0
bool llama_hparams::has_kv(uint32_t il) const {
260
0
    if (n_layer_kv_from_start >= 0) {
261
0
        if (il < (uint32_t) n_layer_kv_from_start) {
262
0
            return true;
263
0
        }
264
265
0
        return false;
266
0
    }
267
268
    // by default, all layers have kv
269
0
    return true;
270
0
}
271
272
0
uint32_t llama_hparams::n_layer() const {
273
0
    return n_layer_all - n_layer_nextn;
274
0
}
275
276
0
bool llama_hparams::use_mrope() const {
277
0
    return rope_sections[0] > 0 && rope_sections[1] > 0;
278
0
}