Coverage Report

Created: 2025-12-14 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/src/llama-hparams.cpp
Line
Count
Source
1
#include "llama-hparams.h"
2
3
#include "ggml.h"
4
5
#include <cassert>
6
#include <cmath>
7
8
0
void llama_hparams::set_swa_pattern(uint32_t n_pattern, bool dense_first) {
9
0
    if (dense_first) {
10
0
        for (uint32_t il = 0; il < n_layer; ++il) {
11
0
            swa_layers[il] = n_pattern == 0 || (il % n_pattern != 0);
12
0
        }
13
0
    } else {
14
0
        for (uint32_t il = 0; il < n_layer; ++il) {
15
0
            swa_layers[il] = n_pattern == 0 || (il % n_pattern < (n_pattern - 1));
16
0
        }
17
0
    }
18
0
}
19
20
0
bool llama_hparams::is_swa_any() const {
21
0
    for (uint32_t il = 0; il < n_layer; ++il) {
22
0
        if (swa_layers[il]) {
23
0
            return true;
24
0
        }
25
0
    }
26
27
0
    return false;
28
0
}
29
30
0
uint32_t llama_hparams::n_head(uint32_t il) const {
31
0
    if (il < n_layer) {
32
0
        return n_head_arr[il];
33
0
    }
34
35
0
    GGML_ABORT("fatal error");
36
0
}
37
38
0
uint32_t llama_hparams::n_head_kv(uint32_t il) const {
39
0
    if (il < n_layer) {
40
0
        return n_head_kv_arr[il];
41
0
    }
42
43
0
    GGML_ABORT("fatal error");
44
0
}
45
46
0
uint32_t llama_hparams::n_ff(uint32_t il) const {
47
0
    if (il < n_layer) {
48
0
        return n_ff_arr[il];
49
0
    }
50
51
0
    GGML_ABORT("fatal error");
52
0
}
53
54
0
uint32_t llama_hparams::n_gqa(uint32_t il) const {
55
0
    const uint32_t n_head    = this->n_head(il);
56
0
    const uint32_t n_head_kv = this->n_head_kv(il);
57
58
0
    if (n_head_kv == 0) {
59
0
        return 0;
60
0
    }
61
62
0
    return n_head/n_head_kv;
63
0
}
64
65
0
uint32_t llama_hparams::n_embd_inp() const {
66
0
    uint32_t n_embd_inp = n_embd;
67
68
0
    if (n_deepstack_layers > 0) {
69
0
        n_embd_inp += n_embd * n_deepstack_layers;
70
0
    }
71
72
0
    return n_embd_inp;
73
0
}
74
75
0
uint32_t llama_hparams::n_embd_k_gqa(uint32_t il) const {
76
0
    const uint32_t n_head_kv = this->n_head_kv(il);
77
78
0
    return n_embd_head_k * n_head_kv;
79
0
}
80
81
0
uint32_t llama_hparams::n_embd_v_gqa(uint32_t il) const {
82
0
    const uint32_t n_head_kv = this->n_head_kv(il);
83
84
0
    return n_embd_head_v * n_head_kv;
85
0
}
86
87
0
bool llama_hparams::is_n_embd_k_gqa_variable() const {
88
0
    const uint32_t val = n_embd_k_gqa();
89
0
    for (uint32_t il = 0; il < n_layer; ++il) {
90
0
        if (val != n_embd_k_gqa(il)) {
91
0
            return true;
92
0
        }
93
0
    }
94
95
0
    return false;
96
0
}
97
98
0
bool llama_hparams::is_n_embd_v_gqa_variable() const {
99
0
    const uint32_t val = n_embd_v_gqa();
100
0
    for (uint32_t il = 0; il < n_layer; ++il) {
101
0
        if (val != n_embd_v_gqa(il)) {
102
0
            return true;
103
0
        }
104
0
    }
105
106
0
    return false;
107
0
}
108
109
0
uint32_t llama_hparams::n_embd_k_gqa_max() const {
110
0
    uint32_t val = n_embd_k_gqa();
111
0
    for (uint32_t il = 0; il < n_layer; ++il) {
112
0
        val = std::max(val, n_embd_k_gqa(il));
113
0
    }
114
115
0
    return val;
116
0
}
117
118
0
uint32_t llama_hparams::n_embd_v_gqa_max() const {
119
0
    uint32_t val = n_embd_v_gqa();
120
0
    for (uint32_t il = 0; il < n_layer; ++il) {
121
0
        val = std::max(val, n_embd_v_gqa(il));
122
0
    }
123
124
0
    return val;
125
0
}
126
127
0
uint32_t llama_hparams::n_embd_r() const {
128
0
    if (wkv_head_size != 0) {
129
        // for RWKV models
130
0
        return token_shift_count * n_embd;
131
0
    }
132
133
0
    if (n_shortconv_l_cache != 0) {
134
        // for LFM2 models
135
0
        return n_embd * (n_shortconv_l_cache - 1);
136
0
    }
137
138
    // TODO: maybe support other convolution strides than 1
139
    // NOTE: since the first column of the conv_state is shifted out each time, it's not actually needed
140
    // Corresponds to Mamba's conv_states size
141
0
    return (ssm_d_conv > 0 ? ssm_d_conv - 1 : 0) * (ssm_d_inner + 2*ssm_n_group*ssm_d_state);
142
0
}
143
144
0
uint32_t llama_hparams::n_embd_s() const {
145
0
    if (wkv_head_size != 0) {
146
        // corresponds to RWKV's wkv_states size
147
0
        return n_embd * wkv_head_size;
148
0
    }
149
150
    // corresponds to Mamba's ssm_states size
151
0
    return ssm_d_state * ssm_d_inner;
152
0
}
153
154
0
bool llama_hparams::is_recurrent(uint32_t il) const {
155
0
    if (il < n_layer) {
156
0
        return recurrent_layer_arr[il];
157
0
    }
158
159
0
    GGML_ABORT("%s: il (%u) out of bounds (n_layer: %u)\n", __func__, il, n_layer);
160
0
}
161
162
0
uint32_t llama_hparams::n_pos_per_embd() const {
163
0
    return rope_type == LLAMA_ROPE_TYPE_MROPE || rope_type == LLAMA_ROPE_TYPE_IMROPE ? 4 : 1;
164
0
}
165
166
0
bool llama_hparams::is_swa(uint32_t il) const {
167
0
    if (il < n_layer) {
168
0
        return swa_layers[il];
169
0
    }
170
171
0
    GGML_ABORT("fatal error");
172
0
}
173
174
0
bool llama_hparams::has_kv(uint32_t il) const {
175
0
    if (n_layer_kv_from_start >= 0) {
176
0
        if (il < (uint32_t) n_layer_kv_from_start) {
177
0
            return true;
178
0
        }
179
180
0
        return false;
181
0
    }
182
183
    // by default, all layers have kv
184
0
    return true;
185
0
}
186
187
0
uint32_t llama_hparams::n_layer_kv() const {
188
0
    uint32_t res = 0;
189
190
0
    for (uint32_t il = 0; il < n_layer; ++il) {
191
0
        if (has_kv(il)) {
192
0
            res++;
193
0
        }
194
0
    }
195
196
0
    return res;
197
0
}
198
199
0
bool llama_hparams::is_masked_swa(uint32_t n_swa, llama_swa_type swa_type, llama_pos p0, llama_pos p1) {
200
0
    assert(p0 >= 0 && p1 >= 0);
201
202
0
    switch (swa_type) {
203
0
        case LLAMA_SWA_TYPE_NONE:
204
0
            {
205
0
            } break;
206
0
        case LLAMA_SWA_TYPE_STANDARD:
207
0
            {
208
0
                if (p1 - p0 >= (int32_t) n_swa) {
209
0
                    return true;
210
0
                }
211
0
            } break;
212
0
        case LLAMA_SWA_TYPE_CHUNKED:
213
0
            {
214
0
                const llama_pos pos_chunk_start = (p1 / n_swa) * n_swa;
215
216
0
                if (p0 < pos_chunk_start) {
217
0
                    return true;
218
0
                }
219
0
            } break;
220
0
        case LLAMA_SWA_TYPE_SYMMETRIC:
221
0
            {
222
0
                const int32_t half_n_swa = (int32_t) n_swa / 2;
223
0
                const int32_t pos_diff = p1 - p0;
224
225
                // Mask if outside the symmetric window
226
0
                if (pos_diff < -half_n_swa || pos_diff > half_n_swa) {
227
0
                    return true;
228
0
                }
229
0
            } break;
230
0
    }
231
232
0
    return false;
233
0
}
234
235
0
float llama_hparams::yarn_attn_factor_adjust(float attn_factor, float freq_scale, float ext_factor) {
236
0
    GGML_ASSERT(ext_factor >= 0.0f);
237
238
0
    if (ext_factor != 0.0f) {
239
0
        attn_factor *= 1.0f / (1.0f + 0.1f * logf(1.0f / freq_scale));
240
0
    }
241
242
0
    return attn_factor;
243
0
}