Coverage Report

Created: 2025-11-28 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/src/llama-model-loader.h
Line
Count
Source
1
#pragma once
2
3
#include "llama.h"
4
5
#include "llama-impl.h"
6
#include "llama-arch.h"
7
#include "llama-mmap.h"
8
9
#include "ggml-cpp.h"
10
11
#include <cstddef>
12
#include <map>
13
#include <stdexcept>
14
#include <unordered_map>
15
16
// Hash map from a uint32_t key to a ggml backend buffer handle.
// NOTE(review): the key is presumably a context/file index — confirm against the loader implementation.
using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
17
18
// GGUF file version identifiers; each enumerator's numeric value equals its version number.
enum llama_fver {
19
    GGUF_FILE_VERSION_V1 = 1,
20
    GGUF_FILE_VERSION_V2 = 2,
21
    GGUF_FILE_VERSION_V3 = 3,
22
};
23
24
// Returns a C string for the given file version (declared here, defined elsewhere).
// NOTE(review): presumably a human-readable version name — confirm in the implementation.
const char * llama_file_version_name(llama_fver version);
25
26
struct llama_model_loader {
27
    // Holds information on a model weight
28
    struct llama_tensor_weight {
29
        uint16_t  idx; // source file index
30
        size_t   offs; // tensor data offset in the original file
31
32
        ggml_tensor * tensor;
33
34
0
        llama_tensor_weight(const llama_file * file, uint16_t idx, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
35
0
            const int tensor_idx = gguf_find_tensor(gguf_ctx,  ggml_get_name(tensor));
36
0
            if (tensor_idx < 0) {
37
0
                throw std::runtime_error(format("tensor '%s' not found in the model", ggml_get_name(tensor)));
38
0
            }
39
40
0
            offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
41
0
            if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size()) {
42
0
                throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", ggml_get_name(tensor)));
43
0
            }
44
0
        }
45
    };
46
47
    // custom comparator to sort weights more nicely by layer
48
    struct weight_name_comparer {
        // Orders names by the integer parsed from a leading "blk.<n>." prefix;
        // names without that prefix get layer -1 and therefore sort first.
        // Names with the same layer fall back to plain string comparison.
        bool operator()(const std::string & a, const std::string & b) const {
            const auto layer_of = [](const std::string & name) {
                int layer = -1; // left untouched when the prefix does not match
                sscanf(name.c_str(), "blk.%d.", &layer);
                return layer;
            };

            const int layer_a = layer_of(a);
            const int layer_b = layer_of(b);

            return layer_a == layer_b ? a < b : layer_a < layer_b;
        }
    };
60
61
    // Bit flags, one distinct bit each, so they can be combined with bitwise OR.
    // NOTE(review): presumably the values accepted by create_tensor()'s `flags` parameter — confirm.
    //
    // constexpr rather than plain const: under C++17 a static constexpr data member is
    // implicitly inline, so odr-using a flag (e.g. binding it to a const reference)
    // does not require a separate out-of-class definition.
    static constexpr int TENSOR_NOT_REQUIRED = 1 << 0;
    static constexpr int TENSOR_DUPLICATED   = 1 << 1;
    static constexpr int TENSOR_SKIP         = 1 << 2;
64
65
    int n_kv      = 0;
66
    int n_tensors = 0;
67
    int n_created = 0;
68
69
    uint64_t n_elements = 0;
70
    size_t   n_bytes    = 0;
71
72
    bool use_mmap = false;
73
    bool check_tensors;
74
75
    llama_files files;
76
    llama_ftype ftype;
77
    llama_fver  fver;
78
79
    llama_mmaps mappings;
80
81
    std::map<std::string, llama_tensor_weight, weight_name_comparer> weights_map;
82
    std::unordered_map<std::string, llama_model_kv_override> kv_overrides;
83
    const llama_model_tensor_buft_override * tensor_buft_overrides;
84
85
    gguf_context_ptr meta;
86
    std::vector<ggml_context_ptr> contexts;
87
88
    std::string arch_name;
89
    LLM_KV      llm_kv    = LLM_KV(LLM_ARCH_UNKNOWN);
90
91
    size_t size_done = 0;
92
    size_t size_data = 0;
93
    std::vector<std::pair<size_t, size_t>> mmaps_used;
94
95
    // Constructor (declared here, defined elsewhere).
    // `splits` is taken by non-const reference, so the implementation is able to modify the caller's vector.
    // NOTE(review): `use_mmap` and `check_tensors` share their names with data members and presumably
    // initialise them; the two pointer parameters presumably refer to caller-owned override lists — confirm.
    llama_model_loader(
96
        const std::string & fname,
97
        std::vector<std::string> & splits, // optional, only needed if the split does not follow the naming scheme
98
        bool use_mmap,
99
        bool check_tensors,
100
        const llama_model_kv_override * param_overrides_p,
101
        const llama_model_tensor_buft_override * param_tensor_buft_overrides_p);
102
103
    // Templated metadata accessors (declarations only; definitions live elsewhere).
    // Every overload writes its output through `result`, returns bool, and takes `required`
    // (default true). Each accessor is offered in two forms: keyed by string and keyed by an
    // llm_kv enumerator.
    // NOTE(review): presumably `false` is returned when a non-required key is absent and an
    // exception is raised when a required key is absent — confirm in the implementation.

    // get_arr_n: enabled only for integral T (via std::enable_if).
    template<typename T>
104
    typename std::enable_if<std::is_integral<T>::value, bool>::type
105
    get_arr_n(const std::string & key, T & result, bool required = true);
106
107
    template<typename T>
108
    typename std::enable_if<std::is_integral<T>::value, bool>::type
109
    get_arr_n(enum llm_kv kid, T & result, bool required = true);
110
111
    template<typename T>
112
    bool get_arr(const std::string & key, std::vector<T> & result, bool required = true); // output into a std::vector
113
114
    template<typename T, size_t N_MAX>
115
    bool get_arr(const std::string & key, std::array<T, N_MAX> & result, bool required = true); // output into a fixed-size std::array
116
117
    template<typename T>
118
    bool get_arr(enum llm_kv kid, T & result, bool required = true);
119
120
    template<typename T>
121
    bool get_key(const std::string & key, T & result, bool required = true);
122
123
    template<typename T>
124
    bool get_key(enum llm_kv kid, T & result, bool required = true);
125
126
    template<typename T, size_t N_MAX>
127
    bool get_key_or_arr(const std::string & key, std::array<T, N_MAX> & result, uint32_t n, bool required = true); // NOTE(review): meaning of `n` is not visible here — confirm
128
129
    template<typename T>
130
    bool get_key_or_arr(enum llm_kv kid, T & result, uint32_t n, bool required = true);
131
132
    std::string get_arch_name() const; // returns a std::string by value; NOTE(review): presumably the arch_name member — confirm
133
134
    enum llm_arch get_arch() const;
135
136
    const llama_tensor_weight * get_weight(const char * name) const; // pointer result; NOTE(review): presumably nullptr when no weight has this name — confirm
137
138
    const llama_tensor_weight & require_weight(const char * name) const; // reference result; NOTE(review): presumably throws when the weight is missing — confirm
139
140
    struct ggml_tensor * get_tensor_meta(const char * name) const; // pointer result; NOTE(review): presumably nullptr when not found — confirm
141
142
    struct ggml_tensor * require_tensor_meta(const std::string & name) const; // NOTE(review): presumably the throwing counterpart of get_tensor_meta — confirm
143
144
    const struct ggml_tensor * check_tensor_dims(const std::string & name, const std::vector<int64_t> & ne, bool required) const; // note: `required` has no default here
145
146
    struct ggml_tensor * create_tensor(struct ggml_context * ctx, const std::string & name, const std::initializer_list<int64_t> & ne, int flags = 0); // NOTE(review): `flags` is presumably a combination of the TENSOR_* constants — confirm
147
148
    struct ggml_tensor * create_tensor_as_view(struct ggml_context * ctx, struct ggml_tensor * base, const std::string & name, const std::initializer_list<int64_t> & ne, size_t offset, bool required = true);
149
150
    void done_getting_tensors() const;
151
152
    void init_mappings(bool prefetch = true, llama_mlocks * mlock_mmaps = nullptr); // both arguments optional: prefetch defaults to true, mlock_mmaps to nullptr
153
154
    void get_mapping_range(size_t * first, size_t * last, void ** addr, int idx, ggml_context * ctx) const; // NOTE(review): first/last/addr are presumably output parameters — confirm
155
156
    // for backwards compatibility, does not support ggml-backend
157
    void load_data_for(struct ggml_tensor * cur) const;
158
159
    // Returns false if cancelled by progress_callback
160
    bool load_all_data(
161
            struct ggml_context * ctx,
162
            llama_buf_map & bufs,
163
            llama_mlocks * lmlocks,
164
            llama_progress_callback progress_callback,
165
            void * progress_callback_user_data); // NOTE(review): presumably forwarded unchanged to progress_callback — confirm
166
167
    std::string ftype_name() const;
168
169
    void print_info() const;
170
};