Coverage Report

Created: 2025-11-28 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/ggml/src/gguf.cpp
Line
Count
Source
1
#include "ggml.h"
2
#include "ggml-backend.h"
3
#include "ggml-impl.h"
4
#include "gguf.h"
5
6
#include <cinttypes>
7
#include <cstddef>
8
#include <cstdint>
9
#include <cstdio>
10
#include <cstdlib>
11
#include <cstring>
12
#include <map>
13
#include <new>
14
#include <stdexcept>
15
#include <string>
16
#include <vector>
17
18
template <typename T>
19
struct type_to_gguf_type;
20
21
template <>
22
struct type_to_gguf_type<uint8_t> {
23
    static constexpr enum gguf_type value = GGUF_TYPE_UINT8;
24
};
25
26
template <>
27
struct type_to_gguf_type<int8_t> {
28
    static constexpr enum gguf_type value = GGUF_TYPE_INT8;
29
};
30
31
template <>
32
struct type_to_gguf_type<uint16_t> {
33
    static constexpr enum gguf_type value = GGUF_TYPE_UINT16;
34
};
35
36
template <>
37
struct type_to_gguf_type<int16_t> {
38
    static constexpr enum gguf_type value = GGUF_TYPE_INT16;
39
};
40
41
template <>
42
struct type_to_gguf_type<uint32_t> {
43
    static constexpr enum gguf_type value = GGUF_TYPE_UINT32;
44
};
45
46
template <>
47
struct type_to_gguf_type<int32_t> {
48
    static constexpr enum gguf_type value = GGUF_TYPE_INT32;
49
};
50
51
template <>
52
struct type_to_gguf_type<float> {
53
    static constexpr enum gguf_type value = GGUF_TYPE_FLOAT32;
54
};
55
56
template <>
57
struct type_to_gguf_type<bool> {
58
    static constexpr enum gguf_type value = GGUF_TYPE_BOOL;
59
};
60
61
template <>
62
struct type_to_gguf_type<std::string> {
63
    static constexpr enum gguf_type value = GGUF_TYPE_STRING;
64
};
65
66
template <>
67
struct type_to_gguf_type<uint64_t> {
68
    static constexpr enum gguf_type value = GGUF_TYPE_UINT64;
69
};
70
71
template <>
72
struct type_to_gguf_type<int64_t> {
73
    static constexpr enum gguf_type value = GGUF_TYPE_INT64;
74
};
75
76
template <>
77
struct type_to_gguf_type<double> {
78
    static constexpr enum gguf_type value = GGUF_TYPE_FLOAT64;
79
};
80
81
static const std::map<gguf_type, size_t> GGUF_TYPE_SIZE = {
82
    {GGUF_TYPE_UINT8,   sizeof(uint8_t)},
83
    {GGUF_TYPE_INT8,    sizeof(int8_t)},
84
    {GGUF_TYPE_UINT16,  sizeof(uint16_t)},
85
    {GGUF_TYPE_INT16,   sizeof(int16_t)},
86
    {GGUF_TYPE_UINT32,  sizeof(uint32_t)},
87
    {GGUF_TYPE_INT32,   sizeof(int32_t)},
88
    {GGUF_TYPE_FLOAT32, sizeof(float)},
89
    {GGUF_TYPE_BOOL,    sizeof(int8_t)},
90
    {GGUF_TYPE_STRING,  0}, // undefined
91
    {GGUF_TYPE_ARRAY,   0}, // undefined
92
    {GGUF_TYPE_UINT64,  sizeof(uint64_t)},
93
    {GGUF_TYPE_INT64,   sizeof(int64_t)},
94
    {GGUF_TYPE_FLOAT64, sizeof(double)},
95
};
96
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
97
98
static const std::map<gguf_type, const char *> GGUF_TYPE_NAME = {
99
    {GGUF_TYPE_UINT8,   "u8"},
100
    {GGUF_TYPE_INT8,    "i8"},
101
    {GGUF_TYPE_UINT16,  "u16"},
102
    {GGUF_TYPE_INT16,   "i16"},
103
    {GGUF_TYPE_UINT32,  "u32"},
104
    {GGUF_TYPE_INT32,   "i32"},
105
    {GGUF_TYPE_FLOAT32, "f32"},
106
    {GGUF_TYPE_BOOL,    "bool"},
107
    {GGUF_TYPE_STRING,  "str"},
108
    {GGUF_TYPE_ARRAY,   "arr"},
109
    {GGUF_TYPE_UINT64,  "u64"},
110
    {GGUF_TYPE_INT64,   "i64"},
111
    {GGUF_TYPE_FLOAT64, "f64"},
112
};
113
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
114
115
0
size_t gguf_type_size(enum gguf_type type) {
116
0
    auto it = GGUF_TYPE_SIZE.find(type);
117
0
    return it == GGUF_TYPE_SIZE.end() ? 0 : it->second;
118
0
}
119
120
struct gguf_kv {
121
    std::string key;
122
123
    bool is_array;
124
    enum gguf_type type;
125
126
    std::vector<int8_t>      data;
127
    std::vector<std::string> data_string;
128
129
    template <typename T>
130
    gguf_kv(const std::string & key, const T value)
131
0
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
132
0
        GGML_ASSERT(!key.empty());
133
0
        data.resize(sizeof(T));
134
0
        memcpy(data.data(), &value, sizeof(T));
135
0
    }
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned char)
Unexecuted instantiation: gguf_kv::gguf_kv<signed char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, signed char)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned short)
Unexecuted instantiation: gguf_kv::gguf_kv<short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, short)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned int)
Unexecuted instantiation: gguf_kv::gguf_kv<int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int)
Unexecuted instantiation: gguf_kv::gguf_kv<float>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, float)
Unexecuted instantiation: gguf_kv::gguf_kv<bool>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long)
Unexecuted instantiation: gguf_kv::gguf_kv<long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, long)
Unexecuted instantiation: gguf_kv::gguf_kv<double>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, double)
136
137
    template <typename T>
138
    gguf_kv(const std::string & key, const std::vector<T> & value)
139
0
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
140
0
        GGML_ASSERT(!key.empty());
141
0
        data.resize(value.size()*sizeof(T));
142
0
        for (size_t i = 0; i < value.size(); ++i) {
143
0
            const T tmp = value[i];
144
0
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
145
0
        }
146
0
    }
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<signed char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<signed char, std::__1::allocator<signed char> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned short, std::__1::allocator<unsigned short> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<short, std::__1::allocator<short> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<int, std::__1::allocator<int> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<float>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<float, std::__1::allocator<float> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<bool>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<bool, std::__1::allocator<bool> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<long, std::__1::allocator<long> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<double>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<double, std::__1::allocator<double> > const&)
147
148
    gguf_kv(const std::string & key, const std::string & value)
149
0
            : key(key), is_array(false), type(GGUF_TYPE_STRING) {
150
0
        GGML_ASSERT(!key.empty());
151
0
        data_string.push_back(value);
152
0
    }
153
154
    gguf_kv(const std::string & key, const std::vector<std::string> & value)
155
0
            : key(key), is_array(true), type(GGUF_TYPE_STRING) {
156
0
        GGML_ASSERT(!key.empty());
157
0
        data_string = value;
158
0
    }
159
160
0
    const std::string & get_key() const {
161
0
        return key;
162
0
    }
163
164
0
    const enum gguf_type & get_type() const {
165
0
        return type;
166
0
    }
167
168
0
    size_t get_ne() const {
169
0
        if (type == GGUF_TYPE_STRING) {
170
0
            const size_t ne = data_string.size();
171
0
            GGML_ASSERT(is_array || ne == 1);
172
0
            return ne;
173
0
        }
174
0
        const size_t type_size = gguf_type_size(type);
175
0
        GGML_ASSERT(data.size() % type_size == 0);
176
0
        const size_t ne = data.size() / type_size;
177
0
        GGML_ASSERT(is_array || ne == 1);
178
0
        return ne;
179
0
    }
180
181
    template <typename T>
182
0
    const T & get_val(const size_t i = 0) const {
183
0
        GGML_ASSERT(type_to_gguf_type<T>::value == type);
184
0
        if constexpr (std::is_same<T, std::string>::value) {
185
0
            GGML_ASSERT(data_string.size() >= i+1);
186
0
            return data_string[i];
187
0
        }
188
0
        const size_t type_size = gguf_type_size(type);
189
0
        GGML_ASSERT(data.size() % type_size == 0);
190
0
        GGML_ASSERT(data.size() >= (i+1)*type_size);
191
0
        return reinterpret_cast<const T *>(data.data())[i];
192
0
    }
Unexecuted instantiation: unsigned char const& gguf_kv::get_val<unsigned char>(unsigned long) const
Unexecuted instantiation: signed char const& gguf_kv::get_val<signed char>(unsigned long) const
Unexecuted instantiation: unsigned short const& gguf_kv::get_val<unsigned short>(unsigned long) const
Unexecuted instantiation: short const& gguf_kv::get_val<short>(unsigned long) const
Unexecuted instantiation: unsigned int const& gguf_kv::get_val<unsigned int>(unsigned long) const
Unexecuted instantiation: int const& gguf_kv::get_val<int>(unsigned long) const
Unexecuted instantiation: float const& gguf_kv::get_val<float>(unsigned long) const
Unexecuted instantiation: unsigned long const& gguf_kv::get_val<unsigned long>(unsigned long) const
Unexecuted instantiation: long const& gguf_kv::get_val<long>(unsigned long) const
Unexecuted instantiation: double const& gguf_kv::get_val<double>(unsigned long) const
Unexecuted instantiation: bool const& gguf_kv::get_val<bool>(unsigned long) const
Unexecuted instantiation: std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const& gguf_kv::get_val<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(unsigned long) const
193
194
0
    void cast(const enum gguf_type new_type) {
195
0
        const size_t new_type_size = gguf_type_size(new_type);
196
0
        GGML_ASSERT(data.size() % new_type_size == 0);
197
0
        type = new_type;
198
0
    }
199
};
200
201
struct gguf_tensor_info {
202
    struct ggml_tensor t; // for holding the equivalent info
203
    uint64_t offset;      // offset from start of `data`, must be a multiple of `ALIGNMENT`
204
};
205
206
struct gguf_context {
207
    uint32_t version = GGUF_VERSION;
208
209
    std::vector<struct gguf_kv> kv;
210
    std::vector<struct gguf_tensor_info> info;
211
212
    size_t alignment = GGUF_DEFAULT_ALIGNMENT;
213
    size_t offset    = 0; // offset of `data` from beginning of file
214
    size_t size      = 0; // size of `data` in bytes
215
216
    void * data = nullptr;
217
};
218
219
struct gguf_reader {
220
    FILE * file;
221
222
0
    gguf_reader(FILE * file) : file(file) {}
223
224
    template <typename T>
225
0
    bool read(T & dst) const {
226
0
        return fread(&dst, 1, sizeof(dst), file) == sizeof(dst);
227
0
    }
Unexecuted instantiation: bool gguf_reader::read<int>(int&) const
Unexecuted instantiation: bool gguf_reader::read<unsigned long>(unsigned long&) const
Unexecuted instantiation: bool gguf_reader::read<char>(char&) const
Unexecuted instantiation: bool gguf_reader::read<unsigned int>(unsigned int&) const
Unexecuted instantiation: bool gguf_reader::read<long>(long&) const
Unexecuted instantiation: bool gguf_reader::read<unsigned char>(unsigned char&) const
Unexecuted instantiation: bool gguf_reader::read<signed char>(signed char&) const
Unexecuted instantiation: bool gguf_reader::read<unsigned short>(unsigned short&) const
Unexecuted instantiation: bool gguf_reader::read<short>(short&) const
Unexecuted instantiation: bool gguf_reader::read<float>(float&) const
Unexecuted instantiation: bool gguf_reader::read<double>(double&) const
228
229
    template <typename T>
230
0
    bool read(std::vector<T> & dst, const size_t n) const {
231
0
        dst.resize(n);
232
0
        for (size_t i = 0; i < dst.size(); ++i) {
233
0
            if constexpr (std::is_same<T, bool>::value) {
234
0
                bool tmp;
235
0
                if (!read(tmp)) {
236
0
                    return false;
237
0
                }
238
0
                dst[i] = tmp;
239
0
            } else {
240
0
                if (!read(dst[i])) {
241
0
                    return false;
242
0
                }
243
0
            }
244
0
        }
245
0
        return true;
246
0
    }
Unexecuted instantiation: bool gguf_reader::read<char>(std::__1::vector<char, std::__1::allocator<char> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<unsigned char>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<signed char>(std::__1::vector<signed char, std::__1::allocator<signed char> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<unsigned short>(std::__1::vector<unsigned short, std::__1::allocator<unsigned short> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<short>(std::__1::vector<short, std::__1::allocator<short> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<unsigned int>(std::__1::vector<unsigned int, std::__1::allocator<unsigned int> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<int>(std::__1::vector<int, std::__1::allocator<int> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<float>(std::__1::vector<float, std::__1::allocator<float> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<bool>(std::__1::vector<bool, std::__1::allocator<bool> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<unsigned long>(std::__1::vector<unsigned long, std::__1::allocator<unsigned long> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<long>(std::__1::vector<long, std::__1::allocator<long> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<double>(std::__1::vector<double, std::__1::allocator<double> >&, unsigned long) const
247
248
0
    bool read(bool & dst) const {
249
0
        int8_t tmp = -1;
250
0
        if (!read(tmp)) {
251
0
            return false;
252
0
        }
253
0
        dst = tmp != 0;
254
0
        return true;
255
0
    }
256
257
0
    bool read(enum ggml_type & dst) const {
258
0
        int32_t tmp = -1;
259
0
        if (!read(tmp)) {
260
0
            return false;
261
0
        }
262
0
        dst = ggml_type(tmp);
263
0
        return true;
264
0
    }
265
266
0
    bool read(enum gguf_type & dst) const {
267
0
        int32_t tmp = -1;
268
0
        if (!read(tmp)) {
269
0
            return false;
270
0
        }
271
0
        dst = gguf_type(tmp);
272
0
        return true;
273
0
    }
274
275
0
    bool read(std::string & dst) const {
276
0
        uint64_t size = 0;
277
0
        if (!read(size)) {
278
0
            return false;
279
0
        }
280
0
        dst.resize(size);
281
0
        return fread(dst.data(), 1, dst.length(), file) == dst.length();
282
0
    }
283
284
0
    bool read(void * dst, const size_t size) const {
285
0
        return fread(dst, 1, size, file) == size;
286
0
    }
287
};
288
289
0
struct gguf_context * gguf_init_empty(void) {
290
0
    return new gguf_context;
291
0
}
292
293
template<typename T>
294
0
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
295
0
    if (is_array) {
296
0
        std::vector<T> value;
297
0
        try {
298
0
            if (!gr.read(value, n)) {
299
0
                return false;
300
0
            }
301
0
        } catch (std::length_error &) {
302
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
303
0
            return false;
304
0
        } catch (std::bad_alloc &) {
305
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
306
0
            return false;
307
0
        }
308
0
        kv.emplace_back(key, value);
309
0
    } else {
310
0
        T value;
311
0
        if (!gr.read(value)) {
312
0
            return false;
313
0
        }
314
0
        kv.emplace_back(key, value);
315
0
    }
316
0
    return true;
317
0
}
Unexecuted instantiation: bool gguf_read_emplace_helper<unsigned char>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<signed char>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<unsigned short>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<short>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<unsigned int>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<int>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<float>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<bool>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<unsigned long>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<long>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<double>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
318
319
0
struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
320
0
    const struct gguf_reader gr(file);
321
0
    struct gguf_context * ctx = new gguf_context;
322
323
0
    bool ok = true;
324
325
    // file magic
326
0
    {
327
0
        std::vector<char> magic;
328
0
        ok = ok && gr.read(magic, 4);
329
330
0
        if (!ok) {
331
0
            GGML_LOG_ERROR("%s: failed to read magic\n", __func__);
332
0
            gguf_free(ctx);
333
0
            return nullptr;
334
0
        }
335
336
0
        for (uint32_t i = 0; i < magic.size(); i++) {
337
0
            if (magic[i] != GGUF_MAGIC[i]) {
338
0
                char c0 = isprint(magic[0]) ? magic[0] : '?';
339
0
                char c1 = isprint(magic[1]) ? magic[1] : '?';
340
0
                char c2 = isprint(magic[2]) ? magic[2] : '?';
341
0
                char c3 = isprint(magic[3]) ? magic[3] : '?';
342
0
                GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, c0, c1, c2, c3);
343
0
                gguf_free(ctx);
344
0
                return nullptr;
345
0
            }
346
0
        }
347
0
    }
348
349
    // header
350
0
    int64_t n_kv      = 0;
351
0
    int64_t n_tensors = 0;
352
353
0
    if (ok && gr.read(ctx->version)) {
354
0
        if (ok && ctx->version == 0) {
355
0
            GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version);
356
0
            ok = false;
357
0
        }
358
359
        /*
360
         * bit layout is different when reading non-native endian models.
361
         * assuming that the GGUF version is 3, the non-native endian model
362
         * would read it as 0x30000000. we can use the AND operation against
363
         * the last 4 hexadecimal digits to check if the model is the same
364
         * endianness as the host system.
365
        */
366
0
        if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) {
367
0
            GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
368
0
            ok = false;
369
0
        }
370
371
0
        if (ok && ctx->version == 1) {
372
0
            GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
373
0
            ok = false;
374
0
        }
375
0
        if (ok && ctx->version > GGUF_VERSION) {
376
0
            GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
377
0
                __func__, ctx->version, GGUF_VERSION);
378
0
            ok = false;
379
0
        }
380
0
    } else {
381
0
        ok = false;
382
0
    }
383
384
0
    if (ok && gr.read(n_tensors)) {
385
0
        static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
386
0
        if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) {
387
0
            GGML_LOG_ERROR("%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
388
0
                __func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info));
389
0
            ok = false;
390
0
        }
391
0
    } else {
392
0
        ok = false;
393
0
    }
394
395
0
    if (ok && gr.read(n_kv)) {
396
0
        static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
397
0
        if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(gguf_kv))) {
398
0
            GGML_LOG_ERROR("%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
399
0
                    __func__, n_kv, SIZE_MAX/sizeof(gguf_kv));
400
0
            ok = false;
401
0
        }
402
0
    } else {
403
0
        ok = false;
404
0
    }
405
406
0
    if (!ok) {
407
0
        GGML_LOG_ERROR("%s: failed to read header\n", __func__);
408
0
        gguf_free(ctx);
409
0
        return nullptr;
410
0
    }
411
412
    // KV pairs
413
0
    {
414
0
        for (int64_t i = 0; ok && i < n_kv; ++i) {
415
0
            std::string key;
416
0
            gguf_type   type     = gguf_type(-1);
417
0
            bool        is_array = false;
418
0
            uint64_t    n        = 1;
419
420
0
            try {
421
0
                ok = ok && gr.read(key);
422
0
            } catch (std::length_error &) {
423
0
                GGML_LOG_ERROR("%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
424
0
                ok = false;
425
0
            } catch (std::bad_alloc &) {
426
0
                GGML_LOG_ERROR("%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
427
0
                ok = false;
428
0
            }
429
0
            for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
430
0
                if (key == ctx->kv[j].key) {
431
0
                    GGML_LOG_ERROR("%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
432
0
                    ok = false;
433
0
                }
434
0
            }
435
0
            if (!ok) {
436
0
                break;
437
0
            }
438
439
0
            ok = ok && gr.read(type);
440
0
            if (type == GGUF_TYPE_ARRAY) {
441
0
                is_array = true;
442
0
                ok = ok && gr.read(type);
443
0
                ok = ok && gr.read(n);
444
0
            }
445
0
            if (!ok) {
446
0
                break;
447
0
            }
448
449
0
            switch (type) {
450
0
                case GGUF_TYPE_UINT8:   ok = ok && gguf_read_emplace_helper<uint8_t>    (gr, ctx->kv, key, is_array, n); break;
451
0
                case GGUF_TYPE_INT8:    ok = ok && gguf_read_emplace_helper<int8_t>     (gr, ctx->kv, key, is_array, n); break;
452
0
                case GGUF_TYPE_UINT16:  ok = ok && gguf_read_emplace_helper<uint16_t>   (gr, ctx->kv, key, is_array, n); break;
453
0
                case GGUF_TYPE_INT16:   ok = ok && gguf_read_emplace_helper<int16_t>    (gr, ctx->kv, key, is_array, n); break;
454
0
                case GGUF_TYPE_UINT32:  ok = ok && gguf_read_emplace_helper<uint32_t>   (gr, ctx->kv, key, is_array, n); break;
455
0
                case GGUF_TYPE_INT32:   ok = ok && gguf_read_emplace_helper<int32_t>    (gr, ctx->kv, key, is_array, n); break;
456
0
                case GGUF_TYPE_FLOAT32: ok = ok && gguf_read_emplace_helper<float>      (gr, ctx->kv, key, is_array, n); break;
457
0
                case GGUF_TYPE_BOOL:    ok = ok && gguf_read_emplace_helper<bool>       (gr, ctx->kv, key, is_array, n); break;
458
0
                case GGUF_TYPE_STRING:  ok = ok && gguf_read_emplace_helper<std::string>(gr, ctx->kv, key, is_array, n); break;
459
0
                case GGUF_TYPE_UINT64:  ok = ok && gguf_read_emplace_helper<uint64_t>   (gr, ctx->kv, key, is_array, n); break;
460
0
                case GGUF_TYPE_INT64:   ok = ok && gguf_read_emplace_helper<int64_t>    (gr, ctx->kv, key, is_array, n); break;
461
0
                case GGUF_TYPE_FLOAT64: ok = ok && gguf_read_emplace_helper<double>     (gr, ctx->kv, key, is_array, n); break;
462
0
                case GGUF_TYPE_ARRAY:
463
0
                default:
464
0
                    {
465
0
                        GGML_LOG_ERROR("%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
466
0
                        ok = false;
467
0
                    } break;
468
0
            }
469
0
        }
470
471
0
        if (!ok) {
472
0
            GGML_LOG_ERROR("%s: failed to read key-value pairs\n", __func__);
473
0
            gguf_free(ctx);
474
0
            return nullptr;
475
0
        }
476
0
        GGML_ASSERT(int64_t(ctx->kv.size()) == n_kv);
477
478
0
        const int alignment_idx = gguf_find_key(ctx, GGUF_KEY_GENERAL_ALIGNMENT);
479
0
        ctx->alignment = alignment_idx == -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx);
480
481
0
        if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
482
0
            GGML_LOG_ERROR("%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
483
0
            gguf_free(ctx);
484
0
            return nullptr;
485
0
        }
486
0
    }
487
488
    // read the tensor info
489
0
    for (int64_t i = 0; ok && i < n_tensors; ++i) {
490
0
        struct gguf_tensor_info info;
491
492
        // tensor name
493
0
        {
494
0
            std::string name;
495
0
            try {
496
0
                ok = ok && gr.read(name);
497
0
            } catch (std::length_error &) {
498
0
                GGML_LOG_ERROR("%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
499
0
                ok = false;
500
0
            } catch (std::bad_alloc &) {
501
0
                GGML_LOG_ERROR("%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
502
0
                ok = false;
503
0
            }
504
0
            if (name.length() >= GGML_MAX_NAME) {
505
0
                GGML_LOG_ERROR("%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME);
506
0
                ok = false;
507
0
                break;
508
0
            }
509
0
            ggml_set_name(&info.t, name.c_str());
510
511
            // make sure there are no duplicate tensor names
512
0
            for (int64_t j = 0; ok && j < i; ++j) {
513
0
                if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
514
0
                    GGML_LOG_ERROR("%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
515
0
                    ok = false;
516
0
                    break;
517
0
                }
518
0
            }
519
0
        }
520
0
        if (!ok) {
521
0
            break;
522
0
        }
523
524
        // tensor shape
525
0
        {
526
0
            uint32_t n_dims = 0;
527
0
            ok = ok && gr.read(n_dims);
528
0
            if (n_dims > GGML_MAX_DIMS) {
529
0
                GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
530
0
                    __func__, info.t.name, n_dims, GGML_MAX_DIMS);
531
0
                ok = false;
532
0
                break;
533
0
            }
534
0
            for (uint32_t j = 0; ok && j < GGML_MAX_DIMS; ++j) {
535
0
                info.t.ne[j] = 1;
536
0
                if (j < n_dims) {
537
0
                    ok = ok && gr.read(info.t.ne[j]);
538
0
                }
539
540
                // check that all ne are non-negative
541
0
                if (info.t.ne[j] < 0) {
542
0
                    GGML_LOG_ERROR("%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
543
0
                        __func__, info.t.name, j, info.t.ne[j]);
544
0
                    ok = false;
545
0
                    break;
546
0
                }
547
0
            }
548
549
            // check that the total number of elements is representable
550
0
            if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) ||
551
0
                       (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
552
0
                       (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {
553
554
0
                GGML_LOG_ERROR("%s: total number of elements in tensor '%s' with shape "
555
0
                    "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
556
0
                    __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
557
0
                ok = false;
558
0
                break;
559
0
            }
560
0
        }
561
0
        if (!ok) {
562
0
            break;
563
0
        }
564
565
        // tensor type
566
0
        {
567
0
            ok = ok && gr.read(info.t.type);
568
569
            // check that tensor type is within defined range
570
0
            if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
571
0
                GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d (%s)\n",
572
0
                    __func__, info.t.name, info.t.type, ggml_type_name(info.t.type));
573
0
                ok = false;
574
0
                break;
575
0
            }
576
0
            const size_t  type_size = ggml_type_size(info.t.type);
577
0
            const int64_t blck_size = ggml_blck_size(info.t.type);
578
579
            // check that row size is divisible by block size
580
0
            if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
581
0
                GGML_LOG_ERROR("%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
582
0
                    "not a multiple of block size (%" PRId64 ")\n",
583
0
                    __func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size);
584
0
                ok = false;
585
0
                break;
586
0
            }
587
588
            // calculate byte offsets given the tensor shape and type
589
0
            info.t.nb[0] = type_size;
590
0
            info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size);
591
0
            for (int j = 2; j < GGML_MAX_DIMS; ++j) {
592
0
                info.t.nb[j] = info.t.nb[j - 1]*info.t.ne[j - 1];
593
0
            }
594
0
        }
595
0
        if (!ok) {
596
0
            break;
597
0
        }
598
599
        // tensor data offset within buffer
600
0
        ok = ok && gr.read(info.offset);
601
602
0
        ctx->info.push_back(info);
603
0
    }
604
605
0
    if (!ok) {
606
0
        GGML_LOG_ERROR("%s: failed to read tensor info\n", __func__);
607
0
        gguf_free(ctx);
608
0
        return nullptr;
609
0
    }
610
0
    GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors);
611
612
    // we require the data section to be aligned, so take into account any padding
613
0
    if (fseek(file, GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
614
0
        GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
615
0
        gguf_free(ctx);
616
0
        return nullptr;
617
0
    }
618
619
    // store the current file offset - this is where the data section starts
620
0
    ctx->offset = ftell(file);
621
622
    // compute the total size of the data section, taking into account the alignment
623
0
    {
624
0
        ctx->size = 0;
625
0
        for (size_t i = 0; i < ctx->info.size(); ++i) {
626
0
            const gguf_tensor_info & ti = ctx->info[i];
627
0
            if (ti.offset != ctx->size) {
628
0
                GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
629
0
                    __func__, ti.t.name, ti.offset, ctx->size);
630
0
                GGML_LOG_ERROR("%s: failed to read tensor data\n", __func__);
631
0
                gguf_free(ctx);
632
0
                return nullptr;
633
0
            }
634
0
            size_t padded_size = GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
635
0
            if (SIZE_MAX - ctx->size < padded_size) {
636
0
                GGML_LOG_ERROR("%s: tensor '%s' size overflow, cannot accumulate size %zu + %zu\n",
637
0
                    __func__, ti.t.name, ctx->size, padded_size);
638
0
                gguf_free(ctx);
639
0
                return nullptr;
640
0
            }
641
0
            ctx->size += padded_size;
642
0
        }
643
0
    }
644
645
    // load the tensor data only if requested
646
0
    if (params.ctx != nullptr) {
647
        // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
648
        // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
649
        //   the ggml_tensor structs to the appropriate locations in the binary blob
650
651
        // compute the exact size needed for the new ggml_context
652
0
        const size_t mem_size =
653
0
            params.no_alloc ?
654
0
            (n_tensors    )*ggml_tensor_overhead() :
655
0
            (n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
656
657
0
        struct ggml_init_params pdata = {
658
0
            /*mem_size   =*/ mem_size,
659
0
            /*mem_buffer =*/ nullptr,
660
0
            /*no_alloc   =*/ params.no_alloc,
661
0
        };
662
663
0
        *params.ctx = ggml_init(pdata);
664
0
        if (*params.ctx == nullptr) {
665
0
            GGML_LOG_ERROR("%s: failed to initialize ggml context for storing tensors\n", __func__);
666
0
            gguf_free(ctx);
667
0
            return nullptr;
668
0
        }
669
670
0
        struct ggml_context * ctx_data = *params.ctx;
671
672
0
        struct ggml_tensor * data = nullptr;
673
674
0
        if (!params.no_alloc) {
675
0
            data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
676
677
0
            ok = ok && data != nullptr;
678
679
0
            if (ok) {
680
0
                ggml_set_name(data, "GGUF tensor data binary blob");
681
0
            }
682
683
            // read the binary blob with the tensor data
684
0
            ok = ok && gr.read(data->data, ctx->size);
685
686
0
            if (!ok) {
687
0
                GGML_LOG_ERROR("%s: failed to read tensor data binary blob\n", __func__);
688
0
                ggml_free(ctx_data);
689
0
                *params.ctx = nullptr;
690
0
                gguf_free(ctx);
691
0
                return nullptr;
692
0
            }
693
694
0
            ctx->data = data->data;
695
0
        }
696
697
0
        ggml_set_no_alloc(ctx_data, true);
698
699
        // create the tensors
700
0
        for (size_t i = 0; i < ctx->info.size(); ++i) {
701
0
            const struct gguf_tensor_info & info = ctx->info[i];
702
703
0
            struct ggml_tensor * cur = ggml_new_tensor(ctx_data, info.t.type, GGML_MAX_DIMS, info.t.ne);
704
705
0
            ok = ok && cur != nullptr;
706
707
0
            if (!ok) {
708
0
                break;
709
0
            }
710
711
0
            ggml_set_name(cur, info.t.name);
712
713
            // point the data member to the appropriate location in the binary blob using the tensor info
714
0
            if (!params.no_alloc) {
715
0
                cur->data = (char *) data->data + info.offset;
716
0
            }
717
0
        }
718
719
0
        if (!ok) {
720
0
            GGML_LOG_ERROR("%s: failed to create tensors\n", __func__);
721
0
            ggml_free(ctx_data);
722
0
            *params.ctx = nullptr;
723
0
            gguf_free(ctx);
724
0
            return nullptr;
725
0
        }
726
727
0
        ggml_set_no_alloc(ctx_data, params.no_alloc);
728
0
    }
729
730
0
    return ctx;
731
0
}
732
733
0
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
734
0
    FILE * file = ggml_fopen(fname, "rb");
735
736
0
    if (!file) {
737
0
        GGML_LOG_ERROR("%s: failed to open GGUF file '%s'\n", __func__, fname);
738
0
        return nullptr;
739
0
    }
740
741
0
    struct gguf_context * result = gguf_init_from_file_impl(file, params);
742
0
    fclose(file);
743
0
    return result;
744
0
}
745
746
0
// Destroys a context with `delete`. A null `ctx` is accepted and ignored.
void gguf_free(struct gguf_context * ctx) {
    if (ctx != nullptr) {
        delete ctx;
    }
}
752
753
0
const char * gguf_type_name(enum gguf_type type) {
754
0
    auto it = GGUF_TYPE_NAME.find(type);
755
0
    return it == GGUF_TYPE_NAME.end() ? nullptr : it->second;
756
0
}
757
758
0
uint32_t gguf_get_version(const struct gguf_context * ctx) {
759
0
    return ctx->version;
760
0
}
761
762
0
size_t gguf_get_alignment(const struct gguf_context * ctx) {
763
0
    return ctx->alignment;
764
0
}
765
766
0
size_t gguf_get_data_offset(const struct gguf_context * ctx) {
767
0
    return ctx->offset;
768
0
}
769
770
0
int64_t gguf_get_n_kv(const struct gguf_context * ctx) {
771
0
    return ctx->kv.size();
772
0
}
773
774
0
int64_t gguf_find_key(const struct gguf_context * ctx, const char * key) {
775
    // return -1 if key not found
776
0
    int64_t keyfound = -1;
777
778
0
    const int64_t n_kv = gguf_get_n_kv(ctx);
779
780
0
    for (int64_t i = 0; i < n_kv; ++i) {
781
0
        if (strcmp(key, gguf_get_key(ctx, i)) == 0) {
782
0
            keyfound = i;
783
0
            break;
784
0
        }
785
0
    }
786
787
0
    return keyfound;
788
0
}
789
790
0
const char * gguf_get_key(const struct gguf_context * ctx, int64_t key_id) {
791
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
792
0
    return ctx->kv[key_id].get_key().c_str();
793
0
}
794
795
0
enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int64_t key_id) {
796
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
797
0
    return ctx->kv[key_id].is_array ? GGUF_TYPE_ARRAY : ctx->kv[key_id].get_type();
798
0
}
799
800
0
enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id) {
801
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
802
0
    GGML_ASSERT(ctx->kv[key_id].is_array);
803
0
    return ctx->kv[key_id].get_type();
804
0
}
805
806
0
const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id) {
807
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
808
0
    GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING);
809
0
    return ctx->kv[key_id].data.data();
810
0
}
811
812
0
const char * gguf_get_arr_str(const struct gguf_context * ctx, int64_t key_id, size_t i) {
813
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
814
0
    GGML_ASSERT(ctx->kv[key_id].get_type() == GGUF_TYPE_STRING);
815
0
    return ctx->kv[key_id].data_string[i].c_str();
816
0
}
817
818
0
size_t gguf_get_arr_n(const struct gguf_context * ctx, int64_t key_id) {
819
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
820
821
0
    if (ctx->kv[key_id].type == GGUF_TYPE_STRING) {
822
0
        return ctx->kv[key_id].data_string.size();
823
0
    }
824
825
0
    const size_t type_size = gguf_type_size(ctx->kv[key_id].type);
826
0
    GGML_ASSERT(ctx->kv[key_id].data.size() % type_size == 0);
827
0
    return ctx->kv[key_id].data.size() / type_size;
828
0
}
829
830
0
uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int64_t key_id) {
831
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
832
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
833
0
    return ctx->kv[key_id].get_val<uint8_t>();
834
0
}
835
836
0
int8_t gguf_get_val_i8(const struct gguf_context * ctx, int64_t key_id) {
837
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
838
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
839
0
    return ctx->kv[key_id].get_val<int8_t>();
840
0
}
841
842
0
uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int64_t key_id) {
843
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
844
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
845
0
    return ctx->kv[key_id].get_val<uint16_t>();
846
0
}
847
848
0
int16_t gguf_get_val_i16(const struct gguf_context * ctx, int64_t key_id) {
849
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
850
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
851
0
    return ctx->kv[key_id].get_val<int16_t>();
852
0
}
853
854
0
uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int64_t key_id) {
855
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
856
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
857
0
    return ctx->kv[key_id].get_val<uint32_t>();
858
0
}
859
860
0
int32_t gguf_get_val_i32(const struct gguf_context * ctx, int64_t key_id) {
861
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
862
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
863
0
    return ctx->kv[key_id].get_val<int32_t>();
864
0
}
865
866
0
float gguf_get_val_f32(const struct gguf_context * ctx, int64_t key_id) {
867
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
868
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
869
0
    return ctx->kv[key_id].get_val<float>();
870
0
}
871
872
0
uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int64_t key_id) {
873
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
874
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
875
0
    return ctx->kv[key_id].get_val<uint64_t>();
876
0
}
877
878
0
int64_t gguf_get_val_i64(const struct gguf_context * ctx, int64_t key_id) {
879
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
880
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
881
0
    return ctx->kv[key_id].get_val<int64_t>();
882
0
}
883
884
0
double gguf_get_val_f64(const struct gguf_context * ctx, int64_t key_id) {
885
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
886
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
887
0
    return ctx->kv[key_id].get_val<double>();
888
0
}
889
890
0
bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id) {
891
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
892
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
893
0
    return ctx->kv[key_id].get_val<bool>();
894
0
}
895
896
0
const char * gguf_get_val_str(const struct gguf_context * ctx, int64_t key_id) {
897
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
898
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
899
0
    return ctx->kv[key_id].get_val<std::string>().c_str();
900
0
}
901
902
0
const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id) {
903
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
904
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
905
0
    GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING);
906
0
    return ctx->kv[key_id].data.data();
907
0
}
908
909
0
int64_t gguf_get_n_tensors(const struct gguf_context * ctx) {
910
0
    return ctx->info.size();
911
0
}
912
913
0
int64_t gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
914
    // return -1 if tensor not found
915
0
    int64_t tensor_id = -1;
916
917
0
    const int64_t n_tensors = gguf_get_n_tensors(ctx);
918
919
0
    for (int64_t i = 0; i < n_tensors; ++i) {
920
0
        if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) {
921
0
            tensor_id = i;
922
0
            break;
923
0
        }
924
0
    }
925
926
0
    return tensor_id;
927
0
}
928
929
0
size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id) {
930
0
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
931
0
    return ctx->info[tensor_id].offset;
932
0
}
933
934
0
const char * gguf_get_tensor_name(const struct gguf_context * ctx, int64_t tensor_id) {
935
0
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
936
0
    return ctx->info[tensor_id].t.name;
937
0
}
938
939
0
enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int64_t tensor_id) {
940
0
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
941
0
    return ctx->info[tensor_id].t.type;
942
0
}
943
944
0
size_t gguf_get_tensor_size(const struct gguf_context * ctx, int64_t tensor_id) {
945
0
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
946
0
    return ggml_nbytes(&ctx->info[tensor_id].t);
947
0
}
948
949
0
int64_t gguf_remove_key(struct gguf_context * ctx, const char * key) {
950
0
    const int64_t key_id = gguf_find_key(ctx, key);
951
0
    if (key_id >= 0) {
952
0
        ctx->kv.erase(ctx->kv.begin() + key_id);
953
0
    }
954
0
    return key_id;
955
0
}
956
957
template<typename T>
958
0
static void gguf_check_reserved_keys(const std::string & key, const T val) {
959
0
    if (key == GGUF_KEY_GENERAL_ALIGNMENT) {
960
0
        if constexpr (std::is_same<T, uint32_t>::value) {
961
0
            GGML_ASSERT(val > 0 && (val & (val - 1)) == 0 && GGUF_KEY_GENERAL_ALIGNMENT " must be power of 2");
962
0
        } else {
963
0
            GGML_UNUSED(val);
964
0
            GGML_ABORT(GGUF_KEY_GENERAL_ALIGNMENT " must be type u32");
965
0
        }
966
0
    }
967
0
}
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned char)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<signed char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, signed char)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned short)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, short)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned int)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<float>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, float)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, long)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<double>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, double)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<bool>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<char const*>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, char const*)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<void const*>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, void const*)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<char const**>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, char const**)
968
969
0
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
970
0
    gguf_check_reserved_keys(key, val);
971
0
    gguf_remove_key(ctx, key);
972
0
    ctx->kv.emplace_back(key, val);
973
0
}
974
975
0
void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
976
0
    gguf_check_reserved_keys(key, val);
977
0
    gguf_remove_key(ctx, key);
978
0
    ctx->kv.emplace_back(key, val);
979
0
}
980
981
0
void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
982
0
    gguf_check_reserved_keys(key, val);
983
0
    gguf_remove_key(ctx, key);
984
0
    ctx->kv.emplace_back(key, val);
985
0
}
986
987
0
void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
988
0
    gguf_check_reserved_keys(key, val);
989
0
    gguf_remove_key(ctx, key);
990
0
    ctx->kv.emplace_back(key, val);
991
0
}
992
993
0
void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
994
0
    gguf_check_reserved_keys(key, val);
995
0
    gguf_remove_key(ctx, key);
996
0
    ctx->kv.emplace_back(key, val);
997
0
}
998
999
0
void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
1000
0
    gguf_check_reserved_keys(key, val);
1001
0
    gguf_remove_key(ctx, key);
1002
0
    ctx->kv.emplace_back(key, val);
1003
0
}
1004
1005
0
void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
1006
0
    gguf_check_reserved_keys(key, val);
1007
0
    gguf_remove_key(ctx, key);
1008
0
    ctx->kv.emplace_back(key, val);
1009
0
}
1010
1011
0
void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
1012
0
    gguf_check_reserved_keys(key, val);
1013
0
    gguf_remove_key(ctx, key);
1014
0
    ctx->kv.emplace_back(key, val);
1015
0
}
1016
1017
0
void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
1018
0
    gguf_check_reserved_keys(key, val);
1019
0
    gguf_remove_key(ctx, key);
1020
0
    ctx->kv.emplace_back(key, val);
1021
0
}
1022
1023
0
void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
1024
0
    gguf_check_reserved_keys(key, val);
1025
0
    gguf_remove_key(ctx, key);
1026
0
    ctx->kv.emplace_back(key, val);
1027
0
}
1028
1029
0
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
1030
0
    gguf_check_reserved_keys(key, val);
1031
0
    gguf_remove_key(ctx, key);
1032
0
    ctx->kv.emplace_back(key, val);
1033
0
}
1034
1035
0
void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
1036
0
    gguf_check_reserved_keys(key, val);
1037
0
    gguf_remove_key(ctx, key);
1038
0
    ctx->kv.emplace_back(key, std::string(val));
1039
0
}
1040
1041
0
void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n) {
1042
0
    gguf_check_reserved_keys(key, data);
1043
0
    gguf_remove_key(ctx, key);
1044
1045
0
    const size_t nbytes = n*gguf_type_size(type);
1046
0
    std::vector<int8_t> tmp(nbytes);
1047
0
    if (!tmp.empty()) {
1048
0
        memcpy(tmp.data(), data, nbytes);
1049
0
    }
1050
0
    ctx->kv.emplace_back(key, tmp);
1051
0
    ctx->kv.back().cast(type);
1052
0
}
1053
1054
0
void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, size_t n) {
1055
0
    gguf_check_reserved_keys(key, data);
1056
0
    gguf_remove_key(ctx, key);
1057
1058
0
    std::vector<std::string> tmp(n);
1059
0
    for (size_t i = 0; i < n; ++i) {
1060
0
        tmp[i] = data[i];
1061
0
    }
1062
0
    ctx->kv.emplace_back(key, tmp);
1063
0
}
1064
1065
// set or add KV pairs from another context
1066
0
void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src) {
1067
0
    const int64_t n_kv = gguf_get_n_kv(src);
1068
0
    for (int64_t i = 0; i < n_kv; ++i) {
1069
0
        const struct gguf_kv & kv = src->kv[i];
1070
1071
0
        if (!kv.is_array) {
1072
0
            switch (kv.get_type()) {
1073
0
                case GGUF_TYPE_UINT8:   gguf_set_val_u8  (ctx, kv.get_key().c_str(), kv.get_val<uint8_t>());             break;
1074
0
                case GGUF_TYPE_INT8:    gguf_set_val_i8  (ctx, kv.get_key().c_str(), kv.get_val<int8_t>());              break;
1075
0
                case GGUF_TYPE_UINT16:  gguf_set_val_u16 (ctx, kv.get_key().c_str(), kv.get_val<uint16_t>());            break;
1076
0
                case GGUF_TYPE_INT16:   gguf_set_val_i16 (ctx, kv.get_key().c_str(), kv.get_val<int16_t>());             break;
1077
0
                case GGUF_TYPE_UINT32:  gguf_set_val_u32 (ctx, kv.get_key().c_str(), kv.get_val<uint32_t>());            break;
1078
0
                case GGUF_TYPE_INT32:   gguf_set_val_i32 (ctx, kv.get_key().c_str(), kv.get_val<int32_t>());             break;
1079
0
                case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, kv.get_key().c_str(), kv.get_val<float>());               break;
1080
0
                case GGUF_TYPE_UINT64:  gguf_set_val_u64 (ctx, kv.get_key().c_str(), kv.get_val<uint64_t>());            break;
1081
0
                case GGUF_TYPE_INT64:   gguf_set_val_i64 (ctx, kv.get_key().c_str(), kv.get_val<int64_t>());             break;
1082
0
                case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, kv.get_key().c_str(), kv.get_val<double>());              break;
1083
0
                case GGUF_TYPE_BOOL:    gguf_set_val_bool(ctx, kv.get_key().c_str(), kv.get_val<bool>());                break;
1084
0
                case GGUF_TYPE_STRING:  gguf_set_val_str (ctx, kv.get_key().c_str(), kv.get_val<std::string>().c_str()); break;
1085
0
                case GGUF_TYPE_ARRAY:
1086
0
                default: GGML_ABORT("invalid type");
1087
0
            }
1088
0
            continue;
1089
0
        }
1090
1091
0
        const size_t ne = kv.get_ne();
1092
1093
0
        switch (kv.get_type()) {
1094
0
            case GGUF_TYPE_UINT8:
1095
0
            case GGUF_TYPE_INT8:
1096
0
            case GGUF_TYPE_UINT16:
1097
0
            case GGUF_TYPE_INT16:
1098
0
            case GGUF_TYPE_UINT32:
1099
0
            case GGUF_TYPE_INT32:
1100
0
            case GGUF_TYPE_FLOAT32:
1101
0
            case GGUF_TYPE_UINT64:
1102
0
            case GGUF_TYPE_INT64:
1103
0
            case GGUF_TYPE_FLOAT64:
1104
0
            case GGUF_TYPE_BOOL: {
1105
0
                gguf_set_arr_data(ctx, kv.get_key().c_str(), kv.get_type(), kv.data.data(), ne);
1106
0
            } break;
1107
0
            case GGUF_TYPE_STRING: {
1108
0
                std::vector<const char *> tmp(ne);
1109
0
                for (size_t j = 0; j < ne; ++j) {
1110
0
                    tmp[j] = kv.data_string[j].c_str();
1111
0
                }
1112
0
                gguf_set_arr_str(ctx, kv.get_key().c_str(), tmp.data(), ne);
1113
0
            } break;
1114
0
            case GGUF_TYPE_ARRAY:
1115
0
            default: GGML_ABORT("invalid type");
1116
0
        }
1117
0
    }
1118
0
}
1119
1120
void gguf_add_tensor(
1121
             struct gguf_context * ctx,
1122
0
        const struct ggml_tensor * tensor) {
1123
0
    GGML_ASSERT(tensor);
1124
0
    if (gguf_find_tensor(ctx, tensor->name) != -1) {
1125
0
        GGML_ABORT("duplicate tensor name: %s", tensor->name);
1126
0
    }
1127
1128
0
    struct gguf_tensor_info ti;
1129
0
    ti.t = *tensor;
1130
0
    ti.offset = ctx->info.empty() ? 0 :
1131
0
        ctx->info.back().offset + GGML_PAD(ggml_nbytes(&ctx->info.back().t), ctx->alignment);
1132
0
    ctx->info.push_back(ti);
1133
0
}
1134
1135
0
void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
1136
0
    const int64_t tensor_id = gguf_find_tensor(ctx, name);
1137
0
    if (tensor_id < 0) {
1138
0
        GGML_ABORT("tensor not found: %s", name);
1139
0
    }
1140
0
    struct ggml_tensor * tensor = &ctx->info[tensor_id].t;
1141
0
    const size_t  type_size = ggml_type_size(type);
1142
0
    const int64_t blck_size = ggml_blck_size(type);
1143
1144
0
    tensor->type = type;
1145
0
    GGML_ASSERT(tensor->ne[0] % blck_size == 0 && "tensor row size not divisible by block size of new type");
1146
1147
0
    tensor->nb[0] = type_size;
1148
0
    tensor->nb[1] = tensor->nb[0]*(tensor->ne[0]/blck_size);
1149
0
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
1150
0
        tensor->nb[i] = tensor->nb[i - 1]*tensor->ne[i - 1];
1151
0
    }
1152
1153
    // update offsets
1154
0
    const int64_t n_tensors = gguf_get_n_tensors(ctx);
1155
0
    for (int64_t i = tensor_id + 1; i < n_tensors; ++i) {
1156
0
        ctx->info[i].offset = ctx->info[i - 1].offset + GGML_PAD(ggml_nbytes(&ctx->info[i - 1].t), ctx->alignment);
1157
0
    }
1158
0
}
1159
1160
0
void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data) {
1161
0
    const int64_t tensor_id = gguf_find_tensor(ctx, name);
1162
0
    if (tensor_id < 0) {
1163
0
        GGML_ABORT("tensor not found: %s", name);
1164
0
    }
1165
1166
0
    ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const
1167
0
}
1168
1169
struct gguf_writer_base {
1170
    size_t written_bytes {0u};
1171
1172
0
    ~gguf_writer_base(void) {}
1173
1174
    // we bet on devirtualization
1175
    virtual void write(int8_t val) = 0;
1176
    virtual void write(const std::vector<int8_t> & val) = 0;
1177
    virtual void write_tensor_data(const struct gguf_tensor_info & info, size_t offset_data, size_t alignment) = 0;
1178
1179
    template <typename T>
1180
0
    void write(const T & val) {
1181
0
        for (size_t i = 0; i < sizeof(val); ++i) {
1182
0
            write(reinterpret_cast<const int8_t *>(&val)[i]);
1183
0
        }
1184
0
    }
Unexecuted instantiation: void gguf_writer_base::write<char>(char const&)
Unexecuted instantiation: void gguf_writer_base::write<unsigned int>(unsigned int const&)
Unexecuted instantiation: void gguf_writer_base::write<long>(long const&)
Unexecuted instantiation: void gguf_writer_base::write<int>(int const&)
Unexecuted instantiation: void gguf_writer_base::write<unsigned long>(unsigned long const&)
1185
1186
0
    void write(const bool & val) {
1187
0
        const int8_t val8 = val ? 1 : 0;
1188
0
        write(val8);
1189
0
    }
1190
1191
0
    void write(const std::string & val) {
1192
0
        {
1193
0
            const uint64_t n = val.length();
1194
0
            write(n);
1195
0
        }
1196
0
        for (size_t i = 0; i < val.length(); ++i) {
1197
0
            write((val.data())[i]);
1198
0
        }
1199
0
    }
1200
1201
0
    void write(const char * val) {
1202
0
        write(std::string(val));
1203
0
    }
1204
1205
0
    void write(const enum ggml_type & val) {
1206
0
        write(int32_t(val));
1207
0
    }
1208
1209
0
    void write(const enum gguf_type & val) {
1210
0
        write(int32_t(val));
1211
0
    }
1212
1213
0
    void write(const struct gguf_kv & kv) {
1214
0
        const uint64_t ne = kv.get_ne();
1215
1216
0
        write(kv.get_key());
1217
1218
0
        if (kv.is_array) {
1219
0
            write(GGUF_TYPE_ARRAY);
1220
0
            write(kv.get_type());
1221
0
            write(ne);
1222
0
        } else {
1223
0
            write(kv.get_type());
1224
0
        }
1225
1226
0
        switch (kv.get_type()) {
1227
0
            case GGUF_TYPE_UINT8:
1228
0
            case GGUF_TYPE_INT8:
1229
0
            case GGUF_TYPE_UINT16:
1230
0
            case GGUF_TYPE_INT16:
1231
0
            case GGUF_TYPE_UINT32:
1232
0
            case GGUF_TYPE_INT32:
1233
0
            case GGUF_TYPE_FLOAT32:
1234
0
            case GGUF_TYPE_UINT64:
1235
0
            case GGUF_TYPE_INT64:
1236
0
            case GGUF_TYPE_FLOAT64: {
1237
0
                write(kv.data);
1238
0
            } break;
1239
0
            case GGUF_TYPE_BOOL: {
1240
0
                for (size_t i = 0; i < ne; ++i) {
1241
0
                    write(kv.get_val<bool>(i));
1242
0
                }
1243
0
            } break;
1244
0
            case GGUF_TYPE_STRING: {
1245
0
                for (size_t i = 0; i < ne; ++i) {
1246
0
                    write(kv.get_val<std::string>(i));
1247
0
                }
1248
0
            } break;
1249
0
            case GGUF_TYPE_ARRAY:
1250
0
            default: GGML_ABORT("invalid type");
1251
0
        }
1252
0
    }
1253
1254
0
    void write_tensor_meta(const struct gguf_tensor_info & info) {
1255
0
        write(info.t.name);
1256
1257
0
        const uint32_t n_dims = ggml_n_dims(&info.t);
1258
0
        write(n_dims);
1259
1260
0
        for (uint32_t j = 0; j < n_dims; ++j) {
1261
0
            write(info.t.ne[j]);
1262
0
        }
1263
0
        write(info.t.type);
1264
0
        write(info.offset);
1265
0
    }
1266
1267
0
    void pad(const size_t alignment) {
1268
0
        while (written_bytes % alignment != 0) {
1269
0
            const int8_t zero = 0;
1270
0
            write(zero);
1271
0
        }
1272
0
    }
1273
};
1274
1275
// vector buffer based writer
1276
struct gguf_writer_buf final : public gguf_writer_base {
1277
    std::vector<int8_t> & buf;
1278
1279
0
    gguf_writer_buf(std::vector<int8_t> & buf) : buf(buf) {}
1280
1281
    using gguf_writer_base::write;
1282
1283
0
    void write(const int8_t val) override {
1284
0
        buf.push_back(val);
1285
0
        written_bytes++;
1286
0
    }
1287
1288
0
    void write(const std::vector<int8_t> & val) override {
1289
0
        buf.insert(buf.end(), val.begin(), val.end());
1290
0
        written_bytes += val.size();
1291
0
    }
1292
1293
0
    void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
1294
0
        GGML_ASSERT(buf.size() - offset_data == info.offset);
1295
1296
0
        GGML_ASSERT(ggml_is_contiguous(&info.t));
1297
0
        const size_t offset = buf.size();
1298
0
        const size_t nbytes = ggml_nbytes(&info.t);
1299
1300
0
        buf.resize(offset + nbytes);
1301
0
        if (info.t.buffer) {
1302
0
            ggml_backend_tensor_get(&info.t, buf.data() + offset, 0, nbytes);
1303
0
        } else {
1304
0
            GGML_ASSERT(info.t.data);
1305
0
            memcpy(buf.data() + offset, info.t.data, nbytes);
1306
0
        }
1307
0
        written_bytes += nbytes;
1308
1309
0
        pad(alignment);
1310
0
    }
1311
};
1312
1313
// file based writer
1314
struct gguf_writer_file final : public gguf_writer_base {
1315
    FILE * file;
1316
1317
0
    gguf_writer_file(FILE* file) : file(file) {}
1318
1319
    using gguf_writer_base::write;
1320
1321
0
    void write(const int8_t val) override {
1322
0
        const auto real_val = static_cast<uint8_t>(val);
1323
0
        const auto ret = fputc(real_val, file);
1324
0
        written_bytes++;
1325
0
        if (ret != real_val) {
1326
0
            throw std::runtime_error("unexpected fputc result '" + std::to_string(ret) + "' instead of '" + std::to_string((int)real_val) + "'");
1327
0
        }
1328
0
    }
1329
1330
0
    void write(const std::vector<int8_t> & val) override {
1331
0
        const auto ret = fwrite(val.data(), 1, val.size(), file);
1332
0
        written_bytes += val.size();
1333
0
        if (ret != val.size()) {
1334
0
            throw std::runtime_error("unexpected fwrite number of bytes written, '" + std::to_string(ret) + "' instead of '" + std::to_string(val.size()) + "'");
1335
0
        }
1336
0
    }
1337
1338
0
    void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
1339
0
        GGML_ASSERT(written_bytes - offset_data == info.offset);
1340
1341
0
        GGML_ASSERT(ggml_is_contiguous(&info.t));
1342
0
        const size_t nbytes = ggml_nbytes(&info.t);
1343
1344
0
        std::vector<int8_t> buf(nbytes);
1345
0
        if (info.t.buffer) {
1346
0
            ggml_backend_tensor_get(&info.t, buf.data(), 0, nbytes);
1347
0
        } else {
1348
0
            GGML_ASSERT(info.t.data);
1349
0
            memcpy(buf.data(), info.t.data, nbytes);
1350
0
        }
1351
0
        write(buf);
1352
1353
0
        pad(alignment);
1354
0
    }
1355
};
1356
1357
template <typename writer_t>
1358
0
static void gguf_write_out(const struct gguf_context * ctx, writer_t & gw, bool only_meta) {
1359
0
    const int64_t n_kv      = gguf_get_n_kv(ctx);
1360
0
    const int64_t n_tensors = gguf_get_n_tensors(ctx);
1361
1362
    // write header
1363
0
    gw.write(GGUF_MAGIC[0]);
1364
0
    gw.write(GGUF_MAGIC[1]);
1365
0
    gw.write(GGUF_MAGIC[2]);
1366
0
    gw.write(GGUF_MAGIC[3]);
1367
0
    gw.write(ctx->version);
1368
0
    gw.write(n_tensors);
1369
0
    gw.write(n_kv);
1370
1371
    // write key-value pairs
1372
0
    for (int64_t i = 0; i < n_kv; ++i) {
1373
0
        gw.write(ctx->kv[i]);
1374
0
    }
1375
1376
    // write tensor info
1377
0
    for (int64_t i = 0; i < n_tensors; ++i) {
1378
0
        gw.write_tensor_meta(ctx->info[i]);
1379
0
    }
1380
1381
    // we require the data section to be aligned
1382
0
    gw.pad(ctx->alignment);
1383
1384
0
    if (only_meta) {
1385
0
        return;
1386
0
    }
1387
1388
0
    const size_t offset_data = gw.written_bytes;
1389
1390
    // write tensor data
1391
0
    for (int64_t i = 0; i < n_tensors; ++i) {
1392
0
        gw.write_tensor_data(ctx->info[i], offset_data, ctx->alignment);
1393
0
    }
1394
0
}
Unexecuted instantiation: gguf.cpp:void gguf_write_out<gguf_writer_buf>(gguf_context const*, gguf_writer_buf&, bool)
Unexecuted instantiation: gguf.cpp:void gguf_write_out<gguf_writer_file>(gguf_context const*, gguf_writer_file&, bool)
1395
1396
0
void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) {
1397
0
    gguf_writer_buf gw(buf);
1398
0
    gguf_write_out(ctx, gw, only_meta);
1399
0
}
1400
1401
0
bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
1402
0
    FILE * file = ggml_fopen(fname, "wb");
1403
1404
0
    if (!file) {
1405
0
        GGML_LOG_ERROR("%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
1406
0
        return false;
1407
0
    }
1408
1409
0
    try {
1410
0
        gguf_writer_file gw(file);
1411
0
        gguf_write_out(ctx, gw, only_meta);
1412
0
    } catch (const std::runtime_error& ex) {
1413
0
        GGML_LOG_ERROR("%s: failed to write GGUF data into '%s': %s\n", __func__, fname, ex.what());
1414
0
        fclose(file);
1415
0
        return false;
1416
0
    }
1417
1418
0
    fclose(file);
1419
0
    return true;
1420
0
}
1421
1422
0
size_t gguf_get_meta_size(const struct gguf_context * ctx) {
1423
    // only return size
1424
0
    std::vector<int8_t> buf;
1425
0
    gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1426
0
    return buf.size();
1427
0
}
1428
1429
0
void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
1430
0
    std::vector<int8_t> buf;
1431
0
    gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1432
0
    memcpy(data, buf.data(), buf.size());
1433
0
}