Coverage Report

Created: 2026-03-07 06:35

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/ggml/src/gguf.cpp
Line
Count
Source
1
#include "ggml.h"
2
#include "ggml-backend.h"
3
#include "ggml-impl.h"
4
#include "gguf.h"
5
6
#include <cinttypes>
7
#include <cstddef>
8
#include <cstdint>
9
#include <cstdio>
10
#include <cstdlib>
11
#include <cstring>
12
#include <map>
13
#include <new>
14
#include <stdexcept>
15
#include <string>
16
#include <vector>
17
18
0
#define GGUF_MAX_STRING_LENGTH  (1024*1024*1024)
19
0
#define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024)
20
21
#ifdef _WIN32
22
#    define gguf_ftell _ftelli64
23
#    define gguf_fseek _fseeki64
24
#else
25
0
#    define gguf_ftell ftello
26
0
#    define gguf_fseek fseeko
27
#endif
28
29
template <typename T>
30
struct type_to_gguf_type;
31
32
template <>
33
struct type_to_gguf_type<uint8_t> {
34
    static constexpr enum gguf_type value = GGUF_TYPE_UINT8;
35
};
36
37
template <>
38
struct type_to_gguf_type<int8_t> {
39
    static constexpr enum gguf_type value = GGUF_TYPE_INT8;
40
};
41
42
template <>
43
struct type_to_gguf_type<uint16_t> {
44
    static constexpr enum gguf_type value = GGUF_TYPE_UINT16;
45
};
46
47
template <>
48
struct type_to_gguf_type<int16_t> {
49
    static constexpr enum gguf_type value = GGUF_TYPE_INT16;
50
};
51
52
template <>
53
struct type_to_gguf_type<uint32_t> {
54
    static constexpr enum gguf_type value = GGUF_TYPE_UINT32;
55
};
56
57
template <>
58
struct type_to_gguf_type<int32_t> {
59
    static constexpr enum gguf_type value = GGUF_TYPE_INT32;
60
};
61
62
template <>
63
struct type_to_gguf_type<float> {
64
    static constexpr enum gguf_type value = GGUF_TYPE_FLOAT32;
65
};
66
67
template <>
68
struct type_to_gguf_type<bool> {
69
    static constexpr enum gguf_type value = GGUF_TYPE_BOOL;
70
};
71
72
template <>
73
struct type_to_gguf_type<std::string> {
74
    static constexpr enum gguf_type value = GGUF_TYPE_STRING;
75
};
76
77
template <>
78
struct type_to_gguf_type<uint64_t> {
79
    static constexpr enum gguf_type value = GGUF_TYPE_UINT64;
80
};
81
82
template <>
83
struct type_to_gguf_type<int64_t> {
84
    static constexpr enum gguf_type value = GGUF_TYPE_INT64;
85
};
86
87
template <>
88
struct type_to_gguf_type<double> {
89
    static constexpr enum gguf_type value = GGUF_TYPE_FLOAT64;
90
};
91
92
static const std::map<gguf_type, size_t> GGUF_TYPE_SIZE = {
93
    {GGUF_TYPE_UINT8,   sizeof(uint8_t)},
94
    {GGUF_TYPE_INT8,    sizeof(int8_t)},
95
    {GGUF_TYPE_UINT16,  sizeof(uint16_t)},
96
    {GGUF_TYPE_INT16,   sizeof(int16_t)},
97
    {GGUF_TYPE_UINT32,  sizeof(uint32_t)},
98
    {GGUF_TYPE_INT32,   sizeof(int32_t)},
99
    {GGUF_TYPE_FLOAT32, sizeof(float)},
100
    {GGUF_TYPE_BOOL,    sizeof(int8_t)},
101
    {GGUF_TYPE_STRING,  0}, // undefined
102
    {GGUF_TYPE_ARRAY,   0}, // undefined
103
    {GGUF_TYPE_UINT64,  sizeof(uint64_t)},
104
    {GGUF_TYPE_INT64,   sizeof(int64_t)},
105
    {GGUF_TYPE_FLOAT64, sizeof(double)},
106
};
107
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
108
109
static const std::map<gguf_type, const char *> GGUF_TYPE_NAME = {
110
    {GGUF_TYPE_UINT8,   "u8"},
111
    {GGUF_TYPE_INT8,    "i8"},
112
    {GGUF_TYPE_UINT16,  "u16"},
113
    {GGUF_TYPE_INT16,   "i16"},
114
    {GGUF_TYPE_UINT32,  "u32"},
115
    {GGUF_TYPE_INT32,   "i32"},
116
    {GGUF_TYPE_FLOAT32, "f32"},
117
    {GGUF_TYPE_BOOL,    "bool"},
118
    {GGUF_TYPE_STRING,  "str"},
119
    {GGUF_TYPE_ARRAY,   "arr"},
120
    {GGUF_TYPE_UINT64,  "u64"},
121
    {GGUF_TYPE_INT64,   "i64"},
122
    {GGUF_TYPE_FLOAT64, "f64"},
123
};
124
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
125
126
0
size_t gguf_type_size(enum gguf_type type) {
127
0
    auto it = GGUF_TYPE_SIZE.find(type);
128
0
    return it == GGUF_TYPE_SIZE.end() ? 0 : it->second;
129
0
}
130
131
struct gguf_kv {
132
    std::string key;
133
134
    bool is_array;
135
    enum gguf_type type;
136
137
    std::vector<int8_t>      data;
138
    std::vector<std::string> data_string;
139
140
    template <typename T>
141
    gguf_kv(const std::string & key, const T value)
142
0
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
0
        GGML_ASSERT(!key.empty());
144
0
        data.resize(sizeof(T));
145
0
        memcpy(data.data(), &value, sizeof(T));
146
0
    }
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned char)
Unexecuted instantiation: gguf_kv::gguf_kv<signed char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, signed char)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned short)
Unexecuted instantiation: gguf_kv::gguf_kv<short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, short)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned int)
Unexecuted instantiation: gguf_kv::gguf_kv<int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int)
Unexecuted instantiation: gguf_kv::gguf_kv<float>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, float)
Unexecuted instantiation: gguf_kv::gguf_kv<bool>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long)
Unexecuted instantiation: gguf_kv::gguf_kv<long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, long)
Unexecuted instantiation: gguf_kv::gguf_kv<double>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, double)
147
148
    template <typename T>
149
    gguf_kv(const std::string & key, const std::vector<T> & value)
150
0
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
0
        GGML_ASSERT(!key.empty());
152
0
        data.resize(value.size()*sizeof(T));
153
0
        for (size_t i = 0; i < value.size(); ++i) {
154
0
            const T tmp = value[i];
155
0
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
0
        }
157
0
    }
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<signed char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<signed char, std::__1::allocator<signed char> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned short, std::__1::allocator<unsigned short> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<short, std::__1::allocator<short> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<int, std::__1::allocator<int> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<float>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<float, std::__1::allocator<float> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<bool>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<bool, std::__1::allocator<bool> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<unsigned long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<long, std::__1::allocator<long> > const&)
Unexecuted instantiation: gguf_kv::gguf_kv<double>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<double, std::__1::allocator<double> > const&)
158
159
    gguf_kv(const std::string & key, const std::string & value)
160
0
            : key(key), is_array(false), type(GGUF_TYPE_STRING) {
161
0
        GGML_ASSERT(!key.empty());
162
0
        data_string.push_back(value);
163
0
    }
164
165
    gguf_kv(const std::string & key, const std::vector<std::string> & value)
166
0
            : key(key), is_array(true), type(GGUF_TYPE_STRING) {
167
0
        GGML_ASSERT(!key.empty());
168
0
        data_string = value;
169
0
    }
170
171
0
    const std::string & get_key() const {
172
0
        return key;
173
0
    }
174
175
0
    const enum gguf_type & get_type() const {
176
0
        return type;
177
0
    }
178
179
0
    size_t get_ne() const {
180
0
        if (type == GGUF_TYPE_STRING) {
181
0
            const size_t ne = data_string.size();
182
0
            GGML_ASSERT(is_array || ne == 1);
183
0
            return ne;
184
0
        }
185
0
        const size_t type_size = gguf_type_size(type);
186
0
        GGML_ASSERT(data.size() % type_size == 0);
187
0
        const size_t ne = data.size() / type_size;
188
0
        GGML_ASSERT(is_array || ne == 1);
189
0
        return ne;
190
0
    }
191
192
    template <typename T>
193
0
    const T & get_val(const size_t i = 0) const {
194
0
        GGML_ASSERT(type_to_gguf_type<T>::value == type);
195
0
        if constexpr (std::is_same<T, std::string>::value) {
196
0
            GGML_ASSERT(data_string.size() >= i+1);
197
0
            return data_string[i];
198
0
        }
199
0
        const size_t type_size = gguf_type_size(type);
200
0
        GGML_ASSERT(data.size() % type_size == 0);
201
0
        GGML_ASSERT(data.size() >= (i+1)*type_size);
202
0
        return reinterpret_cast<const T *>(data.data())[i];
203
0
    }
Unexecuted instantiation: unsigned char const& gguf_kv::get_val<unsigned char>(unsigned long) const
Unexecuted instantiation: signed char const& gguf_kv::get_val<signed char>(unsigned long) const
Unexecuted instantiation: unsigned short const& gguf_kv::get_val<unsigned short>(unsigned long) const
Unexecuted instantiation: short const& gguf_kv::get_val<short>(unsigned long) const
Unexecuted instantiation: unsigned int const& gguf_kv::get_val<unsigned int>(unsigned long) const
Unexecuted instantiation: int const& gguf_kv::get_val<int>(unsigned long) const
Unexecuted instantiation: float const& gguf_kv::get_val<float>(unsigned long) const
Unexecuted instantiation: unsigned long const& gguf_kv::get_val<unsigned long>(unsigned long) const
Unexecuted instantiation: long const& gguf_kv::get_val<long>(unsigned long) const
Unexecuted instantiation: double const& gguf_kv::get_val<double>(unsigned long) const
Unexecuted instantiation: bool const& gguf_kv::get_val<bool>(unsigned long) const
Unexecuted instantiation: std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const& gguf_kv::get_val<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(unsigned long) const
204
205
0
    void cast(const enum gguf_type new_type) {
206
0
        const size_t new_type_size = gguf_type_size(new_type);
207
0
        GGML_ASSERT(data.size() % new_type_size == 0);
208
0
        type = new_type;
209
0
    }
210
};
211
212
struct gguf_tensor_info {
213
    struct ggml_tensor t; // for holding the equivalent info
214
    uint64_t offset;      // offset from start of `data`, must be a multiple of `ALIGNMENT`
215
};
216
217
struct gguf_context {
218
    uint32_t version = GGUF_VERSION;
219
220
    std::vector<struct gguf_kv> kv;
221
    std::vector<struct gguf_tensor_info> info;
222
223
    size_t alignment = GGUF_DEFAULT_ALIGNMENT;
224
    size_t offset    = 0; // offset of `data` from beginning of file
225
    size_t size      = 0; // size of `data` in bytes
226
227
    void * data = nullptr;
228
};
229
230
struct gguf_reader {
231
0
    gguf_reader(FILE * file) : file(file) {
232
        // read the remaining bytes once and update on each read
233
0
        nbytes_remain = file_remain(file);
234
0
    }
235
236
    // helper for remaining bytes in a file
237
0
    static uint64_t file_remain(FILE * file) {
238
0
        const int64_t cur = gguf_ftell(file);
239
0
        if (cur < 0) {
240
0
            return 0;
241
0
        }
242
0
        if (gguf_fseek(file, 0, SEEK_END) != 0) {
243
0
            gguf_fseek(file, cur, SEEK_SET);
244
245
0
            return 0;
246
0
        }
247
0
        const int64_t end = gguf_ftell(file);
248
0
        if (end < 0) {
249
0
            gguf_fseek(file, cur, SEEK_SET);
250
251
0
            return 0;
252
0
        }
253
0
        gguf_fseek(file, cur, SEEK_SET);
254
0
        return static_cast<uint64_t>(end - cur);
255
0
    }
256
257
    template <typename T>
258
0
    bool read(T & dst) const {
259
0
        const size_t size = sizeof(dst);
260
0
        if (nbytes_remain < size) {
261
0
            return false;
262
0
        }
263
0
        const size_t nread = fread(&dst, 1, size, file);
264
0
        nbytes_remain -= nread;
265
0
        return nread == size;
266
0
    }
Unexecuted instantiation: bool gguf_reader::read<int>(int&) const
Unexecuted instantiation: bool gguf_reader::read<unsigned long>(unsigned long&) const
Unexecuted instantiation: bool gguf_reader::read<char>(char&) const
Unexecuted instantiation: bool gguf_reader::read<unsigned int>(unsigned int&) const
Unexecuted instantiation: bool gguf_reader::read<long>(long&) const
Unexecuted instantiation: bool gguf_reader::read<unsigned char>(unsigned char&) const
Unexecuted instantiation: bool gguf_reader::read<signed char>(signed char&) const
Unexecuted instantiation: bool gguf_reader::read<unsigned short>(unsigned short&) const
Unexecuted instantiation: bool gguf_reader::read<short>(short&) const
Unexecuted instantiation: bool gguf_reader::read<float>(float&) const
Unexecuted instantiation: bool gguf_reader::read<double>(double&) const
267
268
    template <typename T>
269
0
    bool read(std::vector<T> & dst, const size_t n) const {
270
0
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
0
            return false;
272
0
        }
273
0
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
0
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
0
                return false;
277
0
            }
278
0
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
0
                return false;
280
0
            }
281
0
        } else {
282
0
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
0
            if (nbytes_remain < n * sizeof(T)) {
286
0
                return false;
287
0
            }
288
0
        }
289
0
        dst.resize(n);
290
0
        for (size_t i = 0; i < dst.size(); ++i) {
291
0
            if constexpr (std::is_same<T, bool>::value) {
292
0
                bool tmp;
293
0
                if (!read(tmp)) {
294
0
                    return false;
295
0
                }
296
0
                dst[i] = tmp;
297
0
            } else {
298
0
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
0
            }
302
0
        }
303
0
        return true;
304
0
    }
Unexecuted instantiation: bool gguf_reader::read<char>(std::__1::vector<char, std::__1::allocator<char> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<unsigned char>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<signed char>(std::__1::vector<signed char, std::__1::allocator<signed char> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<unsigned short>(std::__1::vector<unsigned short, std::__1::allocator<unsigned short> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<short>(std::__1::vector<short, std::__1::allocator<short> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<unsigned int>(std::__1::vector<unsigned int, std::__1::allocator<unsigned int> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<int>(std::__1::vector<int, std::__1::allocator<int> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<float>(std::__1::vector<float, std::__1::allocator<float> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<bool>(std::__1::vector<bool, std::__1::allocator<bool> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<unsigned long>(std::__1::vector<unsigned long, std::__1::allocator<unsigned long> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<long>(std::__1::vector<long, std::__1::allocator<long> >&, unsigned long) const
Unexecuted instantiation: bool gguf_reader::read<double>(std::__1::vector<double, std::__1::allocator<double> >&, unsigned long) const
305
306
0
    bool read(bool & dst) const {
307
0
        int8_t tmp = -1;
308
0
        if (!read(tmp)) {
309
0
            return false;
310
0
        }
311
0
        dst = tmp != 0;
312
0
        return true;
313
0
    }
314
315
0
    bool read(enum ggml_type & dst) const {
316
0
        int32_t tmp = -1;
317
0
        if (!read(tmp)) {
318
0
            return false;
319
0
        }
320
0
        dst = ggml_type(tmp);
321
0
        return true;
322
0
    }
323
324
0
    bool read(enum gguf_type & dst) const {
325
0
        int32_t tmp = -1;
326
0
        if (!read(tmp)) {
327
0
            return false;
328
0
        }
329
0
        dst = gguf_type(tmp);
330
0
        return true;
331
0
    }
332
333
0
    bool read(std::string & dst) const {
334
0
        uint64_t size = 0;
335
0
        if (!read(size)) {
336
0
            return false;
337
0
        }
338
0
        if (size > GGUF_MAX_STRING_LENGTH) {
339
0
            GGML_LOG_ERROR("%s: string length %" PRIu64 " exceeds maximum %" PRIu64 "\n", __func__, size, (uint64_t) GGUF_MAX_STRING_LENGTH);
340
0
            return false;
341
0
        }
342
0
        if (size > nbytes_remain) {
343
0
            GGML_LOG_ERROR("%s: string length %" PRIu64 " exceeds remaining file size %" PRIu64 " bytes\n", __func__, size, nbytes_remain);
344
0
            return false;
345
0
        }
346
0
        dst.resize(static_cast<size_t>(size));
347
0
        const size_t nread = fread(dst.data(), 1, size, file);
348
0
        nbytes_remain -= nread;
349
0
        return nread == size;
350
0
    }
351
352
0
    bool read(void * dst, const size_t size) const {
353
0
        if (size > nbytes_remain) {
354
0
            return false;
355
0
        }
356
0
        const size_t nread = fread(dst, 1, size, file);
357
0
        nbytes_remain -= nread;
358
0
        return nread == size;
359
0
    }
360
361
private:
362
    FILE * file;
363
364
    mutable uint64_t nbytes_remain;
365
};
366
367
0
struct gguf_context * gguf_init_empty(void) {
368
0
    return new gguf_context;
369
0
}
370
371
// Reads one value of type T (or, if `is_array` is set, `n` values) from `gr`
// and appends it to `kv` under `key`.
// Returns false if the read fails or if an allocation/length error is raised
// while reading or while storing the value.
template<typename T>
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
    // The handlers cover both paths and the emplace_back calls: storing the value
    // makes another copy of it and a scalar string read resizes a std::string,
    // so either can throw just like the array read can.
    try {
        if (is_array) {
            std::vector<T> value;
            if (!gr.read(value, n)) {
                return false;
            }
            kv.emplace_back(key, value);
        } else {
            T value;
            if (!gr.read(value)) {
                return false;
            }
            kv.emplace_back(key, value);
        }
    } catch (const std::length_error &) {
        GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
        return false;
    } catch (const std::bad_alloc &) {
        GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
        return false;
    }
    return true;
}
Unexecuted instantiation: bool gguf_read_emplace_helper<unsigned char>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<signed char>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<unsigned short>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<short>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<unsigned int>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<int>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<float>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<bool>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<unsigned long>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<long>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Unexecuted instantiation: bool gguf_read_emplace_helper<double>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
396
397
0
struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
398
0
    const struct gguf_reader gr(file);
399
0
    struct gguf_context * ctx = new gguf_context;
400
401
0
    bool ok = true;
402
403
    // file magic
404
0
    {
405
0
        std::vector<char> magic;
406
0
        ok = ok && gr.read(magic, 4);
407
408
0
        if (!ok) {
409
0
            GGML_LOG_ERROR("%s: failed to read magic\n", __func__);
410
0
            gguf_free(ctx);
411
0
            return nullptr;
412
0
        }
413
414
0
        for (uint32_t i = 0; i < magic.size(); i++) {
415
0
            if (magic[i] != GGUF_MAGIC[i]) {
416
0
                char c0 = isprint(magic[0]) ? magic[0] : '?';
417
0
                char c1 = isprint(magic[1]) ? magic[1] : '?';
418
0
                char c2 = isprint(magic[2]) ? magic[2] : '?';
419
0
                char c3 = isprint(magic[3]) ? magic[3] : '?';
420
0
                GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, c0, c1, c2, c3);
421
0
                gguf_free(ctx);
422
0
                return nullptr;
423
0
            }
424
0
        }
425
0
    }
426
427
    // header
428
0
    int64_t n_kv      = 0;
429
0
    int64_t n_tensors = 0;
430
431
0
    if (ok && gr.read(ctx->version)) {
432
0
        if (ok && ctx->version == 0) {
433
0
            GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version);
434
0
            ok = false;
435
0
        }
436
437
        /*
438
         * bit layout is different when reading non-native endian models.
439
         * assuming that the GGUF version is 3, the non-native endian model
440
         * would read it as 0x30000000. we can use the AND operation against
441
         * the last 4 hexadecimal digits to check if the model is the same
442
         * endianness as the host system.
443
        */
444
0
        if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) {
445
0
            GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
446
0
            ok = false;
447
0
        }
448
449
0
        if (ok && ctx->version == 1) {
450
0
            GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
451
0
            ok = false;
452
0
        }
453
0
        if (ok && ctx->version > GGUF_VERSION) {
454
0
            GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
455
0
                __func__, ctx->version, GGUF_VERSION);
456
0
            ok = false;
457
0
        }
458
0
    } else {
459
0
        ok = false;
460
0
    }
461
462
0
    if (ok && gr.read(n_tensors)) {
463
0
        static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
464
0
        if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) {
465
0
            GGML_LOG_ERROR("%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
466
0
                __func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info));
467
0
            ok = false;
468
0
        }
469
0
    } else {
470
0
        ok = false;
471
0
    }
472
473
0
    if (ok && gr.read(n_kv)) {
474
0
        static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
475
0
        if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(gguf_kv))) {
476
0
            GGML_LOG_ERROR("%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
477
0
                    __func__, n_kv, SIZE_MAX/sizeof(gguf_kv));
478
0
            ok = false;
479
0
        }
480
0
    } else {
481
0
        ok = false;
482
0
    }
483
484
0
    if (!ok) {
485
0
        GGML_LOG_ERROR("%s: failed to read header\n", __func__);
486
0
        gguf_free(ctx);
487
0
        return nullptr;
488
0
    }
489
490
    // KV pairs
491
0
    {
492
0
        for (int64_t i = 0; ok && i < n_kv; ++i) {
493
0
            std::string key;
494
0
            gguf_type   type     = gguf_type(-1);
495
0
            bool        is_array = false;
496
0
            uint64_t    n        = 1;
497
498
0
            try {
499
0
                ok = ok && gr.read(key);
500
0
            } catch (std::length_error &) {
501
0
                GGML_LOG_ERROR("%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
502
0
                ok = false;
503
0
            } catch (std::bad_alloc &) {
504
0
                GGML_LOG_ERROR("%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
505
0
                ok = false;
506
0
            }
507
0
            for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
508
0
                if (key == ctx->kv[j].key) {
509
0
                    GGML_LOG_ERROR("%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
510
0
                    ok = false;
511
0
                }
512
0
            }
513
0
            if (!ok) {
514
0
                break;
515
0
            }
516
517
0
            ok = ok && gr.read(type);
518
0
            if (type == GGUF_TYPE_ARRAY) {
519
0
                is_array = true;
520
0
                ok = ok && gr.read(type);
521
0
                ok = ok && gr.read(n);
522
0
            }
523
0
            if (!ok) {
524
0
                break;
525
0
            }
526
527
0
            switch (type) {
528
0
                case GGUF_TYPE_UINT8:   ok = ok && gguf_read_emplace_helper<uint8_t>    (gr, ctx->kv, key, is_array, n); break;
529
0
                case GGUF_TYPE_INT8:    ok = ok && gguf_read_emplace_helper<int8_t>     (gr, ctx->kv, key, is_array, n); break;
530
0
                case GGUF_TYPE_UINT16:  ok = ok && gguf_read_emplace_helper<uint16_t>   (gr, ctx->kv, key, is_array, n); break;
531
0
                case GGUF_TYPE_INT16:   ok = ok && gguf_read_emplace_helper<int16_t>    (gr, ctx->kv, key, is_array, n); break;
532
0
                case GGUF_TYPE_UINT32:  ok = ok && gguf_read_emplace_helper<uint32_t>   (gr, ctx->kv, key, is_array, n); break;
533
0
                case GGUF_TYPE_INT32:   ok = ok && gguf_read_emplace_helper<int32_t>    (gr, ctx->kv, key, is_array, n); break;
534
0
                case GGUF_TYPE_FLOAT32: ok = ok && gguf_read_emplace_helper<float>      (gr, ctx->kv, key, is_array, n); break;
535
0
                case GGUF_TYPE_BOOL:    ok = ok && gguf_read_emplace_helper<bool>       (gr, ctx->kv, key, is_array, n); break;
536
0
                case GGUF_TYPE_STRING:  ok = ok && gguf_read_emplace_helper<std::string>(gr, ctx->kv, key, is_array, n); break;
537
0
                case GGUF_TYPE_UINT64:  ok = ok && gguf_read_emplace_helper<uint64_t>   (gr, ctx->kv, key, is_array, n); break;
538
0
                case GGUF_TYPE_INT64:   ok = ok && gguf_read_emplace_helper<int64_t>    (gr, ctx->kv, key, is_array, n); break;
539
0
                case GGUF_TYPE_FLOAT64: ok = ok && gguf_read_emplace_helper<double>     (gr, ctx->kv, key, is_array, n); break;
540
0
                case GGUF_TYPE_ARRAY:
541
0
                default:
542
0
                    {
543
0
                        GGML_LOG_ERROR("%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
544
0
                        ok = false;
545
0
                    } break;
546
0
            }
547
0
        }
548
549
0
        if (!ok) {
550
0
            GGML_LOG_ERROR("%s: failed to read key-value pairs\n", __func__);
551
0
            gguf_free(ctx);
552
0
            return nullptr;
553
0
        }
554
0
        GGML_ASSERT(int64_t(ctx->kv.size()) == n_kv);
555
556
0
        const int alignment_idx = gguf_find_key(ctx, GGUF_KEY_GENERAL_ALIGNMENT);
557
0
        ctx->alignment = alignment_idx == -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx);
558
559
0
        if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
560
0
            GGML_LOG_ERROR("%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
561
0
            gguf_free(ctx);
562
0
            return nullptr;
563
0
        }
564
0
    }
565
566
    // read the tensor info
567
0
    for (int64_t i = 0; ok && i < n_tensors; ++i) {
568
0
        struct gguf_tensor_info info;
569
570
        // tensor name
571
0
        {
572
0
            std::string name;
573
0
            try {
574
0
                ok = ok && gr.read(name);
575
0
            } catch (std::length_error &) {
576
0
                GGML_LOG_ERROR("%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
577
0
                ok = false;
578
0
            } catch (std::bad_alloc &) {
579
0
                GGML_LOG_ERROR("%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
580
0
                ok = false;
581
0
            }
582
0
            if (name.length() >= GGML_MAX_NAME) {
583
0
                GGML_LOG_ERROR("%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME);
584
0
                ok = false;
585
0
                break;
586
0
            }
587
0
            ggml_set_name(&info.t, name.c_str());
588
589
            // make sure there are no duplicate tensor names
590
0
            for (int64_t j = 0; ok && j < i; ++j) {
591
0
                if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
592
0
                    GGML_LOG_ERROR("%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
593
0
                    ok = false;
594
0
                    break;
595
0
                }
596
0
            }
597
0
        }
598
0
        if (!ok) {
599
0
            break;
600
0
        }
601
602
        // tensor shape
603
0
        {
604
0
            uint32_t n_dims = 0;
605
0
            ok = ok && gr.read(n_dims);
606
0
            if (n_dims > GGML_MAX_DIMS) {
607
0
                GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
608
0
                    __func__, info.t.name, n_dims, GGML_MAX_DIMS);
609
0
                ok = false;
610
0
                break;
611
0
            }
612
0
            for (uint32_t j = 0; ok && j < GGML_MAX_DIMS; ++j) {
613
0
                info.t.ne[j] = 1;
614
0
                if (j < n_dims) {
615
0
                    ok = ok && gr.read(info.t.ne[j]);
616
0
                }
617
618
                // check that all ne are non-negative
619
0
                if (info.t.ne[j] < 0) {
620
0
                    GGML_LOG_ERROR("%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
621
0
                        __func__, info.t.name, j, info.t.ne[j]);
622
0
                    ok = false;
623
0
                    break;
624
0
                }
625
0
            }
626
627
            // check that the total number of elements is representable
628
0
            if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) ||
629
0
                       (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
630
0
                       (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {
631
632
0
                GGML_LOG_ERROR("%s: total number of elements in tensor '%s' with shape "
633
0
                    "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
634
0
                    __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
635
0
                ok = false;
636
0
                break;
637
0
            }
638
0
        }
639
0
        if (!ok) {
640
0
            break;
641
0
        }
642
643
        // tensor type
644
0
        {
645
0
            ok = ok && gr.read(info.t.type);
646
647
            // check that tensor type is within defined range
648
0
            if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
649
0
                GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d. should be in [0, %d)\n",
650
0
                    __func__, info.t.name, info.t.type, GGML_TYPE_COUNT);
651
0
                ok = false;
652
0
                break;
653
0
            }
654
0
            const size_t  type_size = ggml_type_size(info.t.type);
655
0
            const int64_t blck_size = ggml_blck_size(info.t.type);
656
657
            // check that row size is divisible by block size
658
0
            if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
659
0
                GGML_LOG_ERROR("%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
660
0
                    "not a multiple of block size (%" PRId64 ")\n",
661
0
                    __func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size);
662
0
                ok = false;
663
0
                break;
664
0
            }
665
666
            // check that the size of the tensor in bytes is representable
667
0
            if (ok && uint64_t(ggml_nelements(&info.t)/ggml_blck_size(info.t.type)) > SIZE_MAX/ggml_type_size(info.t.type)) {
668
0
                GGML_LOG_ERROR("%s: tensor '%s' with shape (%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") has a size in bytes > %zu\n",
669
0
                    __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], SIZE_MAX);
670
0
                ok = false;
671
0
                break;
672
0
            }
673
674
            // calculate byte offsets given the tensor shape and type
675
0
            info.t.nb[0] = type_size;
676
0
            info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size);
677
0
            for (int j = 2; j < GGML_MAX_DIMS; ++j) {
678
0
                info.t.nb[j] = info.t.nb[j - 1]*info.t.ne[j - 1];
679
0
            }
680
0
        }
681
0
        if (!ok) {
682
0
            break;
683
0
        }
684
685
        // tensor data offset within buffer
686
0
        ok = ok && gr.read(info.offset);
687
688
0
        ctx->info.push_back(info);
689
0
    }
690
691
0
    if (!ok) {
692
0
        GGML_LOG_ERROR("%s: failed to read tensor info\n", __func__);
693
0
        gguf_free(ctx);
694
0
        return nullptr;
695
0
    }
696
0
    GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors);
697
698
    // we require the data section to be aligned, so take into account any padding
699
0
    if (gguf_fseek(file, GGML_PAD(gguf_ftell(file), ctx->alignment), SEEK_SET) != 0) {
700
0
        GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
701
0
        gguf_free(ctx);
702
0
        return nullptr;
703
0
    }
704
705
    // store the current file offset - this is where the data section starts
706
0
    ctx->offset = gguf_ftell(file);
707
708
    // compute the total size of the data section, taking into account the alignment
709
0
    {
710
0
        ctx->size = 0;
711
0
        for (size_t i = 0; i < ctx->info.size(); ++i) {
712
0
            const gguf_tensor_info & ti = ctx->info[i];
713
0
            if (ti.offset != ctx->size) {
714
0
                GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
715
0
                    __func__, ti.t.name, ti.offset, ctx->size);
716
0
                GGML_LOG_ERROR("%s: failed to read tensor data\n", __func__);
717
0
                gguf_free(ctx);
718
0
                return nullptr;
719
0
            }
720
0
            size_t padded_size = GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
721
0
            if (SIZE_MAX - ctx->size < padded_size) {
722
0
                GGML_LOG_ERROR("%s: tensor '%s' size overflow, cannot accumulate size %zu + %zu\n",
723
0
                    __func__, ti.t.name, ctx->size, padded_size);
724
0
                gguf_free(ctx);
725
0
                return nullptr;
726
0
            }
727
0
            ctx->size += padded_size;
728
0
        }
729
0
    }
730
731
    // load the tensor data only if requested
732
0
    if (params.ctx != nullptr) {
733
        // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
734
        // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
735
        //   the ggml_tensor structs to the appropriate locations in the binary blob
736
737
        // compute the exact size needed for the new ggml_context
738
0
        size_t mem_size = 0;
739
0
        if (params.no_alloc) {
740
0
            if (n_tensors != 0 && SIZE_MAX / n_tensors < ggml_tensor_overhead()) {
741
0
                GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
742
0
                gguf_free(ctx);
743
0
                return nullptr;
744
0
            }
745
746
0
            const size_t overhead = n_tensors * ggml_tensor_overhead();
747
748
0
            mem_size = overhead;
749
0
        } else {
750
0
            if ((n_tensors + 1) != 0 && SIZE_MAX / (n_tensors + 1) < ggml_tensor_overhead()) {
751
0
                GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
752
0
                gguf_free(ctx);
753
0
                return nullptr;
754
0
            }
755
756
0
            const size_t overhead = (n_tensors + 1) * ggml_tensor_overhead();
757
758
0
            if (SIZE_MAX - overhead < ctx->size) {
759
0
                GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
760
0
                gguf_free(ctx);
761
0
                return nullptr;
762
0
            }
763
764
0
            mem_size = overhead + ctx->size;
765
0
        }
766
767
0
        struct ggml_init_params pdata = {
768
0
            /*mem_size   =*/ mem_size,
769
0
            /*mem_buffer =*/ nullptr,
770
0
            /*no_alloc   =*/ params.no_alloc,
771
0
        };
772
773
0
        *params.ctx = ggml_init(pdata);
774
0
        if (*params.ctx == nullptr) {
775
0
            GGML_LOG_ERROR("%s: failed to initialize ggml context for storing tensors\n", __func__);
776
0
            gguf_free(ctx);
777
0
            return nullptr;
778
0
        }
779
780
0
        struct ggml_context * ctx_data = *params.ctx;
781
782
0
        struct ggml_tensor * data = nullptr;
783
784
0
        if (!params.no_alloc) {
785
0
            data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
786
787
0
            ok = ok && data != nullptr;
788
789
0
            if (ok) {
790
0
                ggml_set_name(data, "GGUF tensor data binary blob");
791
0
            }
792
793
            // read the binary blob with the tensor data
794
0
            ok = ok && gr.read(data->data, ctx->size);
795
796
0
            if (!ok) {
797
0
                GGML_LOG_ERROR("%s: failed to read tensor data binary blob\n", __func__);
798
0
                ggml_free(ctx_data);
799
0
                *params.ctx = nullptr;
800
0
                gguf_free(ctx);
801
0
                return nullptr;
802
0
            }
803
804
0
            ctx->data = data->data;
805
0
        }
806
807
0
        ggml_set_no_alloc(ctx_data, true);
808
809
        // create the tensors
810
0
        for (size_t i = 0; i < ctx->info.size(); ++i) {
811
0
            const struct gguf_tensor_info & info = ctx->info[i];
812
813
0
            struct ggml_tensor * cur = ggml_new_tensor(ctx_data, info.t.type, GGML_MAX_DIMS, info.t.ne);
814
815
0
            ok = ok && cur != nullptr;
816
817
0
            if (!ok) {
818
0
                break;
819
0
            }
820
821
0
            ggml_set_name(cur, info.t.name);
822
823
            // point the data member to the appropriate location in the binary blob using the tensor info
824
0
            if (!params.no_alloc) {
825
0
                cur->data = (char *) data->data + info.offset;
826
0
            }
827
0
        }
828
829
0
        if (!ok) {
830
0
            GGML_LOG_ERROR("%s: failed to create tensors\n", __func__);
831
0
            ggml_free(ctx_data);
832
0
            *params.ctx = nullptr;
833
0
            gguf_free(ctx);
834
0
            return nullptr;
835
0
        }
836
837
0
        ggml_set_no_alloc(ctx_data, params.no_alloc);
838
0
    }
839
840
0
    return ctx;
841
0
}
842
843
0
// Open the file `fname` in binary read mode and build a gguf_context from it.
// Returns nullptr (after logging the errno text) when the file cannot be opened;
// otherwise returns whatever gguf_init_from_file_impl() produced.
// The FILE handle is always closed before returning.
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
844
0
    FILE * file = ggml_fopen(fname, "rb");
845
846
0
    if (!file) {
847
0
        GGML_LOG_ERROR("%s: failed to open GGUF file '%s' (%s)\n", __func__, fname, strerror(errno));
848
0
        return nullptr;
849
0
    }
850
851
0
    struct gguf_context * result = gguf_init_from_file_impl(file, params);
852
0
    // the handle is only needed while parsing, so it is closed on success and failure alike
    fclose(file);
853
0
    return result;
854
0
}
855
856
0
// Destroy a gguf_context with `delete`. Passing nullptr is a no-op.
void gguf_free(struct gguf_context * ctx) {
857
0
    if (ctx == nullptr) {
858
0
        return;
859
0
    }
860
0
    delete ctx;
861
0
}
862
863
0
// Look up the name registered for `type` in GGUF_TYPE_NAME.
// Returns nullptr when the table has no entry for `type`.
const char * gguf_type_name(enum gguf_type type) {
864
0
    auto it = GGUF_TYPE_NAME.find(type);
865
0
    return it == GGUF_TYPE_NAME.end() ? nullptr : it->second;
866
0
}
867
868
0
// Return the `version` field stored in the context.
uint32_t gguf_get_version(const struct gguf_context * ctx) {
869
0
    return ctx->version;
870
0
}
871
872
0
// Return the `alignment` field stored in the context.
size_t gguf_get_alignment(const struct gguf_context * ctx) {
873
0
    return ctx->alignment;
874
0
}
875
876
0
// Return the `offset` field stored in the context.
size_t gguf_get_data_offset(const struct gguf_context * ctx) {
877
0
    return ctx->offset;
878
0
}
879
880
0
// Return the number of key-value pairs held in ctx->kv.
int64_t gguf_get_n_kv(const struct gguf_context * ctx) {
881
0
    return ctx->kv.size();
882
0
}
883
884
0
// Linear search for the first key-value pair whose key compares equal (strcmp) to `key`.
// Returns its index, or -1 when no key matches.
int64_t gguf_find_key(const struct gguf_context * ctx, const char * key) {
885
    // return -1 if key not found
886
0
    int64_t keyfound = -1;
887
888
0
    const int64_t n_kv = gguf_get_n_kv(ctx);
889
890
0
    for (int64_t i = 0; i < n_kv; ++i) {
891
0
        if (strcmp(key, gguf_get_key(ctx, i)) == 0) {
892
0
            keyfound = i;
893
0
            break;
894
0
        }
895
0
    }
896
897
0
    return keyfound;
898
0
}
899
900
0
// Return the key string of the pair at index `key_id`.
// Asserts that `key_id` is within [0, gguf_get_n_kv(ctx)).
// The pointer refers to the string held by the kv entry itself.
const char * gguf_get_key(const struct gguf_context * ctx, int64_t key_id) {
901
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
902
0
    return ctx->kv[key_id].get_key().c_str();
903
0
}
904
905
0
// Return the type of the pair at index `key_id`: GGUF_TYPE_ARRAY when the entry is
// flagged as an array, otherwise the entry's own type.
// Asserts that `key_id` is within [0, gguf_get_n_kv(ctx)).
enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int64_t key_id) {
906
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
907
0
    return ctx->kv[key_id].is_array ? GGUF_TYPE_ARRAY : ctx->kv[key_id].get_type();
908
0
}
909
910
0
// Return the element type of the array stored at index `key_id`.
// Asserts that `key_id` is in range and that the entry is flagged as an array.
enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id) {
911
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
912
0
    GGML_ASSERT(ctx->kv[key_id].is_array);
913
0
    return ctx->kv[key_id].get_type();
914
0
}
915
916
0
// Return a pointer to the raw byte buffer (`data`) of the entry at index `key_id`.
// Asserts that `key_id` is in range and that the entry's type is not GGUF_TYPE_STRING.
// NOTE(review): unlike gguf_get_arr_type(), `is_array` is not asserted here - confirm this is intended.
const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id) {
917
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
918
0
    GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING);
919
0
    return ctx->kv[key_id].data.data();
920
0
}
921
922
0
// Return the `i`-th string of the string entry at index `key_id`.
// Asserts that `key_id` is in range and that the entry's type is GGUF_TYPE_STRING.
// NOTE(review): `i` is not range-checked in this function before indexing data_string;
//               callers presumably bound it with gguf_get_arr_n() - confirm.
const char * gguf_get_arr_str(const struct gguf_context * ctx, int64_t key_id, size_t i) {
923
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
924
0
    GGML_ASSERT(ctx->kv[key_id].get_type() == GGUF_TYPE_STRING);
925
0
    return ctx->kv[key_id].data_string[i].c_str();
926
0
}
927
928
0
// Return the number of elements stored in the entry at index `key_id`.
// String entries report the size of `data_string`; all other types report the byte
// length of `data` divided by the per-element size of the entry's type.
// Asserts that `key_id` is in range.
size_t gguf_get_arr_n(const struct gguf_context * ctx, int64_t key_id) {
929
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
930
931
0
    if (ctx->kv[key_id].type == GGUF_TYPE_STRING) {
932
0
        return ctx->kv[key_id].data_string.size();
933
0
    }
934
935
0
    const size_t type_size = gguf_type_size(ctx->kv[key_id].type);
936
0
    // the byte buffer must hold a whole number of elements
    GGML_ASSERT(ctx->kv[key_id].data.size() % type_size == 0);
937
0
    return ctx->kv[key_id].data.size() / type_size;
938
0
}
939
940
0
// Return the value of the entry at index `key_id`, read as uint8_t via get_val<uint8_t>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int64_t key_id) {
941
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
942
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
943
0
    return ctx->kv[key_id].get_val<uint8_t>();
944
0
}
945
946
0
// Return the value of the entry at index `key_id`, read as int8_t via get_val<int8_t>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
int8_t gguf_get_val_i8(const struct gguf_context * ctx, int64_t key_id) {
947
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
948
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
949
0
    return ctx->kv[key_id].get_val<int8_t>();
950
0
}
951
952
0
// Return the value of the entry at index `key_id`, read as uint16_t via get_val<uint16_t>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int64_t key_id) {
953
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
954
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
955
0
    return ctx->kv[key_id].get_val<uint16_t>();
956
0
}
957
958
0
// Return the value of the entry at index `key_id`, read as int16_t via get_val<int16_t>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
int16_t gguf_get_val_i16(const struct gguf_context * ctx, int64_t key_id) {
959
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
960
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
961
0
    return ctx->kv[key_id].get_val<int16_t>();
962
0
}
963
964
0
// Return the value of the entry at index `key_id`, read as uint32_t via get_val<uint32_t>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int64_t key_id) {
965
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
966
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
967
0
    return ctx->kv[key_id].get_val<uint32_t>();
968
0
}
969
970
0
// Return the value of the entry at index `key_id`, read as int32_t via get_val<int32_t>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
int32_t gguf_get_val_i32(const struct gguf_context * ctx, int64_t key_id) {
971
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
972
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
973
0
    return ctx->kv[key_id].get_val<int32_t>();
974
0
}
975
976
0
// Return the value of the entry at index `key_id`, read as float via get_val<float>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
float gguf_get_val_f32(const struct gguf_context * ctx, int64_t key_id) {
977
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
978
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
979
0
    return ctx->kv[key_id].get_val<float>();
980
0
}
981
982
0
// Return the value of the entry at index `key_id`, read as uint64_t via get_val<uint64_t>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int64_t key_id) {
983
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
984
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
985
0
    return ctx->kv[key_id].get_val<uint64_t>();
986
0
}
987
988
0
// Return the value of the entry at index `key_id`, read as int64_t via get_val<int64_t>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
int64_t gguf_get_val_i64(const struct gguf_context * ctx, int64_t key_id) {
989
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
990
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
991
0
    return ctx->kv[key_id].get_val<int64_t>();
992
0
}
993
994
0
// Return the value of the entry at index `key_id`, read as double via get_val<double>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
double gguf_get_val_f64(const struct gguf_context * ctx, int64_t key_id) {
995
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
996
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
997
0
    return ctx->kv[key_id].get_val<double>();
998
0
}
999
1000
0
// Return the value of the entry at index `key_id`, read as bool via get_val<bool>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id) {
1001
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1002
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
1003
0
    return ctx->kv[key_id].get_val<bool>();
1004
0
}
1005
1006
0
// Return the C string of the entry at index `key_id`, obtained via get_val<std::string>().
// Asserts that `key_id` is in range and that the entry holds exactly one element.
const char * gguf_get_val_str(const struct gguf_context * ctx, int64_t key_id) {
1007
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1008
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
1009
0
    return ctx->kv[key_id].get_val<std::string>().c_str();
1010
0
}
1011
1012
0
// Return a pointer to the raw byte buffer (`data`) of the entry at index `key_id`.
// Asserts that `key_id` is in range, that the entry holds exactly one element, and
// that its type is not GGUF_TYPE_STRING.
const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id) {
1013
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1014
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
1015
0
    GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING);
1016
0
    return ctx->kv[key_id].data.data();
1017
0
}
1018
1019
0
// Return the number of tensor info records held in ctx->info.
int64_t gguf_get_n_tensors(const struct gguf_context * ctx) {
1020
0
    return ctx->info.size();
1021
0
}
1022
1023
0
// Linear search for the first tensor whose name compares equal (strcmp) to `name`.
// Returns its index, or -1 when no tensor matches.
int64_t gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
1024
    // return -1 if tensor not found
1025
0
    int64_t tensor_id = -1;
1026
1027
0
    const int64_t n_tensors = gguf_get_n_tensors(ctx);
1028
1029
0
    for (int64_t i = 0; i < n_tensors; ++i) {
1030
0
        if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) {
1031
0
            tensor_id = i;
1032
0
            break;
1033
0
        }
1034
0
    }
1035
1036
0
    return tensor_id;
1037
0
}
1038
1039
0
// Return the `offset` recorded for the tensor at index `tensor_id`.
// Asserts that `tensor_id` is within [0, gguf_get_n_tensors(ctx)).
size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id) {
1040
0
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
1041
0
    return ctx->info[tensor_id].offset;
1042
0
}
1043
1044
0
// Return the name stored in the tensor record at index `tensor_id`.
// Asserts that `tensor_id` is within [0, gguf_get_n_tensors(ctx)).
const char * gguf_get_tensor_name(const struct gguf_context * ctx, int64_t tensor_id) {
1045
0
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
1046
0
    return ctx->info[tensor_id].t.name;
1047
0
}
1048
1049
0
// Return the ggml type stored in the tensor record at index `tensor_id`.
// Asserts that `tensor_id` is within [0, gguf_get_n_tensors(ctx)).
enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int64_t tensor_id) {
1050
0
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
1051
0
    return ctx->info[tensor_id].t.type;
1052
0
}
1053
1054
0
// Return ggml_nbytes() of the tensor record at index `tensor_id`.
// Asserts that `tensor_id` is within [0, gguf_get_n_tensors(ctx)).
size_t gguf_get_tensor_size(const struct gguf_context * ctx, int64_t tensor_id) {
1055
0
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
1056
0
    return ggml_nbytes(&ctx->info[tensor_id].t);
1057
0
}
1058
1059
0
// Erase the key-value pair named `key` from ctx->kv, if one exists.
// Returns the result of gguf_find_key(): the index the pair had before removal,
// or a negative value when the key was not present (nothing is erased then).
int64_t gguf_remove_key(struct gguf_context * ctx, const char * key) {
1060
0
    const int64_t key_id = gguf_find_key(ctx, key);
1061
0
    if (key_id >= 0) {
1062
0
        ctx->kv.erase(ctx->kv.begin() + key_id);
1063
0
    }
1064
0
    return key_id;
1065
0
}
1066
1067
// Validate a value about to be stored under a reserved key.
// Only GGUF_KEY_GENERAL_ALIGNMENT is checked: when T is uint32_t the value must be
// non-zero and a power of two (asserted); for any other T the process aborts.
// All other keys pass through without checks.
template<typename T>
1068
0
static void gguf_check_reserved_keys(const std::string & key, const T val) {
1069
0
    if (key == GGUF_KEY_GENERAL_ALIGNMENT) {
1070
0
        if constexpr (std::is_same<T, uint32_t>::value) {
1071
0
            // (val & (val - 1)) == 0 holds only when a single bit is set
            GGML_ASSERT(val > 0 && (val & (val - 1)) == 0 && GGUF_KEY_GENERAL_ALIGNMENT " must be power of 2");
1072
0
        } else {
1073
0
            GGML_UNUSED(val);
1074
0
            GGML_ABORT(GGUF_KEY_GENERAL_ALIGNMENT " must be type u32");
1075
0
        }
1076
0
    }
1077
0
}
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned char)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<signed char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, signed char)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned short)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, short)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned int)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<float>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, float)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, long)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<double>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, double)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<bool>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<char const*>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, char const*)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<void const*>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, void const*)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<char const**>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, char const**)
1078
1079
0
// Set `key` to the uint8_t value `val`.
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
1080
0
    gguf_check_reserved_keys(key, val);
1081
0
    gguf_remove_key(ctx, key);
1082
0
    ctx->kv.emplace_back(key, val);
1083
0
}
1084
1085
0
// Set `key` to the int8_t value `val`.
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
1086
0
    gguf_check_reserved_keys(key, val);
1087
0
    gguf_remove_key(ctx, key);
1088
0
    ctx->kv.emplace_back(key, val);
1089
0
}
1090
1091
0
// Set `key` to the uint16_t value `val`.
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
1092
0
    gguf_check_reserved_keys(key, val);
1093
0
    gguf_remove_key(ctx, key);
1094
0
    ctx->kv.emplace_back(key, val);
1095
0
}
1096
1097
0
// Set `key` to the int16_t value `val`.
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
1098
0
    gguf_check_reserved_keys(key, val);
1099
0
    gguf_remove_key(ctx, key);
1100
0
    ctx->kv.emplace_back(key, val);
1101
0
}
1102
1103
0
// Set `key` to the uint32_t value `val`.
// Runs the reserved-key check (uint32_t is the only type it accepts for
// GGUF_KEY_GENERAL_ALIGNMENT), removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
1104
0
    gguf_check_reserved_keys(key, val);
1105
0
    gguf_remove_key(ctx, key);
1106
0
    ctx->kv.emplace_back(key, val);
1107
0
}
1108
1109
0
// Set `key` to the int32_t value `val`.
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
1110
0
    gguf_check_reserved_keys(key, val);
1111
0
    gguf_remove_key(ctx, key);
1112
0
    ctx->kv.emplace_back(key, val);
1113
0
}
1114
1115
0
// Set `key` to the float value `val`.
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
1116
0
    gguf_check_reserved_keys(key, val);
1117
0
    gguf_remove_key(ctx, key);
1118
0
    ctx->kv.emplace_back(key, val);
1119
0
}
1120
1121
0
// Set `key` to the uint64_t value `val`.
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
1122
0
    gguf_check_reserved_keys(key, val);
1123
0
    gguf_remove_key(ctx, key);
1124
0
    ctx->kv.emplace_back(key, val);
1125
0
}
1126
1127
0
// Set `key` to the int64_t value `val`.
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
1128
0
    gguf_check_reserved_keys(key, val);
1129
0
    gguf_remove_key(ctx, key);
1130
0
    ctx->kv.emplace_back(key, val);
1131
0
}
1132
1133
0
// Set `key` to the double value `val`.
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
1134
0
    gguf_check_reserved_keys(key, val);
1135
0
    gguf_remove_key(ctx, key);
1136
0
    ctx->kv.emplace_back(key, val);
1137
0
}
1138
1139
0
// Set `key` to the bool value `val`.
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
1140
0
    gguf_check_reserved_keys(key, val);
1141
0
    gguf_remove_key(ctx, key);
1142
0
    ctx->kv.emplace_back(key, val);
1143
0
}
1144
1145
0
// Set `key` to a copy of the C string `val` (stored as std::string).
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
1146
0
    gguf_check_reserved_keys(key, val);
1147
0
    gguf_remove_key(ctx, key);
1148
0
    ctx->kv.emplace_back(key, std::string(val));
1149
0
}
1150
1151
0
// Set `key` to an array of `n` elements of `type`, copied from `data`.
// Runs the reserved-key check, removes any existing pair with the same key, copies
// n*gguf_type_size(type) bytes into a temporary int8_t vector, appends it as a new
// pair and then casts that pair to `type`.
// NOTE(review): the byte count n*gguf_type_size(type) is computed without an overflow check.
void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n) {
1152
0
    gguf_check_reserved_keys(key, data);
1153
0
    gguf_remove_key(ctx, key);
1154
1155
0
    const size_t nbytes = n*gguf_type_size(type);
1156
0
    std::vector<int8_t> tmp(nbytes);
1157
0
    // skip the copy for empty arrays so `data` is never read when there is nothing to copy
    if (!tmp.empty()) {
1158
0
        memcpy(tmp.data(), data, nbytes);
1159
0
    }
1160
0
    ctx->kv.emplace_back(key, tmp);
1161
0
    // the pair was built from raw int8_t bytes; relabel it with the requested element type
    ctx->kv.back().cast(type);
1162
0
}
1163
1164
0
// Set `key` to an array of `n` strings, each copied from the C strings in `data`.
// Runs the reserved-key check, removes any existing pair with the same key, then
// appends the new pair at the end of ctx->kv.
void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, size_t n) {
1165
0
    gguf_check_reserved_keys(key, data);
1166
0
    gguf_remove_key(ctx, key);
1167
1168
0
    std::vector<std::string> tmp(n);
1169
0
    for (size_t i = 0; i < n; ++i) {
1170
0
        tmp[i] = data[i];
1171
0
    }
1172
0
    ctx->kv.emplace_back(key, tmp);
1173
0
}
1174
1175
// set or add KV pairs from another context
1176
0
// Copy every key-value pair of `src` into `ctx` through the typed setters, so a key
// that already exists in `ctx` is replaced and a new key is appended.
// Scalar pairs are dispatched to gguf_set_val_*; array pairs go to gguf_set_arr_data
// (non-string element types) or gguf_set_arr_str (strings).
// Aborts on GGUF_TYPE_ARRAY or any unrecognized element type.
void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src) {
1177
0
    const int64_t n_kv = gguf_get_n_kv(src);
1178
0
    for (int64_t i = 0; i < n_kv; ++i) {
1179
0
        const struct gguf_kv & kv = src->kv[i];
1180
1181
0
        // scalar values: one typed setter per GGUF type
        if (!kv.is_array) {
1182
0
            switch (kv.get_type()) {
1183
0
                case GGUF_TYPE_UINT8:   gguf_set_val_u8  (ctx, kv.get_key().c_str(), kv.get_val<uint8_t>());             break;
1184
0
                case GGUF_TYPE_INT8:    gguf_set_val_i8  (ctx, kv.get_key().c_str(), kv.get_val<int8_t>());              break;
1185
0
                case GGUF_TYPE_UINT16:  gguf_set_val_u16 (ctx, kv.get_key().c_str(), kv.get_val<uint16_t>());            break;
1186
0
                case GGUF_TYPE_INT16:   gguf_set_val_i16 (ctx, kv.get_key().c_str(), kv.get_val<int16_t>());             break;
1187
0
                case GGUF_TYPE_UINT32:  gguf_set_val_u32 (ctx, kv.get_key().c_str(), kv.get_val<uint32_t>());            break;
1188
0
                case GGUF_TYPE_INT32:   gguf_set_val_i32 (ctx, kv.get_key().c_str(), kv.get_val<int32_t>());             break;
1189
0
                case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, kv.get_key().c_str(), kv.get_val<float>());               break;
1190
0
                case GGUF_TYPE_UINT64:  gguf_set_val_u64 (ctx, kv.get_key().c_str(), kv.get_val<uint64_t>());            break;
1191
0
                case GGUF_TYPE_INT64:   gguf_set_val_i64 (ctx, kv.get_key().c_str(), kv.get_val<int64_t>());             break;
1192
0
                case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, kv.get_key().c_str(), kv.get_val<double>());              break;
1193
0
                case GGUF_TYPE_BOOL:    gguf_set_val_bool(ctx, kv.get_key().c_str(), kv.get_val<bool>());                break;
1194
0
                case GGUF_TYPE_STRING:  gguf_set_val_str (ctx, kv.get_key().c_str(), kv.get_val<std::string>().c_str()); break;
1195
0
                case GGUF_TYPE_ARRAY:
1196
0
                default: GGML_ABORT("invalid type");
1197
0
            }
1198
0
            continue;
1199
0
        }
1200
1201
0
        // array values: number of elements to copy
        const size_t ne = kv.get_ne();
1202
1203
0
        switch (kv.get_type()) {
1204
0
            case GGUF_TYPE_UINT8:
1205
0
            case GGUF_TYPE_INT8:
1206
0
            case GGUF_TYPE_UINT16:
1207
0
            case GGUF_TYPE_INT16:
1208
0
            case GGUF_TYPE_UINT32:
1209
0
            case GGUF_TYPE_INT32:
1210
0
            case GGUF_TYPE_FLOAT32:
1211
0
            case GGUF_TYPE_UINT64:
1212
0
            case GGUF_TYPE_INT64:
1213
0
            case GGUF_TYPE_FLOAT64:
1214
0
            case GGUF_TYPE_BOOL: {
1215
0
                gguf_set_arr_data(ctx, kv.get_key().c_str(), kv.get_type(), kv.data.data(), ne);
1216
0
            } break;
1217
0
            case GGUF_TYPE_STRING: {
1218
0
                // gguf_set_arr_str takes const char **, so collect pointers into the source strings
                std::vector<const char *> tmp(ne);
1219
0
                for (size_t j = 0; j < ne; ++j) {
1220
0
                    tmp[j] = kv.data_string[j].c_str();
1221
0
                }
1222
0
                gguf_set_arr_str(ctx, kv.get_key().c_str(), tmp.data(), ne);
1223
0
            } break;
1224
0
            case GGUF_TYPE_ARRAY:
1225
0
            default: GGML_ABORT("invalid type");
1226
0
        }
1227
0
    }
1228
0
}
1229
1230
void gguf_add_tensor(
1231
             struct gguf_context * ctx,
1232
0
        const struct ggml_tensor * tensor) {
1233
0
    GGML_ASSERT(tensor);
1234
0
    if (gguf_find_tensor(ctx, tensor->name) != -1) {
1235
0
        GGML_ABORT("duplicate tensor name: %s", tensor->name);
1236
0
    }
1237
1238
0
    struct gguf_tensor_info ti;
1239
0
    ti.t = *tensor;
1240
0
    ti.offset = ctx->info.empty() ? 0 :
1241
0
        ctx->info.back().offset + GGML_PAD(ggml_nbytes(&ctx->info.back().t), ctx->alignment);
1242
0
    ctx->info.push_back(ti);
1243
0
}
1244
1245
0
void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
1246
0
    const int64_t tensor_id = gguf_find_tensor(ctx, name);
1247
0
    if (tensor_id < 0) {
1248
0
        GGML_ABORT("tensor not found: %s", name);
1249
0
    }
1250
0
    struct ggml_tensor * tensor = &ctx->info[tensor_id].t;
1251
0
    const size_t  type_size = ggml_type_size(type);
1252
0
    const int64_t blck_size = ggml_blck_size(type);
1253
1254
0
    tensor->type = type;
1255
0
    GGML_ASSERT(tensor->ne[0] % blck_size == 0 && "tensor row size not divisible by block size of new type");
1256
1257
0
    tensor->nb[0] = type_size;
1258
0
    tensor->nb[1] = tensor->nb[0]*(tensor->ne[0]/blck_size);
1259
0
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
1260
0
        tensor->nb[i] = tensor->nb[i - 1]*tensor->ne[i - 1];
1261
0
    }
1262
1263
    // update offsets
1264
0
    const int64_t n_tensors = gguf_get_n_tensors(ctx);
1265
0
    for (int64_t i = tensor_id + 1; i < n_tensors; ++i) {
1266
0
        ctx->info[i].offset = ctx->info[i - 1].offset + GGML_PAD(ggml_nbytes(&ctx->info[i - 1].t), ctx->alignment);
1267
0
    }
1268
0
}
1269
1270
0
void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data) {
1271
0
    const int64_t tensor_id = gguf_find_tensor(ctx, name);
1272
0
    if (tensor_id < 0) {
1273
0
        GGML_ABORT("tensor not found: %s", name);
1274
0
    }
1275
1276
0
    ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const
1277
0
}
1278
1279
struct gguf_writer_base {
1280
    size_t written_bytes {0u};
1281
1282
    ~gguf_writer_base(void) = default;
1283
1284
    // we bet on devirtualization
1285
    virtual void write(int8_t val) = 0;
1286
    virtual void write(const std::vector<int8_t> & val) = 0;
1287
    virtual void write_tensor_data(const struct gguf_tensor_info & info, size_t offset_data, size_t alignment) = 0;
1288
1289
    template <typename T>
1290
0
    void write(const T & val) {
1291
0
        for (size_t i = 0; i < sizeof(val); ++i) {
1292
0
            write(reinterpret_cast<const int8_t *>(&val)[i]);
1293
0
        }
1294
0
    }
Unexecuted instantiation: void gguf_writer_base::write<char>(char const&)
Unexecuted instantiation: void gguf_writer_base::write<unsigned int>(unsigned int const&)
Unexecuted instantiation: void gguf_writer_base::write<long>(long const&)
Unexecuted instantiation: void gguf_writer_base::write<int>(int const&)
Unexecuted instantiation: void gguf_writer_base::write<unsigned long>(unsigned long const&)
1295
1296
0
    void write(const bool & val) {
1297
0
        const int8_t val8 = val ? 1 : 0;
1298
0
        write(val8);
1299
0
    }
1300
1301
0
    void write(const std::string & val) {
1302
0
        {
1303
0
            const uint64_t n = val.length();
1304
0
            write(n);
1305
0
        }
1306
0
        for (size_t i = 0; i < val.length(); ++i) {
1307
0
            write((val.data())[i]);
1308
0
        }
1309
0
    }
1310
1311
0
    void write(const char * val) {
1312
0
        write(std::string(val));
1313
0
    }
1314
1315
0
    void write(const enum ggml_type & val) {
1316
0
        write(int32_t(val));
1317
0
    }
1318
1319
0
    void write(const enum gguf_type & val) {
1320
0
        write(int32_t(val));
1321
0
    }
1322
1323
0
    void write(const struct gguf_kv & kv) {
1324
0
        const uint64_t ne = kv.get_ne();
1325
1326
0
        write(kv.get_key());
1327
1328
0
        if (kv.is_array) {
1329
0
            write(GGUF_TYPE_ARRAY);
1330
0
            write(kv.get_type());
1331
0
            write(ne);
1332
0
        } else {
1333
0
            write(kv.get_type());
1334
0
        }
1335
1336
0
        switch (kv.get_type()) {
1337
0
            case GGUF_TYPE_UINT8:
1338
0
            case GGUF_TYPE_INT8:
1339
0
            case GGUF_TYPE_UINT16:
1340
0
            case GGUF_TYPE_INT16:
1341
0
            case GGUF_TYPE_UINT32:
1342
0
            case GGUF_TYPE_INT32:
1343
0
            case GGUF_TYPE_FLOAT32:
1344
0
            case GGUF_TYPE_UINT64:
1345
0
            case GGUF_TYPE_INT64:
1346
0
            case GGUF_TYPE_FLOAT64: {
1347
0
                write(kv.data);
1348
0
            } break;
1349
0
            case GGUF_TYPE_BOOL: {
1350
0
                for (size_t i = 0; i < ne; ++i) {
1351
0
                    write(kv.get_val<bool>(i));
1352
0
                }
1353
0
            } break;
1354
0
            case GGUF_TYPE_STRING: {
1355
0
                for (size_t i = 0; i < ne; ++i) {
1356
0
                    write(kv.get_val<std::string>(i));
1357
0
                }
1358
0
            } break;
1359
0
            case GGUF_TYPE_ARRAY:
1360
0
            default: GGML_ABORT("invalid type");
1361
0
        }
1362
0
    }
1363
1364
0
    void write_tensor_meta(const struct gguf_tensor_info & info) {
1365
0
        write(info.t.name);
1366
1367
0
        const uint32_t n_dims = ggml_n_dims(&info.t);
1368
0
        write(n_dims);
1369
1370
0
        for (uint32_t j = 0; j < n_dims; ++j) {
1371
0
            write(info.t.ne[j]);
1372
0
        }
1373
0
        write(info.t.type);
1374
0
        write(info.offset);
1375
0
    }
1376
1377
0
    void pad(const size_t alignment) {
1378
0
        while (written_bytes % alignment != 0) {
1379
0
            const int8_t zero = 0;
1380
0
            write(zero);
1381
0
        }
1382
0
    }
1383
};
1384
1385
// vector buffer based writer
1386
struct gguf_writer_buf final : public gguf_writer_base {
1387
    std::vector<int8_t> & buf;
1388
1389
0
    gguf_writer_buf(std::vector<int8_t> & buf) : buf(buf) {}
1390
1391
    using gguf_writer_base::write;
1392
1393
0
    void write(const int8_t val) override {
1394
0
        buf.push_back(val);
1395
0
        written_bytes++;
1396
0
    }
1397
1398
0
    void write(const std::vector<int8_t> & val) override {
1399
0
        buf.insert(buf.end(), val.begin(), val.end());
1400
0
        written_bytes += val.size();
1401
0
    }
1402
1403
0
    void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
1404
0
        GGML_ASSERT(buf.size() - offset_data == info.offset);
1405
1406
0
        GGML_ASSERT(ggml_is_contiguous(&info.t));
1407
0
        const size_t offset = buf.size();
1408
0
        const size_t nbytes = ggml_nbytes(&info.t);
1409
1410
0
        buf.resize(offset + nbytes);
1411
0
        if (info.t.buffer) {
1412
0
            ggml_backend_tensor_get(&info.t, buf.data() + offset, 0, nbytes);
1413
0
        } else {
1414
0
            GGML_ASSERT(info.t.data);
1415
0
            memcpy(buf.data() + offset, info.t.data, nbytes);
1416
0
        }
1417
0
        written_bytes += nbytes;
1418
1419
0
        pad(alignment);
1420
0
    }
1421
};
1422
1423
// file based writer
1424
struct gguf_writer_file final : public gguf_writer_base {
1425
    FILE * file;
1426
1427
0
    gguf_writer_file(FILE* file) : file(file) {}
1428
1429
    using gguf_writer_base::write;
1430
1431
0
    void write(const int8_t val) override {
1432
0
        const auto real_val = static_cast<uint8_t>(val);
1433
0
        const auto ret = fputc(real_val, file);
1434
0
        written_bytes++;
1435
0
        if (ret != real_val) {
1436
0
            throw std::runtime_error("unexpected fputc result '" + std::to_string(ret) + "' instead of '" + std::to_string((int)real_val) + "'");
1437
0
        }
1438
0
    }
1439
1440
0
    void write(const std::vector<int8_t> & val) override {
1441
0
        const auto ret = fwrite(val.data(), 1, val.size(), file);
1442
0
        written_bytes += val.size();
1443
0
        if (ret != val.size()) {
1444
0
            throw std::runtime_error("unexpected fwrite number of bytes written, '" + std::to_string(ret) + "' instead of '" + std::to_string(val.size()) + "'");
1445
0
        }
1446
0
    }
1447
1448
0
    void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
1449
0
        GGML_ASSERT(written_bytes - offset_data == info.offset);
1450
1451
0
        GGML_ASSERT(ggml_is_contiguous(&info.t));
1452
0
        const size_t nbytes = ggml_nbytes(&info.t);
1453
1454
0
        std::vector<int8_t> buf(nbytes);
1455
0
        if (info.t.buffer) {
1456
0
            ggml_backend_tensor_get(&info.t, buf.data(), 0, nbytes);
1457
0
        } else {
1458
0
            GGML_ASSERT(info.t.data);
1459
0
            memcpy(buf.data(), info.t.data, nbytes);
1460
0
        }
1461
0
        write(buf);
1462
1463
0
        pad(alignment);
1464
0
    }
1465
};
1466
1467
template <typename writer_t>
1468
0
static void gguf_write_out(const struct gguf_context * ctx, writer_t & gw, bool only_meta) {
1469
0
    const int64_t n_kv      = gguf_get_n_kv(ctx);
1470
0
    const int64_t n_tensors = gguf_get_n_tensors(ctx);
1471
1472
    // write header
1473
0
    gw.write(GGUF_MAGIC[0]);
1474
0
    gw.write(GGUF_MAGIC[1]);
1475
0
    gw.write(GGUF_MAGIC[2]);
1476
0
    gw.write(GGUF_MAGIC[3]);
1477
0
    gw.write(ctx->version);
1478
0
    gw.write(n_tensors);
1479
0
    gw.write(n_kv);
1480
1481
    // write key-value pairs
1482
0
    for (int64_t i = 0; i < n_kv; ++i) {
1483
0
        gw.write(ctx->kv[i]);
1484
0
    }
1485
1486
    // write tensor info
1487
0
    for (int64_t i = 0; i < n_tensors; ++i) {
1488
0
        gw.write_tensor_meta(ctx->info[i]);
1489
0
    }
1490
1491
    // we require the data section to be aligned
1492
0
    gw.pad(ctx->alignment);
1493
1494
0
    if (only_meta) {
1495
0
        return;
1496
0
    }
1497
1498
0
    const size_t offset_data = gw.written_bytes;
1499
1500
    // write tensor data
1501
0
    for (int64_t i = 0; i < n_tensors; ++i) {
1502
0
        gw.write_tensor_data(ctx->info[i], offset_data, ctx->alignment);
1503
0
    }
1504
0
}
Unexecuted instantiation: gguf.cpp:void gguf_write_out<gguf_writer_buf>(gguf_context const*, gguf_writer_buf&, bool)
Unexecuted instantiation: gguf.cpp:void gguf_write_out<gguf_writer_file>(gguf_context const*, gguf_writer_file&, bool)
1505
1506
0
void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) {
1507
0
    gguf_writer_buf gw(buf);
1508
0
    gguf_write_out(ctx, gw, only_meta);
1509
0
}
1510
1511
0
bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
1512
0
    FILE * file = ggml_fopen(fname, "wb");
1513
1514
0
    if (!file) {
1515
0
        GGML_LOG_ERROR("%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
1516
0
        return false;
1517
0
    }
1518
1519
0
    try {
1520
0
        gguf_writer_file gw(file);
1521
0
        gguf_write_out(ctx, gw, only_meta);
1522
0
    } catch (const std::runtime_error& ex) {
1523
0
        GGML_LOG_ERROR("%s: failed to write GGUF data into '%s': %s\n", __func__, fname, ex.what());
1524
0
        fclose(file);
1525
0
        return false;
1526
0
    }
1527
1528
0
    fclose(file);
1529
0
    return true;
1530
0
}
1531
1532
0
size_t gguf_get_meta_size(const struct gguf_context * ctx) {
1533
    // only return size
1534
0
    std::vector<int8_t> buf;
1535
0
    gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1536
0
    return buf.size();
1537
0
}
1538
1539
0
void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
1540
0
    std::vector<int8_t> buf;
1541
0
    gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1542
0
    memcpy(data, buf.data(), buf.size());
1543
0
}