Coverage Report

Created: 2026-03-21 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/ggml/src/gguf.cpp
Line
Count
Source
1
#include "ggml.h"
2
#include "ggml-backend.h"
3
#include "ggml-impl.h"
4
#include "gguf.h"
5
6
#include <cinttypes>
7
#include <cstddef>
8
#include <cstdint>
9
#include <cstdio>
10
#include <cstdlib>
11
#include <cstring>
12
#include <map>
13
#include <new>
14
#include <stdexcept>
15
#include <string>
16
#include <vector>
17
18
24.8k
#define GGUF_MAX_STRING_LENGTH  (1024*1024*1024)
19
9.22k
#define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024)
20
21
#ifdef _WIN32
22
#    define gguf_ftell _ftelli64
23
#    define gguf_fseek _fseeki64
24
#else
25
11.0k
#    define gguf_ftell ftello
26
11.0k
#    define gguf_fseek fseeko
27
#endif
28
29
template <typename T>
30
struct type_to_gguf_type;
31
32
template <>
33
struct type_to_gguf_type<uint8_t> {
34
    static constexpr enum gguf_type value = GGUF_TYPE_UINT8;
35
};
36
37
template <>
38
struct type_to_gguf_type<int8_t> {
39
    static constexpr enum gguf_type value = GGUF_TYPE_INT8;
40
};
41
42
template <>
43
struct type_to_gguf_type<uint16_t> {
44
    static constexpr enum gguf_type value = GGUF_TYPE_UINT16;
45
};
46
47
template <>
48
struct type_to_gguf_type<int16_t> {
49
    static constexpr enum gguf_type value = GGUF_TYPE_INT16;
50
};
51
52
template <>
53
struct type_to_gguf_type<uint32_t> {
54
    static constexpr enum gguf_type value = GGUF_TYPE_UINT32;
55
};
56
57
template <>
58
struct type_to_gguf_type<int32_t> {
59
    static constexpr enum gguf_type value = GGUF_TYPE_INT32;
60
};
61
62
template <>
63
struct type_to_gguf_type<float> {
64
    static constexpr enum gguf_type value = GGUF_TYPE_FLOAT32;
65
};
66
67
template <>
68
struct type_to_gguf_type<bool> {
69
    static constexpr enum gguf_type value = GGUF_TYPE_BOOL;
70
};
71
72
template <>
73
struct type_to_gguf_type<std::string> {
74
    static constexpr enum gguf_type value = GGUF_TYPE_STRING;
75
};
76
77
template <>
78
struct type_to_gguf_type<uint64_t> {
79
    static constexpr enum gguf_type value = GGUF_TYPE_UINT64;
80
};
81
82
template <>
83
struct type_to_gguf_type<int64_t> {
84
    static constexpr enum gguf_type value = GGUF_TYPE_INT64;
85
};
86
87
template <>
88
struct type_to_gguf_type<double> {
89
    static constexpr enum gguf_type value = GGUF_TYPE_FLOAT64;
90
};
91
92
static const std::map<gguf_type, size_t> GGUF_TYPE_SIZE = {
93
    {GGUF_TYPE_UINT8,   sizeof(uint8_t)},
94
    {GGUF_TYPE_INT8,    sizeof(int8_t)},
95
    {GGUF_TYPE_UINT16,  sizeof(uint16_t)},
96
    {GGUF_TYPE_INT16,   sizeof(int16_t)},
97
    {GGUF_TYPE_UINT32,  sizeof(uint32_t)},
98
    {GGUF_TYPE_INT32,   sizeof(int32_t)},
99
    {GGUF_TYPE_FLOAT32, sizeof(float)},
100
    {GGUF_TYPE_BOOL,    sizeof(int8_t)},
101
    {GGUF_TYPE_STRING,  0}, // undefined
102
    {GGUF_TYPE_ARRAY,   0}, // undefined
103
    {GGUF_TYPE_UINT64,  sizeof(uint64_t)},
104
    {GGUF_TYPE_INT64,   sizeof(int64_t)},
105
    {GGUF_TYPE_FLOAT64, sizeof(double)},
106
};
107
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
108
109
static const std::map<gguf_type, const char *> GGUF_TYPE_NAME = {
110
    {GGUF_TYPE_UINT8,   "u8"},
111
    {GGUF_TYPE_INT8,    "i8"},
112
    {GGUF_TYPE_UINT16,  "u16"},
113
    {GGUF_TYPE_INT16,   "i16"},
114
    {GGUF_TYPE_UINT32,  "u32"},
115
    {GGUF_TYPE_INT32,   "i32"},
116
    {GGUF_TYPE_FLOAT32, "f32"},
117
    {GGUF_TYPE_BOOL,    "bool"},
118
    {GGUF_TYPE_STRING,  "str"},
119
    {GGUF_TYPE_ARRAY,   "arr"},
120
    {GGUF_TYPE_UINT64,  "u64"},
121
    {GGUF_TYPE_INT64,   "i64"},
122
    {GGUF_TYPE_FLOAT64, "f64"},
123
};
124
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
125
126
5.26k
size_t gguf_type_size(enum gguf_type type) {
127
5.26k
    auto it = GGUF_TYPE_SIZE.find(type);
128
5.26k
    return it == GGUF_TYPE_SIZE.end() ? 0 : it->second;
129
5.26k
}
130
131
struct gguf_kv {
132
    std::string key;
133
134
    bool is_array;
135
    enum gguf_type type;
136
137
    std::vector<int8_t>      data;
138
    std::vector<std::string> data_string;
139
140
    template <typename T>
141
    gguf_kv(const std::string & key, const T value)
142
9.76k
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
9.76k
        GGML_ASSERT(!key.empty());
144
9.76k
        data.resize(sizeof(T));
145
9.76k
        memcpy(data.data(), &value, sizeof(T));
146
9.76k
    }
gguf_kv::gguf_kv<unsigned char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned char)
Line
Count
Source
142
2.23k
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
2.23k
        GGML_ASSERT(!key.empty());
144
2.23k
        data.resize(sizeof(T));
145
2.23k
        memcpy(data.data(), &value, sizeof(T));
146
2.23k
    }
gguf_kv::gguf_kv<signed char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, signed char)
Line
Count
Source
142
724
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
724
        GGML_ASSERT(!key.empty());
144
724
        data.resize(sizeof(T));
145
724
        memcpy(data.data(), &value, sizeof(T));
146
724
    }
gguf_kv::gguf_kv<unsigned short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned short)
Line
Count
Source
142
862
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
862
        GGML_ASSERT(!key.empty());
144
862
        data.resize(sizeof(T));
145
862
        memcpy(data.data(), &value, sizeof(T));
146
862
    }
gguf_kv::gguf_kv<short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, short)
Line
Count
Source
142
790
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
790
        GGML_ASSERT(!key.empty());
144
790
        data.resize(sizeof(T));
145
790
        memcpy(data.data(), &value, sizeof(T));
146
790
    }
gguf_kv::gguf_kv<unsigned int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned int)
Line
Count
Source
142
852
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
852
        GGML_ASSERT(!key.empty());
144
852
        data.resize(sizeof(T));
145
852
        memcpy(data.data(), &value, sizeof(T));
146
852
    }
gguf_kv::gguf_kv<int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int)
Line
Count
Source
142
841
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
841
        GGML_ASSERT(!key.empty());
144
841
        data.resize(sizeof(T));
145
841
        memcpy(data.data(), &value, sizeof(T));
146
841
    }
gguf_kv::gguf_kv<float>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, float)
Line
Count
Source
142
714
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
714
        GGML_ASSERT(!key.empty());
144
714
        data.resize(sizeof(T));
145
714
        memcpy(data.data(), &value, sizeof(T));
146
714
    }
gguf_kv::gguf_kv<bool>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool)
Line
Count
Source
142
701
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
701
        GGML_ASSERT(!key.empty());
144
701
        data.resize(sizeof(T));
145
701
        memcpy(data.data(), &value, sizeof(T));
146
701
    }
gguf_kv::gguf_kv<unsigned long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long)
Line
Count
Source
142
733
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
733
        GGML_ASSERT(!key.empty());
144
733
        data.resize(sizeof(T));
145
733
        memcpy(data.data(), &value, sizeof(T));
146
733
    }
gguf_kv::gguf_kv<long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, long)
Line
Count
Source
142
633
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
633
        GGML_ASSERT(!key.empty());
144
633
        data.resize(sizeof(T));
145
633
        memcpy(data.data(), &value, sizeof(T));
146
633
    }
gguf_kv::gguf_kv<double>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, double)
Line
Count
Source
142
686
            : key(key), is_array(false), type(type_to_gguf_type<T>::value) {
143
686
        GGML_ASSERT(!key.empty());
144
686
        data.resize(sizeof(T));
145
686
        memcpy(data.data(), &value, sizeof(T));
146
686
    }
147
148
    template <typename T>
149
    gguf_kv(const std::string & key, const std::vector<T> & value)
150
2.81k
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
2.81k
        GGML_ASSERT(!key.empty());
152
2.81k
        data.resize(value.size()*sizeof(T));
153
1.70M
        for (size_t i = 0; i < value.size(); ++i) {
154
1.70M
            const T tmp = value[i];
155
1.70M
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
1.70M
        }
157
2.81k
    }
gguf_kv::gguf_kv<unsigned char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned char, std::__1::allocator<unsigned char> > const&)
Line
Count
Source
150
155
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
155
        GGML_ASSERT(!key.empty());
152
155
        data.resize(value.size()*sizeof(T));
153
3.58k
        for (size_t i = 0; i < value.size(); ++i) {
154
3.42k
            const T tmp = value[i];
155
3.42k
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
3.42k
        }
157
155
    }
gguf_kv::gguf_kv<signed char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<signed char, std::__1::allocator<signed char> > const&)
Line
Count
Source
150
144
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
144
        GGML_ASSERT(!key.empty());
152
144
        data.resize(value.size()*sizeof(T));
153
6.26k
        for (size_t i = 0; i < value.size(); ++i) {
154
6.11k
            const T tmp = value[i];
155
6.11k
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
6.11k
        }
157
144
    }
gguf_kv::gguf_kv<unsigned short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned short, std::__1::allocator<unsigned short> > const&)
Line
Count
Source
150
153
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
153
        GGML_ASSERT(!key.empty());
152
153
        data.resize(value.size()*sizeof(T));
153
18.7k
        for (size_t i = 0; i < value.size(); ++i) {
154
18.6k
            const T tmp = value[i];
155
18.6k
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
18.6k
        }
157
153
    }
gguf_kv::gguf_kv<short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<short, std::__1::allocator<short> > const&)
Line
Count
Source
150
676
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
676
        GGML_ASSERT(!key.empty());
152
676
        data.resize(value.size()*sizeof(T));
153
3.73k
        for (size_t i = 0; i < value.size(); ++i) {
154
3.05k
            const T tmp = value[i];
155
3.05k
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
3.05k
        }
157
676
    }
gguf_kv::gguf_kv<unsigned int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned int, std::__1::allocator<unsigned int> > const&)
Line
Count
Source
150
115
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
115
        GGML_ASSERT(!key.empty());
152
115
        data.resize(value.size()*sizeof(T));
153
207k
        for (size_t i = 0; i < value.size(); ++i) {
154
207k
            const T tmp = value[i];
155
207k
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
207k
        }
157
115
    }
gguf_kv::gguf_kv<int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<int, std::__1::allocator<int> > const&)
Line
Count
Source
150
112
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
112
        GGML_ASSERT(!key.empty());
152
112
        data.resize(value.size()*sizeof(T));
153
266k
        for (size_t i = 0; i < value.size(); ++i) {
154
266k
            const T tmp = value[i];
155
266k
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
266k
        }
157
112
    }
gguf_kv::gguf_kv<float>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<float, std::__1::allocator<float> > const&)
Line
Count
Source
150
598
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
598
        GGML_ASSERT(!key.empty());
152
598
        data.resize(value.size()*sizeof(T));
153
186k
        for (size_t i = 0; i < value.size(); ++i) {
154
185k
            const T tmp = value[i];
155
185k
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
185k
        }
157
598
    }
gguf_kv::gguf_kv<bool>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<bool, std::__1::allocator<bool> > const&)
Line
Count
Source
150
248
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
248
        GGML_ASSERT(!key.empty());
152
248
        data.resize(value.size()*sizeof(T));
153
641k
        for (size_t i = 0; i < value.size(); ++i) {
154
640k
            const T tmp = value[i];
155
640k
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
640k
        }
157
248
    }
gguf_kv::gguf_kv<unsigned long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<unsigned long, std::__1::allocator<unsigned long> > const&)
Line
Count
Source
150
250
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
250
        GGML_ASSERT(!key.empty());
152
250
        data.resize(value.size()*sizeof(T));
153
78.1k
        for (size_t i = 0; i < value.size(); ++i) {
154
77.8k
            const T tmp = value[i];
155
77.8k
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
77.8k
        }
157
250
    }
gguf_kv::gguf_kv<long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<long, std::__1::allocator<long> > const&)
Line
Count
Source
150
139
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
139
        GGML_ASSERT(!key.empty());
152
139
        data.resize(value.size()*sizeof(T));
153
234k
        for (size_t i = 0; i < value.size(); ++i) {
154
234k
            const T tmp = value[i];
155
234k
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
234k
        }
157
139
    }
gguf_kv::gguf_kv<double>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::vector<double, std::__1::allocator<double> > const&)
Line
Count
Source
150
223
            : key(key), is_array(true), type(type_to_gguf_type<T>::value) {
151
223
        GGML_ASSERT(!key.empty());
152
223
        data.resize(value.size()*sizeof(T));
153
57.5k
        for (size_t i = 0; i < value.size(); ++i) {
154
57.3k
            const T tmp = value[i];
155
57.3k
            memcpy(data.data() + i*sizeof(T), &tmp, sizeof(T));
156
57.3k
        }
157
223
    }
158
159
    gguf_kv(const std::string & key, const std::string & value)
160
924
            : key(key), is_array(false), type(GGUF_TYPE_STRING) {
161
924
        GGML_ASSERT(!key.empty());
162
924
        data_string.push_back(value);
163
924
    }
164
165
    gguf_kv(const std::string & key, const std::vector<std::string> & value)
166
533
            : key(key), is_array(true), type(GGUF_TYPE_STRING) {
167
533
        GGML_ASSERT(!key.empty());
168
533
        data_string = value;
169
533
    }
170
171
31.4k
    const std::string & get_key() const {
172
31.4k
        return key;
173
31.4k
    }
174
175
17.3k
    const enum gguf_type & get_type() const {
176
17.3k
        return type;
177
17.3k
    }
178
179
5.07k
    size_t get_ne() const {
180
5.07k
        if (type == GGUF_TYPE_STRING) {
181
922
            const size_t ne = data_string.size();
182
922
            GGML_ASSERT(is_array || ne == 1);
183
922
            return ne;
184
922
        }
185
4.14k
        const size_t type_size = gguf_type_size(type);
186
4.14k
        GGML_ASSERT(data.size() % type_size == 0);
187
4.14k
        const size_t ne = data.size() / type_size;
188
4.14k
        GGML_ASSERT(is_array || ne == 1);
189
4.14k
        return ne;
190
5.07k
    }
191
192
    template <typename T>
193
948
    const T & get_val(const size_t i = 0) const {
194
948
        GGML_ASSERT(type_to_gguf_type<T>::value == type);
195
948
        if constexpr (std::is_same<T, std::string>::value) {
196
920
            GGML_ASSERT(data_string.size() >= i+1);
197
920
            return data_string[i];
198
920
        }
199
0
        const size_t type_size = gguf_type_size(type);
200
948
        GGML_ASSERT(data.size() % type_size == 0);
201
948
        GGML_ASSERT(data.size() >= (i+1)*type_size);
202
948
        return reinterpret_cast<const T *>(data.data())[i];
203
948
    }
Unexecuted instantiation: unsigned char const& gguf_kv::get_val<unsigned char>(unsigned long) const
Unexecuted instantiation: signed char const& gguf_kv::get_val<signed char>(unsigned long) const
unsigned short const& gguf_kv::get_val<unsigned short>(unsigned long) const
Line
Count
Source
193
3
    const T & get_val(const size_t i = 0) const {
194
3
        GGML_ASSERT(type_to_gguf_type<T>::value == type);
195
        if constexpr (std::is_same<T, std::string>::value) {
196
            GGML_ASSERT(data_string.size() >= i+1);
197
            return data_string[i];
198
        }
199
3
        const size_t type_size = gguf_type_size(type);
200
3
        GGML_ASSERT(data.size() % type_size == 0);
201
3
        GGML_ASSERT(data.size() >= (i+1)*type_size);
202
3
        return reinterpret_cast<const T *>(data.data())[i];
203
3
    }
Unexecuted instantiation: short const& gguf_kv::get_val<short>(unsigned long) const
unsigned int const& gguf_kv::get_val<unsigned int>(unsigned long) const
Line
Count
Source
193
25
    const T & get_val(const size_t i = 0) const {
194
25
        GGML_ASSERT(type_to_gguf_type<T>::value == type);
195
        if constexpr (std::is_same<T, std::string>::value) {
196
            GGML_ASSERT(data_string.size() >= i+1);
197
            return data_string[i];
198
        }
199
25
        const size_t type_size = gguf_type_size(type);
200
25
        GGML_ASSERT(data.size() % type_size == 0);
201
25
        GGML_ASSERT(data.size() >= (i+1)*type_size);
202
25
        return reinterpret_cast<const T *>(data.data())[i];
203
25
    }
Unexecuted instantiation: int const& gguf_kv::get_val<int>(unsigned long) const
Unexecuted instantiation: float const& gguf_kv::get_val<float>(unsigned long) const
Unexecuted instantiation: unsigned long const& gguf_kv::get_val<unsigned long>(unsigned long) const
Unexecuted instantiation: long const& gguf_kv::get_val<long>(unsigned long) const
Unexecuted instantiation: double const& gguf_kv::get_val<double>(unsigned long) const
Unexecuted instantiation: bool const& gguf_kv::get_val<bool>(unsigned long) const
std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const& gguf_kv::get_val<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(unsigned long) const
Line
Count
Source
193
920
    const T & get_val(const size_t i = 0) const {
194
920
        GGML_ASSERT(type_to_gguf_type<T>::value == type);
195
920
        if constexpr (std::is_same<T, std::string>::value) {
196
920
            GGML_ASSERT(data_string.size() >= i+1);
197
920
            return data_string[i];
198
920
        }
199
0
        const size_t type_size = gguf_type_size(type);
200
920
        GGML_ASSERT(data.size() % type_size == 0);
201
920
        GGML_ASSERT(data.size() >= (i+1)*type_size);
202
920
        return reinterpret_cast<const T *>(data.data())[i];
203
920
    }
204
205
0
    void cast(const enum gguf_type new_type) {
206
0
        const size_t new_type_size = gguf_type_size(new_type);
207
0
        GGML_ASSERT(data.size() % new_type_size == 0);
208
0
        type = new_type;
209
0
    }
210
};
211
212
struct gguf_tensor_info {
213
    struct ggml_tensor t; // for holding the equivalent info
214
    uint64_t offset;      // offset from start of `data`, must be a multiple of `ALIGNMENT`
215
};
216
217
struct gguf_context {
218
    uint32_t version = GGUF_VERSION;
219
220
    std::vector<struct gguf_kv> kv;
221
    std::vector<struct gguf_tensor_info> info;
222
223
    size_t alignment = GGUF_DEFAULT_ALIGNMENT;
224
    size_t offset    = 0; // offset of `data` from beginning of file
225
    size_t size      = 0; // size of `data` in bytes
226
227
    void * data = nullptr;
228
};
229
230
struct gguf_reader {
231
4.91k
    gguf_reader(FILE * file) : file(file) {
232
        // read the remaining bytes once and update on each read
233
4.91k
        nbytes_remain = file_remain(file);
234
4.91k
    }
235
236
    // helper for remaining bytes in a file
237
4.91k
    static uint64_t file_remain(FILE * file) {
238
4.91k
        const int64_t cur = gguf_ftell(file);
239
4.91k
        if (cur < 0) {
240
0
            return 0;
241
0
        }
242
4.91k
        if (gguf_fseek(file, 0, SEEK_END) != 0) {
243
0
            gguf_fseek(file, cur, SEEK_SET);
244
245
0
            return 0;
246
0
        }
247
4.91k
        const int64_t end = gguf_ftell(file);
248
4.91k
        if (end < 0) {
249
0
            gguf_fseek(file, cur, SEEK_SET);
250
251
0
            return 0;
252
0
        }
253
4.91k
        gguf_fseek(file, cur, SEEK_SET);
254
4.91k
        return static_cast<uint64_t>(end - cur);
255
4.91k
    }
256
257
    template <typename T>
258
2.66M
    bool read(T & dst) const {
259
2.66M
        const size_t size = sizeof(dst);
260
2.66M
        if (nbytes_remain < size) {
261
662
            return false;
262
662
        }
263
2.66M
        const size_t nread = fread(&dst, 1, size, file);
264
2.66M
        nbytes_remain -= nread;
265
2.66M
        return nread == size;
266
2.66M
    }
bool gguf_reader::read<int>(int&) const
Line
Count
Source
258
290k
    bool read(T & dst) const {
259
290k
        const size_t size = sizeof(dst);
260
290k
        if (nbytes_remain < size) {
261
91
            return false;
262
91
        }
263
290k
        const size_t nread = fread(&dst, 1, size, file);
264
290k
        nbytes_remain -= nread;
265
290k
        return nread == size;
266
290k
    }
bool gguf_reader::read<unsigned long>(unsigned long&) const
Line
Count
Source
258
111k
    bool read(T & dst) const {
259
111k
        const size_t size = sizeof(dst);
260
111k
        if (nbytes_remain < size) {
261
391
            return false;
262
391
        }
263
111k
        const size_t nread = fread(&dst, 1, size, file);
264
111k
        nbytes_remain -= nread;
265
111k
        return nread == size;
266
111k
    }
bool gguf_reader::read<char>(char&) const
Line
Count
Source
258
19.6k
    bool read(T & dst) const {
259
19.6k
        const size_t size = sizeof(dst);
260
19.6k
        if (nbytes_remain < size) {
261
0
            return false;
262
0
        }
263
19.6k
        const size_t nread = fread(&dst, 1, size, file);
264
19.6k
        nbytes_remain -= nread;
265
19.6k
        return nread == size;
266
19.6k
    }
bool gguf_reader::read<unsigned int>(unsigned int&) const
Line
Count
Source
258
216k
    bool read(T & dst) const {
259
216k
        const size_t size = sizeof(dst);
260
216k
        if (nbytes_remain < size) {
261
34
            return false;
262
34
        }
263
216k
        const size_t nread = fread(&dst, 1, size, file);
264
216k
        nbytes_remain -= nread;
265
216k
        return nread == size;
266
216k
    }
bool gguf_reader::read<long>(long&) const
Line
Count
Source
258
253k
    bool read(T & dst) const {
259
253k
        const size_t size = sizeof(dst);
260
253k
        if (nbytes_remain < size) {
261
130
            return false;
262
130
        }
263
253k
        const size_t nread = fread(&dst, 1, size, file);
264
253k
        nbytes_remain -= nread;
265
253k
        return nread == size;
266
253k
    }
bool gguf_reader::read<unsigned char>(unsigned char&) const
Line
Count
Source
258
5.75k
    bool read(T & dst) const {
259
5.75k
        const size_t size = sizeof(dst);
260
5.75k
        if (nbytes_remain < size) {
261
4
            return false;
262
4
        }
263
5.74k
        const size_t nread = fread(&dst, 1, size, file);
264
5.74k
        nbytes_remain -= nread;
265
5.74k
        return nread == size;
266
5.75k
    }
bool gguf_reader::read<signed char>(signed char&) const
Line
Count
Source
258
1.50M
    bool read(T & dst) const {
259
1.50M
        const size_t size = sizeof(dst);
260
1.50M
        if (nbytes_remain < size) {
261
3
            return false;
262
3
        }
263
1.50M
        const size_t nread = fread(&dst, 1, size, file);
264
1.50M
        nbytes_remain -= nread;
265
1.50M
        return nread == size;
266
1.50M
    }
bool gguf_reader::read<unsigned short>(unsigned short&) const
Line
Count
Source
258
19.4k
    bool read(T & dst) const {
259
19.4k
        const size_t size = sizeof(dst);
260
19.4k
        if (nbytes_remain < size) {
261
2
            return false;
262
2
        }
263
19.4k
        const size_t nread = fread(&dst, 1, size, file);
264
19.4k
        nbytes_remain -= nread;
265
19.4k
        return nread == size;
266
19.4k
    }
bool gguf_reader::read<short>(short&) const
Line
Count
Source
258
3.86k
    bool read(T & dst) const {
259
3.86k
        const size_t size = sizeof(dst);
260
3.86k
        if (nbytes_remain < size) {
261
1
            return false;
262
1
        }
263
3.86k
        const size_t nread = fread(&dst, 1, size, file);
264
3.86k
        nbytes_remain -= nread;
265
3.86k
        return nread == size;
266
3.86k
    }
bool gguf_reader::read<float>(float&) const
Line
Count
Source
258
186k
    bool read(T & dst) const {
259
186k
        const size_t size = sizeof(dst);
260
186k
        if (nbytes_remain < size) {
261
2
            return false;
262
2
        }
263
186k
        const size_t nread = fread(&dst, 1, size, file);
264
186k
        nbytes_remain -= nread;
265
186k
        return nread == size;
266
186k
    }
bool gguf_reader::read<double>(double&) const
Line
Count
Source
258
58.0k
    bool read(T & dst) const {
259
58.0k
        const size_t size = sizeof(dst);
260
58.0k
        if (nbytes_remain < size) {
261
4
            return false;
262
4
        }
263
58.0k
        const size_t nread = fread(&dst, 1, size, file);
264
58.0k
        nbytes_remain -= nread;
265
58.0k
        return nread == size;
266
58.0k
    }
267
268
    template <typename T>
269
9.22k
    bool read(std::vector<T> & dst, const size_t n) const {
270
9.22k
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
636
            return false;
272
636
        }
273
8.58k
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
597
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
0
                return false;
277
0
            }
278
597
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
31
                return false;
280
31
            }
281
7.99k
        } else {
282
7.99k
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
7.99k
            if (nbytes_remain < n * sizeof(T)) {
286
262
                return false;
287
262
            }
288
7.99k
        }
289
8.29k
        dst.resize(n);
290
2.58M
        for (size_t i = 0; i < dst.size(); ++i) {
291
2.57M
            if constexpr (std::is_same<T, bool>::value) {
292
1.49M
                bool tmp;
293
1.49M
                if (!read(tmp)) {
294
0
                    return false;
295
0
                }
296
1.49M
                dst[i] = tmp;
297
1.49M
            } else {
298
1.08M
                if (!read(dst[i])) {
299
33
                    return false;
300
33
                }
301
1.08M
            }
302
2.57M
        }
303
8.58k
        return true;
304
9.22k
    }
bool gguf_reader::read<char>(std::__1::vector<char, std::__1::allocator<char> >&, unsigned long) const
Line
Count
Source
269
4.91k
    bool read(std::vector<T> & dst, const size_t n) const {
270
4.91k
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
0
            return false;
272
0
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
4.91k
        } else {
282
4.91k
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
4.91k
            if (nbytes_remain < n * sizeof(T)) {
286
0
                return false;
287
0
            }
288
4.91k
        }
289
4.91k
        dst.resize(n);
290
24.5k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
19.6k
            } else {
298
19.6k
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
19.6k
            }
302
19.6k
        }
303
4.91k
        return true;
304
4.91k
    }
bool gguf_reader::read<unsigned char>(std::__1::vector<unsigned char, std::__1::allocator<unsigned char> >&, unsigned long) const
Line
Count
Source
269
219
    bool read(std::vector<T> & dst, const size_t n) const {
270
219
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
42
            return false;
272
42
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
177
        } else {
282
177
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
177
            if (nbytes_remain < n * sizeof(T)) {
286
22
                return false;
287
22
            }
288
177
        }
289
155
        dst.resize(n);
290
3.69k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
3.51k
            } else {
298
3.51k
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
3.51k
            }
302
3.51k
        }
303
177
        return true;
304
219
    }
bool gguf_reader::read<signed char>(std::__1::vector<signed char, std::__1::allocator<signed char> >&, unsigned long) const
Line
Count
Source
269
208
    bool read(std::vector<T> & dst, const size_t n) const {
270
208
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
43
            return false;
272
43
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
165
        } else {
282
165
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
165
            if (nbytes_remain < n * sizeof(T)) {
286
21
                return false;
287
21
            }
288
165
        }
289
144
        dst.resize(n);
290
6.30k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
6.14k
            } else {
298
6.14k
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
6.14k
            }
302
6.14k
        }
303
165
        return true;
304
208
    }
bool gguf_reader::read<unsigned short>(std::__1::vector<unsigned short, std::__1::allocator<unsigned short> >&, unsigned long) const
Line
Count
Source
269
207
    bool read(std::vector<T> & dst, const size_t n) const {
270
207
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
26
            return false;
272
26
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
181
        } else {
282
181
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
181
            if (nbytes_remain < n * sizeof(T)) {
286
28
                return false;
287
28
            }
288
181
        }
289
153
        dst.resize(n);
290
18.7k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
18.6k
            } else {
298
18.6k
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
18.6k
            }
302
18.6k
        }
303
181
        return true;
304
207
    }
bool gguf_reader::read<short>(std::__1::vector<short, std::__1::allocator<short> >&, unsigned long) const
Line
Count
Source
269
766
    bool read(std::vector<T> & dst, const size_t n) const {
270
766
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
66
            return false;
272
66
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
700
        } else {
282
700
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
700
            if (nbytes_remain < n * sizeof(T)) {
286
24
                return false;
287
24
            }
288
700
        }
289
676
        dst.resize(n);
290
3.77k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
3.07k
            } else {
298
3.07k
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
3.07k
            }
302
3.07k
        }
303
700
        return true;
304
766
    }
bool gguf_reader::read<unsigned int>(std::__1::vector<unsigned int, std::__1::allocator<unsigned int> >&, unsigned long) const
Line
Count
Source
269
171
    bool read(std::vector<T> & dst, const size_t n) const {
270
171
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
43
            return false;
272
43
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
128
        } else {
282
128
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
128
            if (nbytes_remain < n * sizeof(T)) {
286
13
                return false;
287
13
            }
288
128
        }
289
115
        dst.resize(n);
290
207k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
207k
            } else {
298
207k
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
207k
            }
302
207k
        }
303
128
        return true;
304
171
    }
bool gguf_reader::read<int>(std::__1::vector<int, std::__1::allocator<int> >&, unsigned long) const
Line
Count
Source
269
208
    bool read(std::vector<T> & dst, const size_t n) const {
270
208
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
67
            return false;
272
67
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
141
        } else {
282
141
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
141
            if (nbytes_remain < n * sizeof(T)) {
286
29
                return false;
287
29
            }
288
141
        }
289
112
        dst.resize(n);
290
266k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
266k
            } else {
298
266k
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
266k
            }
302
266k
        }
303
141
        return true;
304
208
    }
bool gguf_reader::read<float>(std::__1::vector<float, std::__1::allocator<float> >&, unsigned long) const
Line
Count
Source
269
641
    bool read(std::vector<T> & dst, const size_t n) const {
270
641
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
30
            return false;
272
30
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
611
        } else {
282
611
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
611
            if (nbytes_remain < n * sizeof(T)) {
286
13
                return false;
287
13
            }
288
611
        }
289
598
        dst.resize(n);
290
186k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
185k
            } else {
298
185k
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
185k
            }
302
185k
        }
303
611
        return true;
304
641
    }
bool gguf_reader::read<bool>(std::__1::vector<bool, std::__1::allocator<bool> >&, unsigned long) const
Line
Count
Source
269
335
    bool read(std::vector<T> & dst, const size_t n) const {
270
335
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
68
            return false;
272
68
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
267
        } else {
282
267
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
267
            if (nbytes_remain < n * sizeof(T)) {
286
19
                return false;
287
19
            }
288
267
        }
289
248
        dst.resize(n);
290
1.49M
        for (size_t i = 0; i < dst.size(); ++i) {
291
1.49M
            if constexpr (std::is_same<T, bool>::value) {
292
1.49M
                bool tmp;
293
1.49M
                if (!read(tmp)) {
294
0
                    return false;
295
0
                }
296
1.49M
                dst[i] = tmp;
297
            } else {
298
                if (!read(dst[i])) {
299
                    return false;
300
                }
301
            }
302
1.49M
        }
303
267
        return true;
304
335
    }
bool gguf_reader::read<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(std::__1::vector<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, std::__1::allocator<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > > >&, unsigned long) const
Line
Count
Source
269
654
    bool read(std::vector<T> & dst, const size_t n) const {
270
654
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
57
            return false;
272
57
        }
273
597
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
597
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
0
                return false;
277
0
            }
278
597
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
31
                return false;
280
31
            }
281
        } else {
282
            if (n > SIZE_MAX / sizeof(T)) {
283
                return false;
284
            }
285
            if (nbytes_remain < n * sizeof(T)) {
286
                return false;
287
            }
288
        }
289
566
        dst.resize(n);
290
4.65k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
4.05k
            } else {
298
4.05k
                if (!read(dst[i])) {
299
33
                    return false;
300
33
                }
301
4.05k
            }
302
4.05k
        }
303
597
        return true;
304
654
    }
bool gguf_reader::read<unsigned long>(std::__1::vector<unsigned long, std::__1::allocator<unsigned long> >&, unsigned long) const
Line
Count
Source
269
327
    bool read(std::vector<T> & dst, const size_t n) const {
270
327
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
56
            return false;
272
56
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
271
        } else {
282
271
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
271
            if (nbytes_remain < n * sizeof(T)) {
286
21
                return false;
287
21
            }
288
271
        }
289
250
        dst.resize(n);
290
78.1k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
77.8k
            } else {
298
77.8k
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
77.8k
            }
302
77.8k
        }
303
271
        return true;
304
327
    }
bool gguf_reader::read<long>(std::__1::vector<long, std::__1::allocator<long> >&, unsigned long) const
Line
Count
Source
269
248
    bool read(std::vector<T> & dst, const size_t n) const {
270
248
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
69
            return false;
272
69
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
179
        } else {
282
179
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
179
            if (nbytes_remain < n * sizeof(T)) {
286
40
                return false;
287
40
            }
288
179
        }
289
139
        dst.resize(n);
290
234k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
234k
            } else {
298
234k
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
234k
            }
302
234k
        }
303
179
        return true;
304
248
    }
bool gguf_reader::read<double>(std::__1::vector<double, std::__1::allocator<double> >&, unsigned long) const
Line
Count
Source
269
324
    bool read(std::vector<T> & dst, const size_t n) const {
270
324
        if (n > GGUF_MAX_ARRAY_ELEMENTS) {
271
69
            return false;
272
69
        }
273
        if constexpr (std::is_same<T, std::string>::value) {
274
            // strings are prefixed with their length, so we need to account for that
275
            if (n > SIZE_MAX / sizeof(uint64_t)) {
276
                return false;
277
            }
278
            if (nbytes_remain < n * sizeof(uint64_t)) {
279
                return false;
280
            }
281
255
        } else {
282
255
            if (n > SIZE_MAX / sizeof(T)) {
283
0
                return false;
284
0
            }
285
255
            if (nbytes_remain < n * sizeof(T)) {
286
32
                return false;
287
32
            }
288
255
        }
289
223
        dst.resize(n);
290
57.6k
        for (size_t i = 0; i < dst.size(); ++i) {
291
            if constexpr (std::is_same<T, bool>::value) {
292
                bool tmp;
293
                if (!read(tmp)) {
294
                    return false;
295
                }
296
                dst[i] = tmp;
297
57.3k
            } else {
298
57.3k
                if (!read(dst[i])) {
299
0
                    return false;
300
0
                }
301
57.3k
            }
302
57.3k
        }
303
255
        return true;
304
324
    }
305
306
1.49M
    bool read(bool & dst) const {
307
1.49M
        int8_t tmp = -1;
308
1.49M
        if (!read(tmp)) {
309
1
            return false;
310
1
        }
311
1.49M
        dst = tmp != 0;
312
1.49M
        return true;
313
1.49M
    }
314
315
3.43k
    bool read(enum ggml_type & dst) const {
316
3.43k
        int32_t tmp = -1;
317
3.43k
        if (!read(tmp)) {
318
41
            return false;
319
41
        }
320
3.39k
        dst = ggml_type(tmp);
321
3.39k
        return true;
322
3.43k
    }
323
324
19.6k
    bool read(enum gguf_type & dst) const {
325
19.6k
        int32_t tmp = -1;
326
19.6k
        if (!read(tmp)) {
327
48
            return false;
328
48
        }
329
19.6k
        dst = gguf_type(tmp);
330
19.6k
        return true;
331
19.6k
    }
332
333
25.2k
    bool read(std::string & dst) const {
334
25.2k
        uint64_t size = 0;
335
25.2k
        if (!read(size)) {
336
360
            return false;
337
360
        }
338
24.8k
        if (size > GGUF_MAX_STRING_LENGTH) {
339
364
            GGML_LOG_ERROR("%s: string length %" PRIu64 " exceeds maximum %" PRIu64 "\n", __func__, size, (uint64_t) GGUF_MAX_STRING_LENGTH);
340
364
            return false;
341
364
        }
342
24.4k
        if (size > nbytes_remain) {
343
197
            GGML_LOG_ERROR("%s: string length %" PRIu64 " exceeds remaining file size %" PRIu64 " bytes\n", __func__, size, nbytes_remain);
344
197
            return false;
345
197
        }
346
24.2k
        dst.resize(static_cast<size_t>(size));
347
24.2k
        const size_t nread = fread(dst.data(), 1, size, file);
348
24.2k
        nbytes_remain -= nread;
349
24.2k
        return nread == size;
350
24.4k
    }
351
352
0
    bool read(void * dst, const size_t size) const {
353
0
        if (size > nbytes_remain) {
354
0
            return false;
355
0
        }
356
0
        const size_t nread = fread(dst, 1, size, file);
357
0
        nbytes_remain -= nread;
358
0
        return nread == size;
359
0
    }
360
361
private:
362
    FILE * file;
363
364
    mutable uint64_t nbytes_remain;
365
};
366
367
0
struct gguf_context * gguf_init_empty(void) {
368
0
    return new gguf_context;
369
0
}
370
371
template<typename T>
372
15.0k
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
15.0k
    if (is_array) {
374
4.30k
        std::vector<T> value;
375
4.30k
        try {
376
4.30k
            if (!gr.read(value, n)) {
377
962
                return false;
378
962
            }
379
4.30k
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
3.34k
        kv.emplace_back(key, value);
387
10.7k
    } else {
388
10.7k
        T value;
389
10.7k
        if (!gr.read(value)) {
390
38
            return false;
391
38
        }
392
10.6k
        kv.emplace_back(key, value);
393
10.6k
    }
394
14.0k
    return true;
395
15.0k
}
bool gguf_read_emplace_helper<unsigned char>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
2.45k
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
2.45k
    if (is_array) {
374
219
        std::vector<T> value;
375
219
        try {
376
219
            if (!gr.read(value, n)) {
377
64
                return false;
378
64
            }
379
219
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
155
        kv.emplace_back(key, value);
387
2.23k
    } else {
388
2.23k
        T value;
389
2.23k
        if (!gr.read(value)) {
390
4
            return false;
391
4
        }
392
2.23k
        kv.emplace_back(key, value);
393
2.23k
    }
394
2.38k
    return true;
395
2.45k
}
bool gguf_read_emplace_helper<signed char>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
934
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
934
    if (is_array) {
374
208
        std::vector<T> value;
375
208
        try {
376
208
            if (!gr.read(value, n)) {
377
64
                return false;
378
64
            }
379
208
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
144
        kv.emplace_back(key, value);
387
726
    } else {
388
726
        T value;
389
726
        if (!gr.read(value)) {
390
2
            return false;
391
2
        }
392
724
        kv.emplace_back(key, value);
393
724
    }
394
868
    return true;
395
934
}
bool gguf_read_emplace_helper<unsigned short>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
1.07k
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
1.07k
    if (is_array) {
374
207
        std::vector<T> value;
375
207
        try {
376
207
            if (!gr.read(value, n)) {
377
54
                return false;
378
54
            }
379
207
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
153
        kv.emplace_back(key, value);
387
864
    } else {
388
864
        T value;
389
864
        if (!gr.read(value)) {
390
2
            return false;
391
2
        }
392
862
        kv.emplace_back(key, value);
393
862
    }
394
1.01k
    return true;
395
1.07k
}
bool gguf_read_emplace_helper<short>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
1.55k
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
1.55k
    if (is_array) {
374
766
        std::vector<T> value;
375
766
        try {
376
766
            if (!gr.read(value, n)) {
377
90
                return false;
378
90
            }
379
766
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
676
        kv.emplace_back(key, value);
387
791
    } else {
388
791
        T value;
389
791
        if (!gr.read(value)) {
390
1
            return false;
391
1
        }
392
790
        kv.emplace_back(key, value);
393
790
    }
394
1.46k
    return true;
395
1.55k
}
bool gguf_read_emplace_helper<unsigned int>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
1.02k
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
1.02k
    if (is_array) {
374
171
        std::vector<T> value;
375
171
        try {
376
171
            if (!gr.read(value, n)) {
377
56
                return false;
378
56
            }
379
171
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
115
        kv.emplace_back(key, value);
387
858
    } else {
388
858
        T value;
389
858
        if (!gr.read(value)) {
390
6
            return false;
391
6
        }
392
852
        kv.emplace_back(key, value);
393
852
    }
394
967
    return true;
395
1.02k
}
bool gguf_read_emplace_helper<int>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
1.05k
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
1.05k
    if (is_array) {
374
208
        std::vector<T> value;
375
208
        try {
376
208
            if (!gr.read(value, n)) {
377
96
                return false;
378
96
            }
379
208
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
112
        kv.emplace_back(key, value);
387
843
    } else {
388
843
        T value;
389
843
        if (!gr.read(value)) {
390
2
            return false;
391
2
        }
392
841
        kv.emplace_back(key, value);
393
841
    }
394
953
    return true;
395
1.05k
}
bool gguf_read_emplace_helper<float>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
1.35k
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
1.35k
    if (is_array) {
374
641
        std::vector<T> value;
375
641
        try {
376
641
            if (!gr.read(value, n)) {
377
43
                return false;
378
43
            }
379
641
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
598
        kv.emplace_back(key, value);
387
716
    } else {
388
716
        T value;
389
716
        if (!gr.read(value)) {
390
2
            return false;
391
2
        }
392
714
        kv.emplace_back(key, value);
393
714
    }
394
1.31k
    return true;
395
1.35k
}
bool gguf_read_emplace_helper<bool>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
1.03k
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
1.03k
    if (is_array) {
374
335
        std::vector<T> value;
375
335
        try {
376
335
            if (!gr.read(value, n)) {
377
87
                return false;
378
87
            }
379
335
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
248
        kv.emplace_back(key, value);
387
702
    } else {
388
702
        T value;
389
702
        if (!gr.read(value)) {
390
1
            return false;
391
1
        }
392
701
        kv.emplace_back(key, value);
393
701
    }
394
949
    return true;
395
1.03k
}
bool gguf_read_emplace_helper<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
1.58k
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
1.58k
    if (is_array) {
374
654
        std::vector<T> value;
375
654
        try {
376
654
            if (!gr.read(value, n)) {
377
121
                return false;
378
121
            }
379
654
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
533
        kv.emplace_back(key, value);
387
931
    } else {
388
931
        T value;
389
931
        if (!gr.read(value)) {
390
7
            return false;
391
7
        }
392
924
        kv.emplace_back(key, value);
393
924
    }
394
1.45k
    return true;
395
1.58k
}
bool gguf_read_emplace_helper<unsigned long>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
1.06k
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
1.06k
    if (is_array) {
374
327
        std::vector<T> value;
375
327
        try {
376
327
            if (!gr.read(value, n)) {
377
77
                return false;
378
77
            }
379
327
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
250
        kv.emplace_back(key, value);
387
738
    } else {
388
738
        T value;
389
738
        if (!gr.read(value)) {
390
5
            return false;
391
5
        }
392
733
        kv.emplace_back(key, value);
393
733
    }
394
983
    return true;
395
1.06k
}
bool gguf_read_emplace_helper<long>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
883
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
883
    if (is_array) {
374
248
        std::vector<T> value;
375
248
        try {
376
248
            if (!gr.read(value, n)) {
377
109
                return false;
378
109
            }
379
248
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
139
        kv.emplace_back(key, value);
387
635
    } else {
388
635
        T value;
389
635
        if (!gr.read(value)) {
390
2
            return false;
391
2
        }
392
633
        kv.emplace_back(key, value);
393
633
    }
394
772
    return true;
395
883
}
bool gguf_read_emplace_helper<double>(gguf_reader const&, std::__1::vector<gguf_kv, std::__1::allocator<gguf_kv> >&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool, unsigned long)
Line
Count
Source
372
1.01k
bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct gguf_kv> & kv, const std::string & key, const bool is_array, const size_t n) {
373
1.01k
    if (is_array) {
374
324
        std::vector<T> value;
375
324
        try {
376
324
            if (!gr.read(value, n)) {
377
101
                return false;
378
101
            }
379
324
        } catch (std::length_error &) {
380
0
            GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
381
0
            return false;
382
0
        } catch (std::bad_alloc &) {
383
0
            GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
384
0
            return false;
385
0
        }
386
223
        kv.emplace_back(key, value);
387
690
    } else {
388
690
        T value;
389
690
        if (!gr.read(value)) {
390
4
            return false;
391
4
        }
392
686
        kv.emplace_back(key, value);
393
686
    }
394
909
    return true;
395
1.01k
}
396
397
4.91k
struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
398
4.91k
    const struct gguf_reader gr(file);
399
4.91k
    struct gguf_context * ctx = new gguf_context;
400
401
4.91k
    bool ok = true;
402
403
    // file magic
404
4.91k
    {
405
4.91k
        std::vector<char> magic;
406
4.91k
        ok = ok && gr.read(magic, 4);
407
408
4.91k
        if (!ok) {
409
0
            GGML_LOG_ERROR("%s: failed to read magic\n", __func__);
410
0
            gguf_free(ctx);
411
0
            return nullptr;
412
0
        }
413
414
24.5k
        for (uint32_t i = 0; i < magic.size(); i++) {
415
19.6k
            if (magic[i] != GGUF_MAGIC[i]) {
416
0
                char c0 = isprint(magic[0]) ? magic[0] : '?';
417
0
                char c1 = isprint(magic[1]) ? magic[1] : '?';
418
0
                char c2 = isprint(magic[2]) ? magic[2] : '?';
419
0
                char c3 = isprint(magic[3]) ? magic[3] : '?';
420
0
                GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, c0, c1, c2, c3);
421
0
                gguf_free(ctx);
422
0
                return nullptr;
423
0
            }
424
19.6k
        }
425
4.91k
    }
426
427
    // header
428
4.91k
    int64_t n_kv      = 0;
429
4.91k
    int64_t n_tensors = 0;
430
431
4.91k
    if (ok && gr.read(ctx->version)) {
432
4.91k
        if (ok && ctx->version == 0) {
433
12
            GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version);
434
12
            ok = false;
435
12
        }
436
437
        /*
438
         * bit layout is different when reading non-native endian models.
439
         * assuming that the GGUF version is 3, the non-native endian model
440
         * would read it as 0x30000000. we can use the AND operation against
441
         * the last 4 hexadecimal digits to check if the model is the same
442
         * endianness as the host system.
443
        */
444
4.91k
        if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) {
445
11
            GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
446
11
            ok = false;
447
11
        }
448
449
4.91k
        if (ok && ctx->version == 1) {
450
3
            GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
451
3
            ok = false;
452
3
        }
453
4.91k
        if (ok && ctx->version > GGUF_VERSION) {
454
171
            GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
455
171
                __func__, ctx->version, GGUF_VERSION);
456
171
            ok = false;
457
171
        }
458
4.91k
    } else {
459
0
        ok = false;
460
0
    }
461
462
4.91k
    if (ok && gr.read(n_tensors)) {
463
4.71k
        static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
464
4.71k
        if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) {
465
65
            GGML_LOG_ERROR("%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
466
65
                __func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info));
467
65
            ok = false;
468
65
        }
469
4.71k
    } else {
470
204
        ok = false;
471
204
    }
472
473
4.91k
    if (ok && gr.read(n_kv)) {
474
4.60k
        static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
475
4.60k
        if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(gguf_kv))) {
476
99
            GGML_LOG_ERROR("%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
477
99
                    __func__, n_kv, SIZE_MAX/sizeof(gguf_kv));
478
99
            ok = false;
479
99
        }
480
4.60k
    } else {
481
310
        ok = false;
482
310
    }
483
484
4.91k
    if (!ok) {
485
409
        GGML_LOG_ERROR("%s: failed to read header\n", __func__);
486
409
        gguf_free(ctx);
487
409
        return nullptr;
488
409
    }
489
490
    // KV pairs
491
4.50k
    {
492
19.4k
        for (int64_t i = 0; ok && i < n_kv; ++i) {
493
16.1k
            std::string key;
494
16.1k
            gguf_type   type     = gguf_type(-1);
495
16.1k
            bool        is_array = false;
496
16.1k
            uint64_t    n        = 1;
497
498
16.1k
            try {
499
16.1k
                ok = ok && gr.read(key);
500
16.1k
            } catch (std::length_error &) {
501
0
                GGML_LOG_ERROR("%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
502
0
                ok = false;
503
0
            } catch (std::bad_alloc &) {
504
0
                GGML_LOG_ERROR("%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
505
0
                ok = false;
506
0
            }
507
97.2k
            for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
508
81.1k
                if (key == ctx->kv[j].key) {
509
17
                    GGML_LOG_ERROR("%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
510
17
                    ok = false;
511
17
                }
512
81.1k
            }
513
16.1k
            if (!ok) {
514
773
                break;
515
773
            }
516
517
15.3k
            ok = ok && gr.read(type);
518
15.3k
            if (type == GGUF_TYPE_ARRAY) {
519
4.33k
                is_array = true;
520
4.33k
                ok = ok && gr.read(type);
521
4.33k
                ok = ok && gr.read(n);
522
4.33k
            }
523
15.3k
            if (!ok) {
524
59
                break;
525
59
            }
526
527
15.2k
            switch (type) {
528
2.45k
                case GGUF_TYPE_UINT8:   ok = ok && gguf_read_emplace_helper<uint8_t>    (gr, ctx->kv, key, is_array, n); break;
529
934
                case GGUF_TYPE_INT8:    ok = ok && gguf_read_emplace_helper<int8_t>     (gr, ctx->kv, key, is_array, n); break;
530
1.07k
                case GGUF_TYPE_UINT16:  ok = ok && gguf_read_emplace_helper<uint16_t>   (gr, ctx->kv, key, is_array, n); break;
531
1.55k
                case GGUF_TYPE_INT16:   ok = ok && gguf_read_emplace_helper<int16_t>    (gr, ctx->kv, key, is_array, n); break;
532
1.02k
                case GGUF_TYPE_UINT32:  ok = ok && gguf_read_emplace_helper<uint32_t>   (gr, ctx->kv, key, is_array, n); break;
533
1.05k
                case GGUF_TYPE_INT32:   ok = ok && gguf_read_emplace_helper<int32_t>    (gr, ctx->kv, key, is_array, n); break;
534
1.35k
                case GGUF_TYPE_FLOAT32: ok = ok && gguf_read_emplace_helper<float>      (gr, ctx->kv, key, is_array, n); break;
535
1.03k
                case GGUF_TYPE_BOOL:    ok = ok && gguf_read_emplace_helper<bool>       (gr, ctx->kv, key, is_array, n); break;
536
1.58k
                case GGUF_TYPE_STRING:  ok = ok && gguf_read_emplace_helper<std::string>(gr, ctx->kv, key, is_array, n); break;
537
1.06k
                case GGUF_TYPE_UINT64:  ok = ok && gguf_read_emplace_helper<uint64_t>   (gr, ctx->kv, key, is_array, n); break;
538
883
                case GGUF_TYPE_INT64:   ok = ok && gguf_read_emplace_helper<int64_t>    (gr, ctx->kv, key, is_array, n); break;
539
1.01k
                case GGUF_TYPE_FLOAT64: ok = ok && gguf_read_emplace_helper<double>     (gr, ctx->kv, key, is_array, n); break;
540
0
                case GGUF_TYPE_ARRAY:
541
244
                default:
542
244
                    {
543
244
                        GGML_LOG_ERROR("%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
544
244
                        ok = false;
545
244
                    } break;
546
15.2k
            }
547
15.2k
        }
548
549
4.20k
        if (!ok) {
550
2.07k
            GGML_LOG_ERROR("%s: failed to read key-value pairs\n", __func__);
551
2.07k
            gguf_free(ctx);
552
2.07k
            return nullptr;
553
2.07k
        }
554
2.12k
        GGML_ASSERT(int64_t(ctx->kv.size()) == n_kv);
555
556
2.12k
        const int alignment_idx = gguf_find_key(ctx, GGUF_KEY_GENERAL_ALIGNMENT);
557
2.12k
        ctx->alignment = alignment_idx == -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx);
558
559
2.12k
        if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
560
7
            GGML_LOG_ERROR("%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
561
7
            gguf_free(ctx);
562
7
            return nullptr;
563
7
        }
564
2.12k
    }
565
566
    // read the tensor info
567
5.37k
    for (int64_t i = 0; ok && i < n_tensors; ++i) {
568
4.11k
        struct gguf_tensor_info info;
569
570
        // tensor name
571
4.11k
        {
572
4.11k
            std::string name;
573
4.11k
            try {
574
4.11k
                ok = ok && gr.read(name);
575
4.11k
            } catch (std::length_error &) {
576
0
                GGML_LOG_ERROR("%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
577
0
                ok = false;
578
0
            } catch (std::bad_alloc &) {
579
0
                GGML_LOG_ERROR("%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
580
0
                ok = false;
581
0
            }
582
4.11k
            if (name.length() >= GGML_MAX_NAME) {
583
7
                GGML_LOG_ERROR("%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME);
584
7
                ok = false;
585
7
                break;
586
7
            }
587
4.10k
            ggml_set_name(&info.t, name.c_str());
588
589
            // make sure there are no duplicate tensor names
590
14.8k
            for (int64_t j = 0; ok && j < i; ++j) {
591
10.7k
                if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
592
18
                    GGML_LOG_ERROR("%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
593
18
                    ok = false;
594
18
                    break;
595
18
                }
596
10.7k
            }
597
4.10k
        }
598
4.10k
        if (!ok) {
599
143
            break;
600
143
        }
601
602
        // tensor shape
603
3.96k
        {
604
3.96k
            uint32_t n_dims = 0;
605
3.96k
            ok = ok && gr.read(n_dims);
606
3.96k
            if (n_dims > GGML_MAX_DIMS) {
607
115
                GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
608
115
                    __func__, info.t.name, n_dims, GGML_MAX_DIMS);
609
115
                ok = false;
610
115
                break;
611
115
            }
612
18.4k
            for (uint32_t j = 0; ok && j < GGML_MAX_DIMS; ++j) {
613
14.8k
                info.t.ne[j] = 1;
614
14.8k
                if (j < n_dims) {
615
9.16k
                    ok = ok && gr.read(info.t.ne[j]);
616
9.16k
                }
617
618
                // check that all ne are non-negative
619
14.8k
                if (info.t.ne[j] < 0) {
620
223
                    GGML_LOG_ERROR("%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
621
223
                        __func__, info.t.name, j, info.t.ne[j]);
622
223
                    ok = false;
623
223
                    break;
624
223
                }
625
14.8k
            }
626
627
            // check that the total number of elements is representable
628
3.84k
            if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) ||
629
3.48k
                       (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
630
3.46k
                       (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {
631
632
82
                GGML_LOG_ERROR("%s: total number of elements in tensor '%s' with shape "
633
82
                    "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
634
82
                    __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
635
82
                ok = false;
636
82
                break;
637
82
            }
638
3.84k
        }
639
3.76k
        if (!ok) {
640
331
            break;
641
331
        }
642
643
        // tensor type
644
3.43k
        {
645
3.43k
            ok = ok && gr.read(info.t.type);
646
647
            // check that tensor type is within defined range
648
3.43k
            if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
649
158
                GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d. should be in [0, %d)\n",
650
158
                    __func__, info.t.name, info.t.type, GGML_TYPE_COUNT);
651
158
                ok = false;
652
158
                break;
653
158
            }
654
3.27k
            const size_t  type_size = ggml_type_size(info.t.type);
655
3.27k
            const int64_t blck_size = ggml_blck_size(info.t.type);
656
657
            // check that row size is divisible by block size
658
3.27k
            if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
659
16
                GGML_LOG_ERROR("%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
660
16
                    "not a multiple of block size (%" PRId64 ")\n",
661
16
                    __func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size);
662
16
                ok = false;
663
16
                break;
664
16
            }
665
666
            // check that the size of the tensor in bytes is representable
667
3.25k
            if (ok && uint64_t(ggml_nelements(&info.t)/ggml_blck_size(info.t.type)) > SIZE_MAX/ggml_type_size(info.t.type)) {
668
7
                GGML_LOG_ERROR("%s: tensor '%s' with shape (%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") has a size in bytes > %zu\n",
669
7
                    __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], SIZE_MAX);
670
7
                ok = false;
671
7
                break;
672
7
            }
673
674
            // calculate byte offsets given the tensor shape and type
675
3.25k
            info.t.nb[0] = type_size;
676
3.25k
            info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size);
677
9.75k
            for (int j = 2; j < GGML_MAX_DIMS; ++j) {
678
6.50k
                info.t.nb[j] = info.t.nb[j - 1]*info.t.ne[j - 1];
679
6.50k
            }
680
3.25k
        }
681
3.25k
        if (!ok) {
682
1
            break;
683
1
        }
684
685
        // tensor data offset within buffer
686
3.25k
        ok = ok && gr.read(info.offset);
687
688
3.25k
        ctx->info.push_back(info);
689
3.25k
    }
690
691
2.12k
    if (!ok) {
692
875
        GGML_LOG_ERROR("%s: failed to read tensor info\n", __func__);
693
875
        gguf_free(ctx);
694
875
        return nullptr;
695
875
    }
696
1.24k
    GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors);
697
698
    // we require the data section to be aligned, so take into account any padding
699
1.24k
    if (gguf_fseek(file, GGML_PAD(gguf_ftell(file), ctx->alignment), SEEK_SET) != 0) {
700
0
        GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
701
0
        gguf_free(ctx);
702
0
        return nullptr;
703
0
    }
704
705
    // store the current file offset - this is where the data section starts
706
1.24k
    ctx->offset = gguf_ftell(file);
707
708
    // compute the total size of the data section, taking into account the alignment
709
1.24k
    {
710
1.24k
        ctx->size = 0;
711
3.59k
        for (size_t i = 0; i < ctx->info.size(); ++i) {
712
2.51k
            const gguf_tensor_info & ti = ctx->info[i];
713
2.51k
            if (ti.offset != ctx->size) {
714
168
                GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
715
168
                    __func__, ti.t.name, ti.offset, ctx->size);
716
168
                GGML_LOG_ERROR("%s: failed to read tensor data\n", __func__);
717
168
                gguf_free(ctx);
718
168
                return nullptr;
719
168
            }
720
2.34k
            size_t padded_size = GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment);
721
2.34k
            if (SIZE_MAX - ctx->size < padded_size) {
722
1
                GGML_LOG_ERROR("%s: tensor '%s' size overflow, cannot accumulate size %zu + %zu\n",
723
1
                    __func__, ti.t.name, ctx->size, padded_size);
724
1
                gguf_free(ctx);
725
1
                return nullptr;
726
1
            }
727
2.34k
            ctx->size += padded_size;
728
2.34k
        }
729
1.24k
    }
730
731
    // load the tensor data only if requested
732
1.07k
    if (params.ctx != nullptr) {
733
        // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob
734
        // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of
735
        //   the ggml_tensor structs to the appropriate locations in the binary blob
736
737
        // compute the exact size needed for the new ggml_context
738
1.07k
        size_t mem_size = 0;
739
1.07k
        if (params.no_alloc) {
740
1.07k
            if (n_tensors != 0 && SIZE_MAX / n_tensors < ggml_tensor_overhead()) {
741
0
                GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
742
0
                gguf_free(ctx);
743
0
                return nullptr;
744
0
            }
745
746
1.07k
            const size_t overhead = n_tensors * ggml_tensor_overhead();
747
748
1.07k
            mem_size = overhead;
749
1.07k
        } else {
750
0
            if ((n_tensors + 1) != 0 && SIZE_MAX / (n_tensors + 1) < ggml_tensor_overhead()) {
751
0
                GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
752
0
                gguf_free(ctx);
753
0
                return nullptr;
754
0
            }
755
756
0
            const size_t overhead = (n_tensors + 1) * ggml_tensor_overhead();
757
758
0
            if (SIZE_MAX - overhead < ctx->size) {
759
0
                GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__);
760
0
                gguf_free(ctx);
761
0
                return nullptr;
762
0
            }
763
764
0
            mem_size = overhead + ctx->size;
765
0
        }
766
767
1.07k
        struct ggml_init_params pdata = {
768
1.07k
            /*mem_size   =*/ mem_size,
769
1.07k
            /*mem_buffer =*/ nullptr,
770
1.07k
            /*no_alloc   =*/ params.no_alloc,
771
1.07k
        };
772
773
1.07k
        *params.ctx = ggml_init(pdata);
774
1.07k
        if (*params.ctx == nullptr) {
775
0
            GGML_LOG_ERROR("%s: failed to initialize ggml context for storing tensors\n", __func__);
776
0
            gguf_free(ctx);
777
0
            return nullptr;
778
0
        }
779
780
1.07k
        struct ggml_context * ctx_data = *params.ctx;
781
782
1.07k
        struct ggml_tensor * data = nullptr;
783
784
1.07k
        if (!params.no_alloc) {
785
0
            data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
786
787
0
            ok = ok && data != nullptr;
788
789
0
            if (ok) {
790
0
                ggml_set_name(data, "GGUF tensor data binary blob");
791
0
            }
792
793
            // read the binary blob with the tensor data
794
0
            ok = ok && gr.read(data->data, ctx->size);
795
796
0
            if (!ok) {
797
0
                GGML_LOG_ERROR("%s: failed to read tensor data binary blob\n", __func__);
798
0
                ggml_free(ctx_data);
799
0
                *params.ctx = nullptr;
800
0
                gguf_free(ctx);
801
0
                return nullptr;
802
0
            }
803
804
0
            ctx->data = data->data;
805
0
        }
806
807
1.07k
        ggml_set_no_alloc(ctx_data, true);
808
809
        // create the tensors
810
3.15k
        for (size_t i = 0; i < ctx->info.size(); ++i) {
811
2.08k
            const struct gguf_tensor_info & info = ctx->info[i];
812
813
2.08k
            struct ggml_tensor * cur = ggml_new_tensor(ctx_data, info.t.type, GGML_MAX_DIMS, info.t.ne);
814
815
2.08k
            ok = ok && cur != nullptr;
816
817
2.08k
            if (!ok) {
818
0
                break;
819
0
            }
820
821
2.08k
            ggml_set_name(cur, info.t.name);
822
823
            // point the data member to the appropriate location in the binary blob using the tensor info
824
2.08k
            if (!params.no_alloc) {
825
0
                cur->data = (char *) data->data + info.offset;
826
0
            }
827
2.08k
        }
828
829
1.07k
        if (!ok) {
830
0
            GGML_LOG_ERROR("%s: failed to create tensors\n", __func__);
831
0
            ggml_free(ctx_data);
832
0
            *params.ctx = nullptr;
833
0
            gguf_free(ctx);
834
0
            return nullptr;
835
0
        }
836
837
1.07k
        ggml_set_no_alloc(ctx_data, params.no_alloc);
838
1.07k
    }
839
840
1.07k
    return ctx;
841
1.07k
}
842
843
4.91k
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
844
4.91k
    FILE * file = ggml_fopen(fname, "rb");
845
846
4.91k
    if (!file) {
847
0
        GGML_LOG_ERROR("%s: failed to open GGUF file '%s' (%s)\n", __func__, fname, strerror(errno));
848
0
        return nullptr;
849
0
    }
850
851
4.91k
    struct gguf_context * result = gguf_init_from_file_impl(file, params);
852
4.91k
    fclose(file);
853
4.91k
    return result;
854
4.91k
}
855
856
4.60k
// Destroys a gguf_context. Passing nullptr is allowed and does nothing.
void gguf_free(struct gguf_context * ctx) {
    // deleting a null pointer is a no-op, so no separate null check is needed
    delete ctx;
}
862
863
4.83k
const char * gguf_type_name(enum gguf_type type) {
864
4.83k
    auto it = GGUF_TYPE_NAME.find(type);
865
4.83k
    return it == GGUF_TYPE_NAME.end() ? nullptr : it->second;
866
4.83k
}
867
868
725
uint32_t gguf_get_version(const struct gguf_context * ctx) {
869
725
    return ctx->version;
870
725
}
871
872
0
size_t gguf_get_alignment(const struct gguf_context * ctx) {
873
0
    return ctx->alignment;
874
0
}
875
876
1.50k
size_t gguf_get_data_offset(const struct gguf_context * ctx) {
877
1.50k
    return ctx->offset;
878
1.50k
}
879
880
59.7k
int64_t gguf_get_n_kv(const struct gguf_context * ctx) {
881
59.7k
    return ctx->kv.size();
882
59.7k
}
883
884
5.02k
int64_t gguf_find_key(const struct gguf_context * ctx, const char * key) {
885
    // return -1 if key not found
886
5.02k
    int64_t keyfound = -1;
887
888
5.02k
    const int64_t n_kv = gguf_get_n_kv(ctx);
889
890
30.6k
    for (int64_t i = 0; i < n_kv; ++i) {
891
25.9k
        if (strcmp(key, gguf_get_key(ctx, i)) == 0) {
892
298
            keyfound = i;
893
298
            break;
894
298
        }
895
25.9k
    }
896
897
5.02k
    return keyfound;
898
5.02k
}
899
900
31.4k
const char * gguf_get_key(const struct gguf_context * ctx, int64_t key_id) {
901
31.4k
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
902
31.4k
    return ctx->kv[key_id].get_key().c_str();
903
31.4k
}
904
905
11.2k
enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int64_t key_id) {
906
11.2k
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
907
11.2k
    return ctx->kv[key_id].is_array ? GGUF_TYPE_ARRAY : ctx->kv[key_id].get_type();
908
11.2k
}
909
910
1.34k
enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int64_t key_id) {
911
1.34k
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
912
1.34k
    GGML_ASSERT(ctx->kv[key_id].is_array);
913
1.34k
    return ctx->kv[key_id].get_type();
914
1.34k
}
915
916
544
const void * gguf_get_arr_data(const struct gguf_context * ctx, int64_t key_id) {
917
544
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
918
544
    GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING);
919
544
    return ctx->kv[key_id].data.data();
920
544
}
921
922
1.44k
const char * gguf_get_arr_str(const struct gguf_context * ctx, int64_t key_id, size_t i) {
923
1.44k
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
924
1.44k
    GGML_ASSERT(ctx->kv[key_id].get_type() == GGUF_TYPE_STRING);
925
1.44k
    return ctx->kv[key_id].data_string[i].c_str();
926
1.44k
}
927
928
1.34k
size_t gguf_get_arr_n(const struct gguf_context * ctx, int64_t key_id) {
929
1.34k
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
930
931
1.34k
    if (ctx->kv[key_id].type == GGUF_TYPE_STRING) {
932
254
        return ctx->kv[key_id].data_string.size();
933
254
    }
934
935
1.08k
    const size_t type_size = gguf_type_size(ctx->kv[key_id].type);
936
1.08k
    GGML_ASSERT(ctx->kv[key_id].data.size() % type_size == 0);
937
1.08k
    return ctx->kv[key_id].data.size() / type_size;
938
1.34k
}
939
940
0
uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int64_t key_id) {
941
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
942
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
943
0
    return ctx->kv[key_id].get_val<uint8_t>();
944
0
}
945
946
0
int8_t gguf_get_val_i8(const struct gguf_context * ctx, int64_t key_id) {
947
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
948
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
949
0
    return ctx->kv[key_id].get_val<int8_t>();
950
0
}
951
952
3
uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int64_t key_id) {
953
3
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
954
3
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
955
3
    return ctx->kv[key_id].get_val<uint16_t>();
956
3
}
957
958
0
int16_t gguf_get_val_i16(const struct gguf_context * ctx, int64_t key_id) {
959
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
960
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
961
0
    return ctx->kv[key_id].get_val<int16_t>();
962
0
}
963
964
27
uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int64_t key_id) {
965
27
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
966
27
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
967
27
    return ctx->kv[key_id].get_val<uint32_t>();
968
27
}
969
970
0
int32_t gguf_get_val_i32(const struct gguf_context * ctx, int64_t key_id) {
971
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
972
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
973
0
    return ctx->kv[key_id].get_val<int32_t>();
974
0
}
975
976
0
float gguf_get_val_f32(const struct gguf_context * ctx, int64_t key_id) {
977
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
978
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
979
0
    return ctx->kv[key_id].get_val<float>();
980
0
}
981
982
0
uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int64_t key_id) {
983
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
984
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
985
0
    return ctx->kv[key_id].get_val<uint64_t>();
986
0
}
987
988
0
int64_t gguf_get_val_i64(const struct gguf_context * ctx, int64_t key_id) {
989
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
990
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
991
0
    return ctx->kv[key_id].get_val<int64_t>();
992
0
}
993
994
0
double gguf_get_val_f64(const struct gguf_context * ctx, int64_t key_id) {
995
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
996
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
997
0
    return ctx->kv[key_id].get_val<double>();
998
0
}
999
1000
0
bool gguf_get_val_bool(const struct gguf_context * ctx, int64_t key_id) {
1001
0
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1002
0
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
1003
0
    return ctx->kv[key_id].get_val<bool>();
1004
0
}
1005
1006
920
const char * gguf_get_val_str(const struct gguf_context * ctx, int64_t key_id) {
1007
920
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1008
920
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
1009
920
    return ctx->kv[key_id].get_val<std::string>().c_str();
1010
920
}
1011
1012
4.12k
const void * gguf_get_val_data(const struct gguf_context * ctx, int64_t key_id) {
1013
4.12k
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
1014
4.12k
    GGML_ASSERT(ctx->kv[key_id].get_ne() == 1);
1015
4.12k
    GGML_ASSERT(ctx->kv[key_id].get_type() != GGUF_TYPE_STRING);
1016
4.12k
    return ctx->kv[key_id].data.data();
1017
4.12k
}
1018
1019
8.51k
int64_t gguf_get_n_tensors(const struct gguf_context * ctx) {
1020
8.51k
    return ctx->info.size();
1021
8.51k
}
1022
1023
1.50k
int64_t gguf_find_tensor(const struct gguf_context * ctx, const char * name) {
1024
    // return -1 if tensor not found
1025
1.50k
    int64_t tensor_id = -1;
1026
1027
1.50k
    const int64_t n_tensors = gguf_get_n_tensors(ctx);
1028
1029
5.51k
    for (int64_t i = 0; i < n_tensors; ++i) {
1030
5.51k
        if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) {
1031
1.50k
            tensor_id = i;
1032
1.50k
            break;
1033
1.50k
        }
1034
5.51k
    }
1035
1036
1.50k
    return tensor_id;
1037
1.50k
}
1038
1039
1.50k
size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int64_t tensor_id) {
1040
1.50k
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
1041
1.50k
    return ctx->info[tensor_id].offset;
1042
1.50k
}
1043
1044
5.51k
const char * gguf_get_tensor_name(const struct gguf_context * ctx, int64_t tensor_id) {
1045
5.51k
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
1046
5.51k
    return ctx->info[tensor_id].t.name;
1047
5.51k
}
1048
1049
0
enum ggml_type gguf_get_tensor_type(const struct gguf_context * ctx, int64_t tensor_id) {
1050
0
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
1051
0
    return ctx->info[tensor_id].t.type;
1052
0
}
1053
1054
0
size_t gguf_get_tensor_size(const struct gguf_context * ctx, int64_t tensor_id) {
1055
0
    GGML_ASSERT(tensor_id >= 0 && tensor_id < gguf_get_n_tensors(ctx));
1056
0
    return ggml_nbytes(&ctx->info[tensor_id].t);
1057
0
}
1058
1059
0
int64_t gguf_remove_key(struct gguf_context * ctx, const char * key) {
1060
0
    const int64_t key_id = gguf_find_key(ctx, key);
1061
0
    if (key_id >= 0) {
1062
0
        ctx->kv.erase(ctx->kv.begin() + key_id);
1063
0
    }
1064
0
    return key_id;
1065
0
}
1066
1067
template<typename T>
1068
0
static void gguf_check_reserved_keys(const std::string & key, const T val) {
1069
0
    if (key == GGUF_KEY_GENERAL_ALIGNMENT) {
1070
0
        if constexpr (std::is_same<T, uint32_t>::value) {
1071
0
            GGML_ASSERT(val > 0 && (val & (val - 1)) == 0 && GGUF_KEY_GENERAL_ALIGNMENT " must be power of 2");
1072
0
        } else {
1073
0
            GGML_UNUSED(val);
1074
0
            GGML_ABORT(GGUF_KEY_GENERAL_ALIGNMENT " must be type u32");
1075
0
        }
1076
0
    }
1077
0
}
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned char)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<signed char>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, signed char)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned short)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<short>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, short)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned int)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<int>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, int)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<float>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, float)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<unsigned long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, unsigned long)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<long>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, long)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<double>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, double)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<bool>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, bool)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<char const*>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, char const*)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<void const*>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, void const*)
Unexecuted instantiation: gguf.cpp:void gguf_check_reserved_keys<char const**>(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, char const**)
1078
1079
0
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
1080
0
    gguf_check_reserved_keys(key, val);
1081
0
    gguf_remove_key(ctx, key);
1082
0
    ctx->kv.emplace_back(key, val);
1083
0
}
1084
1085
0
void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
1086
0
    gguf_check_reserved_keys(key, val);
1087
0
    gguf_remove_key(ctx, key);
1088
0
    ctx->kv.emplace_back(key, val);
1089
0
}
1090
1091
0
void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
1092
0
    gguf_check_reserved_keys(key, val);
1093
0
    gguf_remove_key(ctx, key);
1094
0
    ctx->kv.emplace_back(key, val);
1095
0
}
1096
1097
0
void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
1098
0
    gguf_check_reserved_keys(key, val);
1099
0
    gguf_remove_key(ctx, key);
1100
0
    ctx->kv.emplace_back(key, val);
1101
0
}
1102
1103
0
void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
1104
0
    gguf_check_reserved_keys(key, val);
1105
0
    gguf_remove_key(ctx, key);
1106
0
    ctx->kv.emplace_back(key, val);
1107
0
}
1108
1109
0
void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
1110
0
    gguf_check_reserved_keys(key, val);
1111
0
    gguf_remove_key(ctx, key);
1112
0
    ctx->kv.emplace_back(key, val);
1113
0
}
1114
1115
0
void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
1116
0
    gguf_check_reserved_keys(key, val);
1117
0
    gguf_remove_key(ctx, key);
1118
0
    ctx->kv.emplace_back(key, val);
1119
0
}
1120
1121
0
void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
1122
0
    gguf_check_reserved_keys(key, val);
1123
0
    gguf_remove_key(ctx, key);
1124
0
    ctx->kv.emplace_back(key, val);
1125
0
}
1126
1127
0
void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
1128
0
    gguf_check_reserved_keys(key, val);
1129
0
    gguf_remove_key(ctx, key);
1130
0
    ctx->kv.emplace_back(key, val);
1131
0
}
1132
1133
0
void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
1134
0
    gguf_check_reserved_keys(key, val);
1135
0
    gguf_remove_key(ctx, key);
1136
0
    ctx->kv.emplace_back(key, val);
1137
0
}
1138
1139
0
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
1140
0
    gguf_check_reserved_keys(key, val);
1141
0
    gguf_remove_key(ctx, key);
1142
0
    ctx->kv.emplace_back(key, val);
1143
0
}
1144
1145
0
void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
1146
0
    gguf_check_reserved_keys(key, val);
1147
0
    gguf_remove_key(ctx, key);
1148
0
    ctx->kv.emplace_back(key, std::string(val));
1149
0
}
1150
1151
0
void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, size_t n) {
1152
0
    gguf_check_reserved_keys(key, data);
1153
0
    gguf_remove_key(ctx, key);
1154
1155
0
    const size_t nbytes = n*gguf_type_size(type);
1156
0
    std::vector<int8_t> tmp(nbytes);
1157
0
    if (!tmp.empty()) {
1158
0
        memcpy(tmp.data(), data, nbytes);
1159
0
    }
1160
0
    ctx->kv.emplace_back(key, tmp);
1161
0
    ctx->kv.back().cast(type);
1162
0
}
1163
1164
0
void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, size_t n) {
1165
0
    gguf_check_reserved_keys(key, data);
1166
0
    gguf_remove_key(ctx, key);
1167
1168
0
    std::vector<std::string> tmp(n);
1169
0
    for (size_t i = 0; i < n; ++i) {
1170
0
        tmp[i] = data[i];
1171
0
    }
1172
0
    ctx->kv.emplace_back(key, tmp);
1173
0
}
1174
1175
// set or add KV pairs from another context
1176
0
void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src) {
1177
0
    const int64_t n_kv = gguf_get_n_kv(src);
1178
0
    for (int64_t i = 0; i < n_kv; ++i) {
1179
0
        const struct gguf_kv & kv = src->kv[i];
1180
1181
0
        if (!kv.is_array) {
1182
0
            switch (kv.get_type()) {
1183
0
                case GGUF_TYPE_UINT8:   gguf_set_val_u8  (ctx, kv.get_key().c_str(), kv.get_val<uint8_t>());             break;
1184
0
                case GGUF_TYPE_INT8:    gguf_set_val_i8  (ctx, kv.get_key().c_str(), kv.get_val<int8_t>());              break;
1185
0
                case GGUF_TYPE_UINT16:  gguf_set_val_u16 (ctx, kv.get_key().c_str(), kv.get_val<uint16_t>());            break;
1186
0
                case GGUF_TYPE_INT16:   gguf_set_val_i16 (ctx, kv.get_key().c_str(), kv.get_val<int16_t>());             break;
1187
0
                case GGUF_TYPE_UINT32:  gguf_set_val_u32 (ctx, kv.get_key().c_str(), kv.get_val<uint32_t>());            break;
1188
0
                case GGUF_TYPE_INT32:   gguf_set_val_i32 (ctx, kv.get_key().c_str(), kv.get_val<int32_t>());             break;
1189
0
                case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, kv.get_key().c_str(), kv.get_val<float>());               break;
1190
0
                case GGUF_TYPE_UINT64:  gguf_set_val_u64 (ctx, kv.get_key().c_str(), kv.get_val<uint64_t>());            break;
1191
0
                case GGUF_TYPE_INT64:   gguf_set_val_i64 (ctx, kv.get_key().c_str(), kv.get_val<int64_t>());             break;
1192
0
                case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, kv.get_key().c_str(), kv.get_val<double>());              break;
1193
0
                case GGUF_TYPE_BOOL:    gguf_set_val_bool(ctx, kv.get_key().c_str(), kv.get_val<bool>());                break;
1194
0
                case GGUF_TYPE_STRING:  gguf_set_val_str (ctx, kv.get_key().c_str(), kv.get_val<std::string>().c_str()); break;
1195
0
                case GGUF_TYPE_ARRAY:
1196
0
                default: GGML_ABORT("invalid type");
1197
0
            }
1198
0
            continue;
1199
0
        }
1200
1201
0
        const size_t ne = kv.get_ne();
1202
1203
0
        switch (kv.get_type()) {
1204
0
            case GGUF_TYPE_UINT8:
1205
0
            case GGUF_TYPE_INT8:
1206
0
            case GGUF_TYPE_UINT16:
1207
0
            case GGUF_TYPE_INT16:
1208
0
            case GGUF_TYPE_UINT32:
1209
0
            case GGUF_TYPE_INT32:
1210
0
            case GGUF_TYPE_FLOAT32:
1211
0
            case GGUF_TYPE_UINT64:
1212
0
            case GGUF_TYPE_INT64:
1213
0
            case GGUF_TYPE_FLOAT64:
1214
0
            case GGUF_TYPE_BOOL: {
1215
0
                gguf_set_arr_data(ctx, kv.get_key().c_str(), kv.get_type(), kv.data.data(), ne);
1216
0
            } break;
1217
0
            case GGUF_TYPE_STRING: {
1218
0
                std::vector<const char *> tmp(ne);
1219
0
                for (size_t j = 0; j < ne; ++j) {
1220
0
                    tmp[j] = kv.data_string[j].c_str();
1221
0
                }
1222
0
                gguf_set_arr_str(ctx, kv.get_key().c_str(), tmp.data(), ne);
1223
0
            } break;
1224
0
            case GGUF_TYPE_ARRAY:
1225
0
            default: GGML_ABORT("invalid type");
1226
0
        }
1227
0
    }
1228
0
}
1229
1230
void gguf_add_tensor(
1231
             struct gguf_context * ctx,
1232
0
        const struct ggml_tensor * tensor) {
1233
0
    GGML_ASSERT(tensor);
1234
0
    if (gguf_find_tensor(ctx, tensor->name) != -1) {
1235
0
        GGML_ABORT("duplicate tensor name: %s", tensor->name);
1236
0
    }
1237
1238
0
    struct gguf_tensor_info ti;
1239
0
    ti.t = *tensor;
1240
0
    ti.offset = ctx->info.empty() ? 0 :
1241
0
        ctx->info.back().offset + GGML_PAD(ggml_nbytes(&ctx->info.back().t), ctx->alignment);
1242
0
    ctx->info.push_back(ti);
1243
0
}
1244
1245
0
void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
1246
0
    const int64_t tensor_id = gguf_find_tensor(ctx, name);
1247
0
    if (tensor_id < 0) {
1248
0
        GGML_ABORT("tensor not found: %s", name);
1249
0
    }
1250
0
    struct ggml_tensor * tensor = &ctx->info[tensor_id].t;
1251
0
    const size_t  type_size = ggml_type_size(type);
1252
0
    const int64_t blck_size = ggml_blck_size(type);
1253
1254
0
    tensor->type = type;
1255
0
    GGML_ASSERT(tensor->ne[0] % blck_size == 0 && "tensor row size not divisible by block size of new type");
1256
1257
0
    tensor->nb[0] = type_size;
1258
0
    tensor->nb[1] = tensor->nb[0]*(tensor->ne[0]/blck_size);
1259
0
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
1260
0
        tensor->nb[i] = tensor->nb[i - 1]*tensor->ne[i - 1];
1261
0
    }
1262
1263
    // update offsets
1264
0
    const int64_t n_tensors = gguf_get_n_tensors(ctx);
1265
0
    for (int64_t i = tensor_id + 1; i < n_tensors; ++i) {
1266
0
        ctx->info[i].offset = ctx->info[i - 1].offset + GGML_PAD(ggml_nbytes(&ctx->info[i - 1].t), ctx->alignment);
1267
0
    }
1268
0
}
1269
1270
0
void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data) {
1271
0
    const int64_t tensor_id = gguf_find_tensor(ctx, name);
1272
0
    if (tensor_id < 0) {
1273
0
        GGML_ABORT("tensor not found: %s", name);
1274
0
    }
1275
1276
0
    ctx->info[tensor_id].t.data = (void *)(uintptr_t)data; // double cast suppresses warning about casting away const
1277
0
}
1278
1279
struct gguf_writer_base {
1280
    size_t written_bytes {0u};
1281
1282
    ~gguf_writer_base(void) = default;
1283
1284
    // we bet on devirtualization
1285
    virtual void write(int8_t val) = 0;
1286
    virtual void write(const std::vector<int8_t> & val) = 0;
1287
    virtual void write_tensor_data(const struct gguf_tensor_info & info, size_t offset_data, size_t alignment) = 0;
1288
1289
    template <typename T>
1290
0
    void write(const T & val) {
1291
0
        for (size_t i = 0; i < sizeof(val); ++i) {
1292
0
            write(reinterpret_cast<const int8_t *>(&val)[i]);
1293
0
        }
1294
0
    }
Unexecuted instantiation: void gguf_writer_base::write<char>(char const&)
Unexecuted instantiation: void gguf_writer_base::write<unsigned int>(unsigned int const&)
Unexecuted instantiation: void gguf_writer_base::write<long>(long const&)
Unexecuted instantiation: void gguf_writer_base::write<int>(int const&)
Unexecuted instantiation: void gguf_writer_base::write<unsigned long>(unsigned long const&)
1295
1296
0
    void write(const bool & val) {
1297
0
        const int8_t val8 = val ? 1 : 0;
1298
0
        write(val8);
1299
0
    }
1300
1301
0
    void write(const std::string & val) {
1302
0
        {
1303
0
            const uint64_t n = val.length();
1304
0
            write(n);
1305
0
        }
1306
0
        for (size_t i = 0; i < val.length(); ++i) {
1307
0
            write((val.data())[i]);
1308
0
        }
1309
0
    }
1310
1311
0
    void write(const char * val) {
1312
0
        write(std::string(val));
1313
0
    }
1314
1315
0
    void write(const enum ggml_type & val) {
1316
0
        write(int32_t(val));
1317
0
    }
1318
1319
0
    void write(const enum gguf_type & val) {
1320
0
        write(int32_t(val));
1321
0
    }
1322
1323
0
    void write(const struct gguf_kv & kv) {
1324
0
        const uint64_t ne = kv.get_ne();
1325
1326
0
        write(kv.get_key());
1327
1328
0
        if (kv.is_array) {
1329
0
            write(GGUF_TYPE_ARRAY);
1330
0
            write(kv.get_type());
1331
0
            write(ne);
1332
0
        } else {
1333
0
            write(kv.get_type());
1334
0
        }
1335
1336
0
        switch (kv.get_type()) {
1337
0
            case GGUF_TYPE_UINT8:
1338
0
            case GGUF_TYPE_INT8:
1339
0
            case GGUF_TYPE_UINT16:
1340
0
            case GGUF_TYPE_INT16:
1341
0
            case GGUF_TYPE_UINT32:
1342
0
            case GGUF_TYPE_INT32:
1343
0
            case GGUF_TYPE_FLOAT32:
1344
0
            case GGUF_TYPE_UINT64:
1345
0
            case GGUF_TYPE_INT64:
1346
0
            case GGUF_TYPE_FLOAT64: {
1347
0
                write(kv.data);
1348
0
            } break;
1349
0
            case GGUF_TYPE_BOOL: {
1350
0
                for (size_t i = 0; i < ne; ++i) {
1351
0
                    write(kv.get_val<bool>(i));
1352
0
                }
1353
0
            } break;
1354
0
            case GGUF_TYPE_STRING: {
1355
0
                for (size_t i = 0; i < ne; ++i) {
1356
0
                    write(kv.get_val<std::string>(i));
1357
0
                }
1358
0
            } break;
1359
0
            case GGUF_TYPE_ARRAY:
1360
0
            default: GGML_ABORT("invalid type");
1361
0
        }
1362
0
    }
1363
1364
0
    void write_tensor_meta(const struct gguf_tensor_info & info) {
1365
0
        write(info.t.name);
1366
1367
0
        const uint32_t n_dims = ggml_n_dims(&info.t);
1368
0
        write(n_dims);
1369
1370
0
        for (uint32_t j = 0; j < n_dims; ++j) {
1371
0
            write(info.t.ne[j]);
1372
0
        }
1373
0
        write(info.t.type);
1374
0
        write(info.offset);
1375
0
    }
1376
1377
0
    void pad(const size_t alignment) {
1378
0
        while (written_bytes % alignment != 0) {
1379
0
            const int8_t zero = 0;
1380
0
            write(zero);
1381
0
        }
1382
0
    }
1383
};
1384
1385
// vector buffer based writer
1386
struct gguf_writer_buf final : public gguf_writer_base {
1387
    std::vector<int8_t> & buf;
1388
1389
0
    gguf_writer_buf(std::vector<int8_t> & buf) : buf(buf) {}
1390
1391
    using gguf_writer_base::write;
1392
1393
0
    void write(const int8_t val) override {
1394
0
        buf.push_back(val);
1395
0
        written_bytes++;
1396
0
    }
1397
1398
0
    void write(const std::vector<int8_t> & val) override {
1399
0
        buf.insert(buf.end(), val.begin(), val.end());
1400
0
        written_bytes += val.size();
1401
0
    }
1402
1403
0
    void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
1404
0
        GGML_ASSERT(buf.size() - offset_data == info.offset);
1405
1406
0
        GGML_ASSERT(ggml_is_contiguous(&info.t));
1407
0
        const size_t offset = buf.size();
1408
0
        const size_t nbytes = ggml_nbytes(&info.t);
1409
1410
0
        buf.resize(offset + nbytes);
1411
0
        if (info.t.buffer) {
1412
0
            ggml_backend_tensor_get(&info.t, buf.data() + offset, 0, nbytes);
1413
0
        } else {
1414
0
            GGML_ASSERT(info.t.data);
1415
0
            memcpy(buf.data() + offset, info.t.data, nbytes);
1416
0
        }
1417
0
        written_bytes += nbytes;
1418
1419
0
        pad(alignment);
1420
0
    }
1421
};
1422
1423
// file based writer
1424
struct gguf_writer_file final : public gguf_writer_base {
1425
    FILE * file;
1426
1427
0
    gguf_writer_file(FILE* file) : file(file) {}
1428
1429
    using gguf_writer_base::write;
1430
1431
0
    void write(const int8_t val) override {
1432
0
        const auto real_val = static_cast<uint8_t>(val);
1433
0
        const auto ret = fputc(real_val, file);
1434
0
        written_bytes++;
1435
0
        if (ret != real_val) {
1436
0
            throw std::runtime_error("unexpected fputc result '" + std::to_string(ret) + "' instead of '" + std::to_string((int)real_val) + "'");
1437
0
        }
1438
0
    }
1439
1440
0
    void write(const std::vector<int8_t> & val) override {
1441
0
        const auto ret = fwrite(val.data(), 1, val.size(), file);
1442
0
        written_bytes += val.size();
1443
0
        if (ret != val.size()) {
1444
0
            throw std::runtime_error("unexpected fwrite number of bytes written, '" + std::to_string(ret) + "' instead of '" + std::to_string(val.size()) + "'");
1445
0
        }
1446
0
    }
1447
1448
0
    void write_tensor_data(const struct gguf_tensor_info & info, const size_t offset_data, const size_t alignment) override {
1449
0
        GGML_ASSERT(written_bytes - offset_data == info.offset);
1450
1451
0
        GGML_ASSERT(ggml_is_contiguous(&info.t));
1452
0
        const size_t nbytes = ggml_nbytes(&info.t);
1453
1454
0
        std::vector<int8_t> buf(nbytes);
1455
0
        if (info.t.buffer) {
1456
0
            ggml_backend_tensor_get(&info.t, buf.data(), 0, nbytes);
1457
0
        } else {
1458
0
            GGML_ASSERT(info.t.data);
1459
0
            memcpy(buf.data(), info.t.data, nbytes);
1460
0
        }
1461
0
        write(buf);
1462
1463
0
        pad(alignment);
1464
0
    }
1465
};
1466
1467
template <typename writer_t>
1468
0
static void gguf_write_out(const struct gguf_context * ctx, writer_t & gw, bool only_meta) {
1469
0
    const int64_t n_kv      = gguf_get_n_kv(ctx);
1470
0
    const int64_t n_tensors = gguf_get_n_tensors(ctx);
1471
1472
    // write header
1473
0
    gw.write(GGUF_MAGIC[0]);
1474
0
    gw.write(GGUF_MAGIC[1]);
1475
0
    gw.write(GGUF_MAGIC[2]);
1476
0
    gw.write(GGUF_MAGIC[3]);
1477
0
    gw.write(ctx->version);
1478
0
    gw.write(n_tensors);
1479
0
    gw.write(n_kv);
1480
1481
    // write key-value pairs
1482
0
    for (int64_t i = 0; i < n_kv; ++i) {
1483
0
        gw.write(ctx->kv[i]);
1484
0
    }
1485
1486
    // write tensor info
1487
0
    for (int64_t i = 0; i < n_tensors; ++i) {
1488
0
        gw.write_tensor_meta(ctx->info[i]);
1489
0
    }
1490
1491
    // we require the data section to be aligned
1492
0
    gw.pad(ctx->alignment);
1493
1494
0
    if (only_meta) {
1495
0
        return;
1496
0
    }
1497
1498
0
    const size_t offset_data = gw.written_bytes;
1499
1500
    // write tensor data
1501
0
    for (int64_t i = 0; i < n_tensors; ++i) {
1502
0
        gw.write_tensor_data(ctx->info[i], offset_data, ctx->alignment);
1503
0
    }
1504
0
}
Unexecuted instantiation: gguf.cpp:void gguf_write_out<gguf_writer_buf>(gguf_context const*, gguf_writer_buf&, bool)
Unexecuted instantiation: gguf.cpp:void gguf_write_out<gguf_writer_file>(gguf_context const*, gguf_writer_file&, bool)
1505
1506
0
void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta) {
1507
0
    gguf_writer_buf gw(buf);
1508
0
    gguf_write_out(ctx, gw, only_meta);
1509
0
}
1510
1511
0
bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
1512
0
    FILE * file = ggml_fopen(fname, "wb");
1513
1514
0
    if (!file) {
1515
0
        GGML_LOG_ERROR("%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
1516
0
        return false;
1517
0
    }
1518
1519
0
    try {
1520
0
        gguf_writer_file gw(file);
1521
0
        gguf_write_out(ctx, gw, only_meta);
1522
0
    } catch (const std::runtime_error& ex) {
1523
0
        GGML_LOG_ERROR("%s: failed to write GGUF data into '%s': %s\n", __func__, fname, ex.what());
1524
0
        fclose(file);
1525
0
        return false;
1526
0
    }
1527
1528
0
    fclose(file);
1529
0
    return true;
1530
0
}
1531
1532
0
size_t gguf_get_meta_size(const struct gguf_context * ctx) {
1533
    // only return size
1534
0
    std::vector<int8_t> buf;
1535
0
    gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1536
0
    return buf.size();
1537
0
}
1538
1539
0
void gguf_get_meta_data(const struct gguf_context * ctx, void * data) {
1540
0
    std::vector<int8_t> buf;
1541
0
    gguf_write_to_buf(ctx, buf, /*only_meta =*/ true);
1542
0
    memcpy(data, buf.data(), buf.size());
1543
0
}