Coverage Report

Created: 2026-06-22 06:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/src/llama.cpp
Line
Count
Source
1
#include "llama.h"
2
3
#include "llama-impl.h"
4
5
#include "llama-chat.h"
6
#include "llama-context.h"
7
#include "llama-mmap.h"
8
#include "llama-vocab.h"
9
#include "llama-model-loader.h"
10
#include "llama-model-saver.h"
11
#include "llama-model.h"
12
13
#include "ggml.h"
14
#include "ggml-cpp.h"
15
#include "ggml-backend.h"
16
#include "gguf.h"
17
18
#include <algorithm>
19
#include <cassert>
20
#include <cinttypes>
21
#include <cstddef>
22
#include <cstdint>
23
#include <cstdio>
24
#include <cstring>
25
#include <ctime>
26
#include <stdexcept>
27
#include <vector>
28
29
#if defined(_MSC_VER)
30
#pragma warning(disable: 4244 4267) // possible loss of data
31
#endif
32
33
//
34
// interface implementation
35
//
36
37
0
const char * llama_flash_attn_type_name(enum llama_flash_attn_type flash_attn_type) {
38
0
    switch (flash_attn_type) {
39
0
        case LLAMA_FLASH_ATTN_TYPE_AUTO:
40
0
            return "auto";
41
0
        case LLAMA_FLASH_ATTN_TYPE_DISABLED:
42
0
            return "disabled";
43
0
        case LLAMA_FLASH_ATTN_TYPE_ENABLED:
44
0
            return "enabled";
45
0
    }
46
0
    GGML_ABORT("fatal error");
47
0
}
48
49
0
struct llama_sampler_chain_params llama_sampler_chain_default_params() {
50
0
    struct llama_sampler_chain_params result = {
51
0
        /*.no_perf =*/ true,
52
0
    };
53
54
0
    return result;
55
0
}
56
57
4.45k
// Reports the fixed device limit (16).
size_t llama_max_devices(void) {
    const size_t max_devices = 16;
    return max_devices;
}
60
61
0
// Reports the fixed limit on tensor buffer-type overrides (4096).
size_t llama_max_tensor_buft_overrides() {
    const size_t max_overrides = 4096;
    return max_overrides;
}
64
65
0
bool llama_supports_mmap(void) {
66
0
    return llama_mmap::SUPPORTED;
67
0
}
68
69
0
bool llama_supports_mlock(void) {
70
0
    return llama_mlock::SUPPORTED;
71
0
}
72
73
0
bool llama_supports_gpu_offload(void) {
74
0
    if (!ggml_backend_reg_count()) {
75
0
        ggml_backend_load_all();
76
0
    }
77
0
    return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
78
0
           ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU) != nullptr ||
79
0
           llama_supports_rpc();
80
0
}
81
82
0
bool llama_supports_rpc(void) {
83
0
    if (!ggml_backend_reg_count()) {
84
0
        ggml_backend_load_all();
85
0
    }
86
0
    return ggml_backend_reg_by_name("RPC") != nullptr;
87
0
}
88
89
4.46k
void llama_backend_init(void) {
90
4.46k
    ggml_time_init();
91
92
    // needed to initialize f16 tables
93
4.46k
    {
94
4.46k
        struct ggml_init_params params = { 0, NULL, false };
95
4.46k
        struct ggml_context * ctx = ggml_init(params);
96
4.46k
        ggml_free(ctx);
97
4.46k
    }
98
99
4.46k
    if (!ggml_backend_reg_count()) {
100
0
        ggml_backend_load_all();
101
0
    }
102
4.46k
}
103
104
0
void llama_numa_init(enum ggml_numa_strategy numa) {
105
0
    if (numa != GGML_NUMA_STRATEGY_DISABLED) {
106
0
        auto * dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
107
0
        GGML_ASSERT(dev && "CPU backend is not loaded");
108
0
        auto * reg = ggml_backend_dev_backend_reg(dev);
109
0
        auto * numa_init_fn = (decltype(ggml_numa_init) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_numa_init");
110
0
        if (numa_init_fn) {
111
0
            numa_init_fn(numa);
112
0
        }
113
0
    }
114
0
}
115
116
4.46k
void llama_backend_free(void) {
117
4.46k
    ggml_quantize_free();
118
4.46k
}
119
120
0
int64_t llama_time_us(void) {
121
0
    return ggml_time_us();
122
0
}
123
124
// returns true on success
125
264
static bool llama_prepare_model_devices(const llama_model_params & params, llama_model * model) {
126
    // create list of devices to use with this model
127
264
    if (params.devices) {
128
0
        if (params.split_mode == LLAMA_SPLIT_MODE_TENSOR) {
129
0
            size_t n_devs = 0;
130
0
            while (params.devices[n_devs]) {
131
0
                n_devs++;
132
0
            }
133
0
            if (n_devs == 0) {
134
0
                LLAMA_LOG_ERROR("%s: LLAMA_SPLIT_MODE_TENSOR needs >= 1 devices\n", __func__);
135
0
                return false;
136
0
            }
137
0
            LLAMA_LOG_INFO("%s: creating a Meta device with %zu devices\n", __func__, n_devs);
138
0
            for (size_t i = 0; i < n_devs; ++i) {
139
0
                LLAMA_LOG_INFO("%s: - device %zu: %s\n", __func__, i, ggml_backend_dev_name(params.devices[i]));
140
0
            }
141
0
            model->get_split_state_ud.n_devices = n_devs;
142
0
            model->get_split_state_ud.model = model;
143
0
            model->devices.push_back({
144
0
                true, ggml_backend_meta_device(
145
0
                params.devices, n_devs, llama_meta_device_get_split_state, &model->get_split_state_ud)
146
0
            });
147
0
        } else {
148
0
            for (ggml_backend_dev_t * dev = params.devices; *dev; ++dev) {
149
0
                model->devices.push_back({false, *dev});
150
0
            }
151
0
        }
152
264
    } else {
153
        // default device selection
154
155
        // build list of available devices
156
264
        std::vector<llama_device> gpus;
157
264
        std::vector<llama_device> igpus;
158
264
        std::vector<llama_device> rpc_servers;
159
160
264
        if (params.split_mode == LLAMA_SPLIT_MODE_TENSOR) {
161
0
            std::vector<ggml_backend_dev_t> devs;
162
0
            devs.reserve(ggml_backend_dev_count());
163
0
            for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
164
0
                auto * dev = ggml_backend_dev_get(i);
165
0
                if (ggml_backend_dev_buffer_type(dev) == ggml_backend_cpu_buffer_type()) {
166
0
                    LLAMA_LOG_INFO("%s: skipping %s (%s) for tensor parallelism\n", __func__, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev));
167
0
                    continue;
168
0
                }
169
0
                devs.push_back(dev);
170
0
            }
171
0
            if (devs.empty()) {
172
0
                LLAMA_LOG_ERROR("%s: LLAMA_SPLIT_MODE_TENSOR needs >= 1 devices\n", __func__);
173
0
                return false;
174
0
            }
175
176
0
            LLAMA_LOG_INFO("%s: creating a Meta device for tensor parallelism from %zu devices:\n", __func__, devs.size());
177
0
            for (size_t i = 0; i < devs.size(); ++i) {
178
0
                LLAMA_LOG_INFO("%s: - device %zu: %s (%s)\n", __func__, i, ggml_backend_dev_name(devs[i]), ggml_backend_dev_description(devs[i]));
179
0
            }
180
181
0
            GGML_ASSERT(!devs.empty());
182
0
            model->get_split_state_ud.n_devices = devs.size();
183
0
            model->get_split_state_ud.model     = model;
184
0
            gpus.push_back({
185
0
                true, ggml_backend_meta_device(
186
0
                devs.data(), devs.size(), llama_meta_device_get_split_state, &model->get_split_state_ud)
187
0
            });
188
264
        } else {
189
528
            for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
190
264
                ggml_backend_dev_t dev = ggml_backend_dev_get(i);
191
264
                switch (ggml_backend_dev_type(dev)) {
192
264
                    case GGML_BACKEND_DEVICE_TYPE_CPU:
193
264
                    case GGML_BACKEND_DEVICE_TYPE_ACCEL:
194
                        // skip CPU backends since they are handled separately
195
264
                        break;
196
197
0
                    case GGML_BACKEND_DEVICE_TYPE_GPU: {
198
0
                        ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
199
0
                        if (ggml_backend_reg_name(reg) == std::string("RPC")) {
200
0
                            rpc_servers.push_back({false, dev});
201
0
                        } else {
202
                            // check if there is already a GPU with the same device id
203
0
                            ggml_backend_dev_props props;
204
0
                            ggml_backend_dev_get_props(dev, &props);
205
0
                            auto it = std::find_if(gpus.begin(), gpus.end(), [&props](const llama_device & d) {
206
0
                                ggml_backend_dev_props d_props;
207
0
                                ggml_backend_dev_get_props(d.dev, &d_props);
208
0
                                if (props.device_id && d_props.device_id) {
209
0
                                    return strcmp(props.device_id, d_props.device_id) == 0;
210
0
                                }
211
0
                                return false;
212
0
                            });
213
214
0
                            if (it != gpus.end()) {
215
0
                                LLAMA_LOG_INFO("%s: skipping device %s (%s) with id %s - already using device %s (%s) with the same id\n",
216
0
                                        __func__,
217
0
                                        ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
218
0
                                        props.device_id ? props.device_id : "unknown id",
219
0
                                        ggml_backend_dev_name(it->dev), ggml_backend_dev_description(it->dev));
220
0
                            } else {
221
0
                                gpus.push_back({false, dev});
222
0
                            }
223
0
                        }
224
0
                        break;
225
264
                    }
226
227
0
                    case GGML_BACKEND_DEVICE_TYPE_IGPU:
228
0
                        if (igpus.empty()) {
229
0
                            igpus.push_back({false, dev});
230
0
                        }
231
0
                        break;
232
0
                    case GGML_BACKEND_DEVICE_TYPE_META:
233
0
                        GGML_ABORT("fatal error");
234
264
                }
235
264
            }
236
264
        }
237
238
        // add RPC servers at the front of the list to minimize network transfers
239
264
        model->devices.insert(model->devices.begin(), rpc_servers.begin(), rpc_servers.end());
240
241
        // add GPUs
242
264
        model->devices.insert(model->devices.end(), gpus.begin(), gpus.end());
243
244
        // add integrated GPUs only if no discrete GPUs were found
245
        // (RPC servers do not count, otherwise the local iGPU would be dropped on iGPU+RPC setups)
246
264
        if (gpus.empty()) {
247
264
            model->devices.insert(model->devices.end(), igpus.begin(), igpus.end());
248
264
        }
249
264
    }
250
251
    // if using single GPU mode, remove all except the main GPU
252
264
    if (params.split_mode == LLAMA_SPLIT_MODE_NONE && !model->devices.empty()) {
253
0
        if (params.main_gpu < 0) {
254
0
            model->devices.clear();
255
0
        } else {
256
0
            if (params.main_gpu >= (int)model->devices.size()) {
257
0
                LLAMA_LOG_ERROR("%s: invalid value for main_gpu: %d (available devices: %zu)\n", __func__, params.main_gpu, model->devices.size());
258
0
                return false;
259
0
            }
260
0
            llama_device main_gpu = model->devices[params.main_gpu];
261
0
            model->devices.clear();
262
0
            model->devices.push_back(main_gpu);
263
0
        }
264
0
    }
265
266
264
    for (const auto & dev : model->devices) {
267
0
        ggml_backend_dev_props props;
268
0
        ggml_backend_dev_get_props(dev.dev, &props);
269
0
        LLAMA_LOG_INFO("%s: using device %s (%s) (%s) - %zu MiB free\n", __func__,
270
0
                ggml_backend_dev_name(dev.dev), ggml_backend_dev_description(dev.dev),
271
0
                props.device_id ? props.device_id : "unknown id",
272
0
                props.memory_free/1024/1024);
273
0
    }
274
275
264
    return true;
276
264
}
277
278
// Returns 0 on success, -1 on error, and -2 on cancellation via llama_progress_callback
279
static std::pair<int, llama_model *> llama_model_load(struct gguf_context * metadata, llama_model_set_tensor_data_t set_tensor_data, void * set_tensor_data_ud,
280
4.46k
        const std::string & fname, std::vector<std::string> & splits, FILE * file, llama_model_params & params) {
281
4.46k
    try {
282
4.46k
        llama_model_loader ml(metadata, set_tensor_data, set_tensor_data_ud, fname, splits, file, params.use_mmap, params.use_direct_io,
283
4.46k
            params.check_tensors, params.no_alloc, params.kv_overrides, params.tensor_buft_overrides);
284
285
4.46k
        ml.print_info();
286
4.46k
        std::unique_ptr<llama_model> model_ptr(llama_model_create(ml, params));
287
288
4.46k
        bool ok = llama_prepare_model_devices(params, model_ptr.get());
289
4.46k
        if (!ok) {
290
0
            return {-1, nullptr};
291
0
        }
292
293
4.46k
        auto * model = dynamic_cast<llama_model_base *>(model_ptr.get());
294
4.46k
        if (model == nullptr) {
295
0
            GGML_ABORT("fatal error: model does not implement llama_model_base");
296
0
        }
297
298
        // loading time will be recalculated after the first eval, so
299
        // we take page faults deferred by mmap() into consideration
300
4.46k
        model->t_load_us = 0;
301
4.46k
        time_meas tm(model->t_load_us);
302
303
4.46k
        model->t_start_us = tm.t_start_us;
304
305
4.46k
        model->hparams.vocab_only = params.vocab_only;
306
4.46k
        model->hparams.no_alloc   = params.no_alloc;
307
308
4.46k
        try {
309
4.46k
            model->load_hparams(ml);
310
4.46k
        } catch(const std::exception & e) {
311
264
            throw std::runtime_error("error loading model hyperparameters: " + std::string(e.what()));
312
264
        }
313
0
        if (model->arch == LLM_ARCH_CLIP) {
314
0
            throw std::runtime_error("CLIP cannot be used as main model, use it with --mmproj instead");
315
0
        }
316
0
        try {
317
0
            model->load_vocab(ml);
318
0
        } catch(const std::exception & e) {
319
0
            throw std::runtime_error("error loading model vocabulary: " + std::string(e.what()));
320
0
        }
321
322
0
        model->load_stats(ml);
323
0
        model->print_info();
324
325
0
        if (params.vocab_only) {
326
0
            LLAMA_LOG_INFO("%s: vocab only - skipping tensors\n", __func__);
327
0
            return {0, model_ptr.release()};
328
0
        }
329
330
0
        if (!model->load_tensors(ml)) {
331
0
            return {-2, nullptr};
332
0
        }
333
334
0
        return {0, model_ptr.release()};
335
4.17k
    } catch (const std::exception & err) {
336
4.17k
        LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
337
4.17k
        return {-1, nullptr};
338
4.17k
    }
339
4.46k
}
340
341
static struct llama_model * llama_model_load_from_file_impl(
342
        struct gguf_context * metadata,
343
        llama_model_set_tensor_data_t set_tensor_data,
344
        void * set_tensor_data_ud,
345
        const std::string & path_model,
346
        std::vector<std::string> & splits,
347
        FILE * file,
348
4.46k
        struct llama_model_params params) {
349
4.46k
    {
350
4.46k
        int n_sources_defined = 0;
351
4.46k
        if (metadata != nullptr) {
352
0
            n_sources_defined++;
353
0
        }
354
4.46k
        if (!path_model.empty()) {
355
4.46k
            n_sources_defined++;
356
4.46k
        }
357
4.46k
        if (file != nullptr) {
358
0
            n_sources_defined++;
359
0
        }
360
4.46k
        if (n_sources_defined != 1) {
361
0
            LLAMA_LOG_ERROR("%s: exactly one out metadata, path_model, and file must be defined\n", __func__);
362
0
            return nullptr;
363
0
        }
364
4.46k
    }
365
4.46k
    ggml_time_init();
366
367
4.46k
    if (!params.vocab_only && ggml_backend_reg_count() == 0) {
368
0
        LLAMA_LOG_ERROR("%s: no backends are loaded. hint: use ggml_backend_load() or ggml_backend_load_all() to load a backend before calling this function\n", __func__);
369
0
        return nullptr;
370
0
    }
371
372
4.46k
    unsigned cur_percentage = 0;
373
4.46k
    if (params.progress_callback == NULL) {
374
4.45k
        params.progress_callback_user_data = &cur_percentage;
375
4.45k
        params.progress_callback = [](float progress, void * ctx) {
376
0
            unsigned * cur_percentage_p = (unsigned *) ctx;
377
0
            unsigned percentage = (unsigned) (100 * progress);
378
0
            while (percentage > *cur_percentage_p) {
379
0
                *cur_percentage_p = percentage;
380
0
                LLAMA_LOG_CONT(".");
381
0
                if (percentage >= 100) {
382
0
                    LLAMA_LOG_CONT("\n");
383
0
                }
384
0
            }
385
0
            return true;
386
0
        };
387
4.45k
    }
388
389
4.46k
    const auto [status, model] = llama_model_load(metadata, set_tensor_data, set_tensor_data_ud, path_model, splits, file, params);
390
4.46k
    GGML_ASSERT(status <= 0);
391
4.46k
    if (status < 0) {
392
4.17k
        if (status == -1) {
393
4.17k
            LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
394
4.17k
        } else if (status == -2) {
395
0
            LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
396
0
        }
397
398
4.17k
        if (model) {
399
0
            llama_model_free(model);
400
0
        }
401
4.17k
        return nullptr;
402
4.17k
    }
403
404
293
    return model;
405
4.46k
}
406
407
struct llama_model * llama_model_init_from_user(
408
        struct gguf_context * metadata,
409
        llama_model_set_tensor_data_t set_tensor_data,
410
        void * set_tensor_data_ud,
411
0
        struct llama_model_params params) {
412
0
    GGML_ASSERT(metadata != nullptr);
413
0
    std::string path_model;
414
0
    std::vector<std::string> splits = {};
415
0
    params.use_mmap = false;
416
0
    params.use_extra_bufts = false;
417
0
    return llama_model_load_from_file_impl(metadata, set_tensor_data, set_tensor_data_ud, path_model, splits, /*file*/ nullptr, params);
418
0
}
419
// deprecated
420
struct llama_model * llama_load_model_from_file(
421
        const char * path_model,
422
4.46k
        struct llama_model_params params) {
423
4.46k
    return llama_model_load_from_file(path_model, params);
424
4.46k
}
425
426
struct llama_model * llama_model_load_from_file(
427
        const char * path_model,
428
4.46k
        struct llama_model_params params) {
429
4.46k
    std::vector<std::string> splits = {};
430
4.46k
    return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, /*file*/ nullptr, params);
431
4.46k
}
432
433
struct llama_model * llama_model_load_from_splits(
434
        const char ** paths,
435
        size_t n_paths,
436
0
        struct llama_model_params params) {
437
0
    std::vector<std::string> splits;
438
0
    if (n_paths == 0) {
439
0
        LLAMA_LOG_ERROR("%s: list of splits is empty\n", __func__);
440
0
        return nullptr;
441
0
    }
442
0
    splits.reserve(n_paths);
443
0
    for (size_t i = 0; i < n_paths; ++i) {
444
0
        splits.push_back(paths[i]);
445
0
    }
446
0
    return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, splits.front(), splits, /*file*/ nullptr, params);
447
0
}
448
449
0
struct llama_model * llama_model_load_from_file_ptr(FILE * file, struct llama_model_params params) {
450
0
    if (!file) {
451
0
        LLAMA_LOG_ERROR("%s: file is NULL\n", __func__);
452
0
        return nullptr;
453
0
    }
454
0
    std::string path_model;
455
0
    std::vector<std::string> splits = {};
456
0
    return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, file, params);
457
0
}
458
459
0
void llama_model_save_to_file(const struct llama_model * model, const char * path_model) {
460
0
    llama_model_saver ms(model);
461
0
    ms.add_kv_from_model();
462
0
    ms.add_tensors_from_model();
463
0
    ms.save(path_model);
464
0
}
465
466
//
467
// chat templates
468
//
469
470
int32_t llama_chat_apply_template(
471
                              const char * tmpl,
472
         const struct llama_chat_message * chat,
473
                                  size_t   n_msg,
474
                                    bool   add_ass,
475
                                    char * buf,
476
1.65k
                                 int32_t   length) {
477
1.65k
    const std::string curr_tmpl(tmpl == nullptr ? "chatml" : tmpl);
478
479
    // format the chat to string
480
1.65k
    std::vector<const llama_chat_message *> chat_vec;
481
1.65k
    chat_vec.resize(n_msg);
482
11.5k
    for (size_t i = 0; i < n_msg; i++) {
483
9.93k
        chat_vec[i] = &chat[i];
484
9.93k
    }
485
486
1.65k
    std::string formatted_chat;
487
1.65k
    llm_chat_template detected_tmpl = llm_chat_detect_template(curr_tmpl);
488
1.65k
    if (detected_tmpl == LLM_CHAT_TEMPLATE_UNKNOWN) {
489
753
        return -1;
490
753
    }
491
902
    int32_t res = llm_chat_apply_template(detected_tmpl, chat_vec, formatted_chat, add_ass);
492
902
    if (res < 0) {
493
0
        return res;
494
0
    }
495
902
    if (buf && length > 0) {
496
902
        strncpy(buf, formatted_chat.c_str(), length);
497
902
    }
498
902
    return res;
499
902
}
500
501
//
502
// model split
503
//
504
505
// Writes "<path_prefix>-<split_no + 1>-of-<split_count>.gguf" (both numbers zero-padded to
// five digits) into `split_path`, which has room for `maxlen` bytes.
// Returns the number of characters written (without the terminator), or 0 when
// formatting failed or the result did not fit into `maxlen` bytes.
int32_t llama_split_path(
    char * split_path,
    size_t maxlen,
    const char * path_prefix,
    int32_t split_no,
    int32_t split_count) {

    const int n_chars = snprintf(split_path, maxlen, "%s-%05d-of-%05d.gguf", path_prefix, split_no + 1, split_count);

    // snprintf reports the untruncated length, so anything >= maxlen means truncation
    const bool fits = n_chars >= 0 && (size_t) n_chars < maxlen;

    return fits ? (int32_t) n_chars : 0;
}
529
530
// Extracts the prefix of `split_path` that precedes the "-<split_no + 1>-of-<split_count>.gguf"
// postfix and copies it (truncated to `maxlen` bytes including the terminator) into
// `split_prefix`.
// Returns the prefix length, or 0 when `split_path` does not end with that postfix or
// has no characters in front of it.
int32_t llama_split_prefix(
    char * split_prefix,
    size_t maxlen,
    const char * split_path,
    int32_t split_no,
    int32_t split_count) {

    char postfix[32];
    snprintf(postfix, sizeof(postfix), "-%05d-of-%05d.gguf", split_no + 1, split_count);

    const std::string full_path(split_path);
    const size_t      postfix_len = strlen(postfix);

    // the path must be strictly longer than the postfix so that the prefix is non-empty
    if (full_path.size() <= postfix_len) {
        return 0;
    }

    const size_t prefix_len = full_path.size() - postfix_len;
    if (full_path.compare(prefix_len, postfix_len, postfix) != 0) {
        return 0;
    }

    // +1 leaves room for the terminator that snprintf always writes within its limit
    snprintf(split_prefix, std::min(prefix_len + 1, maxlen), "%s", split_path);

    return (int32_t) prefix_len;
}
558
559
0
const char * llama_print_system_info(void) {
560
0
    static std::string s;
561
0
    s.clear(); // Clear the string, since it's static, otherwise it will accumulate data from previous calls.
562
563
0
    for (size_t i = 0; i < ggml_backend_reg_count(); i++) {
564
0
        auto * reg = ggml_backend_reg_get(i);
565
0
        auto * get_features_fn = (ggml_backend_get_features_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
566
0
        if (get_features_fn) {
567
0
            ggml_backend_feature * features = get_features_fn(reg);
568
0
            s += ggml_backend_reg_name(reg);
569
0
            s += " : ";
570
0
            for (; features->name; features++) {
571
0
                s += features->name;
572
0
                s += " = ";
573
0
                s += features->value;
574
0
                s += " | ";
575
0
            }
576
0
        }
577
0
    }
578
579
0
    return s.c_str();
580
0
}
581