LLVMFuzzerTestOneInput: 140| 17|extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { 141| 17| if (size < 256) { ------------------ | Branch (141:7): [True: 14, False: 3] ------------------ 142| 14| return 0; 143| 14| } 144| 3| llama_backend_init(); 145| 3| FuzzedDataProvider fdp(data, size); 146| | 147| 3| auto params = llama_model_params{}; 148| 3| memset(&params, 0x0, sizeof(struct llama_model_params)); 149| 3| params.use_mmap = false; 150| 3| params.progress_callback = [](float progress, void *ctx) { 151| 3| (void)ctx; 152| 3| return progress > 0.50; 153| 3| }; 154| | 155| 3| int overwrite_idx = 0; 156| | 157| | // set the architecture 158| 3| std::string arch_key = "general.architecture"; 159| 3| uint8_t arch_index = 160| 3| fdp.ConsumeIntegralInRange(0, possible_architectures.size() - 1); 161| | 162| 3| std::string arch_val = std::string(possible_architectures[arch_index]); 163| 3| fuzz_kv_overrides[overwrite_idx].tag = LLAMA_KV_OVERRIDE_TYPE_STR; 164| 3| strcpy(fuzz_kv_overrides[overwrite_idx].key, arch_key.c_str()); 165| 3| strcpy(fuzz_kv_overrides[overwrite_idx].val_str, arch_val.c_str()); 166| 3| overwrite_idx++; 167| | 168| 153| for (int i = 0; i < possible_prefix_keys.size(); i++) { ------------------ | Branch (168:19): [True: 150, False: 3] ------------------ 169| 150| std::string key; 170| 150| std::string val; 171| | 172| | // Get the key 173| 150| key = arch_val + possible_prefix_keys[i]; 174| 150| val = fdp.ConsumeRandomLengthString(32); 175| | 176| | // Copy the data into the overrides array 177| 150| fuzz_kv_overrides[overwrite_idx].tag = fdp.PickValueInArray(arrayed_enums); 178| 150| strcpy(fuzz_kv_overrides[overwrite_idx].key, key.c_str()); 179| 150| strcpy(fuzz_kv_overrides[overwrite_idx].val_str, val.c_str()); 180| 150| overwrite_idx++; 181| 150| } 182| | 183| | // Create the model 184| 3| std::string model_payload = fdp.ConsumeRandomLengthString(); 185| 3| if (model_payload.size() < 10) { ------------------ | Branch (185:7): 
[True: 1, False: 2] ------------------ 186| 1| return 0; 187| 1| } 188| 2| model_payload[0] = 'G'; 189| 2| model_payload[1] = 'G'; 190| 2| model_payload[2] = 'U'; 191| 2| model_payload[3] = 'F'; 192| | 193| 2| char filename[256]; 194| 2| sprintf(filename, "/tmp/libfuzzer.%d", getpid()); 195| | 196| 2| FILE *fp = fopen(filename, "wb"); 197| 2| if (!fp) { ------------------ | Branch (197:7): [True: 0, False: 2] ------------------ 198| 0| return 0; 199| 0| } 200| 2| fwrite(model_payload.data(), model_payload.size(), 1, fp); 201| 2| fclose(fp); 202| | 203| | // Override an arbitrary set of arguments 204| 50| for (int i = overwrite_idx; i < NUM_OVERRIDES; i++) { ------------------ | | 26| 50|#define NUM_OVERRIDES 75 ------------------ | Branch (204:31): [True: 48, False: 2] ------------------ 205| 48| std::string key; 206| 48| std::string val; 207| | 208| | // Get the key 209| 48| if (fdp.ConsumeProbability() > 0.90) { ------------------ | Branch (209:9): [True: 0, False: 48] ------------------ 210| 0| key = fdp.ConsumeRandomLengthString(20); 211| 48| } else { 212| 48| int i = fdp.ConsumeIntegralInRange(0, possible_keys.size() - 1); 213| 48| key = possible_keys[i]; 214| 48| } 215| 48| val = fdp.ConsumeRandomLengthString(30); 216| | 217| | // Copy the data into the overrides array 218| 48| fuzz_kv_overrides[i].tag = fdp.PickValueInArray(arrayed_enums); 219| 48| strcpy(fuzz_kv_overrides[i].key, key.c_str()); 220| 48| strcpy(fuzz_kv_overrides[i].val_str, val.c_str()); 221| 48| } 222| | 223| | // For debugging 224| | // std::cout << "--- overwrote ---\n"; 225| | // for (int m = 0; m < NUM_OVERRIDES-1; m++) { 226| | // std::cout << "=== " << fuzz_kv_overrides[m].key << "\n"; 227| | //} 228| | // std::cout << "#############\n"; 229| | 230| 2| params.kv_overrides = 231| 2| (const struct llama_model_kv_override *)fuzz_kv_overrides; 232| | 233| 2| if (setjmp(fuzzing_jmp_buf) == 0) { ------------------ | Branch (233:7): [True: 2, False: 0] ------------------ 234| 2| auto *model = 
llama_load_model_from_file(filename, params); 235| 2| if (model != nullptr) { ------------------ | Branch (235:9): [True: 0, False: 2] ------------------ 236| 0| llama_free_model(model); 237| 0| } 238| 2| } 239| 2| llama_backend_free(); 240| | 241| | // close any open descriptors. 242| 2.04k| for (int i = 3; i < 1024; i++) { ------------------ | Branch (242:19): [True: 2.04k, False: 2] ------------------ 243| 2.04k| close(i); 244| 2.04k| } 245| | 246| 2| unlink(filename); 247| 2| return 0; 248| 2|} ggml_backend_reg_count: 309| 5|size_t ggml_backend_reg_count() { 310| 5| return get_reg().backends.size(); 311| 5|} ggml-backend-reg.cpp:_ZL7get_regv: 285| 5|static ggml_backend_registry & get_reg() { 286| 5| static ggml_backend_registry reg; 287| 5| return reg; 288| 5|} _ZN21ggml_backend_registryC2Ev: 115| 1| ggml_backend_registry() { 116| |#ifdef GGML_USE_CUDA 117| | register_backend(ggml_backend_cuda_reg()); 118| |#endif 119| |#ifdef GGML_USE_METAL 120| | register_backend(ggml_backend_metal_reg()); 121| |#endif 122| |#ifdef GGML_USE_SYCL 123| | register_backend(ggml_backend_sycl_reg()); 124| |#endif 125| |#ifdef GGML_USE_VULKAN 126| | // Add runtime disable check 127| | if (getenv("GGML_DISABLE_VULKAN") == nullptr) { 128| | register_backend(ggml_backend_vk_reg()); 129| | } else { 130| | GGML_LOG_DEBUG("Vulkan backend disabled by GGML_DISABLE_VULKAN environment variable\n"); 131| | } 132| |#endif 133| |#ifdef GGML_USE_WEBGPU 134| | register_backend(ggml_backend_webgpu_reg()); 135| |#endif 136| |#ifdef GGML_USE_ZDNN 137| | register_backend(ggml_backend_zdnn_reg()); 138| |#endif 139| |#ifdef GGML_USE_VIRTGPU_FRONTEND 140| | register_backend(ggml_backend_virtgpu_reg()); 141| |#endif 142| | 143| |#ifdef GGML_USE_OPENCL 144| | register_backend(ggml_backend_opencl_reg()); 145| |#endif 146| |#ifdef GGML_USE_ZENDNN 147| | register_backend(ggml_backend_zendnn_reg()); 148| |#endif 149| |#ifdef GGML_USE_HEXAGON 150| | register_backend(ggml_backend_hexagon_reg()); 151| |#endif 
152| |#ifdef GGML_USE_CANN 153| | register_backend(ggml_backend_cann_reg()); 154| |#endif 155| |#ifdef GGML_USE_BLAS 156| | register_backend(ggml_backend_blas_reg()); 157| |#endif 158| |#ifdef GGML_USE_RPC 159| | register_backend(ggml_backend_rpc_reg()); 160| |#endif 161| |#ifdef GGML_USE_OPENVINO 162| | register_backend(ggml_backend_openvino_reg()); 163| |#endif 164| 1|#ifdef GGML_USE_CPU 165| 1| register_backend(ggml_backend_cpu_reg()); 166| 1|#endif 167| 1| } _ZN21ggml_backend_registryD2Ev: 169| 1| ~ggml_backend_registry() { 170| | // FIXME: backends cannot be safely unloaded without a function to destroy all the backend resources, 171| | // since backend threads may still be running and accessing resources from the dynamic library 172| 1| for (auto & entry : backends) { ------------------ | Branch (172:27): [True: 1, False: 1] ------------------ 173| 1| if (entry.handle) { ------------------ | Branch (173:17): [True: 0, False: 1] ------------------ 174| 0| entry.handle.release(); // NOLINT 175| 0| } 176| 1| } 177| 1| } _ZN21ggml_backend_registry16register_backendEP16ggml_backend_regNSt3__110unique_ptrIv17dl_handle_deleterEE: 179| 1| void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) { 180| 1| if (!reg) { ------------------ | Branch (180:13): [True: 0, False: 1] ------------------ 181| 0| return; 182| 0| } 183| | 184| 1| for (auto & entry : backends) { ------------------ | Branch (184:27): [True: 0, False: 1] ------------------ 185| 0| if (entry.reg == reg) { ------------------ | Branch (185:17): [True: 0, False: 0] ------------------ 186| 0| return; 187| 0| } 188| 0| } 189| | 190| |#ifndef NDEBUG 191| | GGML_LOG_DEBUG("%s: registered backend %s (%zu devices)\n", 192| | __func__, ggml_backend_reg_name(reg), ggml_backend_reg_dev_count(reg)); 193| |#endif 194| 1| backends.push_back({ reg, std::move(handle) }); 195| 2| for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) { ------------------ | Branch (195:28): [True: 1, False: 1] 
------------------ 196| 1| register_device(ggml_backend_reg_dev_get(reg, i)); 197| 1| } 198| 1| } _ZN21ggml_backend_registry15register_deviceEP19ggml_backend_device: 200| 1| void register_device(ggml_backend_dev_t device) { 201| 1| for (auto & dev : devices) { ------------------ | Branch (201:25): [True: 0, False: 1] ------------------ 202| 0| if (dev == device) { ------------------ | Branch (202:17): [True: 0, False: 0] ------------------ 203| 0| return; 204| 0| } 205| 0| } 206| | 207| |#ifndef NDEBUG 208| | GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device)); 209| |#endif 210| 1| devices.push_back(device); 211| 1| } ggml_backend_reg_dev_count: 649| 2|size_t ggml_backend_reg_dev_count(ggml_backend_reg_t reg) { 650| 2| GGML_ASSERT(reg); ------------------ | | 288| 2|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:28): [True: 0, False: 2] | | ------------------ ------------------ 651| 2| return reg->iface.get_device_count(reg); 652| 2|} ggml_backend_reg_dev_get: 654| 1|ggml_backend_dev_t ggml_backend_reg_dev_get(ggml_backend_reg_t reg, size_t index) { 655| 1| GGML_ASSERT(reg); ------------------ | | 288| 1|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) 
ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:28): [True: 0, False: 1] | | ------------------ ------------------ 656| 1| return reg->iface.get_device(reg, index); 657| 1|} ggml_cpu_init: 3768| 1|void ggml_cpu_init(void) { 3769| | // needed to initialize ggml_time 3770| 1| { 3771| 1| struct ggml_init_params params = { 0, NULL, false }; 3772| 1| struct ggml_context * ctx = ggml_init(params); 3773| 1| ggml_free(ctx); 3774| 1| } 3775| | 3776| 1| ggml_critical_section_start(); 3777| | 3778| 1| static bool is_first_call = true; 3779| | 3780| 1| if (is_first_call) { ------------------ | Branch (3780:9): [True: 1, False: 0] ------------------ 3781| | // initialize GELU, Quick GELU, SILU and EXP F32 tables 3782| 1| { 3783| 1| const uint64_t t_start = ggml_time_us(); UNUSED(t_start); ------------------ | | 76| 1|#define UNUSED GGML_UNUSED | | ------------------ | | | | 258| 1|#define GGML_UNUSED(x) (void)(x) | | ------------------ ------------------ 3784| | 3785| 65.5k| for (int i = 0; i < (1 << 16); ++i) { ------------------ | Branch (3785:29): [True: 65.5k, False: 1] ------------------ 3786| 65.5k| union { 3787| 65.5k| uint16_t u16; 3788| 65.5k| ggml_fp16_t fp16; 3789| 65.5k| } u = {i}; 3790| 65.5k| float f = GGML_COMPUTE_FP16_TO_FP32(u.fp16); ------------------ | | 433| 65.5k|#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) ------------------ 3791| 65.5k| ggml_table_f32_f16[i] = f; 3792| 65.5k| ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f)); ------------------ | | 144| 65.5k|#define GGML_CPU_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) | | ------------------ | | | | 434| 65.5k|#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) | | ------------------ ------------------ 3793| 65.5k| ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f)); ------------------ | | 144| 65.5k|#define GGML_CPU_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) | | ------------------ | | | | 
434| 65.5k|#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) | | ------------------ ------------------ 3794| 65.5k| } 3795| | 3796| | // initialize E8M0 half table (256 entries) 3797| 257| for (int i = 0; i < (1 << 8); ++i) { ------------------ | Branch (3797:29): [True: 256, False: 1] ------------------ 3798| 256| ggml_table_f32_e8m0_half[i] = GGML_E8M0_TO_FP32_HALF(i); ------------------ | | 498| 256|#define GGML_E8M0_TO_FP32_HALF(x) ggml_e8m0_to_fp32_half(x) ------------------ 3799| 256| } 3800| | 3801| 1| const uint64_t t_end = ggml_time_us(); UNUSED(t_end); ------------------ | | 76| 1|#define UNUSED GGML_UNUSED | | ------------------ | | | | 258| 1|#define GGML_UNUSED(x) (void)(x) | | ------------------ ------------------ 3802| | 3803| 1| GGML_PRINT_DEBUG("%s: GELU, Quick GELU, SILU and EXP tables initialized in %f ms\n", __func__, (t_end - t_start)/1000.0); 3804| | 3805| |#ifdef GGML_USE_OPENMP 3806| | //if (!getenv("OMP_WAIT_POLICY")) { 3807| | // // set the wait policy to active, so that OpenMP threads don't sleep 3808| | // setenv("OMP_WAIT_POLICY", "active", 0) 3809| | //} 3810| | 3811| | if (!getenv("KMP_BLOCKTIME")) { 3812| | // set the time to wait before sleeping a thread 3813| | // this is less aggressive than setting the wait policy to active, but should achieve similar results in most cases 3814| |#ifdef _WIN32 3815| | _putenv_s("KMP_BLOCKTIME", "200"); // 200ms 3816| |#else 3817| | setenv("KMP_BLOCKTIME", "200", 0); // 200ms 3818| |#endif 3819| | } 3820| |#endif 3821| 1| } 3822| | 3823| |#if defined(__ARM_ARCH) 3824| | ggml_init_arm_arch_features(); 3825| |#endif 3826| | 3827| |#if defined(__riscv) 3828| | ggml_init_riscv_arch_features(); 3829| |#endif 3830| | 3831| 1| { 3832| 1| const char * env = getenv("GGML_CPU_DISABLE_FUSION"); 3833| 1| ggml_cpu_disable_fusion = (env != NULL && atoi(env) == 1); ------------------ | Branch (3833:40): [True: 0, False: 1] | Branch (3833:55): [True: 0, False: 0] ------------------ 3834| 1| } 
3835| | 3836| 1| is_first_call = false; 3837| 1| } 3838| | 3839| 1| ggml_critical_section_end(); 3840| 1|} ggml_backend_cpu_reg: 690| 1|ggml_backend_reg_t ggml_backend_cpu_reg(void) { 691| | // init CPU feature detection 692| 1| ggml_cpu_init(); 693| | 694| 1| static struct ggml_backend_reg ggml_backend_cpu_reg = { 695| 1| /* .api_version = */ GGML_BACKEND_API_VERSION, ------------------ | | 11| 1| #define GGML_BACKEND_API_VERSION 2 ------------------ 696| 1| /* .iface = */ ggml_backend_cpu_reg_i, 697| 1| /* .context = */ NULL, 698| 1| }; 699| | 700| 1| return &ggml_backend_cpu_reg; 701| 1|} ggml-cpu.cpp:_ZL37ggml_backend_cpu_reg_get_device_countP16ggml_backend_reg: 507| 2|static size_t ggml_backend_cpu_reg_get_device_count(ggml_backend_reg_t reg) { 508| 2| return 1; 509| | 510| 0| GGML_UNUSED(reg); ------------------ | | 258| 0|#define GGML_UNUSED(x) (void)(x) ------------------ 511| 0|} ggml-cpu.cpp:_ZL31ggml_backend_cpu_reg_get_deviceP16ggml_backend_regm: 513| 1|static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg, size_t index) { 514| 1| GGML_ASSERT(index == 0); ------------------ | | 288| 1|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) 
ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:28): [True: 0, False: 1] | | ------------------ ------------------ 515| | 516| 1| static ggml_backend_cpu_device_context ctx; 517| 1| static ggml_backend_device ggml_backend_cpu_device = { 518| 1| /* .iface = */ ggml_backend_cpu_device_i, 519| 1| /* .reg = */ reg, 520| 1| /* .context = */ &ctx, 521| 1| }; 522| | 523| 1| return &ggml_backend_cpu_device; 524| 1|} _ZN31ggml_backend_cpu_device_contextC2Ev: 292| 1| ggml_backend_cpu_device_context() { 293| |#ifdef __APPLE__ 294| | size_t len = 0; 295| | if (!sysctlbyname("machdep.cpu.brand_string", NULL, &len, NULL, 0)) { 296| | description.resize(len); 297| | sysctlbyname("machdep.cpu.brand_string", &description[0], &len, NULL, 0); // NOLINT 298| | } 299| |#elif defined(__linux__) 300| | FILE * f = fopen("/proc/cpuinfo", "r"); 301| 1| if (f) { ------------------ | Branch (301:13): [True: 1, False: 0] ------------------ 302| 1| char buf[1024]; 303| 5| while (fgets(buf, sizeof(buf), f)) { ------------------ | Branch (303:20): [True: 5, False: 0] ------------------ 304| 5| if (strncmp(buf, "model name", 10) == 0) { ------------------ | Branch (304:21): [True: 1, False: 4] ------------------ 305| 1| char * p = strchr(buf, ':'); 306| 1| if (p) { ------------------ | Branch (306:25): [True: 1, False: 0] ------------------ 307| 1| p++; 308| 2| while (std::isspace(*p)) { ------------------ | Branch (308:32): [True: 1, False: 1] ------------------ 309| 1| p++; 310| 1| } 311| 2| while (std::isspace(p[strlen(p) - 1])) { ------------------ | Branch (311:32): [True: 1, False: 1] ------------------ 312| 1| p[strlen(p) - 1] = '\0'; 313| 1| } 314| 1| description = p; 315| 1| break; 316| 1| } 317| 1| } 318| 5| } 319| 1| fclose(f); 320| 1| } 321| |#elif defined(_WIN32) 322| | HKEY hKey; 323| | if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, 324| | TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"), 325| | 0, 326| | KEY_READ, 327| | &hKey) == ERROR_SUCCESS) 
{ 328| | DWORD cpu_brand_size = 0; 329| | if (RegQueryValueExA(hKey, 330| | "ProcessorNameString", 331| | NULL, 332| | NULL, 333| | NULL, 334| | &cpu_brand_size) == ERROR_SUCCESS) { 335| | description.resize(cpu_brand_size); 336| | if (RegQueryValueExA(hKey, 337| | "ProcessorNameString", 338| | NULL, 339| | NULL, 340| | (LPBYTE)&description[0], // NOLINT 341| | &cpu_brand_size) == ERROR_SUCCESS) { 342| | if (description.find('\0') != std::string::npos) { 343| | description.resize(description.find('\0')); 344| | } 345| | } 346| | } 347| | RegCloseKey(hKey); 348| | } 349| |#endif 350| 1| } ggml-cpu.c:ggml_gelu_f32: 968| 65.5k|inline static float ggml_gelu_f32(float x) { 969| 65.5k| return 0.5f*x*(1.0f + tanhf(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x))); 970| 65.5k|} ggml-cpu.c:ggml_gelu_quick_f32: 1017| 65.5k|inline static float ggml_gelu_quick_f32(float x) { 1018| 65.5k| return x*(1.0f/(1.0f+expf(GELU_QUICK_COEF*x))); 1019| 65.5k|} ggml-cpu.c:ggml_compute_fp32_to_fp16: 407| 131k|static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { 408| 131k|#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) 409| 131k| const float scale_to_inf = 0x1.0p+112f; 410| 131k| const float scale_to_zero = 0x1.0p-110f; 411| |#else 412| | const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000)); 413| | const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000)); 414| |#endif 415| 131k| float base = (fabsf(f) * scale_to_inf) * scale_to_zero; 416| | 417| 131k| const uint32_t w = fp32_to_bits(f); 418| 131k| const uint32_t shl1_w = w + w; 419| 131k| const uint32_t sign = w & UINT32_C(0x80000000); 420| 131k| uint32_t bias = shl1_w & UINT32_C(0xFF000000); 421| 131k| if (bias < UINT32_C(0x71000000)) { ------------------ | Branch (421:9): [True: 36.1k, False: 94.9k] ------------------ 422| 36.1k| bias = UINT32_C(0x71000000); 423| 36.1k| } 424| | 425| 131k| base = 
fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; 426| 131k| const uint32_t bits = fp32_to_bits(base); 427| 131k| const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); 428| 131k| const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); 429| 131k| const uint32_t nonsign = exp_bits + mantissa_bits; 430| 131k| return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); ------------------ | Branch (430:28): [True: 4.09k, False: 126k] ------------------ 431| 131k|} ggml-cpu.c:fp32_to_bits: 375| 327k|static inline uint32_t fp32_to_bits(float f) { 376| 327k| union { 377| 327k| float as_value; 378| 327k| uint32_t as_bits; 379| 327k| } fp32; 380| 327k| fp32.as_value = f; 381| 327k| return fp32.as_bits; 382| 327k|} ggml-cpu.c:fp32_from_bits: 366| 327k|static inline float fp32_from_bits(uint32_t w) { 367| 327k| union { 368| 327k| uint32_t as_bits; 369| 327k| float as_value; 370| 327k| } fp32; 371| 327k| fp32.as_bits = w; 372| 327k| return fp32.as_value; 373| 327k|} ggml-cpu.c:ggml_compute_fp16_to_fp32: 384| 65.5k|static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { 385| 65.5k| const uint32_t w = (uint32_t) h << 16; 386| 65.5k| const uint32_t sign = w & UINT32_C(0x80000000); 387| 65.5k| const uint32_t two_w = w + w; 388| | 389| 65.5k| const uint32_t exp_offset = UINT32_C(0xE0) << 23; 390| 65.5k|#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) 391| 65.5k| const float exp_scale = 0x1.0p-112f; 392| |#else 393| | const float exp_scale = fp32_from_bits(UINT32_C(0x7800000)); 394| |#endif 395| 65.5k| const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; 396| | 397| 65.5k| const uint32_t magic_mask = UINT32_C(126) << 23; 398| 65.5k| const float magic_bias = 0.5f; 399| 65.5k| const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; 400| | 401| 65.5k| 
const uint32_t denormalized_cutoff = UINT32_C(1) << 27; 402| 65.5k| const uint32_t result = sign | 403| 65.5k| (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); ------------------ | Branch (403:10): [True: 2.04k, False: 63.4k] ------------------ 404| 65.5k| return fp32_from_bits(result); 405| 65.5k|} ggml-cpu.c:ggml_e8m0_to_fp32_half: 477| 256|static inline float ggml_e8m0_to_fp32_half(uint8_t x) { 478| 256| uint32_t bits; 479| | 480| | // For x < 2: use precomputed denormal patterns 481| 256| if (x < 2) { ------------------ | Branch (481:9): [True: 2, False: 254] ------------------ 482| | // 0x00200000 = 2^(-128), 0x00400000 = 2^(-127) 483| 2| bits = 0x00200000 << x; 484| 2| } 485| | // For x >= 2: normalized exponent adjustment 486| 254| else { 487| | // 0.5 * 2^(x-127) = 2^(x-128) = normalized with exponent (x-1) 488| 254| bits = (uint32_t)(x - 1) << 23; 489| 254| } 490| | // Note: NaNs are not handled here 491| | 492| 256| float result; 493| 256| memcpy(&result, &bits, sizeof(float)); 494| 256| return result; 495| 256|} iq2xs_free_impl: 3188| 10|void iq2xs_free_impl(enum ggml_type type) { 3189| 10| GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M || type == GGML_TYPE_IQ2_S); ------------------ | | 288| 60|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) 
ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:30): [True: 2, False: 8] | | | Branch (288:30): [True: 2, False: 6] | | | Branch (288:30): [True: 2, False: 4] | | | Branch (288:30): [True: 2, False: 2] | | | Branch (288:30): [True: 2, False: 0] | | ------------------ ------------------ 3190| 10| const int gindex = iq2_data_index(type); 3191| 10| if (iq2_data[gindex].grid) { ------------------ | Branch (3191:9): [True: 0, False: 10] ------------------ 3192| 0| free(iq2_data[gindex].grid); iq2_data[gindex].grid = NULL; 3193| 0| free(iq2_data[gindex].map); iq2_data[gindex].map = NULL; 3194| | free(iq2_data[gindex].neighbours); iq2_data[gindex].neighbours = NULL; 3195| 0| } 3196| 10|} iq3xs_free_impl: 3832| 4|void iq3xs_free_impl(int grid_size) { 3833| 4| GGML_ASSERT(grid_size == 256 || grid_size == 512); ------------------ | | 288| 6|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) 
ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:30): [True: 2, False: 2] | | | Branch (288:30): [True: 2, False: 0] | | ------------------ ------------------ 3834| 4| const int gindex = iq3_data_index(grid_size); 3835| 4| if (iq3_data[gindex].grid) { ------------------ | Branch (3835:9): [True: 0, False: 4] ------------------ 3836| 0| free(iq3_data[gindex].grid); iq3_data[gindex].grid = NULL; 3837| 0| free(iq3_data[gindex].map); iq3_data[gindex].map = NULL; 3838| | free(iq3_data[gindex].neighbours); iq3_data[gindex].neighbours = NULL; 3839| 0| } 3840| 4|} ggml-quants.c:iq2_data_index: 2761| 10|static inline int iq2_data_index(enum ggml_type type) { 2762| 10| GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M || type == GGML_TYPE_IQ2_S); ------------------ | | 288| 60|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:30): [True: 2, False: 8] | | | Branch (288:30): [True: 2, False: 6] | | | Branch (288:30): [True: 2, False: 4] | | | Branch (288:30): [True: 2, False: 2] | | | Branch (288:30): [True: 2, False: 0] | | ------------------ ------------------ 2763| 10| return type == GGML_TYPE_IQ2_XXS ? 0 : ------------------ | Branch (2763:12): [True: 2, False: 8] ------------------ 2764| 10| type == GGML_TYPE_IQ2_XS ? 1 : ------------------ | Branch (2764:12): [True: 2, False: 6] ------------------ 2765| 8| type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M ? 
2 : 3; ------------------ | Branch (2765:12): [True: 2, False: 4] | Branch (2765:39): [True: 2, False: 2] ------------------ 2766| 10|} ggml-quants.c:iq3_data_index: 3619| 4|static inline int iq3_data_index(int grid_size) { 3620| 4| (void)grid_size; 3621| 4| GGML_ASSERT(grid_size == 256 || grid_size == 512); ------------------ | | 288| 6|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:30): [True: 2, False: 2] | | | Branch (288:30): [True: 2, False: 0] | | ------------------ ------------------ 3622| 4| return grid_size == 256 ? 0 : 1; ------------------ | Branch (3622:12): [True: 2, False: 2] ------------------ 3623| 4|} ggml_critical_section_start: 6| 7|void ggml_critical_section_start() { 7| 7| ggml_critical_section_mutex.lock(); 8| 7|} ggml_critical_section_end: 10| 7|void ggml_critical_section_end(void) { 11| 7| ggml_critical_section_mutex.unlock(); 12| 7|} ggml_log_internal: 306| 4|void ggml_log_internal(enum ggml_log_level level, const char * format, ...) 
{ 307| 4| va_list args; 308| 4| va_start(args, format); 309| 4| ggml_log_internal_v(level, format, args); 310| | va_end(args); 311| 4|} ggml_log_callback_default: 313| 4|void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) { 314| 4| (void) level; 315| 4| (void) user_data; 316| 4| fputs(text, stderr); 317| | fflush(stderr); 318| 4|} ggml_aligned_malloc: 331| 4|void * ggml_aligned_malloc(size_t size) { 332| |#if defined(__s390x__) 333| | const int alignment = 256; 334| |#else 335| 4| const int alignment = 64; 336| 4|#endif 337| | 338| |#if defined(_MSC_VER) || defined(__MINGW32__) 339| | return _aligned_malloc(size, alignment); 340| |#else 341| 4| if (size == 0) { ------------------ | Branch (341:9): [True: 0, False: 4] ------------------ 342| 0| GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); ------------------ | | 120| 0|#define GGML_LOG_WARN(...) ggml_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__) ------------------ 343| 0| return NULL; 344| 0| } 345| 4| void * aligned_memory = NULL; 346| | #ifdef GGML_USE_CPU_HBM 347| | int result = hbw_posix_memalign(&aligned_memory, alignment, size); 348| | #elif TARGET_OS_OSX 349| | GGML_UNUSED(alignment); 350| | kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); 351| | int result = EFAULT; 352| | switch (alloc_status) { 353| | case KERN_SUCCESS: 354| | result = 0; 355| | break; 356| | case KERN_INVALID_ADDRESS: 357| | result = EINVAL; 358| | break; 359| | case KERN_NO_SPACE: 360| | result = ENOMEM; 361| | break; 362| | default: 363| | result = EFAULT; 364| | break; 365| | } 366| | #else 367| 4| int result = posix_memalign(&aligned_memory, alignment, size); 368| 4| #endif 369| 4| if (result != 0) { ------------------ | Branch (369:9): [True: 0, False: 4] ------------------ 370| | // Handle allocation failure 371| 0| const char *error_desc = "unknown allocation 
error"; 372| 0| switch (result) { ------------------ | Branch (372:17): [True: 0, False: 0] ------------------ 373| 0| case EINVAL: ------------------ | Branch (373:13): [True: 0, False: 0] ------------------ 374| 0| error_desc = "invalid alignment value"; 375| 0| break; 376| 0| case ENOMEM: ------------------ | Branch (376:13): [True: 0, False: 0] ------------------ 377| 0| error_desc = "insufficient memory"; 378| 0| break; 379| 0| } 380| 0| GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 381| 0| return NULL; 382| 0| } 383| 4| return aligned_memory; 384| 4|#endif 385| 4|} ggml_aligned_free: 387| 4|void ggml_aligned_free(void * ptr, size_t size) { 388| 4| GGML_UNUSED(size); ------------------ | | 258| 4|#define GGML_UNUSED(x) (void)(x) ------------------ 389| |#if defined(_MSC_VER) || defined(__MINGW32__) 390| | _aligned_free(ptr); 391| |#elif GGML_USE_CPU_HBM 392| | if (ptr != NULL) { 393| | hbw_free(ptr); 394| | } 395| |#elif TARGET_OS_OSX 396| | if (ptr != NULL) { 397| | vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size); 398| | } 399| |#else 400| 4| free(ptr); 401| 4|#endif 402| 4|} ggml_time_init: 552| 9|void ggml_time_init(void) {} ggml_time_us: 559| 2|int64_t ggml_time_us(void) { 560| 2| struct timespec ts; 561| | clock_gettime(CLOCK_MONOTONIC, &ts); 562| 2| return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000; 563| 2|} ggml_fopen: 598| 2|FILE * ggml_fopen(const char * fname, const char * mode) { 599| |#ifdef _WIN32 600| | FILE * file = NULL; 601| | 602| | // convert fname (UTF-8) 603| | wchar_t * wfname = ggml_mbstowcs(fname); 604| | if (wfname) { 605| | // convert mode (UTF-8) 606| | wchar_t * wmode = ggml_mbstowcs(mode); 607| | if (wmode) { 608| | // open file 609| | file = _wfopen(wfname, wmode); 610| | GGML_FREE(wmode); 611| | } 612| | 613| | 
GGML_FREE(wfname); 614| | } 615| | 616| | return file; 617| |#else 618| 2| return fopen(fname, mode); 619| 2|#endif 620| | 621| 2|} ggml_init: 1573| 4|struct ggml_context * ggml_init(struct ggml_init_params params) { 1574| 4| bool is_first_call = true; 1575| | 1576| 4| ggml_critical_section_start(); 1577| | 1578| 4| if (is_first_call) { ------------------ | Branch (1578:9): [True: 4, False: 0] ------------------ 1579| | // initialize time system (required on Windows) 1580| 4| ggml_time_init(); 1581| | 1582| 4| is_first_call = false; 1583| 4| } 1584| | 1585| 4| ggml_critical_section_end(); 1586| | 1587| 4| struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context)); ------------------ | | 434| 4|#define GGML_MALLOC(size) ggml_malloc(size) ------------------ 1588| | 1589| | // allow to call ggml_init with 0 size 1590| 4| if (params.mem_size == 0) { ------------------ | Branch (1590:9): [True: 4, False: 0] ------------------ 1591| 4| params.mem_size = GGML_MEM_ALIGN; ------------------ | | 243| 4| #define GGML_MEM_ALIGN 16 ------------------ 1592| 4| } 1593| | 1594| 4| const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN); ------------------ | | 267| 4|#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1)) ------------------ | Branch (1594:29): [True: 0, False: 4] ------------------ 1595| | 1596| 4| *ctx = (struct ggml_context) { 1597| 4| /*.mem_size =*/ mem_size, 1598| 4| /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size), ------------------ | Branch (1598:35): [True: 0, False: 4] ------------------ 1599| 4| /*.mem_buffer_owned =*/ params.mem_buffer ? 
false : true, ------------------ | Branch (1599:35): [True: 0, False: 4] ------------------ 1600| 4| /*.no_alloc =*/ params.no_alloc, 1601| 4| /*.n_objects =*/ 0, 1602| 4| /*.objects_begin =*/ NULL, 1603| 4| /*.objects_end =*/ NULL, 1604| 4| }; 1605| | 1606| 4| GGML_ASSERT(ctx->mem_buffer != NULL); ------------------ | | 288| 4|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:28): [True: 0, False: 4] | | ------------------ ------------------ 1607| | 1608| 4| GGML_ASSERT_ALIGNED(ctx->mem_buffer); ------------------ | | 1569| 4| GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0) | | ------------------ | | | | 288| 4|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | | | ------------------ | | | | | | 287| 0|#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | | | ------------------ | | | | | Branch (288:28): [True: 0, False: 4] | | | | ------------------ | | ------------------ ------------------ 1609| | 1610| 4| GGML_PRINT_DEBUG("%s: context initialized\n", __func__); 1611| | 1612| 4| return ctx; 1613| 4|} ggml_free: 1625| 4|void ggml_free(struct ggml_context * ctx) { 1626| 4| if (ctx == NULL) { ------------------ | Branch (1626:9): [True: 0, False: 4] ------------------ 1627| 0| return; 1628| 0| } 1629| | 1630| 4| if (ctx->mem_buffer_owned) { ------------------ | Branch (1630:9): [True: 4, False: 0] ------------------ 1631| 4| ggml_aligned_free(ctx->mem_buffer, ctx->mem_size); 1632| 4| } 1633| | 1634| 4| GGML_FREE(ctx); ------------------ | | 437| 4|#define GGML_FREE(ptr) free(ptr) ------------------ 1635| 4|} ggml_quantize_free: 7686| 2|void ggml_quantize_free(void) { 7687| 2| ggml_critical_section_start(); 7688| | 7689| 2| iq2xs_free_impl(GGML_TYPE_IQ2_XXS); 7690| 2| iq2xs_free_impl(GGML_TYPE_IQ2_XS); 7691| 2| iq2xs_free_impl(GGML_TYPE_IQ2_S); 
7692| 2| iq2xs_free_impl(GGML_TYPE_IQ1_S); 7693| 2| iq2xs_free_impl(GGML_TYPE_IQ1_M); 7694| 2| iq3xs_free_impl(256); 7695| 2| iq3xs_free_impl(512); 7696| | 7697| 2| ggml_critical_section_end(); 7698| 2|} ggml.c:ggml_log_internal_v: 286| 4|static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) { 287| 4| if (format == NULL) { ------------------ | Branch (287:9): [True: 0, False: 4] ------------------ 288| 0| return; 289| 0| } 290| 4| va_list args_copy; 291| 4| va_copy(args_copy, args); 292| 4| char buffer[128]; 293| 4| int len = vsnprintf(buffer, 128, format, args); 294| 4| if (len < 128) { ------------------ | Branch (294:9): [True: 4, False: 0] ------------------ 295| 4| g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data); 296| 4| } else { 297| 0| char * buffer2 = (char *) calloc(len + 1, sizeof(char)); 298| 0| vsnprintf(buffer2, len + 1, format, args_copy); 299| 0| buffer2[len] = 0; 300| 0| g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data); 301| 0| free(buffer2); 302| 0| } 303| | va_end(args_copy); 304| 4|} ggml.c:ggml_malloc: 405| 4|inline static void * ggml_malloc(size_t size) { 406| 4| if (size == 0) { ------------------ | Branch (406:9): [True: 0, False: 4] ------------------ 407| 0| GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n"); ------------------ | | 120| 0|#define GGML_LOG_WARN(...) ggml_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__) ------------------ 408| 0| return NULL; 409| 0| } 410| 4| void * result = malloc(size); 411| 4| if (result == NULL) { ------------------ | Branch (411:9): [True: 0, False: 4] ------------------ 412| 0| GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0)); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 413| 0| GGML_ABORT("fatal error"); ------------------ | | 287| 0|#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__) ------------------ 414| 0| } 415| 4| return result; 416| 4|} gguf_init_from_file_ptr: 928| 2|struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) { 929| 2| if (!file) { ------------------ | Branch (929:9): [True: 0, False: 2] ------------------ 930| 0| return nullptr; 931| 0| } 932| | 933| 2| const int64_t cur = gguf_ftell(file); ------------------ | | 25| 2|# define gguf_ftell ftello ------------------ 934| 2| if (cur < 0) { ------------------ | Branch (934:9): [True: 0, False: 2] ------------------ 935| 0| return nullptr; 936| 0| } 937| | 938| 2| gguf_file_reader reader = { 939| 2| /*.file = */ file, 940| 2| /*.offset = */ static_cast(cur), 941| 2| }; 942| | const struct gguf_reader gr(gguf_file_reader_callback, &reader, SIZE_MAX, reader.offset, gguf_reader::file_remain(file)); 943| 2| return gguf_init_from_reader(gr, params); 944| 2|} gguf_init_from_file: 979| 2|struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) { 980| 2| FILE * file = ggml_fopen(fname, "rb"); 981| | 982| 2| if (!file) { ------------------ | Branch (982:9): [True: 0, False: 2] ------------------ 983| 0| GGML_LOG_ERROR("%s: failed to open GGUF file '%s' (%s)\n", __func__, fname, strerror(errno)); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 984| 0| return nullptr; 985| 0| } 986| | 987| 2| struct gguf_context * result = gguf_init_from_file_ptr(file, params); 988| 2| fclose(file); 989| 2| return result; 990| 2|} gguf_free: 992| 2|void gguf_free(struct gguf_context * ctx) { 993| 2| if (ctx == nullptr) { ------------------ | Branch (993:9): [True: 0, False: 2] ------------------ 994| 0| return; 995| 0| } 996| 2| delete ctx; 997| 2|} _ZN11gguf_readerC2EPFmPvS0_mmES0_mmm: 237| 2| : callback(callback), 238| 2| userdata(userdata), 239| 2| max_chunk_read(max_chunk_read), 240| 2| data_offset(data_offset), 241| 2| nbytes_remain(nbytes_remain) { 242| 2| GGML_ASSERT(max_chunk_read > 0); ------------------ | | 288| 2|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:28): [True: 0, False: 2] | | ------------------ ------------------ 243| 2| } gguf.cpp:_ZL21gguf_init_from_readerRK11gguf_reader16gguf_init_params: 451| 2|static struct gguf_context * gguf_init_from_reader(const struct gguf_reader & gr, struct gguf_init_params params) { 452| 2| struct gguf_context * ctx = new gguf_context; 453| | 454| 2| bool ok = true; 455| | 456| | // file magic 457| 2| { 458| 2| std::vector magic; 459| 2| ok = ok && gr.read(magic, 4); ------------------ | Branch (459:14): [True: 2, False: 0] | Branch (459:20): [True: 2, False: 0] ------------------ 460| | 461| 2| if (!ok) { ------------------ | Branch (461:13): [True: 0, False: 2] ------------------ 462| 0| GGML_LOG_ERROR("%s: failed to read magic\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 463| 0| gguf_free(ctx); 464| 0| return nullptr; 465| 0| } 466| | 467| 10| for (uint32_t i = 0; i < magic.size(); i++) { ------------------ | Branch (467:30): [True: 8, False: 2] ------------------ 468| 8| if (magic[i] != GGUF_MAGIC[i]) { ------------------ | | 41| 8|#define GGUF_MAGIC "GGUF" ------------------ | Branch (468:17): [True: 0, False: 8] ------------------ 469| 0| char c0 = isprint(magic[0]) ? magic[0] : '?'; ------------------ | Branch (469:27): [True: 0, False: 0] ------------------ 470| 0| char c1 = isprint(magic[1]) ? magic[1] : '?'; ------------------ | Branch (470:27): [True: 0, False: 0] ------------------ 471| 0| char c2 = isprint(magic[2]) ? magic[2] : '?'; ------------------ | Branch (471:27): [True: 0, False: 0] ------------------ 472| 0| char c3 = isprint(magic[3]) ? magic[3] : '?'; ------------------ | Branch (472:27): [True: 0, False: 0] ------------------ 473| 0| GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, c0, c1, c2, c3); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 474| 0| gguf_free(ctx); 475| 0| return nullptr; 476| 0| } 477| 8| } 478| 2| } 479| | 480| | // header 481| 2| int64_t n_kv = 0; 482| 2| int64_t n_tensors = 0; 483| | 484| 2| if (ok && gr.read(ctx->version)) { ------------------ | Branch (484:9): [True: 2, False: 0] | Branch (484:15): [True: 2, False: 0] ------------------ 485| 2| if (ok && ctx->version == 0) { ------------------ | Branch (485:13): [True: 2, False: 0] | Branch (485:19): [True: 1, False: 1] ------------------ 486| 1| GGML_LOG_ERROR("%s: bad GGUF version: %" PRIu32 "\n", __func__, ctx->version); ------------------ | | 121| 1|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 487| 1| ok = false; 488| 1| } 489| | 490| | /* 491| | * bit layout is different when reading non-native endian models. 492| | * assuming that the GGUF version is 3, the non-native endian model 493| | * would read it as 0x30000000. we can use the AND operation against 494| | * the last 4 hexadecimal digits to check if the model is the same 495| | * endianness as the host system. 496| | */ 497| 2| if (ok && (ctx->version & 0x0000FFFF) == 0x00000000) { ------------------ | Branch (497:13): [True: 1, False: 1] | Branch (497:19): [True: 0, False: 1] ------------------ 498| 0| GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 499| 0| ok = false; 500| 0| } 501| | 502| 2| if (ok && ctx->version == 1) { ------------------ | Branch (502:13): [True: 1, False: 1] | Branch (502:19): [True: 0, False: 1] ------------------ 503| 0| GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 504| 0| ok = false; 505| 0| } 506| 2| if (ok && ctx->version > GGUF_VERSION) { ------------------ | | 42| 1|#define GGUF_VERSION 3 ------------------ | Branch (506:13): [True: 1, False: 1] | Branch (506:19): [True: 1, False: 0] ------------------ 507| 1| GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n", ------------------ | | 121| 1|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 508| 1| __func__, ctx->version, GGUF_VERSION); 509| 1| ok = false; 510| 1| } 511| 2| } else { 512| 0| ok = false; 513| 0| } 514| | 515| 2| if (ok && gr.read(n_tensors)) { ------------------ | Branch (515:9): [True: 0, False: 2] | Branch (515:15): [True: 0, False: 0] ------------------ 516| 0| static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing"); 517| 0| if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) { ------------------ | Branch (517:13): [True: 0, False: 0] | Branch (517:30): [True: 0, False: 0] ------------------ 518| 0| GGML_LOG_ERROR("%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n", ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 519| 0| __func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info)); 520| 0| ok = false; 521| 0| } 522| 2| } else { 523| 2| ok = false; 524| 2| } 525| | 526| 2| if (ok && gr.read(n_kv)) { ------------------ | Branch (526:9): [True: 0, False: 2] | Branch (526:15): [True: 0, False: 0] ------------------ 527| 0| static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing"); 528| 0| if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(gguf_kv))) { ------------------ | Branch (528:13): [True: 0, False: 0] | Branch (528:25): [True: 0, False: 0] ------------------ 529| 0| GGML_LOG_ERROR("%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n", ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 530| 0| __func__, n_kv, SIZE_MAX/sizeof(gguf_kv)); 531| 0| ok = false; 532| 0| } 533| 2| } else { 534| 2| ok = false; 535| 2| } 536| | 537| 2| if (!ok) { ------------------ | Branch (537:9): [True: 2, False: 0] ------------------ 538| 2| GGML_LOG_ERROR("%s: failed to read header\n", __func__); ------------------ | | 121| 2|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 539| 2| gguf_free(ctx); 540| 2| return nullptr; 541| 2| } 542| | 543| | // KV pairs 544| 0| { 545| 0| for (int64_t i = 0; ok && i < n_kv; ++i) { ------------------ | Branch (545:29): [True: 0, False: 0] | Branch (545:35): [True: 0, False: 0] ------------------ 546| 0| std::string key; 547| 0| gguf_type type = gguf_type(-1); 548| 0| bool is_array = false; 549| 0| uint64_t n = 1; 550| | 551| 0| try { 552| 0| ok = ok && gr.read(key); ------------------ | Branch (552:22): [True: 0, False: 0] | Branch (552:28): [True: 0, False: 0] ------------------ 553| 0| } catch (std::length_error &) { 554| 0| GGML_LOG_ERROR("%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 555| 0| ok = false; 556| 0| } catch (std::bad_alloc &) { 557| 0| GGML_LOG_ERROR("%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 558| 0| ok = false; 559| 0| } 560| 0| for (size_t j = 0; ok && j < ctx->kv.size(); ++j) { ------------------ | Branch (560:32): [True: 0, False: 0] | Branch (560:38): [True: 0, False: 0] ------------------ 561| 0| if (key == ctx->kv[j].key) { ------------------ | Branch (561:21): [True: 0, False: 0] ------------------ 562| 0| GGML_LOG_ERROR("%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 563| 0| ok = false; 564| 0| } 565| 0| } 566| 0| if (!ok) { ------------------ | Branch (566:17): [True: 0, False: 0] ------------------ 567| 0| break; 568| 0| } 569| | 570| 0| ok = ok && gr.read(type); ------------------ | Branch (570:18): [True: 0, False: 0] | Branch (570:24): [True: 0, False: 0] ------------------ 571| 0| if (type == GGUF_TYPE_ARRAY) { ------------------ | Branch (571:17): [True: 0, False: 0] ------------------ 572| 0| is_array = true; 573| 0| ok = ok && gr.read(type); ------------------ | Branch (573:22): [True: 0, False: 0] | Branch (573:28): [True: 0, False: 0] ------------------ 574| 0| ok = ok && gr.read(n); ------------------ | Branch (574:22): [True: 0, False: 0] | Branch (574:28): [True: 0, False: 0] ------------------ 575| 0| } 576| 0| if (!ok) { ------------------ | Branch (576:17): [True: 0, False: 0] ------------------ 577| 0| break; 578| 0| } 579| | 580| 0| switch (type) { 581| 0| case GGUF_TYPE_UINT8: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; ------------------ | Branch (581:17): [True: 0, False: 0] | Branch (581:46): [True: 0, False: 0] | Branch (581:52): [True: 0, False: 0] ------------------ 582| 0| case GGUF_TYPE_INT8: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; ------------------ | Branch (582:17): [True: 0, False: 0] | Branch (582:46): 
[True: 0, False: 0] | Branch (582:52): [True: 0, False: 0] ------------------ 583| 0| case GGUF_TYPE_UINT16: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; ------------------ | Branch (583:17): [True: 0, False: 0] | Branch (583:46): [True: 0, False: 0] | Branch (583:52): [True: 0, False: 0] ------------------ 584| 0| case GGUF_TYPE_INT16: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; ------------------ | Branch (584:17): [True: 0, False: 0] | Branch (584:46): [True: 0, False: 0] | Branch (584:52): [True: 0, False: 0] ------------------ 585| 0| case GGUF_TYPE_UINT32: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; ------------------ | Branch (585:17): [True: 0, False: 0] | Branch (585:46): [True: 0, False: 0] | Branch (585:52): [True: 0, False: 0] ------------------ 586| 0| case GGUF_TYPE_INT32: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; ------------------ | Branch (586:17): [True: 0, False: 0] | Branch (586:46): [True: 0, False: 0] | Branch (586:52): [True: 0, False: 0] ------------------ 587| 0| case GGUF_TYPE_FLOAT32: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; ------------------ | Branch (587:17): [True: 0, False: 0] | Branch (587:46): [True: 0, False: 0] | Branch (587:52): [True: 0, False: 0] ------------------ 588| 0| case GGUF_TYPE_BOOL: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; ------------------ | Branch (588:17): [True: 0, False: 0] | Branch (588:46): [True: 0, False: 0] | Branch (588:52): [True: 0, False: 0] ------------------ 589| 0| case GGUF_TYPE_STRING: ok = ok && gguf_read_emplace_helper(gr, ctx->kv, key, is_array, n); break; ------------------ | Branch (589:17): [True: 0, False: 0] | Branch (589:46): [True: 0, False: 0] | Branch (589:52): [True: 0, False: 0] ------------------ 590| 0| case GGUF_TYPE_UINT64: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, 
is_array, n); break; ------------------ | Branch (590:17): [True: 0, False: 0] | Branch (590:46): [True: 0, False: 0] | Branch (590:52): [True: 0, False: 0] ------------------ 591| 0| case GGUF_TYPE_INT64: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; ------------------ | Branch (591:17): [True: 0, False: 0] | Branch (591:46): [True: 0, False: 0] | Branch (591:52): [True: 0, False: 0] ------------------ 592| 0| case GGUF_TYPE_FLOAT64: ok = ok && gguf_read_emplace_helper (gr, ctx->kv, key, is_array, n); break; ------------------ | Branch (592:17): [True: 0, False: 0] | Branch (592:46): [True: 0, False: 0] | Branch (592:52): [True: 0, False: 0] ------------------ 593| 0| case GGUF_TYPE_ARRAY: ------------------ | Branch (593:17): [True: 0, False: 0] ------------------ 594| 0| default: ------------------ | Branch (594:17): [True: 0, False: 0] ------------------ 595| 0| { 596| 0| GGML_LOG_ERROR("%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 597| 0| ok = false; 598| 0| } break; 599| 0| } 600| 0| } 601| | 602| 0| if (!ok) { ------------------ | Branch (602:13): [True: 0, False: 0] ------------------ 603| 0| GGML_LOG_ERROR("%s: failed to read key-value pairs\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 604| 0| gguf_free(ctx); 605| 0| return nullptr; 606| 0| } 607| 0| GGML_ASSERT(int64_t(ctx->kv.size()) == n_kv); ------------------ | | 288| 0|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) 
ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:28): [True: 0, False: 0] | | ------------------ ------------------ 608| | 609| 0| const int alignment_idx = gguf_find_key(ctx, GGUF_KEY_GENERAL_ALIGNMENT); ------------------ | | 44| 0|#define GGUF_KEY_GENERAL_ALIGNMENT "general.alignment" ------------------ 610| 0| ctx->alignment = alignment_idx == -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx); ------------------ | | 46| 0|#define GGUF_DEFAULT_ALIGNMENT 32 ------------------ | Branch (610:26): [True: 0, False: 0] ------------------ 611| | 612| 0| if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) { ------------------ | Branch (612:13): [True: 0, False: 0] | Branch (612:36): [True: 0, False: 0] ------------------ 613| 0| GGML_LOG_ERROR("%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 614| 0| gguf_free(ctx); 615| 0| return nullptr; 616| 0| } 617| 0| } 618| | 619| | // read the tensor info 620| 0| for (int64_t i = 0; ok && i < n_tensors; ++i) { ------------------ | Branch (620:25): [True: 0, False: 0] | Branch (620:31): [True: 0, False: 0] ------------------ 621| 0| struct gguf_tensor_info info; 622| | 623| | // tensor name 624| 0| { 625| 0| std::string name; 626| 0| try { 627| 0| ok = ok && gr.read(name); ------------------ | Branch (627:22): [True: 0, False: 0] | Branch (627:28): [True: 0, False: 0] ------------------ 628| 0| } catch (std::length_error &) { 629| 0| GGML_LOG_ERROR("%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 630| 0| ok = false; 631| 0| } catch (std::bad_alloc &) { 632| 0| GGML_LOG_ERROR("%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 633| 0| ok = false; 634| 0| } 635| 0| if (name.length() >= GGML_MAX_NAME) { ------------------ | | 229| 0|# define GGML_MAX_NAME 64 ------------------ | Branch (635:17): [True: 0, False: 0] ------------------ 636| 0| GGML_LOG_ERROR("%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 637| 0| ok = false; 638| 0| break; 639| 0| } 640| 0| ggml_set_name(&info.t, name.c_str()); 641| | 642| | // make sure there are no duplicate tensor names 643| 0| for (int64_t j = 0; ok && j < i; ++j) { ------------------ | Branch (643:33): [True: 0, False: 0] | Branch (643:39): [True: 0, False: 0] ------------------ 644| 0| if (strcmp(info.t.name, ctx->info[j].t.name) == 0) { ------------------ | Branch (644:21): [True: 0, False: 0] ------------------ 645| 0| GGML_LOG_ERROR("%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 646| 0| ok = false; 647| 0| break; 648| 0| } 649| 0| } 650| 0| } 651| 0| if (!ok) { ------------------ | Branch (651:13): [True: 0, False: 0] ------------------ 652| 0| break; 653| 0| } 654| | 655| | // tensor shape 656| 0| { 657| 0| uint32_t n_dims = 0; 658| 0| ok = ok && gr.read(n_dims); ------------------ | Branch (658:18): [True: 0, False: 0] | Branch (658:24): [True: 0, False: 0] ------------------ 659| 0| if (n_dims > GGML_MAX_DIMS) { ------------------ | | 222| 0|#define GGML_MAX_DIMS 4 ------------------ | Branch (659:17): [True: 0, False: 0] ------------------ 660| 0| GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n", ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 661| 0| __func__, info.t.name, n_dims, GGML_MAX_DIMS); 662| 0| ok = false; 663| 0| break; 664| 0| } 665| 0| for (uint32_t j = 0; ok && j < GGML_MAX_DIMS; ++j) { ------------------ | | 222| 0|#define GGML_MAX_DIMS 4 ------------------ | Branch (665:34): [True: 0, False: 0] | Branch (665:40): [True: 0, False: 0] ------------------ 666| 0| info.t.ne[j] = 1; 667| 0| if (j < n_dims) { ------------------ | Branch (667:21): [True: 0, False: 0] ------------------ 668| 0| ok = ok && gr.read(info.t.ne[j]); ------------------ | Branch (668:26): [True: 0, False: 0] | Branch (668:32): [True: 0, False: 0] ------------------ 669| 0| } 670| | 671| | // check that all ne are non-negative 672| 0| if (info.t.ne[j] < 0) { ------------------ | Branch (672:21): [True: 0, False: 0] ------------------ 673| 0| GGML_LOG_ERROR("%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n", ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 674| 0| __func__, info.t.name, j, info.t.ne[j]); 675| 0| ok = false; 676| 0| break; 677| 0| } 678| 0| } 679| | 680| | // check that the total number of elements is representable 681| 0| if (ok && ((INT64_MAX/info.t.ne[1] <= info.t.ne[0]) || ------------------ | Branch (681:17): [True: 0, False: 0] | Branch (681:24): [True: 0, False: 0] ------------------ 682| 0| (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) || ------------------ | Branch (682:24): [True: 0, False: 0] ------------------ 683| 0| (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) { ------------------ | Branch (683:24): [True: 0, False: 0] ------------------ 684| | 685| 0| GGML_LOG_ERROR("%s: total number of elements in tensor '%s' with shape " ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 686| 0| "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n", 687| 0| __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX); 688| 0| ok = false; 689| 0| break; 690| 0| } 691| 0| } 692| 0| if (!ok) { ------------------ | Branch (692:13): [True: 0, False: 0] ------------------ 693| 0| break; 694| 0| } 695| | 696| | // tensor type 697| 0| { 698| 0| ok = ok && gr.read(info.t.type); ------------------ | Branch (698:18): [True: 0, False: 0] | Branch (698:24): [True: 0, False: 0] ------------------ 699| | 700| | // check that tensor type is within defined range 701| 0| if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) { ------------------ | Branch (701:17): [True: 0, False: 0] | Branch (701:36): [True: 0, False: 0] ------------------ 702| 0| GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d. should be in [0, %d)\n", ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 703| 0| __func__, info.t.name, info.t.type, GGML_TYPE_COUNT); 704| 0| ok = false; 705| 0| break; 706| 0| } 707| 0| const size_t type_size = ggml_type_size(info.t.type); 708| 0| const int64_t blck_size = ggml_blck_size(info.t.type); 709| | 710| | // check that row size is divisible by block size 711| 0| if (blck_size == 0 || info.t.ne[0] % blck_size != 0) { ------------------ | Branch (711:17): [True: 0, False: 0] | Branch (711:35): [True: 0, False: 0] ------------------ 712| 0| GGML_LOG_ERROR("%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, " ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 713| 0| "not a multiple of block size (%" PRId64 ")\n", 714| 0| __func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size); 715| 0| ok = false; 716| 0| break; 717| 0| } 718| | 719| | // check that the size of the tensor in bytes is representable 720| 0| if (ok && uint64_t(ggml_nelements(&info.t)/ggml_blck_size(info.t.type)) > SIZE_MAX/ggml_type_size(info.t.type)) { ------------------ | Branch (720:17): [True: 0, False: 0] | Branch (720:23): [True: 0, False: 0] ------------------ 721| 0| GGML_LOG_ERROR("%s: tensor '%s' with shape (%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") has a size in bytes > %zu\n", ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 722| 0| __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], SIZE_MAX); 723| 0| ok = false; 724| 0| break; 725| 0| } 726| | 727| | // calculate byte offsets given the tensor shape and type 728| 0| info.t.nb[0] = type_size; 729| 0| info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size); 730| 0| for (int j = 2; j < GGML_MAX_DIMS; ++j) { ------------------ | | 222| 0|#define GGML_MAX_DIMS 4 ------------------ | Branch (730:29): [True: 0, False: 0] ------------------ 731| 0| info.t.nb[j] = info.t.nb[j - 1]*info.t.ne[j - 1]; 732| 0| } 733| 0| } 734| 0| if (!ok) { ------------------ | Branch (734:13): [True: 0, False: 0] ------------------ 735| 0| break; 736| 0| } 737| | 738| | // tensor data offset within buffer 739| 0| ok = ok && gr.read(info.offset); ------------------ | Branch (739:14): [True: 0, False: 0] | Branch (739:20): [True: 0, False: 0] ------------------ 740| | 741| 0| ctx->info.push_back(info); 742| 0| } 743| | 744| 0| if (!ok) { ------------------ | Branch (744:9): [True: 0, False: 0] ------------------ 745| 0| GGML_LOG_ERROR("%s: failed to read tensor info\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 746| 0| gguf_free(ctx); 747| 0| return nullptr; 748| 0| } 749| 0| GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors); ------------------ | | 288| 0|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) 
ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:28): [True: 0, False: 0] | | ------------------ ------------------ 750| | 751| | // we require the data section to be aligned, so take into account any padding 752| 0| if (n_tensors > 0 && !gr.seek(GGML_PAD(gr.tell(), ctx->alignment))) { ------------------ | | 267| 0|#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1)) ------------------ | Branch (752:9): [True: 0, False: 0] | Branch (752:26): [True: 0, False: 0] ------------------ 753| 0| GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 754| 0| gguf_free(ctx); 755| 0| return nullptr; 756| 0| } 757| | 758| | // store the current file offset - this is where the data section starts 759| 0| ctx->offset = gr.tell(); 760| | 761| | // compute the total size of the data section, taking into account the alignment 762| 0| { 763| 0| ctx->size = 0; 764| 0| for (size_t i = 0; i < ctx->info.size(); ++i) { ------------------ | Branch (764:28): [True: 0, False: 0] ------------------ 765| 0| const gguf_tensor_info & ti = ctx->info[i]; 766| 0| if (ti.offset != ctx->size) { ------------------ | Branch (766:17): [True: 0, False: 0] ------------------ 767| 0| GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n", ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 768| 0| __func__, ti.t.name, ti.offset, ctx->size); 769| 0| GGML_LOG_ERROR("%s: failed to read tensor data\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 770| 0| gguf_free(ctx); 771| 0| return nullptr; 772| 0| } 773| 0| size_t padded_size = GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment); ------------------ | | 267| 0|#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1)) ------------------ 774| 0| if (SIZE_MAX - ctx->size < padded_size) { ------------------ | Branch (774:17): [True: 0, False: 0] ------------------ 775| 0| GGML_LOG_ERROR("%s: tensor '%s' size overflow, cannot accumulate size %zu + %zu\n", ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 776| 0| __func__, ti.t.name, ctx->size, padded_size); 777| 0| gguf_free(ctx); 778| 0| return nullptr; 779| 0| } 780| 0| ctx->size += padded_size; 781| 0| } 782| 0| } 783| | 784| | // load the tensor data only if requested 785| 0| if (params.ctx != nullptr) { ------------------ | Branch (785:9): [True: 0, False: 0] ------------------ 786| | // if the provided gguf_context is no_alloc, then we create "empty" tensors and do not read the binary blob 787| | // otherwise, we load the binary blob into the created ggml_context as well, and point the "data" members of 788| | // the ggml_tensor structs to the appropriate locations in the binary blob 789| | 790| | // compute the exact size needed for the new ggml_context 791| 0| size_t mem_size = 0; 792| 0| if (params.no_alloc) { ------------------ | Branch (792:13): [True: 0, False: 0] ------------------ 793| 0| if (n_tensors != 0 && SIZE_MAX / n_tensors < ggml_tensor_overhead()) { ------------------ | Branch (793:17): [True: 0, False: 0] | Branch (793:35): [True: 0, False: 0] ------------------ 794| 0| GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 795| 0| gguf_free(ctx); 796| 0| return nullptr; 797| 0| } 798| | 799| 0| const size_t overhead = n_tensors * ggml_tensor_overhead(); 800| | 801| 0| mem_size = overhead; 802| 0| } else { 803| 0| if ((n_tensors + 1) != 0 && SIZE_MAX / (n_tensors + 1) < ggml_tensor_overhead()) { ------------------ | Branch (803:17): [True: 0, False: 0] | Branch (803:41): [True: 0, False: 0] ------------------ 804| 0| GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 805| 0| gguf_free(ctx); 806| 0| return nullptr; 807| 0| } 808| | 809| 0| const size_t overhead = (n_tensors + 1) * ggml_tensor_overhead(); 810| | 811| 0| if (SIZE_MAX - overhead < ctx->size) { ------------------ | Branch (811:17): [True: 0, False: 0] ------------------ 812| 0| GGML_LOG_ERROR("%s: memory size overflow while allocating ggml context\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 813| 0| gguf_free(ctx); 814| 0| return nullptr; 815| 0| } 816| | 817| 0| mem_size = overhead + ctx->size; 818| 0| } 819| | 820| 0| struct ggml_init_params pdata = { 821| 0| /*mem_size =*/ mem_size, 822| 0| /*mem_buffer =*/ nullptr, 823| 0| /*no_alloc =*/ params.no_alloc, 824| 0| }; 825| | 826| 0| *params.ctx = ggml_init(pdata); 827| 0| if (*params.ctx == nullptr) { ------------------ | Branch (827:13): [True: 0, False: 0] ------------------ 828| 0| GGML_LOG_ERROR("%s: failed to initialize ggml context for storing tensors\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 829| 0| gguf_free(ctx); 830| 0| return nullptr; 831| 0| } 832| | 833| 0| struct ggml_context * ctx_data = *params.ctx; 834| | 835| 0| struct ggml_tensor * data = nullptr; 836| | 837| 0| if (!params.no_alloc) { ------------------ | Branch (837:13): [True: 0, False: 0] ------------------ 838| 0| data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size); 839| | 840| 0| ok = ok && data != nullptr; ------------------ | Branch (840:18): [True: 0, False: 0] | Branch (840:24): [True: 0, False: 0] ------------------ 841| | 842| 0| if (ok) { ------------------ | Branch (842:17): [True: 0, False: 0] ------------------ 843| 0| ggml_set_name(data, "GGUF tensor data binary blob"); 844| 0| } 845| | 846| | // read the binary blob with the tensor data 847| 0| ok = ok && gr.read(data->data, ctx->size); ------------------ | Branch (847:18): [True: 0, False: 0] | Branch (847:24): [True: 0, False: 0] ------------------ 848| | 849| 0| if (!ok) { ------------------ | Branch (849:17): [True: 0, False: 0] ------------------ 850| 0| GGML_LOG_ERROR("%s: failed to read tensor data binary blob\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 851| 0| ggml_free(ctx_data); 852| 0| *params.ctx = nullptr; 853| 0| gguf_free(ctx); 854| 0| return nullptr; 855| 0| } 856| | 857| 0| ctx->data = data->data; 858| 0| } 859| | 860| 0| ggml_set_no_alloc(ctx_data, true); 861| | 862| | // create the tensors 863| 0| for (size_t i = 0; i < ctx->info.size(); ++i) { ------------------ | Branch (863:28): [True: 0, False: 0] ------------------ 864| 0| const struct gguf_tensor_info & info = ctx->info[i]; 865| | 866| 0| struct ggml_tensor * cur = ggml_new_tensor(ctx_data, info.t.type, GGML_MAX_DIMS, info.t.ne); ------------------ | | 222| 0|#define GGML_MAX_DIMS 4 ------------------ 867| | 868| 0| ok = ok && cur != nullptr; ------------------ | Branch (868:18): [True: 0, False: 0] | Branch (868:24): [True: 0, False: 0] ------------------ 869| | 870| 0| if (!ok) { ------------------ | Branch (870:17): [True: 0, False: 0] ------------------ 871| 0| break; 872| 0| } 873| | 874| 0| ggml_set_name(cur, info.t.name); 875| | 876| | // point the data member to the appropriate location in the binary blob using the tensor info 877| 0| if (!params.no_alloc) { ------------------ | Branch (877:17): [True: 0, False: 0] ------------------ 878| 0| cur->data = (char *) data->data + info.offset; 879| 0| } 880| 0| } 881| | 882| 0| if (!ok) { ------------------ | Branch (882:13): [True: 0, False: 0] ------------------ 883| 0| GGML_LOG_ERROR("%s: failed to create tensors\n", __func__); ------------------ | | 121| 0|#define GGML_LOG_ERROR(...) 
ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 884| 0| ggml_free(ctx_data); 885| 0| *params.ctx = nullptr; 886| 0| gguf_free(ctx); 887| 0| return nullptr; 888| 0| } 889| | 890| 0| ggml_set_no_alloc(ctx_data, params.no_alloc); 891| 0| } 892| | 893| 0| return ctx; 894| 0|} _ZNK11gguf_reader4readIcEEbRNSt3__16vectorIT_NS1_9allocatorIS3_EEEEm: 276| 2| bool read(std::vector & dst, const size_t n) const { 277| 2| if (n > GGUF_MAX_ARRAY_ELEMENTS) { ------------------ | | 19| 2|#define GGUF_MAX_ARRAY_ELEMENTS (1024*1024*1024) ------------------ | Branch (277:13): [True: 0, False: 2] ------------------ 278| 0| return false; 279| 0| } 280| | if constexpr (std::is_same::value) { 281| | // strings are prefixed with their length, so we need to account for that 282| | if (n > SIZE_MAX / sizeof(uint64_t)) { 283| | return false; 284| | } 285| | if (nbytes_remain < n * sizeof(uint64_t)) { 286| | return false; 287| | } 288| 2| } else { 289| 2| if (n > SIZE_MAX / sizeof(T)) { ------------------ | Branch (289:17): [True: 0, False: 2] ------------------ 290| 0| return false; 291| 0| } 292| 2| if (nbytes_remain < n * sizeof(T)) { ------------------ | Branch (292:17): [True: 0, False: 2] ------------------ 293| 0| return false; 294| 0| } 295| 2| } 296| 2| dst.resize(n); 297| 10| for (size_t i = 0; i < dst.size(); ++i) { ------------------ | Branch (297:28): [True: 8, False: 2] ------------------ 298| | if constexpr (std::is_same::value) { 299| | bool tmp; 300| | if (!read(tmp)) { 301| | return false; 302| | } 303| | dst[i] = tmp; 304| 8| } else { 305| 8| if (!read(dst[i])) { ------------------ | Branch (305:21): [True: 0, False: 8] ------------------ 306| 0| return false; 307| 0| } 308| 8| } 309| 8| } 310| 2| return true; 311| 2| } _ZNK11gguf_reader4readIcEEbRT_: 267| 8| bool read(T & dst) const { 268| 8| const size_t size = sizeof(dst); 269| 8| if (size > nbytes_remain) { ------------------ | Branch (269:13): [True: 0, False: 8] ------------------ 270| 0| return 
false; 271| 0| } 272| 8| return read_raw(&dst, size) == size; 273| 8| } _ZNK11gguf_reader8read_rawEPvm: 381| 10| size_t read_raw(void * dst, size_t size) const { 382| 10| if (callback == nullptr || size == 0) { ------------------ | Branch (382:13): [True: 0, False: 10] | Branch (382:36): [True: 0, False: 10] ------------------ 383| 0| return 0; 384| 0| } 385| | 386| 10| uint8_t * data = static_cast(dst); 387| 10| size_t total_nread = 0; 388| 10| bool reached_eof = false; 389| | 390| 20| while (total_nread < size) { ------------------ | Branch (390:16): [True: 10, False: 10] ------------------ 391| 10| const size_t chunk_size = std::min(max_chunk_read, size - total_nread); 392| 10| if (data_offset + total_nread < data_offset) { ------------------ | Branch (392:17): [True: 0, False: 10] ------------------ 393| 0| break; 394| 0| } 395| 10| const size_t nread = callback(userdata, static_cast(data + total_nread), data_offset + total_nread, chunk_size); 396| 10| total_nread += nread; 397| 10| if (nread != chunk_size) { ------------------ | Branch (397:17): [True: 0, False: 10] ------------------ 398| 0| reached_eof = true; 399| 0| break; 400| 0| } 401| 10| } 402| | 403| 10| data_offset += total_nread; 404| 10| GGML_ASSERT(total_nread <= nbytes_remain); ------------------ | | 288| 10|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) 
ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:28): [True: 0, False: 10] | | ------------------ ------------------ 405| 10| nbytes_remain -= total_nread; 406| | 407| 10| if (reached_eof) { ------------------ | Branch (407:13): [True: 0, False: 10] ------------------ 408| 0| nbytes_remain = 0; 409| 0| } 410| | 411| 10| return total_nread; 412| 10| } _ZNK11gguf_reader4readIjEEbRT_: 267| 2| bool read(T & dst) const { 268| 2| const size_t size = sizeof(dst); 269| 2| if (size > nbytes_remain) { ------------------ | Branch (269:13): [True: 0, False: 2] ------------------ 270| 0| return false; 271| 0| } 272| 2| return read_raw(&dst, size) == size; 273| 2| } gguf.cpp:_ZL25gguf_file_reader_callbackPvS_mm: 910| 10|static size_t gguf_file_reader_callback(void * userdata, void * output, uint64_t offset, size_t len) { 911| 10| GGML_ASSERT(len > 0); ------------------ | | 288| 10|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) 
ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:28): [True: 0, False: 10] | | ------------------ ------------------ 912| | 913| 10| gguf_file_reader & reader = *static_cast(userdata); 914| | 915| 10| if (reader.offset != offset) { ------------------ | Branch (915:9): [True: 0, False: 10] ------------------ 916| 0| if (offset > INT64_MAX || gguf_fseek(reader.file, static_cast(offset), SEEK_SET) != 0) { ------------------ | | 26| 0|# define gguf_fseek fseeko ------------------ | Branch (916:13): [True: 0, False: 0] | Branch (916:35): [True: 0, False: 0] ------------------ 917| 0| return 0; 918| 0| } 919| | 920| 0| reader.offset = offset; 921| 0| } 922| | 923| 10| const size_t nread = fread(static_cast(output), 1, len, reader.file); 924| 10| reader.offset += nread; 925| 10| return nread; 926| 10|} _ZN11gguf_reader11file_remainEP8_IO_FILE: 246| 2| static uint64_t file_remain(FILE * file) { 247| 2| const int64_t cur = gguf_ftell(file); ------------------ | | 25| 2|# define gguf_ftell ftello ------------------ 248| 2| if (cur < 0) { ------------------ | Branch (248:13): [True: 0, False: 2] ------------------ 249| 0| return 0; 250| 0| } 251| 2| if (gguf_fseek(file, 0, SEEK_END) != 0) { ------------------ | | 26| 2|# define gguf_fseek fseeko ------------------ | Branch (251:13): [True: 0, False: 2] ------------------ 252| 0| gguf_fseek(file, cur, SEEK_SET); ------------------ | | 26| 0|# define gguf_fseek fseeko ------------------ 253| | 254| 0| return 0; 255| 0| } 256| 2| const int64_t end = gguf_ftell(file); ------------------ | | 25| 2|# define gguf_ftell ftello ------------------ 257| 2| if (end < 0) { ------------------ | Branch (257:13): [True: 0, False: 2] ------------------ 258| 0| gguf_fseek(file, cur, SEEK_SET); ------------------ | | 26| 0|# define gguf_fseek fseeko ------------------ 259| | 260| 0| return 0; 261| 0| } 262| 2| gguf_fseek(file, cur, SEEK_SET); ------------------ | | 26| 2|# define gguf_fseek fseeko 
------------------ 263| 2| return static_cast(end - cur); 264| 2| } _ZN6LLM_KVC2E8llm_archPKc: 803| 2|LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {} _Z18llama_log_internal14ggml_log_levelPKcz: 55| 4|void llama_log_internal(ggml_log_level level, const char * format, ...) { 56| 4| va_list args; 57| 4| va_start(args, format); 58| 4| llama_log_internal_v(level, format, args); 59| | va_end(args); 60| 4|} _Z26llama_log_callback_default14ggml_log_levelPKcPv: 62| 4|void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data) { 63| 4| (void) level; 64| 4| (void) user_data; 65| 4| fputs(text, stderr); 66| | fflush(stderr); 67| 4|} _Z6formatPKcz: 86| 2|std::string format(const char * fmt, ...) { 87| 2| va_list ap; 88| 2| va_list ap2; 89| 2| va_start(ap, fmt); 90| 2| va_copy(ap2, ap); 91| 2| int size = vsnprintf(NULL, 0, fmt, ap); 92| 2| GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT ------------------ | | 288| 4|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:30): [True: 2, False: 0] | | | Branch (288:30): [True: 2, False: 0] | | ------------------ ------------------ 93| 2| std::vector buf(size + 1); 94| 2| int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2); 95| 2| GGML_ASSERT(size2 == size); ------------------ | | 288| 2|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) 
ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:28): [True: 0, False: 2] | | ------------------ ------------------ 96| 2| va_end(ap2); 97| | va_end(ap); 98| 2| return std::string(buf.data(), size); 99| 2|} llama-impl.cpp:_ZL20llama_log_internal_v14ggml_log_levelPKcP13__va_list_tag: 38| 4|static void llama_log_internal_v(ggml_log_level level, const char * format, va_list args) { 39| 4| va_list args_copy; 40| 4| va_copy(args_copy, args); 41| 4| char buffer[128]; 42| 4| int len = vsnprintf(buffer, 128, format, args); 43| 4| if (len < 128) { ------------------ | Branch (43:9): [True: 4, False: 0] ------------------ 44| 4| g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data); 45| 4| } else { 46| 0| char * buffer2 = new char[len + 1]; 47| 0| vsnprintf(buffer2, len + 1, format, args_copy); 48| 0| buffer2[len] = 0; 49| 0| g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data); 50| 0| delete[] buffer2; 51| 0| } 52| | va_end(args_copy); 53| 4|} _ZN18llama_model_loaderC2EP12gguf_contextPFvP11ggml_tensorPvES4_RKNSt3__112basic_stringIcNS7_11char_traitsIcEENS7_9allocatorIcEEEERNS7_6vectorISD_NSB_ISD_EEEEP8_IO_FILEbbbbPK23llama_model_kv_overridePK32llama_model_tensor_buft_override: 525| 2| : metadata(meta), set_tensor_data(set_tensor_data), set_tensor_data_ud(set_tensor_data_ud) { 526| 2| int trace = 0; 527| 2| if (getenv("LLAMA_TRACE")) { ------------------ | Branch (527:9): [True: 0, False: 2] ------------------ 528| 0| trace = atoi(getenv("LLAMA_TRACE")); 529| 0| } 530| | 531| 2| if (param_overrides_p != nullptr) { ------------------ | Branch (531:9): [True: 2, False: 0] ------------------ 532| 152| for (const struct llama_model_kv_override * p = param_overrides_p; p->key[0] != 0; p++) { ------------------ | Branch (532:76): [True: 150, False: 2] ------------------ 533| 150| kv_overrides.insert({std::string(p->key), *p}); 534| 150| } 535| 2| } 536| | 537| 2| tensor_buft_overrides = 
param_tensor_buft_overrides_p; 538| | 539| 2| if (!fname.empty()) { ------------------ | Branch (539:9): [True: 2, False: 0] ------------------ 540| | // Load the main GGUF 541| 2| struct ggml_context * ctx = NULL; 542| 2| struct gguf_init_params params = { 543| 2| /*.no_alloc = */ true, 544| 2| /*.ctx = */ &ctx, 545| 2| }; 546| | 547| 2| metadata_ptr.reset(gguf_init_from_file(fname.c_str(), params)); 548| 2| metadata = metadata_ptr.get(); 549| 2| if (metadata == nullptr) { ------------------ | Branch (549:13): [True: 2, False: 0] ------------------ 550| 2| throw std::runtime_error(format("%s: failed to load model from %s", __func__, fname.c_str())); 551| 2| } 552| | 553| 0| get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false); 554| 0| llm_kv = LLM_KV(llm_arch_from_string(arch_name)); 555| | 556| 0| files.emplace_back(new llama_file(fname.c_str(), "rb", use_direct_io)); 557| 0| contexts.emplace_back(ctx); 558| | 559| 0| if (use_mmap && use_direct_io) { ------------------ | Branch (559:13): [True: 0, False: 0] | Branch (559:25): [True: 0, False: 0] ------------------ 560| 0| if (files.back()->has_direct_io()) { ------------------ | Branch (560:17): [True: 0, False: 0] ------------------ 561| 0| LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__); ------------------ | | 29| 0|#define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__) ------------------ 562| 0| use_mmap = false; 563| 0| } else { 564| 0| LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__); ------------------ | | 29| 0|#define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__) ------------------ 565| 0| use_direct_io = false; 566| | 567| | // reopen file using std::fopen for mmap 568| 0| files.pop_back(); 569| 0| files.emplace_back(new llama_file(fname.c_str(), "rb", false)); 570| 0| } 571| 0| } 572| | 573| | // Save tensors data offset of the main file. 
574| | // For subsidiary files, `meta` tensor data offset must not be used, 575| | // so we build a unified tensors index for weights. 576| 0| for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) { ------------------ | Branch (576:62): [True: 0, False: 0] ------------------ 577| 0| std::string tensor_name = std::string(cur->name); 578| | // make sure there is no duplicated tensor names 579| 0| if (weights_map.find(tensor_name) != weights_map.end()) { ------------------ | Branch (579:17): [True: 0, False: 0] ------------------ 580| 0| throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", ggml_get_name(cur))); 581| 0| } 582| 0| n_elements += ggml_nelements(cur); 583| 0| n_bytes += ggml_nbytes(cur); 584| 0| weights_map.emplace(tensor_name, llama_tensor_weight(files.back().get(), 0, metadata, cur)); 585| 0| } 586| 0| uint16_t n_split = 0; 587| 0| get_key(llm_kv(LLM_KV_SPLIT_COUNT), n_split, false); 588| | 589| | // Load additional GGML contexts 590| 0| if (n_split > 1) { ------------------ | Branch (590:13): [True: 0, False: 0] ------------------ 591| | // make sure the main file is loaded first 592| 0| uint16_t idx = 0; 593| 0| const std::string kv_split_no = llm_kv(LLM_KV_SPLIT_NO); 594| 0| get_key(kv_split_no, idx); 595| 0| if (idx != 0) { ------------------ | Branch (595:17): [True: 0, False: 0] ------------------ 596| 0| throw std::runtime_error(format("illegal split file idx: %d (file: %s), model must be loaded with the first split", idx, fname.c_str())); 597| 0| } 598| | 599| | // generate list of splits if needed 600| 0| if (splits.empty()) { ------------------ | Branch (600:17): [True: 0, False: 0] ------------------ 601| 0| splits = llama_get_list_splits(fname, idx, n_split); 602| 0| } 603| | 604| | // in case user give a custom list of splits, check if it matches the expected number 605| 0| if (n_split != (uint16_t)splits.size()) { ------------------ | Branch (605:17): [True: 0, False: 0] 
------------------ 606| 0| throw std::runtime_error(format("invalid split count, given: %zu splits, but expected %d", splits.size(), n_split)); 607| 0| } 608| | 609| 0| if (trace > 0) { ------------------ | Branch (609:17): [True: 0, False: 0] ------------------ 610| 0| LLAMA_LOG_INFO("%s: loading additional %d GGUFs\n", __func__, n_split); ------------------ | | 28| 0|#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) ------------------ 611| 0| } 612| | 613| | // load other splits 614| 0| for (idx = 1; idx < n_split; idx++) { ------------------ | Branch (614:27): [True: 0, False: 0] ------------------ 615| 0| const char * fname_split = splits[idx].c_str(); 616| | 617| 0| struct gguf_init_params split_params = { 618| 0| /*.no_alloc = */ true, 619| 0| /*.ctx = */ &ctx, 620| 0| }; 621| 0| gguf_context_ptr ctx_gguf { gguf_init_from_file(fname_split, split_params) }; 622| 0| if (!ctx_gguf) { ------------------ | Branch (622:21): [True: 0, False: 0] ------------------ 623| 0| throw std::runtime_error(format("%s: failed to load GGUF split from %s", __func__, fname_split)); 624| 0| } 625| | 626| | // check idx 627| 0| { 628| 0| const int kid = gguf_find_key(ctx_gguf.get(), kv_split_no.c_str()); 629| 0| if (kid < 0) { ------------------ | Branch (629:25): [True: 0, False: 0] ------------------ 630| 0| throw std::runtime_error(format("missing key %s in GGUF split %s", kv_split_no.c_str(), fname_split)); 631| 0| } 632| 0| int idx_gguf = gguf_get_val_u16(ctx_gguf.get(), kid); 633| 0| if (idx_gguf != idx) { ------------------ | Branch (633:25): [True: 0, False: 0] ------------------ 634| 0| throw std::runtime_error(format("invalid split file idx: %d (file: %s), expected %d", idx_gguf, fname_split, idx)); 635| 0| } 636| 0| } 637| | 638| 0| files.emplace_back(new llama_file(fname_split, "rb", use_direct_io)); 639| 0| contexts.emplace_back(ctx); 640| | 641| | // Save tensors data offset info of the shard. 
642| 0| for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) { ------------------ | Branch (642:70): [True: 0, False: 0] ------------------ 643| 0| std::string tensor_name = std::string(cur->name); 644| | // make sure there is no duplicated tensor names 645| 0| if (weights_map.find(tensor_name) != weights_map.end()) { ------------------ | Branch (645:25): [True: 0, False: 0] ------------------ 646| 0| throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", ggml_get_name(cur))); 647| 0| } 648| 0| n_elements += ggml_nelements(cur); 649| 0| n_bytes += ggml_nbytes(cur); 650| 0| weights_map.emplace(tensor_name, llama_tensor_weight(files.back().get(), idx, ctx_gguf.get(), cur)); 651| 0| } 652| 0| } 653| | 654| 0| get_key(llm_kv(LLM_KV_SPLIT_TENSORS_COUNT), n_tensors); 655| | 656| | // sanity check 657| 0| { 658| 0| const int n_tensors_loaded = (int) weights_map.size(); 659| 0| if (n_tensors != n_tensors_loaded) { ------------------ | Branch (659:21): [True: 0, False: 0] ------------------ 660| 0| throw std::runtime_error(format("corrupted model: %d tensors expected but %d found", n_tensors, n_tensors_loaded)); 661| 0| } 662| 0| } 663| | 664| 0| LLAMA_LOG_INFO("%s: additional %d GGUFs metadata loaded.\n", __func__, n_split - 1); ------------------ | | 28| 0|#define LLAMA_LOG_INFO(...) 
llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) ------------------ 665| 0| } 666| 0| } else if (file != nullptr) { ------------------ | Branch (666:16): [True: 0, False: 0] ------------------ 667| 0| struct ggml_context * ctx = NULL; 668| 0| struct gguf_init_params params = { 669| 0| /*.no_alloc = */ true, 670| 0| /*.ctx = */ &ctx, 671| 0| }; 672| | 673| 0| metadata_ptr.reset(gguf_init_from_file_ptr(file, params)); 674| 0| metadata = metadata_ptr.get(); 675| 0| if (metadata == nullptr) { ------------------ | Branch (675:13): [True: 0, False: 0] ------------------ 676| 0| throw std::runtime_error(format("%s: failed to load model from file pointer", __func__)); 677| 0| } 678| | 679| 0| get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false); 680| 0| llm_kv = LLM_KV(llm_arch_from_string(arch_name)); 681| | 682| 0| files.emplace_back(new llama_file(file)); 683| 0| contexts.emplace_back(ctx); 684| | 685| | // Save tensors data offset info of the main file. 686| 0| for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) { ------------------ | Branch (686:62): [True: 0, False: 0] ------------------ 687| 0| std::string tensor_name = std::string(cur->name); 688| | // make sure there is no duplicated tensor names 689| 0| if (weights_map.find(tensor_name) != weights_map.end()) { ------------------ | Branch (689:17): [True: 0, False: 0] ------------------ 690| 0| throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", ggml_get_name(cur))); 691| 0| } 692| 0| n_elements += ggml_nelements(cur); 693| 0| n_bytes += ggml_nbytes(cur); 694| 0| weights_map.emplace(tensor_name, llama_tensor_weight(files.back().get(), 0, metadata, cur)); 695| 0| } 696| 0| } else { 697| 0| get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false); 698| 0| llm_kv = LLM_KV(llm_arch_from_string(arch_name)); 699| 0| } 700| | 701| 0| n_kv = gguf_get_n_kv(metadata); 702| 0| n_tensors = weights_map.size(); 703| | 704| 0| fver = (enum 
llama_fver) gguf_get_version(metadata); 705| | 706| 0| LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n", ------------------ | | 28| 0|#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) | | ------------------ | | | Branch (28:71): [True: 0, False: 0] | | ------------------ ------------------ 707| 0| __func__, n_kv, n_tensors, fname.empty() ? "(file*)" : fname.c_str(), llama_file_version_name(fver)); 708| | 709| | // determine file type based on the number of tensors for each quantization and print meta data 710| | // TODO: make optional 711| 0| { 712| 0| std::map n_type; 713| | 714| 0| uint32_t n_type_max = 0; 715| 0| enum ggml_type type_max = GGML_TYPE_F32; 716| | 717| 0| for (const auto & it : weights_map) { ------------------ | Branch (717:30): [True: 0, False: 0] ------------------ 718| 0| const llama_tensor_weight & w = it.second; 719| 0| const ggml_tensor * tensor = w.tensor; 720| | 721| 0| enum ggml_type type = tensor->type; 722| | 723| 0| n_type[type]++; 724| | 725| 0| if (n_type_max < n_type[type]) { ------------------ | Branch (725:17): [True: 0, False: 0] ------------------ 726| 0| n_type_max = n_type[type]; 727| 0| type_max = type; 728| 0| } 729| | 730| 0| if (trace > 0) { ------------------ | Branch (730:17): [True: 0, False: 0] ------------------ 731| 0| const uint16_t sid = w.idx; 732| 0| LLAMA_LOG_INFO("%s: - tensor split %2d: %32s %-8s [ %s ] %8.2f MiB\n", __func__, ------------------ | | 28| 0|#define LLAMA_LOG_INFO(...) 
llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) ------------------ 733| 0| sid, ggml_get_name(tensor), ggml_type_name(type), llama_format_tensor_shape(tensor).c_str(), 734| 0| ggml_nbytes(tensor)/1024.0f/1024.0f); 735| 0| } 736| 0| } 737| | 738| 0| switch (type_max) { 739| 0| case GGML_TYPE_F32: ftype = LLAMA_FTYPE_ALL_F32; break; ------------------ | Branch (739:13): [True: 0, False: 0] ------------------ 740| 0| case GGML_TYPE_F16: ftype = LLAMA_FTYPE_MOSTLY_F16; break; ------------------ | Branch (740:13): [True: 0, False: 0] ------------------ 741| 0| case GGML_TYPE_BF16: ftype = LLAMA_FTYPE_MOSTLY_BF16; break; ------------------ | Branch (741:13): [True: 0, False: 0] ------------------ 742| 0| case GGML_TYPE_Q4_0: ftype = LLAMA_FTYPE_MOSTLY_Q4_0; break; ------------------ | Branch (742:13): [True: 0, False: 0] ------------------ 743| 0| case GGML_TYPE_Q4_1: ftype = LLAMA_FTYPE_MOSTLY_Q4_1; break; ------------------ | Branch (743:13): [True: 0, False: 0] ------------------ 744| 0| case GGML_TYPE_Q5_0: ftype = LLAMA_FTYPE_MOSTLY_Q5_0; break; ------------------ | Branch (744:13): [True: 0, False: 0] ------------------ 745| 0| case GGML_TYPE_Q5_1: ftype = LLAMA_FTYPE_MOSTLY_Q5_1; break; ------------------ | Branch (745:13): [True: 0, False: 0] ------------------ 746| 0| case GGML_TYPE_Q8_0: ftype = LLAMA_FTYPE_MOSTLY_Q8_0; break; ------------------ | Branch (746:13): [True: 0, False: 0] ------------------ 747| 0| case GGML_TYPE_Q2_K: ftype = LLAMA_FTYPE_MOSTLY_Q2_K; break; ------------------ | Branch (747:13): [True: 0, False: 0] ------------------ 748| 0| case GGML_TYPE_Q3_K: ftype = LLAMA_FTYPE_MOSTLY_Q3_K_M; break; ------------------ | Branch (748:13): [True: 0, False: 0] ------------------ 749| 0| case GGML_TYPE_Q4_K: ftype = LLAMA_FTYPE_MOSTLY_Q4_K_M; break; ------------------ | Branch (749:13): [True: 0, False: 0] ------------------ 750| 0| case GGML_TYPE_Q5_K: ftype = LLAMA_FTYPE_MOSTLY_Q5_K_M; break; ------------------ | Branch (750:13): [True: 0, 
False: 0] ------------------ 751| 0| case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K; break; ------------------ | Branch (751:13): [True: 0, False: 0] ------------------ 752| 0| case GGML_TYPE_TQ1_0: ftype = LLAMA_FTYPE_MOSTLY_TQ1_0; break; ------------------ | Branch (752:13): [True: 0, False: 0] ------------------ 753| 0| case GGML_TYPE_TQ2_0: ftype = LLAMA_FTYPE_MOSTLY_TQ2_0; break; ------------------ | Branch (753:13): [True: 0, False: 0] ------------------ 754| 0| case GGML_TYPE_IQ2_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XXS; break; ------------------ | Branch (754:13): [True: 0, False: 0] ------------------ 755| 0| case GGML_TYPE_IQ2_XS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XS; break; ------------------ | Branch (755:13): [True: 0, False: 0] ------------------ 756| 0| case GGML_TYPE_IQ2_S: ftype = LLAMA_FTYPE_MOSTLY_IQ2_S; break; ------------------ | Branch (756:13): [True: 0, False: 0] ------------------ 757| 0| case GGML_TYPE_IQ3_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ3_XXS; break; ------------------ | Branch (757:13): [True: 0, False: 0] ------------------ 758| 0| case GGML_TYPE_IQ1_S: ftype = LLAMA_FTYPE_MOSTLY_IQ1_S; break; ------------------ | Branch (758:13): [True: 0, False: 0] ------------------ 759| 0| case GGML_TYPE_IQ1_M: ftype = LLAMA_FTYPE_MOSTLY_IQ1_M; break; ------------------ | Branch (759:13): [True: 0, False: 0] ------------------ 760| 0| case GGML_TYPE_IQ4_NL: ftype = LLAMA_FTYPE_MOSTLY_IQ4_NL; break; ------------------ | Branch (760:13): [True: 0, False: 0] ------------------ 761| 0| case GGML_TYPE_IQ4_XS: ftype = LLAMA_FTYPE_MOSTLY_IQ4_XS; break; ------------------ | Branch (761:13): [True: 0, False: 0] ------------------ 762| 0| case GGML_TYPE_IQ3_S: ftype = LLAMA_FTYPE_MOSTLY_IQ3_S; break; ------------------ | Branch (762:13): [True: 0, False: 0] ------------------ 763| 0| case GGML_TYPE_NVFP4: ftype = LLAMA_FTYPE_MOSTLY_NVFP4; break; ------------------ | Branch (763:13): [True: 0, False: 0] ------------------ 764| 0| case GGML_TYPE_Q1_0: ftype 
= LLAMA_FTYPE_MOSTLY_Q1_0; break; ------------------ | Branch (764:13): [True: 0, False: 0] ------------------ 765| 0| default: ------------------ | Branch (765:13): [True: 0, False: 0] ------------------ 766| 0| { 767| 0| LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max)); ------------------ | | 29| 0|#define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__) ------------------ 768| 0| ftype = LLAMA_FTYPE_ALL_F32; 769| 0| } break; 770| 0| } 771| | 772| | // this is a way to mark that we have "guessed" the file type 773| 0| ftype = (llama_ftype) (ftype | LLAMA_FTYPE_GUESSED); 774| | 775| 0| { 776| 0| uint32_t ftype_val = 0; 777| 0| if (get_key(LLM_KV_GENERAL_FILE_TYPE, ftype_val, false)) { ------------------ | Branch (777:17): [True: 0, False: 0] ------------------ 778| 0| ftype = (llama_ftype) ftype_val; 779| 0| } 780| 0| } 781| | 782| 0| LLAMA_LOG_INFO("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__); ------------------ | | 28| 0|#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) ------------------ 783| | 784| 0| for (int i = 0; i < n_kv; i++) { ------------------ | Branch (784:25): [True: 0, False: 0] ------------------ 785| 0| const char * name = gguf_get_key(metadata, i); 786| 0| const enum gguf_type type = gguf_get_kv_type(metadata, i); 787| 0| const std::string type_name = 788| 0| type == GGUF_TYPE_ARRAY ------------------ | Branch (788:17): [True: 0, False: 0] ------------------ 789| 0| ? 
format("%s[%s,%zu]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(metadata, i)), gguf_get_arr_n(metadata, i)) 790| 0| : gguf_type_name(type); 791| | 792| 0| std::string value = gguf_kv_to_str(metadata, i); 793| 0| const size_t MAX_VALUE_LEN = 40; 794| 0| if (value.size() > MAX_VALUE_LEN) { ------------------ | Branch (794:17): [True: 0, False: 0] ------------------ 795| 0| value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()); 796| 0| } 797| 0| replace_all(value, "\n", "\\n"); 798| | 799| 0| LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str()); ------------------ | | 28| 0|#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) ------------------ 800| 0| } 801| | 802| | // print type counts 803| 0| for (auto & kv : n_type) { ------------------ | Branch (803:24): [True: 0, False: 0] ------------------ 804| 0| if (kv.second == 0) { ------------------ | Branch (804:17): [True: 0, False: 0] ------------------ 805| 0| continue; 806| 0| } 807| | 808| 0| LLAMA_LOG_INFO("%s: - type %4s: %4d tensors\n", __func__, ggml_type_name(kv.first), kv.second); ------------------ | | 28| 0|#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) ------------------ 809| 0| } 810| 0| } 811| | 812| 0| if (!llama_mmap::SUPPORTED) { ------------------ | Branch (812:9): [True: 0, False: 0] ------------------ 813| 0| LLAMA_LOG_WARN("%s: mmap is not supported on this platform\n", __func__); ------------------ | | 29| 0|#define LLAMA_LOG_WARN(...) 
llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__) ------------------ 814| 0| use_mmap = false; 815| 0| } 816| | 817| 0| this->use_mmap = use_mmap; 818| 0| this->use_direct_io = use_direct_io; 819| 0| this->check_tensors = check_tensors; 820| 0| this->no_alloc = no_alloc; 821| 0|} llama_backend_init: 89| 3|void llama_backend_init(void) { 90| 3| ggml_time_init(); 91| | 92| | // needed to initialize f16 tables 93| 3| { 94| 3| struct ggml_init_params params = { 0, NULL, false }; 95| 3| struct ggml_context * ctx = ggml_init(params); 96| 3| ggml_free(ctx); 97| 3| } 98| | 99| 3| if (!ggml_backend_reg_count()) { ------------------ | Branch (99:9): [True: 0, False: 3] ------------------ 100| 0| ggml_backend_load_all(); 101| 0| } 102| 3|} llama_backend_free: 116| 2|void llama_backend_free(void) { 117| 2| ggml_quantize_free(); 118| 2|} llama_load_model_from_file: 422| 2| struct llama_model_params params) { 423| 2| return llama_model_load_from_file(path_model, params); 424| 2|} llama_model_load_from_file: 428| 2| struct llama_model_params params) { 429| 2| std::vector splits = {}; 430| 2| return llama_model_load_from_file_impl(nullptr, nullptr, nullptr, path_model, splits, /*file*/ nullptr, params); 431| 2|} llama.cpp:_ZL31llama_model_load_from_file_implP12gguf_contextPFvP11ggml_tensorPvES3_RKNSt3__112basic_stringIcNS6_11char_traitsIcEENS6_9allocatorIcEEEERNS6_6vectorISC_NSA_ISC_EEEEP8_IO_FILE18llama_model_params: 348| 2| struct llama_model_params params) { 349| 2| { 350| 2| int n_sources_defined = 0; 351| 2| if (metadata != nullptr) { ------------------ | Branch (351:13): [True: 0, False: 2] ------------------ 352| 0| n_sources_defined++; 353| 0| } 354| 2| if (!path_model.empty()) { ------------------ | Branch (354:13): [True: 2, False: 0] ------------------ 355| 2| n_sources_defined++; 356| 2| } 357| 2| if (file != nullptr) { ------------------ | Branch (357:13): [True: 0, False: 2] ------------------ 358| 0| n_sources_defined++; 359| 0| } 360| 2| if (n_sources_defined 
!= 1) { ------------------ | Branch (360:13): [True: 0, False: 2] ------------------ 361| 0| LLAMA_LOG_ERROR("%s: exactly one out metadata, path_model, and file must be defined\n", __func__); ------------------ | | 30| 0|#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 362| 0| return nullptr; 363| 0| } 364| 2| } 365| 2| ggml_time_init(); 366| | 367| 2| if (!params.vocab_only && ggml_backend_reg_count() == 0) { ------------------ | Branch (367:9): [True: 2, False: 0] | Branch (367:31): [True: 0, False: 2] ------------------ 368| 0| LLAMA_LOG_ERROR("%s: no backends are loaded. hint: use ggml_backend_load() or ggml_backend_load_all() to load a backend before calling this function\n", __func__); ------------------ | | 30| 0|#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 369| 0| return nullptr; 370| 0| } 371| | 372| 2| unsigned cur_percentage = 0; 373| 2| if (params.progress_callback == NULL) { ------------------ | Branch (373:9): [True: 0, False: 2] ------------------ 374| 0| params.progress_callback_user_data = &cur_percentage; 375| 0| params.progress_callback = [](float progress, void * ctx) { 376| 0| unsigned * cur_percentage_p = (unsigned *) ctx; 377| 0| unsigned percentage = (unsigned) (100 * progress); 378| 0| while (percentage > *cur_percentage_p) { 379| 0| *cur_percentage_p = percentage; 380| 0| LLAMA_LOG_CONT("."); 381| 0| if (percentage >= 100) { 382| 0| LLAMA_LOG_CONT("\n"); 383| 0| } 384| 0| } 385| 0| return true; 386| 0| }; 387| 0| } 388| | 389| 2| const auto [status, model] = llama_model_load(metadata, set_tensor_data, set_tensor_data_ud, path_model, splits, file, params); 390| 2| GGML_ASSERT(status <= 0); ------------------ | | 288| 2|#define GGML_ASSERT(x) if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x) | | ------------------ | | | | 287| 0|#define GGML_ABORT(...) 
ggml_abort(__FILE__, __LINE__, __VA_ARGS__) | | ------------------ | | | Branch (288:28): [True: 0, False: 2] | | ------------------ ------------------ 391| 2| if (status < 0) { ------------------ | Branch (391:9): [True: 2, False: 0] ------------------ 392| 2| if (status == -1) { ------------------ | Branch (392:13): [True: 2, False: 0] ------------------ 393| 2| LLAMA_LOG_ERROR("%s: failed to load model\n", __func__); ------------------ | | 30| 2|#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 394| 2| } else if (status == -2) { ------------------ | Branch (394:20): [True: 0, False: 0] ------------------ 395| 0| LLAMA_LOG_INFO("%s: cancelled model load\n", __func__); ------------------ | | 28| 0|#define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) ------------------ 396| 0| } 397| | 398| 2| if (model) { ------------------ | Branch (398:13): [True: 0, False: 2] ------------------ 399| 0| llama_model_free(model); 400| 0| } 401| 2| return nullptr; 402| 2| } 403| | 404| 0| return model; 405| 2|} llama.cpp:_ZL16llama_model_loadP12gguf_contextPFvP11ggml_tensorPvES3_RKNSt3__112basic_stringIcNS6_11char_traitsIcEENS6_9allocatorIcEEEERNS6_6vectorISC_NSA_ISC_EEEEP8_IO_FILER18llama_model_params: 280| 2| const std::string & fname, std::vector & splits, FILE * file, llama_model_params & params) { 281| 2| try { 282| 2| llama_model_loader ml(metadata, set_tensor_data, set_tensor_data_ud, fname, splits, file, params.use_mmap, params.use_direct_io, 283| 2| params.check_tensors, params.no_alloc, params.kv_overrides, params.tensor_buft_overrides); 284| | 285| 2| ml.print_info(); 286| 2| std::unique_ptr model_ptr(llama_model_create(ml, params)); 287| | 288| 2| bool ok = llama_prepare_model_devices(params, model_ptr.get()); 289| 2| if (!ok) { ------------------ | Branch (289:13): [True: 0, False: 2] ------------------ 290| 0| return {-1, nullptr}; 291| 0| } 292| | 293| 2| auto * model = 
dynamic_cast(model_ptr.get()); 294| 2| if (model == nullptr) { ------------------ | Branch (294:13): [True: 0, False: 2] ------------------ 295| 0| GGML_ABORT("fatal error: model does not implement llama_model_base"); ------------------ | | 287| 0|#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__) ------------------ 296| 0| } 297| | 298| | // loading time will be recalculated after the first eval, so 299| | // we take page faults deferred by mmap() into consideration 300| 2| model->t_load_us = 0; 301| 2| time_meas tm(model->t_load_us); 302| | 303| 2| model->t_start_us = tm.t_start_us; 304| | 305| 2| model->hparams.vocab_only = params.vocab_only; 306| 2| model->hparams.no_alloc = params.no_alloc; 307| | 308| 2| try { 309| 2| model->load_hparams(ml); 310| 2| } catch(const std::exception & e) { 311| 0| throw std::runtime_error("error loading model hyperparameters: " + std::string(e.what())); 312| 0| } 313| 0| if (model->arch == LLM_ARCH_CLIP) { ------------------ | Branch (313:13): [True: 0, False: 0] ------------------ 314| 0| throw std::runtime_error("CLIP cannot be used as main model, use it with --mmproj instead"); 315| 0| } 316| 0| try { 317| 0| model->load_vocab(ml); 318| 0| } catch(const std::exception & e) { 319| 0| throw std::runtime_error("error loading model vocabulary: " + std::string(e.what())); 320| 0| } 321| | 322| 0| model->load_stats(ml); 323| 0| model->print_info(); 324| | 325| 0| if (params.vocab_only) { ------------------ | Branch (325:13): [True: 0, False: 0] ------------------ 326| 0| LLAMA_LOG_INFO("%s: vocab only - skipping tensors\n", __func__); ------------------ | | 28| 0|#define LLAMA_LOG_INFO(...) 
llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) ------------------ 327| 0| return {0, model_ptr.release()}; 328| 0| } 329| | 330| 0| if (!model->load_tensors(ml)) { ------------------ | Branch (330:13): [True: 0, False: 0] ------------------ 331| 0| return {-2, nullptr}; 332| 0| } 333| | 334| 0| return {0, model_ptr.release()}; 335| 2| } catch (const std::exception & err) { 336| 2| LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what()); ------------------ | | 30| 2|#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) ------------------ 337| 2| return {-1, nullptr}; 338| 2| } 339| 2|}