/src/llama.cpp/ggml/src/ggml-impl.h
Line | Count | Source |
1 | | #pragma once |
2 | | |
3 | | // GGML internal header |
4 | | |
5 | | #include "ggml.h" |
6 | | #include "gguf.h" |
7 | | |
8 | | #include <assert.h> |
9 | | #include <math.h> |
10 | | #include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/ |
11 | | #include <stdbool.h> |
12 | | #include <stdint.h> |
13 | | #include <string.h> |
14 | | |
15 | | #ifdef __ARM_FEATURE_SVE |
16 | | #include <arm_sve.h> |
17 | | #endif // __ARM_FEATURE_SVE |
18 | | |
19 | | #if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__) |
20 | | // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example: |
21 | | // |
22 | | // $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/ |
23 | | // |
24 | | #include <arm_neon.h> |
25 | | #endif |
26 | | |
27 | | #if defined(__F16C__) |
28 | | #include <immintrin.h> |
29 | | #endif |
30 | | |
31 | | #ifdef __cplusplus |
32 | | extern "C" { |
33 | | #endif |
34 | | |
35 | | void ggml_print_backtrace(void); |
36 | | |
// Two-argument minimum / maximum helpers. Each one is only defined when the
// including translation unit has not already provided a macro of that name.
// NOTE: every argument appears twice in the expansion, so an argument with
// side effects (e.g. `i++`) may be evaluated twice.
#ifndef MIN
#    define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif

#ifndef MAX
#    define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

// required for mmap as gguf only guarantees 32-byte alignment
#define TENSOR_ALIGNMENT 32
47 | | |
// Make `static_assert(cond, msg)` usable from C translation units:
//  - if the name is already a macro, leave it alone
//  - otherwise, on C11 or newer, map it to the _Static_assert keyword
//  - otherwise (C99) turn it into a no-op
// C++ is skipped entirely because static_assert is a keyword there.
// ref: https://stackoverflow.com/a/53923785/4039976
#ifndef __cplusplus
#    ifndef static_assert
#        if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
#            define static_assert(cond, msg) _Static_assert(cond, msg)
#        else
             // expands to a harmless struct declaration so the trailing `;` at the use site stays valid
#            define static_assert(cond, msg) struct global_scope_noop_trick
#        endif
#    endif
#endif
61 | | |
// Round `n` up to the next multiple of 32; a value that is already a multiple
// of 32 is returned unchanged.
static inline int ggml_up32(int n) {
    const int low_bits = 31; // 32 - 1: the bits cleared by the rounding
    return (n + low_bits) & ~low_bits;
}
65 | | |
66 | | //static inline int ggml_up64(int n) { |
67 | | // return (n + 63) & ~63; |
68 | | //} |
69 | | |
70 | 0 | static inline int ggml_up(int n, int m) { |
71 | | // assert m is a power of 2 |
72 | 0 | GGML_ASSERT((m & (m - 1)) == 0); |
73 | 0 | return (n + m - 1) & ~(m - 1); |
74 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_up(int, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_up(int, int) Unexecuted instantiation: repack.cpp:ggml_up(int, int) Unexecuted instantiation: traits.cpp:ggml_up(int, int) Unexecuted instantiation: ggml-cpu.c:ggml_up Unexecuted instantiation: quants.c:ggml_up Unexecuted instantiation: binary-ops.cpp:ggml_up(int, int) Unexecuted instantiation: unary-ops.cpp:ggml_up(int, int) Unexecuted instantiation: vec.cpp:ggml_up(int, int) Unexecuted instantiation: ops.cpp:ggml_up(int, int) Unexecuted instantiation: sgemm.cpp:ggml_up(int, int) Unexecuted instantiation: ggml.c:ggml_up Unexecuted instantiation: ggml-alloc.c:ggml_up Unexecuted instantiation: ggml-backend.cpp:ggml_up(int, int) Unexecuted instantiation: ggml-opt.cpp:ggml_up(int, int) Unexecuted instantiation: ggml-quants.c:ggml_up Unexecuted instantiation: gguf.cpp:ggml_up(int, int) |
75 | | |
76 | | // TODO: move to ggml.h? (won't be able to inline) |
77 | 0 | static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) { |
78 | 0 | if (a->type != b->type) { |
79 | 0 | return false; |
80 | 0 | } |
81 | 0 | for (int i = 0; i < GGML_MAX_DIMS; i++) { |
82 | 0 | if (a->ne[i] != b->ne[i]) { |
83 | 0 | return false; |
84 | 0 | } |
85 | 0 | if (a->nb[i] != b->nb[i]) { |
86 | 0 | return false; |
87 | 0 | } |
88 | 0 | } |
89 | 0 | return true; |
90 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ggml-cpu.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: repack.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: traits.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ggml-cpu.c:ggml_are_same_layout Unexecuted instantiation: quants.c:ggml_are_same_layout Unexecuted instantiation: binary-ops.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: unary-ops.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: vec.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ops.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: sgemm.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ggml.c:ggml_are_same_layout Unexecuted instantiation: ggml-alloc.c:ggml_are_same_layout Unexecuted instantiation: ggml-backend.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ggml-opt.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ggml-quants.c:ggml_are_same_layout Unexecuted instantiation: gguf.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) |
91 | | |
92 | 0 | static bool ggml_op_is_empty(enum ggml_op op) { |
93 | 0 | switch (op) { |
94 | 0 | case GGML_OP_NONE: |
95 | 0 | case GGML_OP_RESHAPE: |
96 | 0 | case GGML_OP_TRANSPOSE: |
97 | 0 | case GGML_OP_VIEW: |
98 | 0 | case GGML_OP_PERMUTE: |
99 | 0 | return true; |
100 | 0 | default: |
101 | 0 | return false; |
102 | 0 | } |
103 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ggml-cpu.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: repack.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: traits.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ggml-cpu.c:ggml_op_is_empty Unexecuted instantiation: quants.c:ggml_op_is_empty Unexecuted instantiation: binary-ops.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: unary-ops.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: vec.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ops.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: sgemm.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ggml.c:ggml_op_is_empty Unexecuted instantiation: ggml-alloc.c:ggml_op_is_empty Unexecuted instantiation: ggml-backend.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ggml-opt.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ggml-quants.c:ggml_op_is_empty Unexecuted instantiation: gguf.cpp:ggml_op_is_empty(ggml_op) |
104 | | |
// Softplus: log(1 + exp(input)).
//
// For input > 20 the input itself is returned: the difference between
// softplus(input) and input is then far below float precision, and this also
// avoids overflowing expf() for large inputs.
static inline float ggml_compute_softplus_f32(float input) {
    if (input > 20.0f) {
        return input;
    }
    // log1pf() keeps the result accurate when expf(input) is tiny: in float,
    // 1 + expf(input) rounds to exactly 1 for strongly negative inputs, so
    // logf(1 + expf(input)) would collapse to 0 instead of ~expf(input)
    return log1pf(expf(input));
}
108 | | // |
109 | | // logging |
110 | | // |
111 | | |
112 | | GGML_ATTRIBUTE_FORMAT(2, 3) |
113 | | GGML_API void ggml_log_internal (enum ggml_log_level level, const char * format, ...); |
114 | | GGML_API void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data); |
115 | | |
116 | | #define GGML_LOG(...) ggml_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__) |
117 | 0 | #define GGML_LOG_INFO(...) ggml_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) |
118 | 0 | #define GGML_LOG_WARN(...) ggml_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__) |
119 | 0 | #define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) |
120 | 0 | #define GGML_LOG_DEBUG(...) ggml_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__) |
121 | | #define GGML_LOG_CONT(...) ggml_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__) |
122 | | |
123 | | #define GGML_DEBUG 0 |
124 | | |
125 | | #if (GGML_DEBUG >= 1) |
126 | | #define GGML_PRINT_DEBUG(...) GGML_LOG_DEBUG(__VA_ARGS__) |
127 | | #else |
128 | | #define GGML_PRINT_DEBUG(...) |
129 | | #endif |
130 | | |
131 | | #if (GGML_DEBUG >= 5) |
132 | | #define GGML_PRINT_DEBUG_5(...) GGML_LOG_DEBUG(__VA_ARGS__) |
133 | | #else |
134 | | #define GGML_PRINT_DEBUG_5(...) |
135 | | #endif |
136 | | |
137 | | #if (GGML_DEBUG >= 10) |
138 | | #define GGML_PRINT_DEBUG_10(...) GGML_LOG_DEBUG(__VA_ARGS__) |
139 | | #else |
140 | | #define GGML_PRINT_DEBUG_10(...) |
141 | | #endif |
142 | | |
143 | | // tensor params |
144 | | |
145 | 0 | static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) { |
146 | 0 | GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings |
147 | 0 | assert(params_size <= GGML_MAX_OP_PARAMS); |
148 | 0 | memcpy(tensor->op_params, params, params_size); |
149 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ggml-cpu.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: repack.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: traits.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ggml-cpu.c:ggml_set_op_params Unexecuted instantiation: quants.c:ggml_set_op_params Unexecuted instantiation: binary-ops.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: unary-ops.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: vec.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ops.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: sgemm.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ggml.c:ggml_set_op_params Unexecuted instantiation: ggml-alloc.c:ggml_set_op_params Unexecuted instantiation: ggml-backend.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ggml-opt.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ggml-quants.c:ggml_set_op_params Unexecuted instantiation: gguf.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) |
150 | | |
151 | 0 | static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) { |
152 | 0 | assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t)); |
153 | 0 | return ((const int32_t *)(tensor->op_params))[i]; |
154 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-cpu.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: repack.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: traits.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-cpu.c:ggml_get_op_params_i32 Unexecuted instantiation: quants.c:ggml_get_op_params_i32 Unexecuted instantiation: binary-ops.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: unary-ops.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: vec.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ops.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: sgemm.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml.c:ggml_get_op_params_i32 Unexecuted instantiation: ggml-alloc.c:ggml_get_op_params_i32 Unexecuted instantiation: ggml-backend.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-opt.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-quants.c:ggml_get_op_params_i32 Unexecuted instantiation: gguf.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) |
155 | | |
156 | 0 | static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) { |
157 | 0 | assert(i < GGML_MAX_OP_PARAMS / sizeof(float)); |
158 | 0 | return ((const float *)(tensor->op_params))[i]; |
159 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-cpu.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: repack.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: traits.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-cpu.c:ggml_get_op_params_f32 Unexecuted instantiation: quants.c:ggml_get_op_params_f32 Unexecuted instantiation: binary-ops.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: unary-ops.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: vec.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ops.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: sgemm.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml.c:ggml_get_op_params_f32 Unexecuted instantiation: ggml-alloc.c:ggml_get_op_params_f32 Unexecuted instantiation: ggml-backend.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-opt.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-quants.c:ggml_get_op_params_f32 Unexecuted instantiation: gguf.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) |
160 | | |
161 | 0 | static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) { |
162 | 0 | assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t)); |
163 | 0 | ((int32_t *)(tensor->op_params))[i] = value; |
164 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: repack.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: traits.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ggml-cpu.c:ggml_set_op_params_i32 Unexecuted instantiation: quants.c:ggml_set_op_params_i32 Unexecuted instantiation: binary-ops.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: unary-ops.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: vec.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ops.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: sgemm.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ggml.c:ggml_set_op_params_i32 Unexecuted instantiation: ggml-alloc.c:ggml_set_op_params_i32 Unexecuted instantiation: ggml-backend.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ggml-opt.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ggml-quants.c:ggml_set_op_params_i32 Unexecuted instantiation: gguf.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) |
165 | | |
166 | 0 | static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, float value) { |
167 | 0 | assert(i < GGML_MAX_OP_PARAMS / sizeof(float)); |
168 | 0 | ((float *)(tensor->op_params))[i] = value; |
169 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ggml-cpu.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: repack.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: traits.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ggml-cpu.c:ggml_set_op_params_f32 Unexecuted instantiation: quants.c:ggml_set_op_params_f32 Unexecuted instantiation: binary-ops.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: unary-ops.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: vec.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ops.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: sgemm.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ggml.c:ggml_set_op_params_f32 Unexecuted instantiation: ggml-alloc.c:ggml_set_op_params_f32 Unexecuted instantiation: ggml-backend.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ggml-opt.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ggml-quants.c:ggml_set_op_params_f32 Unexecuted instantiation: gguf.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) |
170 | | |
171 | | struct ggml_map_custom1_op_params { |
172 | | ggml_custom1_op_t fun; |
173 | | int n_tasks; |
174 | | void * userdata; |
175 | | }; |
176 | | |
177 | | struct ggml_map_custom2_op_params { |
178 | | ggml_custom2_op_t fun; |
179 | | int n_tasks; |
180 | | void * userdata; |
181 | | }; |
182 | | |
183 | | struct ggml_map_custom3_op_params { |
184 | | ggml_custom3_op_t fun; |
185 | | int n_tasks; |
186 | | void * userdata; |
187 | | }; |
188 | | |
189 | | struct ggml_custom_op_params { |
190 | | ggml_custom_op_t fun; |
191 | | int n_tasks; |
192 | | void * userdata; |
193 | | }; |
194 | | |
195 | | // bitset |
196 | | |
// A bitset is a plain array of 32-bit words; bit i lives in word i / 32 at
// bit position i % 32.
typedef uint32_t ggml_bitset_t;

static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated");
#define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8)
#define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1)

// number of ggml_bitset_t words needed to hold n bits
static size_t ggml_bitset_size(size_t n) {
    const size_t rounded = n + BITSET_MASK;
    return rounded >> BITSET_SHR;
}

// returns true if bit i is set
static inline bool ggml_bitset_get(const ggml_bitset_t * bitset, size_t i) {
    const size_t word = i >> BITSET_SHR;
    const size_t bit  = i & BITSET_MASK;
    return (bitset[word] & (1u << bit)) != 0;
}

// sets bit i
static inline void ggml_bitset_set(ggml_bitset_t * bitset, size_t i) {
    const size_t word = i >> BITSET_SHR;
    const size_t bit  = i & BITSET_MASK;
    bitset[word] |= (1u << bit);
}

// clears bit i
static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) {
    const size_t word = i >> BITSET_SHR;
    const size_t bit  = i & BITSET_MASK;
    bitset[word] &= ~(1u << bit);
}
218 | | |
219 | | // hash set |
220 | | |
221 | 0 | #define GGML_HASHSET_FULL ((size_t)-1) |
222 | 0 | #define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2) |
223 | | |
224 | | struct ggml_hash_set { |
225 | | size_t size; |
226 | | ggml_bitset_t * used; // whether or not the keys are in use i.e. set |
227 | | struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i) |
228 | | }; |
229 | | |
230 | | struct ggml_hash_set ggml_hash_set_new(size_t size); |
231 | | void ggml_hash_set_free(struct ggml_hash_set * hash_set); |
232 | | |
233 | | // returns the minimum size for a hash set that can hold min_sz elements |
234 | | size_t ggml_hash_size(size_t min_sz); |
235 | | |
236 | | // remove all elements from the hash set |
237 | | void ggml_hash_set_reset(struct ggml_hash_set * hash_set); |
238 | | |
239 | | // returns true if key is in the hash set |
240 | | static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key); |
241 | | |
242 | | // returns GGML_HASHSET_FULL if table is full, otherwise the current index of the key or where it should be inserted |
243 | | static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, const struct ggml_tensor * key); |
244 | | |
245 | | // returns GGML_HASHSET_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full |
246 | | static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key); |
247 | | |
248 | | // return index, asserts if table is full |
249 | | static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key); |
250 | | |
251 | | // hash function for ggml_tensor |
// Hash of a tensor pointer: its address shifted right by 4 bits.
static inline size_t ggml_hash(const struct ggml_tensor * p) {
    const uintptr_t addr = (uintptr_t) p;
    // the last 4 bits are always zero due to alignment
    return ((size_t) addr) >> 4;
}
256 | | |
257 | 0 | static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, const struct ggml_tensor * key) { |
258 | 0 | size_t h = ggml_hash(key) % hash_set->size; |
259 | | |
260 | | // linear probing |
261 | 0 | size_t i = h; |
262 | 0 | while (ggml_bitset_get(hash_set->used, i) && hash_set->keys[i] != key) { |
263 | 0 | i = (i + 1) % hash_set->size; |
264 | 0 | if (i == h) { |
265 | | // visited all hash table entries -> not found |
266 | 0 | return GGML_HASHSET_FULL; |
267 | 0 | } |
268 | 0 | } |
269 | 0 | return i; |
270 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ggml-cpu.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: repack.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: traits.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ggml-cpu.c:ggml_hash_find Unexecuted instantiation: quants.c:ggml_hash_find Unexecuted instantiation: binary-ops.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: unary-ops.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: vec.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ops.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: sgemm.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ggml.c:ggml_hash_find Unexecuted instantiation: ggml-alloc.c:ggml_hash_find Unexecuted instantiation: ggml-backend.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ggml-opt.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ggml-quants.c:ggml_hash_find Unexecuted instantiation: gguf.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) |
271 | | |
272 | 0 | static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) { |
273 | 0 | size_t i = ggml_hash_find(hash_set, key); |
274 | 0 | return i != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, i); |
275 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: repack.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: traits.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.c:ggml_hash_contains Unexecuted instantiation: quants.c:ggml_hash_contains Unexecuted instantiation: binary-ops.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: unary-ops.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: vec.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ops.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: sgemm.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ggml.c:ggml_hash_contains Unexecuted instantiation: ggml-alloc.c:ggml_hash_contains Unexecuted instantiation: ggml-backend.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ggml-opt.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ggml-quants.c:ggml_hash_contains Unexecuted instantiation: gguf.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) |
276 | | |
277 | 0 | static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) { |
278 | 0 | size_t h = ggml_hash(key) % hash_set->size; |
279 | | |
280 | | // linear probing |
281 | 0 | size_t i = h; |
282 | 0 | do { |
283 | 0 | if (!ggml_bitset_get(hash_set->used, i)) { |
284 | 0 | ggml_bitset_set(hash_set->used, i); |
285 | 0 | hash_set->keys[i] = key; |
286 | 0 | return i; |
287 | 0 | } |
288 | 0 | if (hash_set->keys[i] == key) { |
289 | 0 | return GGML_HASHSET_ALREADY_EXISTS; |
290 | 0 | } |
291 | 0 | i = (i + 1) % hash_set->size; |
292 | 0 | } while (i != h); |
293 | | |
294 | | // visited all hash table entries -> not found |
295 | 0 | GGML_ABORT("fatal error"); |
296 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: repack.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: traits.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.c:ggml_hash_insert Unexecuted instantiation: quants.c:ggml_hash_insert Unexecuted instantiation: binary-ops.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: unary-ops.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: vec.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ops.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: sgemm.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml.c:ggml_hash_insert Unexecuted instantiation: ggml-alloc.c:ggml_hash_insert Unexecuted instantiation: ggml-backend.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-opt.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-quants.c:ggml_hash_insert Unexecuted instantiation: gguf.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) |
297 | | |
298 | 0 | static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) { |
299 | 0 | size_t h = ggml_hash(key) % hash_set->size; |
300 | | |
301 | | // linear probing |
302 | 0 | size_t i = h; |
303 | 0 | do { |
304 | 0 | if (!ggml_bitset_get(hash_set->used, i)) { |
305 | 0 | ggml_bitset_set(hash_set->used, i); |
306 | 0 | hash_set->keys[i] = key; |
307 | 0 | return i; |
308 | 0 | } |
309 | 0 | if (hash_set->keys[i] == key) { |
310 | 0 | return i; |
311 | 0 | } |
312 | 0 | i = (i + 1) % hash_set->size; |
313 | 0 | } while (i != h); |
314 | | |
315 | | // visited all hash table entries -> not found |
316 | 0 | GGML_ABORT("fatal error"); |
317 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: repack.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: traits.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.c:ggml_hash_find_or_insert Unexecuted instantiation: quants.c:ggml_hash_find_or_insert Unexecuted instantiation: binary-ops.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: unary-ops.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: vec.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ops.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: sgemm.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml.c:ggml_hash_find_or_insert Unexecuted instantiation: ggml-alloc.c:ggml_hash_find_or_insert Unexecuted instantiation: ggml-backend.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-opt.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-quants.c:ggml_hash_find_or_insert Unexecuted instantiation: gguf.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) |
318 | | |
319 | | // computation graph |
320 | | |
// Evaluation order stored in ggml_cgraph::order.
enum ggml_cgraph_eval_order {
    GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
    GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT = 1,
    GGML_CGRAPH_EVAL_ORDER_COUNT         = 2  // number of orders listed above
};
326 | | |
// A computation graph: node and leaf tensor arrays plus gradient and use-count bookkeeping.
struct ggml_cgraph {
    int size;    // maximum number of nodes/leafs/grads/grad_accs
    int n_nodes; // number of nodes currently in use
    int n_leafs; // number of leafs currently in use

    struct ggml_tensor ** nodes;     // tensors with data that can change if the graph is evaluated
    struct ggml_tensor ** grads;     // the outputs of these tensors are the gradients of the nodes
    struct ggml_tensor ** grad_accs; // accumulators for node gradients
    struct ggml_tensor ** leafs;     // tensors with constant data
    int32_t *             use_counts;// number of uses of each tensor, indexed by hash table slot

    // hash set of tensors; its slot indices are the indices into use_counts
    struct ggml_hash_set visited_hash_set;

    // one of the ggml_cgraph_eval_order values
    enum ggml_cgraph_eval_order order;
};
342 | | |
343 | | // returns a slice of cgraph with nodes [i0, i1) |
344 | | // the slice does not have leafs or gradients |
345 | | // if you need the gradients, get them from the original graph |
346 | | struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1); |
347 | | |
348 | | // ggml-alloc.c: true if the operation can reuse memory from its sources |
349 | | GGML_API bool ggml_op_can_inplace(enum ggml_op op); |
350 | | |
351 | | |
352 | | // Memory allocation |
353 | | |
354 | | GGML_API void * ggml_aligned_malloc(size_t size); |
355 | | GGML_API void ggml_aligned_free(void * ptr, size_t size); |
356 | | |
357 | | // FP16 <-> FP32 |
358 | | // ref: https://github.com/Maratyszcza/FP16 |
359 | | |
// Reinterprets the 32-bit pattern `w` as a float (bit copy, no numeric conversion).
static inline float fp32_from_bits(uint32_t w) {
    float value;
    // memcpy is well-defined in both C and C++; reading the inactive member of a
    // union is undefined behaviour in C++, and this header is also included from C++
    memcpy(&value, &w, sizeof(value));
    return value;
}
368 | | |
// Returns the raw 32-bit pattern of the float `f` (bit copy, no numeric conversion).
static inline uint32_t fp32_to_bits(float f) {
    uint32_t bits;
    // memcpy is well-defined in both C and C++; reading the inactive member of a
    // union is undefined behaviour in C++, and this header is also included from C++
    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
377 | | |
378 | 0 | static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { |
379 | 0 | const uint32_t w = (uint32_t) h << 16; |
380 | 0 | const uint32_t sign = w & UINT32_C(0x80000000); |
381 | 0 | const uint32_t two_w = w + w; |
382 | |
|
383 | 0 | const uint32_t exp_offset = UINT32_C(0xE0) << 23; |
384 | 0 | #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) |
385 | 0 | const float exp_scale = 0x1.0p-112f; |
386 | | #else |
387 | | const float exp_scale = fp32_from_bits(UINT32_C(0x7800000)); |
388 | | #endif |
389 | 0 | const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; |
390 | |
|
391 | 0 | const uint32_t magic_mask = UINT32_C(126) << 23; |
392 | 0 | const float magic_bias = 0.5f; |
393 | 0 | const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; |
394 | |
|
395 | 0 | const uint32_t denormalized_cutoff = UINT32_C(1) << 27; |
396 | 0 | const uint32_t result = sign | |
397 | 0 | (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); |
398 | 0 | return fp32_from_bits(result); |
399 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: ggml-cpu.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: repack.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: traits.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: ggml-cpu.c:ggml_compute_fp16_to_fp32 Unexecuted instantiation: quants.c:ggml_compute_fp16_to_fp32 Unexecuted instantiation: binary-ops.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: unary-ops.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: vec.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: ops.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: sgemm.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: ggml.c:ggml_compute_fp16_to_fp32 Unexecuted instantiation: ggml-alloc.c:ggml_compute_fp16_to_fp32 Unexecuted instantiation: ggml-backend.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: ggml-opt.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: ggml-quants.c:ggml_compute_fp16_to_fp32 Unexecuted instantiation: gguf.cpp:ggml_compute_fp16_to_fp32(unsigned short) |
400 | | |
401 | 0 | static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { |
402 | 0 | #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) |
403 | 0 | const float scale_to_inf = 0x1.0p+112f; |
404 | 0 | const float scale_to_zero = 0x1.0p-110f; |
405 | | #else |
406 | | const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000)); |
407 | | const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000)); |
408 | | #endif |
409 | 0 | float base = (fabsf(f) * scale_to_inf) * scale_to_zero; |
410 | |
|
411 | 0 | const uint32_t w = fp32_to_bits(f); |
412 | 0 | const uint32_t shl1_w = w + w; |
413 | 0 | const uint32_t sign = w & UINT32_C(0x80000000); |
414 | 0 | uint32_t bias = shl1_w & UINT32_C(0xFF000000); |
415 | 0 | if (bias < UINT32_C(0x71000000)) { |
416 | 0 | bias = UINT32_C(0x71000000); |
417 | 0 | } |
418 | |
|
419 | 0 | base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; |
420 | 0 | const uint32_t bits = fp32_to_bits(base); |
421 | 0 | const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); |
422 | 0 | const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); |
423 | 0 | const uint32_t nonsign = exp_bits + mantissa_bits; |
424 | 0 | return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); |
425 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: ggml-cpu.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: repack.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: traits.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: ggml-cpu.c:ggml_compute_fp32_to_fp16 Unexecuted instantiation: quants.c:ggml_compute_fp32_to_fp16 Unexecuted instantiation: binary-ops.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: unary-ops.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: vec.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: ops.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: sgemm.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: ggml.c:ggml_compute_fp32_to_fp16 Unexecuted instantiation: ggml-alloc.c:ggml_compute_fp32_to_fp16 Unexecuted instantiation: ggml-backend.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: ggml-opt.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: ggml-quants.c:ggml_compute_fp32_to_fp16 Unexecuted instantiation: gguf.cpp:ggml_compute_fp32_to_fp16(float) |
426 | | |
// FP16 <-> FP32 conversion macros that call the ggml_compute_* helpers directly
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)

// generic conversion names; here they forward to the GGML_COMPUTE_* macros
#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
432 | | |
// Converts an E8M0 value (an 8-bit exponent, no sign or mantissa) to float: 2^(x - 127).
static inline float ggml_e8m0_to_fp32(uint8_t x) {
    // x == 0: 2^(-127) is below the smallest normalized float, so it is encoded as a
    //         denormal with exponent field 0 and mantissa 0x400000 (0.5 * 2^(-126)).
    // x != 0: x goes straight into the exponent field (bits 30-23) with a zero mantissa.
    // note: NaN is deliberately not handled; x == 0xFF takes the generic path and
    //       yields the bit pattern 0x7F800000 rather than a quiet NaN (0x7FC00000)
    const uint32_t bits = (x == 0) ? UINT32_C(0x00400000) : ((uint32_t) x << 23);

    // copy the bit pattern into a float without type punning
    float result;
    memcpy(&result, &bits, sizeof(float));
    return result;
}
468 | | |
// Returns half of ggml_e8m0_to_fp32(x), i.e. 2^(x - 128).
// Useful with MXFP4 quantization since the E0M2 values are doubled
static inline float ggml_e8m0_to_fp32_half(uint8_t x) {
    uint32_t bits;

    switch (x) {
        case 0:
            // 2^(-128) as a denormal
            bits = UINT32_C(0x00200000);
            break;
        case 1:
            // 2^(-127) as a denormal
            bits = UINT32_C(0x00400000);
            break;
        default:
            // x >= 2: 2^(x - 128) is normalized, with exponent field (x - 1) and zero mantissa
            bits = (uint32_t) (x - 1) << 23;
            break;
    }
    // Note: NaNs are not handled here

    float result;
    memcpy(&result, &bits, sizeof(float));
    return result;
}
490 | | |
// E8M0 -> FP32 conversion macros; the _HALF variant yields half of the converted value
#define GGML_E8M0_TO_FP32(x) ggml_e8m0_to_fp32(x)
#define GGML_E8M0_TO_FP32_HALF(x) ggml_e8m0_to_fp32_half(x)
493 | | |
494 | | /** |
495 | | * Converts brain16 to float32. |
496 | | * |
497 | | * The bfloat16 floating point format has the following structure: |
498 | | * |
499 | | * ┌sign |
500 | | * │ |
501 | | * │ ┌exponent |
502 | | * │ │ |
503 | | * │ │ ┌mantissa |
504 | | * │ │ │ |
505 | | * │┌──┴───┐┌─┴───┐ |
506 | | * 0b0000000000000000 brain16 |
507 | | * |
508 | | * Since bf16 has the same number of exponent bits as a 32bit float, |
509 | | * encoding and decoding numbers becomes relatively straightforward. |
510 | | * |
511 | | * ┌sign |
512 | | * │ |
513 | | * │ ┌exponent |
514 | | * │ │ |
515 | | * │ │ ┌mantissa |
516 | | * │ │ │ |
517 | | * │┌──┴───┐┌─┴───────────────────┐ |
518 | | * 0b00000000000000000000000000000000 IEEE binary32 |
519 | | * |
520 | | * For comparison, the standard fp16 format has fewer exponent bits. |
521 | | * |
522 | | * ┌sign |
523 | | * │ |
524 | | * │ ┌exponent |
525 | | * │ │ |
526 | | * │ │ ┌mantissa |
527 | | * │ │ │ |
528 | | * │┌─┴─┐┌─┴──────┐ |
529 | | * 0b0000000000000000 IEEE binary16 |
530 | | * |
531 | | * @see IEEE 754-2008 |
532 | | */ |
533 | 0 | static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) { |
534 | 0 | union { |
535 | 0 | float f; |
536 | 0 | uint32_t i; |
537 | 0 | } u; |
538 | 0 | u.i = (uint32_t)h.bits << 16; |
539 | 0 | return u.f; |
540 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ggml-cpu.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: repack.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: traits.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ggml-cpu.c:ggml_compute_bf16_to_fp32 Unexecuted instantiation: quants.c:ggml_compute_bf16_to_fp32 Unexecuted instantiation: binary-ops.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: unary-ops.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: vec.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ops.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: sgemm.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ggml.c:ggml_compute_bf16_to_fp32 Unexecuted instantiation: ggml-alloc.c:ggml_compute_bf16_to_fp32 Unexecuted instantiation: ggml-backend.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ggml-opt.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ggml-quants.c:ggml_compute_bf16_to_fp32 Unexecuted instantiation: gguf.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) |
541 | | |
542 | | /** |
543 | | * Converts float32 to brain16. |
544 | | * |
545 | | * This is binary identical with Google Brain float conversion. |
546 | | * Floats shall round to nearest even, and NANs shall be quiet. |
547 | | * Subnormals aren't flushed to zero, except perhaps when used. |
548 | | * This code should vectorize nicely if using modern compilers. |
549 | | */ |
550 | 0 | static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) { |
551 | 0 | ggml_bf16_t h; |
552 | 0 | union { |
553 | 0 | float f; |
554 | 0 | uint32_t i; |
555 | 0 | } u; |
556 | 0 | u.f = s; |
557 | 0 | if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */ |
558 | 0 | h.bits = (u.i >> 16) | 64; /* force to quiet */ |
559 | 0 | return h; |
560 | 0 | } |
561 | 0 | h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16; |
562 | 0 | return h; |
563 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ggml-cpu.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: repack.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: traits.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ggml-cpu.c:ggml_compute_fp32_to_bf16 Unexecuted instantiation: quants.c:ggml_compute_fp32_to_bf16 Unexecuted instantiation: binary-ops.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: unary-ops.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: vec.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ops.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: sgemm.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ggml.c:ggml_compute_fp32_to_bf16 Unexecuted instantiation: ggml-alloc.c:ggml_compute_fp32_to_bf16 Unexecuted instantiation: ggml-backend.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ggml-opt.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ggml-quants.c:ggml_compute_fp32_to_bf16 Unexecuted instantiation: gguf.cpp:ggml_compute_fp32_to_bf16(float) |
564 | | |
// BF16 <-> FP32 conversion macros; both forward to the ggml_compute_* helpers
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
567 | | |
568 | 0 | static inline int32_t ggml_node_get_use_count(const struct ggml_cgraph * cgraph, int node_idx) { |
569 | 0 | const struct ggml_tensor * node = cgraph->nodes[node_idx]; |
570 | |
|
571 | 0 | size_t hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node); |
572 | 0 | if (!ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) { |
573 | 0 | return 0; |
574 | 0 | } |
575 | 0 | return cgraph->use_counts[hash_pos]; |
576 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: repack.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: traits.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ggml-cpu.c:ggml_node_get_use_count Unexecuted instantiation: quants.c:ggml_node_get_use_count Unexecuted instantiation: binary-ops.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: unary-ops.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: vec.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ops.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: sgemm.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ggml.c:ggml_node_get_use_count Unexecuted instantiation: ggml-alloc.c:ggml_node_get_use_count Unexecuted instantiation: ggml-backend.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ggml-opt.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ggml-quants.c:ggml_node_get_use_count Unexecuted instantiation: gguf.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) |
577 | | |
578 | | // return true if the node's results are only used by N other nodes |
579 | | // and can be fused into their calculations. |
580 | 0 | static inline bool ggml_node_has_n_uses(const struct ggml_cgraph * cgraph, int node_idx, int32_t n_uses) { |
581 | 0 | const struct ggml_tensor * node = cgraph->nodes[node_idx]; |
582 | 0 |
|
583 | 0 | // check the use count against how many we're replacing |
584 | 0 | if (ggml_node_get_use_count(cgraph, node_idx) != n_uses) { |
585 | 0 | return false; |
586 | 0 | } |
587 | 0 |
|
588 | 0 | // if node is a view, some other node might be using the intermediate result |
589 | 0 | // via the view source. |
590 | 0 | if (node->view_src) { |
591 | 0 | return false; |
592 | 0 | } |
593 | 0 |
|
594 | 0 | // If the user requested output for the node, can't fuse |
595 | 0 | if (node->flags & GGML_TENSOR_FLAG_OUTPUT) { |
596 | 0 | return false; |
597 | 0 | } |
598 | 0 |
|
599 | 0 | return true; |
600 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: repack.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: traits.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ggml-cpu.c:ggml_node_has_n_uses Unexecuted instantiation: quants.c:ggml_node_has_n_uses Unexecuted instantiation: binary-ops.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: unary-ops.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: vec.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ops.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: sgemm.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ggml.c:ggml_node_has_n_uses Unexecuted instantiation: ggml-alloc.c:ggml_node_has_n_uses Unexecuted instantiation: ggml-backend.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ggml-opt.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ggml-quants.c:ggml_node_has_n_uses Unexecuted instantiation: gguf.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) |
601 | | |
602 | | // Returns true if nodes with indices { node_idxs } are the sequence of ggml_ops in ops[] |
603 | | // and are fusable. Nodes are considered fusable according to this function if: |
604 | | // - all nodes except the last have only one use and are not views/outputs (see ggml_node_has_N_uses). |
605 | | // - all nodes except the last are a src of the following node. |
606 | | // - all nodes are the same shape. |
607 | | // TODO: Consider allowing GGML_OP_NONE nodes in between |
608 | 0 | static inline bool ggml_can_fuse_ext(const struct ggml_cgraph * cgraph, const int * node_idxs, const enum ggml_op * ops, int num_ops) { |
609 | 0 | for (int i = 0; i < num_ops; ++i) { |
610 | 0 | if (node_idxs[i] >= cgraph->n_nodes) { |
611 | 0 | return false; |
612 | 0 | } |
613 | 0 |
|
614 | 0 | struct ggml_tensor * node = cgraph->nodes[node_idxs[i]]; |
615 | 0 | if (node->op != ops[i]) { |
616 | 0 | return false; |
617 | 0 | } |
618 | 0 | if (i < num_ops - 1 && !ggml_node_has_n_uses(cgraph, node_idxs[i], 1)) { |
619 | 0 | return false; |
620 | 0 | } |
621 | 0 | if (i > 0) { |
622 | 0 | struct ggml_tensor * prev = cgraph->nodes[node_idxs[i - 1]]; |
623 | 0 | if (node->src[0] != prev && node->src[1] != prev) { |
624 | 0 | return false; |
625 | 0 | } |
626 | 0 | if (!ggml_are_same_shape(node, prev)) { |
627 | 0 | return false; |
628 | 0 | } |
629 | 0 | } |
630 | 0 | } |
631 | 0 | return true; |
632 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: repack.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: traits.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ggml-cpu.c:ggml_can_fuse_ext Unexecuted instantiation: quants.c:ggml_can_fuse_ext Unexecuted instantiation: binary-ops.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: unary-ops.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: vec.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ops.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: sgemm.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ggml.c:ggml_can_fuse_ext Unexecuted instantiation: ggml-alloc.c:ggml_can_fuse_ext Unexecuted instantiation: ggml-backend.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ggml-opt.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ggml-quants.c:ggml_can_fuse_ext Unexecuted instantiation: gguf.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) |
633 | | |
634 | | // same as above, for sequential indices starting at node_idx |
635 | 0 | static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, const enum ggml_op * ops, int num_ops) { |
636 | 0 | assert(num_ops < 32); |
637 | 0 |
|
638 | 0 | if (node_idx + num_ops > cgraph->n_nodes) { |
639 | 0 | return false; |
640 | 0 | } |
641 | 0 |
|
642 | 0 | int idxs[32]; |
643 | 0 | for (int i = 0; i < num_ops; ++i) { |
644 | 0 | idxs[i] = node_idx + i; |
645 | 0 | } |
646 | 0 |
|
647 | 0 | return ggml_can_fuse_ext(cgraph, idxs, ops, num_ops); |
648 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: repack.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: traits.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ggml-cpu.c:ggml_can_fuse Unexecuted instantiation: quants.c:ggml_can_fuse Unexecuted instantiation: binary-ops.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: unary-ops.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: vec.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ops.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: sgemm.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ggml.c:ggml_can_fuse Unexecuted instantiation: ggml-alloc.c:ggml_can_fuse Unexecuted instantiation: ggml-backend.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ggml-opt.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ggml-quants.c:ggml_can_fuse Unexecuted instantiation: gguf.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) |
649 | | |
// Explicit-index form of the subgraph fusion check: the caller supplies the node indices
// in node_idxs (count entries) instead of a contiguous range. ggml_can_fuse_subgraph()
// forwards to this function after expanding a starting index into sequential indices.
// NOTE(review): the implementation is not part of this header; confirm the exact meaning
// of ops, outputs and num_outputs against the definition.
GGML_API bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
                                         const int *                node_idxs,
                                         int                        count,
                                         const enum ggml_op *       ops,
                                         const int *                outputs,
                                         int                        num_outputs);
656 | | |
657 | | // Returns true if the subgraph formed by {node_idxs} can be fused |
658 | | // checks whethers all nodes which are not part of outputs can be elided |
659 | | // by checking if their num_uses are confined to the subgraph |
660 | | static inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph, |
661 | | int node_idx, |
662 | | int count, |
663 | | const enum ggml_op * ops, |
664 | | const int * outputs, |
665 | 0 | int num_outputs) { |
666 | 0 | GGML_ASSERT(count < 32); |
667 | 0 | if (node_idx + count > cgraph->n_nodes) { |
668 | 0 | return false; |
669 | 0 | } |
670 | 0 |
|
671 | 0 | int idxs[32]; |
672 | 0 |
|
673 | 0 | for (int i = 0; i < count; ++i) { |
674 | 0 | idxs[i] = node_idx + i; |
675 | 0 | } |
676 | 0 |
|
677 | 0 | return ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs); |
678 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: repack.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: traits.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ggml-cpu.c:ggml_can_fuse_subgraph Unexecuted instantiation: quants.c:ggml_can_fuse_subgraph Unexecuted instantiation: binary-ops.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: unary-ops.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: vec.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ops.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: sgemm.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ggml.c:ggml_can_fuse_subgraph Unexecuted instantiation: ggml-alloc.c:ggml_can_fuse_subgraph Unexecuted instantiation: ggml-backend.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ggml-opt.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ggml-quants.c:ggml_can_fuse_subgraph Unexecuted instantiation: gguf.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) |
679 | | |
680 | | #ifdef __cplusplus |
681 | | } |
682 | | #endif |
683 | | |
684 | | #ifdef __cplusplus |
685 | | #include <array> |
686 | | #include <initializer_list> |
687 | | #include <vector> |
688 | | |
689 | | // nicer C++ syntax for ggml_can_fuse |
690 | 0 | inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::initializer_list<enum ggml_op> ops) { |
691 | 0 | return ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size()); |
692 | 0 | } |
693 | | |
694 | | inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph, |
695 | | int start_idx, |
696 | | std::initializer_list<enum ggml_op> ops, |
697 | 0 | std::initializer_list<int> outputs = {}) { |
698 | 0 | return ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size()); |
699 | 0 | } |
700 | | |
701 | | // Return true if the edges in the graph match expectations. |
702 | | inline bool ggml_check_edges(const struct ggml_cgraph * cgraph, |
703 | | int start_idx, |
704 | 0 | std::initializer_list<std::array<int, 3>> edges) { |
705 | 0 | for (const auto & edge : edges) { |
706 | 0 | int dst_node = edge[0]; |
707 | 0 | int src_idx = edge[1]; |
708 | 0 | int src_node = edge[2]; |
709 | 0 | if (cgraph->nodes[start_idx + dst_node]->src[src_idx] != cgraph->nodes[start_idx + src_node]) { |
710 | 0 | return false; |
711 | 0 | } |
712 | 0 | } |
713 | 0 | return true; |
714 | 0 | } |
715 | | |
716 | | // expose GGUF internals for test code |
717 | | GGML_API size_t gguf_type_size(enum gguf_type type); |
718 | | GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params); |
719 | | GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta); |
720 | | #endif // __cplusplus |