/src/llama.cpp/ggml/src/ggml-impl.h
Line | Count | Source |
1 | | #pragma once |
2 | | |
3 | | // GGML internal header |
4 | | |
5 | | #include "ggml.h" |
6 | | #include "gguf.h" |
7 | | |
8 | | #include <assert.h> |
9 | | #include <math.h> |
10 | | #include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/ |
11 | | #include <stdbool.h> |
12 | | #include <stdint.h> |
13 | | #include <string.h> |
14 | | |
15 | | #ifdef __ARM_FEATURE_SVE |
16 | | #include <arm_sve.h> |
17 | | #endif // __ARM_FEATURE_SVE |
18 | | |
19 | | #if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__) |
20 | | // if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example: |
21 | | // |
22 | | // $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/ |
23 | | // |
24 | | #include <arm_neon.h> |
25 | | #endif |
26 | | |
27 | | #ifdef __cplusplus |
28 | | extern "C" { |
29 | | #endif |
30 | | |
31 | | void ggml_print_backtrace(void); |
32 | | |
33 | | #ifndef MIN |
34 | 0 | # define MIN(a, b) ((a) < (b) ? (a) : (b)) |
35 | | #endif |
36 | | |
37 | | #ifndef MAX |
38 | 0 | # define MAX(a, b) ((a) > (b) ? (a) : (b)) |
39 | | #endif |
40 | | |
41 | | // required for mmap as gguf only guarantees 32-byte alignment |
42 | 0 | #define TENSOR_ALIGNMENT 32 |
43 | | |
44 | | // static_assert should be a #define, but if it's not, |
45 | | // fall back to the _Static_assert C11 keyword. |
46 | | // if C99 - static_assert is noop |
47 | | // ref: https://stackoverflow.com/a/53923785/4039976 |
48 | | #ifndef __cplusplus |
49 | | #ifndef static_assert |
50 | | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L) |
51 | | #define static_assert(cond, msg) _Static_assert(cond, msg) |
52 | | #else |
53 | | #define static_assert(cond, msg) struct global_scope_noop_trick |
54 | | #endif |
55 | | #endif |
56 | | #endif |
57 | | |
58 | 0 | static inline int ggml_up32(int n) { |
59 | 0 | return (n + 31) & ~31; |
60 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_up32(int) Unexecuted instantiation: ggml-cpu.cpp:ggml_up32(int) Unexecuted instantiation: repack.cpp:ggml_up32(int) Unexecuted instantiation: traits.cpp:ggml_up32(int) Unexecuted instantiation: ggml-cpu.c:ggml_up32 Unexecuted instantiation: quants.c:ggml_up32 Unexecuted instantiation: binary-ops.cpp:ggml_up32(int) Unexecuted instantiation: unary-ops.cpp:ggml_up32(int) Unexecuted instantiation: vec.cpp:ggml_up32(int) Unexecuted instantiation: ops.cpp:ggml_up32(int) Unexecuted instantiation: sgemm.cpp:ggml_up32(int) Unexecuted instantiation: ggml.c:ggml_up32 Unexecuted instantiation: ggml-alloc.c:ggml_up32 Unexecuted instantiation: ggml-backend.cpp:ggml_up32(int) Unexecuted instantiation: ggml-opt.cpp:ggml_up32(int) Unexecuted instantiation: ggml-quants.c:ggml_up32 Unexecuted instantiation: gguf.cpp:ggml_up32(int) |
61 | | |
62 | | //static inline int ggml_up64(int n) { |
63 | | // return (n + 63) & ~63; |
64 | | //} |
65 | | |
66 | 0 | static inline int ggml_up(int n, int m) { |
67 | | // assert m is a power of 2 |
68 | 0 | GGML_ASSERT((m & (m - 1)) == 0); |
69 | 0 | return (n + m - 1) & ~(m - 1); |
70 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_up(int, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_up(int, int) Unexecuted instantiation: repack.cpp:ggml_up(int, int) Unexecuted instantiation: traits.cpp:ggml_up(int, int) Unexecuted instantiation: ggml-cpu.c:ggml_up Unexecuted instantiation: quants.c:ggml_up Unexecuted instantiation: binary-ops.cpp:ggml_up(int, int) Unexecuted instantiation: unary-ops.cpp:ggml_up(int, int) Unexecuted instantiation: vec.cpp:ggml_up(int, int) Unexecuted instantiation: ops.cpp:ggml_up(int, int) Unexecuted instantiation: sgemm.cpp:ggml_up(int, int) Unexecuted instantiation: ggml.c:ggml_up Unexecuted instantiation: ggml-alloc.c:ggml_up Unexecuted instantiation: ggml-backend.cpp:ggml_up(int, int) Unexecuted instantiation: ggml-opt.cpp:ggml_up(int, int) Unexecuted instantiation: ggml-quants.c:ggml_up Unexecuted instantiation: gguf.cpp:ggml_up(int, int) |
71 | | |
72 | | // TODO: move to ggml.h? (won't be able to inline) |
73 | 0 | static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) { |
74 | 0 | if (a->type != b->type) { |
75 | 0 | return false; |
76 | 0 | } |
77 | 0 | for (int i = 0; i < GGML_MAX_DIMS; i++) { |
78 | 0 | if (a->ne[i] != b->ne[i]) { |
79 | 0 | return false; |
80 | 0 | } |
81 | 0 | if (a->nb[i] != b->nb[i]) { |
82 | 0 | return false; |
83 | 0 | } |
84 | 0 | } |
85 | 0 | return true; |
86 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ggml-cpu.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: repack.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: traits.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ggml-cpu.c:ggml_are_same_layout Unexecuted instantiation: quants.c:ggml_are_same_layout Unexecuted instantiation: binary-ops.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: unary-ops.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: vec.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ops.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: sgemm.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ggml.c:ggml_are_same_layout Unexecuted instantiation: ggml-alloc.c:ggml_are_same_layout Unexecuted instantiation: ggml-backend.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ggml-opt.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) Unexecuted instantiation: ggml-quants.c:ggml_are_same_layout Unexecuted instantiation: gguf.cpp:ggml_are_same_layout(ggml_tensor const*, ggml_tensor const*) |
87 | | |
88 | 0 | static bool ggml_op_is_empty(enum ggml_op op) { |
89 | 0 | switch (op) { |
90 | 0 | case GGML_OP_NONE: |
91 | 0 | case GGML_OP_RESHAPE: |
92 | 0 | case GGML_OP_TRANSPOSE: |
93 | 0 | case GGML_OP_VIEW: |
94 | 0 | case GGML_OP_PERMUTE: |
95 | 0 | return true; |
96 | 0 | default: |
97 | 0 | return false; |
98 | 0 | } |
99 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ggml-cpu.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: repack.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: traits.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ggml-cpu.c:ggml_op_is_empty Unexecuted instantiation: quants.c:ggml_op_is_empty Unexecuted instantiation: binary-ops.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: unary-ops.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: vec.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ops.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: sgemm.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ggml.c:ggml_op_is_empty Unexecuted instantiation: ggml-alloc.c:ggml_op_is_empty Unexecuted instantiation: ggml-backend.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ggml-opt.cpp:ggml_op_is_empty(ggml_op) Unexecuted instantiation: ggml-quants.c:ggml_op_is_empty Unexecuted instantiation: gguf.cpp:ggml_op_is_empty(ggml_op) |
100 | | |
101 | 0 | static inline float ggml_compute_softplus_f32(float input) { |
102 | 0 | return (input > 20.0f) ? input : logf(1 + expf(input)); |
103 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_compute_softplus_f32(float) Unexecuted instantiation: ggml-cpu.cpp:ggml_compute_softplus_f32(float) Unexecuted instantiation: repack.cpp:ggml_compute_softplus_f32(float) Unexecuted instantiation: traits.cpp:ggml_compute_softplus_f32(float) Unexecuted instantiation: ggml-cpu.c:ggml_compute_softplus_f32 Unexecuted instantiation: quants.c:ggml_compute_softplus_f32 Unexecuted instantiation: binary-ops.cpp:ggml_compute_softplus_f32(float) Unexecuted instantiation: unary-ops.cpp:ggml_compute_softplus_f32(float) Unexecuted instantiation: vec.cpp:ggml_compute_softplus_f32(float) Unexecuted instantiation: ops.cpp:ggml_compute_softplus_f32(float) Unexecuted instantiation: sgemm.cpp:ggml_compute_softplus_f32(float) Unexecuted instantiation: ggml.c:ggml_compute_softplus_f32 Unexecuted instantiation: ggml-alloc.c:ggml_compute_softplus_f32 Unexecuted instantiation: ggml-backend.cpp:ggml_compute_softplus_f32(float) Unexecuted instantiation: ggml-opt.cpp:ggml_compute_softplus_f32(float) Unexecuted instantiation: ggml-quants.c:ggml_compute_softplus_f32 Unexecuted instantiation: gguf.cpp:ggml_compute_softplus_f32(float) |
104 | | // |
105 | | // logging |
106 | | // |
107 | | |
108 | | GGML_ATTRIBUTE_FORMAT(2, 3) |
109 | | GGML_API void ggml_log_internal (enum ggml_log_level level, const char * format, ...); |
110 | | GGML_API void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data); |
111 | | |
112 | | #define GGML_LOG(...) ggml_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__) |
113 | 0 | #define GGML_LOG_INFO(...) ggml_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) |
114 | 0 | #define GGML_LOG_WARN(...) ggml_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__) |
115 | 0 | #define GGML_LOG_ERROR(...) ggml_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) |
116 | 0 | #define GGML_LOG_DEBUG(...) ggml_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__) |
117 | | #define GGML_LOG_CONT(...) ggml_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__) |
118 | | |
119 | | #define GGML_DEBUG 0 |
120 | | |
121 | | #if (GGML_DEBUG >= 1) |
122 | | #define GGML_PRINT_DEBUG(...) GGML_LOG_DEBUG(__VA_ARGS__) |
123 | | #else |
124 | | #define GGML_PRINT_DEBUG(...) |
125 | | #endif |
126 | | |
127 | | #if (GGML_DEBUG >= 5) |
128 | | #define GGML_PRINT_DEBUG_5(...) GGML_LOG_DEBUG(__VA_ARGS__) |
129 | | #else |
130 | | #define GGML_PRINT_DEBUG_5(...) |
131 | | #endif |
132 | | |
133 | | #if (GGML_DEBUG >= 10) |
134 | | #define GGML_PRINT_DEBUG_10(...) GGML_LOG_DEBUG(__VA_ARGS__) |
135 | | #else |
136 | | #define GGML_PRINT_DEBUG_10(...) |
137 | | #endif |
138 | | |
139 | | // tensor params |
140 | | |
141 | 0 | static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) { |
142 | 0 | GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings |
143 | 0 | assert(params_size <= GGML_MAX_OP_PARAMS); |
144 | 0 | memcpy(tensor->op_params, params, params_size); |
145 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ggml-cpu.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: repack.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: traits.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ggml-cpu.c:ggml_set_op_params Unexecuted instantiation: quants.c:ggml_set_op_params Unexecuted instantiation: binary-ops.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: unary-ops.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: vec.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ops.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: sgemm.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ggml.c:ggml_set_op_params Unexecuted instantiation: ggml-alloc.c:ggml_set_op_params Unexecuted instantiation: ggml-backend.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ggml-opt.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) Unexecuted instantiation: ggml-quants.c:ggml_set_op_params Unexecuted instantiation: gguf.cpp:ggml_set_op_params(ggml_tensor*, void const*, unsigned long) |
146 | | |
147 | 0 | static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) { |
148 | 0 | assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t)); |
149 | 0 | return ((const int32_t *)(tensor->op_params))[i]; |
150 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-cpu.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: repack.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: traits.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-cpu.c:ggml_get_op_params_i32 Unexecuted instantiation: quants.c:ggml_get_op_params_i32 Unexecuted instantiation: binary-ops.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: unary-ops.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: vec.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ops.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: sgemm.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml.c:ggml_get_op_params_i32 Unexecuted instantiation: ggml-alloc.c:ggml_get_op_params_i32 Unexecuted instantiation: ggml-backend.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-opt.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-quants.c:ggml_get_op_params_i32 Unexecuted instantiation: gguf.cpp:ggml_get_op_params_i32(ggml_tensor const*, unsigned int) |
151 | | |
152 | 0 | static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) { |
153 | 0 | assert(i < GGML_MAX_OP_PARAMS / sizeof(float)); |
154 | 0 | return ((const float *)(tensor->op_params))[i]; |
155 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-cpu.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: repack.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: traits.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-cpu.c:ggml_get_op_params_f32 Unexecuted instantiation: quants.c:ggml_get_op_params_f32 Unexecuted instantiation: binary-ops.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: unary-ops.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: vec.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ops.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: sgemm.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml.c:ggml_get_op_params_f32 Unexecuted instantiation: ggml-alloc.c:ggml_get_op_params_f32 Unexecuted instantiation: ggml-backend.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-opt.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) Unexecuted instantiation: ggml-quants.c:ggml_get_op_params_f32 Unexecuted instantiation: gguf.cpp:ggml_get_op_params_f32(ggml_tensor const*, unsigned int) |
156 | | |
157 | 0 | static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) { |
158 | 0 | assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t)); |
159 | 0 | ((int32_t *)(tensor->op_params))[i] = value; |
160 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: repack.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: traits.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ggml-cpu.c:ggml_set_op_params_i32 Unexecuted instantiation: quants.c:ggml_set_op_params_i32 Unexecuted instantiation: binary-ops.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: unary-ops.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: vec.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ops.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: sgemm.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ggml.c:ggml_set_op_params_i32 Unexecuted instantiation: ggml-alloc.c:ggml_set_op_params_i32 Unexecuted instantiation: ggml-backend.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ggml-opt.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) Unexecuted instantiation: ggml-quants.c:ggml_set_op_params_i32 Unexecuted instantiation: gguf.cpp:ggml_set_op_params_i32(ggml_tensor*, unsigned int, int) |
161 | | |
162 | 0 | static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, float value) { |
163 | 0 | assert(i < GGML_MAX_OP_PARAMS / sizeof(float)); |
164 | 0 | ((float *)(tensor->op_params))[i] = value; |
165 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ggml-cpu.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: repack.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: traits.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ggml-cpu.c:ggml_set_op_params_f32 Unexecuted instantiation: quants.c:ggml_set_op_params_f32 Unexecuted instantiation: binary-ops.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: unary-ops.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: vec.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ops.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: sgemm.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ggml.c:ggml_set_op_params_f32 Unexecuted instantiation: ggml-alloc.c:ggml_set_op_params_f32 Unexecuted instantiation: ggml-backend.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ggml-opt.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) Unexecuted instantiation: ggml-quants.c:ggml_set_op_params_f32 Unexecuted instantiation: gguf.cpp:ggml_set_op_params_f32(ggml_tensor*, unsigned int, float) |
166 | | |
167 | | struct ggml_map_custom1_op_params { |
168 | | ggml_custom1_op_t fun; |
169 | | int n_tasks; |
170 | | void * userdata; |
171 | | }; |
172 | | |
173 | | struct ggml_map_custom2_op_params { |
174 | | ggml_custom2_op_t fun; |
175 | | int n_tasks; |
176 | | void * userdata; |
177 | | }; |
178 | | |
179 | | struct ggml_map_custom3_op_params { |
180 | | ggml_custom3_op_t fun; |
181 | | int n_tasks; |
182 | | void * userdata; |
183 | | }; |
184 | | |
185 | | struct ggml_custom_op_params { |
186 | | ggml_custom_op_t fun; |
187 | | int n_tasks; |
188 | | void * userdata; |
189 | | }; |
190 | | |
191 | | // bitset |
192 | | |
193 | | typedef uint32_t ggml_bitset_t; |
194 | | |
195 | | static_assert(sizeof(ggml_bitset_t) == 4, "bitset_t constants must be updated"); |
196 | 0 | #define BITSET_SHR 5 // log2(sizeof(ggml_bitset_t)*8) |
197 | 0 | #define BITSET_MASK (sizeof(ggml_bitset_t)*8 - 1) |
198 | | |
199 | 0 | static size_t ggml_bitset_size(size_t n) { |
200 | 0 | return (n + BITSET_MASK) >> BITSET_SHR; |
201 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_bitset_size(unsigned long) Unexecuted instantiation: ggml-cpu.cpp:ggml_bitset_size(unsigned long) Unexecuted instantiation: repack.cpp:ggml_bitset_size(unsigned long) Unexecuted instantiation: traits.cpp:ggml_bitset_size(unsigned long) Unexecuted instantiation: ggml-cpu.c:ggml_bitset_size Unexecuted instantiation: quants.c:ggml_bitset_size Unexecuted instantiation: binary-ops.cpp:ggml_bitset_size(unsigned long) Unexecuted instantiation: unary-ops.cpp:ggml_bitset_size(unsigned long) Unexecuted instantiation: vec.cpp:ggml_bitset_size(unsigned long) Unexecuted instantiation: ops.cpp:ggml_bitset_size(unsigned long) Unexecuted instantiation: sgemm.cpp:ggml_bitset_size(unsigned long) Unexecuted instantiation: ggml.c:ggml_bitset_size Unexecuted instantiation: ggml-alloc.c:ggml_bitset_size Unexecuted instantiation: ggml-backend.cpp:ggml_bitset_size(unsigned long) Unexecuted instantiation: ggml-opt.cpp:ggml_bitset_size(unsigned long) Unexecuted instantiation: ggml-quants.c:ggml_bitset_size Unexecuted instantiation: gguf.cpp:ggml_bitset_size(unsigned long) |
202 | | |
203 | 0 | static inline bool ggml_bitset_get(const ggml_bitset_t * bitset, size_t i) { |
204 | 0 | return !!(bitset[i >> BITSET_SHR] & (1u << (i & BITSET_MASK))); |
205 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_bitset_get(unsigned int const*, unsigned long) Unexecuted instantiation: ggml-cpu.cpp:ggml_bitset_get(unsigned int const*, unsigned long) Unexecuted instantiation: repack.cpp:ggml_bitset_get(unsigned int const*, unsigned long) Unexecuted instantiation: traits.cpp:ggml_bitset_get(unsigned int const*, unsigned long) Unexecuted instantiation: ggml-cpu.c:ggml_bitset_get Unexecuted instantiation: quants.c:ggml_bitset_get Unexecuted instantiation: binary-ops.cpp:ggml_bitset_get(unsigned int const*, unsigned long) Unexecuted instantiation: unary-ops.cpp:ggml_bitset_get(unsigned int const*, unsigned long) Unexecuted instantiation: vec.cpp:ggml_bitset_get(unsigned int const*, unsigned long) Unexecuted instantiation: ops.cpp:ggml_bitset_get(unsigned int const*, unsigned long) Unexecuted instantiation: sgemm.cpp:ggml_bitset_get(unsigned int const*, unsigned long) Unexecuted instantiation: ggml.c:ggml_bitset_get Unexecuted instantiation: ggml-alloc.c:ggml_bitset_get Unexecuted instantiation: ggml-backend.cpp:ggml_bitset_get(unsigned int const*, unsigned long) Unexecuted instantiation: ggml-opt.cpp:ggml_bitset_get(unsigned int const*, unsigned long) Unexecuted instantiation: ggml-quants.c:ggml_bitset_get Unexecuted instantiation: gguf.cpp:ggml_bitset_get(unsigned int const*, unsigned long) |
206 | | |
207 | 0 | static inline void ggml_bitset_set(ggml_bitset_t * bitset, size_t i) { |
208 | 0 | bitset[i >> BITSET_SHR] |= (1u << (i & BITSET_MASK)); |
209 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_bitset_set(unsigned int*, unsigned long) Unexecuted instantiation: ggml-cpu.cpp:ggml_bitset_set(unsigned int*, unsigned long) Unexecuted instantiation: repack.cpp:ggml_bitset_set(unsigned int*, unsigned long) Unexecuted instantiation: traits.cpp:ggml_bitset_set(unsigned int*, unsigned long) Unexecuted instantiation: ggml-cpu.c:ggml_bitset_set Unexecuted instantiation: quants.c:ggml_bitset_set Unexecuted instantiation: binary-ops.cpp:ggml_bitset_set(unsigned int*, unsigned long) Unexecuted instantiation: unary-ops.cpp:ggml_bitset_set(unsigned int*, unsigned long) Unexecuted instantiation: vec.cpp:ggml_bitset_set(unsigned int*, unsigned long) Unexecuted instantiation: ops.cpp:ggml_bitset_set(unsigned int*, unsigned long) Unexecuted instantiation: sgemm.cpp:ggml_bitset_set(unsigned int*, unsigned long) Unexecuted instantiation: ggml.c:ggml_bitset_set Unexecuted instantiation: ggml-alloc.c:ggml_bitset_set Unexecuted instantiation: ggml-backend.cpp:ggml_bitset_set(unsigned int*, unsigned long) Unexecuted instantiation: ggml-opt.cpp:ggml_bitset_set(unsigned int*, unsigned long) Unexecuted instantiation: ggml-quants.c:ggml_bitset_set Unexecuted instantiation: gguf.cpp:ggml_bitset_set(unsigned int*, unsigned long) |
210 | | |
211 | 0 | static inline void ggml_bitset_clear(ggml_bitset_t * bitset, size_t i) { |
212 | 0 | bitset[i >> BITSET_SHR] &= ~(1u << (i & BITSET_MASK)); |
213 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_bitset_clear(unsigned int*, unsigned long) Unexecuted instantiation: ggml-cpu.cpp:ggml_bitset_clear(unsigned int*, unsigned long) Unexecuted instantiation: repack.cpp:ggml_bitset_clear(unsigned int*, unsigned long) Unexecuted instantiation: traits.cpp:ggml_bitset_clear(unsigned int*, unsigned long) Unexecuted instantiation: ggml-cpu.c:ggml_bitset_clear Unexecuted instantiation: quants.c:ggml_bitset_clear Unexecuted instantiation: binary-ops.cpp:ggml_bitset_clear(unsigned int*, unsigned long) Unexecuted instantiation: unary-ops.cpp:ggml_bitset_clear(unsigned int*, unsigned long) Unexecuted instantiation: vec.cpp:ggml_bitset_clear(unsigned int*, unsigned long) Unexecuted instantiation: ops.cpp:ggml_bitset_clear(unsigned int*, unsigned long) Unexecuted instantiation: sgemm.cpp:ggml_bitset_clear(unsigned int*, unsigned long) Unexecuted instantiation: ggml.c:ggml_bitset_clear Unexecuted instantiation: ggml-alloc.c:ggml_bitset_clear Unexecuted instantiation: ggml-backend.cpp:ggml_bitset_clear(unsigned int*, unsigned long) Unexecuted instantiation: ggml-opt.cpp:ggml_bitset_clear(unsigned int*, unsigned long) Unexecuted instantiation: ggml-quants.c:ggml_bitset_clear Unexecuted instantiation: gguf.cpp:ggml_bitset_clear(unsigned int*, unsigned long) |
214 | | |
215 | | // hash set |
216 | | |
217 | 0 | #define GGML_HASHSET_FULL ((size_t)-1) |
218 | 0 | #define GGML_HASHSET_ALREADY_EXISTS ((size_t)-2) |
219 | | |
220 | | struct ggml_hash_set { |
221 | | size_t size; |
222 | | ggml_bitset_t * used; // whether or not the keys are in use i.e. set |
223 | | struct ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if ggml_bitset_get(used, i) |
224 | | }; |
225 | | |
226 | | struct ggml_hash_set ggml_hash_set_new(size_t size); |
227 | | void ggml_hash_set_free(struct ggml_hash_set * hash_set); |
228 | | |
229 | | // returns the minimum size for a hash set that can hold min_sz elements |
230 | | size_t ggml_hash_size(size_t min_sz); |
231 | | |
232 | | // remove all elements from the hash set |
233 | | void ggml_hash_set_reset(struct ggml_hash_set * hash_set); |
234 | | |
235 | | // returns true if key is in the hash set |
236 | | static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key); |
237 | | |
238 | | // returns GGML_HASHSET_FULL if table is full, otherwise the current index of the key or where it should be inserted |
239 | | static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, const struct ggml_tensor * key); |
240 | | |
241 | | // returns GGML_HASHSET_ALREADY_EXISTS if key already exists, index otherwise, asserts if table is full |
242 | | static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key); |
243 | | |
244 | | // return index, asserts if table is full |
245 | | static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key); |
246 | | |
247 | | // hash function for ggml_tensor |
248 | 0 | static inline size_t ggml_hash(const struct ggml_tensor * p) { |
249 | | // the last 4 bits are always zero due to alignment |
250 | 0 | return (size_t)(uintptr_t)p >> 4; |
251 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_hash(ggml_tensor const*) Unexecuted instantiation: ggml-cpu.cpp:ggml_hash(ggml_tensor const*) Unexecuted instantiation: repack.cpp:ggml_hash(ggml_tensor const*) Unexecuted instantiation: traits.cpp:ggml_hash(ggml_tensor const*) Unexecuted instantiation: ggml-cpu.c:ggml_hash Unexecuted instantiation: quants.c:ggml_hash Unexecuted instantiation: binary-ops.cpp:ggml_hash(ggml_tensor const*) Unexecuted instantiation: unary-ops.cpp:ggml_hash(ggml_tensor const*) Unexecuted instantiation: vec.cpp:ggml_hash(ggml_tensor const*) Unexecuted instantiation: ops.cpp:ggml_hash(ggml_tensor const*) Unexecuted instantiation: sgemm.cpp:ggml_hash(ggml_tensor const*) Unexecuted instantiation: ggml.c:ggml_hash Unexecuted instantiation: ggml-alloc.c:ggml_hash Unexecuted instantiation: ggml-backend.cpp:ggml_hash(ggml_tensor const*) Unexecuted instantiation: ggml-opt.cpp:ggml_hash(ggml_tensor const*) Unexecuted instantiation: ggml-quants.c:ggml_hash Unexecuted instantiation: gguf.cpp:ggml_hash(ggml_tensor const*) |
252 | | |
253 | 0 | static size_t ggml_hash_find(const struct ggml_hash_set * hash_set, const struct ggml_tensor * key) { |
254 | 0 | size_t h = ggml_hash(key) % hash_set->size; |
255 | | |
256 | | // linear probing |
257 | 0 | size_t i = h; |
258 | 0 | while (ggml_bitset_get(hash_set->used, i) && hash_set->keys[i] != key) { |
259 | 0 | i = (i + 1) % hash_set->size; |
260 | 0 | if (i == h) { |
261 | | // visited all hash table entries -> not found |
262 | 0 | return GGML_HASHSET_FULL; |
263 | 0 | } |
264 | 0 | } |
265 | 0 | return i; |
266 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ggml-cpu.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: repack.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: traits.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ggml-cpu.c:ggml_hash_find Unexecuted instantiation: quants.c:ggml_hash_find Unexecuted instantiation: binary-ops.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: unary-ops.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: vec.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ops.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: sgemm.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ggml.c:ggml_hash_find Unexecuted instantiation: ggml-alloc.c:ggml_hash_find Unexecuted instantiation: ggml-backend.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ggml-opt.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) Unexecuted instantiation: ggml-quants.c:ggml_hash_find Unexecuted instantiation: gguf.cpp:ggml_hash_find(ggml_hash_set const*, ggml_tensor const*) |
267 | | |
268 | 0 | static bool ggml_hash_contains(const struct ggml_hash_set * hash_set, struct ggml_tensor * key) { |
269 | 0 | size_t i = ggml_hash_find(hash_set, key); |
270 | 0 | return i != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, i); |
271 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: repack.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: traits.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.c:ggml_hash_contains Unexecuted instantiation: quants.c:ggml_hash_contains Unexecuted instantiation: binary-ops.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: unary-ops.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: vec.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ops.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: sgemm.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ggml.c:ggml_hash_contains Unexecuted instantiation: ggml-alloc.c:ggml_hash_contains Unexecuted instantiation: ggml-backend.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ggml-opt.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) Unexecuted instantiation: ggml-quants.c:ggml_hash_contains Unexecuted instantiation: gguf.cpp:ggml_hash_contains(ggml_hash_set const*, ggml_tensor*) |
272 | | |
273 | 0 | static size_t ggml_hash_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) { |
274 | 0 | size_t h = ggml_hash(key) % hash_set->size; |
275 | | |
276 | | // linear probing |
277 | 0 | size_t i = h; |
278 | 0 | do { |
279 | 0 | if (!ggml_bitset_get(hash_set->used, i)) { |
280 | 0 | ggml_bitset_set(hash_set->used, i); |
281 | 0 | hash_set->keys[i] = key; |
282 | 0 | return i; |
283 | 0 | } |
284 | 0 | if (hash_set->keys[i] == key) { |
285 | 0 | return GGML_HASHSET_ALREADY_EXISTS; |
286 | 0 | } |
287 | 0 | i = (i + 1) % hash_set->size; |
288 | 0 | } while (i != h); |
289 | | |
290 | | // visited all hash table entries -> not found |
291 | 0 | GGML_ABORT("fatal error"); |
292 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: repack.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: traits.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.c:ggml_hash_insert Unexecuted instantiation: quants.c:ggml_hash_insert Unexecuted instantiation: binary-ops.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: unary-ops.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: vec.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ops.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: sgemm.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml.c:ggml_hash_insert Unexecuted instantiation: ggml-alloc.c:ggml_hash_insert Unexecuted instantiation: ggml-backend.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-opt.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-quants.c:ggml_hash_insert Unexecuted instantiation: gguf.cpp:ggml_hash_insert(ggml_hash_set*, ggml_tensor*) |
293 | | |
294 | 0 | static size_t ggml_hash_find_or_insert(struct ggml_hash_set * hash_set, struct ggml_tensor * key) { |
295 | 0 | size_t h = ggml_hash(key) % hash_set->size; |
296 | | |
297 | | // linear probing |
298 | 0 | size_t i = h; |
299 | 0 | do { |
300 | 0 | if (!ggml_bitset_get(hash_set->used, i)) { |
301 | 0 | ggml_bitset_set(hash_set->used, i); |
302 | 0 | hash_set->keys[i] = key; |
303 | 0 | return i; |
304 | 0 | } |
305 | 0 | if (hash_set->keys[i] == key) { |
306 | 0 | return i; |
307 | 0 | } |
308 | 0 | i = (i + 1) % hash_set->size; |
309 | 0 | } while (i != h); |
310 | | |
311 | | // visited all hash table entries -> not found |
312 | 0 | GGML_ABORT("fatal error"); |
313 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: repack.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: traits.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-cpu.c:ggml_hash_find_or_insert Unexecuted instantiation: quants.c:ggml_hash_find_or_insert Unexecuted instantiation: binary-ops.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: unary-ops.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: vec.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ops.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: sgemm.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml.c:ggml_hash_find_or_insert Unexecuted instantiation: ggml-alloc.c:ggml_hash_find_or_insert Unexecuted instantiation: ggml-backend.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-opt.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) Unexecuted instantiation: ggml-quants.c:ggml_hash_find_or_insert Unexecuted instantiation: gguf.cpp:ggml_hash_find_or_insert(ggml_hash_set*, ggml_tensor*) |
314 | | |
315 | | // computation graph |
316 | | |
317 | | enum ggml_cgraph_eval_order { |
318 | | GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0, |
319 | | GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT, |
320 | | GGML_CGRAPH_EVAL_ORDER_COUNT |
321 | | }; |
322 | | |
323 | | struct ggml_cgraph { |
324 | | int size; // maximum number of nodes/leafs/grads/grad_accs |
325 | | int n_nodes; // number of nodes currently in use |
326 | | int n_leafs; // number of leafs currently in use |
327 | | |
328 | | struct ggml_tensor ** nodes; // tensors with data that can change if the graph is evaluated |
329 | | struct ggml_tensor ** grads; // the outputs of these tensors are the gradients of the nodes |
330 | | struct ggml_tensor ** grad_accs; // accumulators for node gradients |
331 | | struct ggml_tensor ** leafs; // tensors with constant data |
332 | | int32_t * use_counts;// number of uses of each tensor, indexed by hash table slot |
333 | | |
334 | | struct ggml_hash_set visited_hash_set; |
335 | | |
336 | | enum ggml_cgraph_eval_order order; |
337 | | }; |
338 | | |
339 | | // returns a slice of cgraph with nodes [i0, i1) |
340 | | // the slice does not have leafs or gradients |
341 | | // if you need the gradients, get them from the original graph |
342 | | struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1); |
343 | | |
344 | | // ggml-alloc.c: true if the operation can reuse memory from its sources |
345 | | GGML_API bool ggml_op_can_inplace(enum ggml_op op); |
346 | | |
347 | | |
348 | | // Memory allocation |
349 | | |
350 | | GGML_API void * ggml_aligned_malloc(size_t size); |
351 | | GGML_API void ggml_aligned_free(void * ptr, size_t size); |
352 | | |
353 | | // FP16 <-> FP32 |
354 | | // ref: https://github.com/Maratyszcza/FP16 |
355 | | |
356 | 327k | static inline float fp32_from_bits(uint32_t w) { |
357 | 327k | union { |
358 | 327k | uint32_t as_bits; |
359 | 327k | float as_value; |
360 | 327k | } fp32; |
361 | 327k | fp32.as_bits = w; |
362 | 327k | return fp32.as_value; |
363 | 327k | } Unexecuted instantiation: ggml-backend-reg.cpp:fp32_from_bits(unsigned int) Unexecuted instantiation: ggml-cpu.cpp:fp32_from_bits(unsigned int) Unexecuted instantiation: repack.cpp:fp32_from_bits(unsigned int) Unexecuted instantiation: traits.cpp:fp32_from_bits(unsigned int) ggml-cpu.c:fp32_from_bits Line | Count | Source | 356 | 327k | static inline float fp32_from_bits(uint32_t w) { | 357 | 327k | union { | 358 | 327k | uint32_t as_bits; | 359 | 327k | float as_value; | 360 | 327k | } fp32; | 361 | 327k | fp32.as_bits = w; | 362 | 327k | return fp32.as_value; | 363 | 327k | } |
Unexecuted instantiation: quants.c:fp32_from_bits Unexecuted instantiation: binary-ops.cpp:fp32_from_bits(unsigned int) Unexecuted instantiation: unary-ops.cpp:fp32_from_bits(unsigned int) Unexecuted instantiation: vec.cpp:fp32_from_bits(unsigned int) Unexecuted instantiation: ops.cpp:fp32_from_bits(unsigned int) Unexecuted instantiation: sgemm.cpp:fp32_from_bits(unsigned int) Unexecuted instantiation: ggml.c:fp32_from_bits Unexecuted instantiation: ggml-alloc.c:fp32_from_bits Unexecuted instantiation: ggml-backend.cpp:fp32_from_bits(unsigned int) Unexecuted instantiation: ggml-opt.cpp:fp32_from_bits(unsigned int) Unexecuted instantiation: ggml-quants.c:fp32_from_bits Unexecuted instantiation: gguf.cpp:fp32_from_bits(unsigned int) |
364 | | |
365 | 327k | static inline uint32_t fp32_to_bits(float f) { |
366 | 327k | union { |
367 | 327k | float as_value; |
368 | 327k | uint32_t as_bits; |
369 | 327k | } fp32; |
370 | 327k | fp32.as_value = f; |
371 | 327k | return fp32.as_bits; |
372 | 327k | } Unexecuted instantiation: ggml-backend-reg.cpp:fp32_to_bits(float) Unexecuted instantiation: ggml-cpu.cpp:fp32_to_bits(float) Unexecuted instantiation: repack.cpp:fp32_to_bits(float) Unexecuted instantiation: traits.cpp:fp32_to_bits(float) Line | Count | Source | 365 | 327k | static inline uint32_t fp32_to_bits(float f) { | 366 | 327k | union { | 367 | 327k | float as_value; | 368 | 327k | uint32_t as_bits; | 369 | 327k | } fp32; | 370 | 327k | fp32.as_value = f; | 371 | 327k | return fp32.as_bits; | 372 | 327k | } |
Unexecuted instantiation: quants.c:fp32_to_bits Unexecuted instantiation: binary-ops.cpp:fp32_to_bits(float) Unexecuted instantiation: unary-ops.cpp:fp32_to_bits(float) Unexecuted instantiation: vec.cpp:fp32_to_bits(float) Unexecuted instantiation: ops.cpp:fp32_to_bits(float) Unexecuted instantiation: sgemm.cpp:fp32_to_bits(float) Unexecuted instantiation: ggml.c:fp32_to_bits Unexecuted instantiation: ggml-alloc.c:fp32_to_bits Unexecuted instantiation: ggml-backend.cpp:fp32_to_bits(float) Unexecuted instantiation: ggml-opt.cpp:fp32_to_bits(float) Unexecuted instantiation: ggml-quants.c:fp32_to_bits Unexecuted instantiation: gguf.cpp:fp32_to_bits(float) |
373 | | |
374 | 65.5k | static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { |
375 | 65.5k | const uint32_t w = (uint32_t) h << 16; |
376 | 65.5k | const uint32_t sign = w & UINT32_C(0x80000000); |
377 | 65.5k | const uint32_t two_w = w + w; |
378 | | |
379 | 65.5k | const uint32_t exp_offset = UINT32_C(0xE0) << 23; |
380 | 65.5k | #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) |
381 | 65.5k | const float exp_scale = 0x1.0p-112f; |
382 | | #else |
383 | | const float exp_scale = fp32_from_bits(UINT32_C(0x7800000)); |
384 | | #endif |
385 | 65.5k | const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; |
386 | | |
387 | 65.5k | const uint32_t magic_mask = UINT32_C(126) << 23; |
388 | 65.5k | const float magic_bias = 0.5f; |
389 | 65.5k | const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; |
390 | | |
391 | 65.5k | const uint32_t denormalized_cutoff = UINT32_C(1) << 27; |
392 | 65.5k | const uint32_t result = sign | |
393 | 65.5k | (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); |
394 | 65.5k | return fp32_from_bits(result); |
395 | 65.5k | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: ggml-cpu.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: repack.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: traits.cpp:ggml_compute_fp16_to_fp32(unsigned short) ggml-cpu.c:ggml_compute_fp16_to_fp32 Line | Count | Source | 374 | 65.5k | static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { | 375 | 65.5k | const uint32_t w = (uint32_t) h << 16; | 376 | 65.5k | const uint32_t sign = w & UINT32_C(0x80000000); | 377 | 65.5k | const uint32_t two_w = w + w; | 378 | | | 379 | 65.5k | const uint32_t exp_offset = UINT32_C(0xE0) << 23; | 380 | 65.5k | #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) | 381 | 65.5k | const float exp_scale = 0x1.0p-112f; | 382 | | #else | 383 | | const float exp_scale = fp32_from_bits(UINT32_C(0x7800000)); | 384 | | #endif | 385 | 65.5k | const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; | 386 | | | 387 | 65.5k | const uint32_t magic_mask = UINT32_C(126) << 23; | 388 | 65.5k | const float magic_bias = 0.5f; | 389 | 65.5k | const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; | 390 | | | 391 | 65.5k | const uint32_t denormalized_cutoff = UINT32_C(1) << 27; | 392 | 65.5k | const uint32_t result = sign | | 393 | 65.5k | (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); | 394 | 65.5k | return fp32_from_bits(result); | 395 | 65.5k | } |
Unexecuted instantiation: quants.c:ggml_compute_fp16_to_fp32 Unexecuted instantiation: binary-ops.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: unary-ops.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: vec.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: ops.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: sgemm.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: ggml.c:ggml_compute_fp16_to_fp32 Unexecuted instantiation: ggml-alloc.c:ggml_compute_fp16_to_fp32 Unexecuted instantiation: ggml-backend.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: ggml-opt.cpp:ggml_compute_fp16_to_fp32(unsigned short) Unexecuted instantiation: ggml-quants.c:ggml_compute_fp16_to_fp32 Unexecuted instantiation: gguf.cpp:ggml_compute_fp16_to_fp32(unsigned short) |
396 | | |
397 | 131k | static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { |
398 | 131k | #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) |
399 | 131k | const float scale_to_inf = 0x1.0p+112f; |
400 | 131k | const float scale_to_zero = 0x1.0p-110f; |
401 | | #else |
402 | | const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000)); |
403 | | const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000)); |
404 | | #endif |
405 | 131k | float base = (fabsf(f) * scale_to_inf) * scale_to_zero; |
406 | | |
407 | 131k | const uint32_t w = fp32_to_bits(f); |
408 | 131k | const uint32_t shl1_w = w + w; |
409 | 131k | const uint32_t sign = w & UINT32_C(0x80000000); |
410 | 131k | uint32_t bias = shl1_w & UINT32_C(0xFF000000); |
411 | 131k | if (bias < UINT32_C(0x71000000)) { |
412 | 36.1k | bias = UINT32_C(0x71000000); |
413 | 36.1k | } |
414 | | |
415 | 131k | base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; |
416 | 131k | const uint32_t bits = fp32_to_bits(base); |
417 | 131k | const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); |
418 | 131k | const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); |
419 | 131k | const uint32_t nonsign = exp_bits + mantissa_bits; |
420 | 131k | return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); |
421 | 131k | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: ggml-cpu.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: repack.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: traits.cpp:ggml_compute_fp32_to_fp16(float) ggml-cpu.c:ggml_compute_fp32_to_fp16 Line | Count | Source | 397 | 131k | static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { | 398 | 131k | #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) | 399 | 131k | const float scale_to_inf = 0x1.0p+112f; | 400 | 131k | const float scale_to_zero = 0x1.0p-110f; | 401 | | #else | 402 | | const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000)); | 403 | | const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000)); | 404 | | #endif | 405 | 131k | float base = (fabsf(f) * scale_to_inf) * scale_to_zero; | 406 | | | 407 | 131k | const uint32_t w = fp32_to_bits(f); | 408 | 131k | const uint32_t shl1_w = w + w; | 409 | 131k | const uint32_t sign = w & UINT32_C(0x80000000); | 410 | 131k | uint32_t bias = shl1_w & UINT32_C(0xFF000000); | 411 | 131k | if (bias < UINT32_C(0x71000000)) { | 412 | 36.1k | bias = UINT32_C(0x71000000); | 413 | 36.1k | } | 414 | | | 415 | 131k | base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; | 416 | 131k | const uint32_t bits = fp32_to_bits(base); | 417 | 131k | const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); | 418 | 131k | const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); | 419 | 131k | const uint32_t nonsign = exp_bits + mantissa_bits; | 420 | 131k | return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); | 421 | 131k | } |
Unexecuted instantiation: quants.c:ggml_compute_fp32_to_fp16 Unexecuted instantiation: binary-ops.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: unary-ops.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: vec.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: ops.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: sgemm.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: ggml.c:ggml_compute_fp32_to_fp16 Unexecuted instantiation: ggml-alloc.c:ggml_compute_fp32_to_fp16 Unexecuted instantiation: ggml-backend.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: ggml-opt.cpp:ggml_compute_fp32_to_fp16(float) Unexecuted instantiation: ggml-quants.c:ggml_compute_fp32_to_fp16 Unexecuted instantiation: gguf.cpp:ggml_compute_fp32_to_fp16(float) |
422 | | |
423 | 65.5k | #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) |
424 | 131k | #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) |
425 | | |
426 | 0 | #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x) |
427 | 0 | #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) |
428 | | |
429 | 0 | static inline float ggml_e8m0_to_fp32(uint8_t x) { |
430 | 0 | uint32_t bits; // Stores the raw bit representation of the float |
431 | 0 |
|
432 | 0 | // Handle special case for minimum exponent (denormalized float) |
433 | 0 | if (x == 0) { |
434 | 0 | // Bit pattern for 2^(-127): |
435 | 0 | // - Sign bit: 0 (positive) |
436 | 0 | // - Exponent: 0 (denormalized number) |
437 | 0 | // - Mantissa: 0x400000 (0.5 in fractional form) |
438 | 0 | // Value = 0.5 * 2^(-126) = 2^(-127) |
439 | 0 | bits = 0x00400000; |
440 | 0 | } |
441 | 0 | // note: disabled as we don't need to handle NaNs |
442 | 0 | //// Handle special case for NaN (all bits set) |
443 | 0 | //else if (x == 0xFF) { |
444 | 0 | // // Standard quiet NaN pattern: |
445 | 0 | // // - Sign bit: 0 |
446 | 0 | // // - Exponent: all 1s (0xFF) |
447 | 0 | // // - Mantissa: 0x400000 (quiet NaN flag) |
448 | 0 | // bits = 0x7FC00000; |
449 | 0 | //} |
450 | 0 | // Normalized values (most common case) |
451 | 0 | else { |
452 | 0 | // Construct normalized float by shifting exponent into position: |
453 | 0 | // - Exponent field: 8 bits (positions 30-23) |
454 | 0 | // - Mantissa: 0 (implicit leading 1) |
455 | 0 | // Value = 2^(x - 127) |
456 | 0 | bits = (uint32_t) x << 23; |
457 | 0 | } |
458 | 0 |
|
459 | 0 | float result; // Final float value |
460 | 0 | // Safely reinterpret bit pattern as float without type-punning issues |
461 | 0 | memcpy(&result, &bits, sizeof(float)); |
462 | 0 | return result; |
463 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_e8m0_to_fp32(unsigned char) Unexecuted instantiation: ggml-cpu.cpp:ggml_e8m0_to_fp32(unsigned char) Unexecuted instantiation: repack.cpp:ggml_e8m0_to_fp32(unsigned char) Unexecuted instantiation: traits.cpp:ggml_e8m0_to_fp32(unsigned char) Unexecuted instantiation: ggml-cpu.c:ggml_e8m0_to_fp32 Unexecuted instantiation: quants.c:ggml_e8m0_to_fp32 Unexecuted instantiation: binary-ops.cpp:ggml_e8m0_to_fp32(unsigned char) Unexecuted instantiation: unary-ops.cpp:ggml_e8m0_to_fp32(unsigned char) Unexecuted instantiation: vec.cpp:ggml_e8m0_to_fp32(unsigned char) Unexecuted instantiation: ops.cpp:ggml_e8m0_to_fp32(unsigned char) Unexecuted instantiation: sgemm.cpp:ggml_e8m0_to_fp32(unsigned char) Unexecuted instantiation: ggml.c:ggml_e8m0_to_fp32 Unexecuted instantiation: ggml-alloc.c:ggml_e8m0_to_fp32 Unexecuted instantiation: ggml-backend.cpp:ggml_e8m0_to_fp32(unsigned char) Unexecuted instantiation: ggml-opt.cpp:ggml_e8m0_to_fp32(unsigned char) Unexecuted instantiation: ggml-quants.c:ggml_e8m0_to_fp32 Unexecuted instantiation: gguf.cpp:ggml_e8m0_to_fp32(unsigned char) |
464 | | |
465 | | // Equal to ggml_e8m0_to_fp32/2 |
466 | | // Useful with MXFP4 quantization since the E0M2 values are doubled |
467 | 0 | static inline float ggml_e8m0_to_fp32_half(uint8_t x) { |
468 | 0 | uint32_t bits; |
469 | | |
470 | | // For x < 2: use precomputed denormal patterns |
471 | 0 | if (x < 2) { |
472 | | // 0x00200000 = 2^(-128), 0x00400000 = 2^(-127) |
473 | 0 | bits = 0x00200000 << x; |
474 | 0 | } |
475 | | // For x >= 2: normalized exponent adjustment |
476 | 0 | else { |
477 | | // 0.5 * 2^(x-127) = 2^(x-128) = normalized with exponent (x-1) |
478 | 0 | bits = (uint32_t)(x - 1) << 23; |
479 | 0 | } |
480 | | // Note: NaNs are not handled here |
481 | |
|
482 | 0 | float result; |
483 | 0 | memcpy(&result, &bits, sizeof(float)); |
484 | 0 | return result; |
485 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_e8m0_to_fp32_half(unsigned char) Unexecuted instantiation: ggml-cpu.cpp:ggml_e8m0_to_fp32_half(unsigned char) Unexecuted instantiation: repack.cpp:ggml_e8m0_to_fp32_half(unsigned char) Unexecuted instantiation: traits.cpp:ggml_e8m0_to_fp32_half(unsigned char) Unexecuted instantiation: ggml-cpu.c:ggml_e8m0_to_fp32_half Unexecuted instantiation: quants.c:ggml_e8m0_to_fp32_half Unexecuted instantiation: binary-ops.cpp:ggml_e8m0_to_fp32_half(unsigned char) Unexecuted instantiation: unary-ops.cpp:ggml_e8m0_to_fp32_half(unsigned char) Unexecuted instantiation: vec.cpp:ggml_e8m0_to_fp32_half(unsigned char) Unexecuted instantiation: ops.cpp:ggml_e8m0_to_fp32_half(unsigned char) Unexecuted instantiation: sgemm.cpp:ggml_e8m0_to_fp32_half(unsigned char) Unexecuted instantiation: ggml.c:ggml_e8m0_to_fp32_half Unexecuted instantiation: ggml-alloc.c:ggml_e8m0_to_fp32_half Unexecuted instantiation: ggml-backend.cpp:ggml_e8m0_to_fp32_half(unsigned char) Unexecuted instantiation: ggml-opt.cpp:ggml_e8m0_to_fp32_half(unsigned char) Unexecuted instantiation: ggml-quants.c:ggml_e8m0_to_fp32_half Unexecuted instantiation: gguf.cpp:ggml_e8m0_to_fp32_half(unsigned char) |
486 | | |
487 | | #define GGML_E8M0_TO_FP32(x) ggml_e8m0_to_fp32(x) |
488 | 0 | #define GGML_E8M0_TO_FP32_HALF(x) ggml_e8m0_to_fp32_half(x) |
489 | | |
490 | | /** |
491 | | * Converts brain16 to float32. |
492 | | * |
493 | | * The bfloat16 floating point format has the following structure: |
494 | | * |
495 | | * ┌sign |
496 | | * │ |
497 | | * │ ┌exponent |
498 | | * │ │ |
499 | | * │ │ ┌mantissa |
500 | | * │ │ │ |
501 | | * │┌──┴───┐┌─┴───┐ |
502 | | * 0b0000000000000000 brain16 |
503 | | * |
504 | | * Since bf16 has the same number of exponent bits as a 32bit float, |
505 | | * encoding and decoding numbers becomes relatively straightforward. |
506 | | * |
507 | | * ┌sign |
508 | | * │ |
509 | | * │ ┌exponent |
510 | | * │ │ |
511 | | * │ │ ┌mantissa |
512 | | * │ │ │ |
513 | | * │┌──┴───┐┌─┴───────────────────┐ |
514 | | * 0b00000000000000000000000000000000 IEEE binary32 |
515 | | * |
516 | | * For comparison, the standard fp16 format has fewer exponent bits. |
517 | | * |
518 | | * ┌sign |
519 | | * │ |
520 | | * │ ┌exponent |
521 | | * │ │ |
522 | | * │ │ ┌mantissa |
523 | | * │ │ │ |
524 | | * │┌─┴─┐┌─┴──────┐ |
525 | | * 0b0000000000000000 IEEE binary16 |
526 | | * |
527 | | * @see IEEE 754-2008 |
528 | | */ |
529 | 0 | static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) { |
530 | 0 | union { |
531 | 0 | float f; |
532 | 0 | uint32_t i; |
533 | 0 | } u; |
534 | 0 | u.i = (uint32_t)h.bits << 16; |
535 | 0 | return u.f; |
536 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ggml-cpu.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: repack.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: traits.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ggml-cpu.c:ggml_compute_bf16_to_fp32 Unexecuted instantiation: quants.c:ggml_compute_bf16_to_fp32 Unexecuted instantiation: binary-ops.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: unary-ops.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: vec.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ops.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: sgemm.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ggml.c:ggml_compute_bf16_to_fp32 Unexecuted instantiation: ggml-alloc.c:ggml_compute_bf16_to_fp32 Unexecuted instantiation: ggml-backend.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ggml-opt.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) Unexecuted instantiation: ggml-quants.c:ggml_compute_bf16_to_fp32 Unexecuted instantiation: gguf.cpp:ggml_compute_bf16_to_fp32(ggml_bf16_t) |
537 | | |
538 | | /** |
539 | | * Converts float32 to brain16. |
540 | | * |
541 | | * This is binary identical with Google Brain float conversion. |
542 | | * Floats shall round to nearest even, and NANs shall be quiet. |
543 | | * Subnormals aren't flushed to zero, except perhaps when used. |
544 | | * This code should vectorize nicely if using modern compilers. |
545 | | */ |
546 | 0 | static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) { |
547 | 0 | ggml_bf16_t h; |
548 | 0 | union { |
549 | 0 | float f; |
550 | 0 | uint32_t i; |
551 | 0 | } u; |
552 | 0 | u.f = s; |
553 | 0 | if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */ |
554 | 0 | h.bits = (u.i >> 16) | 64; /* force to quiet */ |
555 | 0 | return h; |
556 | 0 | } |
557 | 0 | h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16; |
558 | 0 | return h; |
559 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ggml-cpu.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: repack.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: traits.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ggml-cpu.c:ggml_compute_fp32_to_bf16 Unexecuted instantiation: quants.c:ggml_compute_fp32_to_bf16 Unexecuted instantiation: binary-ops.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: unary-ops.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: vec.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ops.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: sgemm.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ggml.c:ggml_compute_fp32_to_bf16 Unexecuted instantiation: ggml-alloc.c:ggml_compute_fp32_to_bf16 Unexecuted instantiation: ggml-backend.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ggml-opt.cpp:ggml_compute_fp32_to_bf16(float) Unexecuted instantiation: ggml-quants.c:ggml_compute_fp32_to_bf16 Unexecuted instantiation: gguf.cpp:ggml_compute_fp32_to_bf16(float) |
560 | | |
561 | 0 | #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x) |
562 | 0 | #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x) |
563 | | |
564 | 0 | static inline int32_t ggml_node_get_use_count(const struct ggml_cgraph * cgraph, int node_idx) { |
565 | 0 | const struct ggml_tensor * node = cgraph->nodes[node_idx]; |
566 | |
|
567 | 0 | size_t hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node); |
568 | 0 | if (!ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos)) { |
569 | 0 | return 0; |
570 | 0 | } |
571 | 0 | return cgraph->use_counts[hash_pos]; |
572 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: repack.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: traits.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ggml-cpu.c:ggml_node_get_use_count Unexecuted instantiation: quants.c:ggml_node_get_use_count Unexecuted instantiation: binary-ops.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: unary-ops.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: vec.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ops.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: sgemm.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ggml.c:ggml_node_get_use_count Unexecuted instantiation: ggml-alloc.c:ggml_node_get_use_count Unexecuted instantiation: ggml-backend.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ggml-opt.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) Unexecuted instantiation: ggml-quants.c:ggml_node_get_use_count Unexecuted instantiation: gguf.cpp:ggml_node_get_use_count(ggml_cgraph const*, int) |
573 | | |
574 | | // return true if the node's results are only used by N other nodes |
575 | | // and can be fused into their calculations. |
576 | 0 | static inline bool ggml_node_has_n_uses(const struct ggml_cgraph * cgraph, int node_idx, int32_t n_uses) { |
577 | 0 | const struct ggml_tensor * node = cgraph->nodes[node_idx]; |
578 | 0 |
|
579 | 0 | // check the use count against how many we're replacing |
580 | 0 | if (ggml_node_get_use_count(cgraph, node_idx) != n_uses) { |
581 | 0 | return false; |
582 | 0 | } |
583 | 0 |
|
584 | 0 | // if node is a view, some other node might be using the intermediate result |
585 | 0 | // via the view source. |
586 | 0 | if (node->view_src) { |
587 | 0 | return false; |
588 | 0 | } |
589 | 0 |
|
590 | 0 | // If the user requested output for the node, can't fuse |
591 | 0 | if (node->flags & GGML_TENSOR_FLAG_OUTPUT) { |
592 | 0 | return false; |
593 | 0 | } |
594 | 0 |
|
595 | 0 | return true; |
596 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: repack.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: traits.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ggml-cpu.c:ggml_node_has_n_uses Unexecuted instantiation: quants.c:ggml_node_has_n_uses Unexecuted instantiation: binary-ops.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: unary-ops.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: vec.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ops.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: sgemm.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ggml.c:ggml_node_has_n_uses Unexecuted instantiation: ggml-alloc.c:ggml_node_has_n_uses Unexecuted instantiation: ggml-backend.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ggml-opt.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) Unexecuted instantiation: ggml-quants.c:ggml_node_has_n_uses Unexecuted instantiation: gguf.cpp:ggml_node_has_n_uses(ggml_cgraph const*, int, int) |
597 | | |
598 | | // Returns true if nodes with indices { node_idxs } are the sequence of ggml_ops in ops[] |
599 | | // and are fusable. Nodes are considered fusable according to this function if: |
600 | | // - all nodes except the last have only one use and are not views/outputs (see ggml_node_has_N_uses). |
601 | | // - all nodes except the last are a src of the following node. |
602 | | // - all nodes are the same shape. |
603 | | // TODO: Consider allowing GGML_OP_NONE nodes in between |
604 | 0 | static inline bool ggml_can_fuse_ext(const struct ggml_cgraph * cgraph, const int * node_idxs, const enum ggml_op * ops, int num_ops) { |
605 | 0 | for (int i = 0; i < num_ops; ++i) { |
606 | 0 | if (node_idxs[i] >= cgraph->n_nodes) { |
607 | 0 | return false; |
608 | 0 | } |
609 | 0 |
|
610 | 0 | struct ggml_tensor * node = cgraph->nodes[node_idxs[i]]; |
611 | 0 | if (node->op != ops[i]) { |
612 | 0 | return false; |
613 | 0 | } |
614 | 0 | if (i < num_ops - 1 && !ggml_node_has_n_uses(cgraph, node_idxs[i], 1)) { |
615 | 0 | return false; |
616 | 0 | } |
617 | 0 | if (i > 0) { |
618 | 0 | struct ggml_tensor * prev = cgraph->nodes[node_idxs[i - 1]]; |
619 | 0 | if (node->src[0] != prev && node->src[1] != prev) { |
620 | 0 | return false; |
621 | 0 | } |
622 | 0 | if (!ggml_are_same_shape(node, prev)) { |
623 | 0 | return false; |
624 | 0 | } |
625 | 0 | } |
626 | 0 | } |
627 | 0 | return true; |
628 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: repack.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: traits.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ggml-cpu.c:ggml_can_fuse_ext Unexecuted instantiation: quants.c:ggml_can_fuse_ext Unexecuted instantiation: binary-ops.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: unary-ops.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: vec.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ops.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: sgemm.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ggml.c:ggml_can_fuse_ext Unexecuted instantiation: ggml-alloc.c:ggml_can_fuse_ext Unexecuted instantiation: ggml-backend.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ggml-opt.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) Unexecuted instantiation: ggml-quants.c:ggml_can_fuse_ext Unexecuted instantiation: gguf.cpp:ggml_can_fuse_ext(ggml_cgraph const*, int const*, ggml_op const*, int) |
629 | | |
630 | | // same as above, for sequential indices starting at node_idx |
631 | 0 | static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, const enum ggml_op * ops, int num_ops) { |
632 | 0 | assert(num_ops < 32); |
633 | 0 |
|
634 | 0 | if (node_idx + num_ops > cgraph->n_nodes) { |
635 | 0 | return false; |
636 | 0 | } |
637 | 0 |
|
638 | 0 | int idxs[32]; |
639 | 0 | for (int i = 0; i < num_ops; ++i) { |
640 | 0 | idxs[i] = node_idx + i; |
641 | 0 | } |
642 | 0 |
|
643 | 0 | return ggml_can_fuse_ext(cgraph, idxs, ops, num_ops); |
644 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: repack.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: traits.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ggml-cpu.c:ggml_can_fuse Unexecuted instantiation: quants.c:ggml_can_fuse Unexecuted instantiation: binary-ops.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: unary-ops.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: vec.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ops.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: sgemm.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ggml.c:ggml_can_fuse Unexecuted instantiation: ggml-alloc.c:ggml_can_fuse Unexecuted instantiation: ggml-backend.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ggml-opt.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) Unexecuted instantiation: ggml-quants.c:ggml_can_fuse Unexecuted instantiation: gguf.cpp:ggml_can_fuse(ggml_cgraph const*, int, ggml_op const*, int) |
645 | | |
646 | | GGML_API bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph, |
647 | | const int * node_idxs, |
648 | | int count, |
649 | | const enum ggml_op * ops, |
650 | | const int * outputs, |
651 | | int num_outputs); |
652 | | |
653 | | // Returns true if the subgraph formed by {node_idxs} can be fused |
654 | | // checks whethers all nodes which are not part of outputs can be elided |
655 | | // by checking if their num_uses are confined to the subgraph |
656 | | static inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph, |
657 | | int node_idx, |
658 | | int count, |
659 | | const enum ggml_op * ops, |
660 | | const int * outputs, |
661 | 0 | int num_outputs) { |
662 | 0 | GGML_ASSERT(count < 32); |
663 | 0 | if (node_idx + count > cgraph->n_nodes) { |
664 | 0 | return false; |
665 | 0 | } |
666 | 0 |
|
667 | 0 | int idxs[32]; |
668 | 0 |
|
669 | 0 | for (int i = 0; i < count; ++i) { |
670 | 0 | idxs[i] = node_idx + i; |
671 | 0 | } |
672 | 0 |
|
673 | 0 | return ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs); |
674 | 0 | } Unexecuted instantiation: ggml-backend-reg.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ggml-cpu.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: repack.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: traits.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ggml-cpu.c:ggml_can_fuse_subgraph Unexecuted instantiation: quants.c:ggml_can_fuse_subgraph Unexecuted instantiation: binary-ops.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: unary-ops.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: vec.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ops.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: sgemm.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ggml.c:ggml_can_fuse_subgraph Unexecuted instantiation: ggml-alloc.c:ggml_can_fuse_subgraph Unexecuted instantiation: ggml-backend.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ggml-opt.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) Unexecuted instantiation: ggml-quants.c:ggml_can_fuse_subgraph Unexecuted instantiation: gguf.cpp:ggml_can_fuse_subgraph(ggml_cgraph const*, int, int, ggml_op const*, int const*, int) |
675 | | |
676 | | #ifdef __cplusplus |
677 | | } |
678 | | #endif |
679 | | |
680 | | #ifdef __cplusplus |
681 | | #include <array> |
682 | | #include <initializer_list> |
683 | | #include <vector> |
684 | | |
685 | | // nicer C++ syntax for ggml_can_fuse |
686 | 0 | inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::initializer_list<enum ggml_op> ops) { |
687 | 0 | return ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size()); |
688 | 0 | } |
689 | | |
690 | | inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph, |
691 | | int start_idx, |
692 | | std::initializer_list<enum ggml_op> ops, |
693 | 0 | std::initializer_list<int> outputs = {}) { |
694 | 0 | return ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size()); |
695 | 0 | } |
696 | | |
697 | | // Return true if the edges in the graph match expectations. |
698 | | inline bool ggml_check_edges(const struct ggml_cgraph * cgraph, |
699 | | int start_idx, |
700 | 0 | std::initializer_list<std::array<int, 3>> edges) { |
701 | 0 | for (const auto & edge : edges) { |
702 | 0 | int dst_node = edge[0]; |
703 | 0 | int src_idx = edge[1]; |
704 | 0 | int src_node = edge[2]; |
705 | 0 | if (cgraph->nodes[start_idx + dst_node]->src[src_idx] != cgraph->nodes[start_idx + src_node]) { |
706 | 0 | return false; |
707 | 0 | } |
708 | 0 | } |
709 | 0 | return true; |
710 | 0 | } |
711 | | |
712 | | // expose GGUF internals for test code |
713 | | GGML_API size_t gguf_type_size(enum gguf_type type); |
714 | | GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params); |
715 | | GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta); |
716 | | #endif // __cplusplus |