/src/llama.cpp/ggml/src/ggml.c
Line | Count | Source |
1 | | #define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows |
2 | | #define _USE_MATH_DEFINES // For M_PI on MSVC |
3 | | |
4 | | #include "ggml-backend.h" |
5 | | #include "ggml-impl.h" |
6 | | #include "ggml-threading.h" |
7 | | #include "ggml-cpu.h" |
8 | | #include "ggml.h" |
9 | | |
10 | | // FIXME: required here for quantization functions |
11 | | #include "ggml-quants.h" |
12 | | |
13 | | #ifdef GGML_USE_CPU_HBM |
14 | | #include <hbwmalloc.h> |
15 | | #endif |
16 | | |
17 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
18 | | #include <malloc.h> // using malloc.h with MSC/MINGW |
19 | | #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) |
20 | | #include <alloca.h> |
21 | | #endif |
22 | | |
23 | | #include <assert.h> |
24 | | #include <errno.h> |
25 | | #include <time.h> |
26 | | #include <math.h> |
27 | | #include <stdlib.h> |
28 | | #include <string.h> |
29 | | #include <stdint.h> |
30 | | #include <inttypes.h> |
31 | | #include <stdio.h> |
32 | | #include <float.h> |
33 | | #include <limits.h> |
34 | | #include <stdarg.h> |
35 | | #include <signal.h> |
36 | | #if defined(__gnu_linux__) |
37 | | #include <syscall.h> |
38 | | #endif |
39 | | |
40 | | #if defined(__APPLE__) |
41 | | #include <unistd.h> |
42 | | #include <mach/mach.h> |
43 | | #include <TargetConditionals.h> |
44 | | #endif |
45 | | |
46 | | #if defined(_WIN32) |
47 | | #define WIN32_LEAN_AND_MEAN |
48 | | #ifndef NOMINMAX |
49 | | #define NOMINMAX |
50 | | #endif |
51 | | #include <windows.h> |
52 | | #endif |
53 | | |
54 | 0 | #define UNUSED GGML_UNUSED |
55 | | |
56 | | // Needed for ggml_fp32_to_bf16_row() |
57 | | #if defined(__AVX512BF16__) |
58 | | #if defined(_MSC_VER) |
59 | | #define m512i(p) p |
60 | | #else |
61 | | #include <immintrin.h> |
62 | | #define m512i(p) (__m512i)(p) |
63 | | #endif // defined(_MSC_VER) |
64 | | #endif // defined(__AVX512BF16__) |
65 | | |
66 | | #if defined(__linux__) || \ |
67 | | defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ |
68 | | (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH) |
69 | | |
70 | | #include <unistd.h> |
71 | | #include <sys/types.h> |
72 | | #include <sys/stat.h> |
73 | | #include <sys/wait.h> |
74 | | #if defined(__linux__) |
75 | | #include <sys/prctl.h> |
76 | | #endif |
77 | | |
78 | | #if defined(__ANDROID__) |
79 | | #include <unwind.h> |
80 | | #include <dlfcn.h> |
81 | | #include <stdio.h> |
82 | | |
83 | | struct backtrace_state { |
84 | | void ** current; |
85 | | void ** end; |
86 | | }; |
87 | | |
88 | | static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) { |
89 | | struct backtrace_state * state = (struct backtrace_state *)arg; |
90 | | uintptr_t pc = _Unwind_GetIP(context); |
91 | | if (pc) { |
92 | | if (state->current == state->end) { |
93 | | return _URC_END_OF_STACK; |
94 | | } else { |
95 | | *state->current++ = (void*)pc; |
96 | | } |
97 | | } |
98 | | return _URC_NO_REASON; |
99 | | } |
100 | | |
101 | | static void ggml_print_backtrace_symbols(void) { |
102 | | const int max = 100; |
103 | | void* buffer[max]; |
104 | | |
105 | | struct backtrace_state state = {buffer, buffer + max}; |
106 | | _Unwind_Backtrace(unwind_callback, &state); |
107 | | |
108 | | int count = state.current - buffer; |
109 | | |
110 | | for (int idx = 0; idx < count; ++idx) { |
111 | | const void * addr = buffer[idx]; |
112 | | const char * symbol = ""; |
113 | | |
114 | | Dl_info info; |
115 | | if (dladdr(addr, &info) && info.dli_sname) { |
116 | | symbol = info.dli_sname; |
117 | | } |
118 | | |
119 | | fprintf(stderr, "%d: %p %s\n", idx, addr, symbol); |
120 | | } |
121 | | } |
122 | | #elif defined(__linux__) && defined(__GLIBC__) |
123 | | #include <execinfo.h> |
124 | 0 | static void ggml_print_backtrace_symbols(void) { |
125 | 0 | void * trace[100]; |
126 | 0 | int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0])); |
127 | 0 | backtrace_symbols_fd(trace, nptrs, STDERR_FILENO); |
128 | 0 | } |
129 | | #elif defined(__APPLE__) |
130 | | #include <execinfo.h> |
131 | | static void ggml_print_backtrace_symbols(void) { |
132 | | void * trace[100]; |
133 | | int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0])); |
134 | | backtrace_symbols_fd(trace, nptrs, STDERR_FILENO); |
135 | | } |
136 | | #else |
137 | | static void ggml_print_backtrace_symbols(void) { |
138 | | // platform not supported |
139 | | } |
140 | | #endif |
141 | | |
142 | 0 | void ggml_print_backtrace(void) { |
143 | 0 | const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE"); |
144 | 0 | if (GGML_NO_BACKTRACE) { |
145 | 0 | return; |
146 | 0 | } |
147 | | #if defined(__APPLE__) |
148 | | // On macOS, fork+debugger attachment is problematic due to: |
149 | | // 1. libdispatch "poisons" forked child processes |
150 | | // 2. lldb has issues attaching to parent from forked child |
151 | | // Use simple backtrace() instead to avoid Terminal.app crashes |
152 | | const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB"); |
153 | | if (!GGML_BACKTRACE_LLDB) { |
154 | | fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n"); |
155 | | fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n"); |
156 | | fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n"); |
157 | | ggml_print_backtrace_symbols(); |
158 | | return; |
159 | | } |
160 | | #endif |
161 | 0 | #if defined(__linux__) |
162 | 0 | FILE * f = fopen("/proc/self/status", "r"); |
163 | 0 | size_t size = 0; |
164 | 0 | char * line = NULL; |
165 | 0 | ssize_t length = 0; |
166 | 0 | while ((length = getline(&line, &size, f)) > 0) { |
167 | 0 | if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) && |
168 | 0 | (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) { |
169 | | // Already being debugged, and the breakpoint is the later abort() |
170 | 0 | free(line); |
171 | 0 | fclose(f); |
172 | 0 | return; |
173 | 0 | } |
174 | 0 | } |
175 | 0 | free(line); |
176 | 0 | fclose(f); |
177 | 0 | int lock[2] = { -1, -1 }; |
178 | 0 | (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER |
179 | 0 | #endif |
180 | 0 | const int parent_pid = getpid(); |
181 | 0 | const int child_pid = fork(); |
182 | 0 | if (child_pid < 0) { // error |
183 | 0 | #if defined(__linux__) |
184 | 0 | close(lock[1]); |
185 | 0 | close(lock[0]); |
186 | 0 | #endif |
187 | 0 | return; |
188 | 0 | } else if (child_pid == 0) { // child |
189 | 0 | char attach[32]; |
190 | 0 | snprintf(attach, sizeof(attach), "attach %d", parent_pid); |
191 | 0 | #if defined(__linux__) |
192 | 0 | close(lock[1]); |
193 | 0 | (void) !read(lock[0], lock, 1); |
194 | 0 | close(lock[0]); |
195 | 0 | #endif |
196 | | // try gdb |
197 | 0 | execlp("gdb", "gdb", "--batch", |
198 | 0 | "-ex", "set style enabled on", |
199 | 0 | "-ex", attach, |
200 | 0 | "-ex", "bt -frame-info source-and-location", |
201 | 0 | "-ex", "detach", |
202 | 0 | "-ex", "quit", |
203 | 0 | (char *) NULL); |
204 | | // try lldb |
205 | 0 | execlp("lldb", "lldb", "--batch", |
206 | 0 | "-o", "bt", |
207 | 0 | "-o", "quit", |
208 | 0 | "-p", &attach[sizeof("attach ") - 1], |
209 | 0 | (char *) NULL); |
210 | | // gdb failed, fallback to backtrace_symbols |
211 | 0 | ggml_print_backtrace_symbols(); |
212 | 0 | _Exit(0); |
213 | 0 | } else { // parent |
214 | 0 | #if defined(__linux__) |
215 | 0 | prctl(PR_SET_PTRACER, child_pid); |
216 | 0 | close(lock[1]); |
217 | 0 | close(lock[0]); |
218 | 0 | #endif |
219 | 0 | waitpid(child_pid, NULL, 0); |
220 | 0 | } |
221 | 0 | } |
222 | | #else |
223 | | void ggml_print_backtrace(void) { |
224 | | // platform not supported |
225 | | } |
226 | | #endif |
227 | | |
228 | | static ggml_abort_callback_t g_abort_callback = NULL; |
229 | | |
230 | | // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout) |
231 | 0 | GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) { |
232 | 0 | ggml_abort_callback_t ret_val = g_abort_callback; |
233 | 0 | g_abort_callback = callback; |
234 | 0 | return ret_val; |
235 | 0 | } |
236 | | |
237 | 0 | void ggml_abort(const char * file, int line, const char * fmt, ...) { |
238 | 0 | fflush(stdout); |
239 | |
|
240 | 0 | char message[2048]; |
241 | 0 | int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line); |
242 | |
|
243 | 0 | va_list args; |
244 | 0 | va_start(args, fmt); |
245 | 0 | vsnprintf(message + offset, sizeof(message) - offset, fmt, args); |
246 | 0 | va_end(args); |
247 | |
|
248 | 0 | if (g_abort_callback) { |
249 | 0 | g_abort_callback(message); |
250 | 0 | } else { |
251 | | // default: print error and backtrace to stderr |
252 | 0 | fprintf(stderr, "%s\n", message); |
253 | | |
254 | 0 | } |
255 | |
|
256 | 0 | abort(); |
257 | 0 | } |
258 | | |
259 | | // ggml_print_backtrace is registered with std::set_terminate by ggml.cpp |
260 | | |
261 | | // |
262 | | // logging |
263 | | // |
264 | | |
265 | | struct ggml_logger_state { |
266 | | ggml_log_callback log_callback; |
267 | | void * log_callback_user_data; |
268 | | }; |
269 | | static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL}; |
270 | | |
271 | 0 | static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) { |
272 | 0 | if (format == NULL) { |
273 | 0 | return; |
274 | 0 | } |
275 | 0 | va_list args_copy; |
276 | 0 | va_copy(args_copy, args); |
277 | 0 | char buffer[128]; |
278 | 0 | int len = vsnprintf(buffer, 128, format, args); |
279 | 0 | if (len < 128) { |
280 | 0 | g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data); |
281 | 0 | } else { |
282 | 0 | char * buffer2 = (char *) calloc(len + 1, sizeof(char)); |
283 | 0 | vsnprintf(buffer2, len + 1, format, args_copy); |
284 | 0 | buffer2[len] = 0; |
285 | 0 | g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data); |
286 | 0 | free(buffer2); |
287 | 0 | } |
288 | 0 | va_end(args_copy); |
289 | 0 | } |
290 | | |
291 | 0 | void ggml_log_internal(enum ggml_log_level level, const char * format, ...) { |
292 | 0 | va_list args; |
293 | 0 | va_start(args, format); |
294 | 0 | ggml_log_internal_v(level, format, args); |
295 | 0 | va_end(args); |
296 | 0 | } |
297 | | |
298 | 0 | void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) { |
299 | 0 | (void) level; |
300 | 0 | (void) user_data; |
301 | 0 | fputs(text, stderr); |
302 | 0 | fflush(stderr); |
303 | 0 | } |
304 | | |
305 | | // |
306 | | // end of logging block |
307 | | // |
308 | | |
309 | | #ifdef GGML_USE_ACCELERATE |
310 | | // uncomment to use vDSP for soft max computation |
311 | | // note: not sure if it is actually faster |
312 | | //#define GGML_SOFT_MAX_ACCELERATE |
313 | | #endif |
314 | | |
315 | | |
316 | 9 | void * ggml_aligned_malloc(size_t size) { |
317 | | #if defined(__s390x__) |
318 | | const int alignment = 256; |
319 | | #else |
320 | 9 | const int alignment = 64; |
321 | 9 | #endif |
322 | | |
323 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
324 | | return _aligned_malloc(size, alignment); |
325 | | #else |
326 | 9 | if (size == 0) { |
327 | 0 | GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); |
328 | 0 | return NULL; |
329 | 0 | } |
330 | 9 | void * aligned_memory = NULL; |
331 | | #ifdef GGML_USE_CPU_HBM |
332 | | int result = hbw_posix_memalign(&aligned_memory, alignment, size); |
333 | | #elif TARGET_OS_OSX |
334 | | GGML_UNUSED(alignment); |
335 | | kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); |
336 | | int result = EFAULT; |
337 | | switch (alloc_status) { |
338 | | case KERN_SUCCESS: |
339 | | result = 0; |
340 | | break; |
341 | | case KERN_INVALID_ADDRESS: |
342 | | result = EINVAL; |
343 | | break; |
344 | | case KERN_NO_SPACE: |
345 | | result = ENOMEM; |
346 | | break; |
347 | | default: |
348 | | result = EFAULT; |
349 | | break; |
350 | | } |
351 | | #else |
352 | 9 | int result = posix_memalign(&aligned_memory, alignment, size); |
353 | 9 | #endif |
354 | 9 | if (result != 0) { |
355 | | // Handle allocation failure |
356 | 0 | const char *error_desc = "unknown allocation error"; |
357 | 0 | switch (result) { |
358 | 0 | case EINVAL: |
359 | 0 | error_desc = "invalid alignment value"; |
360 | 0 | break; |
361 | 0 | case ENOMEM: |
362 | 0 | error_desc = "insufficient memory"; |
363 | 0 | break; |
364 | 0 | } |
365 | 0 | GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); |
366 | 0 | return NULL; |
367 | 0 | } |
368 | 9 | return aligned_memory; |
369 | 9 | #endif |
370 | 9 | } |
371 | | |
372 | 9 | void ggml_aligned_free(void * ptr, size_t size) { |
373 | 9 | GGML_UNUSED(size); |
374 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
375 | | _aligned_free(ptr); |
376 | | #elif GGML_USE_CPU_HBM |
377 | | if (ptr != NULL) { |
378 | | hbw_free(ptr); |
379 | | } |
380 | | #elif TARGET_OS_OSX |
381 | | if (ptr != NULL) { |
382 | | vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size); |
383 | | } |
384 | | #else |
385 | 9 | free(ptr); |
386 | 9 | #endif |
387 | 9 | } |
388 | | |
389 | | |
390 | 9 | inline static void * ggml_malloc(size_t size) { |
391 | 9 | if (size == 0) { |
392 | 0 | GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n"); |
393 | 0 | return NULL; |
394 | 0 | } |
395 | 9 | void * result = malloc(size); |
396 | 9 | if (result == NULL) { |
397 | 0 | GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0)); |
398 | 0 | GGML_ABORT("fatal error"); |
399 | 0 | } |
400 | 9 | return result; |
401 | 9 | } |
402 | | |
403 | | // calloc |
404 | 0 | inline static void * ggml_calloc(size_t num, size_t size) { |
405 | 0 | if ((num * size) > 9000000) {GGML_ABORT("calloc err");} |
406 | |
|
407 | 0 | if (num == 0 || size == 0) { |
408 | 0 | GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n"); |
409 | 0 | return NULL; |
410 | 0 | } |
411 | 0 | void * result = calloc(num, size); |
412 | 0 | if (result == NULL) { |
413 | 0 | GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0)); |
414 | 0 | GGML_ABORT("fatal error"); |
415 | 0 | } |
416 | 0 | return result; |
417 | 0 | } |
418 | | |
419 | 9 | #define GGML_MALLOC(size) ggml_malloc(size) |
420 | 0 | #define GGML_CALLOC(num, size) ggml_calloc(num, size) |
421 | | |
422 | 9 | #define GGML_FREE(ptr) free(ptr) |
423 | | |
424 | 0 | const char * ggml_status_to_string(enum ggml_status status) { |
425 | 0 | switch (status) { |
426 | 0 | case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)"; |
427 | 0 | case GGML_STATUS_FAILED: return "GGML status: error (operation failed)"; |
428 | 0 | case GGML_STATUS_SUCCESS: return "GGML status: success"; |
429 | 0 | case GGML_STATUS_ABORTED: return "GGML status: warning (operation aborted)"; |
430 | 0 | } |
431 | | |
432 | 0 | return "GGML status: unknown"; |
433 | 0 | } |
434 | | |
435 | 0 | float ggml_fp16_to_fp32(ggml_fp16_t x) { |
436 | 0 | #define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml |
437 | 0 | return GGML_FP16_TO_FP32(x); |
438 | 0 | } |
439 | | |
440 | 0 | ggml_fp16_t ggml_fp32_to_fp16(float x) { |
441 | 0 | #define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml |
442 | 0 | return GGML_FP32_TO_FP16(x); |
443 | 0 | } |
444 | | |
445 | 0 | float ggml_bf16_to_fp32(ggml_bf16_t x) { |
446 | 0 | #define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml |
447 | 0 | return GGML_BF16_TO_FP32(x); // it just left shifts |
448 | 0 | } |
449 | | |
450 | 0 | ggml_bf16_t ggml_fp32_to_bf16(float x) { |
451 | 0 | #define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml |
452 | 0 | return GGML_FP32_TO_BF16(x); |
453 | 0 | } |
454 | | |
455 | 0 | void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) { |
456 | 0 | for (int64_t i = 0; i < n; i++) { |
457 | 0 | y[i] = GGML_FP16_TO_FP32(x[i]); |
458 | 0 | } |
459 | 0 | } |
460 | | |
461 | 0 | void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) { |
462 | 0 | int i = 0; |
463 | 0 | for (; i < n; ++i) { |
464 | 0 | y[i] = GGML_FP32_TO_FP16(x[i]); |
465 | 0 | } |
466 | 0 | } |
467 | | |
468 | 0 | void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) { |
469 | 0 | int i = 0; |
470 | 0 | for (; i < n; ++i) { |
471 | 0 | y[i] = GGML_BF16_TO_FP32(x[i]); |
472 | 0 | } |
473 | 0 | } |
474 | | |
475 | 0 | void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) { |
476 | 0 | for (int i = 0; i < n; i++) { |
477 | 0 | y[i] = ggml_compute_fp32_to_bf16(x[i]); |
478 | 0 | } |
479 | 0 | } |
480 | | |
481 | 0 | void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) { |
482 | 0 | int i = 0; |
483 | | #if defined(__AVX512BF16__) |
484 | | // subnormals are flushed to zero on this platform |
485 | | for (; i + 32 <= n; i += 32) { |
486 | | _mm512_storeu_si512( |
487 | | (__m512i *)(y + i), |
488 | | m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16), |
489 | | _mm512_loadu_ps(x + i)))); |
490 | | } |
491 | | #endif |
492 | 0 | for (; i < n; i++) { |
493 | 0 | y[i] = GGML_FP32_TO_BF16(x[i]); |
494 | 0 | } |
495 | 0 | } |
496 | | |
497 | 0 | bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) { |
498 | 0 | return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0; |
499 | 0 | } |
500 | | |
501 | 0 | const char * ggml_version(void) { |
502 | 0 | return GGML_VERSION; |
503 | 0 | } |
504 | | |
505 | 0 | const char * ggml_commit(void) { |
506 | 0 | return GGML_COMMIT; |
507 | 0 | } |
508 | | |
509 | | // |
510 | | // timing |
511 | | // |
512 | | |
513 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
514 | | static int64_t timer_freq, timer_start; |
515 | | void ggml_time_init(void) { |
516 | | LARGE_INTEGER t; |
517 | | QueryPerformanceFrequency(&t); |
518 | | timer_freq = t.QuadPart; |
519 | | |
520 | | // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq |
521 | | // and the uptime is high enough. |
522 | | // We subtract the program start time to reduce the likelihood of that happening. |
523 | | QueryPerformanceCounter(&t); |
524 | | timer_start = t.QuadPart; |
525 | | } |
526 | | int64_t ggml_time_ms(void) { |
527 | | LARGE_INTEGER t; |
528 | | QueryPerformanceCounter(&t); |
529 | | return ((t.QuadPart-timer_start) * 1000) / timer_freq; |
530 | | } |
531 | | int64_t ggml_time_us(void) { |
532 | | LARGE_INTEGER t; |
533 | | QueryPerformanceCounter(&t); |
534 | | return ((t.QuadPart-timer_start) * 1000000) / timer_freq; |
535 | | } |
536 | | #else |
537 | 25 | void ggml_time_init(void) {} |
538 | 0 | int64_t ggml_time_ms(void) { |
539 | 0 | struct timespec ts; |
540 | 0 | clock_gettime(CLOCK_MONOTONIC, &ts); |
541 | 0 | return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000; |
542 | 0 | } |
543 | | |
544 | 2 | int64_t ggml_time_us(void) { |
545 | 2 | struct timespec ts; |
546 | 2 | clock_gettime(CLOCK_MONOTONIC, &ts); |
547 | 2 | return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000; |
548 | 2 | } |
549 | | #endif |
550 | | |
551 | 0 | int64_t ggml_cycles(void) { |
552 | 0 | return clock(); |
553 | 0 | } |
554 | | |
555 | 0 | int64_t ggml_cycles_per_ms(void) { |
556 | 0 | return CLOCKS_PER_SEC/1000; |
557 | 0 | } |
558 | | |
559 | | // |
560 | | // cross-platform UTF-8 file paths |
561 | | // |
562 | | |
563 | | #ifdef _WIN32 |
564 | | static wchar_t * ggml_mbstowcs(const char * mbs) { |
565 | | int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0); |
566 | | if (!wlen) { |
567 | | errno = EINVAL; |
568 | | return NULL; |
569 | | } |
570 | | |
571 | | wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t)); |
572 | | wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen); |
573 | | if (!wlen) { |
574 | | GGML_FREE(wbuf); |
575 | | errno = EINVAL; |
576 | | return NULL; |
577 | | } |
578 | | |
579 | | return wbuf; |
580 | | } |
581 | | #endif |
582 | | |
583 | 0 | FILE * ggml_fopen(const char * fname, const char * mode) { |
584 | | #ifdef _WIN32 |
585 | | FILE * file = NULL; |
586 | | |
587 | | // convert fname (UTF-8) |
588 | | wchar_t * wfname = ggml_mbstowcs(fname); |
589 | | if (wfname) { |
590 | | // convert mode (ANSI) |
591 | | wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t)); |
592 | | wchar_t * wmode_p = wmode; |
593 | | do { |
594 | | *wmode_p++ = (wchar_t)*mode; |
595 | | } while (*mode++); |
596 | | |
597 | | // open file |
598 | | file = _wfopen(wfname, wmode); |
599 | | |
600 | | GGML_FREE(wfname); |
601 | | GGML_FREE(wmode); |
602 | | } |
603 | | |
604 | | return file; |
605 | | #else |
606 | 0 | return fopen(fname, mode); |
607 | 0 | #endif |
608 | |
|
609 | 0 | } |
610 | | |
611 | | static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = { |
612 | | [GGML_TYPE_I8] = { |
613 | | .type_name = "i8", |
614 | | .blck_size = 1, |
615 | | .type_size = sizeof(int8_t), |
616 | | .is_quantized = false, |
617 | | }, |
618 | | [GGML_TYPE_I16] = { |
619 | | .type_name = "i16", |
620 | | .blck_size = 1, |
621 | | .type_size = sizeof(int16_t), |
622 | | .is_quantized = false, |
623 | | }, |
624 | | [GGML_TYPE_I32] = { |
625 | | .type_name = "i32", |
626 | | .blck_size = 1, |
627 | | .type_size = sizeof(int32_t), |
628 | | .is_quantized = false, |
629 | | }, |
630 | | [GGML_TYPE_I64] = { |
631 | | .type_name = "i64", |
632 | | .blck_size = 1, |
633 | | .type_size = sizeof(int64_t), |
634 | | .is_quantized = false, |
635 | | }, |
636 | | [GGML_TYPE_F64] = { |
637 | | .type_name = "f64", |
638 | | .blck_size = 1, |
639 | | .type_size = sizeof(double), |
640 | | .is_quantized = false, |
641 | | }, |
642 | | [GGML_TYPE_F32] = { |
643 | | .type_name = "f32", |
644 | | .blck_size = 1, |
645 | | .type_size = sizeof(float), |
646 | | .is_quantized = false, |
647 | | }, |
648 | | [GGML_TYPE_F16] = { |
649 | | .type_name = "f16", |
650 | | .blck_size = 1, |
651 | | .type_size = sizeof(ggml_fp16_t), |
652 | | .is_quantized = false, |
653 | | .to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row, |
654 | | .from_float_ref = (ggml_from_float_t) ggml_fp32_to_fp16_row, |
655 | | }, |
656 | | [GGML_TYPE_Q4_0] = { |
657 | | .type_name = "q4_0", |
658 | | .blck_size = QK4_0, |
659 | | .type_size = sizeof(block_q4_0), |
660 | | .is_quantized = true, |
661 | | .to_float = (ggml_to_float_t) dequantize_row_q4_0, |
662 | | .from_float_ref = (ggml_from_float_t) quantize_row_q4_0_ref, |
663 | | }, |
664 | | [GGML_TYPE_Q4_1] = { |
665 | | .type_name = "q4_1", |
666 | | .blck_size = QK4_1, |
667 | | .type_size = sizeof(block_q4_1), |
668 | | .is_quantized = true, |
669 | | .to_float = (ggml_to_float_t) dequantize_row_q4_1, |
670 | | .from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref, |
671 | | }, |
672 | | [4] = { // GGML_TYPE_Q4_2 |
673 | | .type_name = "DEPRECATED", |
674 | | .blck_size = 0, |
675 | | .type_size = 0, |
676 | | .is_quantized = false, |
677 | | }, |
678 | | [5] = { // GGML_TYPE_Q4_3 |
679 | | .type_name = "DEPRECATED", |
680 | | .blck_size = 0, |
681 | | .type_size = 0, |
682 | | .is_quantized = false, |
683 | | }, |
684 | | [GGML_TYPE_Q5_0] = { |
685 | | .type_name = "q5_0", |
686 | | .blck_size = QK5_0, |
687 | | .type_size = sizeof(block_q5_0), |
688 | | .is_quantized = true, |
689 | | .to_float = (ggml_to_float_t) dequantize_row_q5_0, |
690 | | .from_float_ref = (ggml_from_float_t) quantize_row_q5_0_ref, |
691 | | }, |
692 | | [GGML_TYPE_Q5_1] = { |
693 | | .type_name = "q5_1", |
694 | | .blck_size = QK5_1, |
695 | | .type_size = sizeof(block_q5_1), |
696 | | .is_quantized = true, |
697 | | .to_float = (ggml_to_float_t) dequantize_row_q5_1, |
698 | | .from_float_ref = (ggml_from_float_t) quantize_row_q5_1_ref, |
699 | | }, |
700 | | [GGML_TYPE_Q8_0] = { |
701 | | .type_name = "q8_0", |
702 | | .blck_size = QK8_0, |
703 | | .type_size = sizeof(block_q8_0), |
704 | | .is_quantized = true, |
705 | | .to_float = (ggml_to_float_t) dequantize_row_q8_0, |
706 | | .from_float_ref = (ggml_from_float_t) quantize_row_q8_0_ref, |
707 | | }, |
708 | | [GGML_TYPE_Q8_1] = { |
709 | | .type_name = "q8_1", |
710 | | .blck_size = QK8_1, |
711 | | .type_size = sizeof(block_q8_1), |
712 | | .is_quantized = true, |
713 | | .from_float_ref = (ggml_from_float_t) quantize_row_q8_1_ref, |
714 | | }, |
715 | | [GGML_TYPE_MXFP4] = { |
716 | | .type_name = "mxfp4", |
717 | | .blck_size = QK_MXFP4, |
718 | | .type_size = sizeof(block_mxfp4), |
719 | | .is_quantized = true, |
720 | | .to_float = (ggml_to_float_t) dequantize_row_mxfp4, |
721 | | .from_float_ref = (ggml_from_float_t)quantize_row_mxfp4_ref, |
722 | | }, |
723 | | [GGML_TYPE_Q2_K] = { |
724 | | .type_name = "q2_K", |
725 | | .blck_size = QK_K, |
726 | | .type_size = sizeof(block_q2_K), |
727 | | .is_quantized = true, |
728 | | .to_float = (ggml_to_float_t) dequantize_row_q2_K, |
729 | | .from_float_ref = (ggml_from_float_t) quantize_row_q2_K_ref, |
730 | | }, |
731 | | [GGML_TYPE_Q3_K] = { |
732 | | .type_name = "q3_K", |
733 | | .blck_size = QK_K, |
734 | | .type_size = sizeof(block_q3_K), |
735 | | .is_quantized = true, |
736 | | .to_float = (ggml_to_float_t) dequantize_row_q3_K, |
737 | | .from_float_ref = (ggml_from_float_t) quantize_row_q3_K_ref, |
738 | | }, |
739 | | [GGML_TYPE_Q4_K] = { |
740 | | .type_name = "q4_K", |
741 | | .blck_size = QK_K, |
742 | | .type_size = sizeof(block_q4_K), |
743 | | .is_quantized = true, |
744 | | .to_float = (ggml_to_float_t) dequantize_row_q4_K, |
745 | | .from_float_ref = (ggml_from_float_t) quantize_row_q4_K_ref, |
746 | | }, |
747 | | [GGML_TYPE_Q5_K] = { |
748 | | .type_name = "q5_K", |
749 | | .blck_size = QK_K, |
750 | | .type_size = sizeof(block_q5_K), |
751 | | .is_quantized = true, |
752 | | .to_float = (ggml_to_float_t) dequantize_row_q5_K, |
753 | | .from_float_ref = (ggml_from_float_t) quantize_row_q5_K_ref, |
754 | | }, |
755 | | [GGML_TYPE_Q6_K] = { |
756 | | .type_name = "q6_K", |
757 | | .blck_size = QK_K, |
758 | | .type_size = sizeof(block_q6_K), |
759 | | .is_quantized = true, |
760 | | .to_float = (ggml_to_float_t) dequantize_row_q6_K, |
761 | | .from_float_ref = (ggml_from_float_t) quantize_row_q6_K_ref, |
762 | | }, |
763 | | [GGML_TYPE_IQ2_XXS] = { |
764 | | .type_name = "iq2_xxs", |
765 | | .blck_size = QK_K, |
766 | | .type_size = sizeof(block_iq2_xxs), |
767 | | .is_quantized = true, |
768 | | .to_float = (ggml_to_float_t) dequantize_row_iq2_xxs, |
769 | | .from_float_ref = NULL, |
770 | | }, |
771 | | [GGML_TYPE_IQ2_XS] = { |
772 | | .type_name = "iq2_xs", |
773 | | .blck_size = QK_K, |
774 | | .type_size = sizeof(block_iq2_xs), |
775 | | .is_quantized = true, |
776 | | .to_float = (ggml_to_float_t) dequantize_row_iq2_xs, |
777 | | .from_float_ref = NULL, |
778 | | }, |
779 | | [GGML_TYPE_IQ3_XXS] = { |
780 | | .type_name = "iq3_xxs", |
781 | | .blck_size = QK_K, |
782 | | .type_size = sizeof(block_iq3_xxs), |
783 | | .is_quantized = true, |
784 | | .to_float = (ggml_to_float_t) dequantize_row_iq3_xxs, |
785 | | .from_float_ref = (ggml_from_float_t)quantize_row_iq3_xxs_ref, |
786 | | }, |
787 | | [GGML_TYPE_IQ3_S] = { |
788 | | .type_name = "iq3_s", |
789 | | .blck_size = QK_K, |
790 | | .type_size = sizeof(block_iq3_s), |
791 | | .is_quantized = true, |
792 | | .to_float = (ggml_to_float_t) dequantize_row_iq3_s, |
793 | | .from_float_ref = (ggml_from_float_t)quantize_row_iq3_s_ref, |
794 | | }, |
795 | | [GGML_TYPE_IQ2_S] = { |
796 | | .type_name = "iq2_s", |
797 | | .blck_size = QK_K, |
798 | | .type_size = sizeof(block_iq2_s), |
799 | | .is_quantized = true, |
800 | | .to_float = (ggml_to_float_t) dequantize_row_iq2_s, |
801 | | .from_float_ref = (ggml_from_float_t)quantize_row_iq2_s_ref, |
802 | | }, |
803 | | [GGML_TYPE_IQ1_S] = { |
804 | | .type_name = "iq1_s", |
805 | | .blck_size = QK_K, |
806 | | .type_size = sizeof(block_iq1_s), |
807 | | .is_quantized = true, |
808 | | .to_float = (ggml_to_float_t) dequantize_row_iq1_s, |
809 | | .from_float_ref = NULL, |
810 | | }, |
811 | | [GGML_TYPE_IQ1_M] = { |
812 | | .type_name = "iq1_m", |
813 | | .blck_size = QK_K, |
814 | | .type_size = sizeof(block_iq1_m), |
815 | | .is_quantized = true, |
816 | | .to_float = (ggml_to_float_t) dequantize_row_iq1_m, |
817 | | .from_float_ref = NULL, |
818 | | }, |
819 | | [GGML_TYPE_IQ4_NL] = { |
820 | | .type_name = "iq4_nl", |
821 | | .blck_size = QK4_NL, |
822 | | .type_size = sizeof(block_iq4_nl), |
823 | | .is_quantized = true, |
824 | | .to_float = (ggml_to_float_t) dequantize_row_iq4_nl, |
825 | | .from_float_ref = (ggml_from_float_t)quantize_row_iq4_nl_ref, |
826 | | }, |
827 | | [GGML_TYPE_IQ4_XS] = { |
828 | | .type_name = "iq4_xs", |
829 | | .blck_size = QK_K, |
830 | | .type_size = sizeof(block_iq4_xs), |
831 | | .is_quantized = true, |
832 | | .to_float = (ggml_to_float_t) dequantize_row_iq4_xs, |
833 | | .from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_ref, |
834 | | }, |
835 | | [GGML_TYPE_Q8_K] = { |
836 | | .type_name = "q8_K", |
837 | | .blck_size = QK_K, |
838 | | .type_size = sizeof(block_q8_K), |
839 | | .is_quantized = true, |
840 | | }, |
841 | | [GGML_TYPE_BF16] = { |
842 | | .type_name = "bf16", |
843 | | .blck_size = 1, |
844 | | .type_size = sizeof(ggml_bf16_t), |
845 | | .is_quantized = false, |
846 | | .to_float = (ggml_to_float_t) ggml_bf16_to_fp32_row, |
847 | | .from_float_ref = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref, |
848 | | }, |
849 | | [31] = { // GGML_TYPE_Q4_0_4_4 |
850 | | .type_name = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking", |
851 | | .blck_size = 0, |
852 | | .type_size = 0, |
853 | | .is_quantized = false, |
854 | | }, |
855 | | [32] = { // GGML_TYPE_Q4_0_4_8 |
856 | | .type_name = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking", |
857 | | .blck_size = 0, |
858 | | .type_size = 0, |
859 | | .is_quantized = false, |
860 | | }, |
861 | | [33] = { // GGML_TYPE_Q4_0_8_8 |
862 | | .type_name = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking", |
863 | | .blck_size = 0, |
864 | | .type_size = 0, |
865 | | .is_quantized = false, |
866 | | }, |
867 | | [GGML_TYPE_TQ1_0] = { |
868 | | .type_name = "tq1_0", |
869 | | .blck_size = QK_K, |
870 | | .type_size = sizeof(block_tq1_0), |
871 | | .is_quantized = true, |
872 | | .to_float = (ggml_to_float_t) dequantize_row_tq1_0, |
873 | | .from_float_ref = (ggml_from_float_t) quantize_row_tq1_0_ref, |
874 | | }, |
875 | | [GGML_TYPE_TQ2_0] = { |
876 | | .type_name = "tq2_0", |
877 | | .blck_size = QK_K, |
878 | | .type_size = sizeof(block_tq2_0), |
879 | | .is_quantized = true, |
880 | | .to_float = (ggml_to_float_t) dequantize_row_tq2_0, |
881 | | .from_float_ref = (ggml_from_float_t) quantize_row_tq2_0_ref, |
882 | | }, |
883 | | [36] = { // GGML_TYPE_IQ4_NL_4_4 |
884 | | .type_name = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking", |
885 | | .blck_size = 0, |
886 | | .type_size = 0, |
887 | | .is_quantized = false, |
888 | | }, |
889 | | [37] = { // GGML_TYPE_IQ4_NL_4_8 |
890 | | .type_name = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking", |
891 | | .blck_size = 0, |
892 | | .type_size = 0, |
893 | | .is_quantized = false, |
894 | | }, |
895 | | [38] = { // GGML_TYPE_IQ4_NL_8_8 |
896 | | .type_name = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking", |
897 | | .blck_size = 0, |
898 | | .type_size = 0, |
899 | | .is_quantized = false, |
900 | | }, |
901 | | }; |
902 | | |
903 | 0 | const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) { |
904 | 0 | GGML_ASSERT(type < GGML_TYPE_COUNT); |
905 | 0 | return &type_traits[type]; |
906 | 0 | } |
907 | | |
908 | | // |
909 | | // ggml object |
910 | | // |
911 | | |
912 | | struct ggml_object { |
913 | | size_t offs; |
914 | | size_t size; |
915 | | |
916 | | struct ggml_object * next; |
917 | | |
918 | | enum ggml_object_type type; |
919 | | |
920 | | char padding[4]; |
921 | | }; |
922 | | |
923 | | static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object); |
924 | | |
925 | | // |
926 | | // ggml context |
927 | | // |
928 | | |
929 | | struct ggml_context { |
930 | | size_t mem_size; |
931 | | void * mem_buffer; |
932 | | bool mem_buffer_owned; |
933 | | bool no_alloc; |
934 | | |
935 | | int n_objects; |
936 | | |
937 | | struct ggml_object * objects_begin; |
938 | | struct ggml_object * objects_end; |
939 | | }; |
940 | | |
941 | | // |
942 | | // data types |
943 | | // |
944 | | |
945 | | static const char * GGML_OP_NAME[GGML_OP_COUNT] = { |
946 | | "NONE", |
947 | | |
948 | | "DUP", |
949 | | "ADD", |
950 | | "ADD_ID", |
951 | | "ADD1", |
952 | | "ACC", |
953 | | "SUB", |
954 | | "MUL", |
955 | | "DIV", |
956 | | "SQR", |
957 | | "SQRT", |
958 | | "LOG", |
959 | | "SIN", |
960 | | "COS", |
961 | | "SUM", |
962 | | "SUM_ROWS", |
963 | | "CUMSUM", |
964 | | "MEAN", |
965 | | "ARGMAX", |
966 | | "COUNT_EQUAL", |
967 | | "REPEAT", |
968 | | "REPEAT_BACK", |
969 | | "CONCAT", |
970 | | "SILU_BACK", |
971 | | "NORM", |
972 | | "RMS_NORM", |
973 | | "RMS_NORM_BACK", |
974 | | "GROUP_NORM", |
975 | | "L2_NORM", |
976 | | |
977 | | "MUL_MAT", |
978 | | "MUL_MAT_ID", |
979 | | "OUT_PROD", |
980 | | |
981 | | "SCALE", |
982 | | "SET", |
983 | | "CPY", |
984 | | "CONT", |
985 | | "RESHAPE", |
986 | | "VIEW", |
987 | | "PERMUTE", |
988 | | "TRANSPOSE", |
989 | | "GET_ROWS", |
990 | | "GET_ROWS_BACK", |
991 | | "SET_ROWS", |
992 | | "DIAG", |
993 | | "DIAG_MASK_INF", |
994 | | "DIAG_MASK_ZERO", |
995 | | "SOFT_MAX", |
996 | | "SOFT_MAX_BACK", |
997 | | "ROPE", |
998 | | "ROPE_BACK", |
999 | | "CLAMP", |
1000 | | "CONV_TRANSPOSE_1D", |
1001 | | "IM2COL", |
1002 | | "IM2COL_BACK", |
1003 | | "IM2COL_3D", |
1004 | | "CONV_2D", |
1005 | | "CONV_3D", |
1006 | | "CONV_2D_DW", |
1007 | | "CONV_TRANSPOSE_2D", |
1008 | | "POOL_1D", |
1009 | | "POOL_2D", |
1010 | | "POOL_2D_BACK", |
1011 | | "UPSCALE", |
1012 | | "PAD", |
1013 | | "PAD_REFLECT_1D", |
1014 | | "ROLL", |
1015 | | "ARANGE", |
1016 | | "TIMESTEP_EMBEDDING", |
1017 | | "ARGSORT", |
1018 | | "TOP_K", |
1019 | | "LEAKY_RELU", |
1020 | | "TRI", |
1021 | | "FILL", |
1022 | | |
1023 | | "FLASH_ATTN_EXT", |
1024 | | "FLASH_ATTN_BACK", |
1025 | | "SSM_CONV", |
1026 | | "SSM_SCAN", |
1027 | | "WIN_PART", |
1028 | | "WIN_UNPART", |
1029 | | "GET_REL_POS", |
1030 | | "ADD_REL_POS", |
1031 | | "RWKV_WKV6", |
1032 | | "GATED_LINEAR_ATTN", |
1033 | | "RWKV_WKV7", |
1034 | | "SOLVE_TRI", |
1035 | | |
1036 | | "UNARY", |
1037 | | |
1038 | | "MAP_CUSTOM1", |
1039 | | "MAP_CUSTOM2", |
1040 | | "MAP_CUSTOM3", |
1041 | | |
1042 | | "CUSTOM", |
1043 | | |
1044 | | "CROSS_ENTROPY_LOSS", |
1045 | | "CROSS_ENTROPY_LOSS_BACK", |
1046 | | "OPT_STEP_ADAMW", |
1047 | | "OPT_STEP_SGD", |
1048 | | |
1049 | | "GLU", |
1050 | | }; |
1051 | | |
1052 | | static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95"); |
1053 | | |
1054 | | static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { |
1055 | | "none", |
1056 | | |
1057 | | "x", |
1058 | | "x+y", |
1059 | | "x[i]+y", |
1060 | | "x+y", |
1061 | | "view(x,nb,offset)+=y->x", |
1062 | | "x-y", |
1063 | | "x*y", |
1064 | | "x/y", |
1065 | | "x^2", |
1066 | | "√x", |
1067 | | "log(x)", |
1068 | | "sin(x)", |
1069 | | "cos(x)", |
1070 | | "Σx", |
1071 | | "Σx_k", |
1072 | | "cumsum(x)", |
1073 | | "Σx/n", |
1074 | | "argmax(x)", |
1075 | | "count_equal(x)", |
1076 | | "repeat(x)", |
1077 | | "repeat_back(x)", |
1078 | | "concat(x, y)", |
1079 | | "silu_back(x)", |
1080 | | "norm(x)", |
1081 | | "rms_norm(x)", |
1082 | | "rms_norm_back(x)", |
1083 | | "group_norm(x)", |
1084 | | "l2_norm(x)", |
1085 | | |
1086 | | "X*Y", |
1087 | | "X[i]*Y", |
1088 | | "X*Y", |
1089 | | |
1090 | | "x*v", |
1091 | | "y-\\>view(x)", |
1092 | | "x-\\>y", |
1093 | | "cont(x)", |
1094 | | "reshape(x)", |
1095 | | "view(x)", |
1096 | | "permute(x)", |
1097 | | "transpose(x)", |
1098 | | "get_rows(x)", |
1099 | | "get_rows_back(x)", |
1100 | | "set_rows(x)", |
1101 | | "diag(x)", |
1102 | | "diag_mask_inf(x)", |
1103 | | "diag_mask_zero(x)", |
1104 | | "soft_max(x)", |
1105 | | "soft_max_back(x)", |
1106 | | "rope(x)", |
1107 | | "rope_back(x)", |
1108 | | "clamp(x)", |
1109 | | "conv_transpose_1d(x)", |
1110 | | "im2col(x)", |
1111 | | "im2col_back(x)", |
1112 | | "im2col_3d(x)", |
1113 | | "conv_2d(x)", |
1114 | | "conv_3d(x)", |
1115 | | "conv_2d_dw(x)", |
1116 | | "conv_transpose_2d(x)", |
1117 | | "pool_1d(x)", |
1118 | | "pool_2d(x)", |
1119 | | "pool_2d_back(x)", |
1120 | | "upscale(x)", |
1121 | | "pad(x)", |
1122 | | "pad_reflect_1d(x)", |
1123 | | "roll(x)", |
1124 | | "arange(start, stop, step)", |
1125 | | "timestep_embedding(timesteps, dim, max_period)", |
1126 | | "argsort(x)", |
1127 | | "top_k(x)", |
1128 | | "leaky_relu(x)", |
1129 | | "tri(x)", |
1130 | | "fill(x, c)", |
1131 | | |
1132 | | "flash_attn_ext(x)", |
1133 | | "flash_attn_back(x)", |
1134 | | "ssm_conv(x)", |
1135 | | "ssm_scan(x)", |
1136 | | "win_part(x)", |
1137 | | "win_unpart(x)", |
1138 | | "get_rel_pos(x)", |
1139 | | "add_rel_pos(x)", |
1140 | | "rwkv_wkv6(k, v, r, tf, td, s)", |
1141 | | "gated_linear_attn(k, v, q, gate, s)", |
1142 | | "rwkv_wkv7(r, w, k, v, a, b, s)", |
1143 | | "A X = B, A triangular, solve X", |
1144 | | |
1145 | | "unary(x)", |
1146 | | |
1147 | | "map_custom(x)", |
1148 | | "map_custom(x,y)", |
1149 | | "map_custom(x,y,z)", |
1150 | | |
1151 | | "custom(x)", |
1152 | | |
1153 | | "cross_entropy_loss(x,y)", |
1154 | | "cross_entropy_loss_back(x,y)", |
1155 | | "adamw(x)", |
1156 | | "sgd(x)", |
1157 | | |
1158 | | "glu(x)", |
1159 | | }; |
1160 | | |
1161 | | static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95"); |
1162 | | |
1163 | | static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); |
1164 | | |
1165 | | static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = { |
1166 | | "ABS", |
1167 | | "SGN", |
1168 | | "NEG", |
1169 | | "STEP", |
1170 | | "TANH", |
1171 | | "ELU", |
1172 | | "RELU", |
1173 | | "SIGMOID", |
1174 | | "GELU", |
1175 | | "GELU_QUICK", |
1176 | | "SILU", |
1177 | | "HARDSWISH", |
1178 | | "HARDSIGMOID", |
1179 | | "EXP", |
1180 | | "EXPM1", |
1181 | | "SOFTPLUS", |
1182 | | "GELU_ERF", |
1183 | | "XIELU", |
1184 | | "FLOOR", |
1185 | | "CEIL", |
1186 | | "ROUND", |
1187 | | "TRUNC", |
1188 | | }; |
1189 | | |
1190 | | static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22"); |
1191 | | |
1192 | | static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = { |
1193 | | "REGLU", |
1194 | | "GEGLU", |
1195 | | "SWIGLU", |
1196 | | "SWIGLU_OAI", |
1197 | | "GEGLU_ERF", |
1198 | | "GEGLU_QUICK", |
1199 | | }; |
1200 | | |
1201 | | static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6"); |
1202 | | |
1203 | | |
1204 | | static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN"); |
1205 | | static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN"); |
1206 | | |
1207 | | |
1208 | | //////////////////////////////////////////////////////////////////////////////// |
1209 | | |
1210 | 0 | void ggml_print_object(const struct ggml_object * obj) { |
1211 | 0 | GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n", |
1212 | 0 | obj->type, obj->offs, obj->size, (const void *) obj->next); |
1213 | 0 | } |
1214 | | |
1215 | 0 | void ggml_print_objects(const struct ggml_context * ctx) { |
1216 | 0 | struct ggml_object * obj = ctx->objects_begin; |
1217 | |
|
1218 | 0 | GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx); |
1219 | |
|
1220 | 0 | while (obj != NULL) { |
1221 | 0 | ggml_print_object(obj); |
1222 | 0 | obj = obj->next; |
1223 | 0 | } |
1224 | |
|
1225 | 0 | GGML_LOG_INFO("%s: --- end ---\n", __func__); |
1226 | 0 | } |
1227 | | |
1228 | 0 | int64_t ggml_nelements(const struct ggml_tensor * tensor) { |
1229 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1230 | |
|
1231 | 0 | return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3]; |
1232 | 0 | } |
1233 | | |
1234 | 0 | int64_t ggml_nrows(const struct ggml_tensor * tensor) { |
1235 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1236 | |
|
1237 | 0 | return tensor->ne[1]*tensor->ne[2]*tensor->ne[3]; |
1238 | 0 | } |
1239 | | |
1240 | 0 | size_t ggml_nbytes(const struct ggml_tensor * tensor) { |
1241 | 0 | for (int i = 0; i < GGML_MAX_DIMS; ++i) { |
1242 | 0 | if (tensor->ne[i] <= 0) { |
1243 | 0 | return 0; |
1244 | 0 | } |
1245 | 0 | } |
1246 | | |
1247 | 0 | size_t nbytes; |
1248 | 0 | const size_t blck_size = ggml_blck_size(tensor->type); |
1249 | 0 | if (blck_size == 1) { |
1250 | 0 | nbytes = ggml_type_size(tensor->type); |
1251 | 0 | for (int i = 0; i < GGML_MAX_DIMS; ++i) { |
1252 | 0 | nbytes += (tensor->ne[i] - 1)*tensor->nb[i]; |
1253 | 0 | } |
1254 | 0 | } |
1255 | 0 | else { |
1256 | 0 | nbytes = tensor->ne[0]*tensor->nb[0]/blck_size; |
1257 | 0 | for (int i = 1; i < GGML_MAX_DIMS; ++i) { |
1258 | 0 | nbytes += (tensor->ne[i] - 1)*tensor->nb[i]; |
1259 | 0 | } |
1260 | 0 | } |
1261 | |
|
1262 | 0 | return nbytes; |
1263 | 0 | } |
1264 | | |
1265 | 0 | size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) { |
1266 | 0 | return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN); |
1267 | 0 | } |
1268 | | |
1269 | 0 | int64_t ggml_blck_size(enum ggml_type type) { |
1270 | 0 | return type_traits[type].blck_size; |
1271 | 0 | } |
1272 | | |
1273 | 0 | size_t ggml_type_size(enum ggml_type type) { |
1274 | 0 | return type_traits[type].type_size; |
1275 | 0 | } |
1276 | | |
1277 | 0 | size_t ggml_row_size(enum ggml_type type, int64_t ne) { |
1278 | 0 | assert(ne % ggml_blck_size(type) == 0); |
1279 | 0 | return ggml_type_size(type)*ne/ggml_blck_size(type); |
1280 | 0 | } |
1281 | | |
1282 | 0 | double ggml_type_sizef(enum ggml_type type) { |
1283 | 0 | return ((double)(type_traits[type].type_size))/type_traits[type].blck_size; |
1284 | 0 | } |
1285 | | |
1286 | 0 | const char * ggml_type_name(enum ggml_type type) { |
1287 | 0 | return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE"; |
1288 | 0 | } |
1289 | | |
1290 | 0 | bool ggml_is_quantized(enum ggml_type type) { |
1291 | 0 | return type_traits[type].is_quantized; |
1292 | 0 | } |
1293 | | |
1294 | 0 | const char * ggml_op_name(enum ggml_op op) { |
1295 | 0 | return GGML_OP_NAME[op]; |
1296 | 0 | } |
1297 | | |
1298 | 0 | const char * ggml_op_symbol(enum ggml_op op) { |
1299 | 0 | return GGML_OP_SYMBOL[op]; |
1300 | 0 | } |
1301 | | |
1302 | 0 | const char * ggml_unary_op_name(enum ggml_unary_op op) { |
1303 | 0 | return GGML_UNARY_OP_NAME[op]; |
1304 | 0 | } |
1305 | | |
1306 | 0 | const char * ggml_glu_op_name(enum ggml_glu_op op) { |
1307 | 0 | return GGML_GLU_OP_NAME[op]; |
1308 | 0 | } |
1309 | | |
1310 | 0 | const char * ggml_op_desc(const struct ggml_tensor * t) { |
1311 | 0 | if (t->op == GGML_OP_UNARY) { |
1312 | 0 | enum ggml_unary_op uop = ggml_get_unary_op(t); |
1313 | 0 | return ggml_unary_op_name(uop); |
1314 | 0 | } |
1315 | 0 | if (t->op == GGML_OP_GLU) { |
1316 | 0 | enum ggml_glu_op gop = ggml_get_glu_op(t); |
1317 | 0 | return ggml_glu_op_name(gop); |
1318 | 0 | } |
1319 | 0 | return ggml_op_name(t->op); |
1320 | 0 | } |
1321 | | |
1322 | 0 | size_t ggml_element_size(const struct ggml_tensor * tensor) { |
1323 | 0 | return ggml_type_size(tensor->type); |
1324 | 0 | } |
1325 | | |
1326 | 0 | bool ggml_is_scalar(const struct ggml_tensor * tensor) { |
1327 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1328 | |
|
1329 | 0 | return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1; |
1330 | 0 | } |
1331 | | |
1332 | 0 | bool ggml_is_vector(const struct ggml_tensor * tensor) { |
1333 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1334 | |
|
1335 | 0 | return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1; |
1336 | 0 | } |
1337 | | |
1338 | 0 | bool ggml_is_matrix(const struct ggml_tensor * tensor) { |
1339 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1340 | |
|
1341 | 0 | return tensor->ne[2] == 1 && tensor->ne[3] == 1; |
1342 | 0 | } |
1343 | | |
1344 | 0 | bool ggml_is_3d(const struct ggml_tensor * tensor) { |
1345 | 0 | return tensor->ne[3] == 1; |
1346 | 0 | } |
1347 | | |
1348 | 0 | int ggml_n_dims(const struct ggml_tensor * tensor) { |
1349 | 0 | for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) { |
1350 | 0 | if (tensor->ne[i] > 1) { |
1351 | 0 | return i + 1; |
1352 | 0 | } |
1353 | 0 | } |
1354 | 0 | return 1; |
1355 | 0 | } |
1356 | | |
1357 | 0 | enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) { |
1358 | 0 | enum ggml_type wtype = GGML_TYPE_COUNT; |
1359 | |
|
1360 | 0 | switch (ftype) { |
1361 | 0 | case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break; |
1362 | 0 | case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break; |
1363 | 0 | case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break; |
1364 | 0 | case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break; |
1365 | 0 | case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break; |
1366 | 0 | case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break; |
1367 | 0 | case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break; |
1368 | 0 | case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break; |
1369 | 0 | case GGML_FTYPE_MOSTLY_MXFP4: wtype = GGML_TYPE_MXFP4; break; |
1370 | 0 | case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break; |
1371 | 0 | case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break; |
1372 | 0 | case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break; |
1373 | 0 | case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break; |
1374 | 0 | case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break; |
1375 | 0 | case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break; |
1376 | 0 | case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break; |
1377 | 0 | case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break; |
1378 | 0 | case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break; |
1379 | 0 | case GGML_FTYPE_MOSTLY_IQ1_M: wtype = GGML_TYPE_IQ1_M; break; |
1380 | 0 | case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break; |
1381 | 0 | case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break; |
1382 | 0 | case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break; |
1383 | 0 | case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break; |
1384 | 0 | case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break; |
1385 | 0 | case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break; |
1386 | 0 | } |
1387 | | |
1388 | 0 | GGML_ASSERT(wtype != GGML_TYPE_COUNT); |
1389 | |
|
1390 | 0 | return wtype; |
1391 | 0 | } |
1392 | | |
1393 | 0 | size_t ggml_tensor_overhead(void) { |
1394 | 0 | return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE; |
1395 | 0 | } |
1396 | | |
1397 | 0 | bool ggml_is_transposed(const struct ggml_tensor * tensor) { |
1398 | 0 | return tensor->nb[0] > tensor->nb[1]; |
1399 | 0 | } |
1400 | | |
1401 | 0 | static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) { |
1402 | 0 | size_t next_nb = ggml_type_size(tensor->type); |
1403 | 0 | if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) { |
1404 | 0 | return false; |
1405 | 0 | } |
1406 | 0 | next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type); |
1407 | 0 | for (int i = 1; i < GGML_MAX_DIMS; i++) { |
1408 | 0 | if (tensor->ne[i] != 1) { |
1409 | 0 | if (i > n) { |
1410 | 0 | if (tensor->nb[i] != next_nb) { |
1411 | 0 | return false; |
1412 | 0 | } |
1413 | 0 | next_nb *= tensor->ne[i]; |
1414 | 0 | } else { |
1415 | | // this dimension does not need to be contiguous |
1416 | 0 | next_nb = tensor->ne[i]*tensor->nb[i]; |
1417 | 0 | } |
1418 | 0 | } |
1419 | 0 | } |
1420 | 0 | return true; |
1421 | 0 | } |
1422 | | |
1423 | 0 | bool ggml_is_contiguous(const struct ggml_tensor * tensor) { |
1424 | 0 | return ggml_is_contiguous_0(tensor); |
1425 | 0 | } |
1426 | | |
1427 | 0 | bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) { |
1428 | 0 | return ggml_is_contiguous_n(tensor, 0); |
1429 | 0 | } |
1430 | | |
1431 | 0 | bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) { |
1432 | 0 | return ggml_is_contiguous_n(tensor, 1); |
1433 | 0 | } |
1434 | | |
1435 | 0 | bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) { |
1436 | 0 | return ggml_is_contiguous_n(tensor, 2); |
1437 | 0 | } |
1438 | | |
1439 | 0 | bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) { |
1440 | 0 | return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type); |
1441 | 0 | } |
1442 | | |
1443 | 0 | bool ggml_is_permuted(const struct ggml_tensor * tensor) { |
1444 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1445 | |
|
1446 | 0 | return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3]; |
1447 | 0 | } |
1448 | | |
1449 | 0 | bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) { |
1450 | 0 | return |
1451 | 0 | tensor->nb[0] > tensor->nb[2] && |
1452 | 0 | tensor->nb[1] > tensor->nb[0] && |
1453 | 0 | tensor->nb[2] == ggml_type_size(tensor->type); |
1454 | 0 | } |
1455 | | |
1456 | 0 | bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) { |
1457 | 0 | return |
1458 | 0 | tensor->ne[0] == ggml_blck_size(tensor->type) || |
1459 | 0 | tensor->nb[0] == ggml_type_size(tensor->type); |
1460 | 0 | } |
1461 | | |
1462 | 0 | static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) { |
1463 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1464 | |
|
1465 | 0 | return |
1466 | 0 | tensor->nb[0] == ggml_type_size(tensor->type) && |
1467 | 0 | tensor->nb[2] == tensor->nb[1]*tensor->ne[1] && |
1468 | 0 | tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; |
1469 | 0 | } |
1470 | | |
1471 | 0 | bool ggml_is_empty(const struct ggml_tensor * tensor) { |
1472 | 0 | for (int i = 0; i < GGML_MAX_DIMS; ++i) { |
1473 | 0 | if (tensor->ne[i] == 0) { |
1474 | | // empty if any dimension has no elements |
1475 | 0 | return true; |
1476 | 0 | } |
1477 | 0 | } |
1478 | 0 | return false; |
1479 | 0 | } |
1480 | | |
1481 | 0 | bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
1482 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1483 | |
|
1484 | 0 | return |
1485 | 0 | (t0->ne[0] == t1->ne[0]) && |
1486 | 0 | (t0->ne[1] == t1->ne[1]) && |
1487 | 0 | (t0->ne[2] == t1->ne[2]) && |
1488 | 0 | (t0->ne[3] == t1->ne[3]); |
1489 | 0 | } |
1490 | | |
1491 | 0 | bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
1492 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1493 | |
|
1494 | 0 | return |
1495 | 0 | (t0->nb[0] == t1->nb[0]) && |
1496 | 0 | (t0->nb[1] == t1->nb[1]) && |
1497 | 0 | (t0->nb[2] == t1->nb[2]) && |
1498 | 0 | (t0->nb[3] == t1->nb[3]); |
1499 | 0 | } |
1500 | | |
1501 | | // check if t1 can be represented as a repetition of t0 |
1502 | 0 | bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
1503 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1504 | |
|
1505 | 0 | return ggml_is_empty(t0) ? ggml_is_empty(t1) : |
1506 | 0 | (t1->ne[0]%t0->ne[0] == 0) && |
1507 | 0 | (t1->ne[1]%t0->ne[1] == 0) && |
1508 | 0 | (t1->ne[2]%t0->ne[2] == 0) && |
1509 | 0 | (t1->ne[3]%t0->ne[3] == 0); |
1510 | 0 | } |
1511 | | |
1512 | 0 | static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
1513 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1514 | |
|
1515 | 0 | return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1); |
1516 | 0 | } |
1517 | | |
1518 | | // assert that pointer is aligned to GGML_MEM_ALIGN |
1519 | | #define GGML_ASSERT_ALIGNED(ptr) \ |
1520 | 9 | GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0) |
1521 | | |
1522 | | //////////////////////////////////////////////////////////////////////////////// |
1523 | | |
1524 | 9 | struct ggml_context * ggml_init(struct ggml_init_params params) { |
1525 | 9 | bool is_first_call = true; |
1526 | | |
1527 | 9 | ggml_critical_section_start(); |
1528 | | |
1529 | 9 | if (is_first_call) { |
1530 | | // initialize time system (required on Windows) |
1531 | 9 | ggml_time_init(); |
1532 | | |
1533 | 9 | is_first_call = false; |
1534 | 9 | } |
1535 | | |
1536 | 9 | ggml_critical_section_end(); |
1537 | | |
1538 | 9 | struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context)); |
1539 | | |
1540 | | // allow to call ggml_init with 0 size |
1541 | 9 | if (params.mem_size == 0) { |
1542 | 9 | params.mem_size = GGML_MEM_ALIGN; |
1543 | 9 | } |
1544 | | |
1545 | 9 | const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN); |
1546 | | |
1547 | 9 | *ctx = (struct ggml_context) { |
1548 | 9 | /*.mem_size =*/ mem_size, |
1549 | 9 | /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size), |
1550 | 9 | /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, |
1551 | 9 | /*.no_alloc =*/ params.no_alloc, |
1552 | 9 | /*.n_objects =*/ 0, |
1553 | 9 | /*.objects_begin =*/ NULL, |
1554 | 9 | /*.objects_end =*/ NULL, |
1555 | 9 | }; |
1556 | | |
1557 | 9 | GGML_ASSERT(ctx->mem_buffer != NULL); |
1558 | | |
1559 | 9 | GGML_ASSERT_ALIGNED(ctx->mem_buffer); |
1560 | | |
1561 | 9 | GGML_PRINT_DEBUG("%s: context initialized\n", __func__); |
1562 | | |
1563 | 9 | return ctx; |
1564 | 9 | } |
1565 | | |
1566 | 0 | void ggml_reset(struct ggml_context * ctx) { |
1567 | 0 | if (ctx == NULL) { |
1568 | 0 | return; |
1569 | 0 | } |
1570 | | |
1571 | 0 | ctx->n_objects = 0; |
1572 | 0 | ctx->objects_begin = NULL; |
1573 | 0 | ctx->objects_end = NULL; |
1574 | 0 | } |
1575 | | |
1576 | 9 | void ggml_free(struct ggml_context * ctx) { |
1577 | 9 | if (ctx == NULL) { |
1578 | 0 | return; |
1579 | 0 | } |
1580 | | |
1581 | 9 | if (ctx->mem_buffer_owned) { |
1582 | 9 | ggml_aligned_free(ctx->mem_buffer, ctx->mem_size); |
1583 | 9 | } |
1584 | | |
1585 | 9 | GGML_FREE(ctx); |
1586 | 9 | } |
1587 | | |
1588 | 0 | size_t ggml_used_mem(const struct ggml_context * ctx) { |
1589 | 0 | return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size; |
1590 | 0 | } |
1591 | | |
1592 | 0 | bool ggml_get_no_alloc(struct ggml_context * ctx) { |
1593 | 0 | return ctx->no_alloc; |
1594 | 0 | } |
1595 | | |
1596 | 0 | void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) { |
1597 | 0 | ctx->no_alloc = no_alloc; |
1598 | 0 | } |
1599 | | |
1600 | 0 | void * ggml_get_mem_buffer(const struct ggml_context * ctx) { |
1601 | 0 | return ctx->mem_buffer; |
1602 | 0 | } |
1603 | | |
1604 | 0 | size_t ggml_get_mem_size(const struct ggml_context * ctx) { |
1605 | 0 | return ctx->mem_size; |
1606 | 0 | } |
1607 | | |
1608 | 0 | size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) { |
1609 | 0 | size_t max_size = 0; |
1610 | |
|
1611 | 0 | for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) { |
1612 | 0 | size_t bytes = ggml_nbytes(tensor); |
1613 | 0 | max_size = MAX(max_size, bytes); |
1614 | 0 | } |
1615 | |
|
1616 | 0 | return max_size; |
1617 | 0 | } |
1618 | | |
1619 | | //////////////////////////////////////////////////////////////////////////////// |
1620 | | |
1621 | 0 | static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) { |
1622 | | // always insert objects at the end of the context's memory pool |
1623 | 0 | struct ggml_object * obj_cur = ctx->objects_end; |
1624 | |
|
1625 | 0 | const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs; |
1626 | 0 | const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size; |
1627 | 0 | const size_t cur_end = cur_offs + cur_size; |
1628 | | |
1629 | | // align to GGML_MEM_ALIGN |
1630 | 0 | size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN); |
1631 | |
|
1632 | 0 | char * const mem_buffer = ctx->mem_buffer; |
1633 | 0 | struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end); |
1634 | |
|
1635 | 0 | if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) { |
1636 | 0 | GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n", |
1637 | 0 | __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size); |
1638 | | #ifndef NDEBUG |
1639 | | GGML_ABORT("not enough space in the context's memory pool"); |
1640 | | #endif |
1641 | 0 | return NULL; |
1642 | 0 | } |
1643 | | |
1644 | 0 | *obj_new = (struct ggml_object) { |
1645 | 0 | .offs = cur_end + GGML_OBJECT_SIZE, |
1646 | 0 | .size = size_needed, |
1647 | 0 | .next = NULL, |
1648 | 0 | .type = type, |
1649 | 0 | }; |
1650 | |
|
1651 | 0 | GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs); |
1652 | |
|
1653 | 0 | if (obj_cur != NULL) { |
1654 | 0 | obj_cur->next = obj_new; |
1655 | 0 | } else { |
1656 | | // this is the first object in this context |
1657 | 0 | ctx->objects_begin = obj_new; |
1658 | 0 | } |
1659 | |
|
1660 | 0 | ctx->objects_end = obj_new; |
1661 | | |
1662 | | //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size); |
1663 | |
|
1664 | 0 | return obj_new; |
1665 | 0 | } |
1666 | | |
1667 | | static struct ggml_tensor * ggml_new_tensor_impl( |
1668 | | struct ggml_context * ctx, |
1669 | | enum ggml_type type, |
1670 | | int n_dims, |
1671 | | const int64_t * ne, |
1672 | | struct ggml_tensor * view_src, |
1673 | 0 | size_t view_offs) { |
1674 | |
|
1675 | 0 | GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT); |
1676 | 0 | GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS); |
1677 | | |
1678 | | // find the base tensor and absolute offset |
1679 | 0 | if (view_src != NULL && view_src->view_src != NULL) { |
1680 | 0 | view_offs += view_src->view_offs; |
1681 | 0 | view_src = view_src->view_src; |
1682 | 0 | } |
1683 | |
|
1684 | 0 | size_t data_size = ggml_row_size(type, ne[0]); |
1685 | 0 | for (int i = 1; i < n_dims; i++) { |
1686 | 0 | data_size *= ne[i]; |
1687 | 0 | } |
1688 | |
|
1689 | 0 | GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src)); |
1690 | |
|
1691 | 0 | void * data = view_src != NULL ? view_src->data : NULL; |
1692 | 0 | if (data != NULL) { |
1693 | 0 | data = (char *) data + view_offs; |
1694 | 0 | } |
1695 | |
|
1696 | 0 | size_t obj_alloc_size = 0; |
1697 | |
|
1698 | 0 | if (view_src == NULL && !ctx->no_alloc) { |
1699 | | // allocate tensor data in the context's memory pool |
1700 | 0 | obj_alloc_size = data_size; |
1701 | 0 | } |
1702 | |
|
1703 | 0 | struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size); |
1704 | 0 | GGML_ASSERT(obj_new); |
1705 | |
|
1706 | 0 | struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs); |
1707 | |
|
1708 | 0 | *result = (struct ggml_tensor) { |
1709 | 0 | /*.type =*/ type, |
1710 | 0 | /*.buffer =*/ NULL, |
1711 | 0 | /*.ne =*/ { 1, 1, 1, 1 }, |
1712 | 0 | /*.nb =*/ { 0, 0, 0, 0 }, |
1713 | 0 | /*.op =*/ GGML_OP_NONE, |
1714 | 0 | /*.op_params =*/ { 0 }, |
1715 | 0 | /*.flags =*/ 0, |
1716 | 0 | /*.src =*/ { NULL }, |
1717 | 0 | /*.view_src =*/ view_src, |
1718 | 0 | /*.view_offs =*/ view_offs, |
1719 | 0 | /*.data =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data, |
1720 | 0 | /*.name =*/ { 0 }, |
1721 | 0 | /*.extra =*/ NULL, |
1722 | 0 | /*.padding =*/ { 0 }, |
1723 | 0 | }; |
1724 | | |
1725 | | // TODO: this should not be needed as long as we don't rely on aligned SIMD loads |
1726 | | //GGML_ASSERT_ALIGNED(result->data); |
1727 | |
|
1728 | 0 | for (int i = 0; i < n_dims; i++) { |
1729 | 0 | result->ne[i] = ne[i]; |
1730 | 0 | } |
1731 | |
|
1732 | 0 | result->nb[0] = ggml_type_size(type); |
1733 | 0 | result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type)); |
1734 | 0 | for (int i = 2; i < GGML_MAX_DIMS; i++) { |
1735 | 0 | result->nb[i] = result->nb[i - 1]*result->ne[i - 1]; |
1736 | 0 | } |
1737 | |
|
1738 | 0 | ctx->n_objects++; |
1739 | |
|
1740 | 0 | return result; |
1741 | 0 | } |
1742 | | |
1743 | | struct ggml_tensor * ggml_new_tensor( |
1744 | | struct ggml_context * ctx, |
1745 | | enum ggml_type type, |
1746 | | int n_dims, |
1747 | 0 | const int64_t * ne) { |
1748 | 0 | return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0); |
1749 | 0 | } |
1750 | | |
1751 | | struct ggml_tensor * ggml_new_tensor_1d( |
1752 | | struct ggml_context * ctx, |
1753 | | enum ggml_type type, |
1754 | 0 | int64_t ne0) { |
1755 | 0 | return ggml_new_tensor(ctx, type, 1, &ne0); |
1756 | 0 | } |
1757 | | |
1758 | | struct ggml_tensor * ggml_new_tensor_2d( |
1759 | | struct ggml_context * ctx, |
1760 | | enum ggml_type type, |
1761 | | int64_t ne0, |
1762 | 0 | int64_t ne1) { |
1763 | 0 | const int64_t ne[2] = { ne0, ne1 }; |
1764 | 0 | return ggml_new_tensor(ctx, type, 2, ne); |
1765 | 0 | } |
1766 | | |
1767 | | struct ggml_tensor * ggml_new_tensor_3d( |
1768 | | struct ggml_context * ctx, |
1769 | | enum ggml_type type, |
1770 | | int64_t ne0, |
1771 | | int64_t ne1, |
1772 | 0 | int64_t ne2) { |
1773 | 0 | const int64_t ne[3] = { ne0, ne1, ne2 }; |
1774 | 0 | return ggml_new_tensor(ctx, type, 3, ne); |
1775 | 0 | } |
1776 | | |
1777 | | struct ggml_tensor * ggml_new_tensor_4d( |
1778 | | struct ggml_context * ctx, |
1779 | | enum ggml_type type, |
1780 | | int64_t ne0, |
1781 | | int64_t ne1, |
1782 | | int64_t ne2, |
1783 | 0 | int64_t ne3) { |
1784 | 0 | const int64_t ne[4] = { ne0, ne1, ne2, ne3 }; |
1785 | 0 | return ggml_new_tensor(ctx, type, 4, ne); |
1786 | 0 | } |
1787 | | |
1788 | 0 | void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) { |
1789 | 0 | struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes); |
1790 | |
|
1791 | 0 | return (uint8_t *)ctx->mem_buffer + obj->offs; |
1792 | 0 | } |
1793 | | |
1794 | 0 | struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) { |
1795 | 0 | return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne); |
1796 | 0 | } |
1797 | | |
1798 | 0 | void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) { |
1799 | 0 | const int64_t ne2 = tensor->ne[2]; |
1800 | 0 | const int64_t ne1 = tensor->ne[1]; |
1801 | 0 | const int64_t ne0 = tensor->ne[0]; |
1802 | |
|
1803 | 0 | const int64_t i3_ = (i/(ne2*ne1*ne0)); |
1804 | 0 | const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0); |
1805 | 0 | const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0; |
1806 | 0 | const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0); |
1807 | |
|
1808 | 0 | if (i0) { |
1809 | 0 | * i0 = i0_; |
1810 | 0 | } |
1811 | 0 | if (i1) { |
1812 | 0 | * i1 = i1_; |
1813 | 0 | } |
1814 | 0 | if (i2) { |
1815 | 0 | * i2 = i2_; |
1816 | 0 | } |
1817 | 0 | if (i3) { |
1818 | 0 | * i3 = i3_; |
1819 | 0 | } |
1820 | 0 | } |
1821 | | |
1822 | 0 | void * ggml_get_data(const struct ggml_tensor * tensor) { |
1823 | 0 | return tensor->data; |
1824 | 0 | } |
1825 | | |
1826 | 0 | float * ggml_get_data_f32(const struct ggml_tensor * tensor) { |
1827 | 0 | assert(tensor->type == GGML_TYPE_F32); |
1828 | 0 | return (float *)(tensor->data); |
1829 | 0 | } |
1830 | | |
1831 | 0 | enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) { |
1832 | 0 | GGML_ASSERT(tensor->op == GGML_OP_UNARY); |
1833 | 0 | return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0); |
1834 | 0 | } |
1835 | | |
1836 | 0 | enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) { |
1837 | 0 | GGML_ASSERT(tensor->op == GGML_OP_GLU); |
1838 | 0 | return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0); |
1839 | 0 | } |
1840 | | |
1841 | 0 | const char * ggml_get_name(const struct ggml_tensor * tensor) { |
1842 | 0 | return tensor->name; |
1843 | 0 | } |
1844 | | |
1845 | 0 | struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) { |
1846 | 0 | size_t i; |
1847 | 0 | for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) { |
1848 | 0 | tensor->name[i] = name[i]; |
1849 | 0 | } |
1850 | 0 | tensor->name[i] = '\0'; |
1851 | 0 | return tensor; |
1852 | 0 | } |
1853 | | |
1854 | 0 | struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) { |
1855 | 0 | va_list args; |
1856 | 0 | va_start(args, fmt); |
1857 | 0 | vsnprintf(tensor->name, sizeof(tensor->name), fmt, args); |
1858 | 0 | va_end(args); |
1859 | 0 | return tensor; |
1860 | 0 | } |
1861 | | |
1862 | | struct ggml_tensor * ggml_view_tensor( |
1863 | | struct ggml_context * ctx, |
1864 | 0 | struct ggml_tensor * src) { |
1865 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0); |
1866 | 0 | ggml_format_name(result, "%s (view)", src->name); |
1867 | |
|
1868 | 0 | for (int i = 0; i < GGML_MAX_DIMS; i++) { |
1869 | 0 | result->nb[i] = src->nb[i]; |
1870 | 0 | } |
1871 | |
|
1872 | 0 | return result; |
1873 | 0 | } |
1874 | | |
1875 | 0 | struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) { |
1876 | 0 | struct ggml_object * obj = ctx->objects_begin; |
1877 | |
|
1878 | 0 | char * const mem_buffer = ctx->mem_buffer; |
1879 | |
|
1880 | 0 | while (obj != NULL) { |
1881 | 0 | if (obj->type == GGML_OBJECT_TYPE_TENSOR) { |
1882 | 0 | return (struct ggml_tensor *)(mem_buffer + obj->offs); |
1883 | 0 | } |
1884 | | |
1885 | 0 | obj = obj->next; |
1886 | 0 | } |
1887 | | |
1888 | 0 | return NULL; |
1889 | 0 | } |
1890 | | |
1891 | 0 | struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) { |
1892 | 0 | struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE); |
1893 | 0 | obj = obj->next; |
1894 | |
|
1895 | 0 | char * const mem_buffer = ctx->mem_buffer; |
1896 | |
|
1897 | 0 | while (obj != NULL) { |
1898 | 0 | if (obj->type == GGML_OBJECT_TYPE_TENSOR) { |
1899 | 0 | return (struct ggml_tensor *)(mem_buffer + obj->offs); |
1900 | 0 | } |
1901 | | |
1902 | 0 | obj = obj->next; |
1903 | 0 | } |
1904 | | |
1905 | 0 | return NULL; |
1906 | 0 | } |
1907 | | |
1908 | 0 | struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) { |
1909 | 0 | struct ggml_object * obj = ctx->objects_begin; |
1910 | |
|
1911 | 0 | char * const mem_buffer = ctx->mem_buffer; |
1912 | |
|
1913 | 0 | while (obj != NULL) { |
1914 | 0 | if (obj->type == GGML_OBJECT_TYPE_TENSOR) { |
1915 | 0 | struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs); |
1916 | 0 | if (strcmp(cur->name, name) == 0) { |
1917 | 0 | return cur; |
1918 | 0 | } |
1919 | 0 | } |
1920 | | |
1921 | 0 | obj = obj->next; |
1922 | 0 | } |
1923 | | |
1924 | 0 | return NULL; |
1925 | 0 | } |
1926 | | |
1927 | | //////////////////////////////////////////////////////////////////////////////// |
1928 | | |
1929 | | // ggml_dup |
1930 | | |
1931 | | static struct ggml_tensor * ggml_dup_impl( |
1932 | | struct ggml_context * ctx, |
1933 | | struct ggml_tensor * a, |
1934 | 0 | bool inplace) { |
1935 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
1936 | |
|
1937 | 0 | result->op = GGML_OP_DUP; |
1938 | 0 | result->src[0] = a; |
1939 | |
|
1940 | 0 | return result; |
1941 | 0 | } |
1942 | | |
1943 | | struct ggml_tensor * ggml_dup( |
1944 | | struct ggml_context * ctx, |
1945 | 0 | struct ggml_tensor * a) { |
1946 | 0 | return ggml_dup_impl(ctx, a, false); |
1947 | 0 | } |
1948 | | |
1949 | | struct ggml_tensor * ggml_dup_inplace( |
1950 | | struct ggml_context * ctx, |
1951 | 0 | struct ggml_tensor * a) { |
1952 | 0 | return ggml_dup_impl(ctx, a, true); |
1953 | 0 | } |
1954 | | |
1955 | | // ggml_add |
1956 | | |
1957 | | static struct ggml_tensor * ggml_add_impl( |
1958 | | struct ggml_context * ctx, |
1959 | | struct ggml_tensor * a, |
1960 | | struct ggml_tensor * b, |
1961 | 0 | bool inplace) { |
1962 | 0 | GGML_ASSERT(ggml_can_repeat(b, a)); |
1963 | |
|
1964 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
1965 | |
|
1966 | 0 | result->op = GGML_OP_ADD; |
1967 | 0 | result->src[0] = a; |
1968 | 0 | result->src[1] = b; |
1969 | |
|
1970 | 0 | return result; |
1971 | 0 | } |
1972 | | |
1973 | | struct ggml_tensor * ggml_add( |
1974 | | struct ggml_context * ctx, |
1975 | | struct ggml_tensor * a, |
1976 | 0 | struct ggml_tensor * b) { |
1977 | 0 | return ggml_add_impl(ctx, a, b, false); |
1978 | 0 | } |
1979 | | |
1980 | | struct ggml_tensor * ggml_add_inplace( |
1981 | | struct ggml_context * ctx, |
1982 | | struct ggml_tensor * a, |
1983 | 0 | struct ggml_tensor * b) { |
1984 | 0 | return ggml_add_impl(ctx, a, b, true); |
1985 | 0 | } |
1986 | | |
1987 | | // ggml_add_cast |
1988 | | |
1989 | | static struct ggml_tensor * ggml_add_cast_impl( |
1990 | | struct ggml_context * ctx, |
1991 | | struct ggml_tensor * a, |
1992 | | struct ggml_tensor * b, |
1993 | 0 | enum ggml_type type) { |
1994 | | // TODO: support less-strict constraint |
1995 | | // GGML_ASSERT(ggml_can_repeat(b, a)); |
1996 | 0 | GGML_ASSERT(ggml_can_repeat_rows(b, a)); |
1997 | | |
1998 | | // currently only supported for quantized input and f16 |
1999 | 0 | GGML_ASSERT(ggml_is_quantized(a->type) || |
2000 | 0 | a->type == GGML_TYPE_F16 || |
2001 | 0 | a->type == GGML_TYPE_BF16); |
2002 | |
|
2003 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne); |
2004 | |
|
2005 | 0 | result->op = GGML_OP_ADD; |
2006 | 0 | result->src[0] = a; |
2007 | 0 | result->src[1] = b; |
2008 | |
|
2009 | 0 | return result; |
2010 | 0 | } |
2011 | | |
2012 | | struct ggml_tensor * ggml_add_cast( |
2013 | | struct ggml_context * ctx, |
2014 | | struct ggml_tensor * a, |
2015 | | struct ggml_tensor * b, |
2016 | 0 | enum ggml_type type) { |
2017 | 0 | return ggml_add_cast_impl(ctx, a, b, type); |
2018 | 0 | } |
2019 | | |
2020 | | struct ggml_tensor * ggml_add_id( |
2021 | | struct ggml_context * ctx, |
2022 | | struct ggml_tensor * a, |
2023 | | struct ggml_tensor * b, |
2024 | 0 | struct ggml_tensor * ids) { |
2025 | |
|
2026 | 0 | GGML_ASSERT(a->ne[0] == b->ne[0]); |
2027 | 0 | GGML_ASSERT(a->ne[1] == ids->ne[0]); |
2028 | 0 | GGML_ASSERT(a->ne[2] == ids->ne[1]); |
2029 | 0 | GGML_ASSERT(ids->type == GGML_TYPE_I32); |
2030 | |
|
2031 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
2032 | |
|
2033 | 0 | result->op = GGML_OP_ADD_ID; |
2034 | 0 | result->src[0] = a; |
2035 | 0 | result->src[1] = b; |
2036 | 0 | result->src[2] = ids; |
2037 | |
|
2038 | 0 | return result; |
2039 | 0 | } |
2040 | | |
2041 | | // ggml_add1 |
2042 | | |
2043 | | static struct ggml_tensor * ggml_add1_impl( |
2044 | | struct ggml_context * ctx, |
2045 | | struct ggml_tensor * a, |
2046 | | struct ggml_tensor * b, |
2047 | 0 | bool inplace) { |
2048 | 0 | GGML_ASSERT(ggml_is_scalar(b)); |
2049 | 0 | GGML_ASSERT(ggml_is_padded_1d(a)); |
2050 | |
|
2051 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2052 | |
|
2053 | 0 | result->op = GGML_OP_ADD1; |
2054 | 0 | result->src[0] = a; |
2055 | 0 | result->src[1] = b; |
2056 | |
|
2057 | 0 | return result; |
2058 | 0 | } |
2059 | | |
2060 | | struct ggml_tensor * ggml_add1( |
2061 | | struct ggml_context * ctx, |
2062 | | struct ggml_tensor * a, |
2063 | 0 | struct ggml_tensor * b) { |
2064 | 0 | return ggml_add1_impl(ctx, a, b, false); |
2065 | 0 | } |
2066 | | |
2067 | | struct ggml_tensor * ggml_add1_inplace( |
2068 | | struct ggml_context * ctx, |
2069 | | struct ggml_tensor * a, |
2070 | 0 | struct ggml_tensor * b) { |
2071 | 0 | return ggml_add1_impl(ctx, a, b, true); |
2072 | 0 | } |
2073 | | |
2074 | | // ggml_acc |
2075 | | |
2076 | | static struct ggml_tensor * ggml_acc_impl( |
2077 | | struct ggml_context * ctx, |
2078 | | struct ggml_tensor * a, |
2079 | | struct ggml_tensor * b, |
2080 | | size_t nb1, |
2081 | | size_t nb2, |
2082 | | size_t nb3, |
2083 | | size_t offset, |
2084 | 0 | bool inplace) { |
2085 | 0 | GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a)); |
2086 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
2087 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
2088 | 0 | GGML_ASSERT(b->type == GGML_TYPE_F32); |
2089 | |
|
2090 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2091 | |
|
2092 | 0 | int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 }; |
2093 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
2094 | |
|
2095 | 0 | result->op = GGML_OP_ACC; |
2096 | 0 | result->src[0] = a; |
2097 | 0 | result->src[1] = b; |
2098 | |
|
2099 | 0 | return result; |
2100 | 0 | } |
2101 | | |
2102 | | struct ggml_tensor * ggml_acc( |
2103 | | struct ggml_context * ctx, |
2104 | | struct ggml_tensor * a, |
2105 | | struct ggml_tensor * b, |
2106 | | size_t nb1, |
2107 | | size_t nb2, |
2108 | | size_t nb3, |
2109 | 0 | size_t offset) { |
2110 | 0 | return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false); |
2111 | 0 | } |
2112 | | |
2113 | | struct ggml_tensor * ggml_acc_inplace( |
2114 | | struct ggml_context * ctx, |
2115 | | struct ggml_tensor * a, |
2116 | | struct ggml_tensor * b, |
2117 | | size_t nb1, |
2118 | | size_t nb2, |
2119 | | size_t nb3, |
2120 | 0 | size_t offset) { |
2121 | 0 | return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true); |
2122 | 0 | } |
2123 | | |
2124 | | // ggml_sub |
2125 | | |
2126 | | static struct ggml_tensor * ggml_sub_impl( |
2127 | | struct ggml_context * ctx, |
2128 | | struct ggml_tensor * a, |
2129 | | struct ggml_tensor * b, |
2130 | 0 | bool inplace) { |
2131 | 0 | GGML_ASSERT(ggml_can_repeat(b, a)); |
2132 | |
|
2133 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2134 | |
|
2135 | 0 | result->op = GGML_OP_SUB; |
2136 | 0 | result->src[0] = a; |
2137 | 0 | result->src[1] = b; |
2138 | |
|
2139 | 0 | return result; |
2140 | 0 | } |
2141 | | |
2142 | | struct ggml_tensor * ggml_sub( |
2143 | | struct ggml_context * ctx, |
2144 | | struct ggml_tensor * a, |
2145 | 0 | struct ggml_tensor * b) { |
2146 | 0 | return ggml_sub_impl(ctx, a, b, false); |
2147 | 0 | } |
2148 | | |
2149 | | struct ggml_tensor * ggml_sub_inplace( |
2150 | | struct ggml_context * ctx, |
2151 | | struct ggml_tensor * a, |
2152 | 0 | struct ggml_tensor * b) { |
2153 | 0 | return ggml_sub_impl(ctx, a, b, true); |
2154 | 0 | } |
2155 | | |
2156 | | // ggml_mul |
2157 | | |
2158 | | static struct ggml_tensor * ggml_mul_impl( |
2159 | | struct ggml_context * ctx, |
2160 | | struct ggml_tensor * a, |
2161 | | struct ggml_tensor * b, |
2162 | 0 | bool inplace) { |
2163 | 0 | GGML_ASSERT(ggml_can_repeat(b, a)); |
2164 | |
|
2165 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2166 | |
|
2167 | 0 | result->op = GGML_OP_MUL; |
2168 | 0 | result->src[0] = a; |
2169 | 0 | result->src[1] = b; |
2170 | |
|
2171 | 0 | return result; |
2172 | 0 | } |
2173 | | |
2174 | | struct ggml_tensor * ggml_mul( |
2175 | | struct ggml_context * ctx, |
2176 | | struct ggml_tensor * a, |
2177 | 0 | struct ggml_tensor * b) { |
2178 | 0 | return ggml_mul_impl(ctx, a, b, false); |
2179 | 0 | } |
2180 | | |
2181 | | struct ggml_tensor * ggml_mul_inplace( |
2182 | | struct ggml_context * ctx, |
2183 | | struct ggml_tensor * a, |
2184 | 0 | struct ggml_tensor * b) { |
2185 | 0 | return ggml_mul_impl(ctx, a, b, true); |
2186 | 0 | } |
2187 | | |
2188 | | // ggml_div |
2189 | | |
2190 | | static struct ggml_tensor * ggml_div_impl( |
2191 | | struct ggml_context * ctx, |
2192 | | struct ggml_tensor * a, |
2193 | | struct ggml_tensor * b, |
2194 | 0 | bool inplace) { |
2195 | 0 | GGML_ASSERT(ggml_can_repeat(b, a)); |
2196 | |
|
2197 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2198 | |
|
2199 | 0 | result->op = GGML_OP_DIV; |
2200 | 0 | result->src[0] = a; |
2201 | 0 | result->src[1] = b; |
2202 | |
|
2203 | 0 | return result; |
2204 | 0 | } |
2205 | | |
2206 | | struct ggml_tensor * ggml_div( |
2207 | | struct ggml_context * ctx, |
2208 | | struct ggml_tensor * a, |
2209 | 0 | struct ggml_tensor * b) { |
2210 | 0 | return ggml_div_impl(ctx, a, b, false); |
2211 | 0 | } |
2212 | | |
2213 | | struct ggml_tensor * ggml_div_inplace( |
2214 | | struct ggml_context * ctx, |
2215 | | struct ggml_tensor * a, |
2216 | 0 | struct ggml_tensor * b) { |
2217 | 0 | return ggml_div_impl(ctx, a, b, true); |
2218 | 0 | } |
2219 | | |
2220 | | // ggml_sqr |
2221 | | |
2222 | | static struct ggml_tensor * ggml_sqr_impl( |
2223 | | struct ggml_context * ctx, |
2224 | | struct ggml_tensor * a, |
2225 | 0 | bool inplace) { |
2226 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2227 | |
|
2228 | 0 | result->op = GGML_OP_SQR; |
2229 | 0 | result->src[0] = a; |
2230 | |
|
2231 | 0 | return result; |
2232 | 0 | } |
2233 | | |
2234 | | struct ggml_tensor * ggml_sqr( |
2235 | | struct ggml_context * ctx, |
2236 | 0 | struct ggml_tensor * a) { |
2237 | 0 | return ggml_sqr_impl(ctx, a, false); |
2238 | 0 | } |
2239 | | |
2240 | | struct ggml_tensor * ggml_sqr_inplace( |
2241 | | struct ggml_context * ctx, |
2242 | 0 | struct ggml_tensor * a) { |
2243 | 0 | return ggml_sqr_impl(ctx, a, true); |
2244 | 0 | } |
2245 | | |
2246 | | // ggml_sqrt |
2247 | | |
2248 | | static struct ggml_tensor * ggml_sqrt_impl( |
2249 | | struct ggml_context * ctx, |
2250 | | struct ggml_tensor * a, |
2251 | 0 | bool inplace) { |
2252 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2253 | |
|
2254 | 0 | result->op = GGML_OP_SQRT; |
2255 | 0 | result->src[0] = a; |
2256 | |
|
2257 | 0 | return result; |
2258 | 0 | } |
2259 | | |
2260 | | struct ggml_tensor * ggml_sqrt( |
2261 | | struct ggml_context * ctx, |
2262 | 0 | struct ggml_tensor * a) { |
2263 | 0 | return ggml_sqrt_impl(ctx, a, false); |
2264 | 0 | } |
2265 | | |
2266 | | struct ggml_tensor * ggml_sqrt_inplace( |
2267 | | struct ggml_context * ctx, |
2268 | 0 | struct ggml_tensor * a) { |
2269 | 0 | return ggml_sqrt_impl(ctx, a, true); |
2270 | 0 | } |
2271 | | |
2272 | | // ggml_log |
2273 | | |
2274 | | static struct ggml_tensor * ggml_log_impl( |
2275 | | struct ggml_context * ctx, |
2276 | | struct ggml_tensor * a, |
2277 | 0 | bool inplace) { |
2278 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2279 | |
|
2280 | 0 | result->op = GGML_OP_LOG; |
2281 | 0 | result->src[0] = a; |
2282 | |
|
2283 | 0 | return result; |
2284 | 0 | } |
2285 | | |
2286 | | struct ggml_tensor * ggml_log( |
2287 | | struct ggml_context * ctx, |
2288 | 0 | struct ggml_tensor * a) { |
2289 | 0 | return ggml_log_impl(ctx, a, false); |
2290 | 0 | } |
2291 | | |
2292 | | struct ggml_tensor * ggml_log_inplace( |
2293 | | struct ggml_context * ctx, |
2294 | 0 | struct ggml_tensor * a) { |
2295 | 0 | return ggml_log_impl(ctx, a, true); |
2296 | 0 | } |
2297 | | |
2298 | | struct ggml_tensor * ggml_expm1( |
2299 | | struct ggml_context * ctx, |
2300 | 0 | struct ggml_tensor * a) { |
2301 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1); |
2302 | 0 | } |
2303 | | |
2304 | | struct ggml_tensor * ggml_expm1_inplace( |
2305 | | struct ggml_context * ctx, |
2306 | 0 | struct ggml_tensor * a) { |
2307 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1); |
2308 | 0 | } |
2309 | | |
2310 | | struct ggml_tensor * ggml_softplus( |
2311 | | struct ggml_context * ctx, |
2312 | 0 | struct ggml_tensor * a) { |
2313 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS); |
2314 | 0 | } |
2315 | | |
2316 | | struct ggml_tensor * ggml_softplus_inplace( |
2317 | | struct ggml_context * ctx, |
2318 | 0 | struct ggml_tensor * a) { |
2319 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS); |
2320 | 0 | } |
2321 | | |
2322 | | // ggml_sin |
2323 | | |
2324 | | static struct ggml_tensor * ggml_sin_impl( |
2325 | | struct ggml_context * ctx, |
2326 | | struct ggml_tensor * a, |
2327 | 0 | bool inplace) { |
2328 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2329 | |
|
2330 | 0 | result->op = GGML_OP_SIN; |
2331 | 0 | result->src[0] = a; |
2332 | |
|
2333 | 0 | return result; |
2334 | 0 | } |
2335 | | |
2336 | | struct ggml_tensor * ggml_sin( |
2337 | | struct ggml_context * ctx, |
2338 | 0 | struct ggml_tensor * a) { |
2339 | 0 | return ggml_sin_impl(ctx, a, false); |
2340 | 0 | } |
2341 | | |
2342 | | struct ggml_tensor * ggml_sin_inplace( |
2343 | | struct ggml_context * ctx, |
2344 | 0 | struct ggml_tensor * a) { |
2345 | 0 | return ggml_sin_impl(ctx, a, true); |
2346 | 0 | } |
2347 | | |
2348 | | // ggml_cos |
2349 | | |
2350 | | static struct ggml_tensor * ggml_cos_impl( |
2351 | | struct ggml_context * ctx, |
2352 | | struct ggml_tensor * a, |
2353 | 0 | bool inplace) { |
2354 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2355 | |
|
2356 | 0 | result->op = GGML_OP_COS; |
2357 | 0 | result->src[0] = a; |
2358 | |
|
2359 | 0 | return result; |
2360 | 0 | } |
2361 | | |
2362 | | struct ggml_tensor * ggml_cos( |
2363 | | struct ggml_context * ctx, |
2364 | 0 | struct ggml_tensor * a) { |
2365 | 0 | return ggml_cos_impl(ctx, a, false); |
2366 | 0 | } |
2367 | | |
2368 | | struct ggml_tensor * ggml_cos_inplace( |
2369 | | struct ggml_context * ctx, |
2370 | 0 | struct ggml_tensor * a) { |
2371 | 0 | return ggml_cos_impl(ctx, a, true); |
2372 | 0 | } |
2373 | | |
2374 | | // ggml_sum |
2375 | | |
2376 | | struct ggml_tensor * ggml_sum( |
2377 | | struct ggml_context * ctx, |
2378 | 0 | struct ggml_tensor * a) { |
2379 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1); |
2380 | |
|
2381 | 0 | result->op = GGML_OP_SUM; |
2382 | 0 | result->src[0] = a; |
2383 | |
|
2384 | 0 | return result; |
2385 | 0 | } |
2386 | | |
2387 | | // ggml_sum_rows |
2388 | | |
2389 | | struct ggml_tensor * ggml_sum_rows( |
2390 | | struct ggml_context * ctx, |
2391 | 0 | struct ggml_tensor * a) { |
2392 | 0 | int64_t ne[GGML_MAX_DIMS] = { 1 }; |
2393 | 0 | for (int i = 1; i < GGML_MAX_DIMS; ++i) { |
2394 | 0 | ne[i] = a->ne[i]; |
2395 | 0 | } |
2396 | |
|
2397 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne); |
2398 | |
|
2399 | 0 | result->op = GGML_OP_SUM_ROWS; |
2400 | 0 | result->src[0] = a; |
2401 | |
|
2402 | 0 | return result; |
2403 | 0 | } |
2404 | | |
2405 | | // ggml_cumsum |
2406 | | |
2407 | | struct ggml_tensor * ggml_cumsum( |
2408 | | struct ggml_context * ctx, |
2409 | 0 | struct ggml_tensor * a) { |
2410 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
2411 | |
|
2412 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
2413 | |
|
2414 | 0 | result->op = GGML_OP_CUMSUM; |
2415 | 0 | result->src[0] = a; |
2416 | |
|
2417 | 0 | return result; |
2418 | 0 | } |
2419 | | |
2420 | | // ggml_mean |
2421 | | |
2422 | | struct ggml_tensor * ggml_mean( |
2423 | | struct ggml_context * ctx, |
2424 | 0 | struct ggml_tensor * a) { |
2425 | 0 | int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] }; |
2426 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
2427 | |
|
2428 | 0 | result->op = GGML_OP_MEAN; |
2429 | 0 | result->src[0] = a; |
2430 | |
|
2431 | 0 | return result; |
2432 | 0 | } |
2433 | | |
2434 | | // ggml_argmax |
2435 | | |
2436 | | struct ggml_tensor * ggml_argmax( |
2437 | | struct ggml_context * ctx, |
2438 | 0 | struct ggml_tensor * a) { |
2439 | 0 | GGML_ASSERT(ggml_is_matrix(a)); |
2440 | 0 | GGML_ASSERT(a->ne[0] <= INT32_MAX); |
2441 | |
|
2442 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]); |
2443 | |
|
2444 | 0 | result->op = GGML_OP_ARGMAX; |
2445 | 0 | result->src[0] = a; |
2446 | |
|
2447 | 0 | return result; |
2448 | 0 | } |
2449 | | |
2450 | | // ggml_count_equal |
2451 | | |
2452 | | struct ggml_tensor * ggml_count_equal( |
2453 | | struct ggml_context * ctx, |
2454 | | struct ggml_tensor * a, |
2455 | 0 | struct ggml_tensor * b) { |
2456 | 0 | GGML_ASSERT(ggml_are_same_shape(a, b)); |
2457 | |
|
2458 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1); |
2459 | |
|
2460 | 0 | result->op = GGML_OP_COUNT_EQUAL; |
2461 | 0 | result->src[0] = a; |
2462 | 0 | result->src[1] = b; |
2463 | |
|
2464 | 0 | return result; |
2465 | 0 | } |
2466 | | |
2467 | | // ggml_repeat |
2468 | | |
2469 | | struct ggml_tensor * ggml_repeat( |
2470 | | struct ggml_context * ctx, |
2471 | | struct ggml_tensor * a, |
2472 | 0 | struct ggml_tensor * b) { |
2473 | 0 | GGML_ASSERT(ggml_can_repeat(a, b)); |
2474 | |
|
2475 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne); |
2476 | |
|
2477 | 0 | result->op = GGML_OP_REPEAT; |
2478 | 0 | result->src[0] = a; |
2479 | |
|
2480 | 0 | return result; |
2481 | 0 | } |
2482 | | |
2483 | | struct ggml_tensor * ggml_repeat_4d( |
2484 | | struct ggml_context * ctx, |
2485 | | struct ggml_tensor * a, |
2486 | 0 | int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) { |
2487 | 0 | const bool can_repeat = ggml_is_empty(a) || ( |
2488 | 0 | (ne0 % a->ne[0] == 0) && |
2489 | 0 | (ne1 % a->ne[1] == 0) && |
2490 | 0 | (ne2 % a->ne[2] == 0) && |
2491 | 0 | (ne3 % a->ne[3] == 0) |
2492 | 0 | ); |
2493 | 0 | GGML_ASSERT(can_repeat); |
2494 | |
|
2495 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3); |
2496 | |
|
2497 | 0 | result->op = GGML_OP_REPEAT; |
2498 | 0 | result->src[0] = a; |
2499 | |
|
2500 | 0 | return result; |
2501 | 0 | } |
2502 | | |
2503 | | // ggml_repeat_back |
2504 | | |
2505 | | struct ggml_tensor * ggml_repeat_back( |
2506 | | struct ggml_context * ctx, |
2507 | | struct ggml_tensor * a, |
2508 | 0 | struct ggml_tensor * b) { |
2509 | 0 | GGML_ASSERT(ggml_can_repeat(b, a)); |
2510 | |
|
2511 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne); |
2512 | |
|
2513 | 0 | result->op = GGML_OP_REPEAT_BACK; |
2514 | 0 | result->src[0] = a; |
2515 | |
|
2516 | 0 | return result; |
2517 | 0 | } |
2518 | | |
2519 | | // ggml_concat |
2520 | | |
2521 | | struct ggml_tensor * ggml_concat( |
2522 | | struct ggml_context * ctx, |
2523 | | struct ggml_tensor * a, |
2524 | | struct ggml_tensor * b, |
2525 | 0 | int dim) { |
2526 | 0 | GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS); |
2527 | 0 | GGML_ASSERT(a->type == b->type); |
2528 | |
|
2529 | 0 | int64_t ne[GGML_MAX_DIMS]; |
2530 | 0 | for (int d = 0; d < GGML_MAX_DIMS; ++d) { |
2531 | 0 | if (d == dim) { |
2532 | 0 | ne[d] = a->ne[d] + b->ne[d]; |
2533 | 0 | continue; |
2534 | 0 | } |
2535 | 0 | GGML_ASSERT(a->ne[d] == b->ne[d]); |
2536 | 0 | ne[d] = a->ne[d]; |
2537 | 0 | } |
2538 | |
|
2539 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne); |
2540 | |
|
2541 | 0 | ggml_set_op_params_i32(result, 0, dim); |
2542 | |
|
2543 | 0 | result->op = GGML_OP_CONCAT; |
2544 | 0 | result->src[0] = a; |
2545 | 0 | result->src[1] = b; |
2546 | |
|
2547 | 0 | return result; |
2548 | 0 | } |
2549 | | |
2550 | | // ggml_abs |
2551 | | |
2552 | | struct ggml_tensor * ggml_abs( |
2553 | | struct ggml_context * ctx, |
2554 | 0 | struct ggml_tensor * a) { |
2555 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_ABS); |
2556 | 0 | } |
2557 | | |
2558 | | struct ggml_tensor * ggml_abs_inplace( |
2559 | | struct ggml_context * ctx, |
2560 | 0 | struct ggml_tensor * a) { |
2561 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS); |
2562 | 0 | } |
2563 | | |
2564 | | // ggml_sgn |
2565 | | |
2566 | | struct ggml_tensor * ggml_sgn( |
2567 | | struct ggml_context * ctx, |
2568 | 0 | struct ggml_tensor * a) { |
2569 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_SGN); |
2570 | 0 | } |
2571 | | |
2572 | | struct ggml_tensor * ggml_sgn_inplace( |
2573 | | struct ggml_context * ctx, |
2574 | 0 | struct ggml_tensor * a) { |
2575 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN); |
2576 | 0 | } |
2577 | | |
2578 | | // ggml_neg |
2579 | | |
2580 | | struct ggml_tensor * ggml_neg( |
2581 | | struct ggml_context * ctx, |
2582 | 0 | struct ggml_tensor * a) { |
2583 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_NEG); |
2584 | 0 | } |
2585 | | |
2586 | | struct ggml_tensor * ggml_neg_inplace( |
2587 | | struct ggml_context * ctx, |
2588 | 0 | struct ggml_tensor * a) { |
2589 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG); |
2590 | 0 | } |
2591 | | |
2592 | | // ggml_step |
2593 | | |
2594 | | struct ggml_tensor * ggml_step( |
2595 | | struct ggml_context * ctx, |
2596 | 0 | struct ggml_tensor * a) { |
2597 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_STEP); |
2598 | 0 | } |
2599 | | |
2600 | | struct ggml_tensor * ggml_step_inplace( |
2601 | | struct ggml_context * ctx, |
2602 | 0 | struct ggml_tensor * a) { |
2603 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP); |
2604 | 0 | } |
2605 | | |
2606 | | // ggml_tanh |
2607 | | |
2608 | | struct ggml_tensor * ggml_tanh( |
2609 | | struct ggml_context * ctx, |
2610 | 0 | struct ggml_tensor * a) { |
2611 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_TANH); |
2612 | 0 | } |
2613 | | |
2614 | | struct ggml_tensor * ggml_tanh_inplace( |
2615 | | struct ggml_context * ctx, |
2616 | 0 | struct ggml_tensor * a) { |
2617 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH); |
2618 | 0 | } |
2619 | | |
2620 | | // ggml_elu |
2621 | | |
2622 | | struct ggml_tensor * ggml_elu( |
2623 | | struct ggml_context * ctx, |
2624 | 0 | struct ggml_tensor * a) { |
2625 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_ELU); |
2626 | 0 | } |
2627 | | |
2628 | | struct ggml_tensor * ggml_elu_inplace( |
2629 | | struct ggml_context * ctx, |
2630 | 0 | struct ggml_tensor * a) { |
2631 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU); |
2632 | 0 | } |
2633 | | |
2634 | | // ggml_relu |
2635 | | |
2636 | | struct ggml_tensor * ggml_relu( |
2637 | | struct ggml_context * ctx, |
2638 | 0 | struct ggml_tensor * a) { |
2639 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_RELU); |
2640 | 0 | } |
2641 | | |
2642 | | struct ggml_tensor * ggml_relu_inplace( |
2643 | | struct ggml_context * ctx, |
2644 | 0 | struct ggml_tensor * a) { |
2645 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU); |
2646 | 0 | } |
2647 | | |
2648 | | // ggml_leaky_relu |
2649 | | |
2650 | | struct ggml_tensor * ggml_leaky_relu( |
2651 | | struct ggml_context * ctx, |
2652 | | struct ggml_tensor * a, |
2653 | | float negative_slope, |
2654 | 0 | bool inplace) { |
2655 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2656 | |
|
2657 | 0 | ggml_set_op_params(result, &negative_slope, sizeof(negative_slope)); |
2658 | |
|
2659 | 0 | result->op = GGML_OP_LEAKY_RELU; |
2660 | 0 | result->src[0] = a; |
2661 | |
|
2662 | 0 | return result; |
2663 | 0 | } |
2664 | | |
2665 | | // ggml_sigmoid |
2666 | | |
2667 | | struct ggml_tensor * ggml_sigmoid( |
2668 | | struct ggml_context * ctx, |
2669 | 0 | struct ggml_tensor * a) { |
2670 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID); |
2671 | 0 | } |
2672 | | |
2673 | | struct ggml_tensor * ggml_sigmoid_inplace( |
2674 | | struct ggml_context * ctx, |
2675 | 0 | struct ggml_tensor * a) { |
2676 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID); |
2677 | 0 | } |
2678 | | |
2679 | | // ggml_gelu |
2680 | | |
2681 | | struct ggml_tensor * ggml_gelu( |
2682 | | struct ggml_context * ctx, |
2683 | 0 | struct ggml_tensor * a) { |
2684 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_GELU); |
2685 | 0 | } |
2686 | | |
2687 | | struct ggml_tensor * ggml_gelu_inplace( |
2688 | | struct ggml_context * ctx, |
2689 | 0 | struct ggml_tensor * a) { |
2690 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU); |
2691 | 0 | } |
2692 | | |
2693 | | // ggml_gelu_erf |
2694 | | |
2695 | | struct ggml_tensor * ggml_gelu_erf( |
2696 | | struct ggml_context * ctx, |
2697 | 0 | struct ggml_tensor * a) { |
2698 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF); |
2699 | 0 | } |
2700 | | |
2701 | | struct ggml_tensor * ggml_gelu_erf_inplace( |
2702 | | struct ggml_context * ctx, |
2703 | 0 | struct ggml_tensor * a) { |
2704 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF); |
2705 | 0 | } |
2706 | | |
2707 | | // ggml_gelu_quick |
2708 | | |
2709 | | struct ggml_tensor * ggml_gelu_quick( |
2710 | | struct ggml_context * ctx, |
2711 | 0 | struct ggml_tensor * a) { |
2712 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK); |
2713 | 0 | } |
2714 | | |
2715 | | struct ggml_tensor * ggml_gelu_quick_inplace( |
2716 | | struct ggml_context * ctx, |
2717 | 0 | struct ggml_tensor * a) { |
2718 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK); |
2719 | 0 | } |
2720 | | |
2721 | | // ggml_silu |
2722 | | |
2723 | | struct ggml_tensor * ggml_silu( |
2724 | | struct ggml_context * ctx, |
2725 | 0 | struct ggml_tensor * a) { |
2726 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_SILU); |
2727 | 0 | } |
2728 | | |
2729 | | struct ggml_tensor * ggml_silu_inplace( |
2730 | | struct ggml_context * ctx, |
2731 | 0 | struct ggml_tensor * a) { |
2732 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU); |
2733 | 0 | } |
2734 | | |
2735 | | // ggml_xielu |
2736 | | |
2737 | | struct ggml_tensor * ggml_xielu( |
2738 | | struct ggml_context * ctx, |
2739 | | struct ggml_tensor * a, |
2740 | | float alpha_n, |
2741 | | float alpha_p, |
2742 | | float beta, |
2743 | 0 | float eps) { |
2744 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
2745 | |
|
2746 | 0 | ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU); |
2747 | 0 | ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n)); |
2748 | 0 | ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p)); |
2749 | 0 | ggml_set_op_params_f32(result, 3, beta); |
2750 | 0 | ggml_set_op_params_f32(result, 4, eps); |
2751 | |
|
2752 | 0 | result->op = GGML_OP_UNARY; |
2753 | 0 | result->src[0] = a; |
2754 | |
|
2755 | 0 | return result; |
2756 | 0 | } |
2757 | | |
2758 | | // ggml_silu_back |
2759 | | |
2760 | | struct ggml_tensor * ggml_silu_back( |
2761 | | struct ggml_context * ctx, |
2762 | | struct ggml_tensor * a, |
2763 | 0 | struct ggml_tensor * b) { |
2764 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
2765 | |
|
2766 | 0 | result->op = GGML_OP_SILU_BACK; |
2767 | 0 | result->src[0] = a; |
2768 | 0 | result->src[1] = b; |
2769 | |
|
2770 | 0 | return result; |
2771 | 0 | } |
2772 | | |
2773 | | // ggml hardswish |
2774 | | |
2775 | | struct ggml_tensor * ggml_hardswish( |
2776 | | struct ggml_context * ctx, |
2777 | 0 | struct ggml_tensor * a) { |
2778 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH); |
2779 | 0 | } |
2780 | | |
2781 | | // ggml hardsigmoid |
2782 | | |
2783 | | struct ggml_tensor * ggml_hardsigmoid( |
2784 | | struct ggml_context * ctx, |
2785 | 0 | struct ggml_tensor * a) { |
2786 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID); |
2787 | 0 | } |
2788 | | |
2789 | | // ggml exp |
2790 | | |
2791 | | struct ggml_tensor * ggml_exp( |
2792 | | struct ggml_context * ctx, |
2793 | 0 | struct ggml_tensor * a) { |
2794 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_EXP); |
2795 | 0 | } |
2796 | | |
2797 | | struct ggml_tensor * ggml_exp_inplace( |
2798 | | struct ggml_context * ctx, |
2799 | 0 | struct ggml_tensor * a) { |
2800 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP); |
2801 | 0 | } |
2802 | | |
2803 | | // ggml_glu |
2804 | | |
2805 | | static struct ggml_tensor * ggml_glu_impl( |
2806 | | struct ggml_context * ctx, |
2807 | | struct ggml_tensor * a, |
2808 | | struct ggml_tensor * b, |
2809 | | enum ggml_glu_op op, |
2810 | 0 | bool swapped) { |
2811 | 0 | GGML_ASSERT(ggml_is_contiguous_1(a)); |
2812 | |
|
2813 | 0 | if (b) { |
2814 | 0 | GGML_ASSERT(ggml_is_contiguous_1(b)); |
2815 | 0 | GGML_ASSERT(ggml_are_same_shape(a, b)); |
2816 | 0 | GGML_ASSERT(a->type == b->type); |
2817 | 0 | } |
2818 | |
|
2819 | 0 | int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i]; |
2820 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0); |
2821 | |
|
2822 | 0 | ggml_set_op_params_i32(result, 0, (int32_t) op); |
2823 | 0 | ggml_set_op_params_i32(result, 1, (int32_t) swapped); |
2824 | |
|
2825 | 0 | result->op = GGML_OP_GLU; |
2826 | 0 | result->src[0] = a; |
2827 | 0 | result->src[1] = b; |
2828 | |
|
2829 | 0 | return result; |
2830 | 0 | } |
2831 | | |
2832 | | // ggml_floor |
2833 | | |
2834 | | struct ggml_tensor * ggml_floor( |
2835 | | struct ggml_context * ctx, |
2836 | 0 | struct ggml_tensor * a) { |
2837 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR); |
2838 | 0 | } |
2839 | | |
2840 | | struct ggml_tensor * ggml_floor_inplace( |
2841 | | struct ggml_context * ctx, |
2842 | 0 | struct ggml_tensor * a) { |
2843 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR); |
2844 | 0 | } |
2845 | | |
2846 | | // ggml_ceil |
2847 | | |
2848 | | struct ggml_tensor * ggml_ceil( |
2849 | | struct ggml_context * ctx, |
2850 | 0 | struct ggml_tensor * a) { |
2851 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL); |
2852 | 0 | } |
2853 | | |
2854 | | struct ggml_tensor * ggml_ceil_inplace( |
2855 | | struct ggml_context * ctx, |
2856 | 0 | struct ggml_tensor * a) { |
2857 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL); |
2858 | 0 | } |
2859 | | |
2860 | | //ggml_round |
2861 | | |
2862 | | struct ggml_tensor * ggml_round( |
2863 | | struct ggml_context * ctx, |
2864 | 0 | struct ggml_tensor * a) { |
2865 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND); |
2866 | 0 | } |
2867 | | |
2868 | | struct ggml_tensor * ggml_round_inplace( |
2869 | | struct ggml_context * ctx, |
2870 | 0 | struct ggml_tensor * a) { |
2871 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND); |
2872 | 0 | } |
2873 | | |
2874 | | //ggml_trunc |
2875 | | |
2876 | | struct ggml_tensor * ggml_trunc( |
2877 | | struct ggml_context * ctx, |
2878 | 0 | struct ggml_tensor * a) { |
2879 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC); |
2880 | 0 | } |
2881 | | |
2882 | | struct ggml_tensor * ggml_trunc_inplace( |
2883 | | struct ggml_context * ctx, |
2884 | 0 | struct ggml_tensor * a) { |
2885 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC); |
2886 | 0 | } |
2887 | | |
2888 | | struct ggml_tensor * ggml_glu( |
2889 | | struct ggml_context * ctx, |
2890 | | struct ggml_tensor * a, |
2891 | | enum ggml_glu_op op, |
2892 | 0 | bool swapped) { |
2893 | 0 | return ggml_glu_impl(ctx, a, NULL, op, swapped); |
2894 | 0 | } |
2895 | | |
2896 | | struct ggml_tensor * ggml_glu_split( |
2897 | | struct ggml_context * ctx, |
2898 | | struct ggml_tensor * a, |
2899 | | struct ggml_tensor * b, |
2900 | 0 | enum ggml_glu_op op) { |
2901 | 0 | return ggml_glu_impl(ctx, a, b, op, false); |
2902 | 0 | } |
2903 | | |
2904 | | // ggml_reglu |
2905 | | |
2906 | | struct ggml_tensor * ggml_reglu( |
2907 | | struct ggml_context * ctx, |
2908 | 0 | struct ggml_tensor * a) { |
2909 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false); |
2910 | 0 | } |
2911 | | |
2912 | | struct ggml_tensor * ggml_reglu_swapped( |
2913 | | struct ggml_context * ctx, |
2914 | 0 | struct ggml_tensor * a) { |
2915 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true); |
2916 | 0 | } |
2917 | | |
2918 | | struct ggml_tensor * ggml_reglu_split( |
2919 | | struct ggml_context * ctx, |
2920 | | struct ggml_tensor * a, |
2921 | 0 | struct ggml_tensor * b) { |
2922 | 0 | return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false); |
2923 | 0 | } |
2924 | | |
2925 | | // ggml_geglu |
2926 | | |
2927 | | struct ggml_tensor * ggml_geglu( |
2928 | | struct ggml_context * ctx, |
2929 | 0 | struct ggml_tensor * a) { |
2930 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false); |
2931 | 0 | } |
2932 | | |
2933 | | struct ggml_tensor * ggml_geglu_swapped( |
2934 | | struct ggml_context * ctx, |
2935 | 0 | struct ggml_tensor * a) { |
2936 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true); |
2937 | 0 | } |
2938 | | |
2939 | | struct ggml_tensor * ggml_geglu_split( |
2940 | | struct ggml_context * ctx, |
2941 | | struct ggml_tensor * a, |
2942 | 0 | struct ggml_tensor * b) { |
2943 | 0 | return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false); |
2944 | 0 | } |
2945 | | |
2946 | | // ggml_swiglu |
2947 | | |
2948 | | struct ggml_tensor * ggml_swiglu( |
2949 | | struct ggml_context * ctx, |
2950 | 0 | struct ggml_tensor * a) { |
2951 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false); |
2952 | 0 | } |
2953 | | |
2954 | | struct ggml_tensor * ggml_swiglu_swapped( |
2955 | | struct ggml_context * ctx, |
2956 | 0 | struct ggml_tensor * a) { |
2957 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true); |
2958 | 0 | } |
2959 | | |
2960 | | struct ggml_tensor * ggml_swiglu_split( |
2961 | | struct ggml_context * ctx, |
2962 | | struct ggml_tensor * a, |
2963 | 0 | struct ggml_tensor * b) { |
2964 | 0 | return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false); |
2965 | 0 | } |
2966 | | |
2967 | | // ggml_geglu_erf |
2968 | | |
2969 | | struct ggml_tensor * ggml_geglu_erf( |
2970 | | struct ggml_context * ctx, |
2971 | 0 | struct ggml_tensor * a) { |
2972 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false); |
2973 | 0 | } |
2974 | | |
2975 | | struct ggml_tensor * ggml_geglu_erf_swapped( |
2976 | | struct ggml_context * ctx, |
2977 | 0 | struct ggml_tensor * a) { |
2978 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true); |
2979 | 0 | } |
2980 | | |
2981 | | struct ggml_tensor * ggml_geglu_erf_split( |
2982 | | struct ggml_context * ctx, |
2983 | | struct ggml_tensor * a, |
2984 | 0 | struct ggml_tensor * b) { |
2985 | 0 | return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false); |
2986 | 0 | } |
2987 | | |
2988 | | // ggml_geglu_quick |
2989 | | |
2990 | | struct ggml_tensor * ggml_geglu_quick( |
2991 | | struct ggml_context * ctx, |
2992 | 0 | struct ggml_tensor * a) { |
2993 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false); |
2994 | 0 | } |
2995 | | |
2996 | | struct ggml_tensor * ggml_geglu_quick_swapped( |
2997 | | struct ggml_context * ctx, |
2998 | 0 | struct ggml_tensor * a) { |
2999 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true); |
3000 | 0 | } |
3001 | | |
3002 | | struct ggml_tensor * ggml_geglu_quick_split( |
3003 | | struct ggml_context * ctx, |
3004 | | struct ggml_tensor * a, |
3005 | 0 | struct ggml_tensor * b) { |
3006 | 0 | return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false); |
3007 | 0 | } |
3008 | | |
3009 | | struct ggml_tensor * ggml_swiglu_oai( |
3010 | | struct ggml_context * ctx, |
3011 | | struct ggml_tensor * a, |
3012 | | struct ggml_tensor * b, |
3013 | | float alpha, |
3014 | 0 | float limit) { |
3015 | 0 | struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false); |
3016 | 0 | ggml_set_op_params_f32(result, 2, alpha); |
3017 | 0 | ggml_set_op_params_f32(result, 3, limit); |
3018 | |
|
3019 | 0 | return result; |
3020 | 0 | } |
3021 | | |
3022 | | // ggml_norm |
3023 | | |
3024 | | static struct ggml_tensor * ggml_norm_impl( |
3025 | | struct ggml_context * ctx, |
3026 | | struct ggml_tensor * a, |
3027 | | float eps, |
3028 | 0 | bool inplace) { |
3029 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3030 | |
|
3031 | 0 | ggml_set_op_params(result, &eps, sizeof(eps)); |
3032 | |
|
3033 | 0 | result->op = GGML_OP_NORM; |
3034 | 0 | result->src[0] = a; |
3035 | |
|
3036 | 0 | return result; |
3037 | 0 | } |
3038 | | |
3039 | | struct ggml_tensor * ggml_norm( |
3040 | | struct ggml_context * ctx, |
3041 | | struct ggml_tensor * a, |
3042 | 0 | float eps) { |
3043 | 0 | return ggml_norm_impl(ctx, a, eps, false); |
3044 | 0 | } |
3045 | | |
3046 | | struct ggml_tensor * ggml_norm_inplace( |
3047 | | struct ggml_context * ctx, |
3048 | | struct ggml_tensor * a, |
3049 | 0 | float eps) { |
3050 | 0 | return ggml_norm_impl(ctx, a, eps, true); |
3051 | 0 | } |
3052 | | |
3053 | | // ggml_rms_norm |
3054 | | |
3055 | | static struct ggml_tensor * ggml_rms_norm_impl( |
3056 | | struct ggml_context * ctx, |
3057 | | struct ggml_tensor * a, |
3058 | | float eps, |
3059 | 0 | bool inplace) { |
3060 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3061 | |
|
3062 | 0 | ggml_set_op_params(result, &eps, sizeof(eps)); |
3063 | |
|
3064 | 0 | result->op = GGML_OP_RMS_NORM; |
3065 | 0 | result->src[0] = a; |
3066 | |
|
3067 | 0 | return result; |
3068 | 0 | } |
3069 | | |
3070 | | struct ggml_tensor * ggml_rms_norm( |
3071 | | struct ggml_context * ctx, |
3072 | | struct ggml_tensor * a, |
3073 | 0 | float eps) { |
3074 | 0 | return ggml_rms_norm_impl(ctx, a, eps, false); |
3075 | 0 | } |
3076 | | |
3077 | | struct ggml_tensor * ggml_rms_norm_inplace( |
3078 | | struct ggml_context * ctx, |
3079 | | struct ggml_tensor * a, |
3080 | 0 | float eps) { |
3081 | 0 | return ggml_rms_norm_impl(ctx, a, eps, true); |
3082 | 0 | } |
3083 | | |
3084 | | // ggml_rms_norm_back |
3085 | | |
3086 | | struct ggml_tensor * ggml_rms_norm_back( |
3087 | | struct ggml_context * ctx, |
3088 | | struct ggml_tensor * a, |
3089 | | struct ggml_tensor * b, |
3090 | 0 | float eps) { |
3091 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
3092 | |
|
3093 | 0 | ggml_set_op_params(result, &eps, sizeof(eps)); |
3094 | |
|
3095 | 0 | result->op = GGML_OP_RMS_NORM_BACK; |
3096 | 0 | result->src[0] = a; |
3097 | 0 | result->src[1] = b; |
3098 | |
|
3099 | 0 | return result; |
3100 | 0 | } |
3101 | | |
3102 | | // ggml_group_norm |
3103 | | |
3104 | | static struct ggml_tensor * ggml_group_norm_impl( |
3105 | | struct ggml_context * ctx, |
3106 | | struct ggml_tensor * a, |
3107 | | int n_groups, |
3108 | | float eps, |
3109 | 0 | bool inplace) { |
3110 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3111 | |
|
3112 | 0 | ggml_set_op_params_i32(result, 0, n_groups); |
3113 | 0 | ggml_set_op_params_f32(result, 1, eps); |
3114 | |
|
3115 | 0 | result->op = GGML_OP_GROUP_NORM; |
3116 | 0 | result->src[0] = a; |
3117 | |
|
3118 | 0 | return result; |
3119 | 0 | } |
3120 | | |
3121 | | struct ggml_tensor * ggml_group_norm( |
3122 | | struct ggml_context * ctx, |
3123 | | struct ggml_tensor * a, |
3124 | | int n_groups, |
3125 | 0 | float eps) { |
3126 | 0 | return ggml_group_norm_impl(ctx, a, n_groups, eps, false); |
3127 | 0 | } |
3128 | | |
3129 | | struct ggml_tensor * ggml_group_norm_inplace( |
3130 | | struct ggml_context * ctx, |
3131 | | struct ggml_tensor * a, |
3132 | | int n_groups, |
3133 | 0 | float eps) { |
3134 | 0 | return ggml_group_norm_impl(ctx, a, n_groups, eps, true); |
3135 | 0 | } |
3136 | | |
3137 | | // ggml_l2_norm |
3138 | | |
3139 | | static struct ggml_tensor * ggml_l2_norm_impl( |
3140 | | struct ggml_context * ctx, |
3141 | | struct ggml_tensor * a, |
3142 | | float eps, |
3143 | 0 | bool inplace) { |
3144 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3145 | |
|
3146 | 0 | ggml_set_op_params_f32(result, 0, eps); |
3147 | |
|
3148 | 0 | result->op = GGML_OP_L2_NORM; |
3149 | 0 | result->src[0] = a; |
3150 | |
|
3151 | 0 | return result; |
3152 | 0 | } |
3153 | | |
3154 | | struct ggml_tensor * ggml_l2_norm( |
3155 | | struct ggml_context * ctx, |
3156 | | struct ggml_tensor * a, |
3157 | 0 | float eps) { |
3158 | 0 | return ggml_l2_norm_impl(ctx, a, eps, false); |
3159 | 0 | } |
3160 | | |
3161 | | struct ggml_tensor * ggml_l2_norm_inplace( |
3162 | | struct ggml_context * ctx, |
3163 | | struct ggml_tensor * a, |
3164 | 0 | float eps) { |
3165 | 0 | return ggml_l2_norm_impl(ctx, a, eps, true); |
3166 | 0 | } |
3167 | | |
3168 | | // ggml_mul_mat |
3169 | | |
3170 | 0 | static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
3171 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
3172 | |
|
3173 | 0 | return (t0->ne[0] == t1->ne[0]) && |
3174 | 0 | (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable |
3175 | 0 | (t1->ne[3]%t0->ne[3] == 0); |
3176 | 0 | } |
3177 | | |
3178 | | struct ggml_tensor * ggml_mul_mat( |
3179 | | struct ggml_context * ctx, |
3180 | | struct ggml_tensor * a, |
3181 | 0 | struct ggml_tensor * b) { |
3182 | 0 | GGML_ASSERT(ggml_can_mul_mat(a, b)); |
3183 | 0 | GGML_ASSERT(!ggml_is_transposed(a)); |
3184 | |
|
3185 | 0 | const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] }; |
3186 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
3187 | |
|
3188 | 0 | result->op = GGML_OP_MUL_MAT; |
3189 | 0 | result->src[0] = a; |
3190 | 0 | result->src[1] = b; |
3191 | |
|
3192 | 0 | return result; |
3193 | 0 | } |
3194 | | |
3195 | | void ggml_mul_mat_set_prec( |
3196 | | struct ggml_tensor * a, |
3197 | 0 | enum ggml_prec prec) { |
3198 | 0 | GGML_ASSERT(a->op == GGML_OP_MUL_MAT); |
3199 | |
|
3200 | 0 | const int32_t prec_i32 = (int32_t) prec; |
3201 | |
|
3202 | 0 | ggml_set_op_params_i32(a, 0, prec_i32); |
3203 | 0 | } |
3204 | | |
3205 | | // ggml_mul_mat_id |
3206 | | |
3207 | | /* |
3208 | | c = ggml_mul_mat_id(ctx, as, b, ids); |
3209 | | |
3210 | | as -> [cols, rows, n_expert] |
3211 | | b -> [cols, n_expert_used, n_tokens] |
3212 | | ids -> [n_expert_used, n_tokens] (i32) |
3213 | | c -> [rows, n_expert_used, n_tokens] |
3214 | | |
3215 | | in b, n_expert_used can be broadcasted to match the n_expert_used of ids |
3216 | | |
3217 | | c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids |
3218 | | */ |
3219 | | struct ggml_tensor * ggml_mul_mat_id( |
3220 | | struct ggml_context * ctx, |
3221 | | struct ggml_tensor * as, |
3222 | | struct ggml_tensor * b, |
3223 | 0 | struct ggml_tensor * ids) { |
3224 | 0 | GGML_ASSERT(!ggml_is_transposed(as)); |
3225 | 0 | GGML_ASSERT(ids->type == GGML_TYPE_I32); |
3226 | |
|
3227 | 0 | GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert) |
3228 | 0 | GGML_ASSERT(b->ne[3] == 1); // b is 3d |
3229 | 0 | GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d |
3230 | 0 | GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row |
3231 | 0 | GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat |
3232 | 0 | GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast |
3233 | |
|
3234 | 0 | const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 }; |
3235 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
3236 | |
|
3237 | 0 | result->op = GGML_OP_MUL_MAT_ID; |
3238 | 0 | result->src[0] = as; |
3239 | 0 | result->src[1] = b; |
3240 | 0 | result->src[2] = ids; |
3241 | |
|
3242 | 0 | return result; |
3243 | 0 | } |
3244 | | |
3245 | | // ggml_out_prod |
3246 | | |
3247 | 0 | static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
3248 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
3249 | |
|
3250 | 0 | return (t0->ne[1] == t1->ne[1]) && |
3251 | 0 | (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable |
3252 | 0 | (t1->ne[3]%t0->ne[3] == 0); |
3253 | 0 | } |
3254 | | |
3255 | | struct ggml_tensor * ggml_out_prod( |
3256 | | struct ggml_context * ctx, |
3257 | | struct ggml_tensor * a, |
3258 | 0 | struct ggml_tensor * b) { |
3259 | 0 | GGML_ASSERT(ggml_can_out_prod(a, b)); |
3260 | 0 | GGML_ASSERT(!ggml_is_transposed(a)); |
3261 | | |
3262 | | // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3] |
3263 | 0 | const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] }; |
3264 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
3265 | |
|
3266 | 0 | result->op = GGML_OP_OUT_PROD; |
3267 | 0 | result->src[0] = a; |
3268 | 0 | result->src[1] = b; |
3269 | |
|
3270 | 0 | return result; |
3271 | 0 | } |
3272 | | |
3273 | | // ggml_scale |
3274 | | |
3275 | | static struct ggml_tensor * ggml_scale_impl( |
3276 | | struct ggml_context * ctx, |
3277 | | struct ggml_tensor * a, |
3278 | | float s, |
3279 | | float b, |
3280 | 0 | bool inplace) { |
3281 | 0 | GGML_ASSERT(ggml_is_padded_1d(a)); |
3282 | |
|
3283 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3284 | |
|
3285 | 0 | float params[2] = { s, b }; |
3286 | 0 | ggml_set_op_params(result, ¶ms, sizeof(params)); |
3287 | |
|
3288 | 0 | result->op = GGML_OP_SCALE; |
3289 | 0 | result->src[0] = a; |
3290 | |
|
3291 | 0 | return result; |
3292 | 0 | } |
3293 | | |
3294 | | struct ggml_tensor * ggml_scale( |
3295 | | struct ggml_context * ctx, |
3296 | | struct ggml_tensor * a, |
3297 | 0 | float s) { |
3298 | 0 | return ggml_scale_impl(ctx, a, s, 0.0, false); |
3299 | 0 | } |
3300 | | |
3301 | | struct ggml_tensor * ggml_scale_inplace( |
3302 | | struct ggml_context * ctx, |
3303 | | struct ggml_tensor * a, |
3304 | 0 | float s) { |
3305 | 0 | return ggml_scale_impl(ctx, a, s, 0.0, true); |
3306 | 0 | } |
3307 | | |
3308 | | struct ggml_tensor * ggml_scale_bias( |
3309 | | struct ggml_context * ctx, |
3310 | | struct ggml_tensor * a, |
3311 | | float s, |
3312 | 0 | float b) { |
3313 | 0 | return ggml_scale_impl(ctx, a, s, b, false); |
3314 | 0 | } |
3315 | | |
3316 | | struct ggml_tensor * ggml_scale_bias_inplace( |
3317 | | struct ggml_context * ctx, |
3318 | | struct ggml_tensor * a, |
3319 | | float s, |
3320 | 0 | float b) { |
3321 | 0 | return ggml_scale_impl(ctx, a, s, b, true); |
3322 | 0 | } |
3323 | | |
3324 | | // ggml_set |
3325 | | |
3326 | | static struct ggml_tensor * ggml_set_impl( |
3327 | | struct ggml_context * ctx, |
3328 | | struct ggml_tensor * a, |
3329 | | struct ggml_tensor * b, |
3330 | | size_t nb1, |
3331 | | size_t nb2, |
3332 | | size_t nb3, |
3333 | | size_t offset, |
3334 | 0 | bool inplace) { |
3335 | 0 | GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b)); |
3336 | | |
3337 | | // make a view of the destination |
3338 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3339 | |
|
3340 | 0 | GGML_ASSERT(offset < (size_t)(1 << 30)); |
3341 | 0 | int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 }; |
3342 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
3343 | |
|
3344 | 0 | result->op = GGML_OP_SET; |
3345 | 0 | result->src[0] = a; |
3346 | 0 | result->src[1] = b; |
3347 | |
|
3348 | 0 | return result; |
3349 | 0 | } |
3350 | | |
3351 | | struct ggml_tensor * ggml_set( |
3352 | | struct ggml_context * ctx, |
3353 | | struct ggml_tensor * a, |
3354 | | struct ggml_tensor * b, |
3355 | | size_t nb1, |
3356 | | size_t nb2, |
3357 | | size_t nb3, |
3358 | 0 | size_t offset) { |
3359 | 0 | return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false); |
3360 | 0 | } |
3361 | | |
3362 | | struct ggml_tensor * ggml_set_inplace( |
3363 | | struct ggml_context * ctx, |
3364 | | struct ggml_tensor * a, |
3365 | | struct ggml_tensor * b, |
3366 | | size_t nb1, |
3367 | | size_t nb2, |
3368 | | size_t nb3, |
3369 | 0 | size_t offset) { |
3370 | 0 | return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true); |
3371 | 0 | } |
3372 | | |
3373 | | struct ggml_tensor * ggml_set_1d( |
3374 | | struct ggml_context * ctx, |
3375 | | struct ggml_tensor * a, |
3376 | | struct ggml_tensor * b, |
3377 | 0 | size_t offset) { |
3378 | 0 | return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false); |
3379 | 0 | } |
3380 | | |
3381 | | struct ggml_tensor * ggml_set_1d_inplace( |
3382 | | struct ggml_context * ctx, |
3383 | | struct ggml_tensor * a, |
3384 | | struct ggml_tensor * b, |
3385 | 0 | size_t offset) { |
3386 | 0 | return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true); |
3387 | 0 | } |
3388 | | |
3389 | | struct ggml_tensor * ggml_set_2d( |
3390 | | struct ggml_context * ctx, |
3391 | | struct ggml_tensor * a, |
3392 | | struct ggml_tensor * b, |
3393 | | size_t nb1, |
3394 | 0 | size_t offset) { |
3395 | 0 | return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false); |
3396 | 0 | } |
3397 | | |
3398 | | struct ggml_tensor * ggml_set_2d_inplace( |
3399 | | struct ggml_context * ctx, |
3400 | | struct ggml_tensor * a, |
3401 | | struct ggml_tensor * b, |
3402 | | size_t nb1, |
3403 | 0 | size_t offset) { |
3404 | 0 | return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true); |
3405 | 0 | } |
3406 | | |
3407 | | // ggml_cpy |
3408 | | |
3409 | | static struct ggml_tensor * ggml_cpy_impl( |
3410 | | struct ggml_context * ctx, |
3411 | | struct ggml_tensor * a, |
3412 | 0 | struct ggml_tensor * b) { |
3413 | 0 | GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b)); |
3414 | | |
3415 | | // make a view of the destination |
3416 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, b); |
3417 | 0 | if (strlen(b->name) > 0) { |
3418 | 0 | ggml_format_name(result, "%s (copy of %s)", b->name, a->name); |
3419 | 0 | } else { |
3420 | 0 | ggml_format_name(result, "%s (copy)", a->name); |
3421 | 0 | } |
3422 | |
|
3423 | 0 | result->op = GGML_OP_CPY; |
3424 | 0 | result->src[0] = a; |
3425 | 0 | result->src[1] = b; |
3426 | |
|
3427 | 0 | return result; |
3428 | 0 | } |
3429 | | |
3430 | | struct ggml_tensor * ggml_cpy( |
3431 | | struct ggml_context * ctx, |
3432 | | struct ggml_tensor * a, |
3433 | 0 | struct ggml_tensor * b) { |
3434 | 0 | return ggml_cpy_impl(ctx, a, b); |
3435 | 0 | } |
3436 | | |
3437 | | struct ggml_tensor * ggml_cast( |
3438 | | struct ggml_context * ctx, |
3439 | | struct ggml_tensor * a, |
3440 | 0 | enum ggml_type type) { |
3441 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne); |
3442 | 0 | ggml_format_name(result, "%s (copy)", a->name); |
3443 | |
|
3444 | 0 | result->op = GGML_OP_CPY; |
3445 | 0 | result->src[0] = a; |
3446 | 0 | result->src[1] = result; |
3447 | |
|
3448 | 0 | return result; |
3449 | 0 | } |
3450 | | |
3451 | | // ggml_cont |
3452 | | |
3453 | | static struct ggml_tensor * ggml_cont_impl( |
3454 | | struct ggml_context * ctx, |
3455 | 0 | struct ggml_tensor * a) { |
3456 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
3457 | 0 | ggml_format_name(result, "%s (cont)", a->name); |
3458 | |
|
3459 | 0 | result->op = GGML_OP_CONT; |
3460 | 0 | result->src[0] = a; |
3461 | |
|
3462 | 0 | return result; |
3463 | 0 | } |
3464 | | |
3465 | | struct ggml_tensor * ggml_cont( |
3466 | | struct ggml_context * ctx, |
3467 | 0 | struct ggml_tensor * a) { |
3468 | 0 | return ggml_cont_impl(ctx, a); |
3469 | 0 | } |
3470 | | |
3471 | | // make contiguous, with new shape |
3472 | | GGML_API struct ggml_tensor * ggml_cont_1d( |
3473 | | struct ggml_context * ctx, |
3474 | | struct ggml_tensor * a, |
3475 | 0 | int64_t ne0) { |
3476 | 0 | return ggml_cont_4d(ctx, a, ne0, 1, 1, 1); |
3477 | 0 | } |
3478 | | |
3479 | | GGML_API struct ggml_tensor * ggml_cont_2d( |
3480 | | struct ggml_context * ctx, |
3481 | | struct ggml_tensor * a, |
3482 | | int64_t ne0, |
3483 | 0 | int64_t ne1) { |
3484 | 0 | return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1); |
3485 | 0 | } |
3486 | | |
3487 | | GGML_API struct ggml_tensor * ggml_cont_3d( |
3488 | | struct ggml_context * ctx, |
3489 | | struct ggml_tensor * a, |
3490 | | int64_t ne0, |
3491 | | int64_t ne1, |
3492 | 0 | int64_t ne2) { |
3493 | 0 | return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1); |
3494 | 0 | } |
3495 | | |
3496 | | struct ggml_tensor * ggml_cont_4d( |
3497 | | struct ggml_context * ctx, |
3498 | | struct ggml_tensor * a, |
3499 | | int64_t ne0, |
3500 | | int64_t ne1, |
3501 | | int64_t ne2, |
3502 | 0 | int64_t ne3) { |
3503 | 0 | GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3)); |
3504 | |
|
3505 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3); |
3506 | 0 | ggml_format_name(result, "%s (cont)", a->name); |
3507 | |
|
3508 | 0 | result->op = GGML_OP_CONT; |
3509 | 0 | result->src[0] = a; |
3510 | |
|
3511 | 0 | return result; |
3512 | 0 | } |
3513 | | |
3514 | | // ggml_reshape |
3515 | | |
3516 | | struct ggml_tensor * ggml_reshape( |
3517 | | struct ggml_context * ctx, |
3518 | | struct ggml_tensor * a, |
3519 | 0 | struct ggml_tensor * b) { |
3520 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3521 | | // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous. |
3522 | 0 | GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b)); |
3523 | |
|
3524 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0); |
3525 | 0 | ggml_format_name(result, "%s (reshaped)", a->name); |
3526 | |
|
3527 | 0 | result->op = GGML_OP_RESHAPE; |
3528 | 0 | result->src[0] = a; |
3529 | |
|
3530 | 0 | return result; |
3531 | 0 | } |
3532 | | |
3533 | | struct ggml_tensor * ggml_reshape_1d( |
3534 | | struct ggml_context * ctx, |
3535 | | struct ggml_tensor * a, |
3536 | 0 | int64_t ne0) { |
3537 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3538 | 0 | GGML_ASSERT(ggml_nelements(a) == ne0); |
3539 | |
|
3540 | 0 | const int64_t ne[1] = { ne0 }; |
3541 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0); |
3542 | 0 | ggml_format_name(result, "%s (reshaped)", a->name); |
3543 | |
|
3544 | 0 | result->op = GGML_OP_RESHAPE; |
3545 | 0 | result->src[0] = a; |
3546 | |
|
3547 | 0 | return result; |
3548 | 0 | } |
3549 | | |
3550 | | struct ggml_tensor * ggml_reshape_2d( |
3551 | | struct ggml_context * ctx, |
3552 | | struct ggml_tensor * a, |
3553 | | int64_t ne0, |
3554 | 0 | int64_t ne1) { |
3555 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3556 | 0 | GGML_ASSERT(ggml_nelements(a) == ne0*ne1); |
3557 | |
|
3558 | 0 | const int64_t ne[2] = { ne0, ne1 }; |
3559 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0); |
3560 | 0 | ggml_format_name(result, "%s (reshaped)", a->name); |
3561 | |
|
3562 | 0 | result->op = GGML_OP_RESHAPE; |
3563 | 0 | result->src[0] = a; |
3564 | |
|
3565 | 0 | return result; |
3566 | 0 | } |
3567 | | |
3568 | | struct ggml_tensor * ggml_reshape_3d( |
3569 | | struct ggml_context * ctx, |
3570 | | struct ggml_tensor * a, |
3571 | | int64_t ne0, |
3572 | | int64_t ne1, |
3573 | 0 | int64_t ne2) { |
3574 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3575 | 0 | GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2); |
3576 | |
|
3577 | 0 | const int64_t ne[3] = { ne0, ne1, ne2 }; |
3578 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0); |
3579 | 0 | ggml_format_name(result, "%s (reshaped)", a->name); |
3580 | |
|
3581 | 0 | result->op = GGML_OP_RESHAPE; |
3582 | 0 | result->src[0] = a; |
3583 | |
|
3584 | 0 | return result; |
3585 | 0 | } |
3586 | | |
3587 | | struct ggml_tensor * ggml_reshape_4d( |
3588 | | struct ggml_context * ctx, |
3589 | | struct ggml_tensor * a, |
3590 | | int64_t ne0, |
3591 | | int64_t ne1, |
3592 | | int64_t ne2, |
3593 | 0 | int64_t ne3) { |
3594 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3595 | 0 | GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3); |
3596 | |
|
3597 | 0 | const int64_t ne[4] = { ne0, ne1, ne2, ne3 }; |
3598 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0); |
3599 | 0 | ggml_format_name(result, "%s (reshaped)", a->name); |
3600 | |
|
3601 | 0 | result->op = GGML_OP_RESHAPE; |
3602 | 0 | result->src[0] = a; |
3603 | |
|
3604 | 0 | return result; |
3605 | 0 | } |
3606 | | |
3607 | | static struct ggml_tensor * ggml_view_impl( |
3608 | | struct ggml_context * ctx, |
3609 | | struct ggml_tensor * a, |
3610 | | int n_dims, |
3611 | | const int64_t * ne, |
3612 | 0 | size_t offset) { |
3613 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset); |
3614 | 0 | ggml_format_name(result, "%s (view)", a->name); |
3615 | |
|
3616 | 0 | ggml_set_op_params(result, &offset, sizeof(offset)); |
3617 | |
|
3618 | 0 | result->op = GGML_OP_VIEW; |
3619 | 0 | result->src[0] = a; |
3620 | |
|
3621 | 0 | return result; |
3622 | 0 | } |
3623 | | |
3624 | | // ggml_view_1d |
3625 | | |
3626 | | struct ggml_tensor * ggml_view_1d( |
3627 | | struct ggml_context * ctx, |
3628 | | struct ggml_tensor * a, |
3629 | | int64_t ne0, |
3630 | 0 | size_t offset) { |
3631 | 0 | struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset); |
3632 | |
|
3633 | 0 | return result; |
3634 | 0 | } |
3635 | | |
3636 | | // ggml_view_2d |
3637 | | |
3638 | | struct ggml_tensor * ggml_view_2d( |
3639 | | struct ggml_context * ctx, |
3640 | | struct ggml_tensor * a, |
3641 | | int64_t ne0, |
3642 | | int64_t ne1, |
3643 | | size_t nb1, |
3644 | 0 | size_t offset) { |
3645 | 0 | const int64_t ne[2] = { ne0, ne1 }; |
3646 | |
|
3647 | 0 | struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset); |
3648 | |
|
3649 | 0 | result->nb[1] = nb1; |
3650 | 0 | result->nb[2] = result->nb[1]*ne1; |
3651 | 0 | result->nb[3] = result->nb[2]; |
3652 | |
|
3653 | 0 | return result; |
3654 | 0 | } |
3655 | | |
3656 | | // ggml_view_3d |
3657 | | |
3658 | | struct ggml_tensor * ggml_view_3d( |
3659 | | struct ggml_context * ctx, |
3660 | | struct ggml_tensor * a, |
3661 | | int64_t ne0, |
3662 | | int64_t ne1, |
3663 | | int64_t ne2, |
3664 | | size_t nb1, |
3665 | | size_t nb2, |
3666 | 0 | size_t offset) { |
3667 | 0 | const int64_t ne[3] = { ne0, ne1, ne2 }; |
3668 | |
|
3669 | 0 | struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset); |
3670 | |
|
3671 | 0 | result->nb[1] = nb1; |
3672 | 0 | result->nb[2] = nb2; |
3673 | 0 | result->nb[3] = result->nb[2]*ne2; |
3674 | |
|
3675 | 0 | return result; |
3676 | 0 | } |
3677 | | |
3678 | | // ggml_view_4d |
3679 | | |
3680 | | struct ggml_tensor * ggml_view_4d( |
3681 | | struct ggml_context * ctx, |
3682 | | struct ggml_tensor * a, |
3683 | | int64_t ne0, |
3684 | | int64_t ne1, |
3685 | | int64_t ne2, |
3686 | | int64_t ne3, |
3687 | | size_t nb1, |
3688 | | size_t nb2, |
3689 | | size_t nb3, |
3690 | 0 | size_t offset) { |
3691 | 0 | const int64_t ne[4] = { ne0, ne1, ne2, ne3 }; |
3692 | |
|
3693 | 0 | struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset); |
3694 | |
|
3695 | 0 | result->nb[1] = nb1; |
3696 | 0 | result->nb[2] = nb2; |
3697 | 0 | result->nb[3] = nb3; |
3698 | |
|
3699 | 0 | return result; |
3700 | 0 | } |
3701 | | |
3702 | | // ggml_permute |
3703 | | |
3704 | | struct ggml_tensor * ggml_permute( |
3705 | | struct ggml_context * ctx, |
3706 | | struct ggml_tensor * a, |
3707 | | int axis0, |
3708 | | int axis1, |
3709 | | int axis2, |
3710 | 0 | int axis3) { |
3711 | 0 | GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS); |
3712 | 0 | GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS); |
3713 | 0 | GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS); |
3714 | 0 | GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS); |
3715 | |
|
3716 | 0 | GGML_ASSERT(axis0 != axis1); |
3717 | 0 | GGML_ASSERT(axis0 != axis2); |
3718 | 0 | GGML_ASSERT(axis0 != axis3); |
3719 | 0 | GGML_ASSERT(axis1 != axis2); |
3720 | 0 | GGML_ASSERT(axis1 != axis3); |
3721 | 0 | GGML_ASSERT(axis2 != axis3); |
3722 | |
|
3723 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
3724 | 0 | ggml_format_name(result, "%s (permuted)", a->name); |
3725 | |
|
3726 | 0 | int ne[GGML_MAX_DIMS]; |
3727 | 0 | int nb[GGML_MAX_DIMS]; |
3728 | |
|
3729 | 0 | ne[axis0] = a->ne[0]; |
3730 | 0 | ne[axis1] = a->ne[1]; |
3731 | 0 | ne[axis2] = a->ne[2]; |
3732 | 0 | ne[axis3] = a->ne[3]; |
3733 | |
|
3734 | 0 | nb[axis0] = a->nb[0]; |
3735 | 0 | nb[axis1] = a->nb[1]; |
3736 | 0 | nb[axis2] = a->nb[2]; |
3737 | 0 | nb[axis3] = a->nb[3]; |
3738 | |
|
3739 | 0 | result->ne[0] = ne[0]; |
3740 | 0 | result->ne[1] = ne[1]; |
3741 | 0 | result->ne[2] = ne[2]; |
3742 | 0 | result->ne[3] = ne[3]; |
3743 | |
|
3744 | 0 | result->nb[0] = nb[0]; |
3745 | 0 | result->nb[1] = nb[1]; |
3746 | 0 | result->nb[2] = nb[2]; |
3747 | 0 | result->nb[3] = nb[3]; |
3748 | |
|
3749 | 0 | result->op = GGML_OP_PERMUTE; |
3750 | 0 | result->src[0] = a; |
3751 | |
|
3752 | 0 | int32_t params[] = { axis0, axis1, axis2, axis3 }; |
3753 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
3754 | |
|
3755 | 0 | return result; |
3756 | 0 | } |
3757 | | |
3758 | | // ggml_transpose |
3759 | | |
3760 | | struct ggml_tensor * ggml_transpose( |
3761 | | struct ggml_context * ctx, |
3762 | 0 | struct ggml_tensor * a) { |
3763 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
3764 | 0 | ggml_format_name(result, "%s (transposed)", a->name); |
3765 | |
|
3766 | 0 | result->ne[0] = a->ne[1]; |
3767 | 0 | result->ne[1] = a->ne[0]; |
3768 | |
|
3769 | 0 | result->nb[0] = a->nb[1]; |
3770 | 0 | result->nb[1] = a->nb[0]; |
3771 | |
|
3772 | 0 | result->op = GGML_OP_TRANSPOSE; |
3773 | 0 | result->src[0] = a; |
3774 | |
|
3775 | 0 | return result; |
3776 | 0 | } |
3777 | | |
3778 | | // ggml_get_rows |
3779 | | |
3780 | | struct ggml_tensor * ggml_get_rows( |
3781 | | struct ggml_context * ctx, |
3782 | | struct ggml_tensor * a, |
3783 | 0 | struct ggml_tensor * b) { |
3784 | 0 | GGML_ASSERT(a->ne[2] == b->ne[1]); |
3785 | 0 | GGML_ASSERT(a->ne[3] == b->ne[2]); |
3786 | 0 | GGML_ASSERT(b->ne[3] == 1); |
3787 | 0 | GGML_ASSERT(b->type == GGML_TYPE_I32); |
3788 | | |
3789 | | // TODO: implement non F32 return |
3790 | 0 | enum ggml_type type = GGML_TYPE_F32; |
3791 | 0 | if (a->type == GGML_TYPE_I32) { |
3792 | 0 | type = a->type; |
3793 | 0 | } |
3794 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]); |
3795 | |
|
3796 | 0 | result->op = GGML_OP_GET_ROWS; |
3797 | 0 | result->src[0] = a; |
3798 | 0 | result->src[1] = b; |
3799 | |
|
3800 | 0 | return result; |
3801 | 0 | } |
3802 | | |
3803 | | // ggml_get_rows_back |
3804 | | |
3805 | | struct ggml_tensor * ggml_get_rows_back( |
3806 | | struct ggml_context * ctx, |
3807 | | struct ggml_tensor * a, |
3808 | | struct ggml_tensor * b, |
3809 | 0 | struct ggml_tensor * c) { |
3810 | 0 | GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32); |
3811 | 0 | GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0])); |
3812 | | |
3813 | | // TODO: implement non F32 return |
3814 | | //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]); |
3815 | 0 | struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]); |
3816 | |
|
3817 | 0 | result->op = GGML_OP_GET_ROWS_BACK; |
3818 | 0 | result->src[0] = a; |
3819 | 0 | result->src[1] = b; |
3820 | |
|
3821 | 0 | return result; |
3822 | 0 | } |
3823 | | |
3824 | | // ggml_set_rows |
3825 | | |
3826 | | struct ggml_tensor * ggml_set_rows( |
3827 | | struct ggml_context * ctx, |
3828 | | struct ggml_tensor * a, |
3829 | | struct ggml_tensor * b, |
3830 | 0 | struct ggml_tensor * c) { |
3831 | 0 | GGML_ASSERT(a->ne[0] == b->ne[0]); |
3832 | 0 | GGML_ASSERT(a->ne[2] == b->ne[2]); |
3833 | 0 | GGML_ASSERT(a->ne[3] == b->ne[3]); |
3834 | 0 | GGML_ASSERT(b->ne[1] == c->ne[0]); |
3835 | 0 | GGML_ASSERT(b->ne[2] % c->ne[1] == 0); |
3836 | 0 | GGML_ASSERT(b->ne[3] % c->ne[2] == 0); |
3837 | 0 | GGML_ASSERT(c->ne[3] == 1); |
3838 | 0 | GGML_ASSERT(b->type == GGML_TYPE_F32); |
3839 | 0 | GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32); |
3840 | |
|
3841 | 0 | GGML_ASSERT(ggml_is_contiguous_rows(a)); |
3842 | 0 | GGML_ASSERT(ggml_is_contiguous_rows(b)); |
3843 | |
|
3844 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
3845 | |
|
3846 | 0 | result->op = GGML_OP_SET_ROWS; |
3847 | 0 | result->src[0] = b; |
3848 | 0 | result->src[1] = c; |
3849 | 0 | result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931) |
3850 | |
|
3851 | 0 | return result; |
3852 | 0 | } |
3853 | | |
3854 | | // ggml_diag |
3855 | | |
3856 | | struct ggml_tensor * ggml_diag( |
3857 | | struct ggml_context * ctx, |
3858 | 0 | struct ggml_tensor * a) { |
3859 | 0 | GGML_ASSERT(a->ne[1] == 1); |
3860 | |
|
3861 | 0 | const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] }; |
3862 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne); |
3863 | |
|
3864 | 0 | result->op = GGML_OP_DIAG; |
3865 | 0 | result->src[0] = a; |
3866 | |
|
3867 | 0 | return result; |
3868 | 0 | } |
3869 | | |
3870 | | // ggml_diag_mask_inf |
3871 | | |
3872 | | static struct ggml_tensor * ggml_diag_mask_inf_impl( |
3873 | | struct ggml_context * ctx, |
3874 | | struct ggml_tensor * a, |
3875 | | int n_past, |
3876 | 0 | bool inplace) { |
3877 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3878 | |
|
3879 | 0 | int32_t params[] = { n_past }; |
3880 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
3881 | |
|
3882 | 0 | result->op = GGML_OP_DIAG_MASK_INF; |
3883 | 0 | result->src[0] = a; |
3884 | |
|
3885 | 0 | return result; |
3886 | 0 | } |
3887 | | |
3888 | | struct ggml_tensor * ggml_diag_mask_inf( |
3889 | | struct ggml_context * ctx, |
3890 | | struct ggml_tensor * a, |
3891 | 0 | int n_past) { |
3892 | 0 | return ggml_diag_mask_inf_impl(ctx, a, n_past, false); |
3893 | 0 | } |
3894 | | |
3895 | | struct ggml_tensor * ggml_diag_mask_inf_inplace( |
3896 | | struct ggml_context * ctx, |
3897 | | struct ggml_tensor * a, |
3898 | 0 | int n_past) { |
3899 | 0 | return ggml_diag_mask_inf_impl(ctx, a, n_past, true); |
3900 | 0 | } |
3901 | | |
3902 | | // ggml_diag_mask_zero |
3903 | | |
3904 | | static struct ggml_tensor * ggml_diag_mask_zero_impl( |
3905 | | struct ggml_context * ctx, |
3906 | | struct ggml_tensor * a, |
3907 | | int n_past, |
3908 | 0 | bool inplace) { |
3909 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3910 | |
|
3911 | 0 | int32_t params[] = { n_past }; |
3912 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
3913 | |
|
3914 | 0 | result->op = GGML_OP_DIAG_MASK_ZERO; |
3915 | 0 | result->src[0] = a; |
3916 | |
|
3917 | 0 | return result; |
3918 | 0 | } |
3919 | | |
3920 | | struct ggml_tensor * ggml_diag_mask_zero( |
3921 | | struct ggml_context * ctx, |
3922 | | struct ggml_tensor * a, |
3923 | 0 | int n_past) { |
3924 | 0 | return ggml_diag_mask_zero_impl(ctx, a, n_past, false); |
3925 | 0 | } |
3926 | | |
3927 | | struct ggml_tensor * ggml_diag_mask_zero_inplace( |
3928 | | struct ggml_context * ctx, |
3929 | | struct ggml_tensor * a, |
3930 | 0 | int n_past) { |
3931 | 0 | return ggml_diag_mask_zero_impl(ctx, a, n_past, true); |
3932 | 0 | } |
3933 | | |
3934 | | // ggml_soft_max |
3935 | | |
3936 | | static struct ggml_tensor * ggml_soft_max_impl( |
3937 | | struct ggml_context * ctx, |
3938 | | struct ggml_tensor * a, |
3939 | | struct ggml_tensor * mask, |
3940 | | float scale, |
3941 | | float max_bias, |
3942 | 0 | bool inplace) { |
3943 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3944 | |
|
3945 | 0 | if (mask) { |
3946 | 0 | GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32); |
3947 | 0 | GGML_ASSERT(ggml_is_contiguous(mask)); |
3948 | 0 | GGML_ASSERT(mask->ne[0] == a->ne[0]); |
3949 | 0 | GGML_ASSERT(mask->ne[1] >= a->ne[1]); |
3950 | 0 | GGML_ASSERT(a->ne[2]%mask->ne[2] == 0); |
3951 | 0 | GGML_ASSERT(a->ne[3]%mask->ne[3] == 0); |
3952 | 0 | } |
3953 | |
|
3954 | 0 | if (max_bias > 0.0f) { |
3955 | 0 | GGML_ASSERT(mask); |
3956 | 0 | } |
3957 | |
|
3958 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3959 | |
|
3960 | 0 | float params[] = { scale, max_bias }; |
3961 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
3962 | |
|
3963 | 0 | result->op = GGML_OP_SOFT_MAX; |
3964 | 0 | result->src[0] = a; |
3965 | 0 | result->src[1] = mask; |
3966 | |
|
3967 | 0 | return result; |
3968 | 0 | } |
3969 | | |
3970 | | struct ggml_tensor * ggml_soft_max( |
3971 | | struct ggml_context * ctx, |
3972 | 0 | struct ggml_tensor * a) { |
3973 | 0 | return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false); |
3974 | 0 | } |
3975 | | |
3976 | | struct ggml_tensor * ggml_soft_max_inplace( |
3977 | | struct ggml_context * ctx, |
3978 | 0 | struct ggml_tensor * a) { |
3979 | 0 | return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true); |
3980 | 0 | } |
3981 | | |
3982 | | struct ggml_tensor * ggml_soft_max_ext( |
3983 | | struct ggml_context * ctx, |
3984 | | struct ggml_tensor * a, |
3985 | | struct ggml_tensor * mask, |
3986 | | float scale, |
3987 | 0 | float max_bias) { |
3988 | 0 | return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false); |
3989 | 0 | } |
3990 | | |
3991 | | struct ggml_tensor * ggml_soft_max_ext_inplace( |
3992 | | struct ggml_context * ctx, |
3993 | | struct ggml_tensor * a, |
3994 | | struct ggml_tensor * mask, |
3995 | | float scale, |
3996 | 0 | float max_bias) { |
3997 | 0 | return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true); |
3998 | 0 | } |
3999 | | |
4000 | | void ggml_soft_max_add_sinks( |
4001 | | struct ggml_tensor * a, |
4002 | 0 | struct ggml_tensor * sinks) { |
4003 | 0 | if (!sinks) { |
4004 | 0 | a->src[2] = NULL; |
4005 | 0 | return; |
4006 | 0 | } |
4007 | | |
4008 | 0 | GGML_ASSERT(a->op == GGML_OP_SOFT_MAX); |
4009 | 0 | GGML_ASSERT(a->src[2] == NULL); |
4010 | 0 | GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]); |
4011 | 0 | GGML_ASSERT(sinks->type == GGML_TYPE_F32); |
4012 | |
|
4013 | 0 | a->src[2] = sinks; |
4014 | 0 | } |
4015 | | |
4016 | | // ggml_soft_max_ext_back |
4017 | | |
4018 | | static struct ggml_tensor * ggml_soft_max_ext_back_impl( |
4019 | | struct ggml_context * ctx, |
4020 | | struct ggml_tensor * a, |
4021 | | struct ggml_tensor * b, |
4022 | | float scale, |
4023 | | float max_bias, |
4024 | 0 | bool inplace) { |
4025 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
4026 | |
|
4027 | 0 | result->op = GGML_OP_SOFT_MAX_BACK; |
4028 | 0 | result->src[0] = a; |
4029 | 0 | result->src[1] = b; |
4030 | |
|
4031 | 0 | memcpy((float *) result->op_params + 0, &scale, sizeof(float)); |
4032 | 0 | memcpy((float *) result->op_params + 1, &max_bias, sizeof(float)); |
4033 | |
|
4034 | 0 | return result; |
4035 | 0 | } |
4036 | | |
4037 | | struct ggml_tensor * ggml_soft_max_ext_back( |
4038 | | struct ggml_context * ctx, |
4039 | | struct ggml_tensor * a, |
4040 | | struct ggml_tensor * b, |
4041 | | float scale, |
4042 | 0 | float max_bias) { |
4043 | 0 | return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false); |
4044 | 0 | } |
4045 | | |
4046 | | struct ggml_tensor * ggml_soft_max_ext_back_inplace( |
4047 | | struct ggml_context * ctx, |
4048 | | struct ggml_tensor * a, |
4049 | | struct ggml_tensor * b, |
4050 | | float scale, |
4051 | 0 | float max_bias) { |
4052 | 0 | return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true); |
4053 | 0 | } |
4054 | | |
4055 | | // ggml_rope |
4056 | | |
4057 | | static struct ggml_tensor * ggml_rope_impl( |
4058 | | struct ggml_context * ctx, |
4059 | | struct ggml_tensor * a, |
4060 | | struct ggml_tensor * b, |
4061 | | struct ggml_tensor * c, |
4062 | | int n_dims, |
4063 | | int sections[GGML_MROPE_SECTIONS], |
4064 | | int mode, |
4065 | | int n_ctx_orig, |
4066 | | float freq_base, |
4067 | | float freq_scale, |
4068 | | float ext_factor, |
4069 | | float attn_factor, |
4070 | | float beta_fast, |
4071 | | float beta_slow, |
4072 | 0 | bool inplace) { |
4073 | 0 | GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported"); |
4074 | |
|
4075 | 0 | GGML_ASSERT(ggml_is_vector(b)); |
4076 | 0 | GGML_ASSERT(b->type == GGML_TYPE_I32); |
4077 | |
|
4078 | 0 | bool mrope_used = mode & GGML_ROPE_TYPE_MROPE; |
4079 | 0 | if (mrope_used) { |
4080 | 0 | GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token |
4081 | 0 | } else { |
4082 | 0 | GGML_ASSERT(a->ne[2] == b->ne[0]); |
4083 | 0 | } |
4084 | |
|
4085 | 0 | if (c) { |
4086 | 0 | GGML_ASSERT(c->type == GGML_TYPE_F32); |
4087 | 0 | GGML_ASSERT(c->ne[0] >= n_dims / 2); |
4088 | 0 | } |
4089 | |
|
4090 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
4091 | |
|
4092 | 0 | int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig }; |
4093 | 0 | memcpy(params + 5, &freq_base, sizeof(float)); |
4094 | 0 | memcpy(params + 6, &freq_scale, sizeof(float)); |
4095 | 0 | memcpy(params + 7, &ext_factor, sizeof(float)); |
4096 | 0 | memcpy(params + 8, &attn_factor, sizeof(float)); |
4097 | 0 | memcpy(params + 9, &beta_fast, sizeof(float)); |
4098 | 0 | memcpy(params + 10, &beta_slow, sizeof(float)); |
4099 | 0 | if (mrope_used && sections) { |
4100 | 0 | memcpy(params + 11, sections, sizeof(int32_t) * GGML_MROPE_SECTIONS); |
4101 | 0 | } else { |
4102 | 0 | memset(params + 11, 0, sizeof(int32_t) * GGML_MROPE_SECTIONS); |
4103 | 0 | } |
4104 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4105 | |
|
4106 | 0 | result->op = GGML_OP_ROPE; |
4107 | 0 | result->src[0] = a; |
4108 | 0 | result->src[1] = b; |
4109 | 0 | result->src[2] = c; |
4110 | |
|
4111 | 0 | return result; |
4112 | 0 | } |
4113 | | |
4114 | | struct ggml_tensor * ggml_rope( |
4115 | | struct ggml_context * ctx, |
4116 | | struct ggml_tensor * a, |
4117 | | struct ggml_tensor * b, |
4118 | | int n_dims, |
4119 | 0 | int mode) { |
4120 | 0 | return ggml_rope_impl( |
4121 | 0 | ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false |
4122 | 0 | ); |
4123 | 0 | } |
4124 | | |
4125 | | struct ggml_tensor * ggml_rope_multi( |
4126 | | struct ggml_context * ctx, |
4127 | | struct ggml_tensor * a, |
4128 | | struct ggml_tensor * b, |
4129 | | struct ggml_tensor * c, |
4130 | | int n_dims, |
4131 | | int sections[GGML_MROPE_SECTIONS], |
4132 | | int mode, |
4133 | | int n_ctx_orig, |
4134 | | float freq_base, |
4135 | | float freq_scale, |
4136 | | float ext_factor, |
4137 | | float attn_factor, |
4138 | | float beta_fast, |
4139 | 0 | float beta_slow) { |
4140 | 0 | return ggml_rope_impl( |
4141 | 0 | ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, |
4142 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, false |
4143 | 0 | ); |
4144 | 0 | } |
4145 | | |
4146 | | struct ggml_tensor * ggml_rope_multi_inplace( |
4147 | | struct ggml_context * ctx, |
4148 | | struct ggml_tensor * a, |
4149 | | struct ggml_tensor * b, |
4150 | | struct ggml_tensor * c, |
4151 | | int n_dims, |
4152 | | int sections[GGML_MROPE_SECTIONS], |
4153 | | int mode, |
4154 | | int n_ctx_orig, |
4155 | | float freq_base, |
4156 | | float freq_scale, |
4157 | | float ext_factor, |
4158 | | float attn_factor, |
4159 | | float beta_fast, |
4160 | 0 | float beta_slow) { |
4161 | 0 | return ggml_rope_impl( |
4162 | 0 | ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, |
4163 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, true |
4164 | 0 | ); |
4165 | 0 | } |
4166 | | |
4167 | | struct ggml_tensor * ggml_rope_inplace( |
4168 | | struct ggml_context * ctx, |
4169 | | struct ggml_tensor * a, |
4170 | | struct ggml_tensor * b, |
4171 | | int n_dims, |
4172 | 0 | int mode) { |
4173 | 0 | return ggml_rope_impl( |
4174 | 0 | ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true |
4175 | 0 | ); |
4176 | 0 | } |
4177 | | |
4178 | | struct ggml_tensor * ggml_rope_ext( |
4179 | | struct ggml_context * ctx, |
4180 | | struct ggml_tensor * a, |
4181 | | struct ggml_tensor * b, |
4182 | | struct ggml_tensor * c, |
4183 | | int n_dims, |
4184 | | int mode, |
4185 | | int n_ctx_orig, |
4186 | | float freq_base, |
4187 | | float freq_scale, |
4188 | | float ext_factor, |
4189 | | float attn_factor, |
4190 | | float beta_fast, |
4191 | 0 | float beta_slow) { |
4192 | 0 | return ggml_rope_impl( |
4193 | 0 | ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, |
4194 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, false |
4195 | 0 | ); |
4196 | 0 | } |
4197 | | |
4198 | | struct ggml_tensor * ggml_rope_ext_inplace( |
4199 | | struct ggml_context * ctx, |
4200 | | struct ggml_tensor * a, |
4201 | | struct ggml_tensor * b, |
4202 | | struct ggml_tensor * c, |
4203 | | int n_dims, |
4204 | | int mode, |
4205 | | int n_ctx_orig, |
4206 | | float freq_base, |
4207 | | float freq_scale, |
4208 | | float ext_factor, |
4209 | | float attn_factor, |
4210 | | float beta_fast, |
4211 | 0 | float beta_slow) { |
4212 | 0 | return ggml_rope_impl( |
4213 | 0 | ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, |
4214 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, true |
4215 | 0 | ); |
4216 | 0 | } |
4217 | | |
4218 | | struct ggml_tensor * ggml_rope_custom( |
4219 | | struct ggml_context * ctx, |
4220 | | struct ggml_tensor * a, |
4221 | | struct ggml_tensor * b, |
4222 | | int n_dims, |
4223 | | int mode, |
4224 | | int n_ctx_orig, |
4225 | | float freq_base, |
4226 | | float freq_scale, |
4227 | | float ext_factor, |
4228 | | float attn_factor, |
4229 | | float beta_fast, |
4230 | 0 | float beta_slow) { |
4231 | 0 | return ggml_rope_impl( |
4232 | 0 | ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, |
4233 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, false |
4234 | 0 | ); |
4235 | 0 | } |
4236 | | |
4237 | | struct ggml_tensor * ggml_rope_custom_inplace( |
4238 | | struct ggml_context * ctx, |
4239 | | struct ggml_tensor * a, |
4240 | | struct ggml_tensor * b, |
4241 | | int n_dims, |
4242 | | int mode, |
4243 | | int n_ctx_orig, |
4244 | | float freq_base, |
4245 | | float freq_scale, |
4246 | | float ext_factor, |
4247 | | float attn_factor, |
4248 | | float beta_fast, |
4249 | 0 | float beta_slow) { |
4250 | 0 | return ggml_rope_impl( |
4251 | 0 | ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, |
4252 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, true |
4253 | 0 | ); |
4254 | 0 | } |
4255 | | |
4256 | | // Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get |
4257 | | // `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))` |
4258 | 0 | static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) { |
4259 | 0 | return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base)); |
4260 | 0 | } |
4261 | | |
4262 | | void ggml_rope_yarn_corr_dims( |
4263 | | int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2] |
4264 | 0 | ) { |
4265 | | // start and end correction dims |
4266 | 0 | float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base)); |
4267 | 0 | float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base)); |
4268 | 0 | dims[0] = MAX(0, start); |
4269 | 0 | dims[1] = MIN(n_dims - 1, end); |
4270 | 0 | } |
4271 | | |
4272 | | // ggml_rope_back |
4273 | | |
4274 | | struct ggml_tensor * ggml_rope_ext_back( |
4275 | | struct ggml_context * ctx, |
4276 | | struct ggml_tensor * a, |
4277 | | struct ggml_tensor * b, |
4278 | | struct ggml_tensor * c, |
4279 | | int n_dims, |
4280 | | int mode, |
4281 | | int n_ctx_orig, |
4282 | | float freq_base, |
4283 | | float freq_scale, |
4284 | | float ext_factor, |
4285 | | float attn_factor, |
4286 | | float beta_fast, |
4287 | 0 | float beta_slow) { |
4288 | 0 | struct ggml_tensor * result = ggml_rope_ext( |
4289 | 0 | ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow); |
4290 | 0 | result->op = GGML_OP_ROPE_BACK; |
4291 | 0 | return result; |
4292 | 0 | } |
4293 | | |
4294 | | struct ggml_tensor * ggml_rope_multi_back( |
4295 | | struct ggml_context * ctx, |
4296 | | struct ggml_tensor * a, |
4297 | | struct ggml_tensor * b, |
4298 | | struct ggml_tensor * c, |
4299 | | int n_dims, |
4300 | | int sections[4], |
4301 | | int mode, |
4302 | | int n_ctx_orig, |
4303 | | float freq_base, |
4304 | | float freq_scale, |
4305 | | float ext_factor, |
4306 | | float attn_factor, |
4307 | | float beta_fast, |
4308 | 0 | float beta_slow) { |
4309 | 0 | struct ggml_tensor * result = ggml_rope_multi( |
4310 | 0 | ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow); |
4311 | 0 | result->op = GGML_OP_ROPE_BACK; |
4312 | 0 | return result; |
4313 | 0 | } |
4314 | | // ggml_clamp |
4315 | | |
4316 | | struct ggml_tensor * ggml_clamp( |
4317 | | struct ggml_context * ctx, |
4318 | | struct ggml_tensor * a, |
4319 | | float min, |
4320 | 0 | float max) { |
4321 | | // TODO: when implement backward, fix this: |
4322 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
4323 | |
|
4324 | 0 | float params[] = { min, max }; |
4325 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4326 | |
|
4327 | 0 | result->op = GGML_OP_CLAMP; |
4328 | 0 | result->src[0] = a; |
4329 | |
|
4330 | 0 | return result; |
4331 | 0 | } |
4332 | | |
4333 | 0 | static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) { |
4334 | 0 | return (ins + 2 * p - d * (ks - 1) - 1) / s + 1; |
4335 | 0 | } |
4336 | | |
4337 | | // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW] |
4338 | | // a: [OC,IC, KH, KW] |
4339 | | // b: [N, IC, IH, IW] |
4340 | | // result: [N, OH, OW, IC*KH*KW] |
4341 | | struct ggml_tensor * ggml_im2col( |
4342 | | struct ggml_context * ctx, |
4343 | | struct ggml_tensor * a, |
4344 | | struct ggml_tensor * b, |
4345 | | int s0, |
4346 | | int s1, |
4347 | | int p0, |
4348 | | int p1, |
4349 | | int d0, |
4350 | | int d1, |
4351 | | bool is_2D, |
4352 | 0 | enum ggml_type dst_type) { |
4353 | 0 | if (is_2D) { |
4354 | 0 | GGML_ASSERT(a->ne[2] == b->ne[2]); |
4355 | 0 | } else { |
4356 | | //GGML_ASSERT(b->ne[1] % a->ne[1] == 0); |
4357 | 0 | GGML_ASSERT(b->ne[1] == a->ne[1]); |
4358 | 0 | GGML_ASSERT(b->ne[3] == 1); |
4359 | 0 | } |
4360 | |
|
4361 | 0 | const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0; |
4362 | 0 | const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0); |
4363 | |
|
4364 | 0 | GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a"); |
4365 | 0 | GGML_ASSERT((OW > 0) && "b too small compared to a"); |
4366 | |
|
4367 | 0 | const int64_t ne[4] = { |
4368 | 0 | is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0], |
4369 | 0 | OW, |
4370 | 0 | is_2D ? OH : b->ne[2], |
4371 | 0 | is_2D ? b->ne[3] : 1, |
4372 | 0 | }; |
4373 | |
|
4374 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne); |
4375 | 0 | int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) }; |
4376 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4377 | |
|
4378 | 0 | result->op = GGML_OP_IM2COL; |
4379 | 0 | result->src[0] = a; |
4380 | 0 | result->src[1] = b; |
4381 | |
|
4382 | 0 | return result; |
4383 | 0 | } |
4384 | | |
4385 | | struct ggml_tensor * ggml_im2col_back( |
4386 | | struct ggml_context * ctx, |
4387 | | struct ggml_tensor * a, |
4388 | | struct ggml_tensor * b, |
4389 | | int64_t * ne, |
4390 | | int s0, |
4391 | | int s1, |
4392 | | int p0, |
4393 | | int p1, |
4394 | | int d0, |
4395 | | int d1, |
4396 | 0 | bool is_2D) { |
4397 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4398 | 0 | int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) }; |
4399 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4400 | |
|
4401 | 0 | result->op = GGML_OP_IM2COL_BACK; |
4402 | 0 | result->src[0] = a; |
4403 | 0 | result->src[1] = b; |
4404 | |
|
4405 | 0 | return result; |
4406 | 0 | } |
4407 | | |
4408 | | // ggml_conv_1d |
4409 | | |
4410 | | struct ggml_tensor * ggml_conv_1d( |
4411 | | struct ggml_context * ctx, |
4412 | | struct ggml_tensor * a, |
4413 | | struct ggml_tensor * b, |
4414 | | int s0, |
4415 | | int p0, |
4416 | 0 | int d0) { |
4417 | 0 | struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K] |
4418 | |
|
4419 | 0 | struct ggml_tensor * result = |
4420 | 0 | ggml_mul_mat(ctx, |
4421 | 0 | ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K] |
4422 | 0 | ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2])); // [OC,IC, K] => [OC, IC * K] |
4423 | |
|
4424 | 0 | result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL] |
4425 | |
|
4426 | 0 | return result; |
4427 | 0 | } |
4428 | | |
4429 | | // ggml_conv_1d_ph |
4430 | | |
4431 | | struct ggml_tensor* ggml_conv_1d_ph( |
4432 | | struct ggml_context * ctx, |
4433 | | struct ggml_tensor * a, |
4434 | | struct ggml_tensor * b, |
4435 | | int s, |
4436 | 0 | int d) { |
4437 | 0 | return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d); |
4438 | 0 | } |
4439 | | |
4440 | | // ggml_conv_1d_dw |
4441 | | |
4442 | | struct ggml_tensor * ggml_conv_1d_dw( |
4443 | | struct ggml_context * ctx, |
4444 | | struct ggml_tensor * a, |
4445 | | struct ggml_tensor * b, |
4446 | | int s0, |
4447 | | int p0, |
4448 | 0 | int d0) { |
4449 | 0 | struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]); |
4450 | |
|
4451 | 0 | struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); |
4452 | |
|
4453 | 0 | struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a); |
4454 | |
|
4455 | 0 | result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1); |
4456 | |
|
4457 | 0 | return result; |
4458 | 0 | } |
4459 | | |
4460 | | // ggml_conv_1d_dw_ph |
4461 | | |
4462 | | struct ggml_tensor * ggml_conv_1d_dw_ph( |
4463 | | struct ggml_context * ctx, |
4464 | | struct ggml_tensor * a, |
4465 | | struct ggml_tensor * b, |
4466 | | int s0, |
4467 | 0 | int d0) { |
4468 | 0 | return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0); |
4469 | 0 | } |
4470 | | |
4471 | | // ggml_conv_transpose_1d |
4472 | | |
4473 | 0 | static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) { |
4474 | 0 | return (ins - 1) * s - 2 * p + d * (ks - 1) + 1; |
4475 | 0 | } |
4476 | | |
4477 | | GGML_API struct ggml_tensor * ggml_conv_transpose_1d( |
4478 | | struct ggml_context * ctx, |
4479 | | struct ggml_tensor * a, |
4480 | | struct ggml_tensor * b, |
4481 | | int s0, |
4482 | | int p0, |
4483 | 0 | int d0) { |
4484 | 0 | GGML_ASSERT(ggml_is_matrix(b)); |
4485 | 0 | GGML_ASSERT(a->ne[2] == b->ne[1]); |
4486 | 0 | GGML_ASSERT(a->ne[3] == 1); |
4487 | |
|
4488 | 0 | GGML_ASSERT(p0 == 0); |
4489 | 0 | GGML_ASSERT(d0 == 1); |
4490 | |
|
4491 | 0 | const int64_t ne[4] = { |
4492 | 0 | ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/), |
4493 | 0 | a->ne[1], b->ne[2], 1, |
4494 | 0 | }; |
4495 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4496 | |
|
4497 | 0 | int32_t params[] = { s0, p0, d0 }; |
4498 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4499 | |
|
4500 | 0 | result->op = GGML_OP_CONV_TRANSPOSE_1D; |
4501 | 0 | result->src[0] = a; |
4502 | 0 | result->src[1] = b; |
4503 | |
|
4504 | 0 | return result; |
4505 | 0 | } |
4506 | | |
4507 | | // ggml_conv_2d |
4508 | | |
4509 | | // a: [OC,IC, KH, KW] |
4510 | | // b: [N, IC, IH, IW] |
4511 | | // result: [N, OC, OH, OW] |
4512 | | struct ggml_tensor * ggml_conv_2d( |
4513 | | struct ggml_context * ctx, |
4514 | | struct ggml_tensor * a, |
4515 | | struct ggml_tensor * b, |
4516 | | int s0, |
4517 | | int s1, |
4518 | | int p0, |
4519 | | int p1, |
4520 | | int d0, |
4521 | 0 | int d1) { |
4522 | 0 | struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW] |
4523 | |
|
4524 | 0 | struct ggml_tensor * result = |
4525 | 0 | ggml_mul_mat(ctx, |
4526 | 0 | ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW] |
4527 | 0 | ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3])); // [OC,IC, KH, KW] => [OC, IC * KH * KW] |
4528 | |
|
4529 | 0 | result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW] |
4530 | 0 | result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW] |
4531 | | |
4532 | |
|
4533 | 0 | return result; |
4534 | 0 | } |
4535 | | |
4536 | | // a: [OC*IC, KD, KH, KW] |
4537 | | // b: [N*IC, ID, IH, IW] |
4538 | | // result: [N*OD, OH, OW, IC * KD * KH * KW] |
4539 | | struct ggml_tensor * ggml_im2col_3d( |
4540 | | struct ggml_context * ctx, |
4541 | | struct ggml_tensor * a, |
4542 | | struct ggml_tensor * b, |
4543 | | int64_t IC, |
4544 | | int s0, // stride width |
4545 | | int s1, // stride height |
4546 | | int s2, // stride depth |
4547 | | int p0, // padding width |
4548 | | int p1, // padding height |
4549 | | int p2, // padding depth |
4550 | | int d0, // dilation width |
4551 | | int d1, // dilation height |
4552 | | int d2, // dilation depth |
4553 | 0 | enum ggml_type dst_type) { |
4554 | 0 | const int64_t N = b->ne[3] / IC; |
4555 | 0 | const int64_t ID = b->ne[2]; |
4556 | 0 | const int64_t IH = b->ne[1]; |
4557 | 0 | const int64_t IW = b->ne[0]; |
4558 | |
|
4559 | 0 | const int64_t OC = a->ne[3] / IC; |
4560 | 0 | UNUSED(OC); |
4561 | 0 | const int64_t KD = a->ne[2]; |
4562 | 0 | const int64_t KH = a->ne[1]; |
4563 | 0 | const int64_t KW = a->ne[0]; |
4564 | 0 | const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2); |
4565 | 0 | const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1); |
4566 | 0 | const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0); |
4567 | |
|
4568 | 0 | GGML_ASSERT((OD > 0) && "b too small compared to a"); |
4569 | 0 | GGML_ASSERT((OH > 0) && "b too small compared to a"); |
4570 | 0 | GGML_ASSERT((OW > 0) && "b too small compared to a"); |
4571 | | |
4572 | |
|
4573 | 0 | const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N}; |
4574 | |
|
4575 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne); |
4576 | 0 | int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC}; |
4577 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4578 | |
|
4579 | 0 | result->op = GGML_OP_IM2COL_3D; |
4580 | 0 | result->src[0] = a; |
4581 | 0 | result->src[1] = b; |
4582 | |
|
4583 | 0 | return result; |
4584 | 0 | } |
4585 | | |
4586 | | // a: [OC*IC, KD, KH, KW] |
4587 | | // b: [N*IC, ID, IH, IW] |
4588 | | // result: [N*OC, OD, OH, OW] |
4589 | | struct ggml_tensor * ggml_conv_3d( |
4590 | | struct ggml_context * ctx, |
4591 | | struct ggml_tensor * a, |
4592 | | struct ggml_tensor * b, |
4593 | | int64_t IC, |
4594 | | int s0, // stride width |
4595 | | int s1, // stride height |
4596 | | int s2, // stride depth |
4597 | | int p0, // padding width |
4598 | | int p1, // padding height |
4599 | | int p2, // padding depth |
4600 | | int d0, // dilation width |
4601 | | int d1, // dilation height |
4602 | | int d2 // dilation depth |
4603 | 0 | ) { |
4604 | 0 | struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW] |
4605 | |
|
4606 | 0 | int64_t OC = a->ne[3] / IC; |
4607 | 0 | int64_t N = b->ne[3] / IC; |
4608 | 0 | struct ggml_tensor * result = |
4609 | 0 | ggml_mul_mat(ctx, |
4610 | 0 | ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW] |
4611 | 0 | ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC)); // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW] |
4612 | |
|
4613 | 0 | int64_t OD = im2col->ne[3] / N; |
4614 | 0 | result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW] |
4615 | 0 | result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW] |
4616 | 0 | result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW] |
4617 | |
|
4618 | 0 | return result; |
4619 | 0 | } |
4620 | | |
4621 | | // ggml_conv_2d_sk_p0 |
4622 | | |
4623 | | struct ggml_tensor * ggml_conv_2d_sk_p0( |
4624 | | struct ggml_context * ctx, |
4625 | | struct ggml_tensor * a, |
4626 | 0 | struct ggml_tensor * b) { |
4627 | 0 | return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1); |
4628 | 0 | } |
4629 | | |
4630 | | // ggml_conv_2d_s1_ph |
4631 | | |
4632 | | struct ggml_tensor * ggml_conv_2d_s1_ph( |
4633 | | struct ggml_context * ctx, |
4634 | | struct ggml_tensor * a, |
4635 | 0 | struct ggml_tensor * b) { |
4636 | 0 | return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1); |
4637 | 0 | } |
4638 | | |
4639 | | // ggml_conv_2d_dw |
4640 | | |
4641 | | struct ggml_tensor * ggml_conv_2d_dw( |
4642 | | struct ggml_context * ctx, |
4643 | | struct ggml_tensor * a, |
4644 | | struct ggml_tensor * b, |
4645 | | int s0, |
4646 | | int s1, |
4647 | | int p0, |
4648 | | int p1, |
4649 | | int d0, |
4650 | 0 | int d1) { |
4651 | 0 | struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]); |
4652 | 0 | struct ggml_tensor * im2col = ggml_im2col(ctx, new_a, |
4653 | 0 | ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]), |
4654 | 0 | s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW] |
4655 | 0 | struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW] |
4656 | |
|
4657 | 0 | new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW] |
4658 | 0 | struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b); |
4659 | 0 | result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW] |
4660 | |
|
4661 | 0 | return result; |
4662 | 0 | } |
4663 | | |
4664 | | // ggml_conv_2d_dw_direct |
4665 | | |
4666 | | struct ggml_tensor * ggml_conv_2d_dw_direct( |
4667 | | struct ggml_context * ctx, |
4668 | | struct ggml_tensor * a, |
4669 | | struct ggml_tensor * b, |
4670 | | int stride0, |
4671 | | int stride1, |
4672 | | int pad0, |
4673 | | int pad1, |
4674 | | int dilation0, |
4675 | 0 | int dilation1) { |
4676 | 0 | GGML_ASSERT(a->ne[2] == 1); |
4677 | 0 | GGML_ASSERT(a->ne[3] == b->ne[2]); |
4678 | 0 | int64_t ne[4]; |
4679 | 0 | ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0); |
4680 | 0 | ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1); |
4681 | 0 | ne[2] = b->ne[2]; |
4682 | 0 | ne[3] = b->ne[3]; |
4683 | |
|
4684 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne); |
4685 | |
|
4686 | 0 | if (ggml_is_contiguous_channels(b)) { |
4687 | | // Result will be permuted the same way as input (CWHN order) |
4688 | 0 | const int64_t type_size = ggml_type_size(result->type); |
4689 | 0 | GGML_ASSERT(ggml_blck_size(result->type) == 1); |
4690 | 0 | result->nb[0] = result->ne[2] * type_size; |
4691 | 0 | result->nb[1] = result->ne[0] * result->nb[0]; |
4692 | 0 | result->nb[2] = type_size; |
4693 | 0 | } |
4694 | |
|
4695 | 0 | int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 }; |
4696 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4697 | |
|
4698 | 0 | result->op = GGML_OP_CONV_2D_DW; |
4699 | 0 | result->src[0] = a; |
4700 | 0 | result->src[1] = b; |
4701 | 0 | return result; |
4702 | 0 | } |
4703 | | |
4704 | | // ggml_conv_2d_direct |
4705 | | |
4706 | | struct ggml_tensor * ggml_conv_2d_direct( |
4707 | | struct ggml_context * ctx, |
4708 | | struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC] |
4709 | | struct ggml_tensor * b, // input data [W, H, C, N] |
4710 | | int s0, // stride dimension 0 |
4711 | | int s1, // stride dimension 1 |
4712 | | int p0, // padding dimension 0 |
4713 | | int p1, // padding dimension 1 |
4714 | | int d0, // dilation dimension 0 |
4715 | 0 | int d1) {// dilation dimension 1 |
4716 | |
|
4717 | 0 | GGML_ASSERT(a->ne[2] == b->ne[2]); |
4718 | | //GGML_ASSERT(a->type == b->type); |
4719 | |
|
4720 | 0 | int64_t ne[4]; |
4721 | 0 | ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0); |
4722 | 0 | ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1); |
4723 | 0 | ne[2] = a->ne[3]; |
4724 | 0 | ne[3] = b->ne[3]; |
4725 | |
|
4726 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne); |
4727 | |
|
4728 | 0 | ggml_set_op_params_i32(result, 0, s0); |
4729 | 0 | ggml_set_op_params_i32(result, 1, s1); |
4730 | 0 | ggml_set_op_params_i32(result, 2, p0); |
4731 | 0 | ggml_set_op_params_i32(result, 3, p1); |
4732 | 0 | ggml_set_op_params_i32(result, 4, d0); |
4733 | 0 | ggml_set_op_params_i32(result, 5, d1); |
4734 | |
|
4735 | 0 | result->op = GGML_OP_CONV_2D; |
4736 | 0 | result->src[0] = a; |
4737 | 0 | result->src[1] = b; |
4738 | |
|
4739 | 0 | return result; |
4740 | 0 | } |
4741 | | |
4742 | | // ggml_conv_3d_direct |
4743 | | |
4744 | | struct ggml_tensor * ggml_conv_3d_direct( |
4745 | | struct ggml_context * ctx, |
4746 | | struct ggml_tensor * a, |
4747 | | struct ggml_tensor * b, |
4748 | | int s0, |
4749 | | int s1, |
4750 | | int s2, |
4751 | | int p0, |
4752 | | int p1, |
4753 | | int p2, |
4754 | | int d0, |
4755 | | int d1, |
4756 | | int d2, |
4757 | | int c, |
4758 | | int n, |
4759 | 0 | int oc) { |
4760 | |
|
4761 | 0 | GGML_ASSERT(a->ne[3] == (int64_t) c * oc); |
4762 | 0 | GGML_ASSERT(b->ne[3] == (int64_t) c * n); |
4763 | |
|
4764 | 0 | int64_t ne[4]; |
4765 | 0 | ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0); |
4766 | 0 | ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1); |
4767 | 0 | ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2); |
4768 | 0 | ne[3] = (int64_t) oc * n; |
4769 | |
|
4770 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4771 | |
|
4772 | 0 | ggml_set_op_params_i32(result, 0, s0); |
4773 | 0 | ggml_set_op_params_i32(result, 1, s1); |
4774 | 0 | ggml_set_op_params_i32(result, 2, s2); |
4775 | 0 | ggml_set_op_params_i32(result, 3, p0); |
4776 | 0 | ggml_set_op_params_i32(result, 4, p1); |
4777 | 0 | ggml_set_op_params_i32(result, 5, p2); |
4778 | 0 | ggml_set_op_params_i32(result, 6, d0); |
4779 | 0 | ggml_set_op_params_i32(result, 7, d1); |
4780 | 0 | ggml_set_op_params_i32(result, 8, d2); |
4781 | 0 | ggml_set_op_params_i32(result, 9, c); |
4782 | 0 | ggml_set_op_params_i32(result, 10, n); |
4783 | 0 | ggml_set_op_params_i32(result, 11, oc); |
4784 | |
|
4785 | 0 | result->op = GGML_OP_CONV_3D; |
4786 | 0 | result->src[0] = a; |
4787 | 0 | result->src[1] = b; |
4788 | |
|
4789 | 0 | return result; |
4790 | 0 | } |
4791 | | |
4792 | | // ggml_conv_transpose_2d_p0 |
4793 | | |
4794 | 0 | static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) { |
4795 | 0 | return (ins - 1) * s - 2 * p + ks; |
4796 | 0 | } |
4797 | | |
4798 | | struct ggml_tensor * ggml_conv_transpose_2d_p0( |
4799 | | struct ggml_context * ctx, |
4800 | | struct ggml_tensor * a, |
4801 | | struct ggml_tensor * b, |
4802 | 0 | int stride) { |
4803 | 0 | GGML_ASSERT(a->ne[3] == b->ne[2]); |
4804 | |
|
4805 | 0 | const int64_t ne[4] = { |
4806 | 0 | ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/), |
4807 | 0 | ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/), |
4808 | 0 | a->ne[2], b->ne[3], |
4809 | 0 | }; |
4810 | |
|
4811 | 0 | struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4812 | |
|
4813 | 0 | ggml_set_op_params_i32(result, 0, stride); |
4814 | |
|
4815 | 0 | result->op = GGML_OP_CONV_TRANSPOSE_2D; |
4816 | 0 | result->src[0] = a; |
4817 | 0 | result->src[1] = b; |
4818 | |
|
4819 | 0 | return result; |
4820 | 0 | } |
4821 | | |
4822 | | // ggml_pool_* |
4823 | | |
4824 | 0 | static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) { |
4825 | 0 | return (ins + 2 * p - ks) / s + 1; |
4826 | 0 | } |
4827 | | |
4828 | | // ggml_pool_1d |
4829 | | |
4830 | | struct ggml_tensor * ggml_pool_1d( |
4831 | | struct ggml_context * ctx, |
4832 | | struct ggml_tensor * a, |
4833 | | enum ggml_op_pool op, |
4834 | | int k0, |
4835 | | int s0, |
4836 | 0 | int p0) { |
4837 | 0 | const int64_t ne[4] = { |
4838 | 0 | ggml_calc_pool_output_size(a->ne[0], k0, s0, p0), |
4839 | 0 | a->ne[1], |
4840 | 0 | a->ne[2], |
4841 | 0 | a->ne[3], |
4842 | 0 | }; |
4843 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4844 | |
|
4845 | 0 | int32_t params[] = { op, k0, s0, p0 }; |
4846 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4847 | |
|
4848 | 0 | result->op = GGML_OP_POOL_1D; |
4849 | 0 | result->src[0] = a; |
4850 | |
|
4851 | 0 | return result; |
4852 | 0 | } |
4853 | | |
4854 | | // ggml_pool_2d |
4855 | | |
4856 | | struct ggml_tensor * ggml_pool_2d( |
4857 | | struct ggml_context * ctx, |
4858 | | struct ggml_tensor * a, |
4859 | | enum ggml_op_pool op, |
4860 | | int k0, |
4861 | | int k1, |
4862 | | int s0, |
4863 | | int s1, |
4864 | | float p0, |
4865 | 0 | float p1) { |
4866 | 0 | struct ggml_tensor * result; |
4867 | 0 | const int64_t ne[4] = { |
4868 | 0 | ggml_calc_pool_output_size(a->ne[0], k0, s0, p0), |
4869 | 0 | ggml_calc_pool_output_size(a->ne[1], k1, s1, p1), |
4870 | 0 | a->ne[2], |
4871 | 0 | a->ne[3], |
4872 | 0 | }; |
4873 | 0 | result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4874 | |
|
4875 | 0 | int32_t params[] = { op, k0, k1, s0, s1, p0, p1 }; |
4876 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4877 | |
|
4878 | 0 | result->op = GGML_OP_POOL_2D; |
4879 | 0 | result->src[0] = a; |
4880 | |
|
4881 | 0 | return result; |
4882 | 0 | } |
4883 | | |
4884 | | struct ggml_tensor * ggml_pool_2d_back( |
4885 | | struct ggml_context * ctx, |
4886 | | struct ggml_tensor * a, |
4887 | | struct ggml_tensor * af, |
4888 | | enum ggml_op_pool op, |
4889 | | int k0, |
4890 | | int k1, |
4891 | | int s0, |
4892 | | int s1, |
4893 | | float p0, |
4894 | 0 | float p1) { |
4895 | 0 | struct ggml_tensor * result; |
4896 | 0 | result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne); |
4897 | |
|
4898 | 0 | int32_t params[] = { op, k0, k1, s0, s1, p0, p1 }; |
4899 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4900 | |
|
4901 | 0 | result->op = GGML_OP_POOL_2D_BACK; |
4902 | 0 | result->src[0] = a; |
4903 | 0 | result->src[1] = af; |
4904 | |
|
4905 | 0 | return result; |
4906 | 0 | } |
4907 | | |
4908 | | // ggml_upscale / ggml_interpolate |
4909 | | |
4910 | | static struct ggml_tensor * ggml_interpolate_impl( |
4911 | | struct ggml_context * ctx, |
4912 | | struct ggml_tensor * a, |
4913 | | int64_t ne0, |
4914 | | int64_t ne1, |
4915 | | int64_t ne2, |
4916 | | int64_t ne3, |
4917 | 0 | uint32_t mode) { |
4918 | 0 | GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT); |
4919 | | // TODO: implement antialias for modes other than bilinear |
4920 | 0 | GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR); |
4921 | |
|
4922 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3); |
4923 | |
|
4924 | 0 | ggml_set_op_params_i32(result, 0, (int32_t)mode); |
4925 | |
|
4926 | 0 | result->op = GGML_OP_UPSCALE; |
4927 | 0 | result->src[0] = a; |
4928 | |
|
4929 | 0 | return result; |
4930 | 0 | } |
4931 | | |
4932 | | struct ggml_tensor * ggml_upscale( |
4933 | | struct ggml_context * ctx, |
4934 | | struct ggml_tensor * a, |
4935 | | int scale_factor, |
4936 | 0 | enum ggml_scale_mode mode) { |
4937 | 0 | GGML_ASSERT(scale_factor > 1); |
4938 | 0 | return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode); |
4939 | 0 | } |
4940 | | |
4941 | | struct ggml_tensor * ggml_upscale_ext( |
4942 | | struct ggml_context * ctx, |
4943 | | struct ggml_tensor * a, |
4944 | | int ne0, |
4945 | | int ne1, |
4946 | | int ne2, |
4947 | | int ne3, |
4948 | 0 | enum ggml_scale_mode mode) { |
4949 | 0 | return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode); |
4950 | 0 | } |
4951 | | |
4952 | | struct ggml_tensor * ggml_interpolate( |
4953 | | struct ggml_context * ctx, |
4954 | | struct ggml_tensor * a, |
4955 | | int64_t ne0, |
4956 | | int64_t ne1, |
4957 | | int64_t ne2, |
4958 | | int64_t ne3, |
4959 | 0 | uint32_t mode) { |
4960 | 0 | return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode); |
4961 | 0 | } |
4962 | | |
4963 | | // ggml_pad |
4964 | | |
4965 | | struct ggml_tensor * ggml_pad( |
4966 | | struct ggml_context * ctx, |
4967 | | struct ggml_tensor * a, |
4968 | | int p0, |
4969 | | int p1, |
4970 | | int p2, |
4971 | 0 | int p3) { |
4972 | 0 | return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3); |
4973 | 0 | } |
4974 | | |
4975 | | // ggml_pad_circular |
4976 | | |
4977 | | struct ggml_tensor * ggml_pad_circular( |
4978 | | struct ggml_context * ctx, |
4979 | | struct ggml_tensor * a, |
4980 | | int p0, |
4981 | | int p1, |
4982 | | int p2, |
4983 | 0 | int p3) { |
4984 | 0 | return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3); |
4985 | 0 | } |
4986 | | |
4987 | | struct ggml_tensor * ggml_pad_ext( |
4988 | | struct ggml_context * ctx, |
4989 | | struct ggml_tensor * a, |
4990 | | int lp0, |
4991 | | int rp0, |
4992 | | int lp1, |
4993 | | int rp1, |
4994 | | int lp2, |
4995 | | int rp2, |
4996 | | int lp3, |
4997 | | int rp3 |
4998 | 0 | ) { |
4999 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, |
5000 | 0 | a->ne[0] + lp0 + rp0, |
5001 | 0 | a->ne[1] + lp1 + rp1, |
5002 | 0 | a->ne[2] + lp2 + rp2, |
5003 | 0 | a->ne[3] + lp3 + rp3); |
5004 | |
|
5005 | 0 | ggml_set_op_params_i32(result, 0, lp0); |
5006 | 0 | ggml_set_op_params_i32(result, 1, rp0); |
5007 | 0 | ggml_set_op_params_i32(result, 2, lp1); |
5008 | 0 | ggml_set_op_params_i32(result, 3, rp1); |
5009 | 0 | ggml_set_op_params_i32(result, 4, lp2); |
5010 | 0 | ggml_set_op_params_i32(result, 5, rp2); |
5011 | 0 | ggml_set_op_params_i32(result, 6, lp3); |
5012 | 0 | ggml_set_op_params_i32(result, 7, rp3); |
5013 | 0 | ggml_set_op_params_i32(result, 8, 0); // not circular by default |
5014 | | |
5015 | |
|
5016 | 0 | result->op = GGML_OP_PAD; |
5017 | 0 | result->src[0] = a; |
5018 | |
|
5019 | 0 | return result; |
5020 | 0 | } |
5021 | | |
5022 | | // ggml_pad_ext_circular |
5023 | | |
5024 | | struct ggml_tensor * ggml_pad_ext_circular( |
5025 | | struct ggml_context * ctx, |
5026 | | struct ggml_tensor * a, |
5027 | | int lp0, |
5028 | | int rp0, |
5029 | | int lp1, |
5030 | | int rp1, |
5031 | | int lp2, |
5032 | | int rp2, |
5033 | | int lp3, |
5034 | | int rp3 |
5035 | 0 | ) { |
5036 | 0 | struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3); |
5037 | 0 | ggml_set_op_params_i32(result, 8, 1); // circular |
5038 | 0 | return result; |
5039 | 0 | } |
5040 | | |
5041 | | // ggml_pad_reflect_1d |
5042 | | |
5043 | | struct ggml_tensor * ggml_pad_reflect_1d( |
5044 | | struct ggml_context * ctx, |
5045 | | struct ggml_tensor * a, |
5046 | | int p0, |
5047 | 0 | int p1) { |
5048 | 0 | GGML_ASSERT(p0 >= 0); |
5049 | 0 | GGML_ASSERT(p1 >= 0); |
5050 | |
|
5051 | 0 | GGML_ASSERT(p0 < a->ne[0]); // padding length on each size must be less than the |
5052 | 0 | GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded |
5053 | |
|
5054 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
5055 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
5056 | |
|
5057 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, |
5058 | 0 | a->ne[0] + p0 + p1, |
5059 | 0 | a->ne[1], |
5060 | 0 | a->ne[2], |
5061 | 0 | a->ne[3]); |
5062 | |
|
5063 | 0 | int32_t params[] = { p0, p1 }; |
5064 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
5065 | |
|
5066 | 0 | result->op = GGML_OP_PAD_REFLECT_1D; |
5067 | 0 | result->src[0] = a; |
5068 | |
|
5069 | 0 | return result; |
5070 | 0 | } |
5071 | | |
5072 | | // ggml_roll |
5073 | | |
5074 | | struct ggml_tensor * ggml_roll( |
5075 | | struct ggml_context * ctx, |
5076 | | struct ggml_tensor * a, |
5077 | | int shift0, |
5078 | | int shift1, |
5079 | | int shift2, |
5080 | 0 | int shift3) { |
5081 | 0 | GGML_ASSERT(a->nb[0] == ggml_type_size(a->type)); |
5082 | 0 | GGML_ASSERT(abs(shift0) < a->ne[0]); |
5083 | 0 | GGML_ASSERT(abs(shift1) < a->ne[1]); |
5084 | 0 | GGML_ASSERT(abs(shift2) < a->ne[2]); |
5085 | 0 | GGML_ASSERT(abs(shift3) < a->ne[3]); |
5086 | |
|
5087 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
5088 | |
|
5089 | 0 | ggml_set_op_params_i32(result, 0, shift0); |
5090 | 0 | ggml_set_op_params_i32(result, 1, shift1); |
5091 | 0 | ggml_set_op_params_i32(result, 2, shift2); |
5092 | 0 | ggml_set_op_params_i32(result, 3, shift3); |
5093 | |
|
5094 | 0 | result->op = GGML_OP_ROLL; |
5095 | 0 | result->src[0] = a; |
5096 | |
|
5097 | 0 | return result; |
5098 | 0 | } |
5099 | | |
5100 | | // ggml_timestep_embedding |
5101 | | |
5102 | | struct ggml_tensor * ggml_timestep_embedding( |
5103 | | struct ggml_context * ctx, |
5104 | | struct ggml_tensor * timesteps, |
5105 | | int dim, |
5106 | 0 | int max_period) { |
5107 | |
|
5108 | 0 | struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]); |
5109 | |
|
5110 | 0 | ggml_set_op_params_i32(result, 0, dim); |
5111 | 0 | ggml_set_op_params_i32(result, 1, max_period); |
5112 | |
|
5113 | 0 | result->op = GGML_OP_TIMESTEP_EMBEDDING; |
5114 | 0 | result->src[0] = timesteps; |
5115 | |
|
5116 | 0 | return result; |
5117 | 0 | } |
5118 | | |
5119 | | // ggml_tri |
5120 | | |
5121 | | struct ggml_tensor * ggml_tri( |
5122 | | struct ggml_context * ctx, |
5123 | | struct ggml_tensor * a, |
5124 | 0 | enum ggml_tri_type type) { |
5125 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
5126 | |
|
5127 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
5128 | 0 | GGML_ASSERT(a->ne[0] == a->ne[1]); |
5129 | |
|
5130 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
5131 | |
|
5132 | 0 | ggml_set_op_params_i32(result, 0, type); |
5133 | |
|
5134 | 0 | result->op = GGML_OP_TRI; |
5135 | 0 | result->src[0] = a; |
5136 | |
|
5137 | 0 | return result; |
5138 | 0 | } |
5139 | | |
5140 | | // ggml_fill |
5141 | | |
5142 | | static struct ggml_tensor * ggml_fill_impl( |
5143 | | struct ggml_context * ctx, |
5144 | | struct ggml_tensor * a, |
5145 | | float c, |
5146 | 0 | bool inplace) { |
5147 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
5148 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
5149 | |
|
5150 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5151 | |
|
5152 | 0 | ggml_set_op_params_f32(result, 0, c); |
5153 | |
|
5154 | 0 | result->op = GGML_OP_FILL; |
5155 | 0 | result->src[0] = a; |
5156 | |
|
5157 | 0 | return result; |
5158 | 0 | } |
5159 | | |
5160 | | struct ggml_tensor * ggml_fill( |
5161 | | struct ggml_context * ctx, |
5162 | | struct ggml_tensor * a, |
5163 | 0 | float c) { |
5164 | 0 | return ggml_fill_impl(ctx, a, c, false); |
5165 | 0 | } |
5166 | | |
5167 | | struct ggml_tensor * ggml_fill_inplace( |
5168 | | struct ggml_context * ctx, |
5169 | | struct ggml_tensor * a, |
5170 | 0 | float c) { |
5171 | 0 | return ggml_fill_impl(ctx, a, c, true); |
5172 | 0 | } |
5173 | | |
5174 | | // ggml_argsort |
5175 | | |
5176 | | struct ggml_tensor * ggml_argsort( |
5177 | | struct ggml_context * ctx, |
5178 | | struct ggml_tensor * a, |
5179 | 0 | enum ggml_sort_order order) { |
5180 | 0 | GGML_ASSERT(a->ne[0] <= INT32_MAX); |
5181 | |
|
5182 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne); |
5183 | |
|
5184 | 0 | ggml_set_op_params_i32(result, 0, (int32_t) order); |
5185 | |
|
5186 | 0 | result->op = GGML_OP_ARGSORT; |
5187 | 0 | result->src[0] = a; |
5188 | |
|
5189 | 0 | return result; |
5190 | 0 | } |
5191 | | |
5192 | | // ggml_argsort_top_k |
5193 | | |
5194 | | struct ggml_tensor * ggml_argsort_top_k( |
5195 | | struct ggml_context * ctx, |
5196 | | struct ggml_tensor * a, |
5197 | 0 | int k) { |
5198 | 0 | GGML_ASSERT(a->ne[0] >= k); |
5199 | |
|
5200 | 0 | struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC); |
5201 | |
|
5202 | 0 | result = ggml_view_4d(ctx, result, |
5203 | 0 | k, result->ne[1], result->ne[2], result->ne[3], |
5204 | 0 | result->nb[1], result->nb[2], result->nb[3], |
5205 | 0 | 0); |
5206 | |
|
5207 | 0 | return result; |
5208 | 0 | } |
5209 | | |
5210 | | // ggml_top_k |
5211 | | |
5212 | | struct ggml_tensor * ggml_top_k( |
5213 | | struct ggml_context * ctx, |
5214 | | struct ggml_tensor * a, |
5215 | 0 | int k) { |
5216 | 0 | GGML_ASSERT(a->ne[0] >= k); |
5217 | |
|
5218 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]); |
5219 | |
|
5220 | 0 | result->op = GGML_OP_TOP_K; |
5221 | 0 | result->src[0] = a; |
5222 | |
|
5223 | 0 | return result; |
5224 | 0 | } |
5225 | | |
5226 | | // ggml_arange |
5227 | | |
5228 | | struct ggml_tensor * ggml_arange( |
5229 | | struct ggml_context * ctx, |
5230 | | float start, |
5231 | | float stop, |
5232 | 0 | float step) { |
5233 | 0 | GGML_ASSERT(stop > start); |
5234 | |
|
5235 | 0 | const int64_t steps = (int64_t) ceilf((stop - start) / step); |
5236 | |
|
5237 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps); |
5238 | |
|
5239 | 0 | ggml_set_op_params_f32(result, 0, start); |
5240 | 0 | ggml_set_op_params_f32(result, 1, stop); |
5241 | 0 | ggml_set_op_params_f32(result, 2, step); |
5242 | |
|
5243 | 0 | result->op = GGML_OP_ARANGE; |
5244 | |
|
5245 | 0 | return result; |
5246 | 0 | } |
5247 | | |
5248 | | // ggml_flash_attn_ext |
5249 | | |
5250 | | struct ggml_tensor * ggml_flash_attn_ext( |
5251 | | struct ggml_context * ctx, |
5252 | | struct ggml_tensor * q, |
5253 | | struct ggml_tensor * k, |
5254 | | struct ggml_tensor * v, |
5255 | | struct ggml_tensor * mask, |
5256 | | float scale, |
5257 | | float max_bias, |
5258 | 0 | float logit_softcap) { |
5259 | 0 | GGML_ASSERT(ggml_can_mul_mat(k, q)); |
5260 | | // TODO: check if vT can be multiplied by (k*qT) |
5261 | |
|
5262 | 0 | GGML_ASSERT(q->ne[3] == k->ne[3]); |
5263 | 0 | GGML_ASSERT(q->ne[3] == v->ne[3]); |
5264 | |
|
5265 | 0 | if (mask) { |
5266 | 0 | GGML_ASSERT(ggml_is_contiguous(mask)); |
5267 | | //GGML_ASSERT(ggml_can_repeat_rows(mask, qk)); |
5268 | |
|
5269 | 0 | GGML_ASSERT(q->ne[2] % mask->ne[2] == 0); |
5270 | 0 | GGML_ASSERT(q->ne[3] % mask->ne[3] == 0); |
5271 | 0 | } |
5272 | |
|
5273 | 0 | if (max_bias > 0.0f) { |
5274 | 0 | GGML_ASSERT(mask); |
5275 | 0 | } |
5276 | | |
5277 | | // permute(0, 2, 1, 3) |
5278 | 0 | int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] }; |
5279 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
5280 | |
|
5281 | 0 | float params[] = { scale, max_bias, logit_softcap }; |
5282 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
5283 | |
|
5284 | 0 | result->op = GGML_OP_FLASH_ATTN_EXT; |
5285 | 0 | result->src[0] = q; |
5286 | 0 | result->src[1] = k; |
5287 | 0 | result->src[2] = v; |
5288 | 0 | result->src[3] = mask; |
5289 | |
|
5290 | 0 | return result; |
5291 | 0 | } |
5292 | | |
5293 | | void ggml_flash_attn_ext_set_prec( |
5294 | | struct ggml_tensor * a, |
5295 | 0 | enum ggml_prec prec) { |
5296 | 0 | GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT); |
5297 | |
|
5298 | 0 | const int32_t prec_i32 = (int32_t) prec; |
5299 | |
|
5300 | 0 | ggml_set_op_params_i32(a, 3, prec_i32); // scale is on first pos, max_bias on second |
5301 | 0 | } |
5302 | | |
5303 | | enum ggml_prec ggml_flash_attn_ext_get_prec( |
5304 | 0 | const struct ggml_tensor * a) { |
5305 | 0 | GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT); |
5306 | |
|
5307 | 0 | const int32_t prec_i32 = ggml_get_op_params_i32(a, 3); |
5308 | |
|
5309 | 0 | return (enum ggml_prec) prec_i32; |
5310 | 0 | } |
5311 | | |
5312 | | void ggml_flash_attn_ext_add_sinks( |
5313 | | struct ggml_tensor * a, |
5314 | 0 | struct ggml_tensor * sinks) { |
5315 | 0 | if (!sinks) { |
5316 | 0 | a->src[4] = NULL; |
5317 | 0 | return; |
5318 | 0 | } |
5319 | | |
5320 | 0 | GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT); |
5321 | 0 | GGML_ASSERT(a->src[4] == NULL); |
5322 | 0 | GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]); |
5323 | 0 | GGML_ASSERT(sinks->type == GGML_TYPE_F32); |
5324 | |
|
5325 | 0 | a->src[4] = sinks; |
5326 | 0 | } |
5327 | | |
5328 | | // ggml_flash_attn_back |
5329 | | |
5330 | | struct ggml_tensor * ggml_flash_attn_back( |
5331 | | struct ggml_context * ctx, |
5332 | | struct ggml_tensor * q, |
5333 | | struct ggml_tensor * k, |
5334 | | struct ggml_tensor * v, |
5335 | | struct ggml_tensor * d, |
5336 | 0 | bool masked) { |
5337 | 0 | GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes"); |
5338 | |
|
5339 | 0 | GGML_ASSERT(ggml_can_mul_mat(k, q)); |
5340 | | // TODO: check if vT can be multiplied by (k*qT) |
5341 | | |
5342 | | // d shape [D,N,ne2,ne3] |
5343 | | // q shape [D,N,ne2,ne3] |
5344 | | // k shape [D,M,kvne2,ne3] |
5345 | | // v shape [M,D,kvne2,ne3] |
5346 | |
|
5347 | 0 | const int64_t D = q->ne[0]; |
5348 | 0 | const int64_t N = q->ne[1]; |
5349 | 0 | const int64_t M = k->ne[1]; |
5350 | 0 | const int64_t ne2 = q->ne[2]; |
5351 | 0 | const int64_t ne3 = q->ne[3]; |
5352 | 0 | const int64_t kvne2 = k->ne[2]; |
5353 | |
|
5354 | 0 | GGML_ASSERT(k->ne[0] == D); |
5355 | 0 | GGML_ASSERT(v->ne[0] == M); |
5356 | 0 | GGML_ASSERT(v->ne[1] == D); |
5357 | 0 | GGML_ASSERT(d->ne[0] == D); |
5358 | 0 | GGML_ASSERT(d->ne[1] == N); |
5359 | 0 | GGML_ASSERT(k->ne[2] == kvne2); |
5360 | 0 | GGML_ASSERT(k->ne[3] == ne3); |
5361 | 0 | GGML_ASSERT(v->ne[2] == kvne2); |
5362 | 0 | GGML_ASSERT(v->ne[3] == ne3); |
5363 | 0 | GGML_ASSERT(d->ne[2] == ne2); |
5364 | 0 | GGML_ASSERT(d->ne[3] == ne3); |
5365 | |
|
5366 | 0 | GGML_ASSERT(ne2 % kvne2 == 0); |
5367 | | |
5368 | | // store gradients of q, k and v as continuous tensors concatenated in result. |
5369 | | // note: v and gradv are actually transposed, i.e. v->ne[0] != D. |
5370 | 0 | const int64_t elem_q = ggml_nelements(q); |
5371 | 0 | const int64_t elem_k = ggml_nelements(k); |
5372 | 0 | const int64_t elem_v = ggml_nelements(v); |
5373 | |
|
5374 | 0 | enum ggml_type result_type = GGML_TYPE_F32; |
5375 | 0 | GGML_ASSERT(ggml_blck_size(result_type) == 1); |
5376 | 0 | const size_t tsize = ggml_type_size(result_type); |
5377 | |
|
5378 | 0 | const size_t offs_q = 0; |
5379 | 0 | const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN); |
5380 | 0 | const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN); |
5381 | 0 | const size_t end = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN); |
5382 | |
|
5383 | 0 | const size_t nelements = (end + tsize - 1)/tsize; |
5384 | |
|
5385 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements); |
5386 | |
|
5387 | 0 | int32_t masked_i = masked ? 1 : 0; |
5388 | 0 | ggml_set_op_params(result, &masked_i, sizeof(masked_i)); |
5389 | |
|
5390 | 0 | result->op = GGML_OP_FLASH_ATTN_BACK; |
5391 | 0 | result->src[0] = q; |
5392 | 0 | result->src[1] = k; |
5393 | 0 | result->src[2] = v; |
5394 | 0 | result->src[3] = d; |
5395 | |
|
5396 | 0 | return result; |
5397 | 0 | } |
5398 | | |
5399 | | // ggml_ssm_conv |
5400 | | |
5401 | | struct ggml_tensor * ggml_ssm_conv( |
5402 | | struct ggml_context * ctx, |
5403 | | struct ggml_tensor * sx, |
5404 | 0 | struct ggml_tensor * c) { |
5405 | 0 | GGML_ASSERT(ggml_is_3d(sx)); |
5406 | 0 | GGML_ASSERT(ggml_is_matrix(c)); |
5407 | |
|
5408 | 0 | const int64_t d_conv = c->ne[0]; |
5409 | 0 | const int64_t d_inner = c->ne[1]; |
5410 | 0 | const int64_t n_t = sx->ne[0] - d_conv + 1; // tokens per sequence |
5411 | 0 | const int64_t n_s = sx->ne[2]; |
5412 | | |
5413 | | // TODO: maybe support other strides than 1? |
5414 | 0 | GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t); |
5415 | 0 | GGML_ASSERT(sx->ne[1] == d_inner); |
5416 | 0 | GGML_ASSERT(n_t >= 0); |
5417 | |
|
5418 | 0 | struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s); |
5419 | |
|
5420 | 0 | result->op = GGML_OP_SSM_CONV; |
5421 | 0 | result->src[0] = sx; |
5422 | 0 | result->src[1] = c; |
5423 | |
|
5424 | 0 | return result; |
5425 | 0 | } |
5426 | | |
5427 | | // ggml_ssm_scan |
5428 | | |
5429 | | struct ggml_tensor * ggml_ssm_scan( |
5430 | | struct ggml_context * ctx, |
5431 | | struct ggml_tensor * s, |
5432 | | struct ggml_tensor * x, |
5433 | | struct ggml_tensor * dt, |
5434 | | struct ggml_tensor * A, |
5435 | | struct ggml_tensor * B, |
5436 | | struct ggml_tensor * C, |
5437 | 0 | struct ggml_tensor * ids) { |
5438 | 0 | GGML_ASSERT(ggml_is_contiguous(s)); |
5439 | 0 | GGML_ASSERT(ggml_is_contiguous(dt)); |
5440 | 0 | GGML_ASSERT(ggml_is_contiguous(A)); |
5441 | 0 | GGML_ASSERT(x->nb[0] == ggml_type_size(x->type)); |
5442 | 0 | GGML_ASSERT(B->nb[0] == ggml_type_size(B->type)); |
5443 | 0 | GGML_ASSERT(C->nb[0] == ggml_type_size(C->type)); |
5444 | 0 | GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]); |
5445 | 0 | GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]); |
5446 | 0 | GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]); |
5447 | 0 | GGML_ASSERT(ggml_are_same_shape(B, C)); |
5448 | 0 | GGML_ASSERT(ids->type == GGML_TYPE_I32); |
5449 | |
|
5450 | 0 | { |
5451 | 0 | const int64_t d_state = s->ne[0]; |
5452 | 0 | const int64_t head_dim = x->ne[0]; |
5453 | 0 | const int64_t n_head = x->ne[1]; |
5454 | 0 | const int64_t n_seq_tokens = x->ne[2]; |
5455 | 0 | const int64_t n_seqs = x->ne[3]; |
5456 | |
|
5457 | 0 | GGML_ASSERT(dt->ne[0] == n_head); |
5458 | 0 | GGML_ASSERT(dt->ne[1] == n_seq_tokens); |
5459 | 0 | GGML_ASSERT(dt->ne[2] == n_seqs); |
5460 | 0 | GGML_ASSERT(ggml_is_3d(dt)); |
5461 | 0 | GGML_ASSERT(s->ne[1] == head_dim); |
5462 | 0 | GGML_ASSERT(s->ne[2] == n_head); |
5463 | 0 | GGML_ASSERT(B->ne[0] == d_state); |
5464 | 0 | GGML_ASSERT(B->ne[2] == n_seq_tokens); |
5465 | 0 | GGML_ASSERT(B->ne[3] == n_seqs); |
5466 | 0 | GGML_ASSERT(ids->ne[0] == n_seqs); |
5467 | 0 | GGML_ASSERT(ggml_is_vector(ids)); |
5468 | 0 | GGML_ASSERT(A->ne[1] == n_head); |
5469 | 0 | GGML_ASSERT(ggml_is_matrix(A)); |
5470 | |
|
5471 | 0 | if (A->ne[0] != 1) { |
5472 | | // Mamba-1 has more granular decay factors |
5473 | 0 | GGML_ASSERT(A->ne[0] == d_state); |
5474 | 0 | } |
5475 | 0 | } |
5476 | | |
5477 | | // concatenated y + ssm_states |
5478 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]); |
5479 | |
|
5480 | 0 | result->op = GGML_OP_SSM_SCAN; |
5481 | 0 | result->src[0] = s; |
5482 | 0 | result->src[1] = x; |
5483 | 0 | result->src[2] = dt; |
5484 | 0 | result->src[3] = A; |
5485 | 0 | result->src[4] = B; |
5486 | 0 | result->src[5] = C; |
5487 | 0 | result->src[6] = ids; |
5488 | |
|
5489 | 0 | return result; |
5490 | 0 | } |
5491 | | |
5492 | | // ggml_win_part |
5493 | | |
5494 | | struct ggml_tensor * ggml_win_part( |
5495 | | struct ggml_context * ctx, |
5496 | | struct ggml_tensor * a, |
5497 | 0 | int w) { |
5498 | 0 | GGML_ASSERT(a->ne[3] == 1); |
5499 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
5500 | | |
5501 | | // padding |
5502 | 0 | const int px = (w - a->ne[1]%w)%w; |
5503 | 0 | const int py = (w - a->ne[2]%w)%w; |
5504 | |
|
5505 | 0 | const int npx = (px + a->ne[1])/w; |
5506 | 0 | const int npy = (py + a->ne[2])/w; |
5507 | 0 | const int np = npx*npy; |
5508 | |
|
5509 | 0 | const int64_t ne[4] = { a->ne[0], w, w, np, }; |
5510 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
5511 | |
|
5512 | 0 | int32_t params[] = { npx, npy, w }; |
5513 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
5514 | |
|
5515 | 0 | result->op = GGML_OP_WIN_PART; |
5516 | 0 | result->src[0] = a; |
5517 | |
|
5518 | 0 | return result; |
5519 | 0 | } |
5520 | | |
5521 | | // ggml_win_unpart |
5522 | | |
5523 | | struct ggml_tensor * ggml_win_unpart( |
5524 | | struct ggml_context * ctx, |
5525 | | struct ggml_tensor * a, |
5526 | | int w0, |
5527 | | int h0, |
5528 | 0 | int w) { |
5529 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
5530 | |
|
5531 | 0 | const int64_t ne[4] = { a->ne[0], w0, h0, 1, }; |
5532 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne); |
5533 | |
|
5534 | 0 | int32_t params[] = { w }; |
5535 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
5536 | |
|
5537 | 0 | result->op = GGML_OP_WIN_UNPART; |
5538 | 0 | result->src[0] = a; |
5539 | |
|
5540 | 0 | return result; |
5541 | 0 | } |
5542 | | |
5543 | | // ggml_get_rel_pos |
5544 | | |
5545 | | struct ggml_tensor * ggml_get_rel_pos( |
5546 | | struct ggml_context * ctx, |
5547 | | struct ggml_tensor * a, |
5548 | | int qh, |
5549 | 0 | int kh) { |
5550 | 0 | GGML_ASSERT(qh == kh); |
5551 | 0 | GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]); |
5552 | |
|
5553 | 0 | const int64_t ne[4] = { a->ne[0], kh, qh, 1, }; |
5554 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne); |
5555 | |
|
5556 | 0 | result->op = GGML_OP_GET_REL_POS; |
5557 | 0 | result->src[0] = a; |
5558 | |
|
5559 | 0 | return result; |
5560 | 0 | } |
5561 | | |
5562 | | // ggml_add_rel_pos |
5563 | | |
5564 | | static struct ggml_tensor * ggml_add_rel_pos_impl( |
5565 | | struct ggml_context * ctx, |
5566 | | struct ggml_tensor * a, |
5567 | | struct ggml_tensor * pw, |
5568 | | struct ggml_tensor * ph, |
5569 | 0 | bool inplace) { |
5570 | 0 | GGML_ASSERT(ggml_are_same_shape(pw, ph)); |
5571 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
5572 | 0 | GGML_ASSERT(ggml_is_contiguous(pw)); |
5573 | 0 | GGML_ASSERT(ggml_is_contiguous(ph)); |
5574 | 0 | GGML_ASSERT(ph->type == GGML_TYPE_F32); |
5575 | 0 | GGML_ASSERT(pw->type == GGML_TYPE_F32); |
5576 | 0 | GGML_ASSERT(pw->ne[3] == a->ne[2]); |
5577 | 0 | GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]); |
5578 | 0 | GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]); |
5579 | |
|
5580 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5581 | 0 | ggml_set_op_params_i32(result, 0, inplace ? 1 : 0); |
5582 | |
|
5583 | 0 | result->op = GGML_OP_ADD_REL_POS; |
5584 | 0 | result->src[0] = a; |
5585 | 0 | result->src[1] = pw; |
5586 | 0 | result->src[2] = ph; |
5587 | |
|
5588 | 0 | return result; |
5589 | 0 | } |
5590 | | |
5591 | | struct ggml_tensor * ggml_add_rel_pos( |
5592 | | struct ggml_context * ctx, |
5593 | | struct ggml_tensor * a, |
5594 | | struct ggml_tensor * pw, |
5595 | 0 | struct ggml_tensor * ph) { |
5596 | 0 | return ggml_add_rel_pos_impl(ctx, a, pw, ph, false); |
5597 | 0 | } |
5598 | | |
5599 | | struct ggml_tensor * ggml_add_rel_pos_inplace( |
5600 | | struct ggml_context * ctx, |
5601 | | struct ggml_tensor * a, |
5602 | | struct ggml_tensor * pw, |
5603 | 0 | struct ggml_tensor * ph) { |
5604 | 0 | return ggml_add_rel_pos_impl(ctx, a, pw, ph, true); |
5605 | 0 | } |
5606 | | |
5607 | | // ggml_rwkv_wkv6 |
5608 | | |
5609 | | struct ggml_tensor * ggml_rwkv_wkv6( |
5610 | | struct ggml_context * ctx, |
5611 | | struct ggml_tensor * k, |
5612 | | struct ggml_tensor * v, |
5613 | | struct ggml_tensor * r, |
5614 | | struct ggml_tensor * tf, |
5615 | | struct ggml_tensor * td, |
5616 | 0 | struct ggml_tensor * state) { |
5617 | 0 | GGML_ASSERT(ggml_is_contiguous(k)); |
5618 | 0 | GGML_ASSERT(ggml_is_contiguous(v)); |
5619 | 0 | GGML_ASSERT(ggml_is_contiguous(r)); |
5620 | 0 | GGML_ASSERT(ggml_is_contiguous(tf)); |
5621 | 0 | GGML_ASSERT(ggml_is_contiguous(td)); |
5622 | 0 | GGML_ASSERT(ggml_is_contiguous(state)); |
5623 | |
|
5624 | 0 | const int64_t S = k->ne[0]; |
5625 | 0 | const int64_t H = k->ne[1]; |
5626 | 0 | const int64_t n_tokens = k->ne[2]; |
5627 | 0 | const int64_t n_seqs = state->ne[1]; |
5628 | 0 | { |
5629 | 0 | GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens); |
5630 | 0 | GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens); |
5631 | 0 | GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens); |
5632 | 0 | GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs); |
5633 | 0 | } |
5634 | | |
5635 | | // concat output and new_state |
5636 | 0 | const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 }; |
5637 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
5638 | |
|
5639 | 0 | result->op = GGML_OP_RWKV_WKV6; |
5640 | 0 | result->src[0] = k; |
5641 | 0 | result->src[1] = v; |
5642 | 0 | result->src[2] = r; |
5643 | 0 | result->src[3] = tf; |
5644 | 0 | result->src[4] = td; |
5645 | 0 | result->src[5] = state; |
5646 | |
|
5647 | 0 | return result; |
5648 | 0 | } |
5649 | | |
5650 | | // ggml_gated_linear_attn |
5651 | | |
5652 | | struct ggml_tensor * ggml_gated_linear_attn( |
5653 | | struct ggml_context * ctx, |
5654 | | struct ggml_tensor * k, |
5655 | | struct ggml_tensor * v, |
5656 | | struct ggml_tensor * q, |
5657 | | struct ggml_tensor * g, |
5658 | | struct ggml_tensor * state, |
5659 | 0 | float scale) { |
5660 | 0 | GGML_ASSERT(ggml_is_contiguous(k)); |
5661 | 0 | GGML_ASSERT(ggml_is_contiguous(v)); |
5662 | 0 | GGML_ASSERT(ggml_is_contiguous(q)); |
5663 | 0 | GGML_ASSERT(ggml_is_contiguous(g)); |
5664 | 0 | GGML_ASSERT(ggml_is_contiguous(state)); |
5665 | |
|
5666 | 0 | const int64_t S = k->ne[0]; |
5667 | 0 | const int64_t H = k->ne[1]; |
5668 | 0 | const int64_t n_tokens = k->ne[2]; |
5669 | 0 | const int64_t n_seqs = state->ne[1]; |
5670 | 0 | { |
5671 | 0 | GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens); |
5672 | 0 | GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens); |
5673 | 0 | GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens); |
5674 | 0 | GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs); |
5675 | 0 | } |
5676 | | |
5677 | | // concat output and new_state |
5678 | 0 | const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 }; |
5679 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
5680 | |
|
5681 | 0 | ggml_set_op_params_f32(result, 0, scale); |
5682 | |
|
5683 | 0 | result->op = GGML_OP_GATED_LINEAR_ATTN; |
5684 | 0 | result->src[0] = k; |
5685 | 0 | result->src[1] = v; |
5686 | 0 | result->src[2] = q; |
5687 | 0 | result->src[3] = g; |
5688 | 0 | result->src[4] = state; |
5689 | |
|
5690 | 0 | return result; |
5691 | 0 | } |
5692 | | |
5693 | | // ggml_rwkv_wkv7 |
5694 | | |
5695 | | struct ggml_tensor * ggml_rwkv_wkv7( |
5696 | | struct ggml_context * ctx, |
5697 | | struct ggml_tensor * r, |
5698 | | struct ggml_tensor * w, |
5699 | | struct ggml_tensor * k, |
5700 | | struct ggml_tensor * v, |
5701 | | struct ggml_tensor * a, |
5702 | | struct ggml_tensor * b, |
5703 | 0 | struct ggml_tensor * state) { |
5704 | 0 | GGML_ASSERT(ggml_is_contiguous(r)); |
5705 | 0 | GGML_ASSERT(ggml_is_contiguous(w)); |
5706 | 0 | GGML_ASSERT(ggml_is_contiguous(k)); |
5707 | 0 | GGML_ASSERT(ggml_is_contiguous(v)); |
5708 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
5709 | 0 | GGML_ASSERT(ggml_is_contiguous(b)); |
5710 | 0 | GGML_ASSERT(ggml_is_contiguous(state)); |
5711 | |
|
5712 | 0 | const int64_t S = k->ne[0]; |
5713 | 0 | const int64_t H = k->ne[1]; |
5714 | 0 | const int64_t n_tokens = k->ne[2]; |
5715 | 0 | const int64_t n_seqs = state->ne[1]; |
5716 | 0 | { |
5717 | 0 | GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens); |
5718 | 0 | GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens); |
5719 | 0 | GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens); |
5720 | 0 | GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens); |
5721 | 0 | GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens); |
5722 | 0 | GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs); |
5723 | 0 | } |
5724 | | |
5725 | | // concat output and new_state |
5726 | 0 | const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 }; |
5727 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
5728 | |
|
5729 | 0 | result->op = GGML_OP_RWKV_WKV7; |
5730 | 0 | result->src[0] = r; |
5731 | 0 | result->src[1] = w; |
5732 | 0 | result->src[2] = k; |
5733 | 0 | result->src[3] = v; |
5734 | 0 | result->src[4] = a; |
5735 | 0 | result->src[5] = b; |
5736 | 0 | result->src[6] = state; |
5737 | |
|
5738 | 0 | return result; |
5739 | 0 | } |
5740 | | |
5741 | | // ggml_unary |
5742 | | |
5743 | | static struct ggml_tensor * ggml_unary_impl( |
5744 | | struct ggml_context * ctx, |
5745 | | struct ggml_tensor * a, |
5746 | | enum ggml_unary_op op, |
5747 | 0 | bool inplace) { |
5748 | 0 | GGML_ASSERT(ggml_is_contiguous_1(a)); |
5749 | |
|
5750 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5751 | |
|
5752 | 0 | ggml_set_op_params_i32(result, 0, (int32_t) op); |
5753 | |
|
5754 | 0 | result->op = GGML_OP_UNARY; |
5755 | 0 | result->src[0] = a; |
5756 | |
|
5757 | 0 | return result; |
5758 | 0 | } |
5759 | | |
5760 | | struct ggml_tensor * ggml_unary( |
5761 | | struct ggml_context * ctx, |
5762 | | struct ggml_tensor * a, |
5763 | 0 | enum ggml_unary_op op) { |
5764 | 0 | return ggml_unary_impl(ctx, a, op, false); |
5765 | 0 | } |
5766 | | |
5767 | | struct ggml_tensor * ggml_unary_inplace( |
5768 | | struct ggml_context * ctx, |
5769 | | struct ggml_tensor * a, |
5770 | 0 | enum ggml_unary_op op) { |
5771 | 0 | return ggml_unary_impl(ctx, a, op, true); |
5772 | 0 | } |
5773 | | |
5774 | | // ggml_map_custom1 |
5775 | | |
5776 | | static struct ggml_tensor * ggml_map_custom1_impl( |
5777 | | struct ggml_context * ctx, |
5778 | | struct ggml_tensor * a, |
5779 | | const ggml_custom1_op_t fun, |
5780 | | int n_tasks, |
5781 | | void * userdata, |
5782 | 0 | bool inplace) { |
5783 | 0 | GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); |
5784 | |
|
5785 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5786 | |
|
5787 | 0 | struct ggml_map_custom1_op_params params = { |
5788 | 0 | /*.fun =*/ fun, |
5789 | 0 | /*.n_tasks =*/ n_tasks, |
5790 | 0 | /*.userdata =*/ userdata |
5791 | 0 | }; |
5792 | 0 | ggml_set_op_params(result, ¶ms, sizeof(params)); |
5793 | |
|
5794 | 0 | result->op = GGML_OP_MAP_CUSTOM1; |
5795 | 0 | result->src[0] = a; |
5796 | |
|
5797 | 0 | return result; |
5798 | 0 | } |
5799 | | |
5800 | | struct ggml_tensor * ggml_map_custom1( |
5801 | | struct ggml_context * ctx, |
5802 | | struct ggml_tensor * a, |
5803 | | const ggml_custom1_op_t fun, |
5804 | | int n_tasks, |
5805 | 0 | void * userdata) { |
5806 | 0 | return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false); |
5807 | 0 | } |
5808 | | |
5809 | | struct ggml_tensor * ggml_map_custom1_inplace( |
5810 | | struct ggml_context * ctx, |
5811 | | struct ggml_tensor * a, |
5812 | | const ggml_custom1_op_t fun, |
5813 | | int n_tasks, |
5814 | 0 | void * userdata) { |
5815 | 0 | return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true); |
5816 | 0 | } |
5817 | | |
5818 | | // ggml_map_custom2 |
5819 | | |
5820 | | static struct ggml_tensor * ggml_map_custom2_impl( |
5821 | | struct ggml_context * ctx, |
5822 | | struct ggml_tensor * a, |
5823 | | struct ggml_tensor * b, |
5824 | | const ggml_custom2_op_t fun, |
5825 | | int n_tasks, |
5826 | | void * userdata, |
5827 | 0 | bool inplace) { |
5828 | 0 | GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); |
5829 | |
|
5830 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5831 | |
|
5832 | 0 | struct ggml_map_custom2_op_params params = { |
5833 | 0 | /*.fun =*/ fun, |
5834 | 0 | /*.n_tasks =*/ n_tasks, |
5835 | 0 | /*.userdata =*/ userdata |
5836 | 0 | }; |
5837 | 0 | ggml_set_op_params(result, ¶ms, sizeof(params)); |
5838 | |
|
5839 | 0 | result->op = GGML_OP_MAP_CUSTOM2; |
5840 | 0 | result->src[0] = a; |
5841 | 0 | result->src[1] = b; |
5842 | |
|
5843 | 0 | return result; |
5844 | 0 | } |
5845 | | |
5846 | | struct ggml_tensor * ggml_map_custom2( |
5847 | | struct ggml_context * ctx, |
5848 | | struct ggml_tensor * a, |
5849 | | struct ggml_tensor * b, |
5850 | | const ggml_custom2_op_t fun, |
5851 | | int n_tasks, |
5852 | 0 | void * userdata) { |
5853 | 0 | return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false); |
5854 | 0 | } |
5855 | | |
5856 | | struct ggml_tensor * ggml_map_custom2_inplace( |
5857 | | struct ggml_context * ctx, |
5858 | | struct ggml_tensor * a, |
5859 | | struct ggml_tensor * b, |
5860 | | const ggml_custom2_op_t fun, |
5861 | | int n_tasks, |
5862 | 0 | void * userdata) { |
5863 | 0 | return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true); |
5864 | 0 | } |
5865 | | |
5866 | | // ggml_map_custom3 |
5867 | | |
5868 | | static struct ggml_tensor * ggml_map_custom3_impl( |
5869 | | struct ggml_context * ctx, |
5870 | | struct ggml_tensor * a, |
5871 | | struct ggml_tensor * b, |
5872 | | struct ggml_tensor * c, |
5873 | | const ggml_custom3_op_t fun, |
5874 | | int n_tasks, |
5875 | | void * userdata, |
5876 | 0 | bool inplace) { |
5877 | 0 | GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); |
5878 | |
|
5879 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5880 | |
|
5881 | 0 | struct ggml_map_custom3_op_params params = { |
5882 | 0 | /*.fun =*/ fun, |
5883 | 0 | /*.n_tasks =*/ n_tasks, |
5884 | 0 | /*.userdata =*/ userdata |
5885 | 0 | }; |
5886 | 0 | ggml_set_op_params(result, ¶ms, sizeof(params)); |
5887 | |
|
5888 | 0 | result->op = GGML_OP_MAP_CUSTOM3; |
5889 | 0 | result->src[0] = a; |
5890 | 0 | result->src[1] = b; |
5891 | 0 | result->src[2] = c; |
5892 | |
|
5893 | 0 | return result; |
5894 | 0 | } |
5895 | | |
5896 | | struct ggml_tensor * ggml_map_custom3( |
5897 | | struct ggml_context * ctx, |
5898 | | struct ggml_tensor * a, |
5899 | | struct ggml_tensor * b, |
5900 | | struct ggml_tensor * c, |
5901 | | const ggml_custom3_op_t fun, |
5902 | | int n_tasks, |
5903 | 0 | void * userdata) { |
5904 | 0 | return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false); |
5905 | 0 | } |
5906 | | |
5907 | | struct ggml_tensor * ggml_map_custom3_inplace( |
5908 | | struct ggml_context * ctx, |
5909 | | struct ggml_tensor * a, |
5910 | | struct ggml_tensor * b, |
5911 | | struct ggml_tensor * c, |
5912 | | const ggml_custom3_op_t fun, |
5913 | | int n_tasks, |
5914 | 0 | void * userdata) { |
5915 | 0 | return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true); |
5916 | 0 | } |
5917 | | |
5918 | | struct ggml_tensor * ggml_custom_4d( |
5919 | | struct ggml_context * ctx, |
5920 | | enum ggml_type type, |
5921 | | int64_t ne0, |
5922 | | int64_t ne1, |
5923 | | int64_t ne2, |
5924 | | int64_t ne3, |
5925 | | struct ggml_tensor ** args, |
5926 | | int n_args, |
5927 | | ggml_custom_op_t fun, |
5928 | | int n_tasks, |
5929 | 0 | void * userdata) { |
5930 | |
|
5931 | 0 | GGML_ASSERT(n_args < GGML_MAX_SRC); |
5932 | |
|
5933 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3); |
5934 | |
|
5935 | 0 | struct ggml_custom_op_params params = { |
5936 | 0 | /*.fun =*/ fun, |
5937 | 0 | /*.n_tasks =*/ n_tasks, |
5938 | 0 | /*.userdata =*/ userdata |
5939 | 0 | }; |
5940 | 0 | ggml_set_op_params(result, ¶ms, sizeof(params)); |
5941 | |
|
5942 | 0 | result->op = GGML_OP_CUSTOM; |
5943 | 0 | for (int i = 0; i < n_args; i++) { |
5944 | 0 | result->src[i] = args[i]; |
5945 | 0 | } |
5946 | |
|
5947 | 0 | return result; |
5948 | 0 | } |
5949 | | |
5950 | | struct ggml_tensor * ggml_custom_inplace( |
5951 | | struct ggml_context * ctx, |
5952 | | struct ggml_tensor * a, |
5953 | | struct ggml_tensor ** args, |
5954 | | int n_args, |
5955 | | ggml_custom_op_t fun, |
5956 | | int n_tasks, |
5957 | 0 | void * userdata) { |
5958 | |
|
5959 | 0 | GGML_ASSERT(n_args < GGML_MAX_SRC - 1); |
5960 | |
|
5961 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
5962 | |
|
5963 | 0 | struct ggml_custom_op_params params = { |
5964 | 0 | /*.fun =*/ fun, |
5965 | 0 | /*.n_tasks =*/ n_tasks, |
5966 | 0 | /*.userdata =*/ userdata |
5967 | 0 | }; |
5968 | 0 | ggml_set_op_params(result, ¶ms, sizeof(params)); |
5969 | |
|
5970 | 0 | result->op = GGML_OP_CUSTOM; |
5971 | 0 | result->src[0] = a; |
5972 | 0 | for (int i = 0; i < n_args; i++) { |
5973 | 0 | result->src[i + 1] = args[i]; |
5974 | 0 | } |
5975 | |
|
5976 | 0 | return result; |
5977 | 0 | } |
5978 | | // ggml_cross_entropy_loss |
5979 | | |
5980 | | struct ggml_tensor * ggml_cross_entropy_loss( |
5981 | | struct ggml_context * ctx, |
5982 | | struct ggml_tensor * a, |
5983 | 0 | struct ggml_tensor * b) { |
5984 | 0 | GGML_ASSERT(ggml_are_same_shape(a, b)); |
5985 | |
|
5986 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1); |
5987 | |
|
5988 | 0 | result->op = GGML_OP_CROSS_ENTROPY_LOSS; |
5989 | 0 | result->src[0] = a; |
5990 | 0 | result->src[1] = b; |
5991 | |
|
5992 | 0 | return result; |
5993 | 0 | } |
5994 | | |
5995 | | // ggml_cross_entropy_loss_back |
5996 | | |
5997 | | struct ggml_tensor * ggml_cross_entropy_loss_back( |
5998 | | struct ggml_context * ctx, |
5999 | | struct ggml_tensor * a, |
6000 | | struct ggml_tensor * b, |
6001 | 0 | struct ggml_tensor * c) { |
6002 | 0 | GGML_ASSERT(ggml_is_scalar(a)); |
6003 | 0 | GGML_ASSERT(ggml_are_same_shape(b, c)); |
6004 | |
|
6005 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, b); |
6006 | |
|
6007 | 0 | result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK; |
6008 | 0 | result->src[0] = a; |
6009 | 0 | result->src[1] = b; |
6010 | 0 | result->src[2] = c; |
6011 | |
|
6012 | 0 | return result; |
6013 | 0 | } |
6014 | | |
6015 | | // opt_step_adamw |
6016 | | |
6017 | | struct ggml_tensor * ggml_opt_step_adamw( |
6018 | | struct ggml_context * ctx, |
6019 | | struct ggml_tensor * a, |
6020 | | struct ggml_tensor * grad, |
6021 | | struct ggml_tensor * m, |
6022 | | struct ggml_tensor * v, |
6023 | 0 | struct ggml_tensor * adamw_params) { |
6024 | 0 | GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM); |
6025 | 0 | GGML_ASSERT(ggml_are_same_shape(a, grad)); |
6026 | 0 | GGML_ASSERT(ggml_are_same_shape(a, m)); |
6027 | 0 | GGML_ASSERT(ggml_are_same_shape(a, v)); |
6028 | 0 | GGML_ASSERT(adamw_params->type == GGML_TYPE_F32); |
6029 | 0 | GGML_ASSERT(ggml_nelements(adamw_params) == 7); |
6030 | |
|
6031 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
6032 | |
|
6033 | 0 | result->op = GGML_OP_OPT_STEP_ADAMW; |
6034 | 0 | result->src[0] = a; |
6035 | 0 | result->src[1] = grad; |
6036 | 0 | result->src[2] = m; |
6037 | 0 | result->src[3] = v; |
6038 | 0 | result->src[4] = adamw_params; |
6039 | |
|
6040 | 0 | return result; |
6041 | 0 | } |
6042 | | |
6043 | | // opt_step_sgd |
6044 | | |
6045 | | struct ggml_tensor * ggml_opt_step_sgd( |
6046 | | struct ggml_context * ctx, |
6047 | | struct ggml_tensor * a, |
6048 | | struct ggml_tensor * grad, |
6049 | 0 | struct ggml_tensor * params) { |
6050 | 0 | GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM); |
6051 | 0 | GGML_ASSERT(ggml_are_same_shape(a, grad)); |
6052 | 0 | GGML_ASSERT(params->type == GGML_TYPE_F32); |
6053 | 0 | GGML_ASSERT(ggml_nelements(params) == 2); |
6054 | |
|
6055 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
6056 | |
|
6057 | 0 | result->op = GGML_OP_OPT_STEP_SGD; |
6058 | 0 | result->src[0] = a; |
6059 | 0 | result->src[1] = grad; |
6060 | 0 | result->src[2] = params; |
6061 | |
|
6062 | 0 | return result; |
6063 | 0 | } |
6064 | | |
6065 | | // solve_tri |
6066 | | |
6067 | | struct ggml_tensor * ggml_solve_tri( |
6068 | | struct ggml_context * ctx, |
6069 | | struct ggml_tensor * a, |
6070 | | struct ggml_tensor * b, |
6071 | | bool left, |
6072 | | bool lower, |
6073 | 0 | bool uni) { |
6074 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
6075 | 0 | GGML_ASSERT(b->type == GGML_TYPE_F32); |
6076 | | |
6077 | | // A must be square and lower diagonal |
6078 | 0 | GGML_ASSERT(a->ne[0] == a->ne[1]); |
6079 | | // B must have same outer dimension as A |
6080 | 0 | GGML_ASSERT(a->ne[1] == b->ne[1]); |
6081 | | |
6082 | | // batch dimensions must be equal |
6083 | 0 | GGML_ASSERT(a->ne[2] == b->ne[2]); |
6084 | 0 | GGML_ASSERT(a->ne[3] == b->ne[3]); |
6085 | |
|
6086 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
6087 | 0 | GGML_ASSERT(ggml_is_contiguous(b)); |
6088 | |
|
6089 | 0 | GGML_ASSERT(lower && left && !uni); // TODO: support other variants |
6090 | |
|
6091 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]); |
6092 | |
|
6093 | 0 | result->op = GGML_OP_SOLVE_TRI; |
6094 | 0 | result->src[0] = a; |
6095 | 0 | result->src[1] = b; |
6096 | |
|
6097 | 0 | return result; |
6098 | 0 | } |
6099 | | |
6100 | | //////////////////////////////////////////////////////////////////////////////// |
6101 | | |
6102 | 0 | struct ggml_hash_set ggml_hash_set_new(size_t size) { |
6103 | 0 | size = ggml_hash_size(size); |
6104 | 0 | struct ggml_hash_set result; |
6105 | 0 | result.size = size; |
6106 | 0 | result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size); |
6107 | 0 | result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t)); |
6108 | 0 | return result; |
6109 | 0 | } |
6110 | | |
6111 | 0 | void ggml_hash_set_reset(struct ggml_hash_set * hash_set) { |
6112 | 0 | memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size)); |
6113 | 0 | } |
6114 | | |
6115 | 0 | void ggml_hash_set_free(struct ggml_hash_set * hash_set) { |
6116 | 0 | GGML_FREE(hash_set->used); |
6117 | 0 | GGML_FREE(hash_set->keys); |
6118 | 0 | } |
6119 | | |
6120 | 0 | size_t ggml_hash_size(size_t min_sz) { |
6121 | | // next primes after powers of two |
6122 | 0 | static const size_t primes[] = { |
6123 | 0 | 2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031, |
6124 | 0 | 2053, 4099, 8209, 16411, 32771, 65537, 131101, |
6125 | 0 | 262147, 524309, 1048583, 2097169, 4194319, 8388617, |
6126 | 0 | 16777259, 33554467, 67108879, 134217757, 268435459, |
6127 | 0 | 536870923, 1073741827, 2147483659 |
6128 | 0 | }; |
6129 | 0 | static const size_t n_primes = sizeof(primes)/sizeof(primes[0]); |
6130 | | |
6131 | | // find the smallest prime that is larger or equal than min_sz |
6132 | 0 | size_t l = 0; |
6133 | 0 | size_t r = n_primes; |
6134 | 0 | while (l < r) { |
6135 | 0 | size_t m = (l + r)/2; |
6136 | 0 | if (primes[m] < min_sz) { |
6137 | 0 | l = m + 1; |
6138 | 0 | } else { |
6139 | 0 | r = m; |
6140 | 0 | } |
6141 | 0 | } |
6142 | 0 | size_t sz = l < n_primes ? primes[l] : min_sz | 1; |
6143 | 0 | return sz; |
6144 | 0 | } |
6145 | | |
6146 | | struct hash_map { |
6147 | | struct ggml_hash_set set; |
6148 | | struct ggml_tensor ** vals; |
6149 | | }; |
6150 | | |
6151 | 0 | static struct hash_map * ggml_new_hash_map(size_t size) { |
6152 | 0 | struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map)); |
6153 | 0 | result->set = ggml_hash_set_new(size); |
6154 | 0 | result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *)); |
6155 | 0 | return result; |
6156 | 0 | } |
6157 | | |
6158 | 0 | static void ggml_hash_map_free(struct hash_map * map) { |
6159 | 0 | ggml_hash_set_free(&map->set); |
6160 | 0 | GGML_FREE(map->vals); |
6161 | 0 | GGML_FREE(map); |
6162 | 0 | } |
6163 | | |
6164 | | // utility functions to change gradients |
6165 | | // isrc is the index of tensor in cgraph->visited_has_set.keys |
6166 | | // the corresponding gradient (accumulators) are also at position isrc |
6167 | | // if tensor has a gradient accumulator, modify that accumulator in-place |
6168 | | // else if there is no gradient for tensor, set the corresponding value |
6169 | | // else, just add/subtract/etc. the gradients |
6170 | | |
6171 | | static void ggml_add_or_set( |
6172 | | struct ggml_context * ctx, |
6173 | | struct ggml_cgraph * cgraph, |
6174 | | size_t isrc, |
6175 | 0 | struct ggml_tensor * tensor) { |
6176 | 0 | struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc]; |
6177 | 0 | GGML_ASSERT(src); |
6178 | 0 | if (cgraph->grads[isrc]) { |
6179 | 0 | cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]); |
6180 | 0 | } else { |
6181 | 0 | cgraph->grads[isrc] = tensor; |
6182 | 0 | } |
6183 | 0 | ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name); |
6184 | 0 | ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); |
6185 | 0 | } |
6186 | | |
6187 | | static void ggml_acc_or_set( |
6188 | | struct ggml_context * ctx, |
6189 | | struct ggml_cgraph * cgraph, |
6190 | | size_t isrc, |
6191 | | struct ggml_tensor * tensor, |
6192 | | const size_t nb1, |
6193 | | const size_t nb2, |
6194 | | const size_t nb3, |
6195 | 0 | const size_t offset) { |
6196 | 0 | struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc]; |
6197 | 0 | GGML_ASSERT(src); |
6198 | 0 | if (cgraph->grads[isrc]) { |
6199 | 0 | cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]); |
6200 | 0 | } else { |
6201 | 0 | struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN |
6202 | 0 | cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false); |
6203 | 0 | } |
6204 | 0 | ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name); |
6205 | 0 | ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); |
6206 | 0 | } |
6207 | | |
6208 | | static void ggml_add1_or_set( |
6209 | | struct ggml_context * ctx, |
6210 | | struct ggml_cgraph * cgraph, |
6211 | | size_t isrc, |
6212 | 0 | struct ggml_tensor * tensor) { |
6213 | 0 | struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc]; |
6214 | 0 | GGML_ASSERT(src); |
6215 | 0 | if (cgraph->grads[isrc]) { |
6216 | 0 | cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]); |
6217 | 0 | } else { |
6218 | 0 | cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src); |
6219 | 0 | } |
6220 | 0 | ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name); |
6221 | 0 | ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); |
6222 | 0 | } |
6223 | | |
6224 | | static void ggml_sub_or_set( |
6225 | | struct ggml_context * ctx, |
6226 | | struct ggml_cgraph * cgraph, |
6227 | | size_t isrc, |
6228 | 0 | struct ggml_tensor * tensor) { |
6229 | 0 | struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc]; |
6230 | 0 | GGML_ASSERT(src); |
6231 | 0 | if (cgraph->grads[isrc]) { |
6232 | 0 | cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]); |
6233 | 0 | } else { |
6234 | 0 | cgraph->grads[isrc] = ggml_neg(ctx, tensor); |
6235 | 0 | } |
6236 | 0 | ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name); |
6237 | 0 | ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); |
6238 | 0 | } |
6239 | | |
6240 | | static void ggml_compute_backward( |
6241 | 0 | struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) { |
6242 | 0 | struct ggml_tensor * tensor = cgraph->nodes[i]; |
6243 | 0 | struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, tensor); |
6244 | |
|
6245 | 0 | if (!grad) { |
6246 | 0 | return; |
6247 | 0 | } |
6248 | | |
6249 | 0 | struct ggml_tensor * src0 = tensor->src[0]; |
6250 | 0 | struct ggml_tensor * src1 = tensor->src[1]; |
6251 | 0 | struct ggml_tensor * src2 = tensor->src[2]; |
6252 | 0 | struct ggml_hash_set * hash_set = &cgraph->visited_hash_set; |
6253 | 0 | const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1; |
6254 | 0 | const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1; |
6255 | 0 | const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1; |
6256 | 0 | const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0]; |
6257 | 0 | const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1]; |
6258 | 0 | const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2]; |
6259 | |
|
6260 | 0 | switch (tensor->op) { |
6261 | 0 | case GGML_OP_DUP: { |
6262 | 0 | if (src0_needs_grads) { |
6263 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, grad); |
6264 | 0 | } |
6265 | 0 | } break; |
6266 | 0 | case GGML_OP_ADD: { |
6267 | 0 | if (src0_needs_grads) { |
6268 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, grad); |
6269 | 0 | } |
6270 | 0 | if (src1_needs_grads) { |
6271 | 0 | struct ggml_tensor * tmp = grad; |
6272 | 0 | if (!ggml_are_same_shape(src0, src1)) { |
6273 | 0 | tmp = ggml_repeat_back(ctx, tmp, src1); |
6274 | 0 | } |
6275 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, tmp); |
6276 | 0 | } |
6277 | 0 | } break; |
6278 | 0 | case GGML_OP_ADD1: { |
6279 | 0 | if (src0_needs_grads) { |
6280 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, grad); |
6281 | 0 | } |
6282 | 0 | if (src1_needs_grads) { |
6283 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean |
6284 | 0 | } |
6285 | 0 | } break; |
6286 | 0 | case GGML_OP_ACC: { |
6287 | 0 | if (src0_needs_grads) { |
6288 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, grad); |
6289 | 0 | } |
6290 | 0 | if (src1_needs_grads) { |
6291 | 0 | const size_t nb1 = ((int32_t *) tensor->op_params)[0]; |
6292 | 0 | const size_t nb2 = ((int32_t *) tensor->op_params)[1]; |
6293 | 0 | const size_t nb3 = ((int32_t *) tensor->op_params)[2]; |
6294 | 0 | const size_t offset = ((int32_t *) tensor->op_params)[3]; |
6295 | |
|
6296 | 0 | struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx, |
6297 | 0 | grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], |
6298 | 0 | nb1, nb2, nb3, offset); |
6299 | |
|
6300 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1)); |
6301 | 0 | } |
6302 | 0 | } break; |
6303 | 0 | case GGML_OP_SUB: { |
6304 | 0 | if (src0_needs_grads) { |
6305 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, grad); |
6306 | 0 | } |
6307 | 0 | if (src1_needs_grads) { |
6308 | 0 | ggml_sub_or_set(ctx, cgraph, isrc1, grad); |
6309 | 0 | } |
6310 | 0 | } break; |
6311 | 0 | case GGML_OP_MUL: { |
6312 | 0 | if (src0_needs_grads) { |
6313 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1)); |
6314 | 0 | } |
6315 | 0 | if (src1_needs_grads) { |
6316 | 0 | struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad); |
6317 | 0 | if (!ggml_are_same_shape(src0, src1)) { |
6318 | 0 | tmp = ggml_repeat_back(ctx, tmp, src1); |
6319 | 0 | } |
6320 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, tmp); |
6321 | 0 | } |
6322 | 0 | } break; |
6323 | 0 | case GGML_OP_DIV: { |
6324 | 0 | if (src0_needs_grads) { |
6325 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1)); |
6326 | 0 | } |
6327 | 0 | if (src1_needs_grads) { |
6328 | 0 | ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1))); |
6329 | 0 | } |
6330 | 0 | } break; |
6331 | 0 | case GGML_OP_SQR: { |
6332 | 0 | if (src0_needs_grads) { |
6333 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f)); |
6334 | 0 | } |
6335 | 0 | } break; |
6336 | 0 | case GGML_OP_SQRT: { |
6337 | 0 | if (src0_needs_grads) { |
6338 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f)); |
6339 | 0 | } |
6340 | 0 | } break; |
6341 | 0 | case GGML_OP_LOG: { |
6342 | 0 | if (src0_needs_grads) { |
6343 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0)); |
6344 | 0 | } |
6345 | 0 | } break; |
6346 | 0 | case GGML_OP_SIN: { |
6347 | 0 | if (src0_needs_grads) { |
6348 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0))); |
6349 | 0 | } |
6350 | 0 | } break; |
6351 | 0 | case GGML_OP_COS: { |
6352 | 0 | if (src0_needs_grads) { |
6353 | 0 | ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0))); |
6354 | 0 | } |
6355 | 0 | } break; |
6356 | 0 | case GGML_OP_SUM: { |
6357 | 0 | if (src0_needs_grads) { |
6358 | 0 | ggml_add1_or_set(ctx, cgraph, isrc0, grad); |
6359 | 0 | } |
6360 | 0 | } break; |
6361 | 0 | case GGML_OP_SUM_ROWS: { |
6362 | 0 | if (src0_needs_grads) { |
6363 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0)); |
6364 | 0 | } |
6365 | 0 | } break; |
6366 | 0 | case GGML_OP_MEAN: { |
6367 | 0 | if (src0_needs_grads) { |
6368 | 0 | ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false)); |
6369 | 0 | } |
6370 | 0 | } break; |
6371 | 0 | case GGML_OP_REPEAT: { |
6372 | 0 | if (src0_needs_grads) { |
6373 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0)); |
6374 | 0 | } |
6375 | 0 | } break; |
6376 | 0 | case GGML_OP_REPEAT_BACK: { |
6377 | 0 | if (src0_needs_grads) { |
6378 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0)); |
6379 | 0 | } |
6380 | 0 | } break; |
6381 | 0 | case GGML_OP_RMS_NORM: { |
6382 | 0 | if (src0_needs_grads) { |
6383 | 0 | float eps; |
6384 | 0 | memcpy(&eps, tensor->op_params, sizeof(float)); |
6385 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps)); |
6386 | 0 | } |
6387 | 0 | } break; |
6388 | 0 | case GGML_OP_MUL_MAT: { |
6389 | | // https://cs231n.github.io/optimization-2/#staged |
6390 | | // # forward pass |
6391 | | // s0 = np.random.randn(5, 10) |
6392 | | // s1 = np.random.randn(10, 3) |
6393 | | // t = s0.dot(s1) |
6394 | | |
6395 | | // # now suppose we had the gradient on t from above in the circuit |
6396 | | // dt = np.random.randn(*t.shape) # same shape as t |
6397 | | // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix |
6398 | | // ds1 = t.T.dot(dt) |
6399 | | |
6400 | | // tensor.shape [m,p,qq,rr] |
6401 | | // src0.shape [n,m,q1,r1] |
6402 | | // src1.shape [n,p,qq,rr] |
6403 | |
|
6404 | 0 | if (src0_needs_grads) { |
6405 | 0 | GGML_ASSERT(grad->ne[2] == src1->ne[2]); |
6406 | 0 | GGML_ASSERT(grad->ne[3] == src1->ne[3]); |
6407 | 0 | struct ggml_tensor * tmp = |
6408 | 0 | ggml_out_prod(ctx, // [n,m,qq,rr] |
6409 | 0 | src1, // [n,p,qq,rr] |
6410 | 0 | grad); // [m,p,qq,rr] |
6411 | 0 | if (!ggml_are_same_shape(tmp, src0)) { |
6412 | 0 | GGML_ASSERT(tmp->ne[0] == src0->ne[0]); |
6413 | 0 | GGML_ASSERT(tmp->ne[1] == src0->ne[1]); |
6414 | 0 | GGML_ASSERT(tmp->ne[3] == 1); |
6415 | |
|
6416 | 0 | const int64_t nr2 = tmp->ne[2] / src0->ne[2]; |
6417 | 0 | const size_t nb2 = tmp->nb[2] * nr2; |
6418 | 0 | const size_t nb3 = tmp->nb[2]; |
6419 | |
|
6420 | 0 | tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0); |
6421 | 0 | tmp = ggml_repeat_back(ctx, tmp, src0); |
6422 | 0 | } |
6423 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, tmp); |
6424 | 0 | } |
6425 | 0 | if (src1_needs_grads) { |
6426 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, |
6427 | | // ggml_mul_mat(ctx, // [n,p,qq,rr] |
6428 | | // ggml_cont(ctx, // [m,n,q1,r1] |
6429 | | // ggml_transpose(ctx, src0)), // [m,n,q1,r1] |
6430 | | // grad), // [m,p,qq,rr] |
6431 | | |
6432 | | // when src0 is bigger than tensor->grad (this is mostly the case in llama), |
6433 | | // avoid transpose of src0, rather transpose smaller tensor->grad |
6434 | | // and then use ggml_out_prod |
6435 | 0 | ggml_out_prod(ctx, // [n,p,qq,rr] |
6436 | 0 | src0, // [n,m,q1,r1] |
6437 | 0 | ggml_transpose(ctx, // [p,m,qq,rr] |
6438 | 0 | grad))); // [m,p,qq,rr] |
6439 | 0 | } |
6440 | 0 | } break; |
6441 | 0 | case GGML_OP_SCALE: { |
6442 | 0 | if (src0_needs_grads) { |
6443 | 0 | float s; |
6444 | 0 | memcpy(&s, tensor->op_params, sizeof(float)); |
6445 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false)); |
6446 | 0 | } |
6447 | 0 | } break; |
6448 | 0 | case GGML_OP_SET: { |
6449 | 0 | const size_t nb1 = ((const int32_t *) tensor->op_params)[0]; |
6450 | 0 | const size_t nb2 = ((const int32_t *) tensor->op_params)[1]; |
6451 | 0 | const size_t nb3 = ((const int32_t *) tensor->op_params)[2]; |
6452 | 0 | const size_t offset = ((const int32_t *) tensor->op_params)[3]; |
6453 | |
|
6454 | 0 | struct ggml_tensor * tensor_grad_view = NULL; |
6455 | |
|
6456 | 0 | if (src0_needs_grads || src1_needs_grads) { |
6457 | 0 | GGML_ASSERT(src0->type == tensor->type); |
6458 | 0 | GGML_ASSERT(!cgraph->grads[isrc0] || cgraph->grads[isrc0]->type == grad->type); |
6459 | 0 | GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type); |
6460 | |
|
6461 | 0 | tensor_grad_view = ggml_view_4d(ctx, |
6462 | 0 | grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], |
6463 | 0 | nb1, nb2, nb3, offset); |
6464 | 0 | } |
6465 | |
|
6466 | 0 | if (src0_needs_grads) { |
6467 | 0 | struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view); |
6468 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false)); |
6469 | 0 | } |
6470 | |
|
6471 | 0 | if (src1_needs_grads) { |
6472 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1)); |
6473 | 0 | } |
6474 | 0 | } break; |
6475 | 0 | case GGML_OP_CPY: { |
6476 | | // cpy overwrites value of src1 by src0 and returns view(src1) |
6477 | | // the overwriting is mathematically equivalent to: |
6478 | | // tensor = src0 * 1 + src1 * 0 |
6479 | 0 | if (src0_needs_grads) { |
6480 | | // dsrc0 = dtensor * 1 |
6481 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0)); |
6482 | 0 | } |
6483 | 0 | if (src1_needs_grads) { |
6484 | | // dsrc1 = dtensor * 0 -> noop |
6485 | 0 | } |
6486 | 0 | } break; |
6487 | 0 | case GGML_OP_CONT: { |
6488 | | // same as cpy |
6489 | 0 | if (src0_needs_grads) { |
6490 | 0 | GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0])); |
6491 | 0 | GGML_ASSERT(ggml_is_contiguous(grad)); |
6492 | 0 | GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0)); |
6493 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, |
6494 | 0 | ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0)); |
6495 | 0 | } |
6496 | 0 | } break; |
6497 | 0 | case GGML_OP_RESHAPE: { |
6498 | 0 | if (src0_needs_grads) { |
6499 | 0 | struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad); |
6500 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0)); |
6501 | 0 | } |
6502 | 0 | } break; |
6503 | 0 | case GGML_OP_VIEW: { |
6504 | 0 | if (src0_needs_grads) { |
6505 | 0 | size_t offset; |
6506 | |
|
6507 | 0 | memcpy(&offset, tensor->op_params, sizeof(offset)); |
6508 | |
|
6509 | 0 | size_t nb1 = tensor->nb[1]; |
6510 | 0 | size_t nb2 = tensor->nb[2]; |
6511 | 0 | size_t nb3 = tensor->nb[3]; |
6512 | |
|
6513 | 0 | if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) { |
6514 | | // gradient is typically F32, but src0 could be other type |
6515 | 0 | size_t ng = ggml_element_size(cgraph->grads[isrc0]); |
6516 | 0 | size_t n0 = ggml_element_size(src0); |
6517 | 0 | GGML_ASSERT(offset % n0 == 0); |
6518 | 0 | GGML_ASSERT(nb1 % n0 == 0); |
6519 | 0 | GGML_ASSERT(nb2 % n0 == 0); |
6520 | 0 | GGML_ASSERT(nb3 % n0 == 0); |
6521 | 0 | offset = (offset / n0) * ng; |
6522 | 0 | nb1 = (nb1 / n0) * ng; |
6523 | 0 | nb2 = (nb2 / n0) * ng; |
6524 | 0 | nb3 = (nb3 / n0) * ng; |
6525 | 0 | } |
6526 | |
|
6527 | 0 | ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset); |
6528 | 0 | } |
6529 | 0 | } break; |
6530 | 0 | case GGML_OP_PERMUTE: { |
6531 | 0 | if (src0_needs_grads) { |
6532 | 0 | const int32_t * axes = (const int32_t *) tensor->op_params; |
6533 | 0 | const int axis0 = axes[0] & 0x3; |
6534 | 0 | const int axis1 = axes[1] & 0x3; |
6535 | 0 | const int axis2 = axes[2] & 0x3; |
6536 | 0 | const int axis3 = axes[3] & 0x3; |
6537 | 0 | int axb[4] = {0,0,0,0}; // axes backward |
6538 | 0 | axb[axis0] = 0; |
6539 | 0 | axb[axis1] = 1; |
6540 | 0 | axb[axis2] = 2; |
6541 | 0 | axb[axis3] = 3; |
6542 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3])); |
6543 | 0 | } |
6544 | 0 | } break; |
6545 | 0 | case GGML_OP_TRANSPOSE: { |
6546 | 0 | if (src0_needs_grads) { |
6547 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad)); |
6548 | 0 | } |
6549 | 0 | } break; |
6550 | 0 | case GGML_OP_GET_ROWS: { |
6551 | 0 | if (src0_needs_grads) { |
6552 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0)); |
6553 | 0 | } |
6554 | 0 | if (src1_needs_grads) { |
6555 | | // noop |
6556 | 0 | } |
6557 | 0 | } break; |
6558 | 0 | case GGML_OP_DIAG_MASK_INF: { |
6559 | 0 | if (src0_needs_grads) { |
6560 | | /* ggml_diag_mask_inf_impl() shouldn't be here */ |
6561 | | /* ref: https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */ |
6562 | 0 | const int n_past = ((const int32_t *) tensor->op_params)[0]; |
6563 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false)); |
6564 | 0 | } |
6565 | 0 | } break; |
6566 | 0 | case GGML_OP_DIAG_MASK_ZERO: { |
6567 | 0 | if (src0_needs_grads) { |
6568 | 0 | const int n_past = ((const int32_t *) tensor->op_params)[0]; |
6569 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false)); |
6570 | 0 | } |
6571 | 0 | } break; |
6572 | 0 | case GGML_OP_SOFT_MAX: { |
6573 | 0 | if (src0_needs_grads) { |
6574 | 0 | float scale = 1.0f; |
6575 | 0 | float max_bias = 0.0f; |
6576 | |
|
6577 | 0 | memcpy(&scale, (const float *) tensor->op_params + 0, sizeof(float)); |
6578 | 0 | memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float)); |
6579 | |
|
6580 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias)); |
6581 | 0 | } |
6582 | 0 | GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented"); |
6583 | 0 | } break; |
6584 | 0 | case GGML_OP_ROPE: { |
6585 | 0 | if (src0_needs_grads) { |
6586 | | //const int n_past = ((int32_t *) tensor->op_params)[0]; |
6587 | 0 | const int n_dims = ((const int32_t *) tensor->op_params)[1]; |
6588 | 0 | const int mode = ((const int32_t *) tensor->op_params)[2]; |
6589 | | //const int n_ctx = ((int32_t *) tensor->op_params)[3]; |
6590 | 0 | const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4]; |
6591 | 0 | float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow; |
6592 | 0 | int sections[4] = {0, 0, 0, 0}; |
6593 | |
|
6594 | 0 | memcpy(&freq_base, (const float *) tensor->op_params + 5, sizeof(float)); |
6595 | 0 | memcpy(&freq_scale, (const float *) tensor->op_params + 6, sizeof(float)); |
6596 | 0 | memcpy(&ext_factor, (const float *) tensor->op_params + 7, sizeof(float)); |
6597 | 0 | memcpy(&attn_factor, (const float *) tensor->op_params + 8, sizeof(float)); |
6598 | 0 | memcpy(&beta_fast, (const float *) tensor->op_params + 9, sizeof(float)); |
6599 | 0 | memcpy(&beta_slow, (const float *) tensor->op_params + 10, sizeof(float)); |
6600 | 0 | memcpy(§ions, tensor->op_params + 11, sizeof(sections)); |
6601 | |
|
6602 | 0 | struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ? |
6603 | 0 | ggml_rope_ext_back(ctx, grad, src1, src2, n_dims, |
6604 | 0 | mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) : |
6605 | 0 | ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections, |
6606 | 0 | mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow); |
6607 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, rope_back); |
6608 | 0 | } |
6609 | 0 | GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented"); |
6610 | 0 | } break; |
6611 | 0 | case GGML_OP_IM2COL: { |
6612 | 0 | if (src1_needs_grads) { |
6613 | 0 | const int32_t s0 = ggml_get_op_params_i32(tensor, 0); |
6614 | 0 | const int32_t s1 = ggml_get_op_params_i32(tensor, 1); |
6615 | 0 | const int32_t p0 = ggml_get_op_params_i32(tensor, 2); |
6616 | 0 | const int32_t p1 = ggml_get_op_params_i32(tensor, 3); |
6617 | 0 | const int32_t d0 = ggml_get_op_params_i32(tensor, 4); |
6618 | 0 | const int32_t d1 = ggml_get_op_params_i32(tensor, 5); |
6619 | 0 | const bool is_2D = ggml_get_op_params_i32(tensor, 6) == 1; |
6620 | |
|
6621 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D)); |
6622 | 0 | } |
6623 | 0 | } break; |
6624 | 0 | case GGML_OP_POOL_2D: { |
6625 | 0 | if (src0_needs_grads) { |
6626 | 0 | const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0); |
6627 | 0 | const int32_t k0 = ggml_get_op_params_i32(tensor, 1); |
6628 | 0 | const int32_t k1 = ggml_get_op_params_i32(tensor, 2); |
6629 | 0 | const int32_t s0 = ggml_get_op_params_i32(tensor, 3); |
6630 | 0 | const int32_t s1 = ggml_get_op_params_i32(tensor, 4); |
6631 | 0 | const int32_t p0 = ggml_get_op_params_i32(tensor, 5); |
6632 | 0 | const int32_t p1 = ggml_get_op_params_i32(tensor, 6); |
6633 | |
|
6634 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1)); |
6635 | 0 | } |
6636 | 0 | } break; |
6637 | 0 | case GGML_OP_WIN_PART: |
6638 | 0 | case GGML_OP_WIN_UNPART: |
6639 | 0 | case GGML_OP_UNARY: { |
6640 | 0 | switch (ggml_get_unary_op(tensor)) { |
6641 | 0 | case GGML_UNARY_OP_ABS: { |
6642 | 0 | if (src0_needs_grads) { |
6643 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad)); |
6644 | 0 | } |
6645 | 0 | } break; |
6646 | 0 | case GGML_UNARY_OP_SGN: { |
6647 | | // noop |
6648 | 0 | } break; |
6649 | 0 | case GGML_UNARY_OP_NEG: { |
6650 | 0 | if (src0_needs_grads) { |
6651 | 0 | ggml_sub_or_set(ctx, cgraph, isrc0, grad); |
6652 | 0 | } |
6653 | 0 | } break; |
6654 | 0 | case GGML_UNARY_OP_STEP: { |
6655 | | // noop |
6656 | 0 | } break; |
6657 | 0 | case GGML_UNARY_OP_RELU: { |
6658 | 0 | if (src0_needs_grads) { |
6659 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad)); |
6660 | 0 | } |
6661 | 0 | } break; |
6662 | 0 | case GGML_UNARY_OP_SILU: { |
6663 | 0 | if (src0_needs_grads) { |
6664 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0)); |
6665 | 0 | } |
6666 | 0 | } break; |
6667 | 0 | case GGML_UNARY_OP_EXP: { |
6668 | 0 | if (src0_needs_grads) { |
6669 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad)); |
6670 | 0 | } |
6671 | 0 | } break; |
6672 | 0 | case GGML_UNARY_OP_EXPM1: { |
6673 | 0 | if (src0_needs_grads) { |
6674 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0))); |
6675 | 0 | } |
6676 | 0 | } break; |
6677 | 0 | case GGML_UNARY_OP_SOFTPLUS: { |
6678 | 0 | if (src0_needs_grads) { |
6679 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0))); |
6680 | 0 | } |
6681 | 0 | } break; |
6682 | 0 | default: { |
6683 | 0 | fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n", |
6684 | 0 | __func__, ggml_unary_op_name(ggml_get_unary_op(tensor))); |
6685 | 0 | GGML_ABORT("fatal error"); |
6686 | 0 | } //break; |
6687 | 0 | } |
6688 | 0 | } break; |
6689 | 0 | case GGML_OP_CROSS_ENTROPY_LOSS: { |
6690 | 0 | if (src0_needs_grads) { |
6691 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1)); |
6692 | 0 | } |
6693 | 0 | GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented"); |
6694 | 0 | } break; |
6695 | 0 | case GGML_OP_GLU: { |
6696 | 0 | switch (ggml_get_glu_op(tensor)) { |
6697 | 0 | case GGML_GLU_OP_SWIGLU: { |
6698 | 0 | if (src0_needs_grads) { |
6699 | 0 | GGML_ASSERT(src1 && "backward pass only implemented for split swiglu"); |
6700 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0)); |
6701 | 0 | } |
6702 | 0 | if (src1_needs_grads) { |
6703 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad)); |
6704 | 0 | } |
6705 | 0 | } break; |
6706 | 0 | default: { |
6707 | 0 | GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor))); |
6708 | 0 | } //break; |
6709 | 0 | } |
6710 | 0 | } break; |
6711 | 0 | case GGML_OP_NONE: { |
6712 | | // noop |
6713 | 0 | } break; |
6714 | 0 | case GGML_OP_COUNT: |
6715 | 0 | default: { |
6716 | 0 | GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op)); |
6717 | 0 | } //break; |
6718 | 0 | } |
6719 | | |
6720 | 0 | GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0])); |
6721 | 0 | GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1])); |
6722 | 0 | GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2])); |
6723 | 0 | } |
6724 | | |
6725 | 0 | static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) { |
6726 | | // check if already visited |
6727 | 0 | size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node); |
6728 | 0 | GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL); |
6729 | 0 | if (!ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) { |
6730 | | // This is the first time we see this node in the current graph. |
6731 | 0 | cgraph->visited_hash_set.keys[node_hash_pos] = node; |
6732 | 0 | ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos); |
6733 | 0 | cgraph->use_counts[node_hash_pos] = 0; |
6734 | 0 | } else { |
6735 | | // already visited |
6736 | 0 | return node_hash_pos; |
6737 | 0 | } |
6738 | | |
6739 | 0 | for (int i = 0; i < GGML_MAX_SRC; ++i) { |
6740 | 0 | const int k = |
6741 | 0 | (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i : |
6742 | 0 | (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) : |
6743 | 0 | /* unknown order, just fall back to using i */ i; |
6744 | |
|
6745 | 0 | struct ggml_tensor * src = node->src[k]; |
6746 | 0 | if (src) { |
6747 | 0 | size_t src_hash_pos = ggml_visit_parents(cgraph, src); |
6748 | | |
6749 | | // Update the use count for this operand. |
6750 | 0 | cgraph->use_counts[src_hash_pos]++; |
6751 | 0 | } |
6752 | 0 | } |
6753 | |
|
6754 | 0 | if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) { |
6755 | | // reached a leaf node, not part of the gradient graph (e.g. a constant) |
6756 | 0 | GGML_ASSERT(cgraph->n_leafs < cgraph->size); |
6757 | |
|
6758 | 0 | if (strlen(node->name) == 0) { |
6759 | 0 | ggml_format_name(node, "leaf_%d", cgraph->n_leafs); |
6760 | 0 | } |
6761 | |
|
6762 | 0 | cgraph->leafs[cgraph->n_leafs] = node; |
6763 | 0 | cgraph->n_leafs++; |
6764 | 0 | } else { |
6765 | 0 | GGML_ASSERT(cgraph->n_nodes < cgraph->size); |
6766 | |
|
6767 | 0 | if (strlen(node->name) == 0) { |
6768 | 0 | ggml_format_name(node, "node_%d", cgraph->n_nodes); |
6769 | 0 | } |
6770 | |
|
6771 | 0 | cgraph->nodes[cgraph->n_nodes] = node; |
6772 | 0 | cgraph->n_nodes++; |
6773 | 0 | } |
6774 | |
|
6775 | 0 | return node_hash_pos; |
6776 | 0 | } |
6777 | | |
6778 | 0 | static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) { |
6779 | 0 | if (!expand) { |
6780 | | // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand |
6781 | 0 | ggml_graph_clear(cgraph); |
6782 | 0 | } |
6783 | |
|
6784 | 0 | const int n0 = cgraph->n_nodes; |
6785 | |
|
6786 | 0 | ggml_visit_parents(cgraph, tensor); |
6787 | |
|
6788 | 0 | const int n_new = cgraph->n_nodes - n0; |
6789 | 0 | GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new); |
6790 | |
|
6791 | 0 | if (n_new > 0) { |
6792 | | // the last added node should always be starting point |
6793 | 0 | GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor); |
6794 | 0 | } |
6795 | 0 | } |
6796 | | |
6797 | 0 | void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) { |
6798 | 0 | ggml_build_forward_impl(cgraph, tensor, true); |
6799 | 0 | } |
6800 | | |
6801 | | void ggml_build_backward_expand( |
6802 | | struct ggml_context * ctx, |
6803 | | struct ggml_cgraph * cgraph, |
6804 | 0 | struct ggml_tensor ** grad_accs) { |
6805 | 0 | GGML_ASSERT(cgraph->n_nodes > 0); |
6806 | 0 | GGML_ASSERT(cgraph->grads); |
6807 | 0 | GGML_ASSERT(cgraph->grad_accs); |
6808 | |
|
6809 | 0 | const int n_nodes_f = cgraph->n_nodes; |
6810 | |
|
6811 | 0 | memset(cgraph->grads, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *)); |
6812 | 0 | memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *)); |
6813 | 0 | bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool)); |
6814 | |
|
6815 | 0 | { |
6816 | 0 | bool any_params = false; |
6817 | 0 | bool any_loss = false; |
6818 | 0 | for (int i = 0; i < n_nodes_f; ++i) { |
6819 | 0 | struct ggml_tensor * node = cgraph->nodes[i]; |
6820 | 0 | any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM); |
6821 | 0 | any_loss = any_loss || (node->flags & GGML_TENSOR_FLAG_LOSS); |
6822 | 0 | } |
6823 | 0 | GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?"); |
6824 | 0 | GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?"); |
6825 | 0 | } |
6826 | |
|
6827 | 0 | for (int i = 0; i < n_nodes_f; ++i) { |
6828 | 0 | struct ggml_tensor * node = cgraph->nodes[i]; |
6829 | |
|
6830 | 0 | if (node->type == GGML_TYPE_I32) { |
6831 | 0 | continue; |
6832 | 0 | } |
6833 | | |
6834 | 0 | bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS); |
6835 | 0 | bool ignore_src[GGML_MAX_SRC] = {false}; |
6836 | 0 | switch (node->op) { |
6837 | | // gradients in node->src[0] for one reason or another have no effect on output gradients |
6838 | 0 | case GGML_OP_IM2COL: // only used for its shape |
6839 | 0 | case GGML_OP_IM2COL_BACK: // same as IM2COL |
6840 | 0 | ignore_src[0] = true; |
6841 | 0 | break; |
6842 | 0 | case GGML_OP_UNARY: { |
6843 | 0 | const enum ggml_unary_op uop = ggml_get_unary_op(node); |
6844 | | // SGN and STEP unary ops are piecewise constant |
6845 | 0 | if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) { |
6846 | 0 | ignore_src[0] = true; |
6847 | 0 | } |
6848 | 0 | } break; |
6849 | | |
6850 | | // gradients in node->src[1] for one reason or another have no effect on output gradients |
6851 | 0 | case GGML_OP_CPY: // gradients in CPY target are irrelevant |
6852 | 0 | case GGML_OP_GET_ROWS: // row indices not differentiable |
6853 | 0 | case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS |
6854 | 0 | case GGML_OP_ROPE: // positions not differentiable |
6855 | 0 | ignore_src[1] = true; |
6856 | 0 | break; |
6857 | | |
6858 | 0 | default: |
6859 | 0 | break; |
6860 | 0 | } |
6861 | 0 | for (int j = 0; j < GGML_MAX_SRC; ++j) { |
6862 | 0 | if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) { |
6863 | 0 | continue; |
6864 | 0 | } |
6865 | 0 | GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16); |
6866 | 0 | node_needs_grad = true; |
6867 | 0 | break; |
6868 | 0 | } |
6869 | 0 | if (!node_needs_grad) { |
6870 | 0 | continue; |
6871 | 0 | } |
6872 | | |
6873 | | // inplace operations are currently not supported |
6874 | 0 | GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW || |
6875 | 0 | node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE); |
6876 | |
|
6877 | 0 | const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node); |
6878 | 0 | GGML_ASSERT(ihash != GGML_HASHSET_FULL); |
6879 | 0 | GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash)); |
6880 | 0 | if (grad_accs && grad_accs[i]) { |
6881 | 0 | cgraph->grad_accs[ihash] = grad_accs[i]; |
6882 | 0 | cgraph->grads[ihash] = cgraph->grad_accs[ihash]; |
6883 | 0 | } else if (node->flags & GGML_TENSOR_FLAG_LOSS) { |
6884 | | // loss tensors always need a gradient accumulator |
6885 | 0 | cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne); |
6886 | 0 | cgraph->grads[ihash] = cgraph->grad_accs[ihash]; |
6887 | 0 | } |
6888 | 0 | grads_needed[ihash] = true; |
6889 | 0 | } |
6890 | | |
6891 | 0 | for (int i = n_nodes_f - 1; i >= 0; --i) { |
6892 | | // inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation |
6893 | | // use allocator to automatically make inplace operations |
6894 | 0 | ggml_compute_backward(ctx, cgraph, i, grads_needed); |
6895 | 0 | } |
6896 | |
|
6897 | 0 | free(grads_needed); |
6898 | 0 | } |
6899 | | |
6900 | 0 | static void * incr_ptr_aligned(void ** p, size_t size, size_t align) { |
6901 | 0 | void * ptr = *p; |
6902 | 0 | ptr = (void *) GGML_PAD((uintptr_t) ptr, align); |
6903 | 0 | *p = (void *) ((char *) ptr + size); |
6904 | 0 | return ptr; |
6905 | 0 | } |
6906 | | |
6907 | 0 | static size_t ggml_graph_nbytes(size_t size, bool grads) { |
6908 | 0 | size_t hash_size = ggml_hash_size(size * 2); |
6909 | 0 | void * p = 0; |
6910 | 0 | incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1); |
6911 | 0 | incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes |
6912 | 0 | incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs |
6913 | 0 | incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts |
6914 | 0 | incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys |
6915 | 0 | if (grads) { |
6916 | 0 | incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads |
6917 | 0 | incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs |
6918 | 0 | } |
6919 | 0 | incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t)); |
6920 | |
|
6921 | 0 | size_t nbytes = (size_t) p; |
6922 | 0 | return nbytes; |
6923 | 0 | } |
6924 | | |
6925 | 0 | size_t ggml_graph_overhead_custom(size_t size, bool grads) { |
6926 | 0 | return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN); |
6927 | 0 | } |
6928 | | |
6929 | 0 | size_t ggml_graph_overhead(void) { |
6930 | 0 | return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false); |
6931 | 0 | } |
6932 | | |
6933 | 0 | struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) { |
6934 | 0 | const size_t obj_size = ggml_graph_nbytes(size, grads); |
6935 | 0 | struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size); |
6936 | 0 | struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs); |
6937 | | |
6938 | | // the size of the hash table is doubled since it needs to hold both nodes and leafs |
6939 | 0 | size_t hash_size = ggml_hash_size(size * 2); |
6940 | |
|
6941 | 0 | void * p = cgraph + 1; |
6942 | |
|
6943 | 0 | struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); |
6944 | 0 | struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); |
6945 | 0 | int32_t * use_counts_ptr = incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); |
6946 | 0 | struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); |
6947 | 0 | struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL; |
6948 | 0 | struct ggml_tensor ** grad_accs_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL; |
6949 | |
|
6950 | 0 | ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t)); |
6951 | | |
6952 | | // check that we allocated the correct amount of memory |
6953 | 0 | assert(obj_size == (size_t)((char *)p - (char *)cgraph)); |
6954 | |
|
6955 | 0 | *cgraph = (struct ggml_cgraph) { |
6956 | 0 | /*.size =*/ size, |
6957 | 0 | /*.n_nodes =*/ 0, |
6958 | 0 | /*.n_leafs =*/ 0, |
6959 | 0 | /*.nodes =*/ nodes_ptr, |
6960 | 0 | /*.grads =*/ grads_ptr, |
6961 | 0 | /*.grad_accs =*/ grad_accs_ptr, |
6962 | 0 | /*.leafs =*/ leafs_ptr, |
6963 | 0 | /*.use_counts =*/ use_counts_ptr, |
6964 | 0 | /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr }, |
6965 | 0 | /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT, |
6966 | 0 | }; |
6967 | |
|
6968 | 0 | ggml_hash_set_reset(&cgraph->visited_hash_set); |
6969 | 0 | if (grads) { |
6970 | 0 | memset(cgraph->grads, 0, hash_size*sizeof(struct ggml_tensor *)); |
6971 | 0 | memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *)); |
6972 | 0 | } |
6973 | |
|
6974 | 0 | return cgraph; |
6975 | 0 | } |
6976 | | |
6977 | 0 | struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) { |
6978 | 0 | return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false); |
6979 | 0 | } |
6980 | | |
6981 | 0 | struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) { |
6982 | 0 | struct ggml_cgraph cgraph = { |
6983 | 0 | /*.size =*/ 0, |
6984 | 0 | /*.n_nodes =*/ i1 - i0, |
6985 | 0 | /*.n_leafs =*/ 0, |
6986 | 0 | /*.nodes =*/ cgraph0->nodes + i0, |
6987 | 0 | /*.grads =*/ NULL, // gradients would need visited_hash_set |
6988 | 0 | /*.grad_accs =*/ NULL, |
6989 | 0 | /*.leafs =*/ NULL, |
6990 | 0 | /*.use_counts =*/ cgraph0->use_counts, |
6991 | 0 | /*.visited_hash_set =*/ cgraph0->visited_hash_set, |
6992 | 0 | /*.order =*/ cgraph0->order, |
6993 | 0 | }; |
6994 | |
|
6995 | 0 | return cgraph; |
6996 | 0 | } |
6997 | | |
6998 | 0 | void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) { |
6999 | 0 | GGML_ASSERT(dst->size >= src->n_leafs); |
7000 | 0 | GGML_ASSERT(dst->size >= src->n_nodes); |
7001 | 0 | GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size); |
7002 | |
|
7003 | 0 | dst->n_leafs = src->n_leafs; |
7004 | 0 | dst->n_nodes = src->n_nodes; |
7005 | 0 | dst->order = src->order; |
7006 | |
|
7007 | 0 | for (int i = 0; i < src->n_leafs; ++i) { |
7008 | 0 | dst->leafs[i] = src->leafs[i]; |
7009 | 0 | } |
7010 | |
|
7011 | 0 | for (int i = 0; i < src->n_nodes; ++i) { |
7012 | 0 | dst->nodes[i] = src->nodes[i]; |
7013 | 0 | } |
7014 | |
|
7015 | 0 | for (size_t i = 0; i < src->visited_hash_set.size; ++i) { |
7016 | | // copy all hashset keys (tensors) that are in use |
7017 | 0 | if (ggml_bitset_get(src->visited_hash_set.used, i)) { |
7018 | 0 | size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]); |
7019 | 0 | dst->use_counts[new_hash_pos] = src->use_counts[i]; |
7020 | 0 | } |
7021 | 0 | } |
7022 | |
|
7023 | 0 | if (dst->grads) { |
7024 | 0 | memset(dst->grads, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *)); |
7025 | 0 | memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *)); |
7026 | 0 | } |
7027 | 0 | if (src->grads) { |
7028 | 0 | GGML_ASSERT(dst->grads != NULL); |
7029 | 0 | GGML_ASSERT(dst->grad_accs != NULL); |
7030 | 0 | for (int i = 0; i < src->n_nodes; ++i) { |
7031 | 0 | const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]); |
7032 | 0 | const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]); |
7033 | |
|
7034 | 0 | GGML_ASSERT(igrad_src != GGML_HASHSET_FULL); |
7035 | 0 | GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src)); |
7036 | 0 | GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL); |
7037 | 0 | GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst)); |
7038 | |
|
7039 | 0 | dst->grads[igrad_dst] = src->grads[igrad_src]; |
7040 | 0 | dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src]; |
7041 | 0 | } |
7042 | 0 | } |
7043 | 0 | } |
7044 | | |
7045 | 0 | struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) { |
7046 | 0 | struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads); |
7047 | 0 | ggml_graph_cpy(cgraph, result); |
7048 | 0 | return result; |
7049 | 0 | } |
7050 | | |
7051 | 0 | struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) { |
7052 | 0 | if (ggml_is_empty(tensor)) { |
7053 | 0 | return tensor; |
7054 | 0 | } |
7055 | 0 | if (tensor->buffer) { |
7056 | 0 | ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor)); |
7057 | 0 | } else { |
7058 | 0 | GGML_ASSERT(tensor->data); |
7059 | 0 | memset(tensor->data, 0, ggml_nbytes(tensor)); |
7060 | 0 | } |
7061 | 0 | return tensor; |
7062 | 0 | } |
7063 | | |
7064 | 0 | void ggml_graph_reset(struct ggml_cgraph * cgraph) { |
7065 | 0 | if (!cgraph) { |
7066 | 0 | return; |
7067 | 0 | } |
7068 | 0 | GGML_ASSERT(cgraph->grads != NULL); |
7069 | |
|
7070 | 0 | for (int i = 0; i < cgraph->n_nodes; i++) { |
7071 | 0 | struct ggml_tensor * node = cgraph->nodes[i]; |
7072 | 0 | struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node); |
7073 | |
|
7074 | 0 | if (node->op == GGML_OP_OPT_STEP_ADAMW) { |
7075 | | // clear momenta |
7076 | 0 | ggml_set_zero(node->src[2]); |
7077 | 0 | ggml_set_zero(node->src[3]); |
7078 | 0 | } |
7079 | | |
7080 | | // initial gradients of loss should be 1, 0 otherwise |
7081 | 0 | if (grad_acc) { |
7082 | 0 | if (node->flags & GGML_TENSOR_FLAG_LOSS) { |
7083 | 0 | GGML_ASSERT(grad_acc->type == GGML_TYPE_F32); |
7084 | 0 | GGML_ASSERT(ggml_is_scalar(grad_acc)); |
7085 | |
|
7086 | 0 | const float onef = 1.0f; |
7087 | 0 | if (grad_acc->buffer) { |
7088 | 0 | ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float)); |
7089 | 0 | } else { |
7090 | 0 | GGML_ASSERT(grad_acc->data); |
7091 | 0 | *((float *) grad_acc->data) = onef; |
7092 | 0 | } |
7093 | 0 | } else { |
7094 | 0 | ggml_set_zero(grad_acc); |
7095 | 0 | } |
7096 | 0 | } |
7097 | 0 | } |
7098 | 0 | } |
7099 | | |
7100 | 0 | void ggml_graph_clear(struct ggml_cgraph * cgraph) { |
7101 | 0 | cgraph->n_leafs = 0; |
7102 | 0 | cgraph->n_nodes = 0; |
7103 | 0 | ggml_hash_set_reset(&cgraph->visited_hash_set); |
7104 | 0 | } |
7105 | | |
7106 | 0 | int ggml_graph_size(struct ggml_cgraph * cgraph) { |
7107 | 0 | return cgraph->size; |
7108 | 0 | } |
7109 | | |
7110 | 0 | struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) { |
7111 | 0 | if (i < 0) { |
7112 | 0 | GGML_ASSERT(cgraph->n_nodes + i >= 0); |
7113 | 0 | return cgraph->nodes[cgraph->n_nodes + i]; |
7114 | 0 | } |
7115 | | |
7116 | 0 | GGML_ASSERT(i < cgraph->n_nodes); |
7117 | 0 | return cgraph->nodes[i]; |
7118 | 0 | } |
7119 | | |
7120 | 0 | struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) { |
7121 | 0 | return cgraph->nodes; |
7122 | 0 | } |
7123 | | |
7124 | 0 | int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) { |
7125 | 0 | return cgraph->n_nodes; |
7126 | 0 | } |
7127 | | |
7128 | 0 | void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) { |
7129 | 0 | GGML_ASSERT(cgraph->size > cgraph->n_nodes); |
7130 | 0 | cgraph->nodes[cgraph->n_nodes] = tensor; |
7131 | 0 | cgraph->n_nodes++; |
7132 | 0 | } |
7133 | | |
7134 | 0 | struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) { |
7135 | 0 | for (int i = 0; i < cgraph->n_leafs; i++) { |
7136 | 0 | struct ggml_tensor * leaf = cgraph->leafs[i]; |
7137 | |
|
7138 | 0 | if (strcmp(leaf->name, name) == 0) { |
7139 | 0 | return leaf; |
7140 | 0 | } |
7141 | 0 | } |
7142 | | |
7143 | 0 | for (int i = 0; i < cgraph->n_nodes; i++) { |
7144 | 0 | struct ggml_tensor * node = cgraph->nodes[i]; |
7145 | |
|
7146 | 0 | if (strcmp(node->name, name) == 0) { |
7147 | 0 | return node; |
7148 | 0 | } |
7149 | 0 | } |
7150 | | |
7151 | 0 | return NULL; |
7152 | 0 | } |
7153 | | |
7154 | 0 | struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { |
7155 | 0 | const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node); |
7156 | 0 | return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL; |
7157 | 0 | } |
7158 | | |
7159 | 0 | struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { |
7160 | 0 | const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node); |
7161 | 0 | return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL; |
7162 | 0 | } |
7163 | | |
7164 | 0 | void ggml_graph_print(const struct ggml_cgraph * cgraph) { |
7165 | 0 | GGML_LOG_INFO("=== GRAPH ===\n"); |
7166 | |
|
7167 | 0 | GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes); |
7168 | 0 | for (int i = 0; i < cgraph->n_nodes; i++) { |
7169 | 0 | struct ggml_tensor * node = cgraph->nodes[i]; |
7170 | |
|
7171 | 0 | GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n", |
7172 | 0 | i, |
7173 | 0 | node->ne[0], node->ne[1], node->ne[2], |
7174 | 0 | ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : |
7175 | 0 | ggml_graph_get_grad(cgraph, node) ? "g" : " "); |
7176 | 0 | } |
7177 | |
|
7178 | 0 | GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs); |
7179 | 0 | for (int i = 0; i < cgraph->n_leafs; i++) { |
7180 | 0 | struct ggml_tensor * node = cgraph->leafs[i]; |
7181 | |
|
7182 | 0 | GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n", |
7183 | 0 | i, |
7184 | 0 | node->ne[0], node->ne[1], |
7185 | 0 | ggml_op_name(node->op), |
7186 | 0 | ggml_get_name(node)); |
7187 | 0 | } |
7188 | |
|
7189 | 0 | GGML_LOG_INFO("========================================\n"); |
7190 | 0 | } |
7191 | | |
7192 | | static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph, |
7193 | | const int * idxs, |
7194 | | int count, |
7195 | 0 | const struct ggml_tensor * tensor) { |
7196 | 0 | GGML_ASSERT(cgraph && idxs); |
7197 | 0 | for (int i = 0; i < count; ++i) { |
7198 | 0 | const int node_idx = idxs[i]; |
7199 | |
|
7200 | 0 | if (node_idx >= cgraph->n_nodes) { |
7201 | 0 | return -1; |
7202 | 0 | } |
7203 | 0 | if (cgraph->nodes[node_idx] == tensor) { |
7204 | 0 | return i; |
7205 | 0 | } |
7206 | 0 | } |
7207 | 0 | return -1; |
7208 | 0 | } |
7209 | | |
7210 | | bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph, |
7211 | | const int * node_idxs, |
7212 | | int count, |
7213 | | const enum ggml_op * ops, |
7214 | | const int * outputs, |
7215 | 0 | int num_outputs) { |
7216 | 0 | GGML_ASSERT(outputs && num_outputs > 0); |
7217 | |
|
7218 | 0 | for (int i = 0; i < count; ++i) { |
7219 | 0 | if (node_idxs[i] >= cgraph->n_nodes) { |
7220 | 0 | return false; |
7221 | 0 | } |
7222 | | |
7223 | 0 | const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]]; |
7224 | |
|
7225 | 0 | if (node->op != ops[i]) { |
7226 | 0 | return false; |
7227 | 0 | } |
7228 | | |
7229 | 0 | if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) { |
7230 | 0 | continue; |
7231 | 0 | } |
7232 | | |
7233 | 0 | if (node->flags & GGML_TENSOR_FLAG_OUTPUT) { |
7234 | 0 | return false; |
7235 | 0 | } |
7236 | | |
7237 | 0 | int subgraph_uses = 0; |
7238 | 0 | for (int j = i + 1; j < count; ++j) { |
7239 | 0 | const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]]; |
7240 | 0 | for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) { |
7241 | 0 | if (other_node->src[src_idx] == node) { |
7242 | 0 | subgraph_uses++; |
7243 | 0 | } |
7244 | 0 | } |
7245 | 0 | } |
7246 | |
|
7247 | 0 | if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) { |
7248 | 0 | return false; |
7249 | 0 | } |
7250 | | |
7251 | | // if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph |
7252 | 0 | struct ggml_tensor * view_src = node->view_src; |
7253 | 0 | while (view_src) { |
7254 | 0 | if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) { |
7255 | 0 | return false; |
7256 | 0 | } |
7257 | 0 | view_src = view_src->view_src; |
7258 | 0 | } |
7259 | 0 | } |
7260 | | |
7261 | 0 | return true; |
7262 | 0 | } |
7263 | | |
7264 | | // check if node is part of the graph |
7265 | 0 | static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { |
7266 | 0 | if (cgraph == NULL) { |
7267 | 0 | return true; |
7268 | 0 | } |
7269 | | |
7270 | 0 | for (int i = 0; i < cgraph->n_nodes; i++) { |
7271 | 0 | if (cgraph->nodes[i] == node) { |
7272 | 0 | return true; |
7273 | 0 | } |
7274 | 0 | } |
7275 | | |
7276 | 0 | return false; |
7277 | 0 | } |
7278 | | |
7279 | 0 | static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { |
7280 | 0 | for (int i = 0; i < cgraph->n_nodes; i++) { |
7281 | 0 | struct ggml_tensor * parent = cgraph->nodes[i]; |
7282 | 0 | struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent); |
7283 | |
|
7284 | 0 | if (grad == node) { |
7285 | 0 | return parent; |
7286 | 0 | } |
7287 | 0 | } |
7288 | | |
7289 | 0 | return NULL; |
7290 | 0 | } |
7291 | | |
7292 | 0 | static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) { |
7293 | 0 | struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node); |
7294 | 0 | struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent); |
7295 | 0 | fprintf(fp, " \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n", |
7296 | 0 | gparent0 ? (void *) gparent0 : (void *) parent, |
7297 | 0 | gparent ? (void *) gparent : (void *) node, |
7298 | 0 | gparent ? "empty" : "vee", |
7299 | 0 | gparent ? "dashed" : "solid", |
7300 | 0 | label); |
7301 | 0 | } |
7302 | | |
7303 | 0 | static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) { |
7304 | 0 | fprintf(fp, " \"%p\" -> \"%p\" [ label = \"%s\"; ]\n", |
7305 | 0 | (void *) parent, |
7306 | 0 | (void *) node, |
7307 | 0 | label); |
7308 | 0 | } |
7309 | | |
7310 | 0 | void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) { |
7311 | 0 | char color[16]; |
7312 | |
|
7313 | 0 | FILE * fp = ggml_fopen(filename, "w"); |
7314 | 0 | GGML_ASSERT(fp); |
7315 | |
|
7316 | 0 | fprintf(fp, "digraph G {\n"); |
7317 | 0 | fprintf(fp, " newrank = true;\n"); |
7318 | 0 | fprintf(fp, " rankdir = TB;\n"); |
7319 | |
|
7320 | 0 | for (int i = 0; i < gb->n_nodes; i++) { |
7321 | 0 | struct ggml_tensor * node = gb->nodes[i]; |
7322 | 0 | struct ggml_tensor * grad = ggml_graph_get_grad(gb, node); |
7323 | |
|
7324 | 0 | if (ggml_graph_get_parent(gb, node) != NULL) { |
7325 | 0 | continue; |
7326 | 0 | } |
7327 | | |
7328 | 0 | if (node->flags & GGML_TENSOR_FLAG_PARAM) { |
7329 | 0 | snprintf(color, sizeof(color), "yellow"); |
7330 | 0 | } else if (grad) { |
7331 | 0 | if (ggml_graph_find(gf, node)) { |
7332 | 0 | snprintf(color, sizeof(color), "green"); |
7333 | 0 | } else { |
7334 | 0 | snprintf(color, sizeof(color), "lightblue"); |
7335 | 0 | } |
7336 | 0 | } else { |
7337 | 0 | snprintf(color, sizeof(color), "white"); |
7338 | 0 | } |
7339 | |
|
7340 | 0 | fprintf(fp, " \"%p\" [ " |
7341 | 0 | "style = filled; fillcolor = %s; shape = record; " |
7342 | 0 | "label=\"", |
7343 | 0 | (void *) node, color); |
7344 | |
|
7345 | 0 | if (strlen(node->name) > 0) { |
7346 | 0 | fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type)); |
7347 | 0 | } else { |
7348 | 0 | fprintf(fp, "(%s)|", ggml_type_name(node->type)); |
7349 | 0 | } |
7350 | |
|
7351 | 0 | if (ggml_is_matrix(node)) { |
7352 | 0 | fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op)); |
7353 | 0 | } else { |
7354 | 0 | fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op)); |
7355 | 0 | } |
7356 | |
|
7357 | 0 | if (grad) { |
7358 | 0 | fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op)); |
7359 | 0 | } else { |
7360 | 0 | fprintf(fp, "\"; ]\n"); |
7361 | 0 | } |
7362 | 0 | } |
7363 | |
|
7364 | 0 | for (int i = 0; i < gb->n_leafs; i++) { |
7365 | 0 | struct ggml_tensor * node = gb->leafs[i]; |
7366 | |
|
7367 | 0 | snprintf(color, sizeof(color), "pink"); |
7368 | |
|
7369 | 0 | fprintf(fp, " \"%p\" [ " |
7370 | 0 | "style = filled; fillcolor = %s; shape = record; " |
7371 | 0 | "label=\"<x>", |
7372 | 0 | (void *) node, color); |
7373 | |
|
7374 | 0 | if (strlen(node->name) > 0) { |
7375 | 0 | fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type)); |
7376 | 0 | } else { |
7377 | 0 | fprintf(fp, "(%s)|", ggml_type_name(node->type)); |
7378 | 0 | } |
7379 | |
|
7380 | 0 | fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]); |
7381 | 0 | if (ggml_nelements(node) < 5 && node->data != NULL) { |
7382 | 0 | fprintf(fp, " | ("); |
7383 | 0 | for (int j = 0; j < ggml_nelements(node); j++) { |
7384 | | // FIXME: use ggml-backend to obtain the tensor data |
7385 | | //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) { |
7386 | | // fprintf(fp, "%d", ggml_get_i32_1d(node, j)); |
7387 | | //} |
7388 | | //else if (node->type == GGML_TYPE_F32 || |
7389 | | // node->type == GGML_TYPE_F16 || |
7390 | | // node->type == GGML_TYPE_BF16) { |
7391 | | // fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j)); |
7392 | | //} |
7393 | | //else |
7394 | 0 | { |
7395 | 0 | fprintf(fp, "#"); |
7396 | 0 | } |
7397 | 0 | if (j < ggml_nelements(node) - 1) { |
7398 | 0 | fprintf(fp, ", "); |
7399 | 0 | } |
7400 | 0 | } |
7401 | 0 | fprintf(fp, ")"); |
7402 | 0 | } |
7403 | 0 | fprintf(fp, "\"; ]\n"); |
7404 | 0 | } |
7405 | |
|
7406 | 0 | for (int i = 0; i < gb->n_nodes; i++) { |
7407 | 0 | struct ggml_tensor * node = gb->nodes[i]; |
7408 | |
|
7409 | 0 | for (int j = 0; j < GGML_MAX_SRC; j++) { |
7410 | 0 | if (node->src[j]) { |
7411 | 0 | char label[16]; |
7412 | 0 | snprintf(label, sizeof(label), "src %d", j); |
7413 | 0 | ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label); |
7414 | 0 | } |
7415 | 0 | } |
7416 | 0 | } |
7417 | |
|
7418 | 0 | for (int i = 0; i < gb->n_leafs; i++) { |
7419 | 0 | struct ggml_tensor * node = gb->leafs[i]; |
7420 | |
|
7421 | 0 | for (int j = 0; j < GGML_MAX_SRC; j++) { |
7422 | 0 | if (node->src[j]) { |
7423 | 0 | char label[16]; |
7424 | 0 | snprintf(label, sizeof(label), "src %d", j); |
7425 | 0 | ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label); |
7426 | 0 | } |
7427 | 0 | } |
7428 | 0 | } |
7429 | |
|
7430 | 0 | fprintf(fp, "}\n"); |
7431 | |
|
7432 | 0 | fclose(fp); |
7433 | |
|
7434 | 0 | GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename); |
7435 | 0 | } |
7436 | | |
7437 | | //////////////////////////////////////////////////////////////////////////////// |
7438 | | |
7439 | 0 | void ggml_set_input(struct ggml_tensor * tensor) { |
7440 | 0 | tensor->flags |= GGML_TENSOR_FLAG_INPUT; |
7441 | 0 | } |
7442 | | |
7443 | 0 | void ggml_set_output(struct ggml_tensor * tensor) { |
7444 | 0 | tensor->flags |= GGML_TENSOR_FLAG_OUTPUT; |
7445 | 0 | } |
7446 | | |
7447 | 0 | void ggml_set_param(struct ggml_tensor * tensor) { |
7448 | 0 | GGML_ASSERT(tensor->op == GGML_OP_NONE); |
7449 | 0 | tensor->flags |= GGML_TENSOR_FLAG_PARAM; |
7450 | 0 | } |
7451 | | |
7452 | 0 | void ggml_set_loss(struct ggml_tensor * tensor) { |
7453 | 0 | GGML_ASSERT(ggml_is_scalar(tensor)); |
7454 | 0 | GGML_ASSERT(tensor->type == GGML_TYPE_F32); |
7455 | 0 | tensor->flags |= GGML_TENSOR_FLAG_LOSS; |
7456 | 0 | } |
7457 | | |
7458 | | //////////////////////////////////////////////////////////////////////////////// |
7459 | | |
7460 | 0 | void ggml_quantize_init(enum ggml_type type) { |
7461 | 0 | ggml_critical_section_start(); |
7462 | |
|
7463 | 0 | switch (type) { |
7464 | 0 | case GGML_TYPE_IQ2_XXS: |
7465 | 0 | case GGML_TYPE_IQ2_XS: |
7466 | 0 | case GGML_TYPE_IQ2_S: |
7467 | 0 | case GGML_TYPE_IQ1_S: |
7468 | 0 | case GGML_TYPE_IQ1_M: iq2xs_init_impl(type); break; |
7469 | 0 | case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break; |
7470 | 0 | case GGML_TYPE_IQ3_S: iq3xs_init_impl(512); break; |
7471 | 0 | default: // nothing |
7472 | 0 | break; |
7473 | 0 | } |
7474 | | |
7475 | 0 | ggml_critical_section_end(); |
7476 | 0 | } |
7477 | | |
7478 | 8 | void ggml_quantize_free(void) { |
7479 | 8 | ggml_critical_section_start(); |
7480 | | |
7481 | 8 | iq2xs_free_impl(GGML_TYPE_IQ2_XXS); |
7482 | 8 | iq2xs_free_impl(GGML_TYPE_IQ2_XS); |
7483 | 8 | iq2xs_free_impl(GGML_TYPE_IQ1_S); |
7484 | 8 | iq3xs_free_impl(256); |
7485 | | |
7486 | 8 | ggml_critical_section_end(); |
7487 | 8 | } |
7488 | | |
7489 | 0 | bool ggml_quantize_requires_imatrix(enum ggml_type type) { |
7490 | 0 | return |
7491 | 0 | type == GGML_TYPE_IQ2_XXS || |
7492 | 0 | type == GGML_TYPE_IQ2_XS || |
7493 | 0 | type == GGML_TYPE_IQ1_S;// || |
7494 | | //type == GGML_TYPE_IQ1_M; |
7495 | 0 | } |
7496 | | |
7497 | | size_t ggml_quantize_chunk( |
7498 | | enum ggml_type type, |
7499 | | const float * src, |
7500 | | void * dst, |
7501 | | int64_t start, |
7502 | | int64_t nrows, |
7503 | | int64_t n_per_row, |
7504 | 0 | const float * imatrix) { |
7505 | 0 | const int64_t n = (int64_t) nrows * n_per_row; |
7506 | |
|
7507 | 0 | if (ggml_quantize_requires_imatrix(type)) { |
7508 | 0 | GGML_ASSERT(imatrix != NULL); |
7509 | 0 | } |
7510 | |
|
7511 | 0 | GGML_ASSERT(start % type_traits[type].blck_size == 0); |
7512 | 0 | GGML_ASSERT(start % n_per_row == 0); |
7513 | |
|
7514 | 0 | ggml_quantize_init(type); // this is noop if already initialized |
7515 | |
|
7516 | 0 | const size_t start_row = start / n_per_row; |
7517 | 0 | const size_t row_size = ggml_row_size(type, n_per_row); |
7518 | |
|
7519 | 0 | size_t result = 0; |
7520 | |
|
7521 | 0 | switch (type) { |
7522 | 0 | case GGML_TYPE_Q4_0: result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7523 | 0 | case GGML_TYPE_Q4_1: result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7524 | 0 | case GGML_TYPE_Q5_0: result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7525 | 0 | case GGML_TYPE_Q5_1: result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7526 | 0 | case GGML_TYPE_Q8_0: result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7527 | 0 | case GGML_TYPE_MXFP4: result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7528 | 0 | case GGML_TYPE_Q2_K: result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7529 | 0 | case GGML_TYPE_Q3_K: result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7530 | 0 | case GGML_TYPE_Q4_K: result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7531 | 0 | case GGML_TYPE_Q5_K: result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7532 | 0 | case GGML_TYPE_Q6_K: result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7533 | 0 | case GGML_TYPE_TQ1_0: result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7534 | 0 | case GGML_TYPE_TQ2_0: result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7535 | 0 | case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7536 | 0 | case GGML_TYPE_IQ2_XS: result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7537 | 0 | case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7538 | 0 | case GGML_TYPE_IQ3_S: result = quantize_iq3_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7539 | 0 | case GGML_TYPE_IQ2_S: result = quantize_iq2_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7540 | 0 | case GGML_TYPE_IQ1_S: result = quantize_iq1_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7541 | 0 | case GGML_TYPE_IQ1_M: result = quantize_iq1_m (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7542 | 0 | case GGML_TYPE_IQ4_NL: result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7543 | 0 | case GGML_TYPE_IQ4_XS: result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7544 | 0 | case GGML_TYPE_F16: |
7545 | 0 | { |
7546 | 0 | size_t elemsize = sizeof(ggml_fp16_t); |
7547 | 0 | ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n); |
7548 | 0 | result = n * elemsize; |
7549 | 0 | } break; |
7550 | 0 | case GGML_TYPE_BF16: |
7551 | 0 | { |
7552 | 0 | size_t elemsize = sizeof(ggml_bf16_t); |
7553 | 0 | ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n); |
7554 | 0 | result = n * elemsize; |
7555 | 0 | } break; |
7556 | 0 | case GGML_TYPE_F32: |
7557 | 0 | { |
7558 | 0 | size_t elemsize = sizeof(float); |
7559 | 0 | result = n * elemsize; |
7560 | 0 | memcpy((uint8_t *)dst + start * elemsize, src + start, result); |
7561 | 0 | } break; |
7562 | 0 | default: |
7563 | 0 | assert(false); |
7564 | 0 | } |
7565 | | |
7566 | 0 | GGML_ASSERT(result == nrows * row_size); |
7567 | |
|
7568 | 0 | return result; |
7569 | 0 | } |
7570 | | |
7571 | | //////////////////////////////////////////////////////////////////////////////// |
7572 | | |
7573 | 0 | void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) { |
7574 | 0 | *log_callback = g_logger_state.log_callback; |
7575 | 0 | *user_data = g_logger_state.log_callback_user_data; |
7576 | 0 | } |
7577 | | |
7578 | 0 | void ggml_log_set(ggml_log_callback log_callback, void * user_data) { |
7579 | 0 | g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default; |
7580 | 0 | g_logger_state.log_callback_user_data = user_data; |
7581 | 0 | } |
7582 | | |
7583 | 0 | void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) { |
7584 | 0 | p->n_threads = n_threads; |
7585 | 0 | p->prio = 0; // default priority (usually means normal or inherited) |
7586 | 0 | p->poll = 50; // hybrid-polling enabled |
7587 | 0 | p->strict_cpu = false; // no strict placement (all threads share same cpumask) |
7588 | 0 | p->paused = false; // threads are ready to go |
7589 | 0 | memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited) |
7590 | 0 | } |
7591 | | |
7592 | 0 | struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) { |
7593 | 0 | struct ggml_threadpool_params p; |
7594 | 0 | ggml_threadpool_params_init(&p, n_threads); |
7595 | 0 | return p; |
7596 | 0 | } |
7597 | | |
7598 | 0 | bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) { |
7599 | 0 | if (p0->n_threads != p1->n_threads ) return false; |
7600 | 0 | if (p0->prio != p1->prio ) return false; |
7601 | 0 | if (p0->poll != p1->poll ) return false; |
7602 | 0 | if (p0->strict_cpu != p1->strict_cpu ) return false; |
7603 | 0 | return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0; |
7604 | 0 | } |