/src/llama.cpp/ggml/src/ggml.c
Line | Count | Source |
1 | | #define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows |
2 | | #define _USE_MATH_DEFINES // For M_PI on MSVC |
3 | | |
4 | | #include "ggml-backend.h" |
5 | | #include "ggml-impl.h" |
6 | | #include "ggml-threading.h" |
7 | | #include "ggml-cpu.h" |
8 | | #include "ggml.h" |
9 | | |
10 | | // FIXME: required here for quantization functions |
11 | | #include "ggml-quants.h" |
12 | | |
13 | | #ifdef GGML_USE_CPU_HBM |
14 | | #include <hbwmalloc.h> |
15 | | #endif |
16 | | |
17 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
18 | | #include <malloc.h> // using malloc.h with MSC/MINGW |
19 | | #elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) |
20 | | #include <alloca.h> |
21 | | #endif |
22 | | |
23 | | #include <assert.h> |
24 | | #include <errno.h> |
25 | | #include <time.h> |
26 | | #include <math.h> |
27 | | #include <stdlib.h> |
28 | | #include <string.h> |
29 | | #include <stdint.h> |
30 | | #include <inttypes.h> |
31 | | #include <stdio.h> |
32 | | #include <float.h> |
33 | | #include <limits.h> |
34 | | #include <stdarg.h> |
35 | | #include <signal.h> |
36 | | #if defined(__gnu_linux__) |
37 | | #include <syscall.h> |
38 | | #endif |
39 | | |
40 | | #if defined(__APPLE__) |
41 | | #include <unistd.h> |
42 | | #include <mach/mach.h> |
43 | | #include <TargetConditionals.h> |
44 | | #endif |
45 | | |
46 | | #if defined(_WIN32) |
47 | | #define WIN32_LEAN_AND_MEAN |
48 | | #ifndef NOMINMAX |
49 | | #define NOMINMAX |
50 | | #endif |
51 | | #include <windows.h> |
52 | | #endif |
53 | | |
54 | 0 | #define UNUSED GGML_UNUSED |
55 | | |
56 | | #if defined(_MSC_VER) |
57 | | #define m512bh(p) p |
58 | | #define m512i(p) p |
59 | | #else |
60 | | #define m512bh(p) (__m512bh)(p) |
61 | | #define m512i(p) (__m512i)(p) |
62 | | #endif |
63 | | |
64 | | #if defined(__linux__) || \ |
65 | | defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ |
66 | | (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH) |
67 | | |
68 | | #include <unistd.h> |
69 | | #include <sys/types.h> |
70 | | #include <sys/stat.h> |
71 | | #include <sys/wait.h> |
72 | | #if defined(__linux__) |
73 | | #include <sys/prctl.h> |
74 | | #endif |
75 | | |
76 | | #if defined(__ANDROID__) |
77 | | #include <unwind.h> |
78 | | #include <dlfcn.h> |
79 | | #include <stdio.h> |
80 | | |
81 | | struct backtrace_state { |
82 | | void ** current; |
83 | | void ** end; |
84 | | }; |
85 | | |
86 | | static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) { |
87 | | struct backtrace_state * state = (struct backtrace_state *)arg; |
88 | | uintptr_t pc = _Unwind_GetIP(context); |
89 | | if (pc) { |
90 | | if (state->current == state->end) { |
91 | | return _URC_END_OF_STACK; |
92 | | } else { |
93 | | *state->current++ = (void*)pc; |
94 | | } |
95 | | } |
96 | | return _URC_NO_REASON; |
97 | | } |
98 | | |
99 | | static void ggml_print_backtrace_symbols(void) { |
100 | | const int max = 100; |
101 | | void* buffer[max]; |
102 | | |
103 | | struct backtrace_state state = {buffer, buffer + max}; |
104 | | _Unwind_Backtrace(unwind_callback, &state); |
105 | | |
106 | | int count = state.current - buffer; |
107 | | |
108 | | for (int idx = 0; idx < count; ++idx) { |
109 | | const void * addr = buffer[idx]; |
110 | | const char * symbol = ""; |
111 | | |
112 | | Dl_info info; |
113 | | if (dladdr(addr, &info) && info.dli_sname) { |
114 | | symbol = info.dli_sname; |
115 | | } |
116 | | |
117 | | fprintf(stderr, "%d: %p %s\n", idx, addr, symbol); |
118 | | } |
119 | | } |
120 | | #elif defined(__linux__) && defined(__GLIBC__) |
121 | | #include <execinfo.h> |
122 | 0 | static void ggml_print_backtrace_symbols(void) { |
123 | 0 | void * trace[100]; |
124 | 0 | int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0])); |
125 | 0 | backtrace_symbols_fd(trace, nptrs, STDERR_FILENO); |
126 | 0 | } |
127 | | #else |
128 | | static void ggml_print_backtrace_symbols(void) { |
129 | | // platform not supported |
130 | | } |
131 | | #endif |
132 | | |
133 | 0 | void ggml_print_backtrace(void) { |
134 | 0 | const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE"); |
135 | 0 | if (GGML_NO_BACKTRACE) { |
136 | 0 | return; |
137 | 0 | } |
138 | 0 | #if defined(__linux__) |
139 | 0 | FILE * f = fopen("/proc/self/status", "r"); |
140 | 0 | size_t size = 0; |
141 | 0 | char * line = NULL; |
142 | 0 | ssize_t length = 0; |
143 | 0 | while ((length = getline(&line, &size, f)) > 0) { |
144 | 0 | if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) && |
145 | 0 | (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) { |
146 | | // Already being debugged, and the breakpoint is the later abort() |
147 | 0 | free(line); |
148 | 0 | fclose(f); |
149 | 0 | return; |
150 | 0 | } |
151 | 0 | } |
152 | 0 | free(line); |
153 | 0 | fclose(f); |
154 | 0 | int lock[2] = { -1, -1 }; |
155 | 0 | (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER |
156 | 0 | #endif |
157 | 0 | const int parent_pid = getpid(); |
158 | 0 | const int child_pid = fork(); |
159 | 0 | if (child_pid < 0) { // error |
160 | 0 | #if defined(__linux__) |
161 | 0 | close(lock[1]); |
162 | 0 | close(lock[0]); |
163 | 0 | #endif |
164 | 0 | return; |
165 | 0 | } else if (child_pid == 0) { // child |
166 | 0 | char attach[32]; |
167 | 0 | snprintf(attach, sizeof(attach), "attach %d", parent_pid); |
168 | 0 | #if defined(__linux__) |
169 | 0 | close(lock[1]); |
170 | 0 | (void) !read(lock[0], lock, 1); |
171 | 0 | close(lock[0]); |
172 | 0 | #endif |
173 | | // try gdb |
174 | 0 | execlp("gdb", "gdb", "--batch", |
175 | 0 | "-ex", "set style enabled on", |
176 | 0 | "-ex", attach, |
177 | 0 | "-ex", "bt -frame-info source-and-location", |
178 | 0 | "-ex", "detach", |
179 | 0 | "-ex", "quit", |
180 | 0 | (char *) NULL); |
181 | | // try lldb |
182 | 0 | execlp("lldb", "lldb", "--batch", |
183 | 0 | "-o", "bt", |
184 | 0 | "-o", "quit", |
185 | 0 | "-p", &attach[sizeof("attach ") - 1], |
186 | 0 | (char *) NULL); |
187 | | // gdb and lldb both failed, fall back to backtrace_symbols |
188 | 0 | ggml_print_backtrace_symbols(); |
189 | 0 | _Exit(0); |
190 | 0 | } else { // parent |
191 | 0 | #if defined(__linux__) |
192 | 0 | prctl(PR_SET_PTRACER, child_pid); |
193 | 0 | close(lock[1]); |
194 | 0 | close(lock[0]); |
195 | 0 | #endif |
196 | 0 | waitpid(child_pid, NULL, 0); |
197 | 0 | } |
198 | 0 | } |
199 | | #else |
200 | | void ggml_print_backtrace(void) { |
201 | | // platform not supported |
202 | | } |
203 | | #endif |
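
The debugger-based backtrace above can be suppressed through the GGML_NO_BACKTRACE environment variable (only its presence is checked, not its value). A minimal sketch of doing that from C on a POSIX system; the forward declaration mirrors the non-static definition above, and setenv() is POSIX-only.

    #include <stdlib.h>

    void ggml_print_backtrace(void); // defined above in this file

    static void quiet_backtrace_demo(void) {
        setenv("GGML_NO_BACKTRACE", "1", 1); // any value works, only presence matters
        ggml_print_backtrace();              // now returns immediately
    }
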
204 | | |
205 | | static ggml_abort_callback_t g_abort_callback = NULL; |
206 | | |
207 | | // Set the abort callback (passing null will restore the original abort functionality: printing a message to stderr) |
208 | 0 | GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) { |
209 | 0 | ggml_abort_callback_t ret_val = g_abort_callback; |
210 | 0 | g_abort_callback = callback; |
211 | 0 | return ret_val; |
212 | 0 | } |
213 | | |
214 | 55 | void ggml_abort(const char * file, int line, const char * fmt, ...) { |
215 | 55 | fflush(stdout); |
216 | | |
217 | 55 | char message[2048]; |
218 | 55 | int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line); |
219 | | |
220 | 55 | va_list args; |
221 | 55 | va_start(args, fmt); |
222 | 55 | vsnprintf(message + offset, sizeof(message) - offset, fmt, args); |
223 | 55 | va_end(args); |
224 | | |
225 | 55 | if (g_abort_callback) { |
226 | 0 | g_abort_callback(message); |
227 | 55 | } else { |
228 | | // default: print the error to stderr (the backtrace is printed by the terminate handler registered in ggml.cpp) |
229 | 55 | fprintf(stderr, "%s\n", message); |
230 | | |
231 | 55 | } |
232 | | |
233 | 55 | abort(); |
234 | 55 | } |
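
A usage sketch for the abort hook above: ggml_abort formats "file:line: message" into a buffer and hands it to the installed callback before calling abort(), so a host application can log or persist the message first. The handler name here is illustrative.

    static void my_abort_handler(const char * message) {
        // message already carries the "file:line: ..." prefix built by ggml_abort
        fprintf(stderr, "[app] %s\n", message);
    }

    static void abort_hook_demo(void) {
        ggml_abort_callback_t prev = ggml_set_abort_callback(my_abort_handler);
        // ... code that may trigger GGML_ABORT / GGML_ASSERT ...
        ggml_set_abort_callback(prev); // restore the previous handler (NULL = default)
    }
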
235 | | |
236 | | // ggml_print_backtrace is registered with std::set_terminate by ggml.cpp |
237 | | |
238 | | // |
239 | | // logging |
240 | | // |
241 | | |
242 | | struct ggml_logger_state { |
243 | | ggml_log_callback log_callback; |
244 | | void * log_callback_user_data; |
245 | | }; |
246 | | static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL}; |
247 | | |
248 | 918 | static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) { |
249 | 918 | if (format == NULL) { |
250 | 0 | return; |
251 | 0 | } |
252 | 918 | va_list args_copy; |
253 | 918 | va_copy(args_copy, args); |
254 | 918 | char buffer[128]; |
255 | 918 | int len = vsnprintf(buffer, 128, format, args); |
256 | 918 | if (len < 128) { |
257 | 902 | g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data); |
258 | 902 | } else { |
259 | 16 | char * buffer2 = (char *) calloc(len + 1, sizeof(char)); |
260 | 16 | vsnprintf(buffer2, len + 1, format, args_copy); |
261 | 16 | buffer2[len] = 0; |
262 | 16 | g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data); |
263 | 16 | free(buffer2); |
264 | 16 | } |
265 | 918 | va_end(args_copy); |
266 | 918 | } |
267 | | |
268 | 918 | void ggml_log_internal(enum ggml_log_level level, const char * format, ...) { |
269 | 918 | va_list args; |
270 | 918 | va_start(args, format); |
271 | 918 | ggml_log_internal_v(level, format, args); |
272 | 918 | va_end(args); |
273 | 918 | } |
274 | | |
275 | 918 | void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) { |
276 | 918 | (void) level; |
277 | 918 | (void) user_data; |
278 | 918 | fputs(text, stderr); |
279 | 918 | fflush(stderr); |
280 | 918 | } |
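
A sketch of a replacement logger with the same signature as ggml_log_callback_default above; installing it is assumed to go through ggml_log_set() declared in ggml.h, which is not part of this file. Most ggml messages already end in a newline, so the callback only forwards the text.

    static void my_log_callback(enum ggml_log_level level, const char * text, void * user_data) {
        FILE * out = user_data ? (FILE *) user_data : stderr;
        fprintf(out, "[ggml/%d] %s", (int) level, text);
        fflush(out);
    }

    // at startup (assumed public API from ggml.h):
    //     ggml_log_set(my_log_callback, stderr);
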
281 | | |
282 | | // |
283 | | // end of logging block |
284 | | // |
285 | | |
286 | | #ifdef GGML_USE_ACCELERATE |
287 | | // uncomment to use vDSP for soft max computation |
288 | | // note: not sure if it is actually faster |
289 | | //#define GGML_SOFT_MAX_ACCELERATE |
290 | | #endif |
291 | | |
292 | | |
293 | 1.10k | void * ggml_aligned_malloc(size_t size) { |
294 | | #if defined(__s390x__) |
295 | | const int alignment = 256; |
296 | | #else |
297 | 1.10k | const int alignment = 64; |
298 | 1.10k | #endif |
299 | | |
300 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
301 | | return _aligned_malloc(size, alignment); |
302 | | #else |
303 | 1.10k | if (size == 0) { |
304 | 0 | GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); |
305 | 0 | return NULL; |
306 | 0 | } |
307 | 1.10k | void * aligned_memory = NULL; |
308 | | #ifdef GGML_USE_CPU_HBM |
309 | | int result = hbw_posix_memalign(&aligned_memory, alignment, size); |
310 | | #elif TARGET_OS_OSX |
311 | | GGML_UNUSED(alignment); |
312 | | kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE); |
313 | | int result = EFAULT; |
314 | | switch (alloc_status) { |
315 | | case KERN_SUCCESS: |
316 | | result = 0; |
317 | | break; |
318 | | case KERN_INVALID_ADDRESS: |
319 | | result = EINVAL; |
320 | | break; |
321 | | case KERN_NO_SPACE: |
322 | | result = ENOMEM; |
323 | | break; |
324 | | default: |
325 | | result = EFAULT; |
326 | | break; |
327 | | } |
328 | | #else |
329 | 1.10k | int result = posix_memalign(&aligned_memory, alignment, size); |
330 | 1.10k | #endif |
331 | 1.10k | if (result != 0) { |
332 | | // Handle allocation failure |
333 | 0 | const char *error_desc = "unknown allocation error"; |
334 | 0 | switch (result) { |
335 | 0 | case EINVAL: |
336 | 0 | error_desc = "invalid alignment value"; |
337 | 0 | break; |
338 | 0 | case ENOMEM: |
339 | 0 | error_desc = "insufficient memory"; |
340 | 0 | break; |
341 | 0 | } |
342 | 0 | GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); |
343 | 0 | return NULL; |
344 | 0 | } |
345 | 1.10k | return aligned_memory; |
346 | 1.10k | #endif |
347 | 1.10k | } |
348 | | |
349 | 1.10k | void ggml_aligned_free(void * ptr, size_t size) { |
350 | 1.10k | GGML_UNUSED(size); |
351 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
352 | | _aligned_free(ptr); |
353 | | #elif GGML_USE_CPU_HBM |
354 | | if (ptr != NULL) { |
355 | | hbw_free(ptr); |
356 | | } |
357 | | #elif TARGET_OS_OSX |
358 | | if (ptr != NULL) { |
359 | | vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size); |
360 | | } |
361 | | #else |
362 | 1.10k | free(ptr); |
363 | 1.10k | #endif |
364 | 1.10k | } |
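
A minimal pairing sketch for the two helpers above. The size is passed back to ggml_aligned_free because the macOS path uses vm_deallocate, which needs it; the other paths ignore it.

    static void aligned_alloc_demo(void) {
        const size_t size = 1024 * 1024;          // 1 MiB, arbitrary
        void * buf = ggml_aligned_malloc(size);
        if (buf != NULL) {
            // buf is at least 64-byte aligned (256 bytes on s390x)
            ggml_aligned_free(buf, size);         // same size as the allocation
        }
    }
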
365 | | |
366 | | |
367 | 1.10k | inline static void * ggml_malloc(size_t size) { |
368 | 1.10k | if (size == 0) { |
369 | 0 | GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n"); |
370 | 0 | return NULL; |
371 | 0 | } |
372 | 1.10k | void * result = malloc(size); |
373 | 1.10k | if (result == NULL) { |
374 | 0 | GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0)); |
375 | 0 | GGML_ABORT("fatal error"); |
376 | 0 | } |
377 | 1.10k | return result; |
378 | 1.10k | } |
379 | | |
380 | | // calloc |
381 | 0 | inline static void * ggml_calloc(size_t num, size_t size) { |
382 | 0 | if ((num * size) > 9000000) {GGML_ABORT("calloc err");} |
383 | |
384 | 0 | if (num == 0 || size == 0) { |
385 | 0 | GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n"); |
386 | 0 | return NULL; |
387 | 0 | } |
388 | 0 | void * result = calloc(num, size); |
389 | 0 | if (result == NULL) { |
390 | 0 | GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0)); |
391 | 0 | GGML_ABORT("fatal error"); |
392 | 0 | } |
393 | 0 | return result; |
394 | 0 | } |
395 | | |
396 | 1.10k | #define GGML_MALLOC(size) ggml_malloc(size) |
397 | 0 | #define GGML_CALLOC(num, size) ggml_calloc(num, size) |
398 | | |
399 | 1.10k | #define GGML_FREE(ptr) free(ptr) |
400 | | |
401 | 0 | const char * ggml_status_to_string(enum ggml_status status) { |
402 | 0 | switch (status) { |
403 | 0 | case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)"; |
404 | 0 | case GGML_STATUS_FAILED: return "GGML status: error (operation failed)"; |
405 | 0 | case GGML_STATUS_SUCCESS: return "GGML status: success"; |
406 | 0 | case GGML_STATUS_ABORTED: return "GGML status: warning (operation aborted)"; |
407 | 0 | } |
408 | | |
409 | 0 | return "GGML status: unknown"; |
410 | 0 | } |
411 | | |
412 | 0 | float ggml_fp16_to_fp32(ggml_fp16_t x) { |
413 | 0 | #define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml |
414 | 0 | return GGML_FP16_TO_FP32(x); |
415 | 0 | } |
416 | | |
417 | 0 | ggml_fp16_t ggml_fp32_to_fp16(float x) { |
418 | 0 | #define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml |
419 | 0 | return GGML_FP32_TO_FP16(x); |
420 | 0 | } |
421 | | |
422 | 0 | float ggml_bf16_to_fp32(ggml_bf16_t x) { |
423 | 0 | #define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml |
424 | 0 | return GGML_BF16_TO_FP32(x); // it just left shifts |
425 | 0 | } |
426 | | |
427 | 0 | ggml_bf16_t ggml_fp32_to_bf16(float x) { |
428 | 0 | #define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml |
429 | 0 | return GGML_FP32_TO_BF16(x); |
430 | 0 | } |
431 | | |
432 | 0 | void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) { |
433 | 0 | for (int64_t i = 0; i < n; i++) { |
434 | 0 | y[i] = GGML_FP16_TO_FP32(x[i]); |
435 | 0 | } |
436 | 0 | } |
437 | | |
438 | 0 | void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) { |
439 | 0 | int i = 0; |
440 | 0 | for (; i < n; ++i) { |
441 | 0 | y[i] = GGML_FP32_TO_FP16(x[i]); |
442 | 0 | } |
443 | 0 | } |
444 | | |
445 | 0 | void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) { |
446 | 0 | int i = 0; |
447 | 0 | for (; i < n; ++i) { |
448 | 0 | y[i] = GGML_BF16_TO_FP32(x[i]); |
449 | 0 | } |
450 | 0 | } |
451 | | |
452 | 0 | void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) { |
453 | 0 | for (int i = 0; i < n; i++) { |
454 | 0 | y[i] = ggml_compute_fp32_to_bf16(x[i]); |
455 | 0 | } |
456 | 0 | } |
457 | | |
458 | 0 | void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) { |
459 | 0 | int i = 0; |
460 | | #if defined(__AVX512BF16__) |
461 | | // subnormals are flushed to zero on this platform |
462 | | for (; i + 32 <= n; i += 32) { |
463 | | _mm512_storeu_si512( |
464 | | (__m512i *)(y + i), |
465 | | m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16), |
466 | | _mm512_loadu_ps(x + i)))); |
467 | | } |
468 | | #endif |
469 | 0 | for (; i < n; i++) { |
470 | 0 | y[i] = GGML_FP32_TO_BF16(x[i]); |
471 | 0 | } |
472 | 0 | } |
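
A small round-trip sketch with the row converters above; the values are arbitrary, and the fp16 round trip is lossy for values outside half-precision range and precision.

    static void fp16_roundtrip_demo(void) {
        float       src[4]  = { 0.0f, 1.0f, -2.5f, 3.14159f };
        ggml_fp16_t half[4];
        float       back[4];

        ggml_fp32_to_fp16_row(src, half, 4);  // fp32 -> fp16 (rounds)
        ggml_fp16_to_fp32_row(half, back, 4); // fp16 -> fp32

        fprintf(stderr, "%f -> %f\n", src[3], back[3]); // last value loses precision
    }
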
473 | | |
474 | 0 | bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) { |
475 | 0 | return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0; |
476 | 0 | } |
477 | | |
478 | 0 | const char * ggml_version(void) { |
479 | 0 | return GGML_VERSION; |
480 | 0 | } |
481 | | |
482 | 0 | const char * ggml_commit(void) { |
483 | 0 | return GGML_COMMIT; |
484 | 0 | } |
485 | | |
486 | | // |
487 | | // timing |
488 | | // |
489 | | |
490 | | #if defined(_MSC_VER) || defined(__MINGW32__) |
491 | | static int64_t timer_freq, timer_start; |
492 | | void ggml_time_init(void) { |
493 | | LARGE_INTEGER t; |
494 | | QueryPerformanceFrequency(&t); |
495 | | timer_freq = t.QuadPart; |
496 | | |
497 | | // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq |
498 | | // and the uptime are high enough. |
499 | | // We subtract the program start time to reduce the likelihood of that happening. |
500 | | QueryPerformanceCounter(&t); |
501 | | timer_start = t.QuadPart; |
502 | | } |
503 | | int64_t ggml_time_ms(void) { |
504 | | LARGE_INTEGER t; |
505 | | QueryPerformanceCounter(&t); |
506 | | return ((t.QuadPart-timer_start) * 1000) / timer_freq; |
507 | | } |
508 | | int64_t ggml_time_us(void) { |
509 | | LARGE_INTEGER t; |
510 | | QueryPerformanceCounter(&t); |
511 | | return ((t.QuadPart-timer_start) * 1000000) / timer_freq; |
512 | | } |
513 | | #else |
514 | 2.82k | void ggml_time_init(void) {} |
515 | 0 | int64_t ggml_time_ms(void) { |
516 | 0 | struct timespec ts; |
517 | 0 | clock_gettime(CLOCK_MONOTONIC, &ts); |
518 | 0 | return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000; |
519 | 0 | } |
520 | | |
521 | 1.67k | int64_t ggml_time_us(void) { |
522 | 1.67k | struct timespec ts; |
523 | 1.67k | clock_gettime(CLOCK_MONOTONIC, &ts); |
524 | 1.67k | return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000; |
525 | 1.67k | } |
526 | | #endif |
527 | | |
528 | 0 | int64_t ggml_cycles(void) { |
529 | 0 | return clock(); |
530 | 0 | } |
531 | | |
532 | 0 | int64_t ggml_cycles_per_ms(void) { |
533 | 0 | return CLOCKS_PER_SEC/1000; |
534 | 0 | } |
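
The usual measurement pattern with the timers above, as a sketch. ggml_time_init() only matters on Windows, where it records the QueryPerformanceCounter baseline; elsewhere it is a no-op.

    static void timing_demo(void) {
        ggml_time_init();                    // once per process; harmless to repeat
        const int64_t t0 = ggml_time_us();
        // ... work to be measured ...
        const int64_t t1 = ggml_time_us();
        fprintf(stderr, "elapsed: %.3f ms\n", (t1 - t0) / 1000.0);
    }
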
535 | | |
536 | | // |
537 | | // cross-platform UTF-8 file paths |
538 | | // |
539 | | |
540 | | #ifdef _WIN32 |
541 | | static wchar_t * ggml_mbstowcs(const char * mbs) { |
542 | | int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0); |
543 | | if (!wlen) { |
544 | | errno = EINVAL; |
545 | | return NULL; |
546 | | } |
547 | | |
548 | | wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t)); |
549 | | wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen); |
550 | | if (!wlen) { |
551 | | GGML_FREE(wbuf); |
552 | | errno = EINVAL; |
553 | | return NULL; |
554 | | } |
555 | | |
556 | | return wbuf; |
557 | | } |
558 | | #endif |
559 | | |
560 | 1.09k | FILE * ggml_fopen(const char * fname, const char * mode) { |
561 | | #ifdef _WIN32 |
562 | | FILE * file = NULL; |
563 | | |
564 | | // convert fname (UTF-8) |
565 | | wchar_t * wfname = ggml_mbstowcs(fname); |
566 | | if (wfname) { |
567 | | // convert mode (ANSI) |
568 | | wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t)); |
569 | | wchar_t * wmode_p = wmode; |
570 | | do { |
571 | | *wmode_p++ = (wchar_t)*mode; |
572 | | } while (*mode++); |
573 | | |
574 | | // open file |
575 | | file = _wfopen(wfname, wmode); |
576 | | |
577 | | GGML_FREE(wfname); |
578 | | GGML_FREE(wmode); |
579 | | } |
580 | | |
581 | | return file; |
582 | | #else |
583 | 1.09k | return fopen(fname, mode); |
584 | 1.09k | #endif |
585 | | |
586 | 1.09k | } |
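
A sketch of why the wrapper exists: the same UTF-8 path literal works on Windows (converted to UTF-16 and opened with _wfopen) and on POSIX (passed straight to fopen). The path below is hypothetical.

    static void fopen_demo(void) {
        FILE * f = ggml_fopen("models/模型/mistral.gguf", "rb"); // UTF-8 path, illustrative
        if (f != NULL) {
            // ... read the file ...
            fclose(f);
        }
    }
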
587 | | |
588 | | static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = { |
589 | | [GGML_TYPE_I8] = { |
590 | | .type_name = "i8", |
591 | | .blck_size = 1, |
592 | | .type_size = sizeof(int8_t), |
593 | | .is_quantized = false, |
594 | | }, |
595 | | [GGML_TYPE_I16] = { |
596 | | .type_name = "i16", |
597 | | .blck_size = 1, |
598 | | .type_size = sizeof(int16_t), |
599 | | .is_quantized = false, |
600 | | }, |
601 | | [GGML_TYPE_I32] = { |
602 | | .type_name = "i32", |
603 | | .blck_size = 1, |
604 | | .type_size = sizeof(int32_t), |
605 | | .is_quantized = false, |
606 | | }, |
607 | | [GGML_TYPE_I64] = { |
608 | | .type_name = "i64", |
609 | | .blck_size = 1, |
610 | | .type_size = sizeof(int64_t), |
611 | | .is_quantized = false, |
612 | | }, |
613 | | [GGML_TYPE_F64] = { |
614 | | .type_name = "f64", |
615 | | .blck_size = 1, |
616 | | .type_size = sizeof(double), |
617 | | .is_quantized = false, |
618 | | }, |
619 | | [GGML_TYPE_F32] = { |
620 | | .type_name = "f32", |
621 | | .blck_size = 1, |
622 | | .type_size = sizeof(float), |
623 | | .is_quantized = false, |
624 | | }, |
625 | | [GGML_TYPE_F16] = { |
626 | | .type_name = "f16", |
627 | | .blck_size = 1, |
628 | | .type_size = sizeof(ggml_fp16_t), |
629 | | .is_quantized = false, |
630 | | .to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row, |
631 | | .from_float_ref = (ggml_from_float_t) ggml_fp32_to_fp16_row, |
632 | | }, |
633 | | [GGML_TYPE_Q4_0] = { |
634 | | .type_name = "q4_0", |
635 | | .blck_size = QK4_0, |
636 | | .type_size = sizeof(block_q4_0), |
637 | | .is_quantized = true, |
638 | | .to_float = (ggml_to_float_t) dequantize_row_q4_0, |
639 | | .from_float_ref = (ggml_from_float_t) quantize_row_q4_0_ref, |
640 | | }, |
641 | | [GGML_TYPE_Q4_1] = { |
642 | | .type_name = "q4_1", |
643 | | .blck_size = QK4_1, |
644 | | .type_size = sizeof(block_q4_1), |
645 | | .is_quantized = true, |
646 | | .to_float = (ggml_to_float_t) dequantize_row_q4_1, |
647 | | .from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref, |
648 | | }, |
649 | | [4] = { // GGML_TYPE_Q4_2 |
650 | | .type_name = "DEPRECATED", |
651 | | .blck_size = 0, |
652 | | .type_size = 0, |
653 | | .is_quantized = false, |
654 | | }, |
655 | | [5] = { // GGML_TYPE_Q4_3 |
656 | | .type_name = "DEPRECATED", |
657 | | .blck_size = 0, |
658 | | .type_size = 0, |
659 | | .is_quantized = false, |
660 | | }, |
661 | | [GGML_TYPE_Q5_0] = { |
662 | | .type_name = "q5_0", |
663 | | .blck_size = QK5_0, |
664 | | .type_size = sizeof(block_q5_0), |
665 | | .is_quantized = true, |
666 | | .to_float = (ggml_to_float_t) dequantize_row_q5_0, |
667 | | .from_float_ref = (ggml_from_float_t) quantize_row_q5_0_ref, |
668 | | }, |
669 | | [GGML_TYPE_Q5_1] = { |
670 | | .type_name = "q5_1", |
671 | | .blck_size = QK5_1, |
672 | | .type_size = sizeof(block_q5_1), |
673 | | .is_quantized = true, |
674 | | .to_float = (ggml_to_float_t) dequantize_row_q5_1, |
675 | | .from_float_ref = (ggml_from_float_t) quantize_row_q5_1_ref, |
676 | | }, |
677 | | [GGML_TYPE_Q8_0] = { |
678 | | .type_name = "q8_0", |
679 | | .blck_size = QK8_0, |
680 | | .type_size = sizeof(block_q8_0), |
681 | | .is_quantized = true, |
682 | | .to_float = (ggml_to_float_t) dequantize_row_q8_0, |
683 | | .from_float_ref = (ggml_from_float_t) quantize_row_q8_0_ref, |
684 | | }, |
685 | | [GGML_TYPE_Q8_1] = { |
686 | | .type_name = "q8_1", |
687 | | .blck_size = QK8_1, |
688 | | .type_size = sizeof(block_q8_1), |
689 | | .is_quantized = true, |
690 | | .from_float_ref = (ggml_from_float_t) quantize_row_q8_1_ref, |
691 | | }, |
692 | | [GGML_TYPE_MXFP4] = { |
693 | | .type_name = "mxfp4", |
694 | | .blck_size = QK_MXFP4, |
695 | | .type_size = sizeof(block_mxfp4), |
696 | | .is_quantized = true, |
697 | | .to_float = (ggml_to_float_t) dequantize_row_mxfp4, |
698 | | .from_float_ref = (ggml_from_float_t)quantize_row_mxfp4_ref, |
699 | | }, |
700 | | [GGML_TYPE_Q2_K] = { |
701 | | .type_name = "q2_K", |
702 | | .blck_size = QK_K, |
703 | | .type_size = sizeof(block_q2_K), |
704 | | .is_quantized = true, |
705 | | .to_float = (ggml_to_float_t) dequantize_row_q2_K, |
706 | | .from_float_ref = (ggml_from_float_t) quantize_row_q2_K_ref, |
707 | | }, |
708 | | [GGML_TYPE_Q3_K] = { |
709 | | .type_name = "q3_K", |
710 | | .blck_size = QK_K, |
711 | | .type_size = sizeof(block_q3_K), |
712 | | .is_quantized = true, |
713 | | .to_float = (ggml_to_float_t) dequantize_row_q3_K, |
714 | | .from_float_ref = (ggml_from_float_t) quantize_row_q3_K_ref, |
715 | | }, |
716 | | [GGML_TYPE_Q4_K] = { |
717 | | .type_name = "q4_K", |
718 | | .blck_size = QK_K, |
719 | | .type_size = sizeof(block_q4_K), |
720 | | .is_quantized = true, |
721 | | .to_float = (ggml_to_float_t) dequantize_row_q4_K, |
722 | | .from_float_ref = (ggml_from_float_t) quantize_row_q4_K_ref, |
723 | | }, |
724 | | [GGML_TYPE_Q5_K] = { |
725 | | .type_name = "q5_K", |
726 | | .blck_size = QK_K, |
727 | | .type_size = sizeof(block_q5_K), |
728 | | .is_quantized = true, |
729 | | .to_float = (ggml_to_float_t) dequantize_row_q5_K, |
730 | | .from_float_ref = (ggml_from_float_t) quantize_row_q5_K_ref, |
731 | | }, |
732 | | [GGML_TYPE_Q6_K] = { |
733 | | .type_name = "q6_K", |
734 | | .blck_size = QK_K, |
735 | | .type_size = sizeof(block_q6_K), |
736 | | .is_quantized = true, |
737 | | .to_float = (ggml_to_float_t) dequantize_row_q6_K, |
738 | | .from_float_ref = (ggml_from_float_t) quantize_row_q6_K_ref, |
739 | | }, |
740 | | [GGML_TYPE_IQ2_XXS] = { |
741 | | .type_name = "iq2_xxs", |
742 | | .blck_size = QK_K, |
743 | | .type_size = sizeof(block_iq2_xxs), |
744 | | .is_quantized = true, |
745 | | .to_float = (ggml_to_float_t) dequantize_row_iq2_xxs, |
746 | | .from_float_ref = NULL, |
747 | | }, |
748 | | [GGML_TYPE_IQ2_XS] = { |
749 | | .type_name = "iq2_xs", |
750 | | .blck_size = QK_K, |
751 | | .type_size = sizeof(block_iq2_xs), |
752 | | .is_quantized = true, |
753 | | .to_float = (ggml_to_float_t) dequantize_row_iq2_xs, |
754 | | .from_float_ref = NULL, |
755 | | }, |
756 | | [GGML_TYPE_IQ3_XXS] = { |
757 | | .type_name = "iq3_xxs", |
758 | | .blck_size = QK_K, |
759 | | .type_size = sizeof(block_iq3_xxs), |
760 | | .is_quantized = true, |
761 | | .to_float = (ggml_to_float_t) dequantize_row_iq3_xxs, |
762 | | .from_float_ref = (ggml_from_float_t)quantize_row_iq3_xxs_ref, |
763 | | }, |
764 | | [GGML_TYPE_IQ3_S] = { |
765 | | .type_name = "iq3_s", |
766 | | .blck_size = QK_K, |
767 | | .type_size = sizeof(block_iq3_s), |
768 | | .is_quantized = true, |
769 | | .to_float = (ggml_to_float_t) dequantize_row_iq3_s, |
770 | | .from_float_ref = (ggml_from_float_t)quantize_row_iq3_s_ref, |
771 | | }, |
772 | | [GGML_TYPE_IQ2_S] = { |
773 | | .type_name = "iq2_s", |
774 | | .blck_size = QK_K, |
775 | | .type_size = sizeof(block_iq2_s), |
776 | | .is_quantized = true, |
777 | | .to_float = (ggml_to_float_t) dequantize_row_iq2_s, |
778 | | .from_float_ref = (ggml_from_float_t)quantize_row_iq2_s_ref, |
779 | | }, |
780 | | [GGML_TYPE_IQ1_S] = { |
781 | | .type_name = "iq1_s", |
782 | | .blck_size = QK_K, |
783 | | .type_size = sizeof(block_iq1_s), |
784 | | .is_quantized = true, |
785 | | .to_float = (ggml_to_float_t) dequantize_row_iq1_s, |
786 | | .from_float_ref = NULL, |
787 | | }, |
788 | | [GGML_TYPE_IQ1_M] = { |
789 | | .type_name = "iq1_m", |
790 | | .blck_size = QK_K, |
791 | | .type_size = sizeof(block_iq1_m), |
792 | | .is_quantized = true, |
793 | | .to_float = (ggml_to_float_t) dequantize_row_iq1_m, |
794 | | .from_float_ref = NULL, |
795 | | }, |
796 | | [GGML_TYPE_IQ4_NL] = { |
797 | | .type_name = "iq4_nl", |
798 | | .blck_size = QK4_NL, |
799 | | .type_size = sizeof(block_iq4_nl), |
800 | | .is_quantized = true, |
801 | | .to_float = (ggml_to_float_t) dequantize_row_iq4_nl, |
802 | | .from_float_ref = (ggml_from_float_t)quantize_row_iq4_nl_ref, |
803 | | }, |
804 | | [GGML_TYPE_IQ4_XS] = { |
805 | | .type_name = "iq4_xs", |
806 | | .blck_size = QK_K, |
807 | | .type_size = sizeof(block_iq4_xs), |
808 | | .is_quantized = true, |
809 | | .to_float = (ggml_to_float_t) dequantize_row_iq4_xs, |
810 | | .from_float_ref = (ggml_from_float_t)quantize_row_iq4_xs_ref, |
811 | | }, |
812 | | [GGML_TYPE_Q8_K] = { |
813 | | .type_name = "q8_K", |
814 | | .blck_size = QK_K, |
815 | | .type_size = sizeof(block_q8_K), |
816 | | .is_quantized = true, |
817 | | }, |
818 | | [GGML_TYPE_BF16] = { |
819 | | .type_name = "bf16", |
820 | | .blck_size = 1, |
821 | | .type_size = sizeof(ggml_bf16_t), |
822 | | .is_quantized = false, |
823 | | .to_float = (ggml_to_float_t) ggml_bf16_to_fp32_row, |
824 | | .from_float_ref = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref, |
825 | | }, |
826 | | [31] = { // GGML_TYPE_Q4_0_4_4 |
827 | | .type_name = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking", |
828 | | .blck_size = 0, |
829 | | .type_size = 0, |
830 | | .is_quantized = false, |
831 | | }, |
832 | | [32] = { // GGML_TYPE_Q4_0_4_8 |
833 | | .type_name = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking", |
834 | | .blck_size = 0, |
835 | | .type_size = 0, |
836 | | .is_quantized = false, |
837 | | }, |
838 | | [33] = { // GGML_TYPE_Q4_0_8_8 |
839 | | .type_name = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking", |
840 | | .blck_size = 0, |
841 | | .type_size = 0, |
842 | | .is_quantized = false, |
843 | | }, |
844 | | [GGML_TYPE_TQ1_0] = { |
845 | | .type_name = "tq1_0", |
846 | | .blck_size = QK_K, |
847 | | .type_size = sizeof(block_tq1_0), |
848 | | .is_quantized = true, |
849 | | .to_float = (ggml_to_float_t) dequantize_row_tq1_0, |
850 | | .from_float_ref = (ggml_from_float_t) quantize_row_tq1_0_ref, |
851 | | }, |
852 | | [GGML_TYPE_TQ2_0] = { |
853 | | .type_name = "tq2_0", |
854 | | .blck_size = QK_K, |
855 | | .type_size = sizeof(block_tq2_0), |
856 | | .is_quantized = true, |
857 | | .to_float = (ggml_to_float_t) dequantize_row_tq2_0, |
858 | | .from_float_ref = (ggml_from_float_t) quantize_row_tq2_0_ref, |
859 | | }, |
860 | | [36] = { // GGML_TYPE_IQ4_NL_4_4 |
861 | | .type_name = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking", |
862 | | .blck_size = 0, |
863 | | .type_size = 0, |
864 | | .is_quantized = false, |
865 | | }, |
866 | | [37] = { // GGML_TYPE_IQ4_NL_4_8 |
867 | | .type_name = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking", |
868 | | .blck_size = 0, |
869 | | .type_size = 0, |
870 | | .is_quantized = false, |
871 | | }, |
872 | | [38] = { // GGML_TYPE_IQ4_NL_8_8 |
873 | | .type_name = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking", |
874 | | .blck_size = 0, |
875 | | .type_size = 0, |
876 | | .is_quantized = false, |
877 | | }, |
878 | | }; |
879 | | |
880 | 0 | const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) { |
881 | 0 | GGML_ASSERT(type < GGML_TYPE_COUNT); |
882 | 0 | return &type_traits[type]; |
883 | 0 | } |
884 | | |
885 | | // |
886 | | // ggml object |
887 | | // |
888 | | |
889 | | struct ggml_object { |
890 | | size_t offs; |
891 | | size_t size; |
892 | | |
893 | | struct ggml_object * next; |
894 | | |
895 | | enum ggml_object_type type; |
896 | | |
897 | | char padding[4]; |
898 | | }; |
899 | | |
900 | | static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object); |
901 | | |
902 | | // |
903 | | // ggml context |
904 | | // |
905 | | |
906 | | struct ggml_context { |
907 | | size_t mem_size; |
908 | | void * mem_buffer; |
909 | | bool mem_buffer_owned; |
910 | | bool no_alloc; |
911 | | |
912 | | int n_objects; |
913 | | |
914 | | struct ggml_object * objects_begin; |
915 | | struct ggml_object * objects_end; |
916 | | }; |
917 | | |
918 | | // |
919 | | // data types |
920 | | // |
921 | | |
922 | | static const char * GGML_OP_NAME[GGML_OP_COUNT] = { |
923 | | "NONE", |
924 | | |
925 | | "DUP", |
926 | | "ADD", |
927 | | "ADD_ID", |
928 | | "ADD1", |
929 | | "ACC", |
930 | | "SUB", |
931 | | "MUL", |
932 | | "DIV", |
933 | | "SQR", |
934 | | "SQRT", |
935 | | "LOG", |
936 | | "SIN", |
937 | | "COS", |
938 | | "SUM", |
939 | | "SUM_ROWS", |
940 | | "CUMSUM", |
941 | | "MEAN", |
942 | | "ARGMAX", |
943 | | "COUNT_EQUAL", |
944 | | "REPEAT", |
945 | | "REPEAT_BACK", |
946 | | "CONCAT", |
947 | | "SILU_BACK", |
948 | | "NORM", |
949 | | "RMS_NORM", |
950 | | "RMS_NORM_BACK", |
951 | | "GROUP_NORM", |
952 | | "L2_NORM", |
953 | | |
954 | | "MUL_MAT", |
955 | | "MUL_MAT_ID", |
956 | | "OUT_PROD", |
957 | | |
958 | | "SCALE", |
959 | | "SET", |
960 | | "CPY", |
961 | | "CONT", |
962 | | "RESHAPE", |
963 | | "VIEW", |
964 | | "PERMUTE", |
965 | | "TRANSPOSE", |
966 | | "GET_ROWS", |
967 | | "GET_ROWS_BACK", |
968 | | "SET_ROWS", |
969 | | "DIAG", |
970 | | "DIAG_MASK_INF", |
971 | | "DIAG_MASK_ZERO", |
972 | | "SOFT_MAX", |
973 | | "SOFT_MAX_BACK", |
974 | | "ROPE", |
975 | | "ROPE_BACK", |
976 | | "CLAMP", |
977 | | "CONV_TRANSPOSE_1D", |
978 | | "IM2COL", |
979 | | "IM2COL_BACK", |
980 | | "IM2COL_3D", |
981 | | "CONV_2D", |
982 | | "CONV_3D", |
983 | | "CONV_2D_DW", |
984 | | "CONV_TRANSPOSE_2D", |
985 | | "POOL_1D", |
986 | | "POOL_2D", |
987 | | "POOL_2D_BACK", |
988 | | "UPSCALE", |
989 | | "PAD", |
990 | | "PAD_REFLECT_1D", |
991 | | "ROLL", |
992 | | "ARANGE", |
993 | | "TIMESTEP_EMBEDDING", |
994 | | "ARGSORT", |
995 | | "LEAKY_RELU", |
996 | | "TRI", |
997 | | "FILL", |
998 | | |
999 | | "FLASH_ATTN_EXT", |
1000 | | "FLASH_ATTN_BACK", |
1001 | | "SSM_CONV", |
1002 | | "SSM_SCAN", |
1003 | | "WIN_PART", |
1004 | | "WIN_UNPART", |
1005 | | "GET_REL_POS", |
1006 | | "ADD_REL_POS", |
1007 | | "RWKV_WKV6", |
1008 | | "GATED_LINEAR_ATTN", |
1009 | | "RWKV_WKV7", |
1010 | | "SOLVE_TRI", |
1011 | | |
1012 | | "UNARY", |
1013 | | |
1014 | | "MAP_CUSTOM1", |
1015 | | "MAP_CUSTOM2", |
1016 | | "MAP_CUSTOM3", |
1017 | | |
1018 | | "CUSTOM", |
1019 | | |
1020 | | "CROSS_ENTROPY_LOSS", |
1021 | | "CROSS_ENTROPY_LOSS_BACK", |
1022 | | "OPT_STEP_ADAMW", |
1023 | | "OPT_STEP_SGD", |
1024 | | |
1025 | | "GLU", |
1026 | | }; |
1027 | | |
1028 | | static_assert(GGML_OP_COUNT == 94, "GGML_OP_COUNT != 94"); |
1029 | | |
1030 | | static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { |
1031 | | "none", |
1032 | | |
1033 | | "x", |
1034 | | "x+y", |
1035 | | "x[i]+y", |
1036 | | "x+y", |
1037 | | "view(x,nb,offset)+=y->x", |
1038 | | "x-y", |
1039 | | "x*y", |
1040 | | "x/y", |
1041 | | "x^2", |
1042 | | "√x", |
1043 | | "log(x)", |
1044 | | "sin(x)", |
1045 | | "cos(x)", |
1046 | | "Σx", |
1047 | | "Σx_k", |
1048 | | "cumsum(x)", |
1049 | | "Σx/n", |
1050 | | "argmax(x)", |
1051 | | "count_equal(x)", |
1052 | | "repeat(x)", |
1053 | | "repeat_back(x)", |
1054 | | "concat(x, y)", |
1055 | | "silu_back(x)", |
1056 | | "norm(x)", |
1057 | | "rms_norm(x)", |
1058 | | "rms_norm_back(x)", |
1059 | | "group_norm(x)", |
1060 | | "l2_norm(x)", |
1061 | | |
1062 | | "X*Y", |
1063 | | "X[i]*Y", |
1064 | | "X*Y", |
1065 | | |
1066 | | "x*v", |
1067 | | "y-\\>view(x)", |
1068 | | "x-\\>y", |
1069 | | "cont(x)", |
1070 | | "reshape(x)", |
1071 | | "view(x)", |
1072 | | "permute(x)", |
1073 | | "transpose(x)", |
1074 | | "get_rows(x)", |
1075 | | "get_rows_back(x)", |
1076 | | "set_rows(x)", |
1077 | | "diag(x)", |
1078 | | "diag_mask_inf(x)", |
1079 | | "diag_mask_zero(x)", |
1080 | | "soft_max(x)", |
1081 | | "soft_max_back(x)", |
1082 | | "rope(x)", |
1083 | | "rope_back(x)", |
1084 | | "clamp(x)", |
1085 | | "conv_transpose_1d(x)", |
1086 | | "im2col(x)", |
1087 | | "im2col_back(x)", |
1088 | | "im2col_3d(x)", |
1089 | | "conv_2d(x)", |
1090 | | "conv_3d(x)", |
1091 | | "conv_2d_dw(x)", |
1092 | | "conv_transpose_2d(x)", |
1093 | | "pool_1d(x)", |
1094 | | "pool_2d(x)", |
1095 | | "pool_2d_back(x)", |
1096 | | "upscale(x)", |
1097 | | "pad(x)", |
1098 | | "pad_reflect_1d(x)", |
1099 | | "roll(x)", |
1100 | | "arange(start, stop, step)", |
1101 | | "timestep_embedding(timesteps, dim, max_period)", |
1102 | | "argsort(x)", |
1103 | | "leaky_relu(x)", |
1104 | | "tri(x)", |
1105 | | "fill(x, c)", |
1106 | | |
1107 | | "flash_attn_ext(x)", |
1108 | | "flash_attn_back(x)", |
1109 | | "ssm_conv(x)", |
1110 | | "ssm_scan(x)", |
1111 | | "win_part(x)", |
1112 | | "win_unpart(x)", |
1113 | | "get_rel_pos(x)", |
1114 | | "add_rel_pos(x)", |
1115 | | "rwkv_wkv6(k, v, r, tf, td, s)", |
1116 | | "gated_linear_attn(k, v, q, gate, s)", |
1117 | | "rwkv_wkv7(r, w, k, v, a, b, s)", |
1118 | | "A X = B, A triangular, solve X", |
1119 | | |
1120 | | "unary(x)", |
1121 | | |
1122 | | "map_custom(x)", |
1123 | | "map_custom(x,y)", |
1124 | | "map_custom(x,y,z)", |
1125 | | |
1126 | | "custom(x)", |
1127 | | |
1128 | | "cross_entropy_loss(x,y)", |
1129 | | "cross_entropy_loss_back(x,y)", |
1130 | | "adamw(x)", |
1131 | | "sgd(x)", |
1132 | | |
1133 | | "glu(x)", |
1134 | | }; |
1135 | | |
1136 | | static_assert(GGML_OP_COUNT == 94, "GGML_OP_COUNT != 94"); |
1137 | | |
1138 | | static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); |
1139 | | |
1140 | | static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = { |
1141 | | "ABS", |
1142 | | "SGN", |
1143 | | "NEG", |
1144 | | "STEP", |
1145 | | "TANH", |
1146 | | "ELU", |
1147 | | "RELU", |
1148 | | "SIGMOID", |
1149 | | "GELU", |
1150 | | "GELU_QUICK", |
1151 | | "SILU", |
1152 | | "HARDSWISH", |
1153 | | "HARDSIGMOID", |
1154 | | "EXP", |
1155 | | "EXPM1", |
1156 | | "SOFTPLUS", |
1157 | | "GELU_ERF", |
1158 | | "XIELU", |
1159 | | "FLOOR", |
1160 | | "CEIL", |
1161 | | "ROUND", |
1162 | | "TRUNC", |
1163 | | }; |
1164 | | |
1165 | | static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22"); |
1166 | | |
1167 | | static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = { |
1168 | | "REGLU", |
1169 | | "GEGLU", |
1170 | | "SWIGLU", |
1171 | | "SWIGLU_OAI", |
1172 | | "GEGLU_ERF", |
1173 | | "GEGLU_QUICK", |
1174 | | }; |
1175 | | |
1176 | | static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6"); |
1177 | | |
1178 | | |
1179 | | static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN"); |
1180 | | static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN"); |
1181 | | |
1182 | | |
1183 | | //////////////////////////////////////////////////////////////////////////////// |
1184 | | |
1185 | 0 | void ggml_print_object(const struct ggml_object * obj) { |
1186 | 0 | GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n", |
1187 | 0 | obj->type, obj->offs, obj->size, (const void *) obj->next); |
1188 | 0 | } |
1189 | | |
1190 | 0 | void ggml_print_objects(const struct ggml_context * ctx) { |
1191 | 0 | struct ggml_object * obj = ctx->objects_begin; |
1192 | |
1193 | 0 | GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx); |
1194 | |
1195 | 0 | while (obj != NULL) { |
1196 | 0 | ggml_print_object(obj); |
1197 | 0 | obj = obj->next; |
1198 | 0 | } |
1199 | |
1200 | 0 | GGML_LOG_INFO("%s: --- end ---\n", __func__); |
1201 | 0 | } |
1202 | | |
1203 | 638 | int64_t ggml_nelements(const struct ggml_tensor * tensor) { |
1204 | 638 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1205 | | |
1206 | 638 | return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3]; |
1207 | 638 | } |
1208 | | |
1209 | 0 | int64_t ggml_nrows(const struct ggml_tensor * tensor) { |
1210 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1211 | |
1212 | 0 | return tensor->ne[1]*tensor->ne[2]*tensor->ne[3]; |
1213 | 0 | } |
1214 | | |
1215 | 2.76k | size_t ggml_nbytes(const struct ggml_tensor * tensor) { |
1216 | 13.8k | for (int i = 0; i < GGML_MAX_DIMS; ++i) { |
1217 | 11.0k | if (tensor->ne[i] <= 0) { |
1218 | 2 | return 0; |
1219 | 2 | } |
1220 | 11.0k | } |
1221 | | |
1222 | 2.76k | size_t nbytes; |
1223 | 2.76k | const size_t blck_size = ggml_blck_size(tensor->type); |
1224 | 2.76k | if (blck_size == 1) { |
1225 | 2.76k | nbytes = ggml_type_size(tensor->type); |
1226 | 13.8k | for (int i = 0; i < GGML_MAX_DIMS; ++i) { |
1227 | 11.0k | nbytes += (tensor->ne[i] - 1)*tensor->nb[i]; |
1228 | 11.0k | } |
1229 | 2.76k | } |
1230 | 0 | else { |
1231 | 0 | nbytes = tensor->ne[0]*tensor->nb[0]/blck_size; |
1232 | 0 | for (int i = 1; i < GGML_MAX_DIMS; ++i) { |
1233 | 0 | nbytes += (tensor->ne[i] - 1)*tensor->nb[i]; |
1234 | 0 | } |
1235 | 0 | } |
1236 | | |
1237 | 2.76k | return nbytes; |
1238 | 2.76k | } |
1239 | | |
1240 | 0 | size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) { |
1241 | 0 | return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN); |
1242 | 0 | } |
1243 | | |
1244 | 5.33k | int64_t ggml_blck_size(enum ggml_type type) { |
1245 | 5.33k | return type_traits[type].blck_size; |
1246 | 5.33k | } |
1247 | | |
1248 | 5.33k | size_t ggml_type_size(enum ggml_type type) { |
1249 | 5.33k | return type_traits[type].type_size; |
1250 | 5.33k | } |
1251 | | |
1252 | 769 | size_t ggml_row_size(enum ggml_type type, int64_t ne) { |
1253 | 769 | assert(ne % ggml_blck_size(type) == 0); |
1254 | 769 | return ggml_type_size(type)*ne/ggml_blck_size(type); |
1255 | 769 | } |
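
A sketch relating the size helpers above. For unquantized types the block size is 1, so a row is just ne * type_size; for quantized types ne must be a multiple of the block size and the result is counted in whole blocks.

    static void row_size_demo(void) {
        const size_t f32_row = ggml_row_size(GGML_TYPE_F32, 4096);  // 4096 * sizeof(float) = 16384
        const size_t q4_row  = ggml_row_size(GGML_TYPE_Q4_0, 4096); // (4096 / QK4_0) * sizeof(block_q4_0)

        fprintf(stderr, "f32 row: %zu bytes, q4_0 row: %zu bytes\n", f32_row, q4_row);
    }
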
1256 | | |
1257 | 0 | double ggml_type_sizef(enum ggml_type type) { |
1258 | 0 | return ((double)(type_traits[type].type_size))/type_traits[type].blck_size; |
1259 | 0 | } |
1260 | | |
1261 | 90 | const char * ggml_type_name(enum ggml_type type) { |
1262 | 90 | return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE"; |
1263 | 90 | } |
1264 | | |
1265 | 0 | bool ggml_is_quantized(enum ggml_type type) { |
1266 | 0 | return type_traits[type].is_quantized; |
1267 | 0 | } |
1268 | | |
1269 | 0 | const char * ggml_op_name(enum ggml_op op) { |
1270 | 0 | return GGML_OP_NAME[op]; |
1271 | 0 | } |
1272 | | |
1273 | 0 | const char * ggml_op_symbol(enum ggml_op op) { |
1274 | 0 | return GGML_OP_SYMBOL[op]; |
1275 | 0 | } |
1276 | | |
1277 | 0 | const char * ggml_unary_op_name(enum ggml_unary_op op) { |
1278 | 0 | return GGML_UNARY_OP_NAME[op]; |
1279 | 0 | } |
1280 | | |
1281 | 0 | const char * ggml_glu_op_name(enum ggml_glu_op op) { |
1282 | 0 | return GGML_GLU_OP_NAME[op]; |
1283 | 0 | } |
1284 | | |
1285 | 0 | const char * ggml_op_desc(const struct ggml_tensor * t) { |
1286 | 0 | if (t->op == GGML_OP_UNARY) { |
1287 | 0 | enum ggml_unary_op uop = ggml_get_unary_op(t); |
1288 | 0 | return ggml_unary_op_name(uop); |
1289 | 0 | } |
1290 | 0 | if (t->op == GGML_OP_GLU) { |
1291 | 0 | enum ggml_glu_op gop = ggml_get_glu_op(t); |
1292 | 0 | return ggml_glu_op_name(gop); |
1293 | 0 | } |
1294 | 0 | return ggml_op_name(t->op); |
1295 | 0 | } |
1296 | | |
1297 | 0 | size_t ggml_element_size(const struct ggml_tensor * tensor) { |
1298 | 0 | return ggml_type_size(tensor->type); |
1299 | 0 | } |
1300 | | |
1301 | 0 | bool ggml_is_scalar(const struct ggml_tensor * tensor) { |
1302 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1303 | |
1304 | 0 | return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1; |
1305 | 0 | } |
1306 | | |
1307 | 0 | bool ggml_is_vector(const struct ggml_tensor * tensor) { |
1308 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1309 | |
1310 | 0 | return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1; |
1311 | 0 | } |
1312 | | |
1313 | 0 | bool ggml_is_matrix(const struct ggml_tensor * tensor) { |
1314 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1315 | |
1316 | 0 | return tensor->ne[2] == 1 && tensor->ne[3] == 1; |
1317 | 0 | } |
1318 | | |
1319 | 0 | bool ggml_is_3d(const struct ggml_tensor * tensor) { |
1320 | 0 | return tensor->ne[3] == 1; |
1321 | 0 | } |
1322 | | |
1323 | 0 | int ggml_n_dims(const struct ggml_tensor * tensor) { |
1324 | 0 | for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) { |
1325 | 0 | if (tensor->ne[i] > 1) { |
1326 | 0 | return i + 1; |
1327 | 0 | } |
1328 | 0 | } |
1329 | 0 | return 1; |
1330 | 0 | } |
1331 | | |
1332 | 0 | enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) { |
1333 | 0 | enum ggml_type wtype = GGML_TYPE_COUNT; |
1334 | |
1335 | 0 | switch (ftype) { |
1336 | 0 | case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break; |
1337 | 0 | case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break; |
1338 | 0 | case GGML_FTYPE_MOSTLY_BF16: wtype = GGML_TYPE_BF16; break; |
1339 | 0 | case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break; |
1340 | 0 | case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break; |
1341 | 0 | case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break; |
1342 | 0 | case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break; |
1343 | 0 | case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break; |
1344 | 0 | case GGML_FTYPE_MOSTLY_MXFP4: wtype = GGML_TYPE_MXFP4; break; |
1345 | 0 | case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break; |
1346 | 0 | case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break; |
1347 | 0 | case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break; |
1348 | 0 | case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break; |
1349 | 0 | case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break; |
1350 | 0 | case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break; |
1351 | 0 | case GGML_FTYPE_MOSTLY_IQ2_XS: wtype = GGML_TYPE_IQ2_XS; break; |
1352 | 0 | case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break; |
1353 | 0 | case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break; |
1354 | 0 | case GGML_FTYPE_MOSTLY_IQ1_M: wtype = GGML_TYPE_IQ1_M; break; |
1355 | 0 | case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break; |
1356 | 0 | case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break; |
1357 | 0 | case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break; |
1358 | 0 | case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break; |
1359 | 0 | case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break; |
1360 | 0 | case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break; |
1361 | 0 | } |
1362 | | |
1363 | 0 | GGML_ASSERT(wtype != GGML_TYPE_COUNT); |
1364 | |
1365 | 0 | return wtype; |
1366 | 0 | } |
1367 | | |
1368 | 239 | size_t ggml_tensor_overhead(void) { |
1369 | 239 | return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE; |
1370 | 239 | } |
1371 | | |
1372 | 0 | bool ggml_is_transposed(const struct ggml_tensor * tensor) { |
1373 | 0 | return tensor->nb[0] > tensor->nb[1]; |
1374 | 0 | } |
1375 | | |
1376 | 0 | static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) { |
1377 | 0 | size_t next_nb = ggml_type_size(tensor->type); |
1378 | 0 | if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) { |
1379 | 0 | return false; |
1380 | 0 | } |
1381 | 0 | next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type); |
1382 | 0 | for (int i = 1; i < GGML_MAX_DIMS; i++) { |
1383 | 0 | if (tensor->ne[i] != 1) { |
1384 | 0 | if (i > n) { |
1385 | 0 | if (tensor->nb[i] != next_nb) { |
1386 | 0 | return false; |
1387 | 0 | } |
1388 | 0 | next_nb *= tensor->ne[i]; |
1389 | 0 | } else { |
1390 | | // this dimension does not need to be contiguous |
1391 | 0 | next_nb = tensor->ne[i]*tensor->nb[i]; |
1392 | 0 | } |
1393 | 0 | } |
1394 | 0 | } |
1395 | 0 | return true; |
1396 | 0 | } |
1397 | | |
1398 | 0 | bool ggml_is_contiguous(const struct ggml_tensor * tensor) { |
1399 | 0 | return ggml_is_contiguous_0(tensor); |
1400 | 0 | } |
1401 | | |
1402 | 0 | bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) { |
1403 | 0 | return ggml_is_contiguous_n(tensor, 0); |
1404 | 0 | } |
1405 | | |
1406 | 0 | bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) { |
1407 | 0 | return ggml_is_contiguous_n(tensor, 1); |
1408 | 0 | } |
1409 | | |
1410 | 0 | bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) { |
1411 | 0 | return ggml_is_contiguous_n(tensor, 2); |
1412 | 0 | } |
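
A sketch of what the checks above mean in practice: a freshly created tensor is contiguous, while a transposed view of it is not, because nb[0] no longer equals the element size. ggml_new_tensor_2d and ggml_transpose are public API from ggml.h and are assumed here.

    static void contiguity_demo(struct ggml_context * ctx) {
        struct ggml_tensor * a  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 16, 8);
        struct ggml_tensor * at = ggml_transpose(ctx, a); // swaps nb[0] and nb[1]

        fprintf(stderr, "a: %d, a^T: %d\n",               // prints 1 and 0
                (int) ggml_is_contiguous(a), (int) ggml_is_contiguous(at));
    }
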
1413 | | |
1414 | 0 | bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) { |
1415 | 0 | return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type); |
1416 | 0 | } |
1417 | | |
1418 | 0 | bool ggml_is_permuted(const struct ggml_tensor * tensor) { |
1419 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1420 | |
1421 | 0 | return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3]; |
1422 | 0 | } |
1423 | | |
1424 | 0 | bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) { |
1425 | 0 | return |
1426 | 0 | tensor->nb[0] > tensor->nb[2] && |
1427 | 0 | tensor->nb[1] > tensor->nb[0] && |
1428 | 0 | tensor->nb[2] == ggml_type_size(tensor->type); |
1429 | 0 | } |
1430 | | |
1431 | 0 | bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) { |
1432 | 0 | return |
1433 | 0 | tensor->ne[0] == ggml_blck_size(tensor->type) || |
1434 | 0 | tensor->nb[0] == ggml_type_size(tensor->type); |
1435 | 0 | } |
1436 | | |
1437 | 0 | static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) { |
1438 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1439 | |
1440 | 0 | return |
1441 | 0 | tensor->nb[0] == ggml_type_size(tensor->type) && |
1442 | 0 | tensor->nb[2] == tensor->nb[1]*tensor->ne[1] && |
1443 | 0 | tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; |
1444 | 0 | } |
1445 | | |
1446 | 0 | bool ggml_is_empty(const struct ggml_tensor * tensor) { |
1447 | 0 | for (int i = 0; i < GGML_MAX_DIMS; ++i) { |
1448 | 0 | if (tensor->ne[i] == 0) { |
1449 | | // empty if any dimension has no elements |
1450 | 0 | return true; |
1451 | 0 | } |
1452 | 0 | } |
1453 | 0 | return false; |
1454 | 0 | } |
1455 | | |
1456 | 0 | bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
1457 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1458 | |
1459 | 0 | return |
1460 | 0 | (t0->ne[0] == t1->ne[0]) && |
1461 | 0 | (t0->ne[1] == t1->ne[1]) && |
1462 | 0 | (t0->ne[2] == t1->ne[2]) && |
1463 | 0 | (t0->ne[3] == t1->ne[3]); |
1464 | 0 | } |
1465 | | |
1466 | 0 | bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
1467 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1468 | |
1469 | 0 | return |
1470 | 0 | (t0->nb[0] == t1->nb[0]) && |
1471 | 0 | (t0->nb[1] == t1->nb[1]) && |
1472 | 0 | (t0->nb[2] == t1->nb[2]) && |
1473 | 0 | (t0->nb[3] == t1->nb[3]); |
1474 | 0 | } |
1475 | | |
1476 | | // check if t1 can be represented as a repetition of t0 |
1477 | 0 | bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
1478 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1479 | |
|
1480 | 0 | return ggml_is_empty(t0) ? ggml_is_empty(t1) : |
1481 | 0 | (t1->ne[0]%t0->ne[0] == 0) && |
1482 | 0 | (t1->ne[1]%t0->ne[1] == 0) && |
1483 | 0 | (t1->ne[2]%t0->ne[2] == 0) && |
1484 | 0 | (t1->ne[3]%t0->ne[3] == 0); |
1485 | 0 | } |
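
A sketch of the broadcast rule above: every dimension of t1 must be an integer multiple of the matching dimension of t0. The shapes are illustrative and ggml_new_tensor_3d is assumed from ggml.h.

    static void repeat_demo(struct ggml_context * ctx) {
        struct ggml_tensor * t0 = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 4, 1, 2);
        struct ggml_tensor * t1 = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 8, 5, 2);

        // 8%4 == 0, 5%1 == 0, 2%2 == 0, 1%1 == 0 -> true
        fprintf(stderr, "can_repeat: %d\n", (int) ggml_can_repeat(t0, t1));
    }
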
1486 | | |
1487 | 0 | static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
1488 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
1489 | |
1490 | 0 | return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1); |
1491 | 0 | } |
1492 | | |
1493 | | // assert that pointer is aligned to GGML_MEM_ALIGN |
1494 | | #define GGML_ASSERT_ALIGNED(ptr) \ |
1495 | 1.87k | GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0) |
1496 | | |
1497 | | //////////////////////////////////////////////////////////////////////////////// |
1498 | | |
1499 | 1.10k | struct ggml_context * ggml_init(struct ggml_init_params params) { |
1500 | 1.10k | bool is_first_call = true; |
1501 | | |
1502 | 1.10k | ggml_critical_section_start(); |
1503 | | |
1504 | 1.10k | if (is_first_call) { |
1505 | | // initialize time system (required on Windows) |
1506 | 1.10k | ggml_time_init(); |
1507 | | |
1508 | 1.10k | is_first_call = false; |
1509 | 1.10k | } |
1510 | | |
1511 | 1.10k | ggml_critical_section_end(); |
1512 | | |
1513 | 1.10k | struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context)); |
1514 | | |
1515 | | // allow calling ggml_init with a mem_size of 0 |
1516 | 1.10k | if (params.mem_size == 0) { |
1517 | 972 | params.mem_size = GGML_MEM_ALIGN; |
1518 | 972 | } |
1519 | | |
1520 | 1.10k | const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN); |
1521 | | |
1522 | 1.10k | *ctx = (struct ggml_context) { |
1523 | 1.10k | /*.mem_size =*/ mem_size, |
1524 | 1.10k | /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size), |
1525 | 1.10k | /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, |
1526 | 1.10k | /*.no_alloc =*/ params.no_alloc, |
1527 | 1.10k | /*.n_objects =*/ 0, |
1528 | 1.10k | /*.objects_begin =*/ NULL, |
1529 | 1.10k | /*.objects_end =*/ NULL, |
1530 | 1.10k | }; |
1531 | | |
1532 | 1.10k | GGML_ASSERT(ctx->mem_buffer != NULL); |
1533 | | |
1534 | 1.10k | GGML_ASSERT_ALIGNED(ctx->mem_buffer); |
1535 | | |
1536 | 1.10k | GGML_PRINT_DEBUG("%s: context initialized\n", __func__); |
1537 | | |
1538 | 1.10k | return ctx; |
1539 | 1.10k | } |
1540 | | |
1541 | 0 | void ggml_reset(struct ggml_context * ctx) { |
1542 | 0 | if (ctx == NULL) { |
1543 | 0 | return; |
1544 | 0 | } |
1545 | | |
1546 | 0 | ctx->n_objects = 0; |
1547 | 0 | ctx->objects_begin = NULL; |
1548 | 0 | ctx->objects_end = NULL; |
1549 | 0 | } |
1550 | | |
1551 | 1.10k | void ggml_free(struct ggml_context * ctx) { |
1552 | 1.10k | if (ctx == NULL) { |
1553 | 0 | return; |
1554 | 0 | } |
1555 | | |
1556 | 1.10k | if (ctx->mem_buffer_owned) { |
1557 | 1.10k | ggml_aligned_free(ctx->mem_buffer, ctx->mem_size); |
1558 | 1.10k | } |
1559 | | |
1560 | 1.10k | GGML_FREE(ctx); |
1561 | 1.10k | } |
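
A minimal lifecycle sketch for the context API above: size the pool from ggml_tensor_overhead(), request metadata-only allocation with no_alloc, then release everything with ggml_free. ggml_new_tensor_1d is assumed from ggml.h.

    static void context_demo(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16 * ggml_tensor_overhead(), // room for ~16 tensor headers
            /*.mem_buffer =*/ NULL,                        // let ggml allocate the pool
            /*.no_alloc   =*/ true,                        // metadata only, no tensor data
        };

        struct ggml_context * ctx = ggml_init(params);
        struct ggml_tensor  * t   = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
        (void) t; // t->data is NULL here; a backend buffer would normally provide storage

        ggml_free(ctx);
    }
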
1562 | | |
1563 | 0 | size_t ggml_used_mem(const struct ggml_context * ctx) { |
1564 | 0 | return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size; |
1565 | 0 | } |
1566 | | |
1567 | 0 | bool ggml_get_no_alloc(struct ggml_context * ctx) { |
1568 | 0 | return ctx->no_alloc; |
1569 | 0 | } |
1570 | | |
1571 | 478 | void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) { |
1572 | 478 | ctx->no_alloc = no_alloc; |
1573 | 478 | } |
1574 | | |
1575 | 0 | void * ggml_get_mem_buffer(const struct ggml_context * ctx) { |
1576 | 0 | return ctx->mem_buffer; |
1577 | 0 | } |
1578 | | |
1579 | 0 | size_t ggml_get_mem_size(const struct ggml_context * ctx) { |
1580 | 0 | return ctx->mem_size; |
1581 | 0 | } |
1582 | | |
1583 | 0 | size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) { |
1584 | 0 | size_t max_size = 0; |
1585 | |
1586 | 0 | for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) { |
1587 | 0 | size_t bytes = ggml_nbytes(tensor); |
1588 | 0 | max_size = MAX(max_size, bytes); |
1589 | 0 | } |
1590 | |
1591 | 0 | return max_size; |
1592 | 0 | } |
1593 | | |
1594 | | //////////////////////////////////////////////////////////////////////////////// |
1595 | | |
1596 | 769 | static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) { |
1597 | | // always insert objects at the end of the context's memory pool |
1598 | 769 | struct ggml_object * obj_cur = ctx->objects_end; |
1599 | | |
1600 | 769 | const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs; |
1601 | 769 | const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size; |
1602 | 769 | const size_t cur_end = cur_offs + cur_size; |
1603 | | |
1604 | | // align to GGML_MEM_ALIGN |
1605 | 769 | size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN); |
1606 | | |
1607 | 769 | char * const mem_buffer = ctx->mem_buffer; |
1608 | 769 | struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end); |
1609 | | |
1610 | 769 | if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) { |
1611 | 0 | GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n", |
1612 | 0 | __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size); |
1613 | | #ifndef NDEBUG |
1614 | | GGML_ABORT("not enough space in the context's memory pool"); |
1615 | | #endif |
1616 | 0 | return NULL; |
1617 | 0 | } |
1618 | | |
1619 | 769 | *obj_new = (struct ggml_object) { |
1620 | 769 | .offs = cur_end + GGML_OBJECT_SIZE, |
1621 | 769 | .size = size_needed, |
1622 | 769 | .next = NULL, |
1623 | 769 | .type = type, |
1624 | 769 | }; |
1625 | | |
1626 | 769 | GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs); |
1627 | | |
1628 | 769 | if (obj_cur != NULL) { |
1629 | 639 | obj_cur->next = obj_new; |
1630 | 639 | } else { |
1631 | | // this is the first object in this context |
1632 | 130 | ctx->objects_begin = obj_new; |
1633 | 130 | } |
1634 | | |
1635 | 769 | ctx->objects_end = obj_new; |
1636 | | |
1637 | | //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size); |
1638 | | |
1639 | 769 | return obj_new; |
1640 | 769 | } |
1641 | | |
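Since every allocation above consumes GGML_OBJECT_SIZE plus a GGML_MEM_ALIGN-padded payload from the pool, a caller can size a context up front. A rough sketch (an illustration, not a ggml.c helper), assuming ggml_tensor_overhead(), GGML_PAD and GGML_MEM_ALIGN from the public header:

    #include "ggml.h"

    // estimated pool size for n_tensors dense F32 tensors of n_elems elements each
    static size_t estimate_ctx_size(int n_tensors, int64_t n_elems) {
        size_t size = 0;
        for (int i = 0; i < n_tensors; i++) {
            size += ggml_tensor_overhead();                          // ggml_object + ggml_tensor headers
            size += GGML_PAD(n_elems*sizeof(float), GGML_MEM_ALIGN); // tensor data, padded like ggml_new_object pads its payload
        }
        return size;
    }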
1642 | | static struct ggml_tensor * ggml_new_tensor_impl( |
1643 | | struct ggml_context * ctx, |
1644 | | enum ggml_type type, |
1645 | | int n_dims, |
1646 | | const int64_t * ne, |
1647 | | struct ggml_tensor * view_src, |
1648 | 769 | size_t view_offs) { |
1649 | | |
1650 | 769 | GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT); |
1651 | 769 | GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS); |
1652 | | |
1653 | | // find the base tensor and absolute offset |
1654 | 769 | if (view_src != NULL && view_src->view_src != NULL) { |
1655 | 0 | view_offs += view_src->view_offs; |
1656 | 0 | view_src = view_src->view_src; |
1657 | 0 | } |
1658 | | |
1659 | 769 | size_t data_size = ggml_row_size(type, ne[0]); |
1660 | 3.07k | for (int i = 1; i < n_dims; i++) { |
1661 | 2.30k | data_size *= ne[i]; |
1662 | 2.30k | } |
1663 | | |
1664 | 769 | GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src)); |
1665 | | |
1666 | 769 | void * data = view_src != NULL ? view_src->data : NULL; |
1667 | 769 | if (data != NULL) { |
1668 | 0 | data = (char *) data + view_offs; |
1669 | 0 | } |
1670 | | |
1671 | 769 | size_t obj_alloc_size = 0; |
1672 | | |
1673 | 769 | if (view_src == NULL && !ctx->no_alloc) { |
1674 | | // allocate tensor data in the context's memory pool |
1675 | 0 | obj_alloc_size = data_size; |
1676 | 0 | } |
1677 | | |
1678 | 769 | struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size); |
1679 | 769 | GGML_ASSERT(obj_new); |
1680 | | |
1681 | 769 | struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs); |
1682 | | |
1683 | 769 | *result = (struct ggml_tensor) { |
1684 | 769 | /*.type =*/ type, |
1685 | 769 | /*.buffer =*/ NULL, |
1686 | 769 | /*.ne =*/ { 1, 1, 1, 1 }, |
1687 | 769 | /*.nb =*/ { 0, 0, 0, 0 }, |
1688 | 769 | /*.op =*/ GGML_OP_NONE, |
1689 | 769 | /*.op_params =*/ { 0 }, |
1690 | 769 | /*.flags =*/ 0, |
1691 | 769 | /*.src =*/ { NULL }, |
1692 | 769 | /*.view_src =*/ view_src, |
1693 | 769 | /*.view_offs =*/ view_offs, |
1694 | 769 | /*.data =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data, |
1695 | 769 | /*.name =*/ { 0 }, |
1696 | 769 | /*.extra =*/ NULL, |
1697 | 769 | /*.padding =*/ { 0 }, |
1698 | 769 | }; |
1699 | | |
1700 | | // TODO: this should not be needed as long as we don't rely on aligned SIMD loads |
1701 | | //GGML_ASSERT_ALIGNED(result->data); |
1702 | | |
1703 | 3.84k | for (int i = 0; i < n_dims; i++) { |
1704 | 3.07k | result->ne[i] = ne[i]; |
1705 | 3.07k | } |
1706 | | |
1707 | 769 | result->nb[0] = ggml_type_size(type); |
1708 | 769 | result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type)); |
1709 | 2.30k | for (int i = 2; i < GGML_MAX_DIMS; i++) { |
1710 | 1.53k | result->nb[i] = result->nb[i - 1]*result->ne[i - 1]; |
1711 | 1.53k | } |
1712 | | |
1713 | 769 | ctx->n_objects++; |
1714 | | |
1715 | 769 | return result; |
1716 | 769 | } |
1717 | | |
1718 | | struct ggml_tensor * ggml_new_tensor( |
1719 | | struct ggml_context * ctx, |
1720 | | enum ggml_type type, |
1721 | | int n_dims, |
1722 | 769 | const int64_t * ne) { |
1723 | 769 | return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0); |
1724 | 769 | } |
1725 | | |
1726 | | struct ggml_tensor * ggml_new_tensor_1d( |
1727 | | struct ggml_context * ctx, |
1728 | | enum ggml_type type, |
1729 | 0 | int64_t ne0) { |
1730 | 0 | return ggml_new_tensor(ctx, type, 1, &ne0); |
1731 | 0 | } |
1732 | | |
1733 | | struct ggml_tensor * ggml_new_tensor_2d( |
1734 | | struct ggml_context * ctx, |
1735 | | enum ggml_type type, |
1736 | | int64_t ne0, |
1737 | 0 | int64_t ne1) { |
1738 | 0 | const int64_t ne[2] = { ne0, ne1 }; |
1739 | 0 | return ggml_new_tensor(ctx, type, 2, ne); |
1740 | 0 | } |
1741 | | |
1742 | | struct ggml_tensor * ggml_new_tensor_3d( |
1743 | | struct ggml_context * ctx, |
1744 | | enum ggml_type type, |
1745 | | int64_t ne0, |
1746 | | int64_t ne1, |
1747 | 0 | int64_t ne2) { |
1748 | 0 | const int64_t ne[3] = { ne0, ne1, ne2 }; |
1749 | 0 | return ggml_new_tensor(ctx, type, 3, ne); |
1750 | 0 | } |
1751 | | |
1752 | | struct ggml_tensor * ggml_new_tensor_4d( |
1753 | | struct ggml_context * ctx, |
1754 | | enum ggml_type type, |
1755 | | int64_t ne0, |
1756 | | int64_t ne1, |
1757 | | int64_t ne2, |
1758 | 0 | int64_t ne3) { |
1759 | 0 | const int64_t ne[4] = { ne0, ne1, ne2, ne3 }; |
1760 | 0 | return ggml_new_tensor(ctx, type, 4, ne); |
1761 | 0 | } |
1762 | | |
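The nb[] values filled in by ggml_new_tensor_impl are byte strides: nb[0] is the element (or block) size, nb[1] is one row, and each higher dimension multiplies up. A small addressing sketch over the public struct fields (illustrative only, valid for non-quantized types):

    // address of element (i0, i1, i2, i3), mirroring the nb[] setup above
    static float * tensor_get_f32_ptr(const struct ggml_tensor * t,
                                      int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
        return (float *)((char *) t->data + i0*t->nb[0] + i1*t->nb[1] + i2*t->nb[2] + i3*t->nb[3]);
    }

    // e.g. for a [64, 8] F32 tensor: nb[0] = 4, nb[1] = 256, nb[2] = nb[3] = 2048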
1763 | 0 | void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) { |
1764 | 0 | struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes); |
1765 | |
|
1766 | 0 | return (uint8_t *)ctx->mem_buffer + obj->offs; |
1767 | 0 | } |
1768 | | |
1769 | 0 | struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) { |
1770 | 0 | return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne); |
1771 | 0 | } |
1772 | | |
1773 | 0 | void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) { |
1774 | 0 | const int64_t ne2 = tensor->ne[2]; |
1775 | 0 | const int64_t ne1 = tensor->ne[1]; |
1776 | 0 | const int64_t ne0 = tensor->ne[0]; |
1777 | |
|
1778 | 0 | const int64_t i3_ = (i/(ne2*ne1*ne0)); |
1779 | 0 | const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0); |
1780 | 0 | const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0; |
1781 | 0 | const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0); |
1782 | |
|
1783 | 0 | if (i0) { |
1784 | 0 | * i0 = i0_; |
1785 | 0 | } |
1786 | 0 | if (i1) { |
1787 | 0 | * i1 = i1_; |
1788 | 0 | } |
1789 | 0 | if (i2) { |
1790 | 0 | * i2 = i2_; |
1791 | 0 | } |
1792 | 0 | if (i3) { |
1793 | 0 | * i3 = i3_; |
1794 | 0 | } |
1795 | 0 | } |
1796 | | |
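ggml_unravel_index inverts the row-major flattening i = ((i3*ne[2] + i2)*ne[1] + i1)*ne[0] + i0, with i0 varying fastest. A worked example (ctx is assumed to come from an earlier ggml_init call, as in the lifecycle sketch above):

    struct ggml_tensor * t = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 4, 3, 2, 1);

    int64_t i0, i1, i2, i3;
    ggml_unravel_index(t, 17, &i0, &i1, &i2, &i3);
    // 17 = 1*(3*4) + 1*4 + 1  ->  i0 = 1, i1 = 1, i2 = 1, i3 = 0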
1797 | 0 | void * ggml_get_data(const struct ggml_tensor * tensor) { |
1798 | 0 | return tensor->data; |
1799 | 0 | } |
1800 | | |
1801 | 0 | float * ggml_get_data_f32(const struct ggml_tensor * tensor) { |
1802 | 0 | assert(tensor->type == GGML_TYPE_F32); |
1803 | 0 | return (float *)(tensor->data); |
1804 | 0 | } |
1805 | | |
1806 | 0 | enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) { |
1807 | 0 | GGML_ASSERT(tensor->op == GGML_OP_UNARY); |
1808 | 0 | return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0); |
1809 | 0 | } |
1810 | | |
1811 | 0 | enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) { |
1812 | 0 | GGML_ASSERT(tensor->op == GGML_OP_GLU); |
1813 | 0 | return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0); |
1814 | 0 | } |
1815 | | |
1816 | 717 | const char * ggml_get_name(const struct ggml_tensor * tensor) { |
1817 | 717 | return tensor->name; |
1818 | 717 | } |
1819 | | |
1820 | 1.84k | struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) { |
1821 | 1.84k | size_t i; |
1822 | 14.8k | for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) { |
1823 | 12.9k | tensor->name[i] = name[i]; |
1824 | 12.9k | } |
1825 | 1.84k | tensor->name[i] = '\0'; |
1826 | 1.84k | return tensor; |
1827 | 1.84k | } |
1828 | | |
1829 | 0 | struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) { |
1830 | 0 | va_list args; |
1831 | 0 | va_start(args, fmt); |
1832 | 0 | vsnprintf(tensor->name, sizeof(tensor->name), fmt, args); |
1833 | 0 | va_end(args); |
1834 | 0 | return tensor; |
1835 | 0 | } |
1836 | | |
1837 | | struct ggml_tensor * ggml_view_tensor( |
1838 | | struct ggml_context * ctx, |
1839 | 0 | struct ggml_tensor * src) { |
1840 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0); |
1841 | 0 | ggml_format_name(result, "%s (view)", src->name); |
1842 | |
|
1843 | 0 | for (int i = 0; i < GGML_MAX_DIMS; i++) { |
1844 | 0 | result->nb[i] = src->nb[i]; |
1845 | 0 | } |
1846 | |
|
1847 | 0 | return result; |
1848 | 0 | } |
1849 | | |
1850 | 237 | struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) { |
1851 | 237 | struct ggml_object * obj = ctx->objects_begin; |
1852 | | |
1853 | 237 | char * const mem_buffer = ctx->mem_buffer; |
1854 | | |
1855 | 237 | while (obj != NULL) { |
1856 | 130 | if (obj->type == GGML_OBJECT_TYPE_TENSOR) { |
1857 | 130 | return (struct ggml_tensor *)(mem_buffer + obj->offs); |
1858 | 130 | } |
1859 | | |
1860 | 0 | obj = obj->next; |
1861 | 0 | } |
1862 | | |
1863 | 107 | return NULL; |
1864 | 237 | } |
1865 | | |
1866 | 559 | struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) { |
1867 | 559 | struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE); |
1868 | 559 | obj = obj->next; |
1869 | | |
1870 | 559 | char * const mem_buffer = ctx->mem_buffer; |
1871 | | |
1872 | 559 | while (obj != NULL) { |
1873 | 508 | if (obj->type == GGML_OBJECT_TYPE_TENSOR) { |
1874 | 508 | return (struct ggml_tensor *)(mem_buffer + obj->offs); |
1875 | 508 | } |
1876 | | |
1877 | 0 | obj = obj->next; |
1878 | 0 | } |
1879 | | |
1880 | 51 | return NULL; |
1881 | 559 | } |
1882 | | |
1883 | 0 | struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) { |
1884 | 0 | struct ggml_object * obj = ctx->objects_begin; |
1885 | |
|
1886 | 0 | char * const mem_buffer = ctx->mem_buffer; |
1887 | |
|
1888 | 0 | while (obj != NULL) { |
1889 | 0 | if (obj->type == GGML_OBJECT_TYPE_TENSOR) { |
1890 | 0 | struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs); |
1891 | 0 | if (strcmp(cur->name, name) == 0) { |
1892 | 0 | return cur; |
1893 | 0 | } |
1894 | 0 | } |
1895 | | |
1896 | 0 | obj = obj->next; |
1897 | 0 | } |
1898 | | |
1899 | 0 | return NULL; |
1900 | 0 | } |
1901 | | |
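Together, ggml_set_name, ggml_get_first_tensor/ggml_get_next_tensor and ggml_get_tensor act as a simple symbol table over the context's object list (lookup is a linear scan plus strcmp). A short sketch, reusing a ctx and the includes from the lifecycle sketch above:

    struct ggml_tensor * w = ggml_set_name(ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 8), "weight");
    struct ggml_tensor * b = ggml_set_name(ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8),     "bias");
    (void) w; (void) b;

    // walk every tensor object in the context, in allocation order
    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
        printf("%-8s %zu bytes\n", ggml_get_name(t), ggml_nbytes(t));
    }

    struct ggml_tensor * found = ggml_get_tensor(ctx, "bias"); // == b
    (void) found;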
1902 | | //////////////////////////////////////////////////////////////////////////////// |
1903 | | |
1904 | | // ggml_dup |
1905 | | |
1906 | | static struct ggml_tensor * ggml_dup_impl( |
1907 | | struct ggml_context * ctx, |
1908 | | struct ggml_tensor * a, |
1909 | 0 | bool inplace) { |
1910 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
1911 | |
|
1912 | 0 | result->op = GGML_OP_DUP; |
1913 | 0 | result->src[0] = a; |
1914 | |
|
1915 | 0 | return result; |
1916 | 0 | } |
1917 | | |
1918 | | struct ggml_tensor * ggml_dup( |
1919 | | struct ggml_context * ctx, |
1920 | 0 | struct ggml_tensor * a) { |
1921 | 0 | return ggml_dup_impl(ctx, a, false); |
1922 | 0 | } |
1923 | | |
1924 | | struct ggml_tensor * ggml_dup_inplace( |
1925 | | struct ggml_context * ctx, |
1926 | 0 | struct ggml_tensor * a) { |
1927 | 0 | return ggml_dup_impl(ctx, a, true); |
1928 | 0 | } |
1929 | | |
1930 | | // ggml_add |
1931 | | |
1932 | | static struct ggml_tensor * ggml_add_impl( |
1933 | | struct ggml_context * ctx, |
1934 | | struct ggml_tensor * a, |
1935 | | struct ggml_tensor * b, |
1936 | 0 | bool inplace) { |
1937 | 0 | GGML_ASSERT(ggml_can_repeat(b, a)); |
1938 | |
|
1939 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
1940 | |
|
1941 | 0 | result->op = GGML_OP_ADD; |
1942 | 0 | result->src[0] = a; |
1943 | 0 | result->src[1] = b; |
1944 | |
|
1945 | 0 | return result; |
1946 | 0 | } |
1947 | | |
1948 | | struct ggml_tensor * ggml_add( |
1949 | | struct ggml_context * ctx, |
1950 | | struct ggml_tensor * a, |
1951 | 0 | struct ggml_tensor * b) { |
1952 | 0 | return ggml_add_impl(ctx, a, b, false); |
1953 | 0 | } |
1954 | | |
1955 | | struct ggml_tensor * ggml_add_inplace( |
1956 | | struct ggml_context * ctx, |
1957 | | struct ggml_tensor * a, |
1958 | 0 | struct ggml_tensor * b) { |
1959 | 0 | return ggml_add_impl(ctx, a, b, true); |
1960 | 0 | } |
1961 | | |
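Like every op constructor in this file, ggml_add only records the operation: it creates (or views) the result tensor and fills in result->op and result->src, leaving the arithmetic to a backend that later executes a graph containing the node. A sketch of what is recorded (graph building and compute live elsewhere and are not shown):

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

    struct ggml_tensor * c = ggml_add(ctx, a, b);

    // c is only a node description at this point - no data has been touched
    GGML_ASSERT(c->op     == GGML_OP_ADD);
    GGML_ASSERT(c->src[0] == a);
    GGML_ASSERT(c->src[1] == b);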
1962 | | // ggml_add_cast |
1963 | | |
1964 | | static struct ggml_tensor * ggml_add_cast_impl( |
1965 | | struct ggml_context * ctx, |
1966 | | struct ggml_tensor * a, |
1967 | | struct ggml_tensor * b, |
1968 | 0 | enum ggml_type type) { |
1969 | | // TODO: support less-strict constraint |
1970 | | // GGML_ASSERT(ggml_can_repeat(b, a)); |
1971 | 0 | GGML_ASSERT(ggml_can_repeat_rows(b, a)); |
1972 | | |
1973 | | // currently only supported for quantized input, f16, and bf16 |
1974 | 0 | GGML_ASSERT(ggml_is_quantized(a->type) || |
1975 | 0 | a->type == GGML_TYPE_F16 || |
1976 | 0 | a->type == GGML_TYPE_BF16); |
1977 | |
|
1978 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne); |
1979 | |
|
1980 | 0 | result->op = GGML_OP_ADD; |
1981 | 0 | result->src[0] = a; |
1982 | 0 | result->src[1] = b; |
1983 | |
|
1984 | 0 | return result; |
1985 | 0 | } |
1986 | | |
1987 | | struct ggml_tensor * ggml_add_cast( |
1988 | | struct ggml_context * ctx, |
1989 | | struct ggml_tensor * a, |
1990 | | struct ggml_tensor * b, |
1991 | 0 | enum ggml_type type) { |
1992 | 0 | return ggml_add_cast_impl(ctx, a, b, type); |
1993 | 0 | } |
1994 | | |
1995 | | struct ggml_tensor * ggml_add_id( |
1996 | | struct ggml_context * ctx, |
1997 | | struct ggml_tensor * a, |
1998 | | struct ggml_tensor * b, |
1999 | 0 | struct ggml_tensor * ids) { |
2000 | |
|
2001 | 0 | GGML_ASSERT(a->ne[0] == b->ne[0]); |
2002 | 0 | GGML_ASSERT(a->ne[1] == ids->ne[0]); |
2003 | 0 | GGML_ASSERT(a->ne[2] == ids->ne[1]); |
2004 | 0 | GGML_ASSERT(ids->type == GGML_TYPE_I32); |
2005 | |
|
2006 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
2007 | |
|
2008 | 0 | result->op = GGML_OP_ADD_ID; |
2009 | 0 | result->src[0] = a; |
2010 | 0 | result->src[1] = b; |
2011 | 0 | result->src[2] = ids; |
2012 | |
|
2013 | 0 | return result; |
2014 | 0 | } |
2015 | | |
2016 | | // ggml_add1 |
2017 | | |
2018 | | static struct ggml_tensor * ggml_add1_impl( |
2019 | | struct ggml_context * ctx, |
2020 | | struct ggml_tensor * a, |
2021 | | struct ggml_tensor * b, |
2022 | 0 | bool inplace) { |
2023 | 0 | GGML_ASSERT(ggml_is_scalar(b)); |
2024 | 0 | GGML_ASSERT(ggml_is_padded_1d(a)); |
2025 | |
|
2026 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2027 | |
|
2028 | 0 | result->op = GGML_OP_ADD1; |
2029 | 0 | result->src[0] = a; |
2030 | 0 | result->src[1] = b; |
2031 | |
|
2032 | 0 | return result; |
2033 | 0 | } |
2034 | | |
2035 | | struct ggml_tensor * ggml_add1( |
2036 | | struct ggml_context * ctx, |
2037 | | struct ggml_tensor * a, |
2038 | 0 | struct ggml_tensor * b) { |
2039 | 0 | return ggml_add1_impl(ctx, a, b, false); |
2040 | 0 | } |
2041 | | |
2042 | | struct ggml_tensor * ggml_add1_inplace( |
2043 | | struct ggml_context * ctx, |
2044 | | struct ggml_tensor * a, |
2045 | 0 | struct ggml_tensor * b) { |
2046 | 0 | return ggml_add1_impl(ctx, a, b, true); |
2047 | 0 | } |
2048 | | |
2049 | | // ggml_acc |
2050 | | |
2051 | | static struct ggml_tensor * ggml_acc_impl( |
2052 | | struct ggml_context * ctx, |
2053 | | struct ggml_tensor * a, |
2054 | | struct ggml_tensor * b, |
2055 | | size_t nb1, |
2056 | | size_t nb2, |
2057 | | size_t nb3, |
2058 | | size_t offset, |
2059 | 0 | bool inplace) { |
2060 | 0 | GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a)); |
2061 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
2062 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
2063 | 0 | GGML_ASSERT(b->type == GGML_TYPE_F32); |
2064 | |
|
2065 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2066 | |
|
2067 | 0 | int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 }; |
2068 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
2069 | |
|
2070 | 0 | result->op = GGML_OP_ACC; |
2071 | 0 | result->src[0] = a; |
2072 | 0 | result->src[1] = b; |
2073 | |
|
2074 | 0 | return result; |
2075 | 0 | } |
2076 | | |
2077 | | struct ggml_tensor * ggml_acc( |
2078 | | struct ggml_context * ctx, |
2079 | | struct ggml_tensor * a, |
2080 | | struct ggml_tensor * b, |
2081 | | size_t nb1, |
2082 | | size_t nb2, |
2083 | | size_t nb3, |
2084 | 0 | size_t offset) { |
2085 | 0 | return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false); |
2086 | 0 | } |
2087 | | |
2088 | | struct ggml_tensor * ggml_acc_inplace( |
2089 | | struct ggml_context * ctx, |
2090 | | struct ggml_tensor * a, |
2091 | | struct ggml_tensor * b, |
2092 | | size_t nb1, |
2093 | | size_t nb2, |
2094 | | size_t nb3, |
2095 | 0 | size_t offset) { |
2096 | 0 | return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true); |
2097 | 0 | } |
2098 | | |
2099 | | // ggml_sub |
2100 | | |
2101 | | static struct ggml_tensor * ggml_sub_impl( |
2102 | | struct ggml_context * ctx, |
2103 | | struct ggml_tensor * a, |
2104 | | struct ggml_tensor * b, |
2105 | 0 | bool inplace) { |
2106 | 0 | GGML_ASSERT(ggml_can_repeat(b, a)); |
2107 | |
|
2108 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2109 | |
|
2110 | 0 | result->op = GGML_OP_SUB; |
2111 | 0 | result->src[0] = a; |
2112 | 0 | result->src[1] = b; |
2113 | |
|
2114 | 0 | return result; |
2115 | 0 | } |
2116 | | |
2117 | | struct ggml_tensor * ggml_sub( |
2118 | | struct ggml_context * ctx, |
2119 | | struct ggml_tensor * a, |
2120 | 0 | struct ggml_tensor * b) { |
2121 | 0 | return ggml_sub_impl(ctx, a, b, false); |
2122 | 0 | } |
2123 | | |
2124 | | struct ggml_tensor * ggml_sub_inplace( |
2125 | | struct ggml_context * ctx, |
2126 | | struct ggml_tensor * a, |
2127 | 0 | struct ggml_tensor * b) { |
2128 | 0 | return ggml_sub_impl(ctx, a, b, true); |
2129 | 0 | } |
2130 | | |
2131 | | // ggml_mul |
2132 | | |
2133 | | static struct ggml_tensor * ggml_mul_impl( |
2134 | | struct ggml_context * ctx, |
2135 | | struct ggml_tensor * a, |
2136 | | struct ggml_tensor * b, |
2137 | 0 | bool inplace) { |
2138 | 0 | GGML_ASSERT(ggml_can_repeat(b, a)); |
2139 | |
|
2140 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2141 | |
|
2142 | 0 | result->op = GGML_OP_MUL; |
2143 | 0 | result->src[0] = a; |
2144 | 0 | result->src[1] = b; |
2145 | |
|
2146 | 0 | return result; |
2147 | 0 | } |
2148 | | |
2149 | | struct ggml_tensor * ggml_mul( |
2150 | | struct ggml_context * ctx, |
2151 | | struct ggml_tensor * a, |
2152 | 0 | struct ggml_tensor * b) { |
2153 | 0 | return ggml_mul_impl(ctx, a, b, false); |
2154 | 0 | } |
2155 | | |
2156 | | struct ggml_tensor * ggml_mul_inplace( |
2157 | | struct ggml_context * ctx, |
2158 | | struct ggml_tensor * a, |
2159 | 0 | struct ggml_tensor * b) { |
2160 | 0 | return ggml_mul_impl(ctx, a, b, true); |
2161 | 0 | } |
2162 | | |
2163 | | // ggml_div |
2164 | | |
2165 | | static struct ggml_tensor * ggml_div_impl( |
2166 | | struct ggml_context * ctx, |
2167 | | struct ggml_tensor * a, |
2168 | | struct ggml_tensor * b, |
2169 | 0 | bool inplace) { |
2170 | 0 | GGML_ASSERT(ggml_can_repeat(b, a)); |
2171 | |
|
2172 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2173 | |
|
2174 | 0 | result->op = GGML_OP_DIV; |
2175 | 0 | result->src[0] = a; |
2176 | 0 | result->src[1] = b; |
2177 | |
|
2178 | 0 | return result; |
2179 | 0 | } |
2180 | | |
2181 | | struct ggml_tensor * ggml_div( |
2182 | | struct ggml_context * ctx, |
2183 | | struct ggml_tensor * a, |
2184 | 0 | struct ggml_tensor * b) { |
2185 | 0 | return ggml_div_impl(ctx, a, b, false); |
2186 | 0 | } |
2187 | | |
2188 | | struct ggml_tensor * ggml_div_inplace( |
2189 | | struct ggml_context * ctx, |
2190 | | struct ggml_tensor * a, |
2191 | 0 | struct ggml_tensor * b) { |
2192 | 0 | return ggml_div_impl(ctx, a, b, true); |
2193 | 0 | } |
2194 | | |
2195 | | // ggml_sqr |
2196 | | |
2197 | | static struct ggml_tensor * ggml_sqr_impl( |
2198 | | struct ggml_context * ctx, |
2199 | | struct ggml_tensor * a, |
2200 | 0 | bool inplace) { |
2201 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2202 | |
|
2203 | 0 | result->op = GGML_OP_SQR; |
2204 | 0 | result->src[0] = a; |
2205 | |
|
2206 | 0 | return result; |
2207 | 0 | } |
2208 | | |
2209 | | struct ggml_tensor * ggml_sqr( |
2210 | | struct ggml_context * ctx, |
2211 | 0 | struct ggml_tensor * a) { |
2212 | 0 | return ggml_sqr_impl(ctx, a, false); |
2213 | 0 | } |
2214 | | |
2215 | | struct ggml_tensor * ggml_sqr_inplace( |
2216 | | struct ggml_context * ctx, |
2217 | 0 | struct ggml_tensor * a) { |
2218 | 0 | return ggml_sqr_impl(ctx, a, true); |
2219 | 0 | } |
2220 | | |
2221 | | // ggml_sqrt |
2222 | | |
2223 | | static struct ggml_tensor * ggml_sqrt_impl( |
2224 | | struct ggml_context * ctx, |
2225 | | struct ggml_tensor * a, |
2226 | 0 | bool inplace) { |
2227 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2228 | |
|
2229 | 0 | result->op = GGML_OP_SQRT; |
2230 | 0 | result->src[0] = a; |
2231 | |
|
2232 | 0 | return result; |
2233 | 0 | } |
2234 | | |
2235 | | struct ggml_tensor * ggml_sqrt( |
2236 | | struct ggml_context * ctx, |
2237 | 0 | struct ggml_tensor * a) { |
2238 | 0 | return ggml_sqrt_impl(ctx, a, false); |
2239 | 0 | } |
2240 | | |
2241 | | struct ggml_tensor * ggml_sqrt_inplace( |
2242 | | struct ggml_context * ctx, |
2243 | 0 | struct ggml_tensor * a) { |
2244 | 0 | return ggml_sqrt_impl(ctx, a, true); |
2245 | 0 | } |
2246 | | |
2247 | | // ggml_log |
2248 | | |
2249 | | static struct ggml_tensor * ggml_log_impl( |
2250 | | struct ggml_context * ctx, |
2251 | | struct ggml_tensor * a, |
2252 | 0 | bool inplace) { |
2253 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2254 | |
|
2255 | 0 | result->op = GGML_OP_LOG; |
2256 | 0 | result->src[0] = a; |
2257 | |
|
2258 | 0 | return result; |
2259 | 0 | } |
2260 | | |
2261 | | struct ggml_tensor * ggml_log( |
2262 | | struct ggml_context * ctx, |
2263 | 0 | struct ggml_tensor * a) { |
2264 | 0 | return ggml_log_impl(ctx, a, false); |
2265 | 0 | } |
2266 | | |
2267 | | struct ggml_tensor * ggml_log_inplace( |
2268 | | struct ggml_context * ctx, |
2269 | 0 | struct ggml_tensor * a) { |
2270 | 0 | return ggml_log_impl(ctx, a, true); |
2271 | 0 | } |
2272 | | |
2273 | | struct ggml_tensor * ggml_expm1( |
2274 | | struct ggml_context * ctx, |
2275 | 0 | struct ggml_tensor * a) { |
2276 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1); |
2277 | 0 | } |
2278 | | |
2279 | | struct ggml_tensor * ggml_expm1_inplace( |
2280 | | struct ggml_context * ctx, |
2281 | 0 | struct ggml_tensor * a) { |
2282 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1); |
2283 | 0 | } |
2284 | | |
2285 | | struct ggml_tensor * ggml_softplus( |
2286 | | struct ggml_context * ctx, |
2287 | 0 | struct ggml_tensor * a) { |
2288 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS); |
2289 | 0 | } |
2290 | | |
2291 | | struct ggml_tensor * ggml_softplus_inplace( |
2292 | | struct ggml_context * ctx, |
2293 | 0 | struct ggml_tensor * a) { |
2294 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS); |
2295 | 0 | } |
2296 | | |
2297 | | // ggml_sin |
2298 | | |
2299 | | static struct ggml_tensor * ggml_sin_impl( |
2300 | | struct ggml_context * ctx, |
2301 | | struct ggml_tensor * a, |
2302 | 0 | bool inplace) { |
2303 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2304 | |
|
2305 | 0 | result->op = GGML_OP_SIN; |
2306 | 0 | result->src[0] = a; |
2307 | |
|
2308 | 0 | return result; |
2309 | 0 | } |
2310 | | |
2311 | | struct ggml_tensor * ggml_sin( |
2312 | | struct ggml_context * ctx, |
2313 | 0 | struct ggml_tensor * a) { |
2314 | 0 | return ggml_sin_impl(ctx, a, false); |
2315 | 0 | } |
2316 | | |
2317 | | struct ggml_tensor * ggml_sin_inplace( |
2318 | | struct ggml_context * ctx, |
2319 | 0 | struct ggml_tensor * a) { |
2320 | 0 | return ggml_sin_impl(ctx, a, true); |
2321 | 0 | } |
2322 | | |
2323 | | // ggml_cos |
2324 | | |
2325 | | static struct ggml_tensor * ggml_cos_impl( |
2326 | | struct ggml_context * ctx, |
2327 | | struct ggml_tensor * a, |
2328 | 0 | bool inplace) { |
2329 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2330 | |
|
2331 | 0 | result->op = GGML_OP_COS; |
2332 | 0 | result->src[0] = a; |
2333 | |
|
2334 | 0 | return result; |
2335 | 0 | } |
2336 | | |
2337 | | struct ggml_tensor * ggml_cos( |
2338 | | struct ggml_context * ctx, |
2339 | 0 | struct ggml_tensor * a) { |
2340 | 0 | return ggml_cos_impl(ctx, a, false); |
2341 | 0 | } |
2342 | | |
2343 | | struct ggml_tensor * ggml_cos_inplace( |
2344 | | struct ggml_context * ctx, |
2345 | 0 | struct ggml_tensor * a) { |
2346 | 0 | return ggml_cos_impl(ctx, a, true); |
2347 | 0 | } |
2348 | | |
2349 | | // ggml_sum |
2350 | | |
2351 | | struct ggml_tensor * ggml_sum( |
2352 | | struct ggml_context * ctx, |
2353 | 0 | struct ggml_tensor * a) { |
2354 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1); |
2355 | |
|
2356 | 0 | result->op = GGML_OP_SUM; |
2357 | 0 | result->src[0] = a; |
2358 | |
|
2359 | 0 | return result; |
2360 | 0 | } |
2361 | | |
2362 | | // ggml_sum_rows |
2363 | | |
2364 | | struct ggml_tensor * ggml_sum_rows( |
2365 | | struct ggml_context * ctx, |
2366 | 0 | struct ggml_tensor * a) { |
2367 | 0 | int64_t ne[GGML_MAX_DIMS] = { 1 }; |
2368 | 0 | for (int i = 1; i < GGML_MAX_DIMS; ++i) { |
2369 | 0 | ne[i] = a->ne[i]; |
2370 | 0 | } |
2371 | |
|
2372 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne); |
2373 | |
|
2374 | 0 | result->op = GGML_OP_SUM_ROWS; |
2375 | 0 | result->src[0] = a; |
2376 | |
|
2377 | 0 | return result; |
2378 | 0 | } |
2379 | | |
2380 | | // ggml_cumsum |
2381 | | |
2382 | | struct ggml_tensor * ggml_cumsum( |
2383 | | struct ggml_context * ctx, |
2384 | 0 | struct ggml_tensor * a) { |
2385 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
2386 | |
|
2387 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
2388 | |
|
2389 | 0 | result->op = GGML_OP_CUMSUM; |
2390 | 0 | result->src[0] = a; |
2391 | |
|
2392 | 0 | return result; |
2393 | 0 | } |
2394 | | |
2395 | | // ggml_mean |
2396 | | |
2397 | | struct ggml_tensor * ggml_mean( |
2398 | | struct ggml_context * ctx, |
2399 | 0 | struct ggml_tensor * a) { |
2400 | 0 | int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] }; |
2401 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
2402 | |
|
2403 | 0 | result->op = GGML_OP_MEAN; |
2404 | 0 | result->src[0] = a; |
2405 | |
|
2406 | 0 | return result; |
2407 | 0 | } |
2408 | | |
2409 | | // ggml_argmax |
2410 | | |
2411 | | struct ggml_tensor * ggml_argmax( |
2412 | | struct ggml_context * ctx, |
2413 | 0 | struct ggml_tensor * a) { |
2414 | 0 | GGML_ASSERT(ggml_is_matrix(a)); |
2415 | 0 | GGML_ASSERT(a->ne[0] <= INT32_MAX); |
2416 | |
|
2417 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]); |
2418 | |
|
2419 | 0 | result->op = GGML_OP_ARGMAX; |
2420 | 0 | result->src[0] = a; |
2421 | |
|
2422 | 0 | return result; |
2423 | 0 | } |
2424 | | |
2425 | | // ggml_count_equal |
2426 | | |
2427 | | struct ggml_tensor * ggml_count_equal( |
2428 | | struct ggml_context * ctx, |
2429 | | struct ggml_tensor * a, |
2430 | 0 | struct ggml_tensor * b) { |
2431 | 0 | GGML_ASSERT(ggml_are_same_shape(a, b)); |
2432 | |
|
2433 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1); |
2434 | |
|
2435 | 0 | result->op = GGML_OP_COUNT_EQUAL; |
2436 | 0 | result->src[0] = a; |
2437 | 0 | result->src[1] = b; |
2438 | |
|
2439 | 0 | return result; |
2440 | 0 | } |
2441 | | |
2442 | | // ggml_repeat |
2443 | | |
2444 | | struct ggml_tensor * ggml_repeat( |
2445 | | struct ggml_context * ctx, |
2446 | | struct ggml_tensor * a, |
2447 | 0 | struct ggml_tensor * b) { |
2448 | 0 | GGML_ASSERT(ggml_can_repeat(a, b)); |
2449 | |
|
2450 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne); |
2451 | |
|
2452 | 0 | result->op = GGML_OP_REPEAT; |
2453 | 0 | result->src[0] = a; |
2454 | |
|
2455 | 0 | return result; |
2456 | 0 | } |
2457 | | |
2458 | | struct ggml_tensor * ggml_repeat_4d( |
2459 | | struct ggml_context * ctx, |
2460 | | struct ggml_tensor * a, |
2461 | 0 | int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) { |
2462 | 0 | const bool can_repeat = ggml_is_empty(a) || ( |
2463 | 0 | (ne0 % a->ne[0] == 0) && |
2464 | 0 | (ne1 % a->ne[1] == 0) && |
2465 | 0 | (ne2 % a->ne[2] == 0) && |
2466 | 0 | (ne3 % a->ne[3] == 0) |
2467 | 0 | ); |
2468 | 0 | GGML_ASSERT(can_repeat); |
2469 | |
|
2470 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3); |
2471 | |
|
2472 | 0 | result->op = GGML_OP_REPEAT; |
2473 | 0 | result->src[0] = a; |
2474 | |
|
2475 | 0 | return result; |
2476 | 0 | } |
2477 | | |
2478 | | // ggml_repeat_back |
2479 | | |
2480 | | struct ggml_tensor * ggml_repeat_back( |
2481 | | struct ggml_context * ctx, |
2482 | | struct ggml_tensor * a, |
2483 | 0 | struct ggml_tensor * b) { |
2484 | 0 | GGML_ASSERT(ggml_can_repeat(b, a)); |
2485 | |
|
2486 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne); |
2487 | |
|
2488 | 0 | result->op = GGML_OP_REPEAT_BACK; |
2489 | 0 | result->src[0] = a; |
2490 | |
|
2491 | 0 | return result; |
2492 | 0 | } |
2493 | | |
2494 | | // ggml_concat |
2495 | | |
2496 | | struct ggml_tensor * ggml_concat( |
2497 | | struct ggml_context * ctx, |
2498 | | struct ggml_tensor * a, |
2499 | | struct ggml_tensor * b, |
2500 | 0 | int dim) { |
2501 | 0 | GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS); |
2502 | 0 | GGML_ASSERT(a->type == b->type); |
2503 | |
|
2504 | 0 | int64_t ne[GGML_MAX_DIMS]; |
2505 | 0 | for (int d = 0; d < GGML_MAX_DIMS; ++d) { |
2506 | 0 | if (d == dim) { |
2507 | 0 | ne[d] = a->ne[d] + b->ne[d]; |
2508 | 0 | continue; |
2509 | 0 | } |
2510 | 0 | GGML_ASSERT(a->ne[d] == b->ne[d]); |
2511 | 0 | ne[d] = a->ne[d]; |
2512 | 0 | } |
2513 | |
|
2514 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne); |
2515 | |
|
2516 | 0 | ggml_set_op_params_i32(result, 0, dim); |
2517 | |
|
2518 | 0 | result->op = GGML_OP_CONCAT; |
2519 | 0 | result->src[0] = a; |
2520 | 0 | result->src[1] = b; |
2521 | |
|
2522 | 0 | return result; |
2523 | 0 | } |
2524 | | |
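ggml_concat lets the two inputs differ only along dim; every other dimension must match, and the result grows along dim. For example (shapes written [ne0, ne1, ne2, ne3], ctx as above):

    struct ggml_tensor * x = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 16, 10, 4);
    struct ggml_tensor * y = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 16,  6, 4);

    struct ggml_tensor * z = ggml_concat(ctx, x, y, /*dim =*/ 1); // -> [16, 16, 4, 1]
    (void) z;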
2525 | | // ggml_abs |
2526 | | |
2527 | | struct ggml_tensor * ggml_abs( |
2528 | | struct ggml_context * ctx, |
2529 | 0 | struct ggml_tensor * a) { |
2530 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_ABS); |
2531 | 0 | } |
2532 | | |
2533 | | struct ggml_tensor * ggml_abs_inplace( |
2534 | | struct ggml_context * ctx, |
2535 | 0 | struct ggml_tensor * a) { |
2536 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS); |
2537 | 0 | } |
2538 | | |
2539 | | // ggml_sgn |
2540 | | |
2541 | | struct ggml_tensor * ggml_sgn( |
2542 | | struct ggml_context * ctx, |
2543 | 0 | struct ggml_tensor * a) { |
2544 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_SGN); |
2545 | 0 | } |
2546 | | |
2547 | | struct ggml_tensor * ggml_sgn_inplace( |
2548 | | struct ggml_context * ctx, |
2549 | 0 | struct ggml_tensor * a) { |
2550 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN); |
2551 | 0 | } |
2552 | | |
2553 | | // ggml_neg |
2554 | | |
2555 | | struct ggml_tensor * ggml_neg( |
2556 | | struct ggml_context * ctx, |
2557 | 0 | struct ggml_tensor * a) { |
2558 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_NEG); |
2559 | 0 | } |
2560 | | |
2561 | | struct ggml_tensor * ggml_neg_inplace( |
2562 | | struct ggml_context * ctx, |
2563 | 0 | struct ggml_tensor * a) { |
2564 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG); |
2565 | 0 | } |
2566 | | |
2567 | | // ggml_step |
2568 | | |
2569 | | struct ggml_tensor * ggml_step( |
2570 | | struct ggml_context * ctx, |
2571 | 0 | struct ggml_tensor * a) { |
2572 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_STEP); |
2573 | 0 | } |
2574 | | |
2575 | | struct ggml_tensor * ggml_step_inplace( |
2576 | | struct ggml_context * ctx, |
2577 | 0 | struct ggml_tensor * a) { |
2578 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP); |
2579 | 0 | } |
2580 | | |
2581 | | // ggml_tanh |
2582 | | |
2583 | | struct ggml_tensor * ggml_tanh( |
2584 | | struct ggml_context * ctx, |
2585 | 0 | struct ggml_tensor * a) { |
2586 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_TANH); |
2587 | 0 | } |
2588 | | |
2589 | | struct ggml_tensor * ggml_tanh_inplace( |
2590 | | struct ggml_context * ctx, |
2591 | 0 | struct ggml_tensor * a) { |
2592 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH); |
2593 | 0 | } |
2594 | | |
2595 | | // ggml_elu |
2596 | | |
2597 | | struct ggml_tensor * ggml_elu( |
2598 | | struct ggml_context * ctx, |
2599 | 0 | struct ggml_tensor * a) { |
2600 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_ELU); |
2601 | 0 | } |
2602 | | |
2603 | | struct ggml_tensor * ggml_elu_inplace( |
2604 | | struct ggml_context * ctx, |
2605 | 0 | struct ggml_tensor * a) { |
2606 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU); |
2607 | 0 | } |
2608 | | |
2609 | | // ggml_relu |
2610 | | |
2611 | | struct ggml_tensor * ggml_relu( |
2612 | | struct ggml_context * ctx, |
2613 | 0 | struct ggml_tensor * a) { |
2614 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_RELU); |
2615 | 0 | } |
2616 | | |
2617 | | struct ggml_tensor * ggml_relu_inplace( |
2618 | | struct ggml_context * ctx, |
2619 | 0 | struct ggml_tensor * a) { |
2620 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU); |
2621 | 0 | } |
2622 | | |
2623 | | // ggml_leaky_relu |
2624 | | |
2625 | | struct ggml_tensor * ggml_leaky_relu( |
2626 | | struct ggml_context * ctx, |
2627 | | struct ggml_tensor * a, |
2628 | | float negative_slope, |
2629 | 0 | bool inplace) { |
2630 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
2631 | |
|
2632 | 0 | ggml_set_op_params(result, &negative_slope, sizeof(negative_slope)); |
2633 | |
|
2634 | 0 | result->op = GGML_OP_LEAKY_RELU; |
2635 | 0 | result->src[0] = a; |
2636 | |
|
2637 | 0 | return result; |
2638 | 0 | } |
2639 | | |
2640 | | // ggml_sigmoid |
2641 | | |
2642 | | struct ggml_tensor * ggml_sigmoid( |
2643 | | struct ggml_context * ctx, |
2644 | 0 | struct ggml_tensor * a) { |
2645 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID); |
2646 | 0 | } |
2647 | | |
2648 | | struct ggml_tensor * ggml_sigmoid_inplace( |
2649 | | struct ggml_context * ctx, |
2650 | 0 | struct ggml_tensor * a) { |
2651 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID); |
2652 | 0 | } |
2653 | | |
2654 | | // ggml_gelu |
2655 | | |
2656 | | struct ggml_tensor * ggml_gelu( |
2657 | | struct ggml_context * ctx, |
2658 | 0 | struct ggml_tensor * a) { |
2659 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_GELU); |
2660 | 0 | } |
2661 | | |
2662 | | struct ggml_tensor * ggml_gelu_inplace( |
2663 | | struct ggml_context * ctx, |
2664 | 0 | struct ggml_tensor * a) { |
2665 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU); |
2666 | 0 | } |
2667 | | |
2668 | | // ggml_gelu_erf |
2669 | | |
2670 | | struct ggml_tensor * ggml_gelu_erf( |
2671 | | struct ggml_context * ctx, |
2672 | 0 | struct ggml_tensor * a) { |
2673 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF); |
2674 | 0 | } |
2675 | | |
2676 | | struct ggml_tensor * ggml_gelu_erf_inplace( |
2677 | | struct ggml_context * ctx, |
2678 | 0 | struct ggml_tensor * a) { |
2679 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF); |
2680 | 0 | } |
2681 | | |
2682 | | // ggml_gelu_quick |
2683 | | |
2684 | | struct ggml_tensor * ggml_gelu_quick( |
2685 | | struct ggml_context * ctx, |
2686 | 0 | struct ggml_tensor * a) { |
2687 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK); |
2688 | 0 | } |
2689 | | |
2690 | | struct ggml_tensor * ggml_gelu_quick_inplace( |
2691 | | struct ggml_context * ctx, |
2692 | 0 | struct ggml_tensor * a) { |
2693 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK); |
2694 | 0 | } |
2695 | | |
2696 | | // ggml_silu |
2697 | | |
2698 | | struct ggml_tensor * ggml_silu( |
2699 | | struct ggml_context * ctx, |
2700 | 0 | struct ggml_tensor * a) { |
2701 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_SILU); |
2702 | 0 | } |
2703 | | |
2704 | | struct ggml_tensor * ggml_silu_inplace( |
2705 | | struct ggml_context * ctx, |
2706 | 0 | struct ggml_tensor * a) { |
2707 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU); |
2708 | 0 | } |
2709 | | |
2710 | | // ggml_xielu |
2711 | | |
2712 | | struct ggml_tensor * ggml_xielu( |
2713 | | struct ggml_context * ctx, |
2714 | | struct ggml_tensor * a, |
2715 | | float alpha_n, |
2716 | | float alpha_p, |
2717 | | float beta, |
2718 | 0 | float eps) { |
2719 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
2720 | |
|
2721 | 0 | ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU); |
2722 | 0 | ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n)); |
2723 | 0 | ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p)); |
2724 | 0 | ggml_set_op_params_f32(result, 3, beta); |
2725 | 0 | ggml_set_op_params_f32(result, 4, eps); |
2726 | |
|
2727 | 0 | result->op = GGML_OP_UNARY; |
2728 | 0 | result->src[0] = a; |
2729 | |
|
2730 | 0 | return result; |
2731 | 0 | } |
2732 | | |
2733 | | // ggml_silu_back |
2734 | | |
2735 | | struct ggml_tensor * ggml_silu_back( |
2736 | | struct ggml_context * ctx, |
2737 | | struct ggml_tensor * a, |
2738 | 0 | struct ggml_tensor * b) { |
2739 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
2740 | |
|
2741 | 0 | result->op = GGML_OP_SILU_BACK; |
2742 | 0 | result->src[0] = a; |
2743 | 0 | result->src[1] = b; |
2744 | |
|
2745 | 0 | return result; |
2746 | 0 | } |
2747 | | |
2748 | | // ggml_hardswish |
2749 | | |
2750 | | struct ggml_tensor * ggml_hardswish( |
2751 | | struct ggml_context * ctx, |
2752 | 0 | struct ggml_tensor * a) { |
2753 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH); |
2754 | 0 | } |
2755 | | |
2756 | | // ggml_hardsigmoid |
2757 | | |
2758 | | struct ggml_tensor * ggml_hardsigmoid( |
2759 | | struct ggml_context * ctx, |
2760 | 0 | struct ggml_tensor * a) { |
2761 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID); |
2762 | 0 | } |
2763 | | |
2764 | | // ggml_exp |
2765 | | |
2766 | | struct ggml_tensor * ggml_exp( |
2767 | | struct ggml_context * ctx, |
2768 | 0 | struct ggml_tensor * a) { |
2769 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_EXP); |
2770 | 0 | } |
2771 | | |
2772 | | struct ggml_tensor * ggml_exp_inplace( |
2773 | | struct ggml_context * ctx, |
2774 | 0 | struct ggml_tensor * a) { |
2775 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP); |
2776 | 0 | } |
2777 | | |
2778 | | // ggml_glu |
2779 | | |
2780 | | static struct ggml_tensor * ggml_glu_impl( |
2781 | | struct ggml_context * ctx, |
2782 | | struct ggml_tensor * a, |
2783 | | struct ggml_tensor * b, |
2784 | | enum ggml_glu_op op, |
2785 | 0 | bool swapped) { |
2786 | 0 | GGML_ASSERT(ggml_is_contiguous_1(a)); |
2787 | |
|
2788 | 0 | if (b) { |
2789 | 0 | GGML_ASSERT(ggml_is_contiguous_1(b)); |
2790 | 0 | GGML_ASSERT(ggml_are_same_shape(a, b)); |
2791 | 0 | GGML_ASSERT(a->type == b->type); |
2792 | 0 | } |
2793 | |
|
2794 | 0 | int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i]; |
2795 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0); |
2796 | |
|
2797 | 0 | ggml_set_op_params_i32(result, 0, (int32_t) op); |
2798 | 0 | ggml_set_op_params_i32(result, 1, (int32_t) swapped); |
2799 | |
|
2800 | 0 | result->op = GGML_OP_GLU; |
2801 | 0 | result->src[0] = a; |
2802 | 0 | result->src[1] = b; |
2803 | |
|
2804 | 0 | return result; |
2805 | 0 | } |
2806 | | |
2807 | | // ggml_floor |
2808 | | |
2809 | | struct ggml_tensor * ggml_floor( |
2810 | | struct ggml_context * ctx, |
2811 | 0 | struct ggml_tensor * a) { |
2812 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR); |
2813 | 0 | } |
2814 | | |
2815 | | struct ggml_tensor * ggml_floor_inplace( |
2816 | | struct ggml_context * ctx, |
2817 | 0 | struct ggml_tensor * a) { |
2818 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR); |
2819 | 0 | } |
2820 | | |
2821 | | // ggml_ceil |
2822 | | |
2823 | | struct ggml_tensor * ggml_ceil( |
2824 | | struct ggml_context * ctx, |
2825 | 0 | struct ggml_tensor * a) { |
2826 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL); |
2827 | 0 | } |
2828 | | |
2829 | | struct ggml_tensor * ggml_ceil_inplace( |
2830 | | struct ggml_context * ctx, |
2831 | 0 | struct ggml_tensor * a) { |
2832 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL); |
2833 | 0 | } |
2834 | | |
2835 | | // ggml_round |
2836 | | |
2837 | | struct ggml_tensor * ggml_round( |
2838 | | struct ggml_context * ctx, |
2839 | 0 | struct ggml_tensor * a) { |
2840 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND); |
2841 | 0 | } |
2842 | | |
2843 | | struct ggml_tensor * ggml_round_inplace( |
2844 | | struct ggml_context * ctx, |
2845 | 0 | struct ggml_tensor * a) { |
2846 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND); |
2847 | 0 | } |
2848 | | |
2849 | | // ggml_trunc |
2850 | | |
2851 | | struct ggml_tensor * ggml_trunc( |
2852 | | struct ggml_context * ctx, |
2853 | 0 | struct ggml_tensor * a) { |
2854 | 0 | return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC); |
2855 | 0 | } |
2856 | | |
2857 | | struct ggml_tensor * ggml_trunc_inplace( |
2858 | | struct ggml_context * ctx, |
2859 | 0 | struct ggml_tensor * a) { |
2860 | 0 | return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC); |
2861 | 0 | } |
2862 | | |
2863 | | struct ggml_tensor * ggml_glu( |
2864 | | struct ggml_context * ctx, |
2865 | | struct ggml_tensor * a, |
2866 | | enum ggml_glu_op op, |
2867 | 0 | bool swapped) { |
2868 | 0 | return ggml_glu_impl(ctx, a, NULL, op, swapped); |
2869 | 0 | } |
2870 | | |
2871 | | struct ggml_tensor * ggml_glu_split( |
2872 | | struct ggml_context * ctx, |
2873 | | struct ggml_tensor * a, |
2874 | | struct ggml_tensor * b, |
2875 | 0 | enum ggml_glu_op op) { |
2876 | 0 | return ggml_glu_impl(ctx, a, b, op, false); |
2877 | 0 | } |
2878 | | |
2879 | | // ggml_reglu |
2880 | | |
2881 | | struct ggml_tensor * ggml_reglu( |
2882 | | struct ggml_context * ctx, |
2883 | 0 | struct ggml_tensor * a) { |
2884 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false); |
2885 | 0 | } |
2886 | | |
2887 | | struct ggml_tensor * ggml_reglu_swapped( |
2888 | | struct ggml_context * ctx, |
2889 | 0 | struct ggml_tensor * a) { |
2890 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true); |
2891 | 0 | } |
2892 | | |
2893 | | struct ggml_tensor * ggml_reglu_split( |
2894 | | struct ggml_context * ctx, |
2895 | | struct ggml_tensor * a, |
2896 | 0 | struct ggml_tensor * b) { |
2897 | 0 | return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false); |
2898 | 0 | } |
2899 | | |
2900 | | // ggml_geglu |
2901 | | |
2902 | | struct ggml_tensor * ggml_geglu( |
2903 | | struct ggml_context * ctx, |
2904 | 0 | struct ggml_tensor * a) { |
2905 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false); |
2906 | 0 | } |
2907 | | |
2908 | | struct ggml_tensor * ggml_geglu_swapped( |
2909 | | struct ggml_context * ctx, |
2910 | 0 | struct ggml_tensor * a) { |
2911 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true); |
2912 | 0 | } |
2913 | | |
2914 | | struct ggml_tensor * ggml_geglu_split( |
2915 | | struct ggml_context * ctx, |
2916 | | struct ggml_tensor * a, |
2917 | 0 | struct ggml_tensor * b) { |
2918 | 0 | return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false); |
2919 | 0 | } |
2920 | | |
2921 | | // ggml_swiglu |
2922 | | |
2923 | | struct ggml_tensor * ggml_swiglu( |
2924 | | struct ggml_context * ctx, |
2925 | 0 | struct ggml_tensor * a) { |
2926 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false); |
2927 | 0 | } |
2928 | | |
2929 | | struct ggml_tensor * ggml_swiglu_swapped( |
2930 | | struct ggml_context * ctx, |
2931 | 0 | struct ggml_tensor * a) { |
2932 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true); |
2933 | 0 | } |
2934 | | |
2935 | | struct ggml_tensor * ggml_swiglu_split( |
2936 | | struct ggml_context * ctx, |
2937 | | struct ggml_tensor * a, |
2938 | 0 | struct ggml_tensor * b) { |
2939 | 0 | return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false); |
2940 | 0 | } |
2941 | | |
2942 | | // ggml_geglu_erf |
2943 | | |
2944 | | struct ggml_tensor * ggml_geglu_erf( |
2945 | | struct ggml_context * ctx, |
2946 | 0 | struct ggml_tensor * a) { |
2947 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false); |
2948 | 0 | } |
2949 | | |
2950 | | struct ggml_tensor * ggml_geglu_erf_swapped( |
2951 | | struct ggml_context * ctx, |
2952 | 0 | struct ggml_tensor * a) { |
2953 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true); |
2954 | 0 | } |
2955 | | |
2956 | | struct ggml_tensor * ggml_geglu_erf_split( |
2957 | | struct ggml_context * ctx, |
2958 | | struct ggml_tensor * a, |
2959 | 0 | struct ggml_tensor * b) { |
2960 | 0 | return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false); |
2961 | 0 | } |
2962 | | |
2963 | | // ggml_geglu_quick |
2964 | | |
2965 | | struct ggml_tensor * ggml_geglu_quick( |
2966 | | struct ggml_context * ctx, |
2967 | 0 | struct ggml_tensor * a) { |
2968 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false); |
2969 | 0 | } |
2970 | | |
2971 | | struct ggml_tensor * ggml_geglu_quick_swapped( |
2972 | | struct ggml_context * ctx, |
2973 | 0 | struct ggml_tensor * a) { |
2974 | 0 | return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true); |
2975 | 0 | } |
2976 | | |
2977 | | struct ggml_tensor * ggml_geglu_quick_split( |
2978 | | struct ggml_context * ctx, |
2979 | | struct ggml_tensor * a, |
2980 | 0 | struct ggml_tensor * b) { |
2981 | 0 | return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false); |
2982 | 0 | } |
2983 | | |
2984 | | struct ggml_tensor * ggml_swiglu_oai( |
2985 | | struct ggml_context * ctx, |
2986 | | struct ggml_tensor * a, |
2987 | | struct ggml_tensor * b, |
2988 | | float alpha, |
2989 | 0 | float limit) { |
2990 | 0 | struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false); |
2991 | 0 | ggml_set_op_params_f32(result, 2, alpha); |
2992 | 0 | ggml_set_op_params_f32(result, 3, limit); |
2993 | |
|
2994 | 0 | return result; |
2995 | 0 | } |
2996 | | |
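For the fused variants (b == NULL), ggml_glu_impl halves ne[0] because the value and the gate are taken from the two halves of each row; the *_split variants keep the full row since value and gate come from separate tensors of equal shape. A shape-only sketch using the SwiGLU members of the family (which half plays which role depends on the swapped flag and the backend kernels; ctx as above):

    // fused: one tensor carrying both halves along dim 0
    struct ggml_tensor * up   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2*4096, 32);
    struct ggml_tensor * glu  = ggml_swiglu(ctx, up);          // -> [4096, 32]

    // split: two tensors of the same shape
    struct ggml_tensor * v    = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32);
    struct ggml_tensor * g    = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32);
    struct ggml_tensor * glu2 = ggml_swiglu_split(ctx, v, g);  // -> [4096, 32]
    (void) glu; (void) glu2;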
2997 | | // ggml_norm |
2998 | | |
2999 | | static struct ggml_tensor * ggml_norm_impl( |
3000 | | struct ggml_context * ctx, |
3001 | | struct ggml_tensor * a, |
3002 | | float eps, |
3003 | 0 | bool inplace) { |
3004 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3005 | |
|
3006 | 0 | ggml_set_op_params(result, &eps, sizeof(eps)); |
3007 | |
|
3008 | 0 | result->op = GGML_OP_NORM; |
3009 | 0 | result->src[0] = a; |
3010 | |
|
3011 | 0 | return result; |
3012 | 0 | } |
3013 | | |
3014 | | struct ggml_tensor * ggml_norm( |
3015 | | struct ggml_context * ctx, |
3016 | | struct ggml_tensor * a, |
3017 | 0 | float eps) { |
3018 | 0 | return ggml_norm_impl(ctx, a, eps, false); |
3019 | 0 | } |
3020 | | |
3021 | | struct ggml_tensor * ggml_norm_inplace( |
3022 | | struct ggml_context * ctx, |
3023 | | struct ggml_tensor * a, |
3024 | 0 | float eps) { |
3025 | 0 | return ggml_norm_impl(ctx, a, eps, true); |
3026 | 0 | } |
3027 | | |
3028 | | // ggml_rms_norm |
3029 | | |
3030 | | static struct ggml_tensor * ggml_rms_norm_impl( |
3031 | | struct ggml_context * ctx, |
3032 | | struct ggml_tensor * a, |
3033 | | float eps, |
3034 | 0 | bool inplace) { |
3035 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3036 | |
|
3037 | 0 | ggml_set_op_params(result, &eps, sizeof(eps)); |
3038 | |
|
3039 | 0 | result->op = GGML_OP_RMS_NORM; |
3040 | 0 | result->src[0] = a; |
3041 | |
|
3042 | 0 | return result; |
3043 | 0 | } |
3044 | | |
3045 | | struct ggml_tensor * ggml_rms_norm( |
3046 | | struct ggml_context * ctx, |
3047 | | struct ggml_tensor * a, |
3048 | 0 | float eps) { |
3049 | 0 | return ggml_rms_norm_impl(ctx, a, eps, false); |
3050 | 0 | } |
3051 | | |
3052 | | struct ggml_tensor * ggml_rms_norm_inplace( |
3053 | | struct ggml_context * ctx, |
3054 | | struct ggml_tensor * a, |
3055 | 0 | float eps) { |
3056 | 0 | return ggml_rms_norm_impl(ctx, a, eps, true); |
3057 | 0 | } |
3058 | | |
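For reference, GGML_OP_RMS_NORM normalizes each row by the root mean square of its elements; a plain-C restatement of that definition (an illustration of the math, not the ggml kernel itself):

    #include <math.h>

    // y[i] = x[i] / sqrt(mean(x^2) + eps), applied to one row of length n
    static void rms_norm_row_ref(const float * x, float * y, int n, float eps) {
        float sum = 0.0f;
        for (int i = 0; i < n; i++) {
            sum += x[i]*x[i];
        }
        const float scale = 1.0f/sqrtf(sum/n + eps);
        for (int i = 0; i < n; i++) {
            y[i] = x[i]*scale;
        }
    }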
3059 | | // ggml_rms_norm_back |
3060 | | |
3061 | | struct ggml_tensor * ggml_rms_norm_back( |
3062 | | struct ggml_context * ctx, |
3063 | | struct ggml_tensor * a, |
3064 | | struct ggml_tensor * b, |
3065 | 0 | float eps) { |
3066 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
3067 | |
|
3068 | 0 | ggml_set_op_params(result, &eps, sizeof(eps)); |
3069 | |
|
3070 | 0 | result->op = GGML_OP_RMS_NORM_BACK; |
3071 | 0 | result->src[0] = a; |
3072 | 0 | result->src[1] = b; |
3073 | |
|
3074 | 0 | return result; |
3075 | 0 | } |
3076 | | |
3077 | | // ggml_group_norm |
3078 | | |
3079 | | static struct ggml_tensor * ggml_group_norm_impl( |
3080 | | struct ggml_context * ctx, |
3081 | | struct ggml_tensor * a, |
3082 | | int n_groups, |
3083 | | float eps, |
3084 | 0 | bool inplace) { |
3085 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3086 | |
|
3087 | 0 | ggml_set_op_params_i32(result, 0, n_groups); |
3088 | 0 | ggml_set_op_params_f32(result, 1, eps); |
3089 | |
|
3090 | 0 | result->op = GGML_OP_GROUP_NORM; |
3091 | 0 | result->src[0] = a; |
3092 | |
|
3093 | 0 | return result; |
3094 | 0 | } |
3095 | | |
3096 | | struct ggml_tensor * ggml_group_norm( |
3097 | | struct ggml_context * ctx, |
3098 | | struct ggml_tensor * a, |
3099 | | int n_groups, |
3100 | 0 | float eps) { |
3101 | 0 | return ggml_group_norm_impl(ctx, a, n_groups, eps, false); |
3102 | 0 | } |
3103 | | |
3104 | | struct ggml_tensor * ggml_group_norm_inplace( |
3105 | | struct ggml_context * ctx, |
3106 | | struct ggml_tensor * a, |
3107 | | int n_groups, |
3108 | 0 | float eps) { |
3109 | 0 | return ggml_group_norm_impl(ctx, a, n_groups, eps, true); |
3110 | 0 | } |
3111 | | |
3112 | | // ggml_l2_norm |
3113 | | |
3114 | | static struct ggml_tensor * ggml_l2_norm_impl( |
3115 | | struct ggml_context * ctx, |
3116 | | struct ggml_tensor * a, |
3117 | | float eps, |
3118 | 0 | bool inplace) { |
3119 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3120 | |
|
3121 | 0 | ggml_set_op_params_f32(result, 0, eps); |
3122 | |
|
3123 | 0 | result->op = GGML_OP_L2_NORM; |
3124 | 0 | result->src[0] = a; |
3125 | |
|
3126 | 0 | return result; |
3127 | 0 | } |
3128 | | |
3129 | | struct ggml_tensor * ggml_l2_norm( |
3130 | | struct ggml_context * ctx, |
3131 | | struct ggml_tensor * a, |
3132 | 0 | float eps) { |
3133 | 0 | return ggml_l2_norm_impl(ctx, a, eps, false); |
3134 | 0 | } |
3135 | | |
3136 | | struct ggml_tensor * ggml_l2_norm_inplace( |
3137 | | struct ggml_context * ctx, |
3138 | | struct ggml_tensor * a, |
3139 | 0 | float eps) { |
3140 | 0 | return ggml_l2_norm_impl(ctx, a, eps, true); |
3141 | 0 | } |
3142 | | |
3143 | | // ggml_mul_mat |
3144 | | |
3145 | 0 | static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
3146 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
3147 | |
|
3148 | 0 | return (t0->ne[0] == t1->ne[0]) && |
3149 | 0 | (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable |
3150 | 0 | (t1->ne[3]%t0->ne[3] == 0); |
3151 | 0 | } |
3152 | | |
3153 | | struct ggml_tensor * ggml_mul_mat( |
3154 | | struct ggml_context * ctx, |
3155 | | struct ggml_tensor * a, |
3156 | 0 | struct ggml_tensor * b) { |
3157 | 0 | GGML_ASSERT(ggml_can_mul_mat(a, b)); |
3158 | 0 | GGML_ASSERT(!ggml_is_transposed(a)); |
3159 | |
|
3160 | 0 | const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] }; |
3161 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
3162 | |
|
3163 | 0 | result->op = GGML_OP_MUL_MAT; |
3164 | 0 | result->src[0] = a; |
3165 | 0 | result->src[1] = b; |
3166 | |
|
3167 | 0 | return result; |
3168 | 0 | } |
3169 | | |
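ggml_mul_mat requires both operands to share ne[0] (the reduction dimension) and produces an F32 result shaped [a->ne[1], b->ne[1], b->ne[2], b->ne[3]], i.e. a plays the role of the (transposed) weight matrix. A shape-only sketch, ctx as above:

    // K = 4096 shared features, M = 11008 weight rows, N = 32 tokens
    struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 11008); // [K, M]
    struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4096, 32);    // [K, N]

    struct ggml_tensor * y = ggml_mul_mat(ctx, w, x); // -> [11008, 32, 1, 1], type F32
    (void) y;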
3170 | | void ggml_mul_mat_set_prec( |
3171 | | struct ggml_tensor * a, |
3172 | 0 | enum ggml_prec prec) { |
3173 | 0 | GGML_ASSERT(a->op == GGML_OP_MUL_MAT); |
3174 | |
|
3175 | 0 | const int32_t prec_i32 = (int32_t) prec; |
3176 | |
|
3177 | 0 | ggml_set_op_params_i32(a, 0, prec_i32); |
3178 | 0 | } |
3179 | | |
3180 | | // ggml_mul_mat_id |
3181 | | |
3182 | | /* |
3183 | | c = ggml_mul_mat_id(ctx, as, b, ids); |
3184 | | |
3185 | | as -> [cols, rows, n_expert] |
3186 | | b -> [cols, n_expert_used, n_tokens] |
3187 | | ids -> [n_expert_used, n_tokens] (i32) |
3188 | | c -> [rows, n_expert_used, n_tokens] |
3189 | | |
3190 | | in b, n_expert_used can be broadcast to match the n_expert_used of ids |
3191 | | |
3192 | | c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids |
3193 | | */ |
3194 | | struct ggml_tensor * ggml_mul_mat_id( |
3195 | | struct ggml_context * ctx, |
3196 | | struct ggml_tensor * as, |
3197 | | struct ggml_tensor * b, |
3198 | 0 | struct ggml_tensor * ids) { |
3199 | 0 | GGML_ASSERT(!ggml_is_transposed(as)); |
3200 | 0 | GGML_ASSERT(ids->type == GGML_TYPE_I32); |
3201 | |
|
3202 | 0 | GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert) |
3203 | 0 | GGML_ASSERT(b->ne[3] == 1); // b is 3d |
3204 | 0 | GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d |
3205 | 0 | GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row |
3206 | 0 | GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat |
3207 | 0 | GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast |
3208 | |
|
3209 | 0 | const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 }; |
3210 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
3211 | |
|
3212 | 0 | result->op = GGML_OP_MUL_MAT_ID; |
3213 | 0 | result->src[0] = as; |
3214 | 0 | result->src[1] = b; |
3215 | 0 | result->src[2] = ids; |
3216 | |
|
3217 | 0 | return result; |
3218 | 0 | } |
3219 | | |
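Putting concrete numbers on the comment above, a shape-only sketch for a mixture-of-experts matmul with 8 experts, 2 experts used per token and 32 tokens (only the shapes below are checked by the asserts in ggml_mul_mat_id; ctx as above):

    struct ggml_tensor * as  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 4096, 14336, 8); // [cols, rows, n_expert]
    struct ggml_tensor * b   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 4096, 2, 32);    // [cols, n_expert_used, n_tokens]
    struct ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, 2, 32);          // [n_expert_used, n_tokens]

    struct ggml_tensor * c = ggml_mul_mat_id(ctx, as, b, ids); // -> [14336, 2, 32, 1]
    (void) c;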
3220 | | // ggml_out_prod |
3221 | | |
3222 | 0 | static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { |
3223 | 0 | static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
3224 | |
|
3225 | 0 | return (t0->ne[1] == t1->ne[1]) && |
3226 | 0 | (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable |
3227 | 0 | (t1->ne[3]%t0->ne[3] == 0); |
3228 | 0 | } |
3229 | | |
3230 | | struct ggml_tensor * ggml_out_prod( |
3231 | | struct ggml_context * ctx, |
3232 | | struct ggml_tensor * a, |
3233 | 0 | struct ggml_tensor * b) { |
3234 | 0 | GGML_ASSERT(ggml_can_out_prod(a, b)); |
3235 | 0 | GGML_ASSERT(!ggml_is_transposed(a)); |
3236 | | |
3237 | | // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3] |
3238 | 0 | const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] }; |
3239 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
3240 | |
|
3241 | 0 | result->op = GGML_OP_OUT_PROD; |
3242 | 0 | result->src[0] = a; |
3243 | 0 | result->src[1] = b; |
3244 | |
|
3245 | 0 | return result; |
3246 | 0 | } |
3247 | | |
3248 | | // ggml_scale |
3249 | | |
3250 | | static struct ggml_tensor * ggml_scale_impl( |
3251 | | struct ggml_context * ctx, |
3252 | | struct ggml_tensor * a, |
3253 | | float s, |
3254 | | float b, |
3255 | 0 | bool inplace) { |
3256 | 0 | GGML_ASSERT(ggml_is_padded_1d(a)); |
3257 | |
|
3258 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3259 | |
|
3260 | 0 | float params[2] = { s, b }; |
3261 | 0 | ggml_set_op_params(result, &params, sizeof(params)); |
3262 | |
|
3263 | 0 | result->op = GGML_OP_SCALE; |
3264 | 0 | result->src[0] = a; |
3265 | |
|
3266 | 0 | return result; |
3267 | 0 | } |
3268 | | |
3269 | | struct ggml_tensor * ggml_scale( |
3270 | | struct ggml_context * ctx, |
3271 | | struct ggml_tensor * a, |
3272 | 0 | float s) { |
3273 | 0 | return ggml_scale_impl(ctx, a, s, 0.0, false); |
3274 | 0 | } |
3275 | | |
3276 | | struct ggml_tensor * ggml_scale_inplace( |
3277 | | struct ggml_context * ctx, |
3278 | | struct ggml_tensor * a, |
3279 | 0 | float s) { |
3280 | 0 | return ggml_scale_impl(ctx, a, s, 0.0, true); |
3281 | 0 | } |
3282 | | |
3283 | | struct ggml_tensor * ggml_scale_bias( |
3284 | | struct ggml_context * ctx, |
3285 | | struct ggml_tensor * a, |
3286 | | float s, |
3287 | 0 | float b) { |
3288 | 0 | return ggml_scale_impl(ctx, a, s, b, false); |
3289 | 0 | } |
3290 | | |
3291 | | struct ggml_tensor * ggml_scale_bias_inplace( |
3292 | | struct ggml_context * ctx, |
3293 | | struct ggml_tensor * a, |
3294 | | float s, |
3295 | 0 | float b) { |
3296 | 0 | return ggml_scale_impl(ctx, a, s, b, true); |
3297 | 0 | } |
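// Example (sketch): GGML_OP_SCALE computes a*s + b element-wise, so ggml_scale() is a plain
// multiply and ggml_scale_bias() adds a constant offset as well. `x` is a hypothetical F32
// tensor created elsewhere in an initialized context.
static struct ggml_tensor * scale_bias_sketch(struct ggml_context * ctx, struct ggml_tensor * x) {
    return ggml_scale_bias(ctx, x, 1.0f/255.0f, -0.5f); // map raw byte values [0,255] to [-0.5, 0.5]
}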
3298 | | |
3299 | | // ggml_set |
3300 | | |
3301 | | static struct ggml_tensor * ggml_set_impl( |
3302 | | struct ggml_context * ctx, |
3303 | | struct ggml_tensor * a, |
3304 | | struct ggml_tensor * b, |
3305 | | size_t nb1, |
3306 | | size_t nb2, |
3307 | | size_t nb3, |
3308 | | size_t offset, |
3309 | 0 | bool inplace) { |
3310 | 0 | GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b)); |
3311 | | |
3312 | | // make a view of the destination |
3313 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3314 | |
|
3315 | 0 | GGML_ASSERT(offset < (size_t)(1 << 30)); |
3316 | 0 | int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 }; |
3317 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
3318 | |
|
3319 | 0 | result->op = GGML_OP_SET; |
3320 | 0 | result->src[0] = a; |
3321 | 0 | result->src[1] = b; |
3322 | |
|
3323 | 0 | return result; |
3324 | 0 | } |
3325 | | |
3326 | | struct ggml_tensor * ggml_set( |
3327 | | struct ggml_context * ctx, |
3328 | | struct ggml_tensor * a, |
3329 | | struct ggml_tensor * b, |
3330 | | size_t nb1, |
3331 | | size_t nb2, |
3332 | | size_t nb3, |
3333 | 0 | size_t offset) { |
3334 | 0 | return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false); |
3335 | 0 | } |
3336 | | |
3337 | | struct ggml_tensor * ggml_set_inplace( |
3338 | | struct ggml_context * ctx, |
3339 | | struct ggml_tensor * a, |
3340 | | struct ggml_tensor * b, |
3341 | | size_t nb1, |
3342 | | size_t nb2, |
3343 | | size_t nb3, |
3344 | 0 | size_t offset) { |
3345 | 0 | return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true); |
3346 | 0 | } |
3347 | | |
3348 | | struct ggml_tensor * ggml_set_1d( |
3349 | | struct ggml_context * ctx, |
3350 | | struct ggml_tensor * a, |
3351 | | struct ggml_tensor * b, |
3352 | 0 | size_t offset) { |
3353 | 0 | return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false); |
3354 | 0 | } |
3355 | | |
3356 | | struct ggml_tensor * ggml_set_1d_inplace( |
3357 | | struct ggml_context * ctx, |
3358 | | struct ggml_tensor * a, |
3359 | | struct ggml_tensor * b, |
3360 | 0 | size_t offset) { |
3361 | 0 | return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true); |
3362 | 0 | } |
3363 | | |
3364 | | struct ggml_tensor * ggml_set_2d( |
3365 | | struct ggml_context * ctx, |
3366 | | struct ggml_tensor * a, |
3367 | | struct ggml_tensor * b, |
3368 | | size_t nb1, |
3369 | 0 | size_t offset) { |
3370 | 0 | return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false); |
3371 | 0 | } |
3372 | | |
3373 | | struct ggml_tensor * ggml_set_2d_inplace( |
3374 | | struct ggml_context * ctx, |
3375 | | struct ggml_tensor * a, |
3376 | | struct ggml_tensor * b, |
3377 | | size_t nb1, |
3378 | 0 | size_t offset) { |
3379 | 0 | return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true); |
3380 | 0 | } |
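// Example (sketch): overwriting a block of rows of `dst` with the smaller matrix `src`,
// starting at row `row0`. The row stride of the embedded view is taken from `dst` and the
// byte offset selects the first destination row. Assumes dst->ne[0] == src->ne[0] and F32 data.
static struct ggml_tensor * set_block_sketch(
        struct ggml_context * ctx, struct ggml_tensor * dst, struct ggml_tensor * src, int64_t row0) {
    return ggml_set_2d(ctx, dst, src, dst->nb[1], row0*dst->nb[1]);
}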
3381 | | |
3382 | | // ggml_cpy |
3383 | | |
3384 | | static struct ggml_tensor * ggml_cpy_impl( |
3385 | | struct ggml_context * ctx, |
3386 | | struct ggml_tensor * a, |
3387 | 0 | struct ggml_tensor * b) { |
3388 | 0 | GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b)); |
3389 | | |
3390 | | // make a view of the destination |
3391 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, b); |
3392 | 0 | if (strlen(b->name) > 0) { |
3393 | 0 | ggml_format_name(result, "%s (copy of %s)", b->name, a->name); |
3394 | 0 | } else { |
3395 | 0 | ggml_format_name(result, "%s (copy)", a->name); |
3396 | 0 | } |
3397 | |
|
3398 | 0 | result->op = GGML_OP_CPY; |
3399 | 0 | result->src[0] = a; |
3400 | 0 | result->src[1] = b; |
3401 | |
|
3402 | 0 | return result; |
3403 | 0 | } |
3404 | | |
3405 | | struct ggml_tensor * ggml_cpy( |
3406 | | struct ggml_context * ctx, |
3407 | | struct ggml_tensor * a, |
3408 | 0 | struct ggml_tensor * b) { |
3409 | 0 | return ggml_cpy_impl(ctx, a, b); |
3410 | 0 | } |
3411 | | |
3412 | | struct ggml_tensor * ggml_cast( |
3413 | | struct ggml_context * ctx, |
3414 | | struct ggml_tensor * a, |
3415 | 0 | enum ggml_type type) { |
3416 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne); |
3417 | 0 | ggml_format_name(result, "%s (copy)", a->name); |
3418 | |
|
3419 | 0 | result->op = GGML_OP_CPY; |
3420 | 0 | result->src[0] = a; |
3421 | 0 | result->src[1] = result; |
3422 | |
|
3423 | 0 | return result; |
3424 | 0 | } |
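// Example (sketch): down-converting an F32 activation to F16, e.g. before storing it in a
// cache. ggml_cast() allocates the destination itself, whereas ggml_cpy() writes into an
// already existing tensor `b`.
static struct ggml_tensor * cast_sketch(struct ggml_context * ctx, struct ggml_tensor * x_f32) {
    return ggml_cast(ctx, x_f32, GGML_TYPE_F16);
}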
3425 | | |
3426 | | // ggml_cont |
3427 | | |
3428 | | static struct ggml_tensor * ggml_cont_impl( |
3429 | | struct ggml_context * ctx, |
3430 | 0 | struct ggml_tensor * a) { |
3431 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
3432 | 0 | ggml_format_name(result, "%s (cont)", a->name); |
3433 | |
|
3434 | 0 | result->op = GGML_OP_CONT; |
3435 | 0 | result->src[0] = a; |
3436 | |
|
3437 | 0 | return result; |
3438 | 0 | } |
3439 | | |
3440 | | struct ggml_tensor * ggml_cont( |
3441 | | struct ggml_context * ctx, |
3442 | 0 | struct ggml_tensor * a) { |
3443 | 0 | return ggml_cont_impl(ctx, a); |
3444 | 0 | } |
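// Example (sketch): ggml_permute() (below) only changes strides, so the data stays in its
// original order; ggml_cont() materializes the view into a contiguous buffer so that ops
// which require contiguous input (e.g. ggml_reshape) can follow.
static struct ggml_tensor * cont_sketch(struct ggml_context * ctx, struct ggml_tensor * x) {
    struct ggml_tensor * xt = ggml_permute(ctx, x, 1, 0, 2, 3); // swap dims 0 and 1 (a view)
    return ggml_cont(ctx, xt);                                  // copy into row-major layout
}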
3445 | | |
3446 | | // make contiguous, with new shape |
3447 | | GGML_API struct ggml_tensor * ggml_cont_1d( |
3448 | | struct ggml_context * ctx, |
3449 | | struct ggml_tensor * a, |
3450 | 0 | int64_t ne0) { |
3451 | 0 | return ggml_cont_4d(ctx, a, ne0, 1, 1, 1); |
3452 | 0 | } |
3453 | | |
3454 | | GGML_API struct ggml_tensor * ggml_cont_2d( |
3455 | | struct ggml_context * ctx, |
3456 | | struct ggml_tensor * a, |
3457 | | int64_t ne0, |
3458 | 0 | int64_t ne1) { |
3459 | 0 | return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1); |
3460 | 0 | } |
3461 | | |
3462 | | GGML_API struct ggml_tensor * ggml_cont_3d( |
3463 | | struct ggml_context * ctx, |
3464 | | struct ggml_tensor * a, |
3465 | | int64_t ne0, |
3466 | | int64_t ne1, |
3467 | 0 | int64_t ne2) { |
3468 | 0 | return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1); |
3469 | 0 | } |
3470 | | |
3471 | | struct ggml_tensor * ggml_cont_4d( |
3472 | | struct ggml_context * ctx, |
3473 | | struct ggml_tensor * a, |
3474 | | int64_t ne0, |
3475 | | int64_t ne1, |
3476 | | int64_t ne2, |
3477 | 0 | int64_t ne3) { |
3478 | 0 | GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3)); |
3479 | |
|
3480 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3); |
3481 | 0 | ggml_format_name(result, "%s (cont)", a->name); |
3482 | |
|
3483 | 0 | result->op = GGML_OP_CONT; |
3484 | 0 | result->src[0] = a; |
3485 | |
|
3486 | 0 | return result; |
3487 | 0 | } |
3488 | | |
3489 | | // ggml_reshape |
3490 | | |
3491 | | struct ggml_tensor * ggml_reshape( |
3492 | | struct ggml_context * ctx, |
3493 | | struct ggml_tensor * a, |
3494 | 0 | struct ggml_tensor * b) { |
3495 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3496 | | // only the shape of b is relevant, not its memory layout, so b is allowed to be non-contiguous.
3497 | 0 | GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b)); |
3498 | |
|
3499 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0); |
3500 | 0 | ggml_format_name(result, "%s (reshaped)", a->name); |
3501 | |
|
3502 | 0 | result->op = GGML_OP_RESHAPE; |
3503 | 0 | result->src[0] = a; |
3504 | |
|
3505 | 0 | return result; |
3506 | 0 | } |
3507 | | |
3508 | | struct ggml_tensor * ggml_reshape_1d( |
3509 | | struct ggml_context * ctx, |
3510 | | struct ggml_tensor * a, |
3511 | 0 | int64_t ne0) { |
3512 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3513 | 0 | GGML_ASSERT(ggml_nelements(a) == ne0); |
3514 | |
|
3515 | 0 | const int64_t ne[1] = { ne0 }; |
3516 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0); |
3517 | 0 | ggml_format_name(result, "%s (reshaped)", a->name); |
3518 | |
|
3519 | 0 | result->op = GGML_OP_RESHAPE; |
3520 | 0 | result->src[0] = a; |
3521 | |
|
3522 | 0 | return result; |
3523 | 0 | } |
3524 | | |
3525 | | struct ggml_tensor * ggml_reshape_2d( |
3526 | | struct ggml_context * ctx, |
3527 | | struct ggml_tensor * a, |
3528 | | int64_t ne0, |
3529 | 0 | int64_t ne1) { |
3530 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3531 | 0 | GGML_ASSERT(ggml_nelements(a) == ne0*ne1); |
3532 | |
|
3533 | 0 | const int64_t ne[2] = { ne0, ne1 }; |
3534 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0); |
3535 | 0 | ggml_format_name(result, "%s (reshaped)", a->name); |
3536 | |
|
3537 | 0 | result->op = GGML_OP_RESHAPE; |
3538 | 0 | result->src[0] = a; |
3539 | |
|
3540 | 0 | return result; |
3541 | 0 | } |
3542 | | |
3543 | | struct ggml_tensor * ggml_reshape_3d( |
3544 | | struct ggml_context * ctx, |
3545 | | struct ggml_tensor * a, |
3546 | | int64_t ne0, |
3547 | | int64_t ne1, |
3548 | 0 | int64_t ne2) { |
3549 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3550 | 0 | GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2); |
3551 | |
|
3552 | 0 | const int64_t ne[3] = { ne0, ne1, ne2 }; |
3553 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0); |
3554 | 0 | ggml_format_name(result, "%s (reshaped)", a->name); |
3555 | |
|
3556 | 0 | result->op = GGML_OP_RESHAPE; |
3557 | 0 | result->src[0] = a; |
3558 | |
|
3559 | 0 | return result; |
3560 | 0 | } |
3561 | | |
3562 | | struct ggml_tensor * ggml_reshape_4d( |
3563 | | struct ggml_context * ctx, |
3564 | | struct ggml_tensor * a, |
3565 | | int64_t ne0, |
3566 | | int64_t ne1, |
3567 | | int64_t ne2, |
3568 | 0 | int64_t ne3) { |
3569 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3570 | 0 | GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3); |
3571 | |
|
3572 | 0 | const int64_t ne[4] = { ne0, ne1, ne2, ne3 }; |
3573 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0); |
3574 | 0 | ggml_format_name(result, "%s (reshaped)", a->name); |
3575 | |
|
3576 | 0 | result->op = GGML_OP_RESHAPE; |
3577 | 0 | result->src[0] = a; |
3578 | |
|
3579 | 0 | return result; |
3580 | 0 | } |
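// Example (sketch): splitting a contiguous projection of shape [n_embd, n_tokens] into
// per-head vectors [head_dim, n_head, n_tokens], assuming n_embd is divisible by n_head.
static struct ggml_tensor * split_heads_sketch(
        struct ggml_context * ctx, struct ggml_tensor * q, int64_t n_head) {
    const int64_t head_dim = q->ne[0] / n_head;
    return ggml_reshape_3d(ctx, q, head_dim, n_head, q->ne[1]);
}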
3581 | | |
3582 | | static struct ggml_tensor * ggml_view_impl( |
3583 | | struct ggml_context * ctx, |
3584 | | struct ggml_tensor * a, |
3585 | | int n_dims, |
3586 | | const int64_t * ne, |
3587 | 0 | size_t offset) { |
3588 | 0 | struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset); |
3589 | 0 | ggml_format_name(result, "%s (view)", a->name); |
3590 | |
|
3591 | 0 | ggml_set_op_params(result, &offset, sizeof(offset)); |
3592 | |
|
3593 | 0 | result->op = GGML_OP_VIEW; |
3594 | 0 | result->src[0] = a; |
3595 | |
|
3596 | 0 | return result; |
3597 | 0 | } |
3598 | | |
3599 | | // ggml_view_1d |
3600 | | |
3601 | | struct ggml_tensor * ggml_view_1d( |
3602 | | struct ggml_context * ctx, |
3603 | | struct ggml_tensor * a, |
3604 | | int64_t ne0, |
3605 | 0 | size_t offset) { |
3606 | 0 | struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset); |
3607 | |
|
3608 | 0 | return result; |
3609 | 0 | } |
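// Example (sketch): viewing row `i` of a 2D tensor as a 1D vector without copying any data.
// The byte offset is i * nb[1], i.e. i full rows from the start of the buffer.
static struct ggml_tensor * row_view_sketch(
        struct ggml_context * ctx, struct ggml_tensor * a, int64_t i) {
    return ggml_view_1d(ctx, a, a->ne[0], i*a->nb[1]);
}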
3610 | | |
3611 | | // ggml_view_2d |
3612 | | |
3613 | | struct ggml_tensor * ggml_view_2d( |
3614 | | struct ggml_context * ctx, |
3615 | | struct ggml_tensor * a, |
3616 | | int64_t ne0, |
3617 | | int64_t ne1, |
3618 | | size_t nb1, |
3619 | 0 | size_t offset) { |
3620 | 0 | const int64_t ne[2] = { ne0, ne1 }; |
3621 | |
|
3622 | 0 | struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset); |
3623 | |
|
3624 | 0 | result->nb[1] = nb1; |
3625 | 0 | result->nb[2] = result->nb[1]*ne1; |
3626 | 0 | result->nb[3] = result->nb[2]; |
3627 | |
|
3628 | 0 | return result; |
3629 | 0 | } |
3630 | | |
3631 | | // ggml_view_3d |
3632 | | |
3633 | | struct ggml_tensor * ggml_view_3d( |
3634 | | struct ggml_context * ctx, |
3635 | | struct ggml_tensor * a, |
3636 | | int64_t ne0, |
3637 | | int64_t ne1, |
3638 | | int64_t ne2, |
3639 | | size_t nb1, |
3640 | | size_t nb2, |
3641 | 0 | size_t offset) { |
3642 | 0 | const int64_t ne[3] = { ne0, ne1, ne2 }; |
3643 | |
|
3644 | 0 | struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset); |
3645 | |
|
3646 | 0 | result->nb[1] = nb1; |
3647 | 0 | result->nb[2] = nb2; |
3648 | 0 | result->nb[3] = result->nb[2]*ne2; |
3649 | |
|
3650 | 0 | return result; |
3651 | 0 | } |
3652 | | |
3653 | | // ggml_view_4d |
3654 | | |
3655 | | struct ggml_tensor * ggml_view_4d( |
3656 | | struct ggml_context * ctx, |
3657 | | struct ggml_tensor * a, |
3658 | | int64_t ne0, |
3659 | | int64_t ne1, |
3660 | | int64_t ne2, |
3661 | | int64_t ne3, |
3662 | | size_t nb1, |
3663 | | size_t nb2, |
3664 | | size_t nb3, |
3665 | 0 | size_t offset) { |
3666 | 0 | const int64_t ne[4] = { ne0, ne1, ne2, ne3 }; |
3667 | |
|
3668 | 0 | struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset); |
3669 | |
|
3670 | 0 | result->nb[1] = nb1; |
3671 | 0 | result->nb[2] = nb2; |
3672 | 0 | result->nb[3] = nb3; |
3673 | |
|
3674 | 0 | return result; |
3675 | 0 | } |
3676 | | |
3677 | | // ggml_permute |
3678 | | |
3679 | | struct ggml_tensor * ggml_permute( |
3680 | | struct ggml_context * ctx, |
3681 | | struct ggml_tensor * a, |
3682 | | int axis0, |
3683 | | int axis1, |
3684 | | int axis2, |
3685 | 0 | int axis3) { |
3686 | 0 | GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS); |
3687 | 0 | GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS); |
3688 | 0 | GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS); |
3689 | 0 | GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS); |
3690 | |
|
3691 | 0 | GGML_ASSERT(axis0 != axis1); |
3692 | 0 | GGML_ASSERT(axis0 != axis2); |
3693 | 0 | GGML_ASSERT(axis0 != axis3); |
3694 | 0 | GGML_ASSERT(axis1 != axis2); |
3695 | 0 | GGML_ASSERT(axis1 != axis3); |
3696 | 0 | GGML_ASSERT(axis2 != axis3); |
3697 | |
|
3698 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
3699 | 0 | ggml_format_name(result, "%s (permuted)", a->name); |
3700 | |
|
3701 | 0 | int ne[GGML_MAX_DIMS]; |
3702 | 0 | int nb[GGML_MAX_DIMS]; |
3703 | |
|
3704 | 0 | ne[axis0] = a->ne[0]; |
3705 | 0 | ne[axis1] = a->ne[1]; |
3706 | 0 | ne[axis2] = a->ne[2]; |
3707 | 0 | ne[axis3] = a->ne[3]; |
3708 | |
|
3709 | 0 | nb[axis0] = a->nb[0]; |
3710 | 0 | nb[axis1] = a->nb[1]; |
3711 | 0 | nb[axis2] = a->nb[2]; |
3712 | 0 | nb[axis3] = a->nb[3]; |
3713 | |
|
3714 | 0 | result->ne[0] = ne[0]; |
3715 | 0 | result->ne[1] = ne[1]; |
3716 | 0 | result->ne[2] = ne[2]; |
3717 | 0 | result->ne[3] = ne[3]; |
3718 | |
|
3719 | 0 | result->nb[0] = nb[0]; |
3720 | 0 | result->nb[1] = nb[1]; |
3721 | 0 | result->nb[2] = nb[2]; |
3722 | 0 | result->nb[3] = nb[3]; |
3723 | |
|
3724 | 0 | result->op = GGML_OP_PERMUTE; |
3725 | 0 | result->src[0] = a; |
3726 | |
|
3727 | 0 | int32_t params[] = { axis0, axis1, axis2, axis3 }; |
3728 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
3729 | |
|
3730 | 0 | return result; |
3731 | 0 | } |
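// Example (sketch): the usual attention layout change from [head_dim, n_head, n_tokens, 1]
// to [head_dim, n_tokens, n_head, 1]. Each argument says where the corresponding source
// axis ends up, so (0, 2, 1, 3) swaps axes 1 and 2 and leaves the rest in place.
static struct ggml_tensor * permute_heads_sketch(struct ggml_context * ctx, struct ggml_tensor * q) {
    return ggml_permute(ctx, q, 0, 2, 1, 3);
}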
3732 | | |
3733 | | // ggml_transpose |
3734 | | |
3735 | | struct ggml_tensor * ggml_transpose( |
3736 | | struct ggml_context * ctx, |
3737 | 0 | struct ggml_tensor * a) { |
3738 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
3739 | 0 | ggml_format_name(result, "%s (transposed)", a->name); |
3740 | |
|
3741 | 0 | result->ne[0] = a->ne[1]; |
3742 | 0 | result->ne[1] = a->ne[0]; |
3743 | |
|
3744 | 0 | result->nb[0] = a->nb[1]; |
3745 | 0 | result->nb[1] = a->nb[0]; |
3746 | |
|
3747 | 0 | result->op = GGML_OP_TRANSPOSE; |
3748 | 0 | result->src[0] = a; |
3749 | |
|
3750 | 0 | return result; |
3751 | 0 | } |
3752 | | |
3753 | | // ggml_get_rows |
3754 | | |
3755 | | struct ggml_tensor * ggml_get_rows( |
3756 | | struct ggml_context * ctx, |
3757 | | struct ggml_tensor * a, |
3758 | 0 | struct ggml_tensor * b) { |
3759 | 0 | GGML_ASSERT(a->ne[2] == b->ne[1]); |
3760 | 0 | GGML_ASSERT(a->ne[3] == b->ne[2]); |
3761 | 0 | GGML_ASSERT(b->ne[3] == 1); |
3762 | 0 | GGML_ASSERT(b->type == GGML_TYPE_I32); |
3763 | | |
3764 | | // TODO: implement non F32 return |
3765 | 0 | enum ggml_type type = GGML_TYPE_F32; |
3766 | 0 | if (a->type == GGML_TYPE_I32) { |
3767 | 0 | type = a->type; |
3768 | 0 | } |
3769 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]); |
3770 | |
|
3771 | 0 | result->op = GGML_OP_GET_ROWS; |
3772 | 0 | result->src[0] = a; |
3773 | 0 | result->src[1] = b; |
3774 | |
|
3775 | 0 | return result; |
3776 | 0 | } |
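// Example (sketch): token-embedding lookup. `tok_embd` is the embedding matrix
// [n_embd, n_vocab] and `tokens` is a 1D I32 tensor of token ids; the result is
// [n_embd, n_tokens] in F32.
static struct ggml_tensor * embedding_lookup_sketch(
        struct ggml_context * ctx, struct ggml_tensor * tok_embd, struct ggml_tensor * tokens) {
    return ggml_get_rows(ctx, tok_embd, tokens);
}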
3777 | | |
3778 | | // ggml_get_rows_back |
3779 | | |
3780 | | struct ggml_tensor * ggml_get_rows_back( |
3781 | | struct ggml_context * ctx, |
3782 | | struct ggml_tensor * a, |
3783 | | struct ggml_tensor * b, |
3784 | 0 | struct ggml_tensor * c) { |
3785 | 0 | GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32); |
3786 | 0 | GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0])); |
3787 | | |
3788 | | // TODO: implement non F32 return |
3789 | | //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]); |
3790 | 0 | struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]); |
3791 | |
|
3792 | 0 | result->op = GGML_OP_GET_ROWS_BACK; |
3793 | 0 | result->src[0] = a; |
3794 | 0 | result->src[1] = b; |
3795 | |
|
3796 | 0 | return result; |
3797 | 0 | } |
3798 | | |
3799 | | // ggml_set_rows |
3800 | | |
3801 | | struct ggml_tensor * ggml_set_rows( |
3802 | | struct ggml_context * ctx, |
3803 | | struct ggml_tensor * a, |
3804 | | struct ggml_tensor * b, |
3805 | 0 | struct ggml_tensor * c) { |
3806 | 0 | GGML_ASSERT(a->ne[0] == b->ne[0]); |
3807 | 0 | GGML_ASSERT(a->ne[2] == b->ne[2]); |
3808 | 0 | GGML_ASSERT(a->ne[3] == b->ne[3]); |
3809 | 0 | GGML_ASSERT(b->ne[1] == c->ne[0]); |
3810 | 0 | GGML_ASSERT(b->ne[2] % c->ne[1] == 0); |
3811 | 0 | GGML_ASSERT(b->ne[3] % c->ne[2] == 0); |
3812 | 0 | GGML_ASSERT(c->ne[3] == 1); |
3813 | 0 | GGML_ASSERT(b->type == GGML_TYPE_F32); |
3814 | 0 | GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32); |
3815 | |
|
3816 | 0 | GGML_ASSERT(ggml_is_contiguous_rows(a)); |
3817 | 0 | GGML_ASSERT(ggml_is_contiguous_rows(b)); |
3818 | |
|
3819 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
3820 | |
|
3821 | 0 | result->op = GGML_OP_SET_ROWS; |
3822 | 0 | result->src[0] = b; |
3823 | 0 | result->src[1] = c; |
3824 | 0 | result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931) |
3825 | |
|
3826 | 0 | return result; |
3827 | 0 | } |
3828 | | |
3829 | | // ggml_diag |
3830 | | |
3831 | | struct ggml_tensor * ggml_diag( |
3832 | | struct ggml_context * ctx, |
3833 | 0 | struct ggml_tensor * a) { |
3834 | 0 | GGML_ASSERT(a->ne[1] == 1); |
3835 | |
|
3836 | 0 | const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] }; |
3837 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne); |
3838 | |
|
3839 | 0 | result->op = GGML_OP_DIAG; |
3840 | 0 | result->src[0] = a; |
3841 | |
|
3842 | 0 | return result; |
3843 | 0 | } |
3844 | | |
3845 | | // ggml_diag_mask_inf |
3846 | | |
3847 | | static struct ggml_tensor * ggml_diag_mask_inf_impl( |
3848 | | struct ggml_context * ctx, |
3849 | | struct ggml_tensor * a, |
3850 | | int n_past, |
3851 | 0 | bool inplace) { |
3852 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3853 | |
|
3854 | 0 | int32_t params[] = { n_past }; |
3855 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
3856 | |
|
3857 | 0 | result->op = GGML_OP_DIAG_MASK_INF; |
3858 | 0 | result->src[0] = a; |
3859 | |
|
3860 | 0 | return result; |
3861 | 0 | } |
3862 | | |
3863 | | struct ggml_tensor * ggml_diag_mask_inf( |
3864 | | struct ggml_context * ctx, |
3865 | | struct ggml_tensor * a, |
3866 | 0 | int n_past) { |
3867 | 0 | return ggml_diag_mask_inf_impl(ctx, a, n_past, false); |
3868 | 0 | } |
3869 | | |
3870 | | struct ggml_tensor * ggml_diag_mask_inf_inplace( |
3871 | | struct ggml_context * ctx, |
3872 | | struct ggml_tensor * a, |
3873 | 0 | int n_past) { |
3874 | 0 | return ggml_diag_mask_inf_impl(ctx, a, n_past, true); |
3875 | 0 | } |
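// Example (sketch): classic causal masking of a square attention score matrix followed by
// softmax: entries above the diagonal (shifted by the n_past tokens already processed)
// are set to -INF so they contribute nothing after normalization.
static struct ggml_tensor * causal_attn_sketch(
        struct ggml_context * ctx, struct ggml_tensor * kq, int n_past) {
    return ggml_soft_max(ctx, ggml_diag_mask_inf(ctx, kq, n_past));
}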
3876 | | |
3877 | | // ggml_diag_mask_zero |
3878 | | |
3879 | | static struct ggml_tensor * ggml_diag_mask_zero_impl( |
3880 | | struct ggml_context * ctx, |
3881 | | struct ggml_tensor * a, |
3882 | | int n_past, |
3883 | 0 | bool inplace) { |
3884 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3885 | |
|
3886 | 0 | int32_t params[] = { n_past }; |
3887 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
3888 | |
|
3889 | 0 | result->op = GGML_OP_DIAG_MASK_ZERO; |
3890 | 0 | result->src[0] = a; |
3891 | |
|
3892 | 0 | return result; |
3893 | 0 | } |
3894 | | |
3895 | | struct ggml_tensor * ggml_diag_mask_zero( |
3896 | | struct ggml_context * ctx, |
3897 | | struct ggml_tensor * a, |
3898 | 0 | int n_past) { |
3899 | 0 | return ggml_diag_mask_zero_impl(ctx, a, n_past, false); |
3900 | 0 | } |
3901 | | |
3902 | | struct ggml_tensor * ggml_diag_mask_zero_inplace( |
3903 | | struct ggml_context * ctx, |
3904 | | struct ggml_tensor * a, |
3905 | 0 | int n_past) { |
3906 | 0 | return ggml_diag_mask_zero_impl(ctx, a, n_past, true); |
3907 | 0 | } |
3908 | | |
3909 | | // ggml_soft_max |
3910 | | |
3911 | | static struct ggml_tensor * ggml_soft_max_impl( |
3912 | | struct ggml_context * ctx, |
3913 | | struct ggml_tensor * a, |
3914 | | struct ggml_tensor * mask, |
3915 | | float scale, |
3916 | | float max_bias, |
3917 | 0 | bool inplace) { |
3918 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
3919 | |
|
3920 | 0 | if (mask) { |
3921 | 0 | GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32); |
3922 | 0 | GGML_ASSERT(ggml_is_contiguous(mask)); |
3923 | 0 | GGML_ASSERT(mask->ne[0] == a->ne[0]); |
3924 | 0 | GGML_ASSERT(mask->ne[1] >= a->ne[1]); |
3925 | 0 | GGML_ASSERT(a->ne[2]%mask->ne[2] == 0); |
3926 | 0 | GGML_ASSERT(a->ne[3]%mask->ne[3] == 0); |
3927 | 0 | } |
3928 | |
|
3929 | 0 | if (max_bias > 0.0f) { |
3930 | 0 | GGML_ASSERT(mask); |
3931 | 0 | } |
3932 | |
|
3933 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
3934 | |
|
3935 | 0 | float params[] = { scale, max_bias }; |
3936 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
3937 | |
|
3938 | 0 | result->op = GGML_OP_SOFT_MAX; |
3939 | 0 | result->src[0] = a; |
3940 | 0 | result->src[1] = mask; |
3941 | |
|
3942 | 0 | return result; |
3943 | 0 | } |
3944 | | |
3945 | | struct ggml_tensor * ggml_soft_max( |
3946 | | struct ggml_context * ctx, |
3947 | 0 | struct ggml_tensor * a) { |
3948 | 0 | return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false); |
3949 | 0 | } |
3950 | | |
3951 | | struct ggml_tensor * ggml_soft_max_inplace( |
3952 | | struct ggml_context * ctx, |
3953 | 0 | struct ggml_tensor * a) { |
3954 | 0 | return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true); |
3955 | 0 | } |
3956 | | |
3957 | | struct ggml_tensor * ggml_soft_max_ext( |
3958 | | struct ggml_context * ctx, |
3959 | | struct ggml_tensor * a, |
3960 | | struct ggml_tensor * mask, |
3961 | | float scale, |
3962 | 0 | float max_bias) { |
3963 | 0 | return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false); |
3964 | 0 | } |
3965 | | |
3966 | | struct ggml_tensor * ggml_soft_max_ext_inplace( |
3967 | | struct ggml_context * ctx, |
3968 | | struct ggml_tensor * a, |
3969 | | struct ggml_tensor * mask, |
3970 | | float scale, |
3971 | 0 | float max_bias) { |
3972 | 0 | return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true); |
3973 | 0 | } |
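// Example (sketch): attention-style softmax over scores of shape [n_kv, n_tokens, n_head, 1],
// scaled by 1/sqrt(head_dim), with an additive mask (-INFINITY at disallowed positions) and
// ALiBi disabled (max_bias = 0.0f). The mask shape must satisfy the asserts above.
static struct ggml_tensor * attn_softmax_sketch(
        struct ggml_context * ctx, struct ggml_tensor * scores,
        struct ggml_tensor * mask, int64_t head_dim) {
    return ggml_soft_max_ext(ctx, scores, mask, 1.0f/sqrtf((float)head_dim), 0.0f);
}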
3974 | | |
3975 | | void ggml_soft_max_add_sinks( |
3976 | | struct ggml_tensor * a, |
3977 | 0 | struct ggml_tensor * sinks) { |
3978 | 0 | if (!sinks) { |
3979 | 0 | a->src[2] = NULL; |
3980 | 0 | return; |
3981 | 0 | } |
3982 | | |
3983 | 0 | GGML_ASSERT(a->op == GGML_OP_SOFT_MAX); |
3984 | 0 | GGML_ASSERT(a->src[2] == NULL); |
3985 | 0 | GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]); |
3986 | 0 | GGML_ASSERT(sinks->type == GGML_TYPE_F32); |
3987 | |
|
3988 | 0 | a->src[2] = sinks; |
3989 | 0 | } |
3990 | | |
3991 | | // ggml_soft_max_ext_back |
3992 | | |
3993 | | static struct ggml_tensor * ggml_soft_max_ext_back_impl( |
3994 | | struct ggml_context * ctx, |
3995 | | struct ggml_tensor * a, |
3996 | | struct ggml_tensor * b, |
3997 | | float scale, |
3998 | | float max_bias, |
3999 | 0 | bool inplace) { |
4000 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
4001 | |
|
4002 | 0 | result->op = GGML_OP_SOFT_MAX_BACK; |
4003 | 0 | result->src[0] = a; |
4004 | 0 | result->src[1] = b; |
4005 | |
|
4006 | 0 | memcpy((float *) result->op_params + 0, &scale, sizeof(float)); |
4007 | 0 | memcpy((float *) result->op_params + 1, &max_bias, sizeof(float)); |
4008 | |
|
4009 | 0 | return result; |
4010 | 0 | } |
4011 | | |
4012 | | struct ggml_tensor * ggml_soft_max_ext_back( |
4013 | | struct ggml_context * ctx, |
4014 | | struct ggml_tensor * a, |
4015 | | struct ggml_tensor * b, |
4016 | | float scale, |
4017 | 0 | float max_bias) { |
4018 | 0 | return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false); |
4019 | 0 | } |
4020 | | |
4021 | | struct ggml_tensor * ggml_soft_max_ext_back_inplace( |
4022 | | struct ggml_context * ctx, |
4023 | | struct ggml_tensor * a, |
4024 | | struct ggml_tensor * b, |
4025 | | float scale, |
4026 | 0 | float max_bias) { |
4027 | 0 | return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true); |
4028 | 0 | } |
4029 | | |
4030 | | // ggml_rope |
4031 | | |
4032 | | static struct ggml_tensor * ggml_rope_impl( |
4033 | | struct ggml_context * ctx, |
4034 | | struct ggml_tensor * a, |
4035 | | struct ggml_tensor * b, |
4036 | | struct ggml_tensor * c, |
4037 | | int n_dims, |
4038 | | int sections[GGML_MROPE_SECTIONS], |
4039 | | int mode, |
4040 | | int n_ctx_orig, |
4041 | | float freq_base, |
4042 | | float freq_scale, |
4043 | | float ext_factor, |
4044 | | float attn_factor, |
4045 | | float beta_fast, |
4046 | | float beta_slow, |
4047 | 0 | bool inplace) { |
4048 | 0 | GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported"); |
4049 | |
|
4050 | 0 | GGML_ASSERT(ggml_is_vector(b)); |
4051 | 0 | GGML_ASSERT(b->type == GGML_TYPE_I32); |
4052 | |
|
4053 | 0 | bool mrope_used = mode & GGML_ROPE_TYPE_MROPE; |
4054 | 0 | if (mrope_used) { |
4055 | 0 | GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token |
4056 | 0 | } else { |
4057 | 0 | GGML_ASSERT(a->ne[2] == b->ne[0]); |
4058 | 0 | } |
4059 | |
|
4060 | 0 | if (c) { |
4061 | 0 | GGML_ASSERT(c->type == GGML_TYPE_F32); |
4062 | 0 | GGML_ASSERT(c->ne[0] >= n_dims / 2); |
4063 | 0 | } |
4064 | |
|
4065 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
4066 | |
|
4067 | 0 | int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig }; |
4068 | 0 | memcpy(params + 5, &freq_base, sizeof(float)); |
4069 | 0 | memcpy(params + 6, &freq_scale, sizeof(float)); |
4070 | 0 | memcpy(params + 7, &ext_factor, sizeof(float)); |
4071 | 0 | memcpy(params + 8, &attn_factor, sizeof(float)); |
4072 | 0 | memcpy(params + 9, &beta_fast, sizeof(float)); |
4073 | 0 | memcpy(params + 10, &beta_slow, sizeof(float)); |
4074 | 0 | if (mrope_used && sections) { |
4075 | 0 | memcpy(params + 11, sections, sizeof(int32_t) * GGML_MROPE_SECTIONS); |
4076 | 0 | } else { |
4077 | 0 | memset(params + 11, 0, sizeof(int32_t) * GGML_MROPE_SECTIONS); |
4078 | 0 | } |
4079 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4080 | |
|
4081 | 0 | result->op = GGML_OP_ROPE; |
4082 | 0 | result->src[0] = a; |
4083 | 0 | result->src[1] = b; |
4084 | 0 | result->src[2] = c; |
4085 | |
|
4086 | 0 | return result; |
4087 | 0 | } |
4088 | | |
4089 | | struct ggml_tensor * ggml_rope( |
4090 | | struct ggml_context * ctx, |
4091 | | struct ggml_tensor * a, |
4092 | | struct ggml_tensor * b, |
4093 | | int n_dims, |
4094 | 0 | int mode) { |
4095 | 0 | return ggml_rope_impl( |
4096 | 0 | ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false |
4097 | 0 | ); |
4098 | 0 | } |
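// Example (sketch): applying standard rotary position embeddings to per-head query vectors.
// `q` is [head_dim, n_head, n_tokens, 1] and `pos` is an I32 vector with one position per
// token (pos->ne[0] == q->ne[2], per the assert in ggml_rope_impl). Mode 0 is the original
// interleaved rotation; NeoX-style and the YaRN/frequency knobs go through ggml_rope_ext().
static struct ggml_tensor * rope_sketch(
        struct ggml_context * ctx, struct ggml_tensor * q, struct ggml_tensor * pos) {
    return ggml_rope(ctx, q, pos, (int) q->ne[0], 0);
}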
4099 | | |
4100 | | struct ggml_tensor * ggml_rope_multi( |
4101 | | struct ggml_context * ctx, |
4102 | | struct ggml_tensor * a, |
4103 | | struct ggml_tensor * b, |
4104 | | struct ggml_tensor * c, |
4105 | | int n_dims, |
4106 | | int sections[GGML_MROPE_SECTIONS], |
4107 | | int mode, |
4108 | | int n_ctx_orig, |
4109 | | float freq_base, |
4110 | | float freq_scale, |
4111 | | float ext_factor, |
4112 | | float attn_factor, |
4113 | | float beta_fast, |
4114 | 0 | float beta_slow) { |
4115 | 0 | return ggml_rope_impl( |
4116 | 0 | ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, |
4117 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, false |
4118 | 0 | ); |
4119 | 0 | } |
4120 | | |
4121 | | struct ggml_tensor * ggml_rope_multi_inplace( |
4122 | | struct ggml_context * ctx, |
4123 | | struct ggml_tensor * a, |
4124 | | struct ggml_tensor * b, |
4125 | | struct ggml_tensor * c, |
4126 | | int n_dims, |
4127 | | int sections[GGML_MROPE_SECTIONS], |
4128 | | int mode, |
4129 | | int n_ctx_orig, |
4130 | | float freq_base, |
4131 | | float freq_scale, |
4132 | | float ext_factor, |
4133 | | float attn_factor, |
4134 | | float beta_fast, |
4135 | 0 | float beta_slow) { |
4136 | 0 | return ggml_rope_impl( |
4137 | 0 | ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, |
4138 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, true |
4139 | 0 | ); |
4140 | 0 | } |
4141 | | |
4142 | | struct ggml_tensor * ggml_rope_inplace( |
4143 | | struct ggml_context * ctx, |
4144 | | struct ggml_tensor * a, |
4145 | | struct ggml_tensor * b, |
4146 | | int n_dims, |
4147 | 0 | int mode) { |
4148 | 0 | return ggml_rope_impl( |
4149 | 0 | ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true |
4150 | 0 | ); |
4151 | 0 | } |
4152 | | |
4153 | | struct ggml_tensor * ggml_rope_ext( |
4154 | | struct ggml_context * ctx, |
4155 | | struct ggml_tensor * a, |
4156 | | struct ggml_tensor * b, |
4157 | | struct ggml_tensor * c, |
4158 | | int n_dims, |
4159 | | int mode, |
4160 | | int n_ctx_orig, |
4161 | | float freq_base, |
4162 | | float freq_scale, |
4163 | | float ext_factor, |
4164 | | float attn_factor, |
4165 | | float beta_fast, |
4166 | 0 | float beta_slow) { |
4167 | 0 | return ggml_rope_impl( |
4168 | 0 | ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, |
4169 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, false |
4170 | 0 | ); |
4171 | 0 | } |
4172 | | |
4173 | | struct ggml_tensor * ggml_rope_ext_inplace( |
4174 | | struct ggml_context * ctx, |
4175 | | struct ggml_tensor * a, |
4176 | | struct ggml_tensor * b, |
4177 | | struct ggml_tensor * c, |
4178 | | int n_dims, |
4179 | | int mode, |
4180 | | int n_ctx_orig, |
4181 | | float freq_base, |
4182 | | float freq_scale, |
4183 | | float ext_factor, |
4184 | | float attn_factor, |
4185 | | float beta_fast, |
4186 | 0 | float beta_slow) { |
4187 | 0 | return ggml_rope_impl( |
4188 | 0 | ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, |
4189 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, true |
4190 | 0 | ); |
4191 | 0 | } |
4192 | | |
4193 | | struct ggml_tensor * ggml_rope_custom( |
4194 | | struct ggml_context * ctx, |
4195 | | struct ggml_tensor * a, |
4196 | | struct ggml_tensor * b, |
4197 | | int n_dims, |
4198 | | int mode, |
4199 | | int n_ctx_orig, |
4200 | | float freq_base, |
4201 | | float freq_scale, |
4202 | | float ext_factor, |
4203 | | float attn_factor, |
4204 | | float beta_fast, |
4205 | 0 | float beta_slow) { |
4206 | 0 | return ggml_rope_impl( |
4207 | 0 | ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, |
4208 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, false |
4209 | 0 | ); |
4210 | 0 | } |
4211 | | |
4212 | | struct ggml_tensor * ggml_rope_custom_inplace( |
4213 | | struct ggml_context * ctx, |
4214 | | struct ggml_tensor * a, |
4215 | | struct ggml_tensor * b, |
4216 | | int n_dims, |
4217 | | int mode, |
4218 | | int n_ctx_orig, |
4219 | | float freq_base, |
4220 | | float freq_scale, |
4221 | | float ext_factor, |
4222 | | float attn_factor, |
4223 | | float beta_fast, |
4224 | 0 | float beta_slow) { |
4225 | 0 | return ggml_rope_impl( |
4226 | 0 | ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale, |
4227 | 0 | ext_factor, attn_factor, beta_fast, beta_slow, true |
4228 | 0 | ); |
4229 | 0 | } |
4230 | | |
4231 | | // Solving `max_pos_emb = n_rot * 2pi * base^((2 * x) / n_dims)` for x gives
4232 | | // `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
4233 | 0 | static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) { |
4234 | 0 | return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base)); |
4235 | 0 | } |
4236 | | |
4237 | | void ggml_rope_yarn_corr_dims( |
4238 | | int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2] |
4239 | 0 | ) { |
4240 | | // start and end correction dims |
4241 | 0 | float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base)); |
4242 | 0 | float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base)); |
4243 | 0 | dims[0] = MAX(0, start); |
4244 | 0 | dims[1] = MIN(n_dims - 1, end); |
4245 | 0 | } |
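// Worked example (approximate): with n_dims = 128, n_ctx_orig = 4096, freq_base = 10000,
// beta_fast = 32 and beta_slow = 1, corr_dim(32) = 128*log(4096/(32*2*pi))/(2*log(10000)) ~ 20.9
// and corr_dim(1) ~ 45.0, so the correction range returned in dims[] is roughly [20, 46].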
4246 | | |
4247 | | // ggml_rope_back |
4248 | | |
4249 | | struct ggml_tensor * ggml_rope_ext_back( |
4250 | | struct ggml_context * ctx, |
4251 | | struct ggml_tensor * a, |
4252 | | struct ggml_tensor * b, |
4253 | | struct ggml_tensor * c, |
4254 | | int n_dims, |
4255 | | int mode, |
4256 | | int n_ctx_orig, |
4257 | | float freq_base, |
4258 | | float freq_scale, |
4259 | | float ext_factor, |
4260 | | float attn_factor, |
4261 | | float beta_fast, |
4262 | 0 | float beta_slow) { |
4263 | 0 | struct ggml_tensor * result = ggml_rope_ext( |
4264 | 0 | ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow); |
4265 | 0 | result->op = GGML_OP_ROPE_BACK; |
4266 | 0 | return result; |
4267 | 0 | } |
4268 | | |
4269 | | struct ggml_tensor * ggml_rope_multi_back( |
4270 | | struct ggml_context * ctx, |
4271 | | struct ggml_tensor * a, |
4272 | | struct ggml_tensor * b, |
4273 | | struct ggml_tensor * c, |
4274 | | int n_dims, |
4275 | | int sections[4], |
4276 | | int mode, |
4277 | | int n_ctx_orig, |
4278 | | float freq_base, |
4279 | | float freq_scale, |
4280 | | float ext_factor, |
4281 | | float attn_factor, |
4282 | | float beta_fast, |
4283 | 0 | float beta_slow) { |
4284 | 0 | struct ggml_tensor * result = ggml_rope_multi( |
4285 | 0 | ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow); |
4286 | 0 | result->op = GGML_OP_ROPE_BACK; |
4287 | 0 | return result; |
4288 | 0 | } |
4289 | | // ggml_clamp |
4290 | | |
4291 | | struct ggml_tensor * ggml_clamp( |
4292 | | struct ggml_context * ctx, |
4293 | | struct ggml_tensor * a, |
4294 | | float min, |
4295 | 0 | float max) { |
4296 | | // TODO: when implementing the backward pass, fix this:
4297 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
4298 | |
|
4299 | 0 | float params[] = { min, max }; |
4300 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4301 | |
|
4302 | 0 | result->op = GGML_OP_CLAMP; |
4303 | 0 | result->src[0] = a; |
4304 | |
|
4305 | 0 | return result; |
4306 | 0 | } |
4307 | | |
4308 | 0 | static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) { |
4309 | 0 | return (ins + 2 * p - d * (ks - 1) - 1) / s + 1; |
4310 | 0 | } |
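// Worked example: a 224-wide input with kernel size 3, stride 2, padding 1 and dilation 1
// gives (224 + 2*1 - 1*(3-1) - 1)/2 + 1 = 112 output positions along that dimension.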
4311 | | |
4312 | | // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW] |
4313 | | // a: [OC,IC, KH, KW] |
4314 | | // b: [N, IC, IH, IW] |
4315 | | // result: [N, OH, OW, IC*KH*KW] |
4316 | | struct ggml_tensor * ggml_im2col( |
4317 | | struct ggml_context * ctx, |
4318 | | struct ggml_tensor * a, |
4319 | | struct ggml_tensor * b, |
4320 | | int s0, |
4321 | | int s1, |
4322 | | int p0, |
4323 | | int p1, |
4324 | | int d0, |
4325 | | int d1, |
4326 | | bool is_2D, |
4327 | 0 | enum ggml_type dst_type) { |
4328 | 0 | if (is_2D) { |
4329 | 0 | GGML_ASSERT(a->ne[2] == b->ne[2]); |
4330 | 0 | } else { |
4331 | | //GGML_ASSERT(b->ne[1] % a->ne[1] == 0); |
4332 | 0 | GGML_ASSERT(b->ne[1] == a->ne[1]); |
4333 | 0 | GGML_ASSERT(b->ne[3] == 1); |
4334 | 0 | } |
4335 | |
|
4336 | 0 | const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0; |
4337 | 0 | const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0); |
4338 | |
|
4339 | 0 | GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a"); |
4340 | 0 | GGML_ASSERT((OW > 0) && "b too small compared to a"); |
4341 | |
|
4342 | 0 | const int64_t ne[4] = { |
4343 | 0 | is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0], |
4344 | 0 | OW, |
4345 | 0 | is_2D ? OH : b->ne[2], |
4346 | 0 | is_2D ? b->ne[3] : 1, |
4347 | 0 | }; |
4348 | |
|
4349 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne); |
4350 | 0 | int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) }; |
4351 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4352 | |
|
4353 | 0 | result->op = GGML_OP_IM2COL; |
4354 | 0 | result->src[0] = a; |
4355 | 0 | result->src[1] = b; |
4356 | |
|
4357 | 0 | return result; |
4358 | 0 | } |
4359 | | |
4360 | | struct ggml_tensor * ggml_im2col_back( |
4361 | | struct ggml_context * ctx, |
4362 | | struct ggml_tensor * a, |
4363 | | struct ggml_tensor * b, |
4364 | | int64_t * ne, |
4365 | | int s0, |
4366 | | int s1, |
4367 | | int p0, |
4368 | | int p1, |
4369 | | int d0, |
4370 | | int d1, |
4371 | 0 | bool is_2D) { |
4372 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4373 | 0 | int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) }; |
4374 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4375 | |
|
4376 | 0 | result->op = GGML_OP_IM2COL_BACK; |
4377 | 0 | result->src[0] = a; |
4378 | 0 | result->src[1] = b; |
4379 | |
|
4380 | 0 | return result; |
4381 | 0 | } |
4382 | | |
4383 | | // ggml_conv_1d |
4384 | | |
4385 | | struct ggml_tensor * ggml_conv_1d( |
4386 | | struct ggml_context * ctx, |
4387 | | struct ggml_tensor * a, |
4388 | | struct ggml_tensor * b, |
4389 | | int s0, |
4390 | | int p0, |
4391 | 0 | int d0) { |
4392 | 0 | struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K] |
4393 | |
|
4394 | 0 | struct ggml_tensor * result = |
4395 | 0 | ggml_mul_mat(ctx, |
4396 | 0 | ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K] |
4397 | 0 | ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2])); // [OC,IC, K] => [OC, IC * K] |
4398 | |
|
4399 | 0 | result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL] |
4400 | |
|
4401 | 0 | return result; |
4402 | 0 | } |
4403 | | |
4404 | | // ggml_conv_1d_ph |
4405 | | |
4406 | | struct ggml_tensor* ggml_conv_1d_ph( |
4407 | | struct ggml_context * ctx, |
4408 | | struct ggml_tensor * a, |
4409 | | struct ggml_tensor * b, |
4410 | | int s, |
4411 | 0 | int d) { |
4412 | 0 | return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d); |
4413 | 0 | } |
4414 | | |
4415 | | // ggml_conv_1d_dw |
4416 | | |
4417 | | struct ggml_tensor * ggml_conv_1d_dw( |
4418 | | struct ggml_context * ctx, |
4419 | | struct ggml_tensor * a, |
4420 | | struct ggml_tensor * b, |
4421 | | int s0, |
4422 | | int p0, |
4423 | 0 | int d0) { |
4424 | 0 | struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]); |
4425 | |
|
4426 | 0 | struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); |
4427 | |
|
4428 | 0 | struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a); |
4429 | |
|
4430 | 0 | result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1); |
4431 | |
|
4432 | 0 | return result; |
4433 | 0 | } |
4434 | | |
4435 | | // ggml_conv_1d_dw_ph |
4436 | | |
4437 | | struct ggml_tensor * ggml_conv_1d_dw_ph( |
4438 | | struct ggml_context * ctx, |
4439 | | struct ggml_tensor * a, |
4440 | | struct ggml_tensor * b, |
4441 | | int s0, |
4442 | 0 | int d0) { |
4443 | 0 | return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0); |
4444 | 0 | } |
4445 | | |
4446 | | // ggml_conv_transpose_1d |
4447 | | |
4448 | 0 | static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) { |
4449 | 0 | return (ins - 1) * s - 2 * p + d * (ks - 1) + 1; |
4450 | 0 | } |
4451 | | |
4452 | | GGML_API struct ggml_tensor * ggml_conv_transpose_1d( |
4453 | | struct ggml_context * ctx, |
4454 | | struct ggml_tensor * a, |
4455 | | struct ggml_tensor * b, |
4456 | | int s0, |
4457 | | int p0, |
4458 | 0 | int d0) { |
4459 | 0 | GGML_ASSERT(ggml_is_matrix(b)); |
4460 | 0 | GGML_ASSERT(a->ne[2] == b->ne[1]); |
4461 | 0 | GGML_ASSERT(a->ne[3] == 1); |
4462 | |
|
4463 | 0 | GGML_ASSERT(p0 == 0); |
4464 | 0 | GGML_ASSERT(d0 == 1); |
4465 | |
|
4466 | 0 | const int64_t ne[4] = { |
4467 | 0 | ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/), |
4468 | 0 | a->ne[1], b->ne[2], 1, |
4469 | 0 | }; |
4470 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4471 | |
|
4472 | 0 | int32_t params[] = { s0, p0, d0 }; |
4473 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4474 | |
|
4475 | 0 | result->op = GGML_OP_CONV_TRANSPOSE_1D; |
4476 | 0 | result->src[0] = a; |
4477 | 0 | result->src[1] = b; |
4478 | |
|
4479 | 0 | return result; |
4480 | 0 | } |
4481 | | |
4482 | | // ggml_conv_2d |
4483 | | |
4484 | | // a: [OC,IC, KH, KW] |
4485 | | // b: [N, IC, IH, IW] |
4486 | | // result: [N, OC, OH, OW] |
4487 | | struct ggml_tensor * ggml_conv_2d( |
4488 | | struct ggml_context * ctx, |
4489 | | struct ggml_tensor * a, |
4490 | | struct ggml_tensor * b, |
4491 | | int s0, |
4492 | | int s1, |
4493 | | int p0, |
4494 | | int p1, |
4495 | | int d0, |
4496 | 0 | int d1) { |
4497 | 0 | struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW] |
4498 | |
|
4499 | 0 | struct ggml_tensor * result = |
4500 | 0 | ggml_mul_mat(ctx, |
4501 | 0 | ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW] |
4502 | 0 | ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3])); // [OC,IC, KH, KW] => [OC, IC * KH * KW] |
4503 | |
|
4504 | 0 | result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW] |
4505 | 0 | result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW] |
4506 | | |
4507 | |
|
4508 | 0 | return result; |
4509 | 0 | } |
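// Example (sketch): a stride-2, padding-1, 3x3 convolution over an image batch, using the
// same [OC, IC, KH, KW] / [N, IC, IH, IW] index convention as the comments above. It roughly
// halves the spatial resolution (per the output-size formula) and yields [N, OC, OH, OW] in F32.
static struct ggml_tensor * conv2d_sketch(
        struct ggml_context * ctx, struct ggml_tensor * kernel, struct ggml_tensor * img) {
    return ggml_conv_2d(ctx, kernel, img, /*s0*/ 2, /*s1*/ 2, /*p0*/ 1, /*p1*/ 1, /*d0*/ 1, /*d1*/ 1);
}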
4510 | | |
4511 | | // a: [OC*IC, KD, KH, KW] |
4512 | | // b: [N*IC, ID, IH, IW] |
4513 | | // result: [N*OD, OH, OW, IC * KD * KH * KW] |
4514 | | struct ggml_tensor * ggml_im2col_3d( |
4515 | | struct ggml_context * ctx, |
4516 | | struct ggml_tensor * a, |
4517 | | struct ggml_tensor * b, |
4518 | | int64_t IC, |
4519 | | int s0, // stride width |
4520 | | int s1, // stride height |
4521 | | int s2, // stride depth |
4522 | | int p0, // padding width |
4523 | | int p1, // padding height |
4524 | | int p2, // padding depth |
4525 | | int d0, // dilation width |
4526 | | int d1, // dilation height |
4527 | | int d2, // dilation depth |
4528 | 0 | enum ggml_type dst_type) { |
4529 | 0 | const int64_t N = b->ne[3] / IC; |
4530 | 0 | const int64_t ID = b->ne[2]; |
4531 | 0 | const int64_t IH = b->ne[1]; |
4532 | 0 | const int64_t IW = b->ne[0]; |
4533 | |
|
4534 | 0 | const int64_t OC = a->ne[3] / IC; |
4535 | 0 | UNUSED(OC); |
4536 | 0 | const int64_t KD = a->ne[2]; |
4537 | 0 | const int64_t KH = a->ne[1]; |
4538 | 0 | const int64_t KW = a->ne[0]; |
4539 | 0 | const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2); |
4540 | 0 | const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1); |
4541 | 0 | const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0); |
4542 | |
|
4543 | 0 | GGML_ASSERT((OD > 0) && "b too small compared to a"); |
4544 | 0 | GGML_ASSERT((OH > 0) && "b too small compared to a"); |
4545 | 0 | GGML_ASSERT((OW > 0) && "b too small compared to a"); |
4546 | | |
4547 | |
|
4548 | 0 | const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N}; |
4549 | |
|
4550 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne); |
4551 | 0 | int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC}; |
4552 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4553 | |
|
4554 | 0 | result->op = GGML_OP_IM2COL_3D; |
4555 | 0 | result->src[0] = a; |
4556 | 0 | result->src[1] = b; |
4557 | |
|
4558 | 0 | return result; |
4559 | 0 | } |
4560 | | |
4561 | | // a: [OC*IC, KD, KH, KW] |
4562 | | // b: [N*IC, ID, IH, IW] |
4563 | | // result: [N*OC, OD, OH, OW] |
4564 | | struct ggml_tensor * ggml_conv_3d( |
4565 | | struct ggml_context * ctx, |
4566 | | struct ggml_tensor * a, |
4567 | | struct ggml_tensor * b, |
4568 | | int64_t IC, |
4569 | | int s0, // stride width |
4570 | | int s1, // stride height |
4571 | | int s2, // stride depth |
4572 | | int p0, // padding width |
4573 | | int p1, // padding height |
4574 | | int p2, // padding depth |
4575 | | int d0, // dilation width |
4576 | | int d1, // dilation height |
4577 | | int d2 // dilation depth |
4578 | 0 | ) { |
4579 | 0 | struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW] |
4580 | |
|
4581 | 0 | int64_t OC = a->ne[3] / IC; |
4582 | 0 | int64_t N = b->ne[3] / IC; |
4583 | 0 | struct ggml_tensor * result = |
4584 | 0 | ggml_mul_mat(ctx, |
4585 | 0 | ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW] |
4586 | 0 | ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC)); // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW] |
4587 | |
|
4588 | 0 | int64_t OD = im2col->ne[3] / N; |
4589 | 0 | result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW] |
4590 | 0 | result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW] |
4591 | 0 | result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW] |
4592 | |
|
4593 | 0 | return result; |
4594 | 0 | } |
4595 | | |
4596 | | // ggml_conv_2d_sk_p0 |
4597 | | |
4598 | | struct ggml_tensor * ggml_conv_2d_sk_p0( |
4599 | | struct ggml_context * ctx, |
4600 | | struct ggml_tensor * a, |
4601 | 0 | struct ggml_tensor * b) { |
4602 | 0 | return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1); |
4603 | 0 | } |
4604 | | |
4605 | | // ggml_conv_2d_s1_ph |
4606 | | |
4607 | | struct ggml_tensor * ggml_conv_2d_s1_ph( |
4608 | | struct ggml_context * ctx, |
4609 | | struct ggml_tensor * a, |
4610 | 0 | struct ggml_tensor * b) { |
4611 | 0 | return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1); |
4612 | 0 | } |
4613 | | |
4614 | | // ggml_conv_2d_dw |
4615 | | |
4616 | | struct ggml_tensor * ggml_conv_2d_dw( |
4617 | | struct ggml_context * ctx, |
4618 | | struct ggml_tensor * a, |
4619 | | struct ggml_tensor * b, |
4620 | | int s0, |
4621 | | int s1, |
4622 | | int p0, |
4623 | | int p1, |
4624 | | int d0, |
4625 | 0 | int d1) { |
4626 | 0 | struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]); |
4627 | 0 | struct ggml_tensor * im2col = ggml_im2col(ctx, new_a, |
4628 | 0 | ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]), |
4629 | 0 | s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW] |
4630 | 0 | struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW] |
4631 | |
|
4632 | 0 | new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW] |
4633 | 0 | struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b); |
4634 | 0 | result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW] |
4635 | |
|
4636 | 0 | return result; |
4637 | 0 | } |
4638 | | |
4639 | | // ggml_conv_2d_dw_direct |
4640 | | |
4641 | | struct ggml_tensor * ggml_conv_2d_dw_direct( |
4642 | | struct ggml_context * ctx, |
4643 | | struct ggml_tensor * a, |
4644 | | struct ggml_tensor * b, |
4645 | | int stride0, |
4646 | | int stride1, |
4647 | | int pad0, |
4648 | | int pad1, |
4649 | | int dilation0, |
4650 | 0 | int dilation1) { |
4651 | 0 | GGML_ASSERT(a->ne[2] == 1); |
4652 | 0 | GGML_ASSERT(a->ne[3] == b->ne[2]); |
4653 | 0 | int64_t ne[4]; |
4654 | 0 | ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0); |
4655 | 0 | ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1); |
4656 | 0 | ne[2] = b->ne[2]; |
4657 | 0 | ne[3] = b->ne[3]; |
4658 | |
|
4659 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne); |
4660 | |
|
4661 | 0 | if (ggml_is_contiguous_channels(b)) { |
4662 | | // Result will be permuted the same way as input (CWHN order) |
4663 | 0 | const int64_t type_size = ggml_type_size(result->type); |
4664 | 0 | GGML_ASSERT(ggml_blck_size(result->type) == 1); |
4665 | 0 | result->nb[0] = result->ne[2] * type_size; |
4666 | 0 | result->nb[1] = result->ne[0] * result->nb[0]; |
4667 | 0 | result->nb[2] = type_size; |
4668 | 0 | } |
4669 | |
|
4670 | 0 | int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 }; |
4671 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4672 | |
|
4673 | 0 | result->op = GGML_OP_CONV_2D_DW; |
4674 | 0 | result->src[0] = a; |
4675 | 0 | result->src[1] = b; |
4676 | 0 | return result; |
4677 | 0 | } |
4678 | | |
4679 | | // ggml_conv_2d_direct |
4680 | | |
4681 | | struct ggml_tensor * ggml_conv_2d_direct( |
4682 | | struct ggml_context * ctx, |
4683 | | struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC] |
4684 | | struct ggml_tensor * b, // input data [W, H, C, N] |
4685 | | int s0, // stride dimension 0 |
4686 | | int s1, // stride dimension 1 |
4687 | | int p0, // padding dimension 0 |
4688 | | int p1, // padding dimension 1 |
4689 | | int d0, // dilation dimension 0 |
4690 | 0 | int d1) {// dilation dimension 1 |
4691 | |
|
4692 | 0 | GGML_ASSERT(a->ne[2] == b->ne[2]); |
4693 | | //GGML_ASSERT(a->type == b->type); |
4694 | |
|
4695 | 0 | int64_t ne[4]; |
4696 | 0 | ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0); |
4697 | 0 | ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1); |
4698 | 0 | ne[2] = a->ne[3]; |
4699 | 0 | ne[3] = b->ne[3]; |
4700 | |
|
4701 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne); |
4702 | |
|
4703 | 0 | ggml_set_op_params_i32(result, 0, s0); |
4704 | 0 | ggml_set_op_params_i32(result, 1, s1); |
4705 | 0 | ggml_set_op_params_i32(result, 2, p0); |
4706 | 0 | ggml_set_op_params_i32(result, 3, p1); |
4707 | 0 | ggml_set_op_params_i32(result, 4, d0); |
4708 | 0 | ggml_set_op_params_i32(result, 5, d1); |
4709 | |
|
4710 | 0 | result->op = GGML_OP_CONV_2D; |
4711 | 0 | result->src[0] = a; |
4712 | 0 | result->src[1] = b; |
4713 | |
|
4714 | 0 | return result; |
4715 | 0 | } |
4716 | | |
4717 | | // ggml_conv_3d_direct |
4718 | | |
4719 | | struct ggml_tensor * ggml_conv_3d_direct( |
4720 | | struct ggml_context * ctx, |
4721 | | struct ggml_tensor * a, |
4722 | | struct ggml_tensor * b, |
4723 | | int s0, |
4724 | | int s1, |
4725 | | int s2, |
4726 | | int p0, |
4727 | | int p1, |
4728 | | int p2, |
4729 | | int d0, |
4730 | | int d1, |
4731 | | int d2, |
4732 | | int c, |
4733 | | int n, |
4734 | 0 | int oc) { |
4735 | |
|
4736 | 0 | GGML_ASSERT(a->ne[3] == (int64_t) c * oc); |
4737 | 0 | GGML_ASSERT(b->ne[3] == (int64_t) c * n); |
4738 | |
|
4739 | 0 | int64_t ne[4]; |
4740 | 0 | ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0); |
4741 | 0 | ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1); |
4742 | 0 | ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2); |
4743 | 0 | ne[3] = (int64_t) oc * n; |
4744 | |
|
4745 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4746 | |
|
4747 | 0 | ggml_set_op_params_i32(result, 0, s0); |
4748 | 0 | ggml_set_op_params_i32(result, 1, s1); |
4749 | 0 | ggml_set_op_params_i32(result, 2, s2); |
4750 | 0 | ggml_set_op_params_i32(result, 3, p0); |
4751 | 0 | ggml_set_op_params_i32(result, 4, p1); |
4752 | 0 | ggml_set_op_params_i32(result, 5, p2); |
4753 | 0 | ggml_set_op_params_i32(result, 6, d0); |
4754 | 0 | ggml_set_op_params_i32(result, 7, d1); |
4755 | 0 | ggml_set_op_params_i32(result, 8, d2); |
4756 | 0 | ggml_set_op_params_i32(result, 9, c); |
4757 | 0 | ggml_set_op_params_i32(result, 10, n); |
4758 | 0 | ggml_set_op_params_i32(result, 11, oc); |
4759 | |
|
4760 | 0 | result->op = GGML_OP_CONV_3D; |
4761 | 0 | result->src[0] = a; |
4762 | 0 | result->src[1] = b; |
4763 | |
|
4764 | 0 | return result; |
4765 | 0 | } |
4766 | | |
4767 | | // ggml_conv_transpose_2d_p0 |
4768 | | |
4769 | 0 | static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) { |
4770 | 0 | return (ins - 1) * s - 2 * p + ks; |
4771 | 0 | } |
4772 | | |
4773 | | struct ggml_tensor * ggml_conv_transpose_2d_p0( |
4774 | | struct ggml_context * ctx, |
4775 | | struct ggml_tensor * a, |
4776 | | struct ggml_tensor * b, |
4777 | 0 | int stride) { |
4778 | 0 | GGML_ASSERT(a->ne[3] == b->ne[2]); |
4779 | |
|
4780 | 0 | const int64_t ne[4] = { |
4781 | 0 | ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/), |
4782 | 0 | ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/), |
4783 | 0 | a->ne[2], b->ne[3], |
4784 | 0 | }; |
4785 | |
|
4786 | 0 | struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4787 | |
|
4788 | 0 | ggml_set_op_params_i32(result, 0, stride); |
4789 | |
|
4790 | 0 | result->op = GGML_OP_CONV_TRANSPOSE_2D; |
4791 | 0 | result->src[0] = a; |
4792 | 0 | result->src[1] = b; |
4793 | |
|
4794 | 0 | return result; |
4795 | 0 | } |
4796 | | |
4797 | | // ggml_pool_* |
4798 | | |
4799 | 0 | static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) { |
4800 | 0 | return (ins + 2 * p - ks) / s + 1; |
4801 | 0 | } |
4802 | | |
4803 | | // ggml_pool_1d |
4804 | | |
4805 | | struct ggml_tensor * ggml_pool_1d( |
4806 | | struct ggml_context * ctx, |
4807 | | struct ggml_tensor * a, |
4808 | | enum ggml_op_pool op, |
4809 | | int k0, |
4810 | | int s0, |
4811 | 0 | int p0) { |
4812 | 0 | const int64_t ne[4] = { |
4813 | 0 | ggml_calc_pool_output_size(a->ne[0], k0, s0, p0), |
4814 | 0 | a->ne[1], |
4815 | 0 | a->ne[2], |
4816 | 0 | a->ne[3], |
4817 | 0 | }; |
4818 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4819 | |
|
4820 | 0 | int32_t params[] = { op, k0, s0, p0 }; |
4821 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4822 | |
|
4823 | 0 | result->op = GGML_OP_POOL_1D; |
4824 | 0 | result->src[0] = a; |
4825 | |
|
4826 | 0 | return result; |
4827 | 0 | } |
4828 | | |
4829 | | // ggml_pool_2d |
4830 | | |
4831 | | struct ggml_tensor * ggml_pool_2d( |
4832 | | struct ggml_context * ctx, |
4833 | | struct ggml_tensor * a, |
4834 | | enum ggml_op_pool op, |
4835 | | int k0, |
4836 | | int k1, |
4837 | | int s0, |
4838 | | int s1, |
4839 | | float p0, |
4840 | 0 | float p1) { |
4841 | 0 | struct ggml_tensor * result; |
4842 | 0 | const int64_t ne[4] = { |
4843 | 0 | ggml_calc_pool_output_size(a->ne[0], k0, s0, p0), |
4844 | 0 | ggml_calc_pool_output_size(a->ne[1], k1, s1, p1), |
4845 | 0 | a->ne[2], |
4846 | 0 | a->ne[3], |
4847 | 0 | }; |
4848 | 0 | result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
4849 | |
|
4850 | 0 | int32_t params[] = { op, k0, k1, s0, s1, p0, p1 }; |
4851 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4852 | |
|
4853 | 0 | result->op = GGML_OP_POOL_2D; |
4854 | 0 | result->src[0] = a; |
4855 | |
|
4856 | 0 | return result; |
4857 | 0 | } |
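// [editor's sketch, not part of the original source] Example use of ggml_pool_2d for a
// plain 2x2 max pool with stride 2 and no padding; the helper name is hypothetical.
static struct ggml_tensor * example_max_pool_2x2(
        struct ggml_context * ctx,
        struct ggml_tensor  * x) {  // [W, H, C, N], F32
    // output is [(W - 2)/2 + 1, (H - 2)/2 + 1, C, N]
    return ggml_pool_2d(ctx, x, GGML_OP_POOL_MAX,
                        /*k0*/ 2, /*k1*/ 2, /*s0*/ 2, /*s1*/ 2, /*p0*/ 0.0f, /*p1*/ 0.0f);
}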
4858 | | |
4859 | | struct ggml_tensor * ggml_pool_2d_back( |
4860 | | struct ggml_context * ctx, |
4861 | | struct ggml_tensor * a, |
4862 | | struct ggml_tensor * af, |
4863 | | enum ggml_op_pool op, |
4864 | | int k0, |
4865 | | int k1, |
4866 | | int s0, |
4867 | | int s1, |
4868 | | float p0, |
4869 | 0 | float p1) { |
4870 | 0 | struct ggml_tensor * result; |
4871 | 0 | result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne); |
4872 | |
|
4873 | 0 | int32_t params[] = { op, k0, k1, s0, s1, p0, p1 }; |
4874 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
4875 | |
|
4876 | 0 | result->op = GGML_OP_POOL_2D_BACK; |
4877 | 0 | result->src[0] = a; |
4878 | 0 | result->src[1] = af; |
4879 | |
|
4880 | 0 | return result; |
4881 | 0 | } |
4882 | | |
4883 | | // ggml_upscale / ggml_interpolate |
4884 | | |
4885 | | static struct ggml_tensor * ggml_interpolate_impl( |
4886 | | struct ggml_context * ctx, |
4887 | | struct ggml_tensor * a, |
4888 | | int64_t ne0, |
4889 | | int64_t ne1, |
4890 | | int64_t ne2, |
4891 | | int64_t ne3, |
4892 | 0 | uint32_t mode) { |
4893 | 0 | GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT); |
4894 | |
|
4895 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3); |
4896 | |
|
4897 | 0 | ggml_set_op_params_i32(result, 0, (int32_t)mode); |
4898 | |
|
4899 | 0 | result->op = GGML_OP_UPSCALE; |
4900 | 0 | result->src[0] = a; |
4901 | |
|
4902 | 0 | return result; |
4903 | 0 | } |
4904 | | |
4905 | | struct ggml_tensor * ggml_upscale( |
4906 | | struct ggml_context * ctx, |
4907 | | struct ggml_tensor * a, |
4908 | | int scale_factor, |
4909 | 0 | enum ggml_scale_mode mode) { |
4910 | 0 | GGML_ASSERT(scale_factor > 1); |
4911 | 0 | return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode); |
4912 | 0 | } |
4913 | | |
4914 | | struct ggml_tensor * ggml_upscale_ext( |
4915 | | struct ggml_context * ctx, |
4916 | | struct ggml_tensor * a, |
4917 | | int ne0, |
4918 | | int ne1, |
4919 | | int ne2, |
4920 | | int ne3, |
4921 | 0 | enum ggml_scale_mode mode) { |
4922 | 0 | return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode); |
4923 | 0 | } |
4924 | | |
4925 | | struct ggml_tensor * ggml_interpolate( |
4926 | | struct ggml_context * ctx, |
4927 | | struct ggml_tensor * a, |
4928 | | int64_t ne0, |
4929 | | int64_t ne1, |
4930 | | int64_t ne2, |
4931 | | int64_t ne3, |
4932 | 0 | uint32_t mode) { |
4933 | 0 | return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode); |
4934 | 0 | } |
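// [editor's sketch, not part of the original source] ggml_upscale is a convenience wrapper
// around the interpolate op that scales only the first two dims; the enum values below
// (GGML_SCALE_MODE_NEAREST / GGML_SCALE_MODE_BILINEAR) are assumed from ggml.h.
//
//     // [W, H, C, N] -> [2W, 2H, C, N], nearest-neighbour
//     struct ggml_tensor * up = ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST);
//
//     // arbitrary target size via ggml_interpolate, bilinear
//     struct ggml_tensor * rs = ggml_interpolate(ctx, x, 224, 224, x->ne[2], x->ne[3],
//                                                GGML_SCALE_MODE_BILINEAR);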
4935 | | |
4936 | | // ggml_pad |
4937 | | |
4938 | | struct ggml_tensor * ggml_pad( |
4939 | | struct ggml_context * ctx, |
4940 | | struct ggml_tensor * a, |
4941 | | int p0, |
4942 | | int p1, |
4943 | | int p2, |
4944 | 0 | int p3) { |
4945 | 0 | return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3); |
4946 | 0 | } |
4947 | | |
4948 | | struct ggml_tensor * ggml_pad_ext( |
4949 | | struct ggml_context * ctx, |
4950 | | struct ggml_tensor * a, |
4951 | | int lp0, |
4952 | | int rp0, |
4953 | | int lp1, |
4954 | | int rp1, |
4955 | | int lp2, |
4956 | | int rp2, |
4957 | | int lp3, |
4958 | | int rp3 |
4959 | 0 | ) { |
4960 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, |
4961 | 0 | a->ne[0] + lp0 + rp0, |
4962 | 0 | a->ne[1] + lp1 + rp1, |
4963 | 0 | a->ne[2] + lp2 + rp2, |
4964 | 0 | a->ne[3] + lp3 + rp3); |
4965 | |
|
4966 | 0 | ggml_set_op_params_i32(result, 0, lp0); |
4967 | 0 | ggml_set_op_params_i32(result, 1, rp0); |
4968 | 0 | ggml_set_op_params_i32(result, 2, lp1); |
4969 | 0 | ggml_set_op_params_i32(result, 3, rp1); |
4970 | 0 | ggml_set_op_params_i32(result, 4, lp2); |
4971 | 0 | ggml_set_op_params_i32(result, 5, rp2); |
4972 | 0 | ggml_set_op_params_i32(result, 6, lp3); |
4973 | 0 | ggml_set_op_params_i32(result, 7, rp3); |
4974 | | |
4975 | |
|
4976 | 0 | result->op = GGML_OP_PAD; |
4977 | 0 | result->src[0] = a; |
4978 | |
|
4979 | 0 | return result; |
4980 | 0 | } |
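// [editor's note, not part of the original source] The op params above store left/right
// padding per dimension; e.g. for x of shape [W, H, C, N]:
//
//     // pad 1 element on the left and 2 on the right of dim 0 only
//     // -> result shape [W + 3, H, C, N], with the new elements zero-filled
//     struct ggml_tensor * y = ggml_pad_ext(ctx, x, 1, 2, 0, 0, 0, 0, 0, 0);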
4981 | | |
4982 | | // ggml_pad_reflect_1d |
4983 | | |
4984 | | struct ggml_tensor * ggml_pad_reflect_1d( |
4985 | | struct ggml_context * ctx, |
4986 | | struct ggml_tensor * a, |
4987 | | int p0, |
4988 | 0 | int p1) { |
4989 | 0 | GGML_ASSERT(p0 >= 0); |
4990 | 0 | GGML_ASSERT(p1 >= 0); |
4991 | |
|
4992 | 0 | GGML_ASSERT(p0 < a->ne[0]); // padding length on each side must be less than the |
4993 | 0 | GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded |
4994 | |
|
4995 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
4996 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
4997 | |
|
4998 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, |
4999 | 0 | a->ne[0] + p0 + p1, |
5000 | 0 | a->ne[1], |
5001 | 0 | a->ne[2], |
5002 | 0 | a->ne[3]); |
5003 | |
|
5004 | 0 | int32_t params[] = { p0, p1 }; |
5005 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
5006 | |
|
5007 | 0 | result->op = GGML_OP_PAD_REFLECT_1D; |
5008 | 0 | result->src[0] = a; |
5009 | |
|
5010 | 0 | return result; |
5011 | 0 | } |
5012 | | |
5013 | | // ggml_roll |
5014 | | |
5015 | | struct ggml_tensor * ggml_roll( |
5016 | | struct ggml_context * ctx, |
5017 | | struct ggml_tensor * a, |
5018 | | int shift0, |
5019 | | int shift1, |
5020 | | int shift2, |
5021 | 0 | int shift3) { |
5022 | 0 | GGML_ASSERT(a->nb[0] == ggml_type_size(a->type)); |
5023 | 0 | GGML_ASSERT(abs(shift0) < a->ne[0]); |
5024 | 0 | GGML_ASSERT(abs(shift1) < a->ne[1]); |
5025 | 0 | GGML_ASSERT(abs(shift2) < a->ne[2]); |
5026 | 0 | GGML_ASSERT(abs(shift3) < a->ne[3]); |
5027 | |
|
5028 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
5029 | |
|
5030 | 0 | ggml_set_op_params_i32(result, 0, shift0); |
5031 | 0 | ggml_set_op_params_i32(result, 1, shift1); |
5032 | 0 | ggml_set_op_params_i32(result, 2, shift2); |
5033 | 0 | ggml_set_op_params_i32(result, 3, shift3); |
5034 | |
|
5035 | 0 | result->op = GGML_OP_ROLL; |
5036 | 0 | result->src[0] = a; |
5037 | |
|
5038 | 0 | return result; |
5039 | 0 | } |
5040 | | |
5041 | | // ggml_arange |
5042 | | |
5043 | | struct ggml_tensor * ggml_arange( |
5044 | | struct ggml_context * ctx, |
5045 | | float start, |
5046 | | float stop, |
5047 | 0 | float step) { |
5048 | 0 | GGML_ASSERT(stop > start); |
5049 | |
|
5050 | 0 | const int64_t steps = (int64_t) ceilf((stop - start) / step); |
5051 | |
|
5052 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps); |
5053 | |
|
5054 | 0 | ggml_set_op_params_f32(result, 0, start); |
5055 | 0 | ggml_set_op_params_f32(result, 1, stop); |
5056 | 0 | ggml_set_op_params_f32(result, 2, step); |
5057 | |
|
5058 | 0 | result->op = GGML_OP_ARANGE; |
5059 | |
|
5060 | 0 | return result; |
5061 | 0 | } |
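// [editor's note, not part of the original source] The number of elements follows
// steps = ceil((stop - start)/step), e.g.:
//   ggml_arange(ctx, 0.0f, 5.0f, 1.0f)  -> 5 elements: 0, 1, 2, 3, 4
//   ggml_arange(ctx, 0.0f, 5.0f, 1.5f)  -> 4 elements: 0, 1.5, 3, 4.5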
5062 | | |
5063 | | // ggml_timestep_embedding |
5064 | | |
5065 | | struct ggml_tensor * ggml_timestep_embedding( |
5066 | | struct ggml_context * ctx, |
5067 | | struct ggml_tensor * timesteps, |
5068 | | int dim, |
5069 | 0 | int max_period) { |
5070 | |
|
5071 | 0 | struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]); |
5072 | |
|
5073 | 0 | ggml_set_op_params_i32(result, 0, dim); |
5074 | 0 | ggml_set_op_params_i32(result, 1, max_period); |
5075 | |
|
5076 | 0 | result->op = GGML_OP_TIMESTEP_EMBEDDING; |
5077 | 0 | result->src[0] = timesteps; |
5078 | |
|
5079 | 0 | return result; |
5080 | 0 | } |
5081 | | |
5082 | | // ggml_tri |
5083 | | |
5084 | | struct ggml_tensor * ggml_tri( |
5085 | | struct ggml_context * ctx, |
5086 | | struct ggml_tensor * a, |
5087 | 0 | enum ggml_tri_type type) { |
5088 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
5089 | |
|
5090 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
5091 | 0 | GGML_ASSERT(a->ne[0] == a->ne[1]); |
5092 | |
|
5093 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, a); |
5094 | |
|
5095 | 0 | ggml_set_op_params_i32(result, 0, type); |
5096 | |
|
5097 | 0 | result->op = GGML_OP_TRI; |
5098 | 0 | result->src[0] = a; |
5099 | |
|
5100 | 0 | return result; |
5101 | 0 | } |
5102 | | |
5103 | | // ggml_fill |
5104 | | |
5105 | | static struct ggml_tensor * ggml_fill_impl( |
5106 | | struct ggml_context * ctx, |
5107 | | struct ggml_tensor * a, |
5108 | | float c, |
5109 | 0 | bool inplace) { |
5110 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
5111 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
5112 | |
|
5113 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5114 | |
|
5115 | 0 | ggml_set_op_params_f32(result, 0, c); |
5116 | |
|
5117 | 0 | result->op = GGML_OP_FILL; |
5118 | 0 | result->src[0] = a; |
5119 | |
|
5120 | 0 | return result; |
5121 | 0 | } |
5122 | | |
5123 | | struct ggml_tensor * ggml_fill( |
5124 | | struct ggml_context * ctx, |
5125 | | struct ggml_tensor * a, |
5126 | 0 | float c) { |
5127 | 0 | return ggml_fill_impl(ctx, a, c, false); |
5128 | 0 | } |
5129 | | |
5130 | | struct ggml_tensor * ggml_fill_inplace( |
5131 | | struct ggml_context * ctx, |
5132 | | struct ggml_tensor * a, |
5133 | 0 | float c) { |
5134 | 0 | return ggml_fill_impl(ctx, a, c, true); |
5135 | 0 | } |
5136 | | |
5137 | | // ggml_argsort |
5138 | | |
5139 | | struct ggml_tensor * ggml_argsort( |
5140 | | struct ggml_context * ctx, |
5141 | | struct ggml_tensor * a, |
5142 | 0 | enum ggml_sort_order order) { |
5143 | 0 | GGML_ASSERT(a->ne[0] <= INT32_MAX); |
5144 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne); |
5145 | |
|
5146 | 0 | ggml_set_op_params_i32(result, 0, (int32_t) order); |
5147 | |
|
5148 | 0 | result->op = GGML_OP_ARGSORT; |
5149 | 0 | result->src[0] = a; |
5150 | |
|
5151 | 0 | return result; |
5152 | 0 | } |
5153 | | |
5154 | | // ggml_top_k |
5155 | | |
5156 | | struct ggml_tensor * ggml_top_k( |
5157 | | struct ggml_context * ctx, |
5158 | | struct ggml_tensor * a, |
5159 | 0 | int k) { |
5160 | 0 | GGML_ASSERT(a->ne[0] >= k); |
5161 | |
|
5162 | 0 | struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC); |
5163 | |
|
5164 | 0 | result = ggml_view_4d(ctx, result, |
5165 | 0 | k, result->ne[1], result->ne[2], result->ne[3], |
5166 | 0 | result->nb[1], result->nb[2], result->nb[3], |
5167 | 0 | 0); |
5168 | |
|
5169 | 0 | return result; |
5170 | 0 | } |
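// [editor's note, not part of the original source] ggml_top_k returns I32 *indices*, not
// the values themselves: it is a full descending argsort along dim 0 followed by a view
// of the first k entries of each row; gathering the corresponding values is left to the
// caller.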
5171 | | |
5172 | | // ggml_flash_attn_ext |
5173 | | |
5174 | | struct ggml_tensor * ggml_flash_attn_ext( |
5175 | | struct ggml_context * ctx, |
5176 | | struct ggml_tensor * q, |
5177 | | struct ggml_tensor * k, |
5178 | | struct ggml_tensor * v, |
5179 | | struct ggml_tensor * mask, |
5180 | | float scale, |
5181 | | float max_bias, |
5182 | 0 | float logit_softcap) { |
5183 | 0 | GGML_ASSERT(ggml_can_mul_mat(k, q)); |
5184 | | // TODO: check if vT can be multiplied by (k*qT) |
5185 | |
|
5186 | 0 | GGML_ASSERT(q->ne[3] == k->ne[3]); |
5187 | 0 | GGML_ASSERT(q->ne[3] == v->ne[3]); |
5188 | |
|
5189 | 0 | if (mask) { |
5190 | 0 | GGML_ASSERT(ggml_is_contiguous(mask)); |
5191 | 0 | GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) && |
5192 | 0 | "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big"); |
5193 | | //GGML_ASSERT(ggml_can_repeat_rows(mask, qk)); |
5194 | |
|
5195 | 0 | GGML_ASSERT(q->ne[2] % mask->ne[2] == 0); |
5196 | 0 | GGML_ASSERT(q->ne[3] % mask->ne[3] == 0); |
5197 | 0 | } |
5198 | |
|
5199 | 0 | if (max_bias > 0.0f) { |
5200 | 0 | GGML_ASSERT(mask); |
5201 | 0 | } |
5202 | | |
5203 | | // permute(0, 2, 1, 3) |
5204 | 0 | int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] }; |
5205 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
5206 | |
|
5207 | 0 | float params[] = { scale, max_bias, logit_softcap }; |
5208 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
5209 | |
|
5210 | 0 | result->op = GGML_OP_FLASH_ATTN_EXT; |
5211 | 0 | result->src[0] = q; |
5212 | 0 | result->src[1] = k; |
5213 | 0 | result->src[2] = v; |
5214 | 0 | result->src[3] = mask; |
5215 | |
|
5216 | 0 | return result; |
5217 | 0 | } |
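// [editor's sketch, not part of the original source] A plausible shape contract for the
// op above, inferred from its asserts and result shape (head size D, value head size Dv,
// n_head H, query length N, KV length M, batch B); the exact mask layout is an assumption:
//
//     q:    [D,  N, H,    B]
//     k:    [D,  M, H_kv, B]     with H % H_kv == 0 (grouped-query attention)
//     v:    [Dv, M, H_kv, B]
//     mask: contiguous, ne[1] >= GGML_PAD(N, GGML_KQ_MASK_PAD)
//     res:  [Dv, H, N, B]        (note the permuted layout relative to q)
//
//     struct ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, mask,
//             1.0f/sqrtf((float) q->ne[0]), /*max_bias*/ 0.0f, /*logit_softcap*/ 0.0f);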
5218 | | |
5219 | | void ggml_flash_attn_ext_set_prec( |
5220 | | struct ggml_tensor * a, |
5221 | 0 | enum ggml_prec prec) { |
5222 | 0 | GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT); |
5223 | |
|
5224 | 0 | const int32_t prec_i32 = (int32_t) prec; |
5225 | |
|
5226 | 0 | ggml_set_op_params_i32(a, 3, prec_i32); // scale is on first pos, max_bias on second |
5227 | 0 | } |
5228 | | |
5229 | | enum ggml_prec ggml_flash_attn_ext_get_prec( |
5230 | 0 | const struct ggml_tensor * a) { |
5231 | 0 | GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT); |
5232 | |
|
5233 | 0 | const int32_t prec_i32 = ggml_get_op_params_i32(a, 3); |
5234 | |
|
5235 | 0 | return (enum ggml_prec) prec_i32; |
5236 | 0 | } |
5237 | | |
5238 | | void ggml_flash_attn_ext_add_sinks( |
5239 | | struct ggml_tensor * a, |
5240 | 0 | struct ggml_tensor * sinks) { |
5241 | 0 | if (!sinks) { |
5242 | 0 | a->src[4] = NULL; |
5243 | 0 | return; |
5244 | 0 | } |
5245 | | |
5246 | 0 | GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT); |
5247 | 0 | GGML_ASSERT(a->src[4] == NULL); |
5248 | 0 | GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]); |
5249 | 0 | GGML_ASSERT(sinks->type == GGML_TYPE_F32); |
5250 | |
|
5251 | 0 | a->src[4] = sinks; |
5252 | 0 | } |
5253 | | |
5254 | | // ggml_flash_attn_back |
5255 | | |
5256 | | struct ggml_tensor * ggml_flash_attn_back( |
5257 | | struct ggml_context * ctx, |
5258 | | struct ggml_tensor * q, |
5259 | | struct ggml_tensor * k, |
5260 | | struct ggml_tensor * v, |
5261 | | struct ggml_tensor * d, |
5262 | 0 | bool masked) { |
5263 | 0 | GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes"); |
5264 | |
|
5265 | 0 | GGML_ASSERT(ggml_can_mul_mat(k, q)); |
5266 | | // TODO: check if vT can be multiplied by (k*qT) |
5267 | | |
5268 | | // d shape [D,N,ne2,ne3] |
5269 | | // q shape [D,N,ne2,ne3] |
5270 | | // k shape [D,M,kvne2,ne3] |
5271 | | // v shape [M,D,kvne2,ne3] |
5272 | |
|
5273 | 0 | const int64_t D = q->ne[0]; |
5274 | 0 | const int64_t N = q->ne[1]; |
5275 | 0 | const int64_t M = k->ne[1]; |
5276 | 0 | const int64_t ne2 = q->ne[2]; |
5277 | 0 | const int64_t ne3 = q->ne[3]; |
5278 | 0 | const int64_t kvne2 = k->ne[2]; |
5279 | |
|
5280 | 0 | GGML_ASSERT(k->ne[0] == D); |
5281 | 0 | GGML_ASSERT(v->ne[0] == M); |
5282 | 0 | GGML_ASSERT(v->ne[1] == D); |
5283 | 0 | GGML_ASSERT(d->ne[0] == D); |
5284 | 0 | GGML_ASSERT(d->ne[1] == N); |
5285 | 0 | GGML_ASSERT(k->ne[2] == kvne2); |
5286 | 0 | GGML_ASSERT(k->ne[3] == ne3); |
5287 | 0 | GGML_ASSERT(v->ne[2] == kvne2); |
5288 | 0 | GGML_ASSERT(v->ne[3] == ne3); |
5289 | 0 | GGML_ASSERT(d->ne[2] == ne2); |
5290 | 0 | GGML_ASSERT(d->ne[3] == ne3); |
5291 | |
|
5292 | 0 | GGML_ASSERT(ne2 % kvne2 == 0); |
5293 | | |
5294 | | // store gradients of q, k and v as contiguous tensors concatenated in result. |
5295 | | // note: v and gradv are actually transposed, i.e. v->ne[0] != D. |
5296 | 0 | const int64_t elem_q = ggml_nelements(q); |
5297 | 0 | const int64_t elem_k = ggml_nelements(k); |
5298 | 0 | const int64_t elem_v = ggml_nelements(v); |
5299 | |
|
5300 | 0 | enum ggml_type result_type = GGML_TYPE_F32; |
5301 | 0 | GGML_ASSERT(ggml_blck_size(result_type) == 1); |
5302 | 0 | const size_t tsize = ggml_type_size(result_type); |
5303 | |
|
5304 | 0 | const size_t offs_q = 0; |
5305 | 0 | const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN); |
5306 | 0 | const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN); |
5307 | 0 | const size_t end = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN); |
5308 | |
|
5309 | 0 | const size_t nelements = (end + tsize - 1)/tsize; |
5310 | |
|
5311 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements); |
5312 | |
|
5313 | 0 | int32_t masked_i = masked ? 1 : 0; |
5314 | 0 | ggml_set_op_params(result, &masked_i, sizeof(masked_i)); |
5315 | |
|
5316 | 0 | result->op = GGML_OP_FLASH_ATTN_BACK; |
5317 | 0 | result->src[0] = q; |
5318 | 0 | result->src[1] = k; |
5319 | 0 | result->src[2] = v; |
5320 | 0 | result->src[3] = d; |
5321 | |
|
5322 | 0 | return result; |
5323 | 0 | } |
5324 | | |
5325 | | // ggml_ssm_conv |
5326 | | |
5327 | | struct ggml_tensor * ggml_ssm_conv( |
5328 | | struct ggml_context * ctx, |
5329 | | struct ggml_tensor * sx, |
5330 | 0 | struct ggml_tensor * c) { |
5331 | 0 | GGML_ASSERT(ggml_is_3d(sx)); |
5332 | 0 | GGML_ASSERT(ggml_is_matrix(c)); |
5333 | |
|
5334 | 0 | const int64_t d_conv = c->ne[0]; |
5335 | 0 | const int64_t d_inner = c->ne[1]; |
5336 | 0 | const int64_t n_t = sx->ne[0] - d_conv + 1; // tokens per sequence |
5337 | 0 | const int64_t n_s = sx->ne[2]; |
5338 | | |
5339 | | // TODO: maybe support other strides than 1? |
5340 | 0 | GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t); |
5341 | 0 | GGML_ASSERT(sx->ne[1] == d_inner); |
5342 | 0 | GGML_ASSERT(n_t >= 0); |
5343 | |
|
5344 | 0 | struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s); |
5345 | |
|
5346 | 0 | result->op = GGML_OP_SSM_CONV; |
5347 | 0 | result->src[0] = sx; |
5348 | 0 | result->src[1] = c; |
5349 | |
|
5350 | 0 | return result; |
5351 | 0 | } |
5352 | | |
5353 | | // ggml_ssm_scan |
5354 | | |
5355 | | struct ggml_tensor * ggml_ssm_scan( |
5356 | | struct ggml_context * ctx, |
5357 | | struct ggml_tensor * s, |
5358 | | struct ggml_tensor * x, |
5359 | | struct ggml_tensor * dt, |
5360 | | struct ggml_tensor * A, |
5361 | | struct ggml_tensor * B, |
5362 | | struct ggml_tensor * C, |
5363 | 0 | struct ggml_tensor * ids) { |
5364 | 0 | GGML_ASSERT(ggml_is_contiguous(s)); |
5365 | 0 | GGML_ASSERT(ggml_is_contiguous(dt)); |
5366 | 0 | GGML_ASSERT(ggml_is_contiguous(A)); |
5367 | 0 | GGML_ASSERT(x->nb[0] == ggml_type_size(x->type)); |
5368 | 0 | GGML_ASSERT(B->nb[0] == ggml_type_size(B->type)); |
5369 | 0 | GGML_ASSERT(C->nb[0] == ggml_type_size(C->type)); |
5370 | 0 | GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]); |
5371 | 0 | GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]); |
5372 | 0 | GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]); |
5373 | 0 | GGML_ASSERT(ggml_are_same_shape(B, C)); |
5374 | 0 | GGML_ASSERT(ids->type == GGML_TYPE_I32); |
5375 | |
|
5376 | 0 | { |
5377 | 0 | const int64_t d_state = s->ne[0]; |
5378 | 0 | const int64_t head_dim = x->ne[0]; |
5379 | 0 | const int64_t n_head = x->ne[1]; |
5380 | 0 | const int64_t n_seq_tokens = x->ne[2]; |
5381 | 0 | const int64_t n_seqs = x->ne[3]; |
5382 | |
|
5383 | 0 | GGML_ASSERT(dt->ne[0] == n_head); |
5384 | 0 | GGML_ASSERT(dt->ne[1] == n_seq_tokens); |
5385 | 0 | GGML_ASSERT(dt->ne[2] == n_seqs); |
5386 | 0 | GGML_ASSERT(ggml_is_3d(dt)); |
5387 | 0 | GGML_ASSERT(s->ne[1] == head_dim); |
5388 | 0 | GGML_ASSERT(s->ne[2] == n_head); |
5389 | 0 | GGML_ASSERT(B->ne[0] == d_state); |
5390 | 0 | GGML_ASSERT(B->ne[2] == n_seq_tokens); |
5391 | 0 | GGML_ASSERT(B->ne[3] == n_seqs); |
5392 | 0 | GGML_ASSERT(ids->ne[0] == n_seqs); |
5393 | 0 | GGML_ASSERT(ggml_is_vector(ids)); |
5394 | 0 | GGML_ASSERT(A->ne[1] == n_head); |
5395 | 0 | GGML_ASSERT(ggml_is_matrix(A)); |
5396 | |
|
5397 | 0 | if (A->ne[0] != 1) { |
5398 | | // Mamba-1 has more granular decay factors |
5399 | 0 | GGML_ASSERT(A->ne[0] == d_state); |
5400 | 0 | } |
5401 | 0 | } |
5402 | | |
5403 | | // concatenated y + ssm_states |
5404 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]); |
5405 | |
|
5406 | 0 | result->op = GGML_OP_SSM_SCAN; |
5407 | 0 | result->src[0] = s; |
5408 | 0 | result->src[1] = x; |
5409 | 0 | result->src[2] = dt; |
5410 | 0 | result->src[3] = A; |
5411 | 0 | result->src[4] = B; |
5412 | 0 | result->src[5] = C; |
5413 | 0 | result->src[6] = ids; |
5414 | |
|
5415 | 0 | return result; |
5416 | 0 | } |
5417 | | |
5418 | | // ggml_win_part |
5419 | | |
5420 | | struct ggml_tensor * ggml_win_part( |
5421 | | struct ggml_context * ctx, |
5422 | | struct ggml_tensor * a, |
5423 | 0 | int w) { |
5424 | 0 | GGML_ASSERT(a->ne[3] == 1); |
5425 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
5426 | | |
5427 | | // padding |
5428 | 0 | const int px = (w - a->ne[1]%w)%w; |
5429 | 0 | const int py = (w - a->ne[2]%w)%w; |
5430 | |
|
5431 | 0 | const int npx = (px + a->ne[1])/w; |
5432 | 0 | const int npy = (py + a->ne[2])/w; |
5433 | 0 | const int np = npx*npy; |
5434 | |
|
5435 | 0 | const int64_t ne[4] = { a->ne[0], w, w, np, }; |
5436 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
5437 | |
|
5438 | 0 | int32_t params[] = { npx, npy, w }; |
5439 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
5440 | |
|
5441 | 0 | result->op = GGML_OP_WIN_PART; |
5442 | 0 | result->src[0] = a; |
5443 | |
|
5444 | 0 | return result; |
5445 | 0 | } |
5446 | | |
5447 | | // ggml_win_unpart |
5448 | | |
5449 | | struct ggml_tensor * ggml_win_unpart( |
5450 | | struct ggml_context * ctx, |
5451 | | struct ggml_tensor * a, |
5452 | | int w0, |
5453 | | int h0, |
5454 | 0 | int w) { |
5455 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
5456 | |
|
5457 | 0 | const int64_t ne[4] = { a->ne[0], w0, h0, 1, }; |
5458 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne); |
5459 | |
|
5460 | 0 | int32_t params[] = { w }; |
5461 | 0 | ggml_set_op_params(result, params, sizeof(params)); |
5462 | |
|
5463 | 0 | result->op = GGML_OP_WIN_UNPART; |
5464 | 0 | result->src[0] = a; |
5465 | |
|
5466 | 0 | return result; |
5467 | 0 | } |
5468 | | |
5469 | | // ggml_get_rel_pos |
5470 | | |
5471 | | struct ggml_tensor * ggml_get_rel_pos( |
5472 | | struct ggml_context * ctx, |
5473 | | struct ggml_tensor * a, |
5474 | | int qh, |
5475 | 0 | int kh) { |
5476 | 0 | GGML_ASSERT(qh == kh); |
5477 | 0 | GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]); |
5478 | |
|
5479 | 0 | const int64_t ne[4] = { a->ne[0], kh, qh, 1, }; |
5480 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne); |
5481 | |
|
5482 | 0 | result->op = GGML_OP_GET_REL_POS; |
5483 | 0 | result->src[0] = a; |
5484 | |
|
5485 | 0 | return result; |
5486 | 0 | } |
5487 | | |
5488 | | // ggml_add_rel_pos |
5489 | | |
5490 | | static struct ggml_tensor * ggml_add_rel_pos_impl( |
5491 | | struct ggml_context * ctx, |
5492 | | struct ggml_tensor * a, |
5493 | | struct ggml_tensor * pw, |
5494 | | struct ggml_tensor * ph, |
5495 | 0 | bool inplace) { |
5496 | 0 | GGML_ASSERT(ggml_are_same_shape(pw, ph)); |
5497 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
5498 | 0 | GGML_ASSERT(ggml_is_contiguous(pw)); |
5499 | 0 | GGML_ASSERT(ggml_is_contiguous(ph)); |
5500 | 0 | GGML_ASSERT(ph->type == GGML_TYPE_F32); |
5501 | 0 | GGML_ASSERT(pw->type == GGML_TYPE_F32); |
5502 | 0 | GGML_ASSERT(pw->ne[3] == a->ne[2]); |
5503 | 0 | GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]); |
5504 | 0 | GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]); |
5505 | |
|
5506 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5507 | 0 | ggml_set_op_params_i32(result, 0, inplace ? 1 : 0); |
5508 | |
|
5509 | 0 | result->op = GGML_OP_ADD_REL_POS; |
5510 | 0 | result->src[0] = a; |
5511 | 0 | result->src[1] = pw; |
5512 | 0 | result->src[2] = ph; |
5513 | |
|
5514 | 0 | return result; |
5515 | 0 | } |
5516 | | |
5517 | | struct ggml_tensor * ggml_add_rel_pos( |
5518 | | struct ggml_context * ctx, |
5519 | | struct ggml_tensor * a, |
5520 | | struct ggml_tensor * pw, |
5521 | 0 | struct ggml_tensor * ph) { |
5522 | 0 | return ggml_add_rel_pos_impl(ctx, a, pw, ph, false); |
5523 | 0 | } |
5524 | | |
5525 | | struct ggml_tensor * ggml_add_rel_pos_inplace( |
5526 | | struct ggml_context * ctx, |
5527 | | struct ggml_tensor * a, |
5528 | | struct ggml_tensor * pw, |
5529 | 0 | struct ggml_tensor * ph) { |
5530 | 0 | return ggml_add_rel_pos_impl(ctx, a, pw, ph, true); |
5531 | 0 | } |
5532 | | |
5533 | | // ggml_rwkv_wkv6 |
5534 | | |
5535 | | struct ggml_tensor * ggml_rwkv_wkv6( |
5536 | | struct ggml_context * ctx, |
5537 | | struct ggml_tensor * k, |
5538 | | struct ggml_tensor * v, |
5539 | | struct ggml_tensor * r, |
5540 | | struct ggml_tensor * tf, |
5541 | | struct ggml_tensor * td, |
5542 | 0 | struct ggml_tensor * state) { |
5543 | 0 | GGML_ASSERT(ggml_is_contiguous(k)); |
5544 | 0 | GGML_ASSERT(ggml_is_contiguous(v)); |
5545 | 0 | GGML_ASSERT(ggml_is_contiguous(r)); |
5546 | 0 | GGML_ASSERT(ggml_is_contiguous(tf)); |
5547 | 0 | GGML_ASSERT(ggml_is_contiguous(td)); |
5548 | 0 | GGML_ASSERT(ggml_is_contiguous(state)); |
5549 | |
|
5550 | 0 | const int64_t S = k->ne[0]; |
5551 | 0 | const int64_t H = k->ne[1]; |
5552 | 0 | const int64_t n_tokens = k->ne[2]; |
5553 | 0 | const int64_t n_seqs = state->ne[1]; |
5554 | 0 | { |
5555 | 0 | GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens); |
5556 | 0 | GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens); |
5557 | 0 | GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens); |
5558 | 0 | GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs); |
5559 | 0 | } |
5560 | | |
5561 | | // concat output and new_state |
5562 | 0 | const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 }; |
5563 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
5564 | |
|
5565 | 0 | result->op = GGML_OP_RWKV_WKV6; |
5566 | 0 | result->src[0] = k; |
5567 | 0 | result->src[1] = v; |
5568 | 0 | result->src[2] = r; |
5569 | 0 | result->src[3] = tf; |
5570 | 0 | result->src[4] = td; |
5571 | 0 | result->src[5] = state; |
5572 | |
|
5573 | 0 | return result; |
5574 | 0 | } |
5575 | | |
5576 | | // ggml_gated_linear_attn |
5577 | | |
5578 | | struct ggml_tensor * ggml_gated_linear_attn( |
5579 | | struct ggml_context * ctx, |
5580 | | struct ggml_tensor * k, |
5581 | | struct ggml_tensor * v, |
5582 | | struct ggml_tensor * q, |
5583 | | struct ggml_tensor * g, |
5584 | | struct ggml_tensor * state, |
5585 | 0 | float scale) { |
5586 | 0 | GGML_ASSERT(ggml_is_contiguous(k)); |
5587 | 0 | GGML_ASSERT(ggml_is_contiguous(v)); |
5588 | 0 | GGML_ASSERT(ggml_is_contiguous(q)); |
5589 | 0 | GGML_ASSERT(ggml_is_contiguous(g)); |
5590 | 0 | GGML_ASSERT(ggml_is_contiguous(state)); |
5591 | |
|
5592 | 0 | const int64_t S = k->ne[0]; |
5593 | 0 | const int64_t H = k->ne[1]; |
5594 | 0 | const int64_t n_tokens = k->ne[2]; |
5595 | 0 | const int64_t n_seqs = state->ne[1]; |
5596 | 0 | { |
5597 | 0 | GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens); |
5598 | 0 | GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens); |
5599 | 0 | GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens); |
5600 | 0 | GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs); |
5601 | 0 | } |
5602 | | |
5603 | | // concat output and new_state |
5604 | 0 | const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 }; |
5605 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
5606 | |
|
5607 | 0 | ggml_set_op_params_f32(result, 0, scale); |
5608 | |
|
5609 | 0 | result->op = GGML_OP_GATED_LINEAR_ATTN; |
5610 | 0 | result->src[0] = k; |
5611 | 0 | result->src[1] = v; |
5612 | 0 | result->src[2] = q; |
5613 | 0 | result->src[3] = g; |
5614 | 0 | result->src[4] = state; |
5615 | |
|
5616 | 0 | return result; |
5617 | 0 | } |
5618 | | |
5619 | | // ggml_rwkv_wkv7 |
5620 | | |
5621 | | struct ggml_tensor * ggml_rwkv_wkv7( |
5622 | | struct ggml_context * ctx, |
5623 | | struct ggml_tensor * r, |
5624 | | struct ggml_tensor * w, |
5625 | | struct ggml_tensor * k, |
5626 | | struct ggml_tensor * v, |
5627 | | struct ggml_tensor * a, |
5628 | | struct ggml_tensor * b, |
5629 | 0 | struct ggml_tensor * state) { |
5630 | 0 | GGML_ASSERT(ggml_is_contiguous(r)); |
5631 | 0 | GGML_ASSERT(ggml_is_contiguous(w)); |
5632 | 0 | GGML_ASSERT(ggml_is_contiguous(k)); |
5633 | 0 | GGML_ASSERT(ggml_is_contiguous(v)); |
5634 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
5635 | 0 | GGML_ASSERT(ggml_is_contiguous(b)); |
5636 | 0 | GGML_ASSERT(ggml_is_contiguous(state)); |
5637 | |
|
5638 | 0 | const int64_t S = k->ne[0]; |
5639 | 0 | const int64_t H = k->ne[1]; |
5640 | 0 | const int64_t n_tokens = k->ne[2]; |
5641 | 0 | const int64_t n_seqs = state->ne[1]; |
5642 | 0 | { |
5643 | 0 | GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens); |
5644 | 0 | GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens); |
5645 | 0 | GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens); |
5646 | 0 | GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens); |
5647 | 0 | GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens); |
5648 | 0 | GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs); |
5649 | 0 | } |
5650 | | |
5651 | | // concat output and new_state |
5652 | 0 | const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 }; |
5653 | 0 | struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); |
5654 | |
|
5655 | 0 | result->op = GGML_OP_RWKV_WKV7; |
5656 | 0 | result->src[0] = r; |
5657 | 0 | result->src[1] = w; |
5658 | 0 | result->src[2] = k; |
5659 | 0 | result->src[3] = v; |
5660 | 0 | result->src[4] = a; |
5661 | 0 | result->src[5] = b; |
5662 | 0 | result->src[6] = state; |
5663 | |
|
5664 | 0 | return result; |
5665 | 0 | } |
5666 | | |
5667 | | // ggml_unary |
5668 | | |
5669 | | static struct ggml_tensor * ggml_unary_impl( |
5670 | | struct ggml_context * ctx, |
5671 | | struct ggml_tensor * a, |
5672 | | enum ggml_unary_op op, |
5673 | 0 | bool inplace) { |
5674 | 0 | GGML_ASSERT(ggml_is_contiguous_1(a)); |
5675 | |
|
5676 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5677 | |
|
5678 | 0 | ggml_set_op_params_i32(result, 0, (int32_t) op); |
5679 | |
|
5680 | 0 | result->op = GGML_OP_UNARY; |
5681 | 0 | result->src[0] = a; |
5682 | |
|
5683 | 0 | return result; |
5684 | 0 | } |
5685 | | |
5686 | | struct ggml_tensor * ggml_unary( |
5687 | | struct ggml_context * ctx, |
5688 | | struct ggml_tensor * a, |
5689 | 0 | enum ggml_unary_op op) { |
5690 | 0 | return ggml_unary_impl(ctx, a, op, false); |
5691 | 0 | } |
5692 | | |
5693 | | struct ggml_tensor * ggml_unary_inplace( |
5694 | | struct ggml_context * ctx, |
5695 | | struct ggml_tensor * a, |
5696 | 0 | enum ggml_unary_op op) { |
5697 | 0 | return ggml_unary_impl(ctx, a, op, true); |
5698 | 0 | } |
5699 | | |
5700 | | // ggml_map_custom1 |
5701 | | |
5702 | | static struct ggml_tensor * ggml_map_custom1_impl( |
5703 | | struct ggml_context * ctx, |
5704 | | struct ggml_tensor * a, |
5705 | | const ggml_custom1_op_t fun, |
5706 | | int n_tasks, |
5707 | | void * userdata, |
5708 | 0 | bool inplace) { |
5709 | 0 | GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); |
5710 | |
|
5711 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5712 | |
|
5713 | 0 | struct ggml_map_custom1_op_params params = { |
5714 | 0 | /*.fun =*/ fun, |
5715 | 0 | /*.n_tasks =*/ n_tasks, |
5716 | 0 | /*.userdata =*/ userdata |
5717 | 0 | }; |
5718 | 0 | ggml_set_op_params(result, ¶ms, sizeof(params)); |
5719 | |
|
5720 | 0 | result->op = GGML_OP_MAP_CUSTOM1; |
5721 | 0 | result->src[0] = a; |
5722 | |
|
5723 | 0 | return result; |
5724 | 0 | } |
5725 | | |
5726 | | struct ggml_tensor * ggml_map_custom1( |
5727 | | struct ggml_context * ctx, |
5728 | | struct ggml_tensor * a, |
5729 | | const ggml_custom1_op_t fun, |
5730 | | int n_tasks, |
5731 | 0 | void * userdata) { |
5732 | 0 | return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false); |
5733 | 0 | } |
5734 | | |
5735 | | struct ggml_tensor * ggml_map_custom1_inplace( |
5736 | | struct ggml_context * ctx, |
5737 | | struct ggml_tensor * a, |
5738 | | const ggml_custom1_op_t fun, |
5739 | | int n_tasks, |
5740 | 0 | void * userdata) { |
5741 | 0 | return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true); |
5742 | 0 | } |
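// [editor's sketch, not part of the original source] A minimal custom-op callback; the
// ggml_custom1_op_t signature (dst, src, thread index, thread count, userdata) is assumed
// from ggml.h. The example squares a contiguous F32 tensor, interleaving the work across
// the nth threads.
static void example_custom_sqr(struct ggml_tensor * dst, const struct ggml_tensor * a,
                               int ith, int nth, void * userdata) {
    GGML_UNUSED(userdata);
    GGML_ASSERT(ggml_are_same_shape(dst, a));
    const int64_t n = ggml_nelements(a);
    const float * x = (const float *) a->data;
    float       * y = (float       *) dst->data;
    for (int64_t i = ith; i < n; i += nth) { // simple interleaved split across threads
        y[i] = x[i]*x[i];
    }
}
//
//     struct ggml_tensor * y = ggml_map_custom1(ctx, x, example_custom_sqr, GGML_N_TASKS_MAX, NULL);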
5743 | | |
5744 | | // ggml_map_custom2 |
5745 | | |
5746 | | static struct ggml_tensor * ggml_map_custom2_impl( |
5747 | | struct ggml_context * ctx, |
5748 | | struct ggml_tensor * a, |
5749 | | struct ggml_tensor * b, |
5750 | | const ggml_custom2_op_t fun, |
5751 | | int n_tasks, |
5752 | | void * userdata, |
5753 | 0 | bool inplace) { |
5754 | 0 | GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); |
5755 | |
|
5756 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5757 | |
|
5758 | 0 | struct ggml_map_custom2_op_params params = { |
5759 | 0 | /*.fun =*/ fun, |
5760 | 0 | /*.n_tasks =*/ n_tasks, |
5761 | 0 | /*.userdata =*/ userdata |
5762 | 0 | }; |
5763 | 0 | ggml_set_op_params(result, ¶ms, sizeof(params)); |
5764 | |
|
5765 | 0 | result->op = GGML_OP_MAP_CUSTOM2; |
5766 | 0 | result->src[0] = a; |
5767 | 0 | result->src[1] = b; |
5768 | |
|
5769 | 0 | return result; |
5770 | 0 | } |
5771 | | |
5772 | | struct ggml_tensor * ggml_map_custom2( |
5773 | | struct ggml_context * ctx, |
5774 | | struct ggml_tensor * a, |
5775 | | struct ggml_tensor * b, |
5776 | | const ggml_custom2_op_t fun, |
5777 | | int n_tasks, |
5778 | 0 | void * userdata) { |
5779 | 0 | return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false); |
5780 | 0 | } |
5781 | | |
5782 | | struct ggml_tensor * ggml_map_custom2_inplace( |
5783 | | struct ggml_context * ctx, |
5784 | | struct ggml_tensor * a, |
5785 | | struct ggml_tensor * b, |
5786 | | const ggml_custom2_op_t fun, |
5787 | | int n_tasks, |
5788 | 0 | void * userdata) { |
5789 | 0 | return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true); |
5790 | 0 | } |
5791 | | |
5792 | | // ggml_map_custom3 |
5793 | | |
5794 | | static struct ggml_tensor * ggml_map_custom3_impl( |
5795 | | struct ggml_context * ctx, |
5796 | | struct ggml_tensor * a, |
5797 | | struct ggml_tensor * b, |
5798 | | struct ggml_tensor * c, |
5799 | | const ggml_custom3_op_t fun, |
5800 | | int n_tasks, |
5801 | | void * userdata, |
5802 | 0 | bool inplace) { |
5803 | 0 | GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0); |
5804 | |
|
5805 | 0 | struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); |
5806 | |
|
5807 | 0 | struct ggml_map_custom3_op_params params = { |
5808 | 0 | /*.fun =*/ fun, |
5809 | 0 | /*.n_tasks =*/ n_tasks, |
5810 | 0 | /*.userdata =*/ userdata |
5811 | 0 | }; |
5812 | 0 | ggml_set_op_params(result, ¶ms, sizeof(params)); |
5813 | |
|
5814 | 0 | result->op = GGML_OP_MAP_CUSTOM3; |
5815 | 0 | result->src[0] = a; |
5816 | 0 | result->src[1] = b; |
5817 | 0 | result->src[2] = c; |
5818 | |
|
5819 | 0 | return result; |
5820 | 0 | } |
5821 | | |
5822 | | struct ggml_tensor * ggml_map_custom3( |
5823 | | struct ggml_context * ctx, |
5824 | | struct ggml_tensor * a, |
5825 | | struct ggml_tensor * b, |
5826 | | struct ggml_tensor * c, |
5827 | | const ggml_custom3_op_t fun, |
5828 | | int n_tasks, |
5829 | 0 | void * userdata) { |
5830 | 0 | return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false); |
5831 | 0 | } |
5832 | | |
5833 | | struct ggml_tensor * ggml_map_custom3_inplace( |
5834 | | struct ggml_context * ctx, |
5835 | | struct ggml_tensor * a, |
5836 | | struct ggml_tensor * b, |
5837 | | struct ggml_tensor * c, |
5838 | | const ggml_custom3_op_t fun, |
5839 | | int n_tasks, |
5840 | 0 | void * userdata) { |
5841 | 0 | return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true); |
5842 | 0 | } |
5843 | | |
5844 | | struct ggml_tensor * ggml_custom_4d( |
5845 | | struct ggml_context * ctx, |
5846 | | enum ggml_type type, |
5847 | | int64_t ne0, |
5848 | | int64_t ne1, |
5849 | | int64_t ne2, |
5850 | | int64_t ne3, |
5851 | | struct ggml_tensor ** args, |
5852 | | int n_args, |
5853 | | ggml_custom_op_t fun, |
5854 | | int n_tasks, |
5855 | 0 | void * userdata) { |
5856 | |
|
5857 | 0 | GGML_ASSERT(n_args < GGML_MAX_SRC); |
5858 | |
|
5859 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3); |
5860 | |
|
5861 | 0 | struct ggml_custom_op_params params = { |
5862 | 0 | /*.fun =*/ fun, |
5863 | 0 | /*.n_tasks =*/ n_tasks, |
5864 | 0 | /*.userdata =*/ userdata |
5865 | 0 | }; |
5866 | 0 | ggml_set_op_params(result, ¶ms, sizeof(params)); |
5867 | |
|
5868 | 0 | result->op = GGML_OP_CUSTOM; |
5869 | 0 | for (int i = 0; i < n_args; i++) { |
5870 | 0 | result->src[i] = args[i]; |
5871 | 0 | } |
5872 | |
|
5873 | 0 | return result; |
5874 | 0 | } |
5875 | | |
5876 | | struct ggml_tensor * ggml_custom_inplace( |
5877 | | struct ggml_context * ctx, |
5878 | | struct ggml_tensor * a, |
5879 | | struct ggml_tensor ** args, |
5880 | | int n_args, |
5881 | | ggml_custom_op_t fun, |
5882 | | int n_tasks, |
5883 | 0 | void * userdata) { |
5884 | |
|
5885 | 0 | GGML_ASSERT(n_args < GGML_MAX_SRC - 1); |
5886 | |
|
5887 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
5888 | |
|
5889 | 0 | struct ggml_custom_op_params params = { |
5890 | 0 | /*.fun =*/ fun, |
5891 | 0 | /*.n_tasks =*/ n_tasks, |
5892 | 0 | /*.userdata =*/ userdata |
5893 | 0 | }; |
5894 | 0 | ggml_set_op_params(result, ¶ms, sizeof(params)); |
5895 | |
|
5896 | 0 | result->op = GGML_OP_CUSTOM; |
5897 | 0 | result->src[0] = a; |
5898 | 0 | for (int i = 0; i < n_args; i++) { |
5899 | 0 | result->src[i + 1] = args[i]; |
5900 | 0 | } |
5901 | |
|
5902 | 0 | return result; |
5903 | 0 | } |
5904 | | // ggml_cross_entropy_loss |
5905 | | |
5906 | | struct ggml_tensor * ggml_cross_entropy_loss( |
5907 | | struct ggml_context * ctx, |
5908 | | struct ggml_tensor * a, |
5909 | 0 | struct ggml_tensor * b) { |
5910 | 0 | GGML_ASSERT(ggml_are_same_shape(a, b)); |
5911 | |
|
5912 | 0 | struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1); |
5913 | |
|
5914 | 0 | result->op = GGML_OP_CROSS_ENTROPY_LOSS; |
5915 | 0 | result->src[0] = a; |
5916 | 0 | result->src[1] = b; |
5917 | |
|
5918 | 0 | return result; |
5919 | 0 | } |
5920 | | |
5921 | | // ggml_cross_entropy_loss_back |
5922 | | |
5923 | | struct ggml_tensor * ggml_cross_entropy_loss_back( |
5924 | | struct ggml_context * ctx, |
5925 | | struct ggml_tensor * a, |
5926 | | struct ggml_tensor * b, |
5927 | 0 | struct ggml_tensor * c) { |
5928 | 0 | GGML_ASSERT(ggml_is_scalar(a)); |
5929 | 0 | GGML_ASSERT(ggml_are_same_shape(b, c)); |
5930 | |
|
5931 | 0 | struct ggml_tensor * result = ggml_dup_tensor(ctx, b); |
5932 | |
|
5933 | 0 | result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK; |
5934 | 0 | result->src[0] = a; |
5935 | 0 | result->src[1] = b; |
5936 | 0 | result->src[2] = c; |
5937 | |
|
5938 | 0 | return result; |
5939 | 0 | } |
5940 | | |
5941 | | // opt_step_adamw |
5942 | | |
5943 | | struct ggml_tensor * ggml_opt_step_adamw( |
5944 | | struct ggml_context * ctx, |
5945 | | struct ggml_tensor * a, |
5946 | | struct ggml_tensor * grad, |
5947 | | struct ggml_tensor * m, |
5948 | | struct ggml_tensor * v, |
5949 | 0 | struct ggml_tensor * adamw_params) { |
5950 | 0 | GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM); |
5951 | 0 | GGML_ASSERT(ggml_are_same_shape(a, grad)); |
5952 | 0 | GGML_ASSERT(ggml_are_same_shape(a, m)); |
5953 | 0 | GGML_ASSERT(ggml_are_same_shape(a, v)); |
5954 | 0 | GGML_ASSERT(adamw_params->type == GGML_TYPE_F32); |
5955 | 0 | GGML_ASSERT(ggml_nelements(adamw_params) == 7); |
5956 | |
|
5957 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
5958 | |
|
5959 | 0 | result->op = GGML_OP_OPT_STEP_ADAMW; |
5960 | 0 | result->src[0] = a; |
5961 | 0 | result->src[1] = grad; |
5962 | 0 | result->src[2] = m; |
5963 | 0 | result->src[3] = v; |
5964 | 0 | result->src[4] = adamw_params; |
5965 | |
|
5966 | 0 | return result; |
5967 | 0 | } |
5968 | | |
5969 | | // opt_step_sgd |
5970 | | |
5971 | | struct ggml_tensor * ggml_opt_step_sgd( |
5972 | | struct ggml_context * ctx, |
5973 | | struct ggml_tensor * a, |
5974 | | struct ggml_tensor * grad, |
5975 | 0 | struct ggml_tensor * params) { |
5976 | 0 | GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM); |
5977 | 0 | GGML_ASSERT(ggml_are_same_shape(a, grad)); |
5978 | 0 | GGML_ASSERT(params->type == GGML_TYPE_F32); |
5979 | 0 | GGML_ASSERT(ggml_nelements(params) == 2); |
5980 | |
|
5981 | 0 | struct ggml_tensor * result = ggml_view_tensor(ctx, a); |
5982 | |
|
5983 | 0 | result->op = GGML_OP_OPT_STEP_SGD; |
5984 | 0 | result->src[0] = a; |
5985 | 0 | result->src[1] = grad; |
5986 | 0 | result->src[2] = params; |
5987 | |
|
5988 | 0 | return result; |
5989 | 0 | } |
5990 | | |
5991 | | // solve_tri |
5992 | | |
5993 | | struct ggml_tensor * ggml_solve_tri( |
5994 | | struct ggml_context * ctx, |
5995 | | struct ggml_tensor * a, |
5996 | | struct ggml_tensor * b, |
5997 | | bool left, |
5998 | | bool lower, |
5999 | 0 | bool uni) { |
6000 | 0 | GGML_ASSERT(a->type == GGML_TYPE_F32); |
6001 | 0 | GGML_ASSERT(b->type == GGML_TYPE_F32); |
6002 | | |
6003 | | // A must be square and lower triangular |
6004 | 0 | GGML_ASSERT(a->ne[0] == a->ne[1]); |
6005 | | // B must have same outer dimension as A |
6006 | 0 | GGML_ASSERT(a->ne[1] == b->ne[1]); |
6007 | | |
6008 | | // batch dimensions must be equal |
6009 | 0 | GGML_ASSERT(a->ne[2] == b->ne[2]); |
6010 | 0 | GGML_ASSERT(a->ne[3] == b->ne[3]); |
6011 | |
|
6012 | 0 | GGML_ASSERT(ggml_is_contiguous(a)); |
6013 | 0 | GGML_ASSERT(ggml_is_contiguous(b)); |
6014 | |
|
6015 | 0 | GGML_ASSERT(lower && left && !uni); // TODO: support other variants |
6016 | |
|
6017 | 0 | struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]); |
6018 | |
|
6019 | 0 | result->op = GGML_OP_SOLVE_TRI; |
6020 | 0 | result->src[0] = a; |
6021 | 0 | result->src[1] = b; |
6022 | |
|
6023 | 0 | return result; |
6024 | 0 | } |
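// [editor's note, not part of the original source] The op solves A * X = B for X with A
// lower triangular (forward substitution), independently for every [ne2, ne3] batch slice.
// 2x2 example:
//   A = [[2, 0],
//        [1, 3]],  b = [4, 5]  ->  x0 = 4/2 = 2,  x1 = (5 - 1*2)/3 = 1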
6025 | | |
6026 | | //////////////////////////////////////////////////////////////////////////////// |
6027 | | |
6028 | 0 | struct ggml_hash_set ggml_hash_set_new(size_t size) { |
6029 | 0 | size = ggml_hash_size(size); |
6030 | 0 | struct ggml_hash_set result; |
6031 | 0 | result.size = size; |
6032 | 0 | result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size); |
6033 | 0 | result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t)); |
6034 | 0 | return result; |
6035 | 0 | } |
6036 | | |
6037 | 0 | void ggml_hash_set_reset(struct ggml_hash_set * hash_set) { |
6038 | 0 | memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size)); |
6039 | 0 | } |
6040 | | |
6041 | 0 | void ggml_hash_set_free(struct ggml_hash_set * hash_set) { |
6042 | 0 | GGML_FREE(hash_set->used); |
6043 | 0 | GGML_FREE(hash_set->keys); |
6044 | 0 | } |
6045 | | |
6046 | 0 | size_t ggml_hash_size(size_t min_sz) { |
6047 | | // next primes after powers of two |
6048 | 0 | static const size_t primes[] = { |
6049 | 0 | 2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031, |
6050 | 0 | 2053, 4099, 8209, 16411, 32771, 65537, 131101, |
6051 | 0 | 262147, 524309, 1048583, 2097169, 4194319, 8388617, |
6052 | 0 | 16777259, 33554467, 67108879, 134217757, 268435459, |
6053 | 0 | 536870923, 1073741827, 2147483659 |
6054 | 0 | }; |
6055 | 0 | static const size_t n_primes = sizeof(primes)/sizeof(primes[0]); |
6056 | | |
6057 | | // find the smallest prime that is larger than or equal to min_sz |
6058 | 0 | size_t l = 0; |
6059 | 0 | size_t r = n_primes; |
6060 | 0 | while (l < r) { |
6061 | 0 | size_t m = (l + r)/2; |
6062 | 0 | if (primes[m] < min_sz) { |
6063 | 0 | l = m + 1; |
6064 | 0 | } else { |
6065 | 0 | r = m; |
6066 | 0 | } |
6067 | 0 | } |
6068 | 0 | size_t sz = l < n_primes ? primes[l] : min_sz | 1; |
6069 | 0 | return sz; |
6070 | 0 | } |
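// [editor's note, not part of the original source] Examples of the prime lookup above:
//   ggml_hash_size(100) -> 131   (smallest table entry >= 100)
//   ggml_hash_size(131) -> 131
//   ggml_hash_size(3000000000) -> 3000000001   (past the table: falls back to min_sz | 1)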
6071 | | |
6072 | | struct hash_map { |
6073 | | struct ggml_hash_set set; |
6074 | | struct ggml_tensor ** vals; |
6075 | | }; |
6076 | | |
6077 | 0 | static struct hash_map * ggml_new_hash_map(size_t size) { |
6078 | 0 | struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map)); |
6079 | 0 | result->set = ggml_hash_set_new(size); |
6080 | 0 | result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *)); |
6081 | 0 | return result; |
6082 | 0 | } |
6083 | | |
6084 | 0 | static void ggml_hash_map_free(struct hash_map * map) { |
6085 | 0 | ggml_hash_set_free(&map->set); |
6086 | 0 | GGML_FREE(map->vals); |
6087 | 0 | GGML_FREE(map); |
6088 | 0 | } |
6089 | | |
6090 | | // utility functions to change gradients |
6091 | | // isrc is the index of tensor in cgraph->visited_hash_set.keys |
6092 | | // the corresponding gradient (accumulators) are also at position isrc |
6093 | | // if tensor has a gradient accumulator, modify that accumulator in-place |
6094 | | // else if there is no gradient for tensor, set the corresponding value |
6095 | | // else, just add/subtract/etc. the gradients |
6096 | | |
6097 | | static void ggml_add_or_set( |
6098 | | struct ggml_context * ctx, |
6099 | | struct ggml_cgraph * cgraph, |
6100 | | size_t isrc, |
6101 | 0 | struct ggml_tensor * tensor) { |
6102 | 0 | struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc]; |
6103 | 0 | GGML_ASSERT(src); |
6104 | 0 | if (cgraph->grads[isrc]) { |
6105 | 0 | cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]); |
6106 | 0 | } else { |
6107 | 0 | cgraph->grads[isrc] = tensor; |
6108 | 0 | } |
6109 | 0 | ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name); |
6110 | 0 | ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); |
6111 | 0 | } |
6112 | | |
6113 | | static void ggml_acc_or_set( |
6114 | | struct ggml_context * ctx, |
6115 | | struct ggml_cgraph * cgraph, |
6116 | | size_t isrc, |
6117 | | struct ggml_tensor * tensor, |
6118 | | const size_t nb1, |
6119 | | const size_t nb2, |
6120 | | const size_t nb3, |
6121 | 0 | const size_t offset) { |
6122 | 0 | struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc]; |
6123 | 0 | GGML_ASSERT(src); |
6124 | 0 | if (cgraph->grads[isrc]) { |
6125 | 0 | cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]); |
6126 | 0 | } else { |
6127 | 0 | struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN |
6128 | 0 | cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false); |
6129 | 0 | } |
6130 | 0 | ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name); |
6131 | 0 | ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); |
6132 | 0 | } |
6133 | | |
6134 | | static void ggml_add1_or_set( |
6135 | | struct ggml_context * ctx, |
6136 | | struct ggml_cgraph * cgraph, |
6137 | | size_t isrc, |
6138 | 0 | struct ggml_tensor * tensor) { |
6139 | 0 | struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc]; |
6140 | 0 | GGML_ASSERT(src); |
6141 | 0 | if (cgraph->grads[isrc]) { |
6142 | 0 | cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]); |
6143 | 0 | } else { |
6144 | 0 | cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src); |
6145 | 0 | } |
6146 | 0 | ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name); |
6147 | 0 | ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); |
6148 | 0 | } |
6149 | | |
6150 | | static void ggml_sub_or_set( |
6151 | | struct ggml_context * ctx, |
6152 | | struct ggml_cgraph * cgraph, |
6153 | | size_t isrc, |
6154 | 0 | struct ggml_tensor * tensor) { |
6155 | 0 | struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc]; |
6156 | 0 | GGML_ASSERT(src); |
6157 | 0 | if (cgraph->grads[isrc]) { |
6158 | 0 | cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]); |
6159 | 0 | } else { |
6160 | 0 | cgraph->grads[isrc] = ggml_neg(ctx, tensor); |
6161 | 0 | } |
6162 | 0 | ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name); |
6163 | 0 | ggml_build_forward_expand(cgraph, cgraph->grads[isrc]); |
6164 | 0 | } |
6165 | | |
6166 | | static void ggml_compute_backward( |
6167 | 0 | struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) { |
6168 | 0 | struct ggml_tensor * tensor = cgraph->nodes[i]; |
6169 | 0 | struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, tensor); |
6170 | |
|
6171 | 0 | if (!grad) { |
6172 | 0 | return; |
6173 | 0 | } |
6174 | | |
6175 | 0 | struct ggml_tensor * src0 = tensor->src[0]; |
6176 | 0 | struct ggml_tensor * src1 = tensor->src[1]; |
6177 | 0 | struct ggml_tensor * src2 = tensor->src[2]; |
6178 | 0 | struct ggml_hash_set * hash_set = &cgraph->visited_hash_set; |
6179 | 0 | const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1; |
6180 | 0 | const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1; |
6181 | 0 | const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1; |
6182 | 0 | const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0]; |
6183 | 0 | const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1]; |
6184 | 0 | const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2]; |
6185 | |
|
6186 | 0 | switch (tensor->op) { |
6187 | 0 | case GGML_OP_DUP: { |
6188 | 0 | if (src0_needs_grads) { |
6189 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, grad); |
6190 | 0 | } |
6191 | 0 | } break; |
6192 | 0 | case GGML_OP_ADD: { |
6193 | 0 | if (src0_needs_grads) { |
6194 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, grad); |
6195 | 0 | } |
6196 | 0 | if (src1_needs_grads) { |
6197 | 0 | struct ggml_tensor * tmp = grad; |
6198 | 0 | if (!ggml_are_same_shape(src0, src1)) { |
6199 | 0 | tmp = ggml_repeat_back(ctx, tmp, src1); |
6200 | 0 | } |
6201 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, tmp); |
6202 | 0 | } |
6203 | 0 | } break; |
6204 | 0 | case GGML_OP_ADD1: { |
6205 | 0 | if (src0_needs_grads) { |
6206 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, grad); |
6207 | 0 | } |
6208 | 0 | if (src1_needs_grads) { |
6209 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean |
6210 | 0 | } |
6211 | 0 | } break; |
6212 | 0 | case GGML_OP_ACC: { |
6213 | 0 | if (src0_needs_grads) { |
6214 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, grad); |
6215 | 0 | } |
6216 | 0 | if (src1_needs_grads) { |
6217 | 0 | const size_t nb1 = ((int32_t *) tensor->op_params)[0]; |
6218 | 0 | const size_t nb2 = ((int32_t *) tensor->op_params)[1]; |
6219 | 0 | const size_t nb3 = ((int32_t *) tensor->op_params)[2]; |
6220 | 0 | const size_t offset = ((int32_t *) tensor->op_params)[3]; |
6221 | |
|
6222 | 0 | struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx, |
6223 | 0 | grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], |
6224 | 0 | nb1, nb2, nb3, offset); |
6225 | |
|
6226 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1)); |
6227 | 0 | } |
6228 | 0 | } break; |
6229 | 0 | case GGML_OP_SUB: { |
6230 | 0 | if (src0_needs_grads) { |
6231 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, grad); |
6232 | 0 | } |
6233 | 0 | if (src1_needs_grads) { |
6234 | 0 | ggml_sub_or_set(ctx, cgraph, isrc1, grad); |
6235 | 0 | } |
6236 | 0 | } break; |
6237 | 0 | case GGML_OP_MUL: { |
6238 | 0 | if (src0_needs_grads) { |
6239 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1)); |
6240 | 0 | } |
6241 | 0 | if (src1_needs_grads) { |
6242 | 0 | struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad); |
6243 | 0 | if (!ggml_are_same_shape(src0, src1)) { |
6244 | 0 | tmp = ggml_repeat_back(ctx, tmp, src1); |
6245 | 0 | } |
6246 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, tmp); |
6247 | 0 | } |
6248 | 0 | } break; |
6249 | 0 | case GGML_OP_DIV: { |
6250 | 0 | if (src0_needs_grads) { |
6251 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1)); |
6252 | 0 | } |
6253 | 0 | if (src1_needs_grads) { |
6254 | 0 | ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1))); |
6255 | 0 | } |
6256 | 0 | } break; |
6257 | 0 | case GGML_OP_SQR: { |
6258 | 0 | if (src0_needs_grads) { |
6259 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f)); |
6260 | 0 | } |
6261 | 0 | } break; |
6262 | 0 | case GGML_OP_SQRT: { |
6263 | 0 | if (src0_needs_grads) { |
6264 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f)); |
6265 | 0 | } |
6266 | 0 | } break; |
6267 | 0 | case GGML_OP_LOG: { |
6268 | 0 | if (src0_needs_grads) { |
6269 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0)); |
6270 | 0 | } |
6271 | 0 | } break; |
6272 | 0 | case GGML_OP_SIN: { |
6273 | 0 | if (src0_needs_grads) { |
6274 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0))); |
6275 | 0 | } |
6276 | 0 | } break; |
6277 | 0 | case GGML_OP_COS: { |
6278 | 0 | if (src0_needs_grads) { |
6279 | 0 | ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0))); |
6280 | 0 | } |
6281 | 0 | } break; |
6282 | 0 | case GGML_OP_SUM: { |
6283 | 0 | if (src0_needs_grads) { |
6284 | 0 | ggml_add1_or_set(ctx, cgraph, isrc0, grad); |
6285 | 0 | } |
6286 | 0 | } break; |
6287 | 0 | case GGML_OP_SUM_ROWS: { |
6288 | 0 | if (src0_needs_grads) { |
6289 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0)); |
6290 | 0 | } |
6291 | 0 | } break; |
6292 | 0 | case GGML_OP_MEAN: { |
6293 | 0 | if (src0_needs_grads) { |
6294 | 0 | ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false)); |
6295 | 0 | } |
6296 | 0 | } break; |
6297 | 0 | case GGML_OP_REPEAT: { |
6298 | 0 | if (src0_needs_grads) { |
6299 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0)); |
6300 | 0 | } |
6301 | 0 | } break; |
6302 | 0 | case GGML_OP_REPEAT_BACK: { |
6303 | 0 | if (src0_needs_grads) { |
6304 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0)); |
6305 | 0 | } |
6306 | 0 | } break; |
6307 | 0 | case GGML_OP_RMS_NORM: { |
6308 | 0 | if (src0_needs_grads) { |
6309 | 0 | float eps; |
6310 | 0 | memcpy(&eps, tensor->op_params, sizeof(float)); |
6311 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps)); |
6312 | 0 | } |
6313 | 0 | } break; |
6314 | 0 | case GGML_OP_MUL_MAT: { |
6315 | | // https://cs231n.github.io/optimization-2/#staged |
6316 | | // # forward pass |
6317 | | // s0 = np.random.randn(5, 10) |
6318 | | // s1 = np.random.randn(10, 3) |
6319 | | // t = s0.dot(s1) |
6320 | | |
6321 | | // # now suppose we had the gradient on t from above in the circuit |
6322 | | // dt = np.random.randn(*t.shape) # same shape as t |
6323 | | // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix |
6324 | | // ds1 = t.T.dot(dt) |
6325 | | |
6326 | | // tensor.shape [m,p,qq,rr] |
6327 | | // src0.shape [n,m,q1,r1] |
6328 | | // src1.shape [n,p,qq,rr] |
6329 | |
|
6330 | 0 | if (src0_needs_grads) { |
6331 | 0 | GGML_ASSERT(grad->ne[2] == src1->ne[2]); |
6332 | 0 | GGML_ASSERT(grad->ne[3] == src1->ne[3]); |
6333 | 0 | struct ggml_tensor * tmp = |
6334 | 0 | ggml_out_prod(ctx, // [n,m,qq,rr] |
6335 | 0 | src1, // [n,p,qq,rr] |
6336 | 0 | grad); // [m,p,qq,rr] |
6337 | 0 | if (!ggml_are_same_shape(tmp, src0)) { |
6338 | 0 | GGML_ASSERT(tmp->ne[0] == src0->ne[0]); |
6339 | 0 | GGML_ASSERT(tmp->ne[1] == src0->ne[1]); |
6340 | 0 | GGML_ASSERT(tmp->ne[3] == 1); |
6341 | |
|
6342 | 0 | const int64_t nr2 = tmp->ne[2] / src0->ne[2]; |
6343 | 0 | const size_t nb2 = tmp->nb[2] * nr2; |
6344 | 0 | const size_t nb3 = tmp->nb[2]; |
6345 | |
|
6346 | 0 | tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0); |
6347 | 0 | tmp = ggml_repeat_back(ctx, tmp, src0); |
6348 | 0 | } |
6349 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, tmp); |
6350 | 0 | } |
6351 | 0 | if (src1_needs_grads) { |
6352 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, |
6353 | | // ggml_mul_mat(ctx, // [n,p,qq,rr] |
6354 | | // ggml_cont(ctx, // [m,n,q1,r1] |
6355 | | // ggml_transpose(ctx, src0)), // [m,n,q1,r1] |
6356 | | // grad), // [m,p,qq,rr] |
6357 | | |
6358 | | // when src0 is bigger than tensor->grad (which is mostly the case in llama), |
6359 | | // avoid transposing src0; instead transpose the smaller tensor->grad |
6360 | | // and then use ggml_out_prod |
6361 | 0 | ggml_out_prod(ctx, // [n,p,qq,rr] |
6362 | 0 | src0, // [n,m,q1,r1] |
6363 | 0 | ggml_transpose(ctx, // [p,m,qq,rr] |
6364 | 0 | grad))); // [m,p,qq,rr] |
6365 | 0 | } |
6366 | 0 | } break; |
6367 | 0 | case GGML_OP_SCALE: { |
6368 | 0 | if (src0_needs_grads) { |
6369 | 0 | float s; |
6370 | 0 | memcpy(&s, tensor->op_params, sizeof(float)); |
6371 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false)); |
6372 | 0 | } |
6373 | 0 | } break; |
6374 | 0 | case GGML_OP_SET: { |
6375 | 0 | const size_t nb1 = ((const int32_t *) tensor->op_params)[0]; |
6376 | 0 | const size_t nb2 = ((const int32_t *) tensor->op_params)[1]; |
6377 | 0 | const size_t nb3 = ((const int32_t *) tensor->op_params)[2]; |
6378 | 0 | const size_t offset = ((const int32_t *) tensor->op_params)[3]; |
6379 | |
|
6380 | 0 | struct ggml_tensor * tensor_grad_view = NULL; |
6381 | |
|
6382 | 0 | if (src0_needs_grads || src1_needs_grads) { |
6383 | 0 | GGML_ASSERT(src0->type == tensor->type); |
6384 | 0 | GGML_ASSERT(!cgraph->grads[isrc0] || cgraph->grads[isrc0]->type == grad->type); |
6385 | 0 | GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type); |
6386 | |
|
6387 | 0 | tensor_grad_view = ggml_view_4d(ctx, |
6388 | 0 | grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], |
6389 | 0 | nb1, nb2, nb3, offset); |
6390 | 0 | } |
6391 | |
|
6392 | 0 | if (src0_needs_grads) { |
6393 | 0 | struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view); |
6394 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false)); |
6395 | 0 | } |
6396 | |
|
6397 | 0 | if (src1_needs_grads) { |
6398 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1)); |
6399 | 0 | } |
6400 | 0 | } break; |
6401 | 0 | case GGML_OP_CPY: { |
6402 | | // cpy overwrites the value of src1 with src0 and returns view(src1)
6403 | | // the overwrite is mathematically equivalent to:
6404 | | // tensor = src0 * 1 + src1 * 0
6405 | 0 | if (src0_needs_grads) { |
6406 | | // dsrc0 = dtensor * 1 |
6407 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0)); |
6408 | 0 | } |
6409 | 0 | if (src1_needs_grads) { |
6410 | | // dsrc1 = dtensor * 0 -> noop |
6411 | 0 | } |
6412 | 0 | } break; |
6413 | 0 | case GGML_OP_CONT: { |
6414 | | // same as cpy |
6415 | 0 | if (src0_needs_grads) { |
6416 | 0 | GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0])); |
6417 | 0 | GGML_ASSERT(ggml_is_contiguous(grad)); |
6418 | 0 | GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0)); |
6419 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, |
6420 | 0 | ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0)); |
6421 | 0 | } |
6422 | 0 | } break; |
6423 | 0 | case GGML_OP_RESHAPE: { |
6424 | 0 | if (src0_needs_grads) { |
6425 | 0 | struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad); |
6426 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0)); |
6427 | 0 | } |
6428 | 0 | } break; |
6429 | 0 | case GGML_OP_VIEW: { |
6430 | 0 | if (src0_needs_grads) { |
6431 | 0 | size_t offset; |
6432 | |
|
6433 | 0 | memcpy(&offset, tensor->op_params, sizeof(offset)); |
6434 | |
|
6435 | 0 | size_t nb1 = tensor->nb[1]; |
6436 | 0 | size_t nb2 = tensor->nb[2]; |
6437 | 0 | size_t nb3 = tensor->nb[3]; |
6438 | |
|
6439 | 0 | if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) { |
6440 | | // the gradient is typically F32, but src0 could be of a different type
6441 | 0 | size_t ng = ggml_element_size(cgraph->grads[isrc0]); |
6442 | 0 | size_t n0 = ggml_element_size(src0); |
6443 | 0 | GGML_ASSERT(offset % n0 == 0); |
6444 | 0 | GGML_ASSERT(nb1 % n0 == 0); |
6445 | 0 | GGML_ASSERT(nb2 % n0 == 0); |
6446 | 0 | GGML_ASSERT(nb3 % n0 == 0); |
6447 | 0 | offset = (offset / n0) * ng; |
6448 | 0 | nb1 = (nb1 / n0) * ng; |
6449 | 0 | nb2 = (nb2 / n0) * ng; |
6450 | 0 | nb3 = (nb3 / n0) * ng; |
6451 | 0 | } |
6452 | |
|
6453 | 0 | ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset); |
6454 | 0 | } |
6455 | 0 | } break; |
6456 | 0 | case GGML_OP_PERMUTE: { |
6457 | 0 | if (src0_needs_grads) { |
6458 | 0 | const int32_t * axes = (const int32_t *) tensor->op_params; |
6459 | 0 | const int axis0 = axes[0] & 0x3; |
6460 | 0 | const int axis1 = axes[1] & 0x3; |
6461 | 0 | const int axis2 = axes[2] & 0x3; |
6462 | 0 | const int axis3 = axes[3] & 0x3; |
6463 | 0 | int axb[4] = {0,0,0,0}; // axes backward |
6464 | 0 | axb[axis0] = 0; |
6465 | 0 | axb[axis1] = 1; |
6466 | 0 | axb[axis2] = 2; |
6467 | 0 | axb[axis3] = 3; |
6468 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3])); |
6469 | 0 | } |
6470 | 0 | } break; |
6471 | 0 | case GGML_OP_TRANSPOSE: { |
6472 | 0 | if (src0_needs_grads) { |
6473 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad)); |
6474 | 0 | } |
6475 | 0 | } break; |
6476 | 0 | case GGML_OP_GET_ROWS: { |
6477 | 0 | if (src0_needs_grads) { |
6478 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0)); |
6479 | 0 | } |
6480 | 0 | if (src1_needs_grads) { |
6481 | | // noop |
6482 | 0 | } |
6483 | 0 | } break; |
6484 | 0 | case GGML_OP_DIAG_MASK_INF: { |
6485 | 0 | if (src0_needs_grads) { |
6486 | | /* ggml_diag_mask_inf_impl() shouldn't be here */ |
6487 | | /* ref: https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */ |
6488 | 0 | const int n_past = ((const int32_t *) tensor->op_params)[0]; |
6489 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false)); |
6490 | 0 | } |
6491 | 0 | } break; |
6492 | 0 | case GGML_OP_DIAG_MASK_ZERO: { |
6493 | 0 | if (src0_needs_grads) { |
6494 | 0 | const int n_past = ((const int32_t *) tensor->op_params)[0]; |
6495 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false)); |
6496 | 0 | } |
6497 | 0 | } break; |
6498 | 0 | case GGML_OP_SOFT_MAX: { |
6499 | 0 | if (src0_needs_grads) { |
6500 | 0 | float scale = 1.0f; |
6501 | 0 | float max_bias = 0.0f; |
6502 | |
|
6503 | 0 | memcpy(&scale, (const float *) tensor->op_params + 0, sizeof(float)); |
6504 | 0 | memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float)); |
6505 | |
|
6506 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias)); |
6507 | 0 | } |
6508 | 0 | GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented"); |
6509 | 0 | } break; |
6510 | 0 | case GGML_OP_ROPE: { |
6511 | 0 | if (src0_needs_grads) { |
6512 | | //const int n_past = ((int32_t *) tensor->op_params)[0]; |
6513 | 0 | const int n_dims = ((const int32_t *) tensor->op_params)[1]; |
6514 | 0 | const int mode = ((const int32_t *) tensor->op_params)[2]; |
6515 | | //const int n_ctx = ((int32_t *) tensor->op_params)[3]; |
6516 | 0 | const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4]; |
6517 | 0 | float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow; |
6518 | 0 | int sections[4] = {0, 0, 0, 0}; |
6519 | |
|
6520 | 0 | memcpy(&freq_base, (const float *) tensor->op_params + 5, sizeof(float)); |
6521 | 0 | memcpy(&freq_scale, (const float *) tensor->op_params + 6, sizeof(float)); |
6522 | 0 | memcpy(&ext_factor, (const float *) tensor->op_params + 7, sizeof(float)); |
6523 | 0 | memcpy(&attn_factor, (const float *) tensor->op_params + 8, sizeof(float)); |
6524 | 0 | memcpy(&beta_fast, (const float *) tensor->op_params + 9, sizeof(float)); |
6525 | 0 | memcpy(&beta_slow, (const float *) tensor->op_params + 10, sizeof(float)); |
6526 | 0 | memcpy(&sections, tensor->op_params + 11, sizeof(sections));
6527 | |
|
6528 | 0 | struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ? |
6529 | 0 | ggml_rope_ext_back(ctx, grad, src1, src2, n_dims, |
6530 | 0 | mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) : |
6531 | 0 | ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections, |
6532 | 0 | mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow); |
6533 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, rope_back); |
6534 | 0 | } |
6535 | 0 | GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented"); |
6536 | 0 | } break; |
6537 | 0 | case GGML_OP_IM2COL: { |
6538 | 0 | if (src1_needs_grads) { |
6539 | 0 | const int32_t s0 = ggml_get_op_params_i32(tensor, 0); |
6540 | 0 | const int32_t s1 = ggml_get_op_params_i32(tensor, 1); |
6541 | 0 | const int32_t p0 = ggml_get_op_params_i32(tensor, 2); |
6542 | 0 | const int32_t p1 = ggml_get_op_params_i32(tensor, 3); |
6543 | 0 | const int32_t d0 = ggml_get_op_params_i32(tensor, 4); |
6544 | 0 | const int32_t d1 = ggml_get_op_params_i32(tensor, 5); |
6545 | 0 | const bool is_2D = ggml_get_op_params_i32(tensor, 6) == 1; |
6546 | |
|
6547 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D)); |
6548 | 0 | } |
6549 | 0 | } break; |
6550 | 0 | case GGML_OP_POOL_2D: { |
6551 | 0 | if (src0_needs_grads) { |
6552 | 0 | const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0); |
6553 | 0 | const int32_t k0 = ggml_get_op_params_i32(tensor, 1); |
6554 | 0 | const int32_t k1 = ggml_get_op_params_i32(tensor, 2); |
6555 | 0 | const int32_t s0 = ggml_get_op_params_i32(tensor, 3); |
6556 | 0 | const int32_t s1 = ggml_get_op_params_i32(tensor, 4); |
6557 | 0 | const int32_t p0 = ggml_get_op_params_i32(tensor, 5); |
6558 | 0 | const int32_t p1 = ggml_get_op_params_i32(tensor, 6); |
6559 | |
|
6560 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1)); |
6561 | 0 | } |
6562 | 0 | } break; |
6563 | 0 | case GGML_OP_WIN_PART: |
6564 | 0 | case GGML_OP_WIN_UNPART: |
6565 | 0 | case GGML_OP_UNARY: { |
6566 | 0 | switch (ggml_get_unary_op(tensor)) { |
6567 | 0 | case GGML_UNARY_OP_ABS: { |
6568 | 0 | if (src0_needs_grads) { |
6569 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad)); |
6570 | 0 | } |
6571 | 0 | } break; |
6572 | 0 | case GGML_UNARY_OP_SGN: { |
6573 | | // noop |
6574 | 0 | } break; |
6575 | 0 | case GGML_UNARY_OP_NEG: { |
6576 | 0 | if (src0_needs_grads) { |
6577 | 0 | ggml_sub_or_set(ctx, cgraph, isrc0, grad); |
6578 | 0 | } |
6579 | 0 | } break; |
6580 | 0 | case GGML_UNARY_OP_STEP: { |
6581 | | // noop |
6582 | 0 | } break; |
6583 | 0 | case GGML_UNARY_OP_RELU: { |
6584 | 0 | if (src0_needs_grads) { |
6585 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad)); |
6586 | 0 | } |
6587 | 0 | } break; |
6588 | 0 | case GGML_UNARY_OP_SILU: { |
6589 | 0 | if (src0_needs_grads) { |
6590 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0)); |
6591 | 0 | } |
6592 | 0 | } break; |
6593 | 0 | case GGML_UNARY_OP_EXP: { |
6594 | 0 | if (src0_needs_grads) { |
6595 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad)); |
6596 | 0 | } |
6597 | 0 | } break; |
6598 | 0 | case GGML_UNARY_OP_EXPM1: { |
6599 | 0 | if (src0_needs_grads) { |
6600 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0))); |
6601 | 0 | } |
6602 | 0 | } break; |
6603 | 0 | case GGML_UNARY_OP_SOFTPLUS: { |
6604 | 0 | if (src0_needs_grads) { |
6605 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0))); |
6606 | 0 | } |
6607 | 0 | } break; |
6608 | 0 | default: { |
6609 | 0 | fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n", |
6610 | 0 | __func__, ggml_unary_op_name(ggml_get_unary_op(tensor))); |
6611 | 0 | GGML_ABORT("fatal error"); |
6612 | 0 | } //break; |
6613 | 0 | } |
6614 | 0 | } break; |
6615 | 0 | case GGML_OP_CROSS_ENTROPY_LOSS: { |
6616 | 0 | if (src0_needs_grads) { |
6617 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1)); |
6618 | 0 | } |
6619 | 0 | GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented"); |
6620 | 0 | } break; |
6621 | 0 | case GGML_OP_GLU: { |
6622 | 0 | switch (ggml_get_glu_op(tensor)) { |
6623 | 0 | case GGML_GLU_OP_SWIGLU: { |
6624 | 0 | if (src0_needs_grads) { |
6625 | 0 | GGML_ASSERT(src1 && "backward pass only implemented for split swiglu"); |
6626 | 0 | ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0)); |
6627 | 0 | } |
6628 | 0 | if (src1_needs_grads) { |
6629 | 0 | ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad)); |
6630 | 0 | } |
6631 | 0 | } break; |
6632 | 0 | default: { |
6633 | 0 | GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor))); |
6634 | 0 | } //break; |
6635 | 0 | } |
6636 | 0 | } break; |
6637 | 0 | case GGML_OP_NONE: { |
6638 | | // noop |
6639 | 0 | } break; |
6640 | 0 | case GGML_OP_COUNT: |
6641 | 0 | default: { |
6642 | 0 | GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op)); |
6643 | 0 | } //break; |
6644 | 0 | } |
6645 | | |
6646 | 0 | GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0])); |
6647 | 0 | GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1])); |
6648 | 0 | GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2])); |
6649 | 0 | } |
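// note: every case above follows the same pattern: for each differentiable source
// src_i whose src*_needs_grads flag is set, accumulate
//     grads[isrc_i] += d(tensor)/d(src_i) * grad
// into cgraph->grads via the ggml_add_or_set()/ggml_add1_or_set()/ggml_sub_or_set()/
// ggml_acc_or_set() helpers, which either create the gradient tensor on first use or
// add to the existing one; the asserts at the end guarantee that each accumulated
// gradient keeps the shape of its source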
6650 | | |
6651 | 0 | static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) { |
6652 | | // check if already visited |
6653 | 0 | size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node); |
6654 | 0 | GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL); |
6655 | 0 | if (!ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) { |
6656 | | // This is the first time we see this node in the current graph. |
6657 | 0 | cgraph->visited_hash_set.keys[node_hash_pos] = node; |
6658 | 0 | ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos); |
6659 | 0 | cgraph->use_counts[node_hash_pos] = 0; |
6660 | 0 | } else { |
6661 | | // already visited |
6662 | 0 | return node_hash_pos; |
6663 | 0 | } |
6664 | | |
6665 | 0 | for (int i = 0; i < GGML_MAX_SRC; ++i) { |
6666 | 0 | const int k = |
6667 | 0 | (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i : |
6668 | 0 | (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) : |
6669 | 0 | /* unknown order, just fall back to using i */ i; |
6670 | |
|
6671 | 0 | struct ggml_tensor * src = node->src[k]; |
6672 | 0 | if (src) { |
6673 | 0 | size_t src_hash_pos = ggml_visit_parents(cgraph, src); |
6674 | | |
6675 | | // Update the use count for this operand. |
6676 | 0 | cgraph->use_counts[src_hash_pos]++; |
6677 | 0 | } |
6678 | 0 | } |
6679 | |
|
6680 | 0 | if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) { |
6681 | | // reached a leaf node, not part of the gradient graph (e.g. a constant) |
6682 | 0 | GGML_ASSERT(cgraph->n_leafs < cgraph->size); |
6683 | |
|
6684 | 0 | if (strlen(node->name) == 0) { |
6685 | 0 | ggml_format_name(node, "leaf_%d", cgraph->n_leafs); |
6686 | 0 | } |
6687 | |
|
6688 | 0 | cgraph->leafs[cgraph->n_leafs] = node; |
6689 | 0 | cgraph->n_leafs++; |
6690 | 0 | } else { |
6691 | 0 | GGML_ASSERT(cgraph->n_nodes < cgraph->size); |
6692 | |
|
6693 | 0 | if (strlen(node->name) == 0) { |
6694 | 0 | ggml_format_name(node, "node_%d", cgraph->n_nodes); |
6695 | 0 | } |
6696 | |
|
6697 | 0 | cgraph->nodes[cgraph->n_nodes] = node; |
6698 | 0 | cgraph->n_nodes++; |
6699 | 0 | } |
6700 | |
|
6701 | 0 | return node_hash_pos; |
6702 | 0 | } |
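// note: ggml_visit_parents() is a post-order DFS: a node's sources are inserted
// before the node itself, so cgraph->nodes is already a valid evaluation order.
// constants (op == GGML_OP_NONE and not flagged as params) go to cgraph->leafs
// instead. illustrative sketch (tensor names are hypothetical):
//     struct ggml_tensor * c = ggml_add(ctx, a, b);  // a, b: plain leaf tensors
//     ggml_build_forward_expand(gf, c);
//     // -> a and b end up in gf->leafs, c in gf->nodes;
//     //    if a had been marked with ggml_set_param(), it would go to gf->nodes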
6703 | | |
6704 | 0 | static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) { |
6705 | 0 | if (!expand) { |
6706 | | // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand |
6707 | 0 | ggml_graph_clear(cgraph); |
6708 | 0 | } |
6709 | |
|
6710 | 0 | const int n0 = cgraph->n_nodes; |
6711 | |
|
6712 | 0 | ggml_visit_parents(cgraph, tensor); |
6713 | |
|
6714 | 0 | const int n_new = cgraph->n_nodes - n0; |
6715 | 0 | GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new); |
6716 | |
|
6717 | 0 | if (n_new > 0) { |
6718 | | // the last added node should always be the starting point
6719 | 0 | GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor); |
6720 | 0 | } |
6721 | 0 | } |
6722 | | |
6723 | 0 | void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) { |
6724 | 0 | ggml_build_forward_impl(cgraph, tensor, true); |
6725 | 0 | } |
6726 | | |
6727 | | void ggml_build_backward_expand( |
6728 | | struct ggml_context * ctx, |
6729 | | struct ggml_cgraph * cgraph, |
6730 | 0 | struct ggml_tensor ** grad_accs) { |
6731 | 0 | GGML_ASSERT(cgraph->n_nodes > 0); |
6732 | 0 | GGML_ASSERT(cgraph->grads); |
6733 | 0 | GGML_ASSERT(cgraph->grad_accs); |
6734 | |
|
6735 | 0 | const int n_nodes_f = cgraph->n_nodes; |
6736 | |
|
6737 | 0 | memset(cgraph->grads, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *)); |
6738 | 0 | memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *)); |
6739 | 0 | bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool)); |
6740 | |
|
6741 | 0 | { |
6742 | 0 | bool any_params = false; |
6743 | 0 | bool any_loss = false; |
6744 | 0 | for (int i = 0; i < n_nodes_f; ++i) { |
6745 | 0 | struct ggml_tensor * node = cgraph->nodes[i]; |
6746 | 0 | any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM); |
6747 | 0 | any_loss = any_loss || (node->flags & GGML_TENSOR_FLAG_LOSS); |
6748 | 0 | } |
6749 | 0 | GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?"); |
6750 | 0 | GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?"); |
6751 | 0 | } |
6752 | |
|
6753 | 0 | for (int i = 0; i < n_nodes_f; ++i) { |
6754 | 0 | struct ggml_tensor * node = cgraph->nodes[i]; |
6755 | |
|
6756 | 0 | if (node->type == GGML_TYPE_I32) { |
6757 | 0 | continue; |
6758 | 0 | } |
6759 | | |
6760 | 0 | bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS); |
6761 | 0 | bool ignore_src[GGML_MAX_SRC] = {false}; |
6762 | 0 | switch (node->op) { |
6763 | | // gradients in node->src[0], for one reason or another, have no effect on the output gradients
6764 | 0 | case GGML_OP_IM2COL: // only used for its shape |
6765 | 0 | case GGML_OP_IM2COL_BACK: // same as IM2COL |
6766 | 0 | ignore_src[0] = true; |
6767 | 0 | break; |
6768 | 0 | case GGML_OP_UNARY: { |
6769 | 0 | const enum ggml_unary_op uop = ggml_get_unary_op(node); |
6770 | | // SGN and STEP unary ops are piecewise constant |
6771 | 0 | if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) { |
6772 | 0 | ignore_src[0] = true; |
6773 | 0 | } |
6774 | 0 | } break; |
6775 | | |
6776 | | // gradients in node->src[1], for one reason or another, have no effect on the output gradients
6777 | 0 | case GGML_OP_CPY: // gradients in CPY target are irrelevant |
6778 | 0 | case GGML_OP_GET_ROWS: // row indices not differentiable |
6779 | 0 | case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS |
6780 | 0 | case GGML_OP_ROPE: // positions not differentiable |
6781 | 0 | ignore_src[1] = true; |
6782 | 0 | break; |
6783 | | |
6784 | 0 | default: |
6785 | 0 | break; |
6786 | 0 | } |
6787 | 0 | for (int j = 0; j < GGML_MAX_SRC; ++j) { |
6788 | 0 | if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) { |
6789 | 0 | continue; |
6790 | 0 | } |
6791 | 0 | GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16); |
6792 | 0 | node_needs_grad = true; |
6793 | 0 | break; |
6794 | 0 | } |
6795 | 0 | if (!node_needs_grad) { |
6796 | 0 | continue; |
6797 | 0 | } |
6798 | | |
6799 | | // inplace operations are currently not supported |
6800 | 0 | GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW || |
6801 | 0 | node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE); |
6802 | |
|
6803 | 0 | const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node); |
6804 | 0 | GGML_ASSERT(ihash != GGML_HASHSET_FULL); |
6805 | 0 | GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash)); |
6806 | 0 | if (grad_accs && grad_accs[i]) { |
6807 | 0 | cgraph->grad_accs[ihash] = grad_accs[i]; |
6808 | 0 | cgraph->grads[ihash] = cgraph->grad_accs[ihash]; |
6809 | 0 | } else if (node->flags & GGML_TENSOR_FLAG_LOSS) { |
6810 | | // loss tensors always need a gradient accumulator |
6811 | 0 | cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne); |
6812 | 0 | cgraph->grads[ihash] = cgraph->grad_accs[ihash]; |
6813 | 0 | } |
6814 | 0 | grads_needed[ihash] = true; |
6815 | 0 | } |
6816 | | |
6817 | 0 | for (int i = n_nodes_f - 1; i >= 0; --i) { |
6818 | | // ggml_compute_backward does not create inplace operations to add gradients, except for gradient accumulation;
6819 | | // the allocator is relied on to make operations inplace automatically
6820 | 0 | ggml_compute_backward(ctx, cgraph, i, grads_needed); |
6821 | 0 | } |
6822 | |
|
6823 | 0 | free(grads_needed); |
6824 | 0 | } |
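// illustrative sketch of the intended call sequence for a training graph
// (tensor names are hypothetical; backend/allocator setup omitted):
//     struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
//     ggml_set_param(weights);                    // mark trainable leaf tensors
//     ggml_set_loss(loss);                        // scalar F32 loss tensor
//     ggml_build_forward_expand(gb, loss);        // forward graph ending in the loss
//     ggml_build_backward_expand(ctx, gb, NULL);  // append backward ops, default grad accumulators
//     ggml_graph_reset(gb);                       // set d(loss)/d(loss) = 1, zero all other grads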
6825 | | |
6826 | 0 | static void * incr_ptr_aligned(void ** p, size_t size, size_t align) { |
6827 | 0 | void * ptr = *p; |
6828 | 0 | ptr = (void *) GGML_PAD((uintptr_t) ptr, align); |
6829 | 0 | *p = (void *) ((char *) ptr + size); |
6830 | 0 | return ptr; |
6831 | 0 | } |
6832 | | |
6833 | 0 | static size_t ggml_graph_nbytes(size_t size, bool grads) { |
6834 | 0 | size_t hash_size = ggml_hash_size(size * 2); |
6835 | 0 | void * p = 0; |
6836 | 0 | incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1); |
6837 | 0 | incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes |
6838 | 0 | incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs |
6839 | 0 | incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts |
6840 | 0 | incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys |
6841 | 0 | if (grads) { |
6842 | 0 | incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads |
6843 | 0 | incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs |
6844 | 0 | } |
6845 | 0 | incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t)); |
6846 | |
|
6847 | 0 | size_t nbytes = (size_t) p; |
6848 | 0 | return nbytes; |
6849 | 0 | } |
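// note: ggml_graph_nbytes() computes the required size by replaying the same
// incr_ptr_aligned() sequence used in ggml_new_graph_custom(), but starting from a
// NULL base pointer, so the final pointer value is the total byte count and the two
// functions cannot drift apart (ggml_new_graph_custom() asserts this at the end)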
6850 | | |
6851 | 0 | size_t ggml_graph_overhead_custom(size_t size, bool grads) { |
6852 | 0 | return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN); |
6853 | 0 | } |
6854 | | |
6855 | 0 | size_t ggml_graph_overhead(void) { |
6856 | 0 | return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false); |
6857 | 0 | } |
6858 | | |
6859 | 0 | struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) { |
6860 | 0 | const size_t obj_size = ggml_graph_nbytes(size, grads); |
6861 | 0 | struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size); |
6862 | 0 | struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs); |
6863 | | |
6864 | | // the size of the hash table is doubled since it needs to hold both nodes and leafs |
6865 | 0 | size_t hash_size = ggml_hash_size(size * 2); |
6866 | |
|
6867 | 0 | void * p = cgraph + 1; |
6868 | |
|
6869 | 0 | struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); |
6870 | 0 | struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); |
6871 | 0 | int32_t * use_counts_ptr = incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); |
6872 | 0 | struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); |
6873 | 0 | struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL; |
6874 | 0 | struct ggml_tensor ** grad_accs_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL; |
6875 | |
|
6876 | 0 | ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t)); |
6877 | | |
6878 | | // check that we allocated the correct amount of memory |
6879 | 0 | assert(obj_size == (size_t)((char *)p - (char *)cgraph)); |
6880 | |
|
6881 | 0 | *cgraph = (struct ggml_cgraph) { |
6882 | 0 | /*.size =*/ size, |
6883 | 0 | /*.n_nodes =*/ 0, |
6884 | 0 | /*.n_leafs =*/ 0, |
6885 | 0 | /*.nodes =*/ nodes_ptr, |
6886 | 0 | /*.grads =*/ grads_ptr, |
6887 | 0 | /*.grad_accs =*/ grad_accs_ptr, |
6888 | 0 | /*.leafs =*/ leafs_ptr, |
6889 | 0 | /*.use_counts =*/ use_counts_ptr, |
6890 | 0 | /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr }, |
6891 | 0 | /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT, |
6892 | 0 | }; |
6893 | |
|
6894 | 0 | ggml_hash_set_reset(&cgraph->visited_hash_set); |
6895 | 0 | if (grads) { |
6896 | 0 | memset(cgraph->grads, 0, hash_size*sizeof(struct ggml_tensor *)); |
6897 | 0 | memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *)); |
6898 | 0 | } |
6899 | |
|
6900 | 0 | return cgraph; |
6901 | 0 | } |
6902 | | |
6903 | 0 | struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) { |
6904 | 0 | return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false); |
6905 | 0 | } |
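// illustrative sketch (parameter values are arbitrary): sizing a context that only
// needs to hold a custom-sized graph object, using the overhead helpers above:
//     struct ggml_init_params params = {
//         /*.mem_size   =*/ ggml_graph_overhead_custom(2048, /*grads =*/ false),
//         /*.mem_buffer =*/ NULL,
//         /*.no_alloc   =*/ true,
//     };
//     struct ggml_context * ctx = ggml_init(params);
//     struct ggml_cgraph  * gf  = ggml_new_graph_custom(ctx, 2048, /*grads =*/ false);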
6906 | | |
6907 | 0 | struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) { |
6908 | 0 | struct ggml_cgraph cgraph = { |
6909 | 0 | /*.size =*/ 0, |
6910 | 0 | /*.n_nodes =*/ i1 - i0, |
6911 | 0 | /*.n_leafs =*/ 0, |
6912 | 0 | /*.nodes =*/ cgraph0->nodes + i0, |
6913 | 0 | /*.grads =*/ NULL, // gradients would need visited_hash_set |
6914 | 0 | /*.grad_accs =*/ NULL, |
6915 | 0 | /*.leafs =*/ NULL, |
6916 | 0 | /*.use_counts =*/ cgraph0->use_counts, |
6917 | 0 | /*.visited_hash_set =*/ cgraph0->visited_hash_set, |
6918 | 0 | /*.order =*/ cgraph0->order, |
6919 | 0 | }; |
6920 | |
|
6921 | 0 | return cgraph; |
6922 | 0 | } |
6923 | | |
6924 | 0 | void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) { |
6925 | 0 | GGML_ASSERT(dst->size >= src->n_leafs); |
6926 | 0 | GGML_ASSERT(dst->size >= src->n_nodes); |
6927 | 0 | GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size); |
6928 | |
|
6929 | 0 | dst->n_leafs = src->n_leafs; |
6930 | 0 | dst->n_nodes = src->n_nodes; |
6931 | 0 | dst->order = src->order; |
6932 | |
|
6933 | 0 | for (int i = 0; i < src->n_leafs; ++i) { |
6934 | 0 | dst->leafs[i] = src->leafs[i]; |
6935 | 0 | } |
6936 | |
|
6937 | 0 | for (int i = 0; i < src->n_nodes; ++i) { |
6938 | 0 | dst->nodes[i] = src->nodes[i]; |
6939 | 0 | } |
6940 | |
|
6941 | 0 | for (size_t i = 0; i < src->visited_hash_set.size; ++i) { |
6942 | | // copy all hashset keys (tensors) that are in use |
6943 | 0 | if (ggml_bitset_get(src->visited_hash_set.used, i)) { |
6944 | 0 | size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]); |
6945 | 0 | dst->use_counts[new_hash_pos] = src->use_counts[i]; |
6946 | 0 | } |
6947 | 0 | } |
6948 | |
|
6949 | 0 | if (dst->grads) { |
6950 | 0 | memset(dst->grads, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *)); |
6951 | 0 | memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *)); |
6952 | 0 | } |
6953 | 0 | if (src->grads) { |
6954 | 0 | GGML_ASSERT(dst->grads != NULL); |
6955 | 0 | GGML_ASSERT(dst->grad_accs != NULL); |
6956 | 0 | for (int i = 0; i < src->n_nodes; ++i) { |
6957 | 0 | const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]); |
6958 | 0 | const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]); |
6959 | |
|
6960 | 0 | GGML_ASSERT(igrad_src != GGML_HASHSET_FULL); |
6961 | 0 | GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src)); |
6962 | 0 | GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL); |
6963 | 0 | GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst)); |
6964 | |
|
6965 | 0 | dst->grads[igrad_dst] = src->grads[igrad_src]; |
6966 | 0 | dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src]; |
6967 | 0 | } |
6968 | 0 | } |
6969 | 0 | } |
6970 | | |
6971 | 0 | struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) { |
6972 | 0 | struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads); |
6973 | 0 | ggml_graph_cpy(cgraph, result); |
6974 | 0 | return result; |
6975 | 0 | } |
6976 | | |
6977 | 0 | struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) { |
6978 | 0 | if (ggml_is_empty(tensor)) { |
6979 | 0 | return tensor; |
6980 | 0 | } |
6981 | 0 | if (tensor->buffer) { |
6982 | 0 | ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor)); |
6983 | 0 | } else { |
6984 | 0 | GGML_ASSERT(tensor->data); |
6985 | 0 | memset(tensor->data, 0, ggml_nbytes(tensor)); |
6986 | 0 | } |
6987 | 0 | return tensor; |
6988 | 0 | } |
6989 | | |
6990 | 0 | void ggml_graph_reset(struct ggml_cgraph * cgraph) { |
6991 | 0 | if (!cgraph) { |
6992 | 0 | return; |
6993 | 0 | } |
6994 | 0 | GGML_ASSERT(cgraph->grads != NULL); |
6995 | |
|
6996 | 0 | for (int i = 0; i < cgraph->n_nodes; i++) { |
6997 | 0 | struct ggml_tensor * node = cgraph->nodes[i]; |
6998 | 0 | struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node); |
6999 | |
|
7000 | 0 | if (node->op == GGML_OP_OPT_STEP_ADAMW) { |
7001 | | // clear momenta |
7002 | 0 | ggml_set_zero(node->src[2]); |
7003 | 0 | ggml_set_zero(node->src[3]); |
7004 | 0 | } |
7005 | | |
7006 | | // the initial gradient of a loss tensor should be 1, all other gradients 0
7007 | 0 | if (grad_acc) { |
7008 | 0 | if (node->flags & GGML_TENSOR_FLAG_LOSS) { |
7009 | 0 | GGML_ASSERT(grad_acc->type == GGML_TYPE_F32); |
7010 | 0 | GGML_ASSERT(ggml_is_scalar(grad_acc)); |
7011 | |
|
7012 | 0 | const float onef = 1.0f; |
7013 | 0 | if (grad_acc->buffer) { |
7014 | 0 | ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float)); |
7015 | 0 | } else { |
7016 | 0 | GGML_ASSERT(grad_acc->data); |
7017 | 0 | *((float *) grad_acc->data) = onef; |
7018 | 0 | } |
7019 | 0 | } else { |
7020 | 0 | ggml_set_zero(grad_acc); |
7021 | 0 | } |
7022 | 0 | } |
7023 | 0 | } |
7024 | 0 | } |
7025 | | |
7026 | 0 | void ggml_graph_clear(struct ggml_cgraph * cgraph) { |
7027 | 0 | cgraph->n_leafs = 0; |
7028 | 0 | cgraph->n_nodes = 0; |
7029 | 0 | ggml_hash_set_reset(&cgraph->visited_hash_set); |
7030 | 0 | } |
7031 | | |
7032 | 0 | int ggml_graph_size(struct ggml_cgraph * cgraph) { |
7033 | 0 | return cgraph->size; |
7034 | 0 | } |
7035 | | |
7036 | 0 | struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) { |
7037 | 0 | if (i < 0) { |
7038 | 0 | GGML_ASSERT(cgraph->n_nodes + i >= 0); |
7039 | 0 | return cgraph->nodes[cgraph->n_nodes + i]; |
7040 | 0 | } |
7041 | | |
7042 | 0 | GGML_ASSERT(i < cgraph->n_nodes); |
7043 | 0 | return cgraph->nodes[i]; |
7044 | 0 | } |
7045 | | |
7046 | 0 | struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) { |
7047 | 0 | return cgraph->nodes; |
7048 | 0 | } |
7049 | | |
7050 | 0 | int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) { |
7051 | 0 | return cgraph->n_nodes; |
7052 | 0 | } |
7053 | | |
7054 | 0 | void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) { |
7055 | 0 | GGML_ASSERT(cgraph->size > cgraph->n_nodes); |
7056 | 0 | cgraph->nodes[cgraph->n_nodes] = tensor; |
7057 | 0 | cgraph->n_nodes++; |
7058 | 0 | } |
7059 | | |
7060 | 0 | struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) { |
7061 | 0 | for (int i = 0; i < cgraph->n_leafs; i++) { |
7062 | 0 | struct ggml_tensor * leaf = cgraph->leafs[i]; |
7063 | |
|
7064 | 0 | if (strcmp(leaf->name, name) == 0) { |
7065 | 0 | return leaf; |
7066 | 0 | } |
7067 | 0 | } |
7068 | | |
7069 | 0 | for (int i = 0; i < cgraph->n_nodes; i++) { |
7070 | 0 | struct ggml_tensor * node = cgraph->nodes[i]; |
7071 | |
|
7072 | 0 | if (strcmp(node->name, name) == 0) { |
7073 | 0 | return node; |
7074 | 0 | } |
7075 | 0 | } |
7076 | | |
7077 | 0 | return NULL; |
7078 | 0 | } |
7079 | | |
7080 | 0 | struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { |
7081 | 0 | const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node); |
7082 | 0 | return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL; |
7083 | 0 | } |
7084 | | |
7085 | 0 | struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { |
7086 | 0 | const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node); |
7087 | 0 | return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL; |
7088 | 0 | } |
7089 | | |
7090 | 0 | void ggml_graph_print(const struct ggml_cgraph * cgraph) { |
7091 | 0 | GGML_LOG_INFO("=== GRAPH ===\n"); |
7092 | |
|
7093 | 0 | GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes); |
7094 | 0 | for (int i = 0; i < cgraph->n_nodes; i++) { |
7095 | 0 | struct ggml_tensor * node = cgraph->nodes[i]; |
7096 | |
|
7097 | 0 | GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n", |
7098 | 0 | i, |
7099 | 0 | node->ne[0], node->ne[1], node->ne[2], |
7100 | 0 | ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : |
7101 | 0 | ggml_graph_get_grad(cgraph, node) ? "g" : " "); |
7102 | 0 | } |
7103 | |
|
7104 | 0 | GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs); |
7105 | 0 | for (int i = 0; i < cgraph->n_leafs; i++) { |
7106 | 0 | struct ggml_tensor * node = cgraph->leafs[i]; |
7107 | |
|
7108 | 0 | GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n", |
7109 | 0 | i, |
7110 | 0 | node->ne[0], node->ne[1], |
7111 | 0 | ggml_op_name(node->op), |
7112 | 0 | ggml_get_name(node)); |
7113 | 0 | } |
7114 | |
|
7115 | 0 | GGML_LOG_INFO("========================================\n"); |
7116 | 0 | } |
7117 | | |
7118 | | static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph, |
7119 | | const int * idxs, |
7120 | | int count, |
7121 | 0 | const struct ggml_tensor * tensor) { |
7122 | 0 | GGML_ASSERT(cgraph && idxs); |
7123 | 0 | for (int i = 0; i < count; ++i) { |
7124 | 0 | const int node_idx = idxs[i]; |
7125 | |
|
7126 | 0 | if (node_idx >= cgraph->n_nodes) { |
7127 | 0 | return -1; |
7128 | 0 | } |
7129 | 0 | if (cgraph->nodes[node_idx] == tensor) { |
7130 | 0 | return i; |
7131 | 0 | } |
7132 | 0 | } |
7133 | 0 | return -1; |
7134 | 0 | } |
7135 | | |
7136 | | bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph, |
7137 | | const int * node_idxs, |
7138 | | int count, |
7139 | | const enum ggml_op * ops, |
7140 | | const int * outputs, |
7141 | 0 | int num_outputs) { |
7142 | 0 | GGML_ASSERT(outputs && num_outputs > 0); |
7143 | |
|
7144 | 0 | for (int i = 0; i < count; ++i) { |
7145 | 0 | if (node_idxs[i] >= cgraph->n_nodes) { |
7146 | 0 | return false; |
7147 | 0 | } |
7148 | | |
7149 | 0 | const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]]; |
7150 | |
|
7151 | 0 | if (node->op != ops[i]) { |
7152 | 0 | return false; |
7153 | 0 | } |
7154 | | |
7155 | 0 | if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) { |
7156 | 0 | continue; |
7157 | 0 | } |
7158 | | |
7159 | 0 | if (node->flags & GGML_TENSOR_FLAG_OUTPUT) { |
7160 | 0 | return false; |
7161 | 0 | } |
7162 | | |
7163 | 0 | int subgraph_uses = 0; |
7164 | 0 | for (int j = i + 1; j < count; ++j) { |
7165 | 0 | const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]]; |
7166 | 0 | for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) { |
7167 | 0 | if (other_node->src[src_idx] == node) { |
7168 | 0 | subgraph_uses++; |
7169 | 0 | } |
7170 | 0 | } |
7171 | 0 | } |
7172 | |
|
7173 | 0 | if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) { |
7174 | 0 | return false; |
7175 | 0 | } |
7176 | | |
7177 | | // if node is a view, check that the view_src and all of its parent view_srcs are within the subgraph
7178 | 0 | struct ggml_tensor * view_src = node->view_src; |
7179 | 0 | while (view_src) { |
7180 | 0 | if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) { |
7181 | 0 | return false; |
7182 | 0 | } |
7183 | 0 | view_src = view_src->view_src; |
7184 | 0 | } |
7185 | 0 | } |
7186 | | |
7187 | 0 | return true; |
7188 | 0 | } |
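// illustrative sketch (node indices are hypothetical): checking whether two
// consecutive nodes forming MUL_MAT -> ADD can be fused, with the ADD result as
// the only output of the fused region:
//     const int          idxs[] = { i, i + 1 };
//     const enum ggml_op ops [] = { GGML_OP_MUL_MAT, GGML_OP_ADD };
//     const int          outs[] = { i + 1 };
//     const bool ok = ggml_can_fuse_subgraph_ext(cgraph, idxs, 2, ops, outs, 1);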
7189 | | |
7190 | | // check if node is part of the graph |
7191 | 0 | static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { |
7192 | 0 | if (cgraph == NULL) { |
7193 | 0 | return true; |
7194 | 0 | } |
7195 | | |
7196 | 0 | for (int i = 0; i < cgraph->n_nodes; i++) { |
7197 | 0 | if (cgraph->nodes[i] == node) { |
7198 | 0 | return true; |
7199 | 0 | } |
7200 | 0 | } |
7201 | | |
7202 | 0 | return false; |
7203 | 0 | } |
7204 | | |
7205 | 0 | static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { |
7206 | 0 | for (int i = 0; i < cgraph->n_nodes; i++) { |
7207 | 0 | struct ggml_tensor * parent = cgraph->nodes[i]; |
7208 | 0 | struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent); |
7209 | |
|
7210 | 0 | if (grad == node) { |
7211 | 0 | return parent; |
7212 | 0 | } |
7213 | 0 | } |
7214 | | |
7215 | 0 | return NULL; |
7216 | 0 | } |
7217 | | |
7218 | 0 | static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) { |
7219 | 0 | struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node); |
7220 | 0 | struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent); |
7221 | 0 | fprintf(fp, " \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n", |
7222 | 0 | gparent0 ? (void *) gparent0 : (void *) parent, |
7223 | 0 | gparent ? (void *) gparent : (void *) node, |
7224 | 0 | gparent ? "empty" : "vee", |
7225 | 0 | gparent ? "dashed" : "solid", |
7226 | 0 | label); |
7227 | 0 | } |
7228 | | |
7229 | 0 | static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) { |
7230 | 0 | fprintf(fp, " \"%p\" -> \"%p\" [ label = \"%s\"; ]\n", |
7231 | 0 | (void *) parent, |
7232 | 0 | (void *) node, |
7233 | 0 | label); |
7234 | 0 | } |
7235 | | |
7236 | 0 | void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) { |
7237 | 0 | char color[16]; |
7238 | |
|
7239 | 0 | FILE * fp = ggml_fopen(filename, "w"); |
7240 | 0 | GGML_ASSERT(fp); |
7241 | |
|
7242 | 0 | fprintf(fp, "digraph G {\n"); |
7243 | 0 | fprintf(fp, " newrank = true;\n"); |
7244 | 0 | fprintf(fp, " rankdir = TB;\n"); |
7245 | |
|
7246 | 0 | for (int i = 0; i < gb->n_nodes; i++) { |
7247 | 0 | struct ggml_tensor * node = gb->nodes[i]; |
7248 | 0 | struct ggml_tensor * grad = ggml_graph_get_grad(gb, node); |
7249 | |
|
7250 | 0 | if (ggml_graph_get_parent(gb, node) != NULL) { |
7251 | 0 | continue; |
7252 | 0 | } |
7253 | | |
7254 | 0 | if (node->flags & GGML_TENSOR_FLAG_PARAM) { |
7255 | 0 | snprintf(color, sizeof(color), "yellow"); |
7256 | 0 | } else if (grad) { |
7257 | 0 | if (ggml_graph_find(gf, node)) { |
7258 | 0 | snprintf(color, sizeof(color), "green"); |
7259 | 0 | } else { |
7260 | 0 | snprintf(color, sizeof(color), "lightblue"); |
7261 | 0 | } |
7262 | 0 | } else { |
7263 | 0 | snprintf(color, sizeof(color), "white"); |
7264 | 0 | } |
7265 | |
|
7266 | 0 | fprintf(fp, " \"%p\" [ " |
7267 | 0 | "style = filled; fillcolor = %s; shape = record; " |
7268 | 0 | "label=\"", |
7269 | 0 | (void *) node, color); |
7270 | |
|
7271 | 0 | if (strlen(node->name) > 0) { |
7272 | 0 | fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type)); |
7273 | 0 | } else { |
7274 | 0 | fprintf(fp, "(%s)|", ggml_type_name(node->type)); |
7275 | 0 | } |
7276 | |
|
7277 | 0 | if (ggml_is_matrix(node)) { |
7278 | 0 | fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op)); |
7279 | 0 | } else { |
7280 | 0 | fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op)); |
7281 | 0 | } |
7282 | |
|
7283 | 0 | if (grad) { |
7284 | 0 | fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op)); |
7285 | 0 | } else { |
7286 | 0 | fprintf(fp, "\"; ]\n"); |
7287 | 0 | } |
7288 | 0 | } |
7289 | |
|
7290 | 0 | for (int i = 0; i < gb->n_leafs; i++) { |
7291 | 0 | struct ggml_tensor * node = gb->leafs[i]; |
7292 | |
|
7293 | 0 | snprintf(color, sizeof(color), "pink"); |
7294 | |
|
7295 | 0 | fprintf(fp, " \"%p\" [ " |
7296 | 0 | "style = filled; fillcolor = %s; shape = record; " |
7297 | 0 | "label=\"<x>", |
7298 | 0 | (void *) node, color); |
7299 | |
|
7300 | 0 | if (strlen(node->name) > 0) { |
7301 | 0 | fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type)); |
7302 | 0 | } else { |
7303 | 0 | fprintf(fp, "(%s)|", ggml_type_name(node->type)); |
7304 | 0 | } |
7305 | |
|
7306 | 0 | fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]); |
7307 | 0 | if (ggml_nelements(node) < 5 && node->data != NULL) { |
7308 | 0 | fprintf(fp, " | ("); |
7309 | 0 | for (int j = 0; j < ggml_nelements(node); j++) { |
7310 | | // FIXME: use ggml-backend to obtain the tensor data |
7311 | | //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) { |
7312 | | // fprintf(fp, "%d", ggml_get_i32_1d(node, j)); |
7313 | | //} |
7314 | | //else if (node->type == GGML_TYPE_F32 || |
7315 | | // node->type == GGML_TYPE_F16 || |
7316 | | // node->type == GGML_TYPE_BF16) { |
7317 | | // fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j)); |
7318 | | //} |
7319 | | //else |
7320 | 0 | { |
7321 | 0 | fprintf(fp, "#"); |
7322 | 0 | } |
7323 | 0 | if (j < ggml_nelements(node) - 1) { |
7324 | 0 | fprintf(fp, ", "); |
7325 | 0 | } |
7326 | 0 | } |
7327 | 0 | fprintf(fp, ")"); |
7328 | 0 | } |
7329 | 0 | fprintf(fp, "\"; ]\n"); |
7330 | 0 | } |
7331 | |
|
7332 | 0 | for (int i = 0; i < gb->n_nodes; i++) { |
7333 | 0 | struct ggml_tensor * node = gb->nodes[i]; |
7334 | |
|
7335 | 0 | for (int j = 0; j < GGML_MAX_SRC; j++) { |
7336 | 0 | if (node->src[j]) { |
7337 | 0 | char label[16]; |
7338 | 0 | snprintf(label, sizeof(label), "src %d", j); |
7339 | 0 | ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label); |
7340 | 0 | } |
7341 | 0 | } |
7342 | 0 | } |
7343 | |
|
7344 | 0 | for (int i = 0; i < gb->n_leafs; i++) { |
7345 | 0 | struct ggml_tensor * node = gb->leafs[i]; |
7346 | |
|
7347 | 0 | for (int j = 0; j < GGML_MAX_SRC; j++) { |
7348 | 0 | if (node->src[j]) { |
7349 | 0 | char label[16]; |
7350 | 0 | snprintf(label, sizeof(label), "src %d", j); |
7351 | 0 | ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label); |
7352 | 0 | } |
7353 | 0 | } |
7354 | 0 | } |
7355 | |
|
7356 | 0 | fprintf(fp, "}\n"); |
7357 | |
|
7358 | 0 | fclose(fp); |
7359 | |
|
7360 | 0 | GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename); |
7361 | 0 | } |
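// usage note: `gf` may be NULL when there is no separate forward graph
// (ggml_graph_find() above treats a NULL graph as containing every node), e.g.:
//     ggml_graph_dump_dot(gb, NULL, "ggml-graph.dot");
//     // then render it with the command printed above:
//     //     dot -Tpng ggml-graph.dot -o ggml-graph.png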
7362 | | |
7363 | | //////////////////////////////////////////////////////////////////////////////// |
7364 | | |
7365 | 0 | void ggml_set_input(struct ggml_tensor * tensor) { |
7366 | 0 | tensor->flags |= GGML_TENSOR_FLAG_INPUT; |
7367 | 0 | } |
7368 | | |
7369 | 0 | void ggml_set_output(struct ggml_tensor * tensor) { |
7370 | 0 | tensor->flags |= GGML_TENSOR_FLAG_OUTPUT; |
7371 | 0 | } |
7372 | | |
7373 | 0 | void ggml_set_param(struct ggml_tensor * tensor) { |
7374 | 0 | GGML_ASSERT(tensor->op == GGML_OP_NONE); |
7375 | 0 | tensor->flags |= GGML_TENSOR_FLAG_PARAM; |
7376 | 0 | } |
7377 | | |
7378 | 0 | void ggml_set_loss(struct ggml_tensor * tensor) { |
7379 | 0 | GGML_ASSERT(ggml_is_scalar(tensor)); |
7380 | 0 | GGML_ASSERT(tensor->type == GGML_TYPE_F32); |
7381 | 0 | tensor->flags |= GGML_TENSOR_FLAG_LOSS; |
7382 | 0 | } |
7383 | | |
7384 | | //////////////////////////////////////////////////////////////////////////////// |
7385 | | |
7386 | 0 | void ggml_quantize_init(enum ggml_type type) { |
7387 | 0 | ggml_critical_section_start(); |
7388 | |
|
7389 | 0 | switch (type) { |
7390 | 0 | case GGML_TYPE_IQ2_XXS: |
7391 | 0 | case GGML_TYPE_IQ2_XS: |
7392 | 0 | case GGML_TYPE_IQ2_S: |
7393 | 0 | case GGML_TYPE_IQ1_S: |
7394 | 0 | case GGML_TYPE_IQ1_M: iq2xs_init_impl(type); break; |
7395 | 0 | case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break; |
7396 | 0 | case GGML_TYPE_IQ3_S: iq3xs_init_impl(512); break; |
7397 | 0 | default: // nothing |
7398 | 0 | break; |
7399 | 0 | } |
7400 | | |
7401 | 0 | ggml_critical_section_end(); |
7402 | 0 | } |
7403 | | |
7404 | 862 | void ggml_quantize_free(void) { |
7405 | 862 | ggml_critical_section_start(); |
7406 | | |
7407 | 862 | iq2xs_free_impl(GGML_TYPE_IQ2_XXS); |
7408 | 862 | iq2xs_free_impl(GGML_TYPE_IQ2_XS); |
7409 | 862 | iq2xs_free_impl(GGML_TYPE_IQ1_S); |
7410 | 862 | iq3xs_free_impl(256); |
7411 | | |
7412 | 862 | ggml_critical_section_end(); |
7413 | 862 | } |
7414 | | |
7415 | 0 | bool ggml_quantize_requires_imatrix(enum ggml_type type) { |
7416 | 0 | return |
7417 | 0 | type == GGML_TYPE_IQ2_XXS || |
7418 | 0 | type == GGML_TYPE_IQ2_XS || |
7419 | 0 | type == GGML_TYPE_IQ1_S;// || |
7420 | | //type == GGML_TYPE_IQ1_M; |
7421 | 0 | } |
7422 | | |
7423 | | size_t ggml_quantize_chunk( |
7424 | | enum ggml_type type, |
7425 | | const float * src, |
7426 | | void * dst, |
7427 | | int64_t start, |
7428 | | int64_t nrows, |
7429 | | int64_t n_per_row, |
7430 | 0 | const float * imatrix) { |
7431 | 0 | const int64_t n = (int64_t) nrows * n_per_row; |
7432 | |
|
7433 | 0 | if (ggml_quantize_requires_imatrix(type)) { |
7434 | 0 | GGML_ASSERT(imatrix != NULL); |
7435 | 0 | } |
7436 | |
|
7437 | 0 | GGML_ASSERT(start % type_traits[type].blck_size == 0); |
7438 | 0 | GGML_ASSERT(start % n_per_row == 0); |
7439 | |
|
7440 | 0 | ggml_quantize_init(type); // this is noop if already initialized |
7441 | |
|
7442 | 0 | const size_t start_row = start / n_per_row; |
7443 | 0 | const size_t row_size = ggml_row_size(type, n_per_row); |
7444 | |
|
7445 | 0 | size_t result = 0; |
7446 | |
|
7447 | 0 | switch (type) { |
7448 | 0 | case GGML_TYPE_Q4_0: result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7449 | 0 | case GGML_TYPE_Q4_1: result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7450 | 0 | case GGML_TYPE_Q5_0: result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7451 | 0 | case GGML_TYPE_Q5_1: result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7452 | 0 | case GGML_TYPE_Q8_0: result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7453 | 0 | case GGML_TYPE_MXFP4: result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7454 | 0 | case GGML_TYPE_Q2_K: result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7455 | 0 | case GGML_TYPE_Q3_K: result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7456 | 0 | case GGML_TYPE_Q4_K: result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7457 | 0 | case GGML_TYPE_Q5_K: result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7458 | 0 | case GGML_TYPE_Q6_K: result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7459 | 0 | case GGML_TYPE_TQ1_0: result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7460 | 0 | case GGML_TYPE_TQ2_0: result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7461 | 0 | case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7462 | 0 | case GGML_TYPE_IQ2_XS: result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7463 | 0 | case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7464 | 0 | case GGML_TYPE_IQ3_S: result = quantize_iq3_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7465 | 0 | case GGML_TYPE_IQ2_S: result = quantize_iq2_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7466 | 0 | case GGML_TYPE_IQ1_S: result = quantize_iq1_s (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7467 | 0 | case GGML_TYPE_IQ1_M: result = quantize_iq1_m (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7468 | 0 | case GGML_TYPE_IQ4_NL: result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7469 | 0 | case GGML_TYPE_IQ4_XS: result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break; |
7470 | 0 | case GGML_TYPE_F16: |
7471 | 0 | { |
7472 | 0 | size_t elemsize = sizeof(ggml_fp16_t); |
7473 | 0 | ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n); |
7474 | 0 | result = n * elemsize; |
7475 | 0 | } break; |
7476 | 0 | case GGML_TYPE_BF16: |
7477 | 0 | { |
7478 | 0 | size_t elemsize = sizeof(ggml_bf16_t); |
7479 | 0 | ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n); |
7480 | 0 | result = n * elemsize; |
7481 | 0 | } break; |
7482 | 0 | case GGML_TYPE_F32: |
7483 | 0 | { |
7484 | 0 | size_t elemsize = sizeof(float); |
7485 | 0 | result = n * elemsize; |
7486 | 0 | memcpy((uint8_t *)dst + start * elemsize, src + start, result); |
7487 | 0 | } break; |
7488 | 0 | default: |
7489 | 0 | assert(false); |
7490 | 0 | } |
7491 | | |
7492 | 0 | GGML_ASSERT(result == nrows * row_size); |
7493 | |
|
7494 | 0 | return result; |
7495 | 0 | } |
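// illustrative sketch (buffer names are hypothetical): quantizing `nrows` rows of
// `n_per_row` F32 values to Q8_0 in a single chunk; Q8_0 does not need an
// importance matrix (see ggml_quantize_requires_imatrix() above):
//     const size_t row_size = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
//     void * dst = malloc(nrows * row_size);
//     const size_t written = ggml_quantize_chunk(GGML_TYPE_Q8_0, src_f32, dst,
//                                                /*start =*/ 0, nrows, n_per_row,
//                                                /*imatrix =*/ NULL);
//     // written == nrows * row_size (asserted above)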
7496 | | |
7497 | | //////////////////////////////////////////////////////////////////////////////// |
7498 | | |
7499 | 0 | void ggml_log_set(ggml_log_callback log_callback, void * user_data) { |
7500 | 0 | g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default; |
7501 | 0 | g_logger_state.log_callback_user_data = user_data; |
7502 | 0 | } |
7503 | | |
7504 | 0 | void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) { |
7505 | 0 | p->n_threads = n_threads; |
7506 | 0 | p->prio = 0; // default priority (usually means normal or inherited) |
7507 | 0 | p->poll = 50; // hybrid-polling enabled |
7508 | 0 | p->strict_cpu = false; // no strict placement (all threads share same cpumask) |
7509 | 0 | p->paused = false; // threads are ready to go |
7510 | 0 | memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited) |
7511 | 0 | } |
7512 | | |
7513 | 0 | struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) { |
7514 | 0 | struct ggml_threadpool_params p; |
7515 | 0 | ggml_threadpool_params_init(&p, n_threads); |
7516 | 0 | return p; |
7517 | 0 | } |
7518 | | |
7519 | 0 | bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) { |
7520 | 0 | if (p0->n_threads != p1->n_threads ) return false; |
7521 | 0 | if (p0->prio != p1->prio ) return false; |
7522 | 0 | if (p0->poll != p1->poll ) return false; |
7523 | 0 | if (p0->strict_cpu != p1->strict_cpu ) return false; |
7524 | 0 | return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0; |
7525 | 0 | } |
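// illustrative sketch: the comparison helper above only considers n_threads, prio,
// poll, strict_cpu and the cpumask, e.g.:
//     struct ggml_threadpool_params p0 = ggml_threadpool_params_default(8);
//     struct ggml_threadpool_params p1 = ggml_threadpool_params_default(8);
//     p1.paused = true;                                   // not compared
//     bool same = ggml_threadpool_params_match(&p0, &p1); // still true
//     p1.prio   = 1;                                       // compared
//     same      = ggml_threadpool_params_match(&p0, &p1);  // now false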