Coverage Report

Created: 2026-03-03 06:12

/src/llama.cpp/ggml/src/ggml.c
Line
Count
Source
1
#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
2
#define _USE_MATH_DEFINES // For M_PI on MSVC
3
4
#include "ggml-backend.h"
5
#include "ggml-impl.h"
6
#include "ggml-threading.h"
7
#include "ggml-cpu.h"
8
#include "ggml.h"
9
10
// FIXME: required here for quantization functions
11
#include "ggml-quants.h"
12
13
#ifdef GGML_USE_CPU_HBM
14
#include <hbwmalloc.h>
15
#endif
16
17
#if defined(_MSC_VER) || defined(__MINGW32__)
18
#include <malloc.h> // using malloc.h with MSC/MINGW
19
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
20
#include <alloca.h>
21
#endif
22
23
#include <assert.h>
24
#include <errno.h>
25
#include <time.h>
26
#include <math.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include <stdint.h>
30
#include <inttypes.h>
31
#include <stdio.h>
32
#include <float.h>
33
#include <limits.h>
34
#include <stdarg.h>
35
#include <signal.h>
36
#if defined(__gnu_linux__)
37
#include <syscall.h>
38
#endif
39
40
#if defined(__APPLE__)
41
#include <unistd.h>
42
#include <mach/mach.h>
43
#include <TargetConditionals.h>
44
#endif
45
46
#if defined(_WIN32)
47
#define WIN32_LEAN_AND_MEAN
48
#ifndef NOMINMAX
49
    #define NOMINMAX
50
#endif
51
#include <windows.h>
52
#endif
53
54
0
#define UNUSED GGML_UNUSED
55
56
// Needed for ggml_fp32_to_bf16_row()
57
#if defined(__AVX512BF16__)
58
#if defined(_MSC_VER)
59
#define m512i(p) p
60
#else
61
#include <immintrin.h>
62
#define m512i(p) (__m512i)(p)
63
#endif // defined(_MSC_VER)
64
#endif // defined(__AVX512BF16__)
65
66
#if defined(__linux__) || \
67
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
68
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
69
70
#include <unistd.h>
71
#include <sys/types.h>
72
#include <sys/stat.h>
73
#include <sys/wait.h>
74
#if defined(__linux__)
75
#include <sys/prctl.h>
76
#endif
77
78
#if defined(__ANDROID__)
79
#include <unwind.h>
80
#include <dlfcn.h>
81
#include <stdio.h>
82
83
struct backtrace_state {
84
    void ** current;
85
    void ** end;
86
};
87
88
static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
89
    struct backtrace_state * state = (struct backtrace_state *)arg;
90
    uintptr_t pc = _Unwind_GetIP(context);
91
    if (pc) {
92
        if (state->current == state->end) {
93
            return _URC_END_OF_STACK;
94
        } else {
95
            *state->current++ = (void*)pc;
96
        }
97
    }
98
    return _URC_NO_REASON;
99
}
100
101
static void ggml_print_backtrace_symbols(void) {
102
    const int max = 100;
103
    void* buffer[max];
104
105
    struct backtrace_state state = {buffer, buffer + max};
106
    _Unwind_Backtrace(unwind_callback, &state);
107
108
    int count = state.current - buffer;
109
110
    for (int idx = 0; idx < count; ++idx) {
111
        const void * addr = buffer[idx];
112
        const char * symbol = "";
113
114
        Dl_info info;
115
        if (dladdr(addr, &info) && info.dli_sname) {
116
            symbol = info.dli_sname;
117
        }
118
119
        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
120
    }
121
}
122
#elif defined(__linux__) && defined(__GLIBC__)
123
#include <execinfo.h>
124
0
static void ggml_print_backtrace_symbols(void) {
125
0
    void * trace[100];
126
0
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
127
0
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
128
0
}
129
#elif defined(__APPLE__)
130
#include <execinfo.h>
131
static void ggml_print_backtrace_symbols(void) {
132
    void * trace[100];
133
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
134
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
135
}
136
#else
137
static void ggml_print_backtrace_symbols(void) {
138
    // platform not supported
139
}
140
#endif
141
142
0
void ggml_print_backtrace(void) {
143
0
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
144
0
    if (GGML_NO_BACKTRACE) {
145
0
        return;
146
0
    }
147
#if defined(__APPLE__)
148
    // On macOS, fork+debugger attachment is problematic due to:
149
    // 1. libdispatch "poisons" forked child processes
150
    // 2. lldb has issues attaching to parent from forked child
151
    // Use simple backtrace() instead to avoid Terminal.app crashes
152
    const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
153
    if (!GGML_BACKTRACE_LLDB) {
154
        fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
155
        fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
156
        fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
157
        ggml_print_backtrace_symbols();
158
        return;
159
    }
160
#endif
161
0
#if defined(__linux__)
162
0
    FILE * f = fopen("/proc/self/status", "r");
163
0
    size_t size = 0;
164
0
    char * line = NULL;
165
0
    ssize_t length = 0;
166
0
    while ((length = getline(&line, &size, f)) > 0) {
167
0
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
168
0
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
169
            // Already being debugged, and the breakpoint is the later abort()
170
0
            free(line);
171
0
            fclose(f);
172
0
            return;
173
0
        }
174
0
    }
175
0
    free(line);
176
0
    fclose(f);
177
0
    int lock[2] = { -1, -1 };
178
0
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
179
0
#endif
180
0
    const int parent_pid = getpid();
181
0
    const int child_pid = fork();
182
0
    if (child_pid < 0) { // error
183
0
#if defined(__linux__)
184
0
        close(lock[1]);
185
0
        close(lock[0]);
186
0
#endif
187
0
        return;
188
0
    } else if (child_pid == 0) { // child
189
0
        char attach[32];
190
0
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
191
0
#if defined(__linux__)
192
0
        close(lock[1]);
193
0
        (void) !read(lock[0], lock, 1);
194
0
        close(lock[0]);
195
0
#endif
196
        // try gdb
197
0
        execlp("gdb", "gdb", "--batch",
198
0
            "-ex", "set style enabled on",
199
0
            "-ex", attach,
200
0
            "-ex", "bt -frame-info source-and-location",
201
0
            "-ex", "detach",
202
0
            "-ex", "quit",
203
0
            (char *) NULL);
204
        // try lldb
205
0
        execlp("lldb", "lldb", "--batch",
206
0
            "-o", "bt",
207
0
            "-o", "quit",
208
0
            "-p", &attach[sizeof("attach ") - 1],
209
0
            (char *) NULL);
210
        // gdb failed, fallback to backtrace_symbols
211
0
        ggml_print_backtrace_symbols();
212
0
        _Exit(0);
213
0
    } else { // parent
214
0
#if defined(__linux__)
215
0
        prctl(PR_SET_PTRACER, child_pid);
216
0
        close(lock[1]);
217
0
        close(lock[0]);
218
0
#endif
219
0
        waitpid(child_pid, NULL, 0);
220
0
    }
221
0
}
222
#else
223
void ggml_print_backtrace(void) {
224
    // platform not supported
225
}
226
#endif
227
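
Usage sketch (illustrative; not part of the instrumented file): invoking the
printer above directly. This assumes ggml_print_backtrace() is exported
through the ggml headers; with GGML_NO_BACKTRACE set it prints nothing.

    #include "ggml.h"

    int main(void) {
        // Tries gdb, then lldb, then falls back to backtrace symbols,
        // as implemented above.
        ggml_print_backtrace();
        return 0;
    }
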
228
static ggml_abort_callback_t g_abort_callback = NULL;
229
230
// Set the abort callback (passing null will restore original abort functionality: printing a message to stderr)
231
0
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) {
232
0
    ggml_abort_callback_t ret_val = g_abort_callback;
233
0
    g_abort_callback = callback;
234
0
    return ret_val;
235
0
}
236
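
Usage sketch (illustrative): installing a custom handler through
ggml_set_abort_callback(). The callback signature is taken from the code
below; my_abort_handler is a hypothetical name.

    #include "ggml.h"
    #include <stdio.h>

    // Hypothetical handler: receives the fully formatted "file:line: msg"
    // string built by ggml_abort(). abort() is still called afterwards.
    static void my_abort_handler(const char * message) {
        fprintf(stderr, "[my-app] fatal: %s\n", message);
    }

    static void install_abort_handler(void) {
        ggml_abort_callback_t prev = ggml_set_abort_callback(my_abort_handler);
        (void) prev; // pass prev back in later to restore the default
    }
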
237
214
void ggml_abort(const char * file, int line, const char * fmt, ...) {
238
214
    fflush(stdout);
239
240
214
    char message[2048];
241
214
    int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line);
242
243
214
    va_list args;
244
214
    va_start(args, fmt);
245
214
    vsnprintf(message + offset, sizeof(message) - offset, fmt, args);
246
214
    va_end(args);
247
248
214
    if (g_abort_callback) {
249
0
        g_abort_callback(message);
250
214
    } else {
251
        // default: print error and backtrace to stderr
252
214
        fprintf(stderr, "%s\n", message);
253
        ggml_print_backtrace();
254
214
    }
255
256
214
    abort();
257
214
}
258
259
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
260
261
//
262
// logging
263
//
264
265
struct ggml_logger_state {
266
    ggml_log_callback log_callback;
267
    void * log_callback_user_data;
268
};
269
static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};
270
271
4.83k
static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
272
4.83k
    if (format == NULL) {
273
0
        return;
274
0
    }
275
4.83k
    va_list args_copy;
276
4.83k
    va_copy(args_copy, args);
277
4.83k
    char buffer[128];
278
4.83k
    int len = vsnprintf(buffer, 128, format, args);
279
4.83k
    if (len < 128) {
280
4.73k
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
281
4.73k
    } else {
282
97
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
283
97
        vsnprintf(buffer2, len + 1, format, args_copy);
284
97
        buffer2[len] = 0;
285
97
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
286
97
        free(buffer2);
287
97
    }
288
4.83k
    va_end(args_copy);
289
4.83k
}
290
291
4.83k
void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
292
4.83k
    va_list args;
293
4.83k
    va_start(args, format);
294
4.83k
    ggml_log_internal_v(level, format, args);
295
4.83k
    va_end(args);
296
4.83k
}
297
298
4.83k
void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
299
4.83k
    (void) level;
300
4.83k
    (void) user_data;
301
4.83k
    fputs(text, stderr);
302
4.83k
    fflush(stderr);
303
4.83k
}
304
305
//
306
// end of logging block
307
//
308
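
Usage sketch (illustrative): replacing the default logger shown above with a
filtering callback via ggml_log_set() from ggml.h. The severity ordering of
enum ggml_log_level is an assumption here; quiet_logger is a hypothetical name.

    #include "ggml.h"
    #include <stdio.h>

    // Hypothetical sink: drop messages below WARN, forward the rest.
    static void quiet_logger(enum ggml_log_level level, const char * text, void * user_data) {
        FILE * out = (FILE *) user_data;
        if (level >= GGML_LOG_LEVEL_WARN) { // assumes WARN orders below ERROR in the enum
            fputs(text, out);
        }
    }

    static void setup_logging(void) {
        ggml_log_set(quiet_logger, stderr);
    }
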
309
#ifdef GGML_USE_ACCELERATE
310
// uncomment to use vDSP for soft max computation
311
// note: not sure if it is actually faster
312
//#define GGML_SOFT_MAX_ACCELERATE
313
#endif
314
315
316
5.11k
void * ggml_aligned_malloc(size_t size) {
317
#if defined(__s390x__)
318
    const int alignment = 256;
319
#else
320
5.11k
    const int alignment = 64;
321
5.11k
#endif
322
323
#if defined(_MSC_VER) || defined(__MINGW32__)
324
    return _aligned_malloc(size, alignment);
325
#else
326
5.11k
    if (size == 0) {
327
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
328
0
        return NULL;
329
0
    }
330
5.11k
    void * aligned_memory = NULL;
331
  #ifdef GGML_USE_CPU_HBM
332
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
333
  #elif TARGET_OS_OSX
334
    GGML_UNUSED(alignment);
335
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
336
    int result = EFAULT;
337
    switch (alloc_status) {
338
        case KERN_SUCCESS:
339
            result = 0;
340
            break;
341
        case KERN_INVALID_ADDRESS:
342
            result = EINVAL;
343
            break;
344
        case KERN_NO_SPACE:
345
            result = ENOMEM;
346
            break;
347
        default:
348
            result = EFAULT;
349
            break;
350
    }
351
  #else
352
5.11k
    int result = posix_memalign(&aligned_memory, alignment, size);
353
5.11k
  #endif
354
5.11k
    if (result != 0) {
355
        // Handle allocation failure
356
0
        const char *error_desc = "unknown allocation error";
357
0
        switch (result) {
358
0
            case EINVAL:
359
0
                error_desc = "invalid alignment value";
360
0
                break;
361
0
            case ENOMEM:
362
0
                error_desc = "insufficient memory";
363
0
                break;
364
0
        }
365
0
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
366
0
        return NULL;
367
0
    }
368
5.11k
    return aligned_memory;
369
5.11k
#endif
370
5.11k
}
371
372
5.11k
void ggml_aligned_free(void * ptr, size_t size) {
373
5.11k
    GGML_UNUSED(size);
374
#if defined(_MSC_VER) || defined(__MINGW32__)
375
    _aligned_free(ptr);
376
#elif GGML_USE_CPU_HBM
377
    if (ptr != NULL) {
378
        hbw_free(ptr);
379
    }
380
#elif TARGET_OS_OSX
381
    if (ptr != NULL) {
382
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
383
    }
384
#else
385
5.11k
    free(ptr);
386
5.11k
#endif
387
5.11k
}
388
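
Usage sketch (illustrative): the allocate/free pair above must match, because
ggml_aligned_free() needs the original size on the vm_allocate/vm_deallocate
path (TARGET_OS_OSX). The declarations are assumed to come from ggml's
internal headers.

    #include "ggml-impl.h" // assumed to declare ggml_aligned_malloc / ggml_aligned_free
    #include <string.h>

    static void aligned_roundtrip(void) {
        const size_t size = 1024;
        void * buf = ggml_aligned_malloc(size); // 64-byte aligned (256 on s390x)
        if (buf != NULL) {
            memset(buf, 0, size);
            ggml_aligned_free(buf, size); // size must match the allocation
        }
    }
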
389
390
5.11k
inline static void * ggml_malloc(size_t size) {
391
5.11k
    if (size == 0) {
392
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
393
0
        return NULL;
394
0
    }
395
5.11k
    void * result = malloc(size);
396
5.11k
    if (result == NULL) {
397
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
398
0
        GGML_ABORT("fatal error");
399
0
    }
400
5.11k
    return result;
401
5.11k
}
402
403
// calloc
404
0
inline static void * ggml_calloc(size_t num, size_t size) {
405
406
407
0
    if (num == 0 || size == 0) {
408
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
409
0
        return NULL;
410
0
    }
411
0
    void * result = calloc(num, size);
412
0
    if (result == NULL) {
413
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
414
0
        GGML_ABORT("fatal error");
415
0
    }
416
0
    return result;
417
0
}
418
419
5.11k
#define GGML_MALLOC(size)      ggml_malloc(size)
420
0
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
421
422
5.11k
#define GGML_FREE(ptr) free(ptr)
423
424
0
const char * ggml_status_to_string(enum ggml_status status) {
425
0
    switch (status) {
426
0
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
427
0
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
428
0
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
429
0
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
430
0
    }
431
432
0
    return "GGML status: unknown";
433
0
}
434
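
Usage sketch (illustrative): mapping a status code from a compute call to a
readable message with the function above; report() is a hypothetical helper.

    #include "ggml.h"
    #include <stdio.h>

    static void report(enum ggml_status status) {
        if (status != GGML_STATUS_SUCCESS) {
            fprintf(stderr, "compute failed: %s\n", ggml_status_to_string(status));
        }
    }
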
435
0
float ggml_fp16_to_fp32(ggml_fp16_t x) {
436
0
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
437
0
    return GGML_FP16_TO_FP32(x);
438
0
}
439
440
0
ggml_fp16_t ggml_fp32_to_fp16(float x) {
441
0
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
442
0
    return GGML_FP32_TO_FP16(x);
443
0
}
444
445
0
float ggml_bf16_to_fp32(ggml_bf16_t x) {
446
0
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
447
0
    return GGML_BF16_TO_FP32(x);  // it just left shifts
448
0
}
449
450
0
ggml_bf16_t ggml_fp32_to_bf16(float x) {
451
0
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
452
0
    return GGML_FP32_TO_BF16(x);
453
0
}
454
455
0
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
456
0
    for (int64_t i = 0; i < n; i++) {
457
0
        y[i] = GGML_FP16_TO_FP32(x[i]);
458
0
    }
459
0
}
460
461
0
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
462
0
    int i = 0;
463
0
    for (; i < n; ++i) {
464
0
        y[i] = GGML_FP32_TO_FP16(x[i]);
465
0
    }
466
0
}
467
468
0
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
469
0
    int i = 0;
470
0
    for (; i < n; ++i) {
471
0
        y[i] = GGML_BF16_TO_FP32(x[i]);
472
0
    }
473
0
}
474
475
0
void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
476
0
    for (int i = 0; i < n; i++) {
477
0
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
478
0
    }
479
0
}
480
481
0
void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
482
0
    int i = 0;
483
#if defined(__AVX512BF16__)
484
  // subnormals are flushed to zero on this platform
485
    for (; i + 32 <= n; i += 32) {
486
        _mm512_storeu_si512(
487
            (__m512i *)(y + i),
488
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
489
                                _mm512_loadu_ps(x + i))));
490
    }
491
#endif
492
0
    for (; i < n; i++) {
493
0
        y[i] = GGML_FP32_TO_BF16(x[i]);
494
0
    }
495
0
}
496
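
Usage sketch (illustrative): a fp32 -> bf16 -> fp32 round trip using the row
converters above. bf16 keeps the full fp32 exponent, so only low mantissa bits
are lost.

    #include "ggml.h"

    static void roundtrip_bf16(void) {
        const float src[4] = { 1.0f, -2.5f, 3.14159f, 1e-3f };
        ggml_bf16_t   tmp[4];
        float         dst[4];

        ggml_fp32_to_bf16_row(src, tmp, 4);
        ggml_bf16_to_fp32_row(tmp, dst, 4);
        // dst[i] now equals src[i] rounded to bf16 precision (7 explicit mantissa bits)
    }
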
497
0
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
498
0
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
499
0
}
500
501
0
const char * ggml_version(void) {
502
0
    return GGML_VERSION;
503
0
}
504
505
0
const char * ggml_commit(void) {
506
0
    return GGML_COMMIT;
507
0
}
508
509
//
510
// timing
511
//
512
513
#if defined(_MSC_VER) || defined(__MINGW32__)
514
static int64_t timer_freq, timer_start;
515
void ggml_time_init(void) {
516
    LARGE_INTEGER t;
517
    QueryPerformanceFrequency(&t);
518
    timer_freq = t.QuadPart;
519
520
    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
521
    // and the uptime is high enough.
522
    // We subtract the program start time to reduce the likelihood of that happening.
523
    QueryPerformanceCounter(&t);
524
    timer_start = t.QuadPart;
525
}
526
int64_t ggml_time_ms(void) {
527
    LARGE_INTEGER t;
528
    QueryPerformanceCounter(&t);
529
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
530
}
531
int64_t ggml_time_us(void) {
532
    LARGE_INTEGER t;
533
    QueryPerformanceCounter(&t);
534
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
535
}
536
#else
537
13.4k
void ggml_time_init(void) {}
538
0
int64_t ggml_time_ms(void) {
539
0
    struct timespec ts;
540
0
    clock_gettime(CLOCK_MONOTONIC, &ts);
541
0
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
542
0
}
543
544
8.14k
int64_t ggml_time_us(void) {
545
8.14k
    struct timespec ts;
546
8.14k
    clock_gettime(CLOCK_MONOTONIC, &ts);
547
8.14k
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
548
8.14k
}
549
#endif
550
551
0
int64_t ggml_cycles(void) {
552
0
    return clock();
553
0
}
554
555
0
int64_t ggml_cycles_per_ms(void) {
556
0
    return CLOCKS_PER_SEC/1000;
557
0
}
558
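
Usage sketch (illustrative): timing a region with the monotonic clock above.
ggml_time_init() is required once on Windows and is a no-op elsewhere.

    #include "ggml.h"
    #include <stdio.h>

    static void timed_region(void) {
        ggml_time_init();
        const int64_t t0 = ggml_time_us();
        // ... work to be measured ...
        const int64_t t1 = ggml_time_us();
        printf("took %.3f ms\n", (t1 - t0) / 1000.0);
    }
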
559
//
560
// cross-platform UTF-8 file paths
561
//
562
563
#ifdef _WIN32
564
static wchar_t * ggml_mbstowcs(const char * mbs) {
565
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
566
    if (!wlen) {
567
        errno = EINVAL;
568
        return NULL;
569
    }
570
571
    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
572
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
573
    if (!wlen) {
574
        GGML_FREE(wbuf);
575
        errno = EINVAL;
576
        return NULL;
577
    }
578
579
    return wbuf;
580
}
581
#endif
582
583
5.11k
FILE * ggml_fopen(const char * fname, const char * mode) {
584
#ifdef _WIN32
585
    FILE * file = NULL;
586
587
    // convert fname (UTF-8)
588
    wchar_t * wfname = ggml_mbstowcs(fname);
589
    if (wfname) {
590
        // convert mode (ANSI)
591
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
592
        wchar_t * wmode_p = wmode;
593
        do {
594
            *wmode_p++ = (wchar_t)*mode;
595
        } while (*mode++);
596
597
        // open file
598
        file = _wfopen(wfname, wmode);
599
600
        GGML_FREE(wfname);
601
        GGML_FREE(wmode);
602
    }
603
604
    return file;
605
#else
606
5.11k
    return fopen(fname, mode);
607
5.11k
#endif
608
609
5.11k
}
610
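
Usage sketch (illustrative): opening a UTF-8 path portably with ggml_fopen().
On Windows the path is widened and passed to _wfopen(); elsewhere it is plain
fopen(). model_exists is a hypothetical helper.

    #include "ggml.h"
    #include <stdbool.h>
    #include <stdio.h>

    static bool model_exists(const char * utf8_path) {
        FILE * f = ggml_fopen(utf8_path, "rb");
        if (f == NULL) {
            return false;
        }
        fclose(f);
        return true;
    }
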
611
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
612
    [GGML_TYPE_I8] = {
613
        .type_name                = "i8",
614
        .blck_size                = 1,
615
        .type_size                = sizeof(int8_t),
616
        .is_quantized             = false,
617
    },
618
    [GGML_TYPE_I16] = {
619
        .type_name                = "i16",
620
        .blck_size                = 1,
621
        .type_size                = sizeof(int16_t),
622
        .is_quantized             = false,
623
    },
624
    [GGML_TYPE_I32] = {
625
        .type_name                = "i32",
626
        .blck_size                = 1,
627
        .type_size                = sizeof(int32_t),
628
        .is_quantized             = false,
629
    },
630
    [GGML_TYPE_I64] = {
631
        .type_name                = "i64",
632
        .blck_size                = 1,
633
        .type_size                = sizeof(int64_t),
634
        .is_quantized             = false,
635
    },
636
    [GGML_TYPE_F64] = {
637
        .type_name                = "f64",
638
        .blck_size                = 1,
639
        .type_size                = sizeof(double),
640
        .is_quantized             = false,
641
    },
642
    [GGML_TYPE_F32] = {
643
        .type_name                = "f32",
644
        .blck_size                = 1,
645
        .type_size                = sizeof(float),
646
        .is_quantized             = false,
647
    },
648
    [GGML_TYPE_F16] = {
649
        .type_name                = "f16",
650
        .blck_size                = 1,
651
        .type_size                = sizeof(ggml_fp16_t),
652
        .is_quantized             = false,
653
        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
654
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_fp16_row,
655
    },
656
    [GGML_TYPE_Q4_0] = {
657
        .type_name                = "q4_0",
658
        .blck_size                = QK4_0,
659
        .type_size                = sizeof(block_q4_0),
660
        .is_quantized             = true,
661
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_0,
662
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_0_ref,
663
    },
664
    [GGML_TYPE_Q4_1] = {
665
        .type_name                = "q4_1",
666
        .blck_size                = QK4_1,
667
        .type_size                = sizeof(block_q4_1),
668
        .is_quantized             = true,
669
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_1,
670
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_1_ref,
671
    },
672
    [4] = { // GGML_TYPE_Q4_2
673
        .type_name                = "DEPRECATED",
674
        .blck_size                = 0,
675
        .type_size                = 0,
676
        .is_quantized             = false,
677
    },
678
    [5] = { // GGML_TYPE_Q4_3
679
        .type_name                = "DEPRECATED",
680
        .blck_size                = 0,
681
        .type_size                = 0,
682
        .is_quantized             = false,
683
    },
684
    [GGML_TYPE_Q5_0] = {
685
        .type_name                = "q5_0",
686
        .blck_size                = QK5_0,
687
        .type_size                = sizeof(block_q5_0),
688
        .is_quantized             = true,
689
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_0,
690
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_0_ref,
691
    },
692
    [GGML_TYPE_Q5_1] = {
693
        .type_name                = "q5_1",
694
        .blck_size                = QK5_1,
695
        .type_size                = sizeof(block_q5_1),
696
        .is_quantized             = true,
697
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_1,
698
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_1_ref,
699
    },
700
    [GGML_TYPE_Q8_0] = {
701
        .type_name                = "q8_0",
702
        .blck_size                = QK8_0,
703
        .type_size                = sizeof(block_q8_0),
704
        .is_quantized             = true,
705
        .to_float                 = (ggml_to_float_t) dequantize_row_q8_0,
706
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_0_ref,
707
    },
708
    [GGML_TYPE_Q8_1] = {
709
        .type_name                = "q8_1",
710
        .blck_size                = QK8_1,
711
        .type_size                = sizeof(block_q8_1),
712
        .is_quantized             = true,
713
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_1_ref,
714
    },
715
    [GGML_TYPE_MXFP4] = {
716
        .type_name                = "mxfp4",
717
        .blck_size                = QK_MXFP4,
718
        .type_size                = sizeof(block_mxfp4),
719
        .is_quantized             = true,
720
        .to_float                 = (ggml_to_float_t) dequantize_row_mxfp4,
721
        .from_float_ref           = (ggml_from_float_t)quantize_row_mxfp4_ref,
722
    },
723
    [GGML_TYPE_Q2_K] = {
724
        .type_name                = "q2_K",
725
        .blck_size                = QK_K,
726
        .type_size                = sizeof(block_q2_K),
727
        .is_quantized             = true,
728
        .to_float                 = (ggml_to_float_t) dequantize_row_q2_K,
729
        .from_float_ref           = (ggml_from_float_t) quantize_row_q2_K_ref,
730
    },
731
    [GGML_TYPE_Q3_K] = {
732
        .type_name                = "q3_K",
733
        .blck_size                = QK_K,
734
        .type_size                = sizeof(block_q3_K),
735
        .is_quantized             = true,
736
        .to_float                 = (ggml_to_float_t) dequantize_row_q3_K,
737
        .from_float_ref           = (ggml_from_float_t) quantize_row_q3_K_ref,
738
    },
739
    [GGML_TYPE_Q4_K] = {
740
        .type_name                = "q4_K",
741
        .blck_size                = QK_K,
742
        .type_size                = sizeof(block_q4_K),
743
        .is_quantized             = true,
744
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_K,
745
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_K_ref,
746
    },
747
    [GGML_TYPE_Q5_K] = {
748
        .type_name                = "q5_K",
749
        .blck_size                = QK_K,
750
        .type_size                = sizeof(block_q5_K),
751
        .is_quantized             = true,
752
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_K,
753
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_K_ref,
754
    },
755
    [GGML_TYPE_Q6_K] = {
756
        .type_name                = "q6_K",
757
        .blck_size                = QK_K,
758
        .type_size                = sizeof(block_q6_K),
759
        .is_quantized             = true,
760
        .to_float                 = (ggml_to_float_t) dequantize_row_q6_K,
761
        .from_float_ref           = (ggml_from_float_t) quantize_row_q6_K_ref,
762
    },
763
    [GGML_TYPE_IQ2_XXS] = {
764
        .type_name                = "iq2_xxs",
765
        .blck_size                = QK_K,
766
        .type_size                = sizeof(block_iq2_xxs),
767
        .is_quantized             = true,
768
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xxs,
769
        .from_float_ref           = NULL,
770
    },
771
    [GGML_TYPE_IQ2_XS] = {
772
        .type_name                = "iq2_xs",
773
        .blck_size                = QK_K,
774
        .type_size                = sizeof(block_iq2_xs),
775
        .is_quantized             = true,
776
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xs,
777
        .from_float_ref           = NULL,
778
    },
779
    [GGML_TYPE_IQ3_XXS] = {
780
        .type_name                = "iq3_xxs",
781
        .blck_size                = QK_K,
782
        .type_size                = sizeof(block_iq3_xxs),
783
        .is_quantized             = true,
784
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_xxs,
785
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
786
    },
787
    [GGML_TYPE_IQ3_S] = {
788
        .type_name                = "iq3_s",
789
        .blck_size                = QK_K,
790
        .type_size                = sizeof(block_iq3_s),
791
        .is_quantized             = true,
792
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_s,
793
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_s_ref,
794
    },
795
    [GGML_TYPE_IQ2_S] = {
796
        .type_name                = "iq2_s",
797
        .blck_size                = QK_K,
798
        .type_size                = sizeof(block_iq2_s),
799
        .is_quantized             = true,
800
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_s,
801
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq2_s_ref,
802
    },
803
    [GGML_TYPE_IQ1_S] = {
804
        .type_name                = "iq1_s",
805
        .blck_size                = QK_K,
806
        .type_size                = sizeof(block_iq1_s),
807
        .is_quantized             = true,
808
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_s,
809
        .from_float_ref           = NULL,
810
    },
811
    [GGML_TYPE_IQ1_M] = {
812
        .type_name                = "iq1_m",
813
        .blck_size                = QK_K,
814
        .type_size                = sizeof(block_iq1_m),
815
        .is_quantized             = true,
816
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_m,
817
        .from_float_ref           = NULL,
818
    },
819
    [GGML_TYPE_IQ4_NL] = {
820
        .type_name                = "iq4_nl",
821
        .blck_size                = QK4_NL,
822
        .type_size                = sizeof(block_iq4_nl),
823
        .is_quantized             = true,
824
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_nl,
825
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_nl_ref,
826
    },
827
    [GGML_TYPE_IQ4_XS] = {
828
        .type_name                = "iq4_xs",
829
        .blck_size                = QK_K,
830
        .type_size                = sizeof(block_iq4_xs),
831
        .is_quantized             = true,
832
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_xs,
833
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_xs_ref,
834
    },
835
    [GGML_TYPE_Q8_K] = {
836
        .type_name                = "q8_K",
837
        .blck_size                = QK_K,
838
        .type_size                = sizeof(block_q8_K),
839
        .is_quantized             = true,
840
    },
841
    [GGML_TYPE_BF16] = {
842
        .type_name                = "bf16",
843
        .blck_size                = 1,
844
        .type_size                = sizeof(ggml_bf16_t),
845
        .is_quantized             = false,
846
        .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
847
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
848
    },
849
    [31] = { // GGML_TYPE_Q4_0_4_4
850
        .type_name                = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
851
        .blck_size                = 0,
852
        .type_size                = 0,
853
        .is_quantized             = false,
854
    },
855
    [32] = { // GGML_TYPE_Q4_0_4_8
856
        .type_name                = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
857
        .blck_size                = 0,
858
        .type_size                = 0,
859
        .is_quantized             = false,
860
    },
861
    [33] = { // GGML_TYPE_Q4_0_8_8
862
        .type_name                = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
863
        .blck_size                = 0,
864
        .type_size                = 0,
865
        .is_quantized             = false,
866
    },
867
    [GGML_TYPE_TQ1_0] = {
868
        .type_name                = "tq1_0",
869
        .blck_size                = QK_K,
870
        .type_size                = sizeof(block_tq1_0),
871
        .is_quantized             = true,
872
        .to_float                 = (ggml_to_float_t) dequantize_row_tq1_0,
873
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq1_0_ref,
874
    },
875
    [GGML_TYPE_TQ2_0] = {
876
        .type_name                = "tq2_0",
877
        .blck_size                = QK_K,
878
        .type_size                = sizeof(block_tq2_0),
879
        .is_quantized             = true,
880
        .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
881
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
882
    },
883
    [36] = { // GGML_TYPE_IQ4_NL_4_4
884
        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
885
        .blck_size                = 0,
886
        .type_size                = 0,
887
        .is_quantized             = false,
888
    },
889
    [37] = { // GGML_TYPE_IQ4_NL_4_8
890
        .type_name                = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
891
        .blck_size                = 0,
892
        .type_size                = 0,
893
        .is_quantized             = false,
894
    },
895
    [38] = { // GGML_TYPE_IQ4_NL_8_8
896
        .type_name                = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
897
        .blck_size                = 0,
898
        .type_size                = 0,
899
        .is_quantized             = false,
900
    },
901
};
902
903
0
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
904
0
    assert(type >= 0);
905
0
    assert(type < GGML_TYPE_COUNT);
906
0
    return &type_traits[type];
907
0
}
908
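
Usage sketch (illustrative): querying the traits table through the accessor
above. The casts avoid assumptions about the exact field widths in ggml.h.

    #include "ggml.h"
    #include <stdio.h>

    // For GGML_TYPE_Q4_0 this prints (values as in the table above):
    //   q4_0: blck_size=32 type_size=18 quantized=1
    static void print_traits(enum ggml_type type) {
        const struct ggml_type_traits * tt = ggml_get_type_traits(type);
        printf("%s: blck_size=%lld type_size=%zu quantized=%d\n",
               tt->type_name, (long long) tt->blck_size,
               (size_t) tt->type_size, tt->is_quantized ? 1 : 0);
    }
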
909
//
910
// ggml object
911
//
912
913
struct ggml_object {
914
    size_t offs;
915
    size_t size;
916
917
    struct ggml_object * next;
918
919
    enum ggml_object_type type;
920
921
    char padding[4];
922
};
923
924
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
925
926
//
927
// ggml context
928
//
929
930
struct ggml_context {
931
    size_t mem_size;
932
    void * mem_buffer;
933
    bool   mem_buffer_owned;
934
    bool   no_alloc;
935
936
    int    n_objects;
937
938
    struct ggml_object * objects_begin;
939
    struct ggml_object * objects_end;
940
};
941
942
//
943
// data types
944
//
945
946
static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
947
    "NONE",
948
949
    "DUP",
950
    "ADD",
951
    "ADD_ID",
952
    "ADD1",
953
    "ACC",
954
    "SUB",
955
    "MUL",
956
    "DIV",
957
    "SQR",
958
    "SQRT",
959
    "LOG",
960
    "SIN",
961
    "COS",
962
    "SUM",
963
    "SUM_ROWS",
964
    "CUMSUM",
965
    "MEAN",
966
    "ARGMAX",
967
    "COUNT_EQUAL",
968
    "REPEAT",
969
    "REPEAT_BACK",
970
    "CONCAT",
971
    "SILU_BACK",
972
    "NORM",
973
    "RMS_NORM",
974
    "RMS_NORM_BACK",
975
    "GROUP_NORM",
976
    "L2_NORM",
977
978
    "MUL_MAT",
979
    "MUL_MAT_ID",
980
    "OUT_PROD",
981
982
    "SCALE",
983
    "SET",
984
    "CPY",
985
    "CONT",
986
    "RESHAPE",
987
    "VIEW",
988
    "PERMUTE",
989
    "TRANSPOSE",
990
    "GET_ROWS",
991
    "GET_ROWS_BACK",
992
    "SET_ROWS",
993
    "DIAG",
994
    "DIAG_MASK_INF",
995
    "DIAG_MASK_ZERO",
996
    "SOFT_MAX",
997
    "SOFT_MAX_BACK",
998
    "ROPE",
999
    "ROPE_BACK",
1000
    "CLAMP",
1001
    "CONV_TRANSPOSE_1D",
1002
    "IM2COL",
1003
    "IM2COL_BACK",
1004
    "IM2COL_3D",
1005
    "CONV_2D",
1006
    "CONV_3D",
1007
    "CONV_2D_DW",
1008
    "CONV_TRANSPOSE_2D",
1009
    "POOL_1D",
1010
    "POOL_2D",
1011
    "POOL_2D_BACK",
1012
    "UPSCALE",
1013
    "PAD",
1014
    "PAD_REFLECT_1D",
1015
    "ROLL",
1016
    "ARANGE",
1017
    "TIMESTEP_EMBEDDING",
1018
    "ARGSORT",
1019
    "TOP_K",
1020
    "LEAKY_RELU",
1021
    "TRI",
1022
    "FILL",
1023
1024
    "FLASH_ATTN_EXT",
1025
    "FLASH_ATTN_BACK",
1026
    "SSM_CONV",
1027
    "SSM_SCAN",
1028
    "WIN_PART",
1029
    "WIN_UNPART",
1030
    "GET_REL_POS",
1031
    "ADD_REL_POS",
1032
    "RWKV_WKV6",
1033
    "GATED_LINEAR_ATTN",
1034
    "RWKV_WKV7",
1035
    "SOLVE_TRI",
1036
1037
    "UNARY",
1038
1039
    "MAP_CUSTOM1",
1040
    "MAP_CUSTOM2",
1041
    "MAP_CUSTOM3",
1042
1043
    "CUSTOM",
1044
1045
    "CROSS_ENTROPY_LOSS",
1046
    "CROSS_ENTROPY_LOSS_BACK",
1047
    "OPT_STEP_ADAMW",
1048
    "OPT_STEP_SGD",
1049
1050
    "GLU",
1051
};
1052
1053
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1054
1055
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1056
    "none",
1057
1058
    "x",
1059
    "x+y",
1060
    "x[i]+y",
1061
    "x+y",
1062
    "view(x,nb,offset)+=y->x",
1063
    "x-y",
1064
    "x*y",
1065
    "x/y",
1066
    "x^2",
1067
    "√x",
1068
    "log(x)",
1069
    "sin(x)",
1070
    "cos(x)",
1071
    "Σx",
1072
    "Σx_k",
1073
    "cumsum(x)",
1074
    "Σx/n",
1075
    "argmax(x)",
1076
    "count_equal(x)",
1077
    "repeat(x)",
1078
    "repeat_back(x)",
1079
    "concat(x, y)",
1080
    "silu_back(x)",
1081
    "norm(x)",
1082
    "rms_norm(x)",
1083
    "rms_norm_back(x)",
1084
    "group_norm(x)",
1085
    "l2_norm(x)",
1086
1087
    "X*Y",
1088
    "X[i]*Y",
1089
    "X*Y",
1090
1091
    "x*v",
1092
    "y-\\>view(x)",
1093
    "x-\\>y",
1094
    "cont(x)",
1095
    "reshape(x)",
1096
    "view(x)",
1097
    "permute(x)",
1098
    "transpose(x)",
1099
    "get_rows(x)",
1100
    "get_rows_back(x)",
1101
    "set_rows(x)",
1102
    "diag(x)",
1103
    "diag_mask_inf(x)",
1104
    "diag_mask_zero(x)",
1105
    "soft_max(x)",
1106
    "soft_max_back(x)",
1107
    "rope(x)",
1108
    "rope_back(x)",
1109
    "clamp(x)",
1110
    "conv_transpose_1d(x)",
1111
    "im2col(x)",
1112
    "im2col_back(x)",
1113
    "im2col_3d(x)",
1114
    "conv_2d(x)",
1115
    "conv_3d(x)",
1116
    "conv_2d_dw(x)",
1117
    "conv_transpose_2d(x)",
1118
    "pool_1d(x)",
1119
    "pool_2d(x)",
1120
    "pool_2d_back(x)",
1121
    "upscale(x)",
1122
    "pad(x)",
1123
    "pad_reflect_1d(x)",
1124
    "roll(x)",
1125
    "arange(start, stop, step)",
1126
    "timestep_embedding(timesteps, dim, max_period)",
1127
    "argsort(x)",
1128
    "top_k(x)",
1129
    "leaky_relu(x)",
1130
    "tri(x)",
1131
    "fill(x, c)",
1132
1133
    "flash_attn_ext(x)",
1134
    "flash_attn_back(x)",
1135
    "ssm_conv(x)",
1136
    "ssm_scan(x)",
1137
    "win_part(x)",
1138
    "win_unpart(x)",
1139
    "get_rel_pos(x)",
1140
    "add_rel_pos(x)",
1141
    "rwkv_wkv6(k, v, r, tf, td, s)",
1142
    "gated_linear_attn(k, v, q, gate, s)",
1143
    "rwkv_wkv7(r, w, k, v, a, b, s)",
1144
    "A X = B, A triangular, solve X",
1145
1146
    "unary(x)",
1147
1148
    "map_custom(x)",
1149
    "map_custom(x,y)",
1150
    "map_custom(x,y,z)",
1151
1152
    "custom(x)",
1153
1154
    "cross_entropy_loss(x,y)",
1155
    "cross_entropy_loss_back(x,y)",
1156
    "adamw(x)",
1157
    "sgd(x)",
1158
1159
    "glu(x)",
1160
};
1161
1162
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1163
1164
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
1165
1166
static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1167
    "ABS",
1168
    "SGN",
1169
    "NEG",
1170
    "STEP",
1171
    "TANH",
1172
    "ELU",
1173
    "RELU",
1174
    "SIGMOID",
1175
    "GELU",
1176
    "GELU_QUICK",
1177
    "SILU",
1178
    "HARDSWISH",
1179
    "HARDSIGMOID",
1180
    "EXP",
1181
    "EXPM1",
1182
    "SOFTPLUS",
1183
    "GELU_ERF",
1184
    "XIELU",
1185
    "FLOOR",
1186
    "CEIL",
1187
    "ROUND",
1188
    "TRUNC",
1189
};
1190
1191
static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
1192
1193
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
1194
    "REGLU",
1195
    "GEGLU",
1196
    "SWIGLU",
1197
    "SWIGLU_OAI",
1198
    "GEGLU_ERF",
1199
    "GEGLU_QUICK",
1200
};
1201
1202
static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6");
1203
1204
1205
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
1206
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
1207
1208
1209
////////////////////////////////////////////////////////////////////////////////
1210
1211
0
void ggml_print_object(const struct ggml_object * obj) {
1212
0
    GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
1213
0
            obj->type, obj->offs, obj->size, (const void *) obj->next);
1214
0
}
1215
1216
0
void ggml_print_objects(const struct ggml_context * ctx) {
1217
0
    struct ggml_object * obj = ctx->objects_begin;
1218
1219
0
    GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
1220
1221
0
    while (obj != NULL) {
1222
0
        ggml_print_object(obj);
1223
0
        obj = obj->next;
1224
0
    }
1225
1226
0
    GGML_LOG_INFO("%s: --- end ---\n", __func__);
1227
0
}
1228
1229
4.12k
int64_t ggml_nelements(const struct ggml_tensor * tensor) {
1230
4.12k
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1231
1232
4.12k
    return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1233
4.12k
}
1234
1235
0
int64_t ggml_nrows(const struct ggml_tensor * tensor) {
1236
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1237
1238
0
    return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1239
0
}
1240
1241
5.91k
size_t ggml_nbytes(const struct ggml_tensor * tensor) {
1242
29.1k
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1243
23.3k
        if (tensor->ne[i] <= 0) {
1244
111
            return 0;
1245
111
        }
1246
23.3k
    }
1247
1248
5.79k
    size_t nbytes;
1249
5.79k
    const size_t blck_size = ggml_blck_size(tensor->type);
1250
5.79k
    if (blck_size == 1) {
1251
5.69k
        nbytes = ggml_type_size(tensor->type);
1252
28.4k
        for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1253
22.7k
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1254
22.7k
        }
1255
5.69k
    }
1256
109
    else {
1257
109
        nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
1258
436
        for (int i = 1; i < GGML_MAX_DIMS; ++i) {
1259
327
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1260
327
        }
1261
109
    }
1262
1263
5.79k
    return nbytes;
1264
5.91k
}
1265
1266
0
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
1267
0
    return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
1268
0
}
1269
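
Worked example of the size arithmetic above (illustrative, using the q4_0
entry from the traits table: 32 elements per 18-byte block, as in upstream
ggml):

    #include "ggml.h"
    #include <assert.h>

    static void row_size_example(void) {
        // 4096 elements / 32 per block = 128 blocks * 18 bytes = 2304 bytes
        assert(ggml_row_size(GGML_TYPE_Q4_0, 4096) == 2304);
    }
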
1270
14.8k
int64_t ggml_blck_size(enum ggml_type type) {
1271
14.8k
    assert(type >= 0);
1272
14.8k
    assert(type < GGML_TYPE_COUNT);
1273
14.8k
    return type_traits[type].blck_size;
1274
14.8k
}
1275
1276
14.7k
size_t ggml_type_size(enum ggml_type type) {
1277
14.7k
    assert(type >= 0);
1278
14.7k
    assert(type < GGML_TYPE_COUNT);
1279
14.7k
    return type_traits[type].type_size;
1280
14.7k
}
1281
1282
1.72k
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
1283
1.72k
    assert(type >= 0);
1284
1.72k
    assert(type < GGML_TYPE_COUNT);
1285
1.72k
    assert(ne % ggml_blck_size(type) == 0);
1286
1.72k
    return ggml_type_size(type)*ne/ggml_blck_size(type);
1287
1.72k
}
1288
1289
279
const char * ggml_type_name(enum ggml_type type) {
1290
279
    assert(type >= 0);
1291
279
    assert(type < GGML_TYPE_COUNT);
1292
279
    return type_traits[type].type_name;
1293
279
}
1294
1295
0
bool ggml_is_quantized(enum ggml_type type) {
1296
0
    assert(type >= 0);
1297
0
    assert(type < GGML_TYPE_COUNT);
1298
0
    return type_traits[type].is_quantized;
1299
0
}
1300
1301
0
const char * ggml_op_name(enum ggml_op op) {
1302
0
    return GGML_OP_NAME[op];
1303
0
}
1304
1305
0
const char * ggml_op_symbol(enum ggml_op op) {
1306
0
    return GGML_OP_SYMBOL[op];
1307
0
}
1308
1309
0
const char * ggml_unary_op_name(enum ggml_unary_op op) {
1310
0
    return GGML_UNARY_OP_NAME[op];
1311
0
}
1312
1313
0
const char * ggml_glu_op_name(enum ggml_glu_op op) {
1314
0
    return GGML_GLU_OP_NAME[op];
1315
0
}
1316
1317
0
const char * ggml_op_desc(const struct ggml_tensor * t) {
1318
0
    if (t->op == GGML_OP_UNARY) {
1319
0
        enum ggml_unary_op uop = ggml_get_unary_op(t);
1320
0
        return ggml_unary_op_name(uop);
1321
0
    }
1322
0
    if (t->op == GGML_OP_GLU) {
1323
0
        enum ggml_glu_op gop = ggml_get_glu_op(t);
1324
0
        return ggml_glu_op_name(gop);
1325
0
    }
1326
0
    return ggml_op_name(t->op);
1327
0
}
1328
1329
0
size_t ggml_element_size(const struct ggml_tensor * tensor) {
1330
0
    return ggml_type_size(tensor->type);
1331
0
}
1332
1333
0
bool ggml_is_scalar(const struct ggml_tensor * tensor) {
1334
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1335
1336
0
    return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1337
0
}
1338
1339
0
bool ggml_is_vector(const struct ggml_tensor * tensor) {
1340
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1341
1342
0
    return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1343
0
}
1344
1345
0
bool ggml_is_matrix(const struct ggml_tensor * tensor) {
1346
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1347
1348
0
    return tensor->ne[2] == 1 && tensor->ne[3] == 1;
1349
0
}
1350
1351
0
bool ggml_is_3d(const struct ggml_tensor * tensor) {
1352
0
    return tensor->ne[3] == 1;
1353
0
}
1354
1355
0
int ggml_n_dims(const struct ggml_tensor * tensor) {
1356
0
    for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
1357
0
        if (tensor->ne[i] > 1) {
1358
0
            return i + 1;
1359
0
        }
1360
0
    }
1361
0
    return 1;
1362
0
}
1363
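
Usage sketch (illustrative): the shape predicates above on a freshly created
tensor. Trailing ne[i] == 1 dimensions do not count, so a [5, 1, 1, 1] tensor
is a 1-D vector.

    #include "ggml.h"

    static void shape_demo(struct ggml_context * ctx) {
        struct ggml_tensor * v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 5);
        GGML_ASSERT(ggml_is_vector(v));
        GGML_ASSERT(ggml_n_dims(v) == 1);
    }
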
1364
0
enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
1365
0
    enum ggml_type wtype = GGML_TYPE_COUNT;
1366
1367
0
    switch (ftype) {
1368
0
        case GGML_FTYPE_ALL_F32:              wtype = GGML_TYPE_F32;   break;
1369
0
        case GGML_FTYPE_MOSTLY_F16:           wtype = GGML_TYPE_F16;   break;
1370
0
        case GGML_FTYPE_MOSTLY_BF16:          wtype = GGML_TYPE_BF16;  break;
1371
0
        case GGML_FTYPE_MOSTLY_Q4_0:          wtype = GGML_TYPE_Q4_0;  break;
1372
0
        case GGML_FTYPE_MOSTLY_Q4_1:          wtype = GGML_TYPE_Q4_1;  break;
1373
0
        case GGML_FTYPE_MOSTLY_Q5_0:          wtype = GGML_TYPE_Q5_0;  break;
1374
0
        case GGML_FTYPE_MOSTLY_Q5_1:          wtype = GGML_TYPE_Q5_1;  break;
1375
0
        case GGML_FTYPE_MOSTLY_Q8_0:          wtype = GGML_TYPE_Q8_0;  break;
1376
0
        case GGML_FTYPE_MOSTLY_MXFP4:         wtype = GGML_TYPE_MXFP4; break;
1377
0
        case GGML_FTYPE_MOSTLY_Q2_K:          wtype = GGML_TYPE_Q2_K;  break;
1378
0
        case GGML_FTYPE_MOSTLY_Q3_K:          wtype = GGML_TYPE_Q3_K;  break;
1379
0
        case GGML_FTYPE_MOSTLY_Q4_K:          wtype = GGML_TYPE_Q4_K;  break;
1380
0
        case GGML_FTYPE_MOSTLY_Q5_K:          wtype = GGML_TYPE_Q5_K;  break;
1381
0
        case GGML_FTYPE_MOSTLY_Q6_K:          wtype = GGML_TYPE_Q6_K;  break;
1382
0
        case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS;  break;
1383
0
        case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;   break;
1384
0
        case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS;  break;
1385
0
        case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;    break;
1386
0
        case GGML_FTYPE_MOSTLY_IQ1_M:         wtype = GGML_TYPE_IQ1_M;    break;
1387
0
        case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;   break;
1388
0
        case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;   break;
1389
0
        case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;    break;
1390
0
        case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;    break;
1391
0
        case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
1392
0
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
1393
0
    }
1394
1395
0
    GGML_ASSERT(wtype != GGML_TYPE_COUNT);
1396
1397
0
    return wtype;
1398
0
}
1399
1400
1.35k
size_t ggml_tensor_overhead(void) {
1401
1.35k
    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
1402
1.35k
}
1403
1404
0
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
1405
0
    return tensor->nb[0] > tensor->nb[1];
1406
0
}
1407
1408
0
static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
1409
0
    size_t next_nb = ggml_type_size(tensor->type);
1410
0
    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
1411
0
        return false;
1412
0
    }
1413
0
    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
1414
0
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
1415
0
        if (tensor->ne[i] != 1) {
1416
0
            if (i > n) {
1417
0
                if (tensor->nb[i] != next_nb) {
1418
0
                    return false;
1419
0
                }
1420
0
                next_nb *= tensor->ne[i];
1421
0
            } else {
1422
                // this dimension does not need to be contiguous
1423
0
                next_nb = tensor->ne[i]*tensor->nb[i];
1424
0
            }
1425
0
        }
1426
0
    }
1427
0
    return true;
1428
0
}
1429
1430
0
bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
1431
0
    return ggml_is_contiguous_0(tensor);
1432
0
}
1433
1434
0
bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
1435
0
    return ggml_is_contiguous_n(tensor, 0);
1436
0
}
1437
1438
0
bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
1439
0
    return ggml_is_contiguous_n(tensor, 1);
1440
0
}
1441
1442
0
bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
1443
0
    return ggml_is_contiguous_n(tensor, 2);
1444
0
}
1445
1446
0
bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
1447
0
    return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
1448
0
}
1449
1450
0
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
1451
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1452
1453
0
    return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
1454
0
}
1455
1456
0
bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
1457
0
    return
1458
0
        tensor->nb[0] > tensor->nb[2] &&
1459
0
        tensor->nb[1] > tensor->nb[0] &&
1460
0
        tensor->nb[2] == ggml_type_size(tensor->type);
1461
0
}
1462
1463
0
bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) {
1464
0
    return
1465
0
        tensor->ne[0] == ggml_blck_size(tensor->type) ||
1466
0
        tensor->nb[0] == ggml_type_size(tensor->type);
1467
0
}
1468
1469
0
static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
1470
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1471
1472
0
    return
1473
0
        tensor->nb[0] == ggml_type_size(tensor->type) &&
1474
0
        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
1475
0
        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
1476
0
}
1477
1478
0
bool ggml_is_empty(const struct ggml_tensor * tensor) {
1479
0
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1480
0
        if (tensor->ne[i] == 0) {
1481
            // empty if any dimension has no elements
1482
0
            return true;
1483
0
        }
1484
0
    }
1485
0
    return false;
1486
0
}
1487
1488
0
bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1489
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1490
1491
0
    return
1492
0
        (t0->ne[0] == t1->ne[0]) &&
1493
0
        (t0->ne[1] == t1->ne[1]) &&
1494
0
        (t0->ne[2] == t1->ne[2]) &&
1495
0
        (t0->ne[3] == t1->ne[3]);
1496
0
}
1497
1498
0
bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1499
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1500
1501
0
    return
1502
0
        (t0->nb[0] == t1->nb[0]) &&
1503
0
        (t0->nb[1] == t1->nb[1]) &&
1504
0
        (t0->nb[2] == t1->nb[2]) &&
1505
0
        (t0->nb[3] == t1->nb[3]);
1506
0
}
1507
1508
0
bool ggml_is_view(const struct ggml_tensor * t) {
1509
0
    return ggml_impl_is_view(t);
1510
0
}
1511
1512
// check if t1 can be represented as a repetition of t0
1513
0
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1514
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1515
1516
0
    return ggml_is_empty(t0) ? ggml_is_empty(t1) :
1517
0
        (t1->ne[0]%t0->ne[0] == 0) &&
1518
0
        (t1->ne[1]%t0->ne[1] == 0) &&
1519
0
        (t1->ne[2]%t0->ne[2] == 0) &&
1520
0
        (t1->ne[3]%t0->ne[3] == 0);
1521
0
}
1522
1523
0
static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1524
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1525
1526
0
    return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
1527
0
}
1528
1529
// assert that pointer is aligned to GGML_MEM_ALIGN
1530
#define GGML_ASSERT_ALIGNED(ptr) \
1531
6.84k
    GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
1532
1533
////////////////////////////////////////////////////////////////////////////////
1534
1535
5.11k
struct ggml_context * ggml_init(struct ggml_init_params params) {
1536
5.11k
    static bool is_first_call = true;
1537
1538
5.11k
    ggml_critical_section_start();
1539
1540
5.11k
    if (is_first_call) {
1541
        // initialize time system (required on Windows)
1542
5.11k
        ggml_time_init();
1543
1544
5.11k
        is_first_call = false;
1545
5.11k
    }
1546
1547
5.11k
    ggml_critical_section_end();
1548
1549
5.11k
    struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
1550
1551
    // allow to call ggml_init with 0 size
1552
5.11k
    if (params.mem_size == 0) {
1553
4.70k
        params.mem_size = GGML_MEM_ALIGN;
1554
4.70k
    }
1555
1556
5.11k
    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
1557
1558
5.11k
    *ctx = (struct ggml_context) {
1559
5.11k
        /*.mem_size           =*/ mem_size,
1560
5.11k
        /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
1561
5.11k
        /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
1562
5.11k
        /*.no_alloc           =*/ params.no_alloc,
1563
5.11k
        /*.n_objects          =*/ 0,
1564
5.11k
        /*.objects_begin      =*/ NULL,
1565
5.11k
        /*.objects_end        =*/ NULL,
1566
5.11k
    };
1567
1568
5.11k
    GGML_ASSERT(ctx->mem_buffer != NULL);
1569
1570
5.11k
    GGML_ASSERT_ALIGNED(ctx->mem_buffer);
1571
1572
5.11k
    GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
1573
1574
5.11k
    return ctx;
1575
5.11k
}
1576
1577
0
void ggml_reset(struct ggml_context * ctx) {
1578
0
    if (ctx == NULL) {
1579
0
        return;
1580
0
    }
1581
1582
0
    ctx->n_objects     = 0;
1583
0
    ctx->objects_begin = NULL;
1584
0
    ctx->objects_end   = NULL;
1585
0
}
1586
1587
5.11k
void ggml_free(struct ggml_context * ctx) {
1588
5.11k
    if (ctx == NULL) {
1589
0
        return;
1590
0
    }
1591
1592
5.11k
    if (ctx->mem_buffer_owned) {
1593
5.11k
        ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
1594
5.11k
    }
1595
1596
5.11k
    GGML_FREE(ctx);
1597
5.11k
}
1598
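
Usage sketch (illustrative): the minimal context lifecycle for the functions
above. With mem_buffer == NULL the context allocates and owns its pool
(mem_buffer_owned = true) and ggml_free() releases it.

    #include "ggml.h"

    static void context_demo(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16u * 1024 * 1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);
        GGML_ASSERT(ctx != NULL);
        // ... create tensors / build graphs ...
        ggml_free(ctx);
    }
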
1599
0
size_t ggml_used_mem(const struct ggml_context * ctx) {
1600
0
    return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
1601
0
}
1602
1603
0
bool ggml_get_no_alloc(struct ggml_context * ctx) {
1604
0
    return ctx->no_alloc;
1605
0
}
1606
1607
1.87k
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
1608
1.87k
    ctx->no_alloc = no_alloc;
1609
1.87k
}
1610
1611
0
void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
1612
0
    return ctx->mem_buffer;
1613
0
}
1614
1615
0
size_t ggml_get_mem_size(const struct ggml_context * ctx) {
1616
0
    return ctx->mem_size;
1617
0
}
1618
1619
0
size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
1620
0
    size_t max_size = 0;
1621
1622
0
    for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
1623
0
        size_t bytes = ggml_nbytes(tensor);
1624
0
        max_size = MAX(max_size, bytes);
1625
0
    }
1626
1627
0
    return max_size;
1628
0
}
1629
1630
////////////////////////////////////////////////////////////////////////////////
1631
1632
1.72k
static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
1633
    // always insert objects at the end of the context's memory pool
1634
1.72k
    struct ggml_object * obj_cur = ctx->objects_end;
1635
1636
1.72k
    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
1637
1.72k
    const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
1638
1.72k
    const size_t cur_end  = cur_offs + cur_size;
1639
1640
    // align to GGML_MEM_ALIGN
1641
1.72k
    GGML_ASSERT(size <= SIZE_MAX - (GGML_MEM_ALIGN - 1));
1642
1.72k
    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
1643
1644
1.72k
    char * const mem_buffer = ctx->mem_buffer;
1645
1.72k
    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
1646
1647
    // integer overflow checks
1648
1.72k
    if (cur_end > SIZE_MAX - size_needed) {
1649
0
        GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu)\n", __func__, cur_end, size_needed);
1650
0
        return NULL;
1651
0
    }
1652
1.72k
    if (cur_end + size_needed > SIZE_MAX - GGML_OBJECT_SIZE) {
1653
0
        GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu) + GGML_OBJECT_SIZE (%zu)\n", __func__,
1654
0
                cur_end, size_needed, (size_t) GGML_OBJECT_SIZE);
1655
0
        return NULL;
1656
0
    }
1657
1658
1.72k
    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
1659
0
        GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
1660
0
                __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
1661
#ifndef NDEBUG
1662
        GGML_ABORT("not enough space in the context's memory pool");
1663
#endif
1664
0
        return NULL;
1665
0
    }
1666
1667
1.72k
    *obj_new = (struct ggml_object) {
1668
1.72k
        .offs = cur_end + GGML_OBJECT_SIZE,
1669
1.72k
        .size = size_needed,
1670
1.72k
        .next = NULL,
1671
1.72k
        .type = type,
1672
1.72k
    };
1673
1674
1.72k
    GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
1675
1676
1.72k
    if (obj_cur != NULL) {
1677
1.30k
        obj_cur->next = obj_new;
1678
1.30k
    } else {
1679
        // this is the first object in this context
1680
418
        ctx->objects_begin = obj_new;
1681
418
    }
1682
1683
1.72k
    ctx->objects_end = obj_new;
1684
1685
    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
1686
1687
1.72k
    return obj_new;
1688
1.72k
}
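// Illustrative footprint calculation (not part of ggml.c): each allocation
// made by ggml_new_object is a GGML_OBJECT_SIZE header followed by the
// payload padded to GGML_MEM_ALIGN, with obj->offs pointing at the payload.
// Assuming GGML_MEM_ALIGN == 16, requesting 100 bytes reserves
// GGML_OBJECT_SIZE + GGML_PAD(100, 16) == GGML_OBJECT_SIZE + 112 bytes.
static size_t example_object_footprint(size_t payload) {
    return GGML_OBJECT_SIZE + GGML_PAD(payload, GGML_MEM_ALIGN); // mirrors the math above
}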
1689
1690
static struct ggml_tensor * ggml_new_tensor_impl(
1691
        struct ggml_context * ctx,
1692
        enum   ggml_type      type,
1693
        int                   n_dims,
1694
        const int64_t       * ne,
1695
        struct ggml_tensor  * view_src,
1696
1.72k
        size_t                view_offs) {
1697
1698
1.72k
    GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
1699
1.72k
    GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
1700
1701
    // find the base tensor and absolute offset
1702
1.72k
    if (view_src != NULL && view_src->view_src != NULL) {
1703
0
        view_offs += view_src->view_offs;
1704
0
        view_src   = view_src->view_src;
1705
0
    }
1706
1707
1.72k
    size_t data_size = ggml_row_size(type, ne[0]);
1708
6.89k
    for (int i = 1; i < n_dims; i++) {
1709
5.17k
        data_size *= ne[i];
1710
5.17k
    }
1711
1712
1.72k
    GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
1713
1714
1.72k
    void * data = view_src != NULL ? view_src->data : NULL;
1715
1.72k
    if (data != NULL) {
1716
0
        data = (char *) data + view_offs;
1717
0
    }
1718
1719
1.72k
    size_t obj_alloc_size = 0;
1720
1721
1.72k
    if (view_src == NULL && !ctx->no_alloc) {
1722
        // allocate tensor data in the context's memory pool
1723
0
        obj_alloc_size = data_size;
1724
0
    }
1725
1726
1.72k
    GGML_ASSERT(GGML_TENSOR_SIZE <= SIZE_MAX - obj_alloc_size);
1727
1728
1.72k
    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
1729
1.72k
    GGML_ASSERT(obj_new);
1730
1731
1.72k
    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
1732
1733
1.72k
    *result = (struct ggml_tensor) {
1734
1.72k
        /*.type         =*/ type,
1735
1.72k
        /*.buffer       =*/ NULL,
1736
1.72k
        /*.ne           =*/ { 1, 1, 1, 1 },
1737
1.72k
        /*.nb           =*/ { 0, 0, 0, 0 },
1738
1.72k
        /*.op           =*/ GGML_OP_NONE,
1739
1.72k
        /*.op_params    =*/ { 0 },
1740
1.72k
        /*.flags        =*/ 0,
1741
1.72k
        /*.src          =*/ { NULL },
1742
1.72k
        /*.view_src     =*/ view_src,
1743
1.72k
        /*.view_offs    =*/ view_offs,
1744
1.72k
        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
1745
1.72k
        /*.name         =*/ { 0 },
1746
1.72k
        /*.extra        =*/ NULL,
1747
1.72k
        /*.padding      =*/ { 0 },
1748
1.72k
    };
1749
1750
    // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
1751
    //GGML_ASSERT_ALIGNED(result->data);
1752
1753
8.62k
    for (int i = 0; i < n_dims; i++) {
1754
6.89k
        result->ne[i] = ne[i];
1755
6.89k
    }
1756
1757
1.72k
    result->nb[0] = ggml_type_size(type);
1758
1.72k
    result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
1759
5.17k
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
1760
3.44k
        result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
1761
3.44k
    }
1762
1763
1.72k
    ctx->n_objects++;
1764
1765
1.72k
    return result;
1766
1.72k
}
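// Worked example (not part of ggml.c) of the ne/nb initialization above: for
// GGML_TYPE_F32 (type size 4, block size 1) and ne = {4, 3, 2, 1}:
//   nb[0] = 4, nb[1] = 4*4 = 16, nb[2] = 16*3 = 48, nb[3] = 48*2 = 96
// so element (i0, i1, i2, i3) lives at the byte offset computed below:
static float example_get_f32(const struct ggml_tensor * t,
                             int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
    const char * p = (const char *) t->data
        + i0*t->nb[0] + i1*t->nb[1] + i2*t->nb[2] + i3*t->nb[3];
    return *(const float *) p;
}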
1767
1768
struct ggml_tensor * ggml_new_tensor(
1769
        struct ggml_context * ctx,
1770
        enum   ggml_type      type,
1771
        int                   n_dims,
1772
1.72k
        const int64_t       * ne) {
1773
1.72k
    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
1774
1.72k
}
1775
1776
struct ggml_tensor * ggml_new_tensor_1d(
1777
        struct ggml_context * ctx,
1778
        enum   ggml_type      type,
1779
0
        int64_t ne0) {
1780
0
    return ggml_new_tensor(ctx, type, 1, &ne0);
1781
0
}
1782
1783
struct ggml_tensor * ggml_new_tensor_2d(
1784
        struct ggml_context * ctx,
1785
        enum   ggml_type      type,
1786
        int64_t ne0,
1787
0
        int64_t ne1) {
1788
0
    const int64_t ne[2] = { ne0, ne1 };
1789
0
    return ggml_new_tensor(ctx, type, 2, ne);
1790
0
}
1791
1792
struct ggml_tensor * ggml_new_tensor_3d(
1793
        struct ggml_context * ctx,
1794
        enum   ggml_type      type,
1795
        int64_t ne0,
1796
        int64_t ne1,
1797
0
        int64_t ne2) {
1798
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
1799
0
    return ggml_new_tensor(ctx, type, 3, ne);
1800
0
}
1801
1802
struct ggml_tensor * ggml_new_tensor_4d(
1803
        struct ggml_context * ctx,
1804
        enum   ggml_type type,
1805
        int64_t ne0,
1806
        int64_t ne1,
1807
        int64_t ne2,
1808
0
        int64_t ne3) {
1809
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
1810
0
    return ggml_new_tensor(ctx, type, 4, ne);
1811
0
}
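// Illustrative sketch (not part of ggml.c): the fixed-arity helpers are thin
// wrappers over ggml_new_tensor, so these two calls are equivalent; note that
// ne[0] is the fastest-varying ("column") dimension.
static void example_tensor_ctors(struct ggml_context * ctx) {
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 10);

    const int64_t ne[2] = { 64, 10 };
    struct ggml_tensor * b = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);

    GGML_ASSERT(ggml_are_same_shape(a, b));
}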
1812
1813
0
void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
1814
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
1815
1816
0
    return (uint8_t *)ctx->mem_buffer + obj->offs;
1817
0
}
1818
1819
0
struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
1820
0
    return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
1821
0
}
1822
1823
0
void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
1824
0
    const int64_t ne2 = tensor->ne[2];
1825
0
    const int64_t ne1 = tensor->ne[1];
1826
0
    const int64_t ne0 = tensor->ne[0];
1827
1828
0
    const int64_t i3_ = (i/(ne2*ne1*ne0));
1829
0
    const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
1830
0
    const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
1831
0
    const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
1832
1833
0
    if (i0) {
1834
0
        * i0 = i0_;
1835
0
    }
1836
0
    if (i1) {
1837
0
        * i1 = i1_;
1838
0
    }
1839
0
    if (i2) {
1840
0
        * i2 = i2_;
1841
0
    }
1842
0
    if (i3) {
1843
0
        * i3 = i3_;
1844
0
    }
1845
0
}
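// Worked example (not part of ggml.c) for ggml_unravel_index: with
// ne = {4, 3, 2, 1} and flat index i = 17,
//   i3 = 17/(2*3*4)     = 0
//   i2 = (17 - 0)/(3*4) = 1
//   i1 = (17 - 12)/4    = 1
//   i0 =  17 - 12 - 4   = 1
// so flat index 17 unravels to (i0, i1, i2, i3) = (1, 1, 1, 0); any of the
// output pointers may be NULL if that coordinate is not needed.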
1846
1847
0
void * ggml_get_data(const struct ggml_tensor * tensor) {
1848
0
    return tensor->data;
1849
0
}
1850
1851
0
float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
1852
0
    assert(tensor->type == GGML_TYPE_F32);
1853
0
    return (float *)(tensor->data);
1854
0
}
1855
1856
0
enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
1857
0
    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
1858
0
    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
1859
0
}
1860
1861
0
enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) {
1862
0
    GGML_ASSERT(tensor->op == GGML_OP_GLU);
1863
0
    return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0);
1864
0
}
1865
1866
1.58k
const char * ggml_get_name(const struct ggml_tensor * tensor) {
1867
1.58k
    return tensor->name;
1868
1.58k
}
1869
1870
5.18k
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
1871
5.18k
    size_t i;
1872
35.9k
    for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
1873
30.7k
        tensor->name[i] = name[i];
1874
30.7k
    }
1875
5.18k
    tensor->name[i] = '\0';
1876
5.18k
    return tensor;
1877
5.18k
}
1878
1879
0
struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
1880
0
    va_list args;
1881
0
    va_start(args, fmt);
1882
0
    vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
1883
0
    va_end(args);
1884
0
    return tensor;
1885
0
}
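// Illustrative sketch (not part of ggml.c): ggml_set_name silently truncates
// to sizeof(tensor->name) - 1 bytes, and ggml_format_name takes a
// printf-style format string.
static void example_naming(struct ggml_context * ctx) {
    struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

    ggml_set_name(t, "logits");
    ggml_format_name(t, "blk.%d.attn_q", 0); // name becomes "blk.0.attn_q"
}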
1886
1887
struct ggml_tensor * ggml_view_tensor(
1888
        struct ggml_context * ctx,
1889
0
        struct ggml_tensor  * src) {
1890
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
1891
0
    ggml_format_name(result, "%s (view)", src->name);
1892
1893
0
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
1894
0
        result->nb[i] = src->nb[i];
1895
0
    }
1896
1897
0
    return result;
1898
0
}
1899
1900
931
struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
1901
931
    struct ggml_object * obj = ctx->objects_begin;
1902
1903
931
    char * const mem_buffer = ctx->mem_buffer;
1904
1905
931
    while (obj != NULL) {
1906
418
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1907
418
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1908
418
        }
1909
1910
0
        obj = obj->next;
1911
0
    }
1912
1913
513
    return NULL;
1914
931
}
1915
1916
1.07k
struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
1917
1.07k
    struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
1918
1.07k
    obj = obj->next;
1919
1920
1.07k
    char * const mem_buffer = ctx->mem_buffer;
1921
1922
1.07k
    while (obj != NULL) {
1923
910
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1924
910
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1925
910
        }
1926
1927
0
        obj = obj->next;
1928
0
    }
1929
1930
163
    return NULL;
1931
1.07k
}
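// Illustrative sketch (not part of ggml.c): enumerating every tensor in a
// context, the same pattern ggml_get_max_tensor_size uses earlier in this
// file; assumes <stdio.h>, which this file already includes.
static void example_iterate_tensors(struct ggml_context * ctx) {
    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL;
         t = ggml_get_next_tensor(ctx, t)) {
        printf("%-32s %zu bytes\n", ggml_get_name(t), ggml_nbytes(t));
    }
}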
1932
1933
0
struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
1934
0
    struct ggml_object * obj = ctx->objects_begin;
1935
1936
0
    char * const mem_buffer = ctx->mem_buffer;
1937
1938
0
    while (obj != NULL) {
1939
0
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1940
0
            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
1941
0
            if (strcmp(cur->name, name) == 0) {
1942
0
                return cur;
1943
0
            }
1944
0
        }
1945
1946
0
        obj = obj->next;
1947
0
    }
1948
1949
0
    return NULL;
1950
0
}
1951
1952
////////////////////////////////////////////////////////////////////////////////
1953
1954
// ggml_dup
1955
1956
static struct ggml_tensor * ggml_dup_impl(
1957
        struct ggml_context * ctx,
1958
        struct ggml_tensor  * a,
1959
0
        bool                  inplace) {
1960
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1961
1962
0
    result->op     = GGML_OP_DUP;
1963
0
    result->src[0] = a;
1964
1965
0
    return result;
1966
0
}
1967
1968
struct ggml_tensor * ggml_dup(
1969
        struct ggml_context * ctx,
1970
0
        struct ggml_tensor  * a) {
1971
0
    return ggml_dup_impl(ctx, a, false);
1972
0
}
1973
1974
struct ggml_tensor * ggml_dup_inplace(
1975
        struct ggml_context * ctx,
1976
0
        struct ggml_tensor  * a) {
1977
0
    return ggml_dup_impl(ctx, a, true);
1978
0
}
1979
1980
// ggml_add
1981
1982
static struct ggml_tensor * ggml_add_impl(
1983
        struct ggml_context * ctx,
1984
        struct ggml_tensor  * a,
1985
        struct ggml_tensor  * b,
1986
0
        bool                  inplace) {
1987
0
    GGML_ASSERT(ggml_can_repeat(b, a));
1988
1989
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1990
1991
0
    result->op     = GGML_OP_ADD;
1992
0
    result->src[0] = a;
1993
0
    result->src[1] = b;
1994
1995
0
    return result;
1996
0
}
1997
1998
struct ggml_tensor * ggml_add(
1999
        struct ggml_context * ctx,
2000
        struct ggml_tensor  * a,
2001
0
        struct ggml_tensor  * b) {
2002
0
    return ggml_add_impl(ctx, a, b, false);
2003
0
}
2004
2005
struct ggml_tensor * ggml_add_inplace(
2006
        struct ggml_context * ctx,
2007
        struct ggml_tensor  * a,
2008
0
        struct ggml_tensor  * b) {
2009
0
    return ggml_add_impl(ctx, a, b, true);
2010
0
}
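// Illustrative sketch (not part of ggml.c): operators like ggml_add only
// record a node (op + sources) in the context; no arithmetic happens here.
// Evaluating the node requires building a compute graph and running it on a
// backend, which lives outside this file.
static struct ggml_tensor * example_add_node(struct ggml_context * ctx,
                                             struct ggml_tensor * a,
                                             struct ggml_tensor * b) {
    struct ggml_tensor * c = ggml_add(ctx, a, b);
    GGML_ASSERT(c->op == GGML_OP_ADD && c->src[0] == a && c->src[1] == b);
    return c;
}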
2011
2012
// ggml_add_cast
2013
2014
static struct ggml_tensor * ggml_add_cast_impl(
2015
        struct ggml_context * ctx,
2016
        struct ggml_tensor  * a,
2017
        struct ggml_tensor  * b,
2018
0
        enum   ggml_type      type) {
2019
    // TODO: support less-strict constraint
2020
    //       GGML_ASSERT(ggml_can_repeat(b, a));
2021
0
    GGML_ASSERT(ggml_can_repeat_rows(b, a));
2022
2023
    // currently only supported for quantized input, f16, and bf16
2024
0
    GGML_ASSERT(ggml_is_quantized(a->type) ||
2025
0
                a->type == GGML_TYPE_F16 ||
2026
0
                a->type == GGML_TYPE_BF16);
2027
2028
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
2029
2030
0
    result->op     = GGML_OP_ADD;
2031
0
    result->src[0] = a;
2032
0
    result->src[1] = b;
2033
2034
0
    return result;
2035
0
}
2036
2037
struct ggml_tensor * ggml_add_cast(
2038
        struct ggml_context * ctx,
2039
        struct ggml_tensor  * a,
2040
        struct ggml_tensor  * b,
2041
0
        enum   ggml_type      type) {
2042
0
    return ggml_add_cast_impl(ctx, a, b, type);
2043
0
}
2044
2045
struct ggml_tensor * ggml_add_id(
2046
            struct ggml_context * ctx,
2047
            struct ggml_tensor  * a,
2048
            struct ggml_tensor  * b,
2049
0
            struct ggml_tensor  * ids) {
2050
2051
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
2052
0
    GGML_ASSERT(a->ne[1] == ids->ne[0]);
2053
0
    GGML_ASSERT(a->ne[2] == ids->ne[1]);
2054
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
2055
2056
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2057
2058
0
    result->op     = GGML_OP_ADD_ID;
2059
0
    result->src[0] = a;
2060
0
    result->src[1] = b;
2061
0
    result->src[2] = ids;
2062
2063
0
    return result;
2064
0
}
2065
2066
// ggml_add1
2067
2068
static struct ggml_tensor * ggml_add1_impl(
2069
        struct ggml_context * ctx,
2070
        struct ggml_tensor  * a,
2071
        struct ggml_tensor  * b,
2072
0
        bool                  inplace) {
2073
0
    GGML_ASSERT(ggml_is_scalar(b));
2074
0
    GGML_ASSERT(ggml_is_padded_1d(a));
2075
2076
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2077
2078
0
    result->op     = GGML_OP_ADD1;
2079
0
    result->src[0] = a;
2080
0
    result->src[1] = b;
2081
2082
0
    return result;
2083
0
}
2084
2085
struct ggml_tensor * ggml_add1(
2086
        struct ggml_context * ctx,
2087
        struct ggml_tensor  * a,
2088
0
        struct ggml_tensor  * b) {
2089
0
    return ggml_add1_impl(ctx, a, b, false);
2090
0
}
2091
2092
struct ggml_tensor * ggml_add1_inplace(
2093
        struct ggml_context * ctx,
2094
        struct ggml_tensor  * a,
2095
0
        struct ggml_tensor  * b) {
2096
0
    return ggml_add1_impl(ctx, a, b, true);
2097
0
}
2098
2099
// ggml_acc
2100
2101
static struct ggml_tensor * ggml_acc_impl(
2102
        struct ggml_context * ctx,
2103
        struct ggml_tensor  * a,
2104
        struct ggml_tensor  * b,
2105
        size_t                nb1,
2106
        size_t                nb2,
2107
        size_t                nb3,
2108
        size_t                offset,
2109
0
        bool                  inplace) {
2110
0
    GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
2111
0
    GGML_ASSERT(ggml_is_contiguous(a));
2112
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2113
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
2114
2115
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2116
2117
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
2118
0
    ggml_set_op_params(result, params, sizeof(params));
2119
2120
0
    result->op     = GGML_OP_ACC;
2121
0
    result->src[0] = a;
2122
0
    result->src[1] = b;
2123
2124
0
    return result;
2125
0
}
2126
2127
struct ggml_tensor * ggml_acc(
2128
        struct ggml_context * ctx,
2129
        struct ggml_tensor  * a,
2130
        struct ggml_tensor  * b,
2131
        size_t                nb1,
2132
        size_t                nb2,
2133
        size_t                nb3,
2134
0
        size_t                offset) {
2135
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
2136
0
}
2137
2138
struct ggml_tensor * ggml_acc_inplace(
2139
        struct ggml_context * ctx,
2140
        struct ggml_tensor  * a,
2141
        struct ggml_tensor  * b,
2142
        size_t                nb1,
2143
        size_t                nb2,
2144
        size_t                nb3,
2145
0
        size_t                offset) {
2146
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
2147
0
}
2148
2149
// ggml_sub
2150
2151
static struct ggml_tensor * ggml_sub_impl(
2152
        struct ggml_context * ctx,
2153
        struct ggml_tensor  * a,
2154
        struct ggml_tensor  * b,
2155
0
        bool                  inplace) {
2156
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2157
2158
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2159
2160
0
    result->op     = GGML_OP_SUB;
2161
0
    result->src[0] = a;
2162
0
    result->src[1] = b;
2163
2164
0
    return result;
2165
0
}
2166
2167
struct ggml_tensor * ggml_sub(
2168
        struct ggml_context * ctx,
2169
        struct ggml_tensor  * a,
2170
0
        struct ggml_tensor  * b) {
2171
0
    return ggml_sub_impl(ctx, a, b, false);
2172
0
}
2173
2174
struct ggml_tensor * ggml_sub_inplace(
2175
        struct ggml_context * ctx,
2176
        struct ggml_tensor  * a,
2177
0
        struct ggml_tensor  * b) {
2178
0
    return ggml_sub_impl(ctx, a, b, true);
2179
0
}
2180
2181
// ggml_mul
2182
2183
static struct ggml_tensor * ggml_mul_impl(
2184
        struct ggml_context * ctx,
2185
        struct ggml_tensor  * a,
2186
        struct ggml_tensor  * b,
2187
0
        bool                  inplace) {
2188
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2189
2190
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2191
2192
0
    result->op     = GGML_OP_MUL;
2193
0
    result->src[0] = a;
2194
0
    result->src[1] = b;
2195
2196
0
    return result;
2197
0
}
2198
2199
struct ggml_tensor * ggml_mul(
2200
        struct ggml_context * ctx,
2201
        struct ggml_tensor  * a,
2202
0
        struct ggml_tensor  * b) {
2203
0
    return ggml_mul_impl(ctx, a, b, false);
2204
0
}
2205
2206
struct ggml_tensor * ggml_mul_inplace(
2207
        struct ggml_context * ctx,
2208
        struct ggml_tensor  * a,
2209
0
        struct ggml_tensor  * b) {
2210
0
    return ggml_mul_impl(ctx, a, b, true);
2211
0
}
2212
2213
// ggml_div
2214
2215
static struct ggml_tensor * ggml_div_impl(
2216
        struct ggml_context * ctx,
2217
        struct ggml_tensor  * a,
2218
        struct ggml_tensor  * b,
2219
0
        bool                  inplace) {
2220
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2221
2222
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2223
2224
0
    result->op     = GGML_OP_DIV;
2225
0
    result->src[0] = a;
2226
0
    result->src[1] = b;
2227
2228
0
    return result;
2229
0
}
2230
2231
struct ggml_tensor * ggml_div(
2232
        struct ggml_context * ctx,
2233
        struct ggml_tensor  * a,
2234
0
        struct ggml_tensor  * b) {
2235
0
    return ggml_div_impl(ctx, a, b, false);
2236
0
}
2237
2238
struct ggml_tensor * ggml_div_inplace(
2239
        struct ggml_context * ctx,
2240
        struct ggml_tensor  * a,
2241
0
        struct ggml_tensor  * b) {
2242
0
    return ggml_div_impl(ctx, a, b, true);
2243
0
}
2244
2245
// ggml_sqr
2246
2247
static struct ggml_tensor * ggml_sqr_impl(
2248
        struct ggml_context * ctx,
2249
        struct ggml_tensor  * a,
2250
0
        bool                  inplace) {
2251
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2252
2253
0
    result->op     = GGML_OP_SQR;
2254
0
    result->src[0] = a;
2255
2256
0
    return result;
2257
0
}
2258
2259
struct ggml_tensor * ggml_sqr(
2260
        struct ggml_context * ctx,
2261
0
        struct ggml_tensor  * a) {
2262
0
    return ggml_sqr_impl(ctx, a, false);
2263
0
}
2264
2265
struct ggml_tensor * ggml_sqr_inplace(
2266
        struct ggml_context * ctx,
2267
0
        struct ggml_tensor  * a) {
2268
0
    return ggml_sqr_impl(ctx, a, true);
2269
0
}
2270
2271
// ggml_sqrt
2272
2273
static struct ggml_tensor * ggml_sqrt_impl(
2274
        struct ggml_context * ctx,
2275
        struct ggml_tensor  * a,
2276
0
        bool                  inplace) {
2277
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2278
2279
0
    result->op     = GGML_OP_SQRT;
2280
0
    result->src[0] = a;
2281
2282
0
    return result;
2283
0
}
2284
2285
struct ggml_tensor * ggml_sqrt(
2286
        struct ggml_context * ctx,
2287
0
        struct ggml_tensor  * a) {
2288
0
    return ggml_sqrt_impl(ctx, a, false);
2289
0
}
2290
2291
struct ggml_tensor * ggml_sqrt_inplace(
2292
        struct ggml_context * ctx,
2293
0
        struct ggml_tensor  * a) {
2294
0
    return ggml_sqrt_impl(ctx, a, true);
2295
0
}
2296
2297
// ggml_log
2298
2299
static struct ggml_tensor * ggml_log_impl(
2300
        struct ggml_context * ctx,
2301
        struct ggml_tensor  * a,
2302
0
        bool                  inplace) {
2303
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2304
2305
0
    result->op     = GGML_OP_LOG;
2306
0
    result->src[0] = a;
2307
2308
0
    return result;
2309
0
}
2310
2311
struct ggml_tensor * ggml_log(
2312
        struct ggml_context * ctx,
2313
0
        struct ggml_tensor  * a) {
2314
0
    return ggml_log_impl(ctx, a, false);
2315
0
}
2316
2317
struct ggml_tensor * ggml_log_inplace(
2318
        struct ggml_context * ctx,
2319
0
        struct ggml_tensor  * a) {
2320
0
    return ggml_log_impl(ctx, a, true);
2321
0
}
2322
2323
struct ggml_tensor * ggml_expm1(
2324
        struct ggml_context * ctx,
2325
0
        struct ggml_tensor  * a) {
2326
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
2327
0
}
2328
2329
struct ggml_tensor * ggml_expm1_inplace(
2330
        struct ggml_context * ctx,
2331
0
        struct ggml_tensor  * a) {
2332
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
2333
0
}
2334
2335
struct ggml_tensor * ggml_softplus(
2336
        struct ggml_context * ctx,
2337
0
        struct ggml_tensor  * a) {
2338
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2339
0
}
2340
2341
struct ggml_tensor * ggml_softplus_inplace(
2342
        struct ggml_context * ctx,
2343
0
        struct ggml_tensor  * a) {
2344
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2345
0
}
2346
2347
// ggml_sin
2348
2349
static struct ggml_tensor * ggml_sin_impl(
2350
        struct ggml_context * ctx,
2351
        struct ggml_tensor  * a,
2352
0
        bool                  inplace) {
2353
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2354
2355
0
    result->op     = GGML_OP_SIN;
2356
0
    result->src[0] = a;
2357
2358
0
    return result;
2359
0
}
2360
2361
struct ggml_tensor * ggml_sin(
2362
        struct ggml_context * ctx,
2363
0
        struct ggml_tensor  * a) {
2364
0
    return ggml_sin_impl(ctx, a, false);
2365
0
}
2366
2367
struct ggml_tensor * ggml_sin_inplace(
2368
        struct ggml_context * ctx,
2369
0
        struct ggml_tensor  * a) {
2370
0
    return ggml_sin_impl(ctx, a, true);
2371
0
}
2372
2373
// ggml_cos
2374
2375
static struct ggml_tensor * ggml_cos_impl(
2376
        struct ggml_context * ctx,
2377
        struct ggml_tensor  * a,
2378
0
        bool                  inplace) {
2379
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2380
2381
0
    result->op     = GGML_OP_COS;
2382
0
    result->src[0] = a;
2383
2384
0
    return result;
2385
0
}
2386
2387
struct ggml_tensor * ggml_cos(
2388
        struct ggml_context * ctx,
2389
0
        struct ggml_tensor  * a) {
2390
0
    return ggml_cos_impl(ctx, a, false);
2391
0
}
2392
2393
struct ggml_tensor * ggml_cos_inplace(
2394
        struct ggml_context * ctx,
2395
0
        struct ggml_tensor  * a) {
2396
0
    return ggml_cos_impl(ctx, a, true);
2397
0
}
2398
2399
// ggml_sum
2400
2401
struct ggml_tensor * ggml_sum(
2402
        struct ggml_context * ctx,
2403
0
        struct ggml_tensor  * a) {
2404
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
2405
2406
0
    result->op     = GGML_OP_SUM;
2407
0
    result->src[0] = a;
2408
2409
0
    return result;
2410
0
}
2411
2412
// ggml_sum_rows
2413
2414
struct ggml_tensor * ggml_sum_rows(
2415
        struct ggml_context * ctx,
2416
0
        struct ggml_tensor  * a) {
2417
0
    int64_t ne[GGML_MAX_DIMS] = { 1 };
2418
0
    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
2419
0
        ne[i] = a->ne[i];
2420
0
    }
2421
2422
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2423
2424
0
    result->op     = GGML_OP_SUM_ROWS;
2425
0
    result->src[0] = a;
2426
2427
0
    return result;
2428
0
}
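// Shape note (not part of ggml.c): ggml_sum collapses everything to a single
// element, while ggml_sum_rows only collapses dim 0. For a with ne = {4, 3, 2, 1}:
//     ggml_sum(ctx, a)      -> ne = {1, 1, 1, 1}
//     ggml_sum_rows(ctx, a) -> ne = {1, 3, 2, 1}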
2429
2430
// ggml_cumsum
2431
2432
struct ggml_tensor * ggml_cumsum(
2433
        struct ggml_context * ctx,
2434
0
        struct ggml_tensor  * a) {
2435
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2436
2437
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2438
2439
0
    result->op     = GGML_OP_CUMSUM;
2440
0
    result->src[0] = a;
2441
2442
0
    return result;
2443
0
}
2444
2445
// ggml_mean
2446
2447
struct ggml_tensor * ggml_mean(
2448
        struct ggml_context * ctx,
2449
0
        struct ggml_tensor  * a) {
2450
0
    int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
2451
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
2452
2453
0
    result->op     = GGML_OP_MEAN;
2454
0
    result->src[0] = a;
2455
2456
0
    return result;
2457
0
}
2458
2459
// ggml_argmax
2460
2461
struct ggml_tensor * ggml_argmax(
2462
        struct ggml_context * ctx,
2463
0
        struct ggml_tensor  * a) {
2464
0
    GGML_ASSERT(ggml_is_matrix(a));
2465
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
2466
2467
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
2468
2469
0
    result->op     = GGML_OP_ARGMAX;
2470
0
    result->src[0] = a;
2471
2472
0
    return result;
2473
0
}
2474
2475
// ggml_count_equal
2476
2477
struct ggml_tensor * ggml_count_equal(
2478
        struct ggml_context * ctx,
2479
        struct ggml_tensor  * a,
2480
0
        struct ggml_tensor  * b) {
2481
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
2482
2483
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
2484
2485
0
    result->op     = GGML_OP_COUNT_EQUAL;
2486
0
    result->src[0] = a;
2487
0
    result->src[1] = b;
2488
2489
0
    return result;
2490
0
}
2491
2492
// ggml_repeat
2493
2494
struct ggml_tensor * ggml_repeat(
2495
        struct ggml_context * ctx,
2496
        struct ggml_tensor  * a,
2497
0
        struct ggml_tensor  * b) {
2498
0
    GGML_ASSERT(ggml_can_repeat(a, b));
2499
2500
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2501
2502
0
    result->op     = GGML_OP_REPEAT;
2503
0
    result->src[0] = a;
2504
2505
0
    return result;
2506
0
}
2507
2508
struct ggml_tensor * ggml_repeat_4d(
2509
        struct ggml_context * ctx,
2510
        struct ggml_tensor * a,
2511
0
        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
2512
0
    const bool can_repeat = ggml_is_empty(a) || (
2513
0
        (ne0 % a->ne[0] == 0) &&
2514
0
        (ne1 % a->ne[1] == 0) &&
2515
0
        (ne2 % a->ne[2] == 0) &&
2516
0
        (ne3 % a->ne[3] == 0)
2517
0
    );
2518
0
    GGML_ASSERT(can_repeat);
2519
2520
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
2521
2522
0
    result->op     = GGML_OP_REPEAT;
2523
0
    result->src[0] = a;
2524
2525
0
    return result;
2526
0
}
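// Illustrative sketch (not part of ggml.c): repeating requires every target
// dim to be an exact multiple of the source dim, so a {2, 3, 1, 1} tensor can
// be tiled to {4, 6, 1, 1} but not to {5, 6, 1, 1}.
static struct ggml_tensor * example_tile(struct ggml_context * ctx,
                                         struct ggml_tensor * a /* ne = {2, 3, 1, 1} */) {
    return ggml_repeat_4d(ctx, a, 4, 6, 1, 1);
}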
2527
2528
// ggml_repeat_back
2529
2530
struct ggml_tensor * ggml_repeat_back(
2531
        struct ggml_context * ctx,
2532
        struct ggml_tensor  * a,
2533
0
        struct ggml_tensor  * b) {
2534
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2535
2536
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2537
2538
0
    result->op     = GGML_OP_REPEAT_BACK;
2539
0
    result->src[0] = a;
2540
2541
0
    return result;
2542
0
}
2543
2544
// ggml_concat
2545
2546
struct ggml_tensor * ggml_concat(
2547
    struct ggml_context * ctx,
2548
    struct ggml_tensor  * a,
2549
    struct ggml_tensor  * b,
2550
0
    int                   dim) {
2551
0
    GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
2552
0
    GGML_ASSERT(a->type == b->type);
2553
2554
0
    int64_t ne[GGML_MAX_DIMS];
2555
0
    for (int d = 0; d < GGML_MAX_DIMS; ++d) {
2556
0
        if (d == dim) {
2557
0
            ne[d] = a->ne[d] + b->ne[d];
2558
0
            continue;
2559
0
        }
2560
0
        GGML_ASSERT(a->ne[d] == b->ne[d]);
2561
0
        ne[d] = a->ne[d];
2562
0
    }
2563
2564
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2565
2566
0
    ggml_set_op_params_i32(result, 0, dim);
2567
2568
0
    result->op     = GGML_OP_CONCAT;
2569
0
    result->src[0] = a;
2570
0
    result->src[1] = b;
2571
2572
0
    return result;
2573
0
}
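// Shape note (not part of ggml.c): all dims except `dim` must match and the
// result grows along `dim`; concatenating ne = {8, 3} with ne = {8, 5} along
// dim 1 yields ne = {8, 8}:
//     struct ggml_tensor * c = ggml_concat(ctx, a, b, /*dim =*/ 1);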
2574
2575
// ggml_abs
2576
2577
struct ggml_tensor * ggml_abs(
2578
        struct ggml_context * ctx,
2579
0
        struct ggml_tensor  * a) {
2580
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
2581
0
}
2582
2583
struct ggml_tensor * ggml_abs_inplace(
2584
        struct ggml_context * ctx,
2585
0
        struct ggml_tensor  * a) {
2586
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
2587
0
}
2588
2589
// ggml_sgn
2590
2591
struct ggml_tensor * ggml_sgn(
2592
        struct ggml_context * ctx,
2593
0
        struct ggml_tensor  * a) {
2594
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
2595
0
}
2596
2597
struct ggml_tensor * ggml_sgn_inplace(
2598
        struct ggml_context * ctx,
2599
0
        struct ggml_tensor  * a) {
2600
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
2601
0
}
2602
2603
// ggml_neg
2604
2605
struct ggml_tensor * ggml_neg(
2606
        struct ggml_context * ctx,
2607
0
        struct ggml_tensor  * a) {
2608
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
2609
0
}
2610
2611
struct ggml_tensor * ggml_neg_inplace(
2612
        struct ggml_context * ctx,
2613
0
        struct ggml_tensor  * a) {
2614
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
2615
0
}
2616
2617
// ggml_step
2618
2619
struct ggml_tensor * ggml_step(
2620
        struct ggml_context * ctx,
2621
0
        struct ggml_tensor  * a) {
2622
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
2623
0
}
2624
2625
struct ggml_tensor * ggml_step_inplace(
2626
        struct ggml_context * ctx,
2627
0
        struct ggml_tensor  * a) {
2628
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
2629
0
}
2630
2631
// ggml_tanh
2632
2633
struct ggml_tensor * ggml_tanh(
2634
        struct ggml_context * ctx,
2635
0
        struct ggml_tensor  * a) {
2636
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
2637
0
}
2638
2639
struct ggml_tensor * ggml_tanh_inplace(
2640
        struct ggml_context * ctx,
2641
0
        struct ggml_tensor  * a) {
2642
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
2643
0
}
2644
2645
// ggml_elu
2646
2647
struct ggml_tensor * ggml_elu(
2648
    struct ggml_context * ctx,
2649
0
    struct ggml_tensor  * a) {
2650
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
2651
0
}
2652
2653
struct ggml_tensor * ggml_elu_inplace(
2654
    struct ggml_context * ctx,
2655
0
    struct ggml_tensor  * a) {
2656
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
2657
0
}
2658
2659
// ggml_relu
2660
2661
struct ggml_tensor * ggml_relu(
2662
        struct ggml_context * ctx,
2663
0
        struct ggml_tensor  * a) {
2664
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
2665
0
}
2666
2667
struct ggml_tensor * ggml_relu_inplace(
2668
        struct ggml_context * ctx,
2669
0
        struct ggml_tensor  * a) {
2670
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
2671
0
}
2672
2673
// ggml_leaky_relu
2674
2675
struct ggml_tensor * ggml_leaky_relu(
2676
        struct ggml_context * ctx,
2677
        struct ggml_tensor  * a,
2678
        float                 negative_slope,
2679
0
        bool                  inplace) {
2680
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2681
2682
0
    ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
2683
2684
0
    result->op     = GGML_OP_LEAKY_RELU;
2685
0
    result->src[0] = a;
2686
2687
0
    return result;
2688
0
}
2689
2690
// ggml_sigmoid
2691
2692
struct ggml_tensor * ggml_sigmoid(
2693
        struct ggml_context * ctx,
2694
0
        struct ggml_tensor  * a) {
2695
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
2696
0
}
2697
2698
struct ggml_tensor * ggml_sigmoid_inplace(
2699
        struct ggml_context * ctx,
2700
0
        struct ggml_tensor  * a) {
2701
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
2702
0
}
2703
2704
// ggml_gelu
2705
2706
struct ggml_tensor * ggml_gelu(
2707
        struct ggml_context * ctx,
2708
0
        struct ggml_tensor  * a) {
2709
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
2710
0
}
2711
2712
struct ggml_tensor * ggml_gelu_inplace(
2713
        struct ggml_context * ctx,
2714
0
        struct ggml_tensor  * a) {
2715
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
2716
0
}
2717
2718
// ggml_gelu_erf
2719
2720
struct ggml_tensor * ggml_gelu_erf(
2721
        struct ggml_context * ctx,
2722
0
        struct ggml_tensor  * a) {
2723
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
2724
0
}
2725
2726
struct ggml_tensor * ggml_gelu_erf_inplace(
2727
        struct ggml_context * ctx,
2728
0
        struct ggml_tensor  * a) {
2729
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
2730
0
}
2731
2732
// ggml_gelu_quick
2733
2734
struct ggml_tensor * ggml_gelu_quick(
2735
        struct ggml_context * ctx,
2736
0
        struct ggml_tensor  * a) {
2737
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2738
0
}
2739
2740
struct ggml_tensor * ggml_gelu_quick_inplace(
2741
        struct ggml_context * ctx,
2742
0
        struct ggml_tensor  * a) {
2743
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2744
0
}
2745
2746
// ggml_silu
2747
2748
struct ggml_tensor * ggml_silu(
2749
        struct ggml_context * ctx,
2750
0
        struct ggml_tensor  * a) {
2751
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
2752
0
}
2753
2754
struct ggml_tensor * ggml_silu_inplace(
2755
        struct ggml_context * ctx,
2756
0
        struct ggml_tensor  * a) {
2757
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
2758
0
}
2759
2760
// ggml_xielu
2761
2762
struct ggml_tensor * ggml_xielu(
2763
        struct ggml_context * ctx,
2764
        struct ggml_tensor  * a,
2765
        float alpha_n,
2766
        float alpha_p,
2767
        float beta,
2768
0
        float eps) {
2769
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2770
2771
0
    ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
2772
0
    ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
2773
0
    ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
2774
0
    ggml_set_op_params_f32(result, 3, beta);
2775
0
    ggml_set_op_params_f32(result, 4, eps);
2776
2777
0
    result->op     = GGML_OP_UNARY;
2778
0
    result->src[0] = a;
2779
2780
0
    return result;
2781
0
}
2782
2783
// ggml_silu_back
2784
2785
struct ggml_tensor * ggml_silu_back(
2786
        struct ggml_context * ctx,
2787
        struct ggml_tensor  * a,
2788
0
        struct ggml_tensor  * b) {
2789
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2790
2791
0
    result->op     = GGML_OP_SILU_BACK;
2792
0
    result->src[0] = a;
2793
0
    result->src[1] = b;
2794
2795
0
    return result;
2796
0
}
2797
2798
// ggml_hardswish
2799
2800
struct ggml_tensor * ggml_hardswish(
2801
        struct ggml_context * ctx,
2802
0
        struct ggml_tensor  * a) {
2803
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
2804
0
}
2805
2806
// ggml_hardsigmoid
2807
2808
struct ggml_tensor * ggml_hardsigmoid(
2809
        struct ggml_context * ctx,
2810
0
        struct ggml_tensor  * a) {
2811
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
2812
0
}
2813
2814
// ggml_exp
2815
2816
struct ggml_tensor * ggml_exp(
2817
        struct ggml_context * ctx,
2818
0
        struct ggml_tensor  * a) {
2819
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
2820
0
}
2821
2822
struct ggml_tensor * ggml_exp_inplace(
2823
        struct ggml_context * ctx,
2824
0
        struct ggml_tensor  * a) {
2825
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
2826
0
}
2827
2828
// ggml_glu
2829
2830
static struct ggml_tensor * ggml_glu_impl(
2831
        struct ggml_context * ctx,
2832
        struct ggml_tensor  * a,
2833
        struct ggml_tensor  * b,
2834
        enum ggml_glu_op      op,
2835
0
        bool                  swapped) {
2836
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
2837
2838
0
    if (b) {
2839
0
        GGML_ASSERT(ggml_is_contiguous_1(b));
2840
0
        GGML_ASSERT(ggml_are_same_shape(a, b));
2841
0
        GGML_ASSERT(a->type == b->type);
2842
0
    }
2843
2844
0
    int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i];
2845
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0);
2846
2847
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
2848
0
    ggml_set_op_params_i32(result, 1, (int32_t) swapped);
2849
2850
0
    result->op     = GGML_OP_GLU;
2851
0
    result->src[0] = a;
2852
0
    result->src[1] = b;
2853
2854
0
    return result;
2855
0
}
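// Shape note (not part of ggml.c): with b == NULL the GLU ops treat the two
// halves of dim 0 as value and gate, so an {8, n} input yields a {4, n}
// result (`swapped` selects which half is the gate); the *_split variants
// below take the gate as a separate tensor b and keep the shape of a.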
2856
2857
// ggml_floor
2858
2859
struct ggml_tensor * ggml_floor(
2860
        struct ggml_context * ctx,
2861
0
        struct ggml_tensor  * a) {
2862
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2863
0
}
2864
2865
struct ggml_tensor * ggml_floor_inplace(
2866
        struct ggml_context * ctx,
2867
0
        struct ggml_tensor  * a) {
2868
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2869
0
}
2870
2871
// ggml_ceil
2872
2873
struct ggml_tensor * ggml_ceil(
2874
        struct ggml_context * ctx,
2875
0
        struct ggml_tensor  * a) {
2876
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2877
0
}
2878
2879
struct ggml_tensor * ggml_ceil_inplace(
2880
        struct ggml_context * ctx,
2881
0
        struct ggml_tensor  * a) {
2882
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2883
0
}
2884
2885
// ggml_round
2886
2887
struct ggml_tensor * ggml_round(
2888
        struct ggml_context * ctx,
2889
0
        struct ggml_tensor  * a) {
2890
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2891
0
}
2892
2893
struct ggml_tensor * ggml_round_inplace(
2894
        struct ggml_context * ctx,
2895
0
        struct ggml_tensor  * a) {
2896
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2897
0
}
2898
2899
// ggml_trunc
2900
2901
struct ggml_tensor * ggml_trunc(
2902
        struct ggml_context * ctx,
2903
0
        struct ggml_tensor  * a) {
2904
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2905
0
}
2906
2907
struct ggml_tensor * ggml_trunc_inplace(
2908
        struct ggml_context * ctx,
2909
0
        struct ggml_tensor  * a) {
2910
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2911
0
}
2912
2913
struct ggml_tensor * ggml_glu(
2914
        struct ggml_context * ctx,
2915
        struct ggml_tensor  * a,
2916
        enum ggml_glu_op      op,
2917
0
        bool                  swapped) {
2918
0
    return ggml_glu_impl(ctx, a, NULL, op, swapped);
2919
0
}
2920
2921
struct ggml_tensor * ggml_glu_split(
2922
        struct ggml_context * ctx,
2923
        struct ggml_tensor  * a,
2924
        struct ggml_tensor  * b,
2925
0
        enum ggml_glu_op      op) {
2926
0
    return ggml_glu_impl(ctx, a, b, op, false);
2927
0
}
2928
2929
// ggml_reglu
2930
2931
struct ggml_tensor * ggml_reglu(
2932
        struct ggml_context * ctx,
2933
0
        struct ggml_tensor  * a) {
2934
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false);
2935
0
}
2936
2937
struct ggml_tensor * ggml_reglu_swapped(
2938
        struct ggml_context * ctx,
2939
0
        struct ggml_tensor  * a) {
2940
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true);
2941
0
}
2942
2943
struct ggml_tensor * ggml_reglu_split(
2944
        struct ggml_context * ctx,
2945
        struct ggml_tensor  * a,
2946
0
        struct ggml_tensor  * b) {
2947
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false);
2948
0
}
2949
2950
// ggml_geglu
2951
2952
struct ggml_tensor * ggml_geglu(
2953
        struct ggml_context * ctx,
2954
0
        struct ggml_tensor  * a) {
2955
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false);
2956
0
}
2957
2958
struct ggml_tensor * ggml_geglu_swapped(
2959
        struct ggml_context * ctx,
2960
0
        struct ggml_tensor  * a) {
2961
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true);
2962
0
}
2963
2964
struct ggml_tensor * ggml_geglu_split(
2965
        struct ggml_context * ctx,
2966
        struct ggml_tensor  * a,
2967
0
        struct ggml_tensor  * b) {
2968
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false);
2969
0
}
2970
2971
// ggml_swiglu
2972
2973
struct ggml_tensor * ggml_swiglu(
2974
        struct ggml_context * ctx,
2975
0
        struct ggml_tensor  * a) {
2976
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false);
2977
0
}
2978
2979
struct ggml_tensor * ggml_swiglu_swapped(
2980
        struct ggml_context * ctx,
2981
0
        struct ggml_tensor  * a) {
2982
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true);
2983
0
}
2984
2985
struct ggml_tensor * ggml_swiglu_split(
2986
        struct ggml_context * ctx,
2987
        struct ggml_tensor  * a,
2988
0
        struct ggml_tensor  * b) {
2989
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
2990
0
}
2991
2992
// ggml_geglu_erf
2993
2994
struct ggml_tensor * ggml_geglu_erf(
2995
        struct ggml_context * ctx,
2996
0
        struct ggml_tensor  * a) {
2997
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
2998
0
}
2999
3000
struct ggml_tensor * ggml_geglu_erf_swapped(
3001
        struct ggml_context * ctx,
3002
0
        struct ggml_tensor  * a) {
3003
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
3004
0
}
3005
3006
struct ggml_tensor * ggml_geglu_erf_split(
3007
        struct ggml_context * ctx,
3008
        struct ggml_tensor  * a,
3009
0
        struct ggml_tensor  * b) {
3010
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
3011
0
}
3012
3013
// ggml_geglu_quick
3014
3015
struct ggml_tensor * ggml_geglu_quick(
3016
        struct ggml_context * ctx,
3017
0
        struct ggml_tensor  * a) {
3018
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
3019
0
}
3020
3021
struct ggml_tensor * ggml_geglu_quick_swapped(
3022
        struct ggml_context * ctx,
3023
0
        struct ggml_tensor  * a) {
3024
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
3025
0
}
3026
3027
struct ggml_tensor * ggml_geglu_quick_split(
3028
        struct ggml_context * ctx,
3029
        struct ggml_tensor  * a,
3030
0
        struct ggml_tensor  * b) {
3031
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
3032
0
}
3033
3034
struct ggml_tensor * ggml_swiglu_oai(
3035
        struct ggml_context * ctx,
3036
        struct ggml_tensor  * a,
3037
        struct ggml_tensor  * b,
3038
        float                 alpha,
3039
0
        float                 limit) {
3040
0
    struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false);
3041
0
    ggml_set_op_params_f32(result, 2, alpha);
3042
0
    ggml_set_op_params_f32(result, 3, limit);
3043
3044
0
    return result;
3045
0
}
3046
3047
// ggml_norm
3048
3049
static struct ggml_tensor * ggml_norm_impl(
3050
        struct ggml_context * ctx,
3051
        struct ggml_tensor  * a,
3052
        float                 eps,
3053
0
        bool                  inplace) {
3054
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3055
3056
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3057
3058
0
    result->op     = GGML_OP_NORM;
3059
0
    result->src[0] = a;
3060
3061
0
    return result;
3062
0
}
3063
3064
struct ggml_tensor * ggml_norm(
3065
        struct ggml_context * ctx,
3066
        struct ggml_tensor  * a,
3067
0
        float                 eps) {
3068
0
    return ggml_norm_impl(ctx, a, eps, false);
3069
0
}
3070
3071
struct ggml_tensor * ggml_norm_inplace(
3072
        struct ggml_context * ctx,
3073
        struct ggml_tensor  * a,
3074
0
        float                 eps) {
3075
0
    return ggml_norm_impl(ctx, a, eps, true);
3076
0
}
3077
3078
// ggml_rms_norm
3079
3080
static struct ggml_tensor * ggml_rms_norm_impl(
3081
        struct ggml_context * ctx,
3082
        struct ggml_tensor  * a,
3083
        float                 eps,
3084
0
        bool                  inplace) {
3085
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3086
3087
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3088
3089
0
    result->op     = GGML_OP_RMS_NORM;
3090
0
    result->src[0] = a;
3091
3092
0
    return result;
3093
0
}
3094
3095
struct ggml_tensor * ggml_rms_norm(
3096
        struct ggml_context * ctx,
3097
        struct ggml_tensor  * a,
3098
0
        float                 eps) {
3099
0
    return ggml_rms_norm_impl(ctx, a, eps, false);
3100
0
}
3101
3102
struct ggml_tensor * ggml_rms_norm_inplace(
3103
        struct ggml_context * ctx,
3104
        struct ggml_tensor  * a,
3105
0
        float                 eps) {
3106
0
    return ggml_rms_norm_impl(ctx, a, eps, true);
3107
0
}
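// Reference semantics (editor's sketch, not part of ggml.c): per row of
// n = ne[0] elements, RMS norm computes y_i = x_i / sqrt(mean(x^2) + eps);
// <math.h> is already included by this file.
static void example_rms_norm_row(const float * x, float * y, int n, float eps) {
    float sum = 0.0f;
    for (int i = 0; i < n; i++) {
        sum += x[i]*x[i];
    }
    const float scale = 1.0f/sqrtf(sum/n + eps);
    for (int i = 0; i < n; i++) {
        y[i] = x[i]*scale;
    }
}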
3108
3109
// ggml_rms_norm_back
3110
3111
struct ggml_tensor * ggml_rms_norm_back(
3112
        struct ggml_context * ctx,
3113
        struct ggml_tensor  * a,
3114
        struct ggml_tensor  * b,
3115
0
        float                 eps) {
3116
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3117
3118
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3119
3120
0
    result->op     = GGML_OP_RMS_NORM_BACK;
3121
0
    result->src[0] = a;
3122
0
    result->src[1] = b;
3123
3124
0
    return result;
3125
0
}
3126
3127
// ggml_group_norm
3128
3129
static struct ggml_tensor * ggml_group_norm_impl(
3130
        struct ggml_context * ctx,
3131
        struct ggml_tensor  * a,
3132
        int                   n_groups,
3133
        float                 eps,
3134
0
        bool                  inplace) {
3135
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3136
3137
0
    ggml_set_op_params_i32(result, 0, n_groups);
3138
0
    ggml_set_op_params_f32(result, 1, eps);
3139
3140
0
    result->op     = GGML_OP_GROUP_NORM;
3141
0
    result->src[0] = a;
3142
3143
0
    return result;
3144
0
}
3145
3146
struct ggml_tensor * ggml_group_norm(
3147
        struct ggml_context * ctx,
3148
        struct ggml_tensor  * a,
3149
        int                   n_groups,
3150
0
        float                 eps) {
3151
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
3152
0
}
3153
3154
struct ggml_tensor * ggml_group_norm_inplace(
3155
        struct ggml_context * ctx,
3156
        struct ggml_tensor  * a,
3157
        int                   n_groups,
3158
0
        float                 eps) {
3159
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
3160
0
}
3161
3162
// ggml_l2_norm
3163
3164
static struct ggml_tensor * ggml_l2_norm_impl(
3165
        struct ggml_context * ctx,
3166
        struct ggml_tensor  * a,
3167
        float                 eps,
3168
0
        bool                  inplace) {
3169
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3170
3171
0
    ggml_set_op_params_f32(result, 0, eps);
3172
3173
0
    result->op     = GGML_OP_L2_NORM;
3174
0
    result->src[0] = a;
3175
3176
0
    return result;
3177
0
}
3178
3179
struct ggml_tensor * ggml_l2_norm(
3180
        struct ggml_context * ctx,
3181
        struct ggml_tensor  * a,
3182
0
        float                 eps) {
3183
0
    return ggml_l2_norm_impl(ctx, a, eps, false);
3184
0
}
3185
3186
struct ggml_tensor * ggml_l2_norm_inplace(
3187
        struct ggml_context * ctx,
3188
        struct ggml_tensor  * a,
3189
0
        float                 eps) {
3190
0
    return ggml_l2_norm_impl(ctx, a, eps, true);
3191
0
}
3192
3193
// ggml_mul_mat
3194
3195
0
static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3196
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3197
3198
0
    return (t0->ne[0]           == t1->ne[0])  &&
3199
0
           (t1->ne[2]%t0->ne[2] == 0)          && // verify t0 is broadcastable
3200
0
           (t1->ne[3]%t0->ne[3] == 0);
3201
0
}
3202
3203
struct ggml_tensor * ggml_mul_mat(
3204
        struct ggml_context * ctx,
3205
        struct ggml_tensor  * a,
3206
0
        struct ggml_tensor  * b) {
3207
0
    GGML_ASSERT(ggml_can_mul_mat(a, b));
3208
0
    GGML_ASSERT(!ggml_is_transposed(a));
3209
3210
0
    const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
3211
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3212
3213
0
    result->op     = GGML_OP_MUL_MAT;
3214
0
    result->src[0] = a;
3215
0
    result->src[1] = b;
3216
3217
0
    return result;
3218
0
}
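// Worked example (not part of ggml.c) of the shape rule above: ggml_mul_mat
// contracts along dim 0 ("rows dot rows") and always produces F32:
//     a: ne = {64, 10}   (10 rows of 64 values)
//     b: ne = {64,  4}   ( 4 rows of 64 values)
//     ggml_mul_mat(ctx, a, b) -> ne = {10, 4}
// with a broadcast over dims 2 and 3 when b has more matrices than a.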
3219
3220
void ggml_mul_mat_set_prec(
3221
        struct ggml_tensor * a,
3222
0
        enum ggml_prec       prec) {
3223
0
    GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
3224
3225
0
    const int32_t prec_i32 = (int32_t) prec;
3226
3227
0
    ggml_set_op_params_i32(a, 0, prec_i32);
3228
0
}
3229
3230
// ggml_mul_mat_id
3231
3232
/*
3233
    c = ggml_mul_mat_id(ctx, as, b, ids);
3234
3235
    as  -> [cols, rows, n_expert]
3236
    b   -> [cols, n_expert_used, n_tokens]
3237
    ids -> [n_expert_used, n_tokens] (i32)
3238
    c   -> [rows, n_expert_used, n_tokens]
3239
3240
    in b, n_expert_used can be broadcast to match the n_expert_used of ids
3241
3242
    c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
3243
*/
3244
struct ggml_tensor * ggml_mul_mat_id(
3245
        struct ggml_context * ctx,
3246
        struct ggml_tensor  * as,
3247
        struct ggml_tensor  * b,
3248
0
        struct ggml_tensor  * ids) {
3249
0
    GGML_ASSERT(!ggml_is_transposed(as));
3250
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
3251
3252
0
    GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
3253
0
    GGML_ASSERT(b->ne[3] == 1); // b is 3d
3254
0
    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
3255
0
    GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
3256
0
    GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
3257
0
    GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
3258
3259
0
    const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
3260
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3261
3262
0
    result->op     = GGML_OP_MUL_MAT_ID;
3263
0
    result->src[0] = as;
3264
0
    result->src[1] = b;
3265
0
    result->src[2] = ids;
3266
3267
0
    return result;
3268
0
}
3269
3270
// ggml_out_prod
3271
3272
0
static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3273
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3274
3275
0
    return (t0->ne[1] == t1->ne[1])   &&
3276
0
           (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
3277
0
           (t1->ne[3]%t0->ne[3] == 0);
3278
0
}
3279
3280
struct ggml_tensor * ggml_out_prod(
3281
        struct ggml_context * ctx,
3282
        struct ggml_tensor  * a,
3283
0
        struct ggml_tensor  * b) {
3284
0
    GGML_ASSERT(ggml_can_out_prod(a, b));
3285
0
    GGML_ASSERT(!ggml_is_transposed(a));
3286
3287
    // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
3288
0
    const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
3289
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3290
3291
0
    result->op     = GGML_OP_OUT_PROD;
3292
0
    result->src[0] = a;
3293
0
    result->src[1] = b;
3294
3295
0
    return result;
3296
0
}
3297
3298
// ggml_scale
3299
3300
static struct ggml_tensor * ggml_scale_impl(
3301
        struct ggml_context * ctx,
3302
        struct ggml_tensor  * a,
3303
        float                 s,
3304
        float                 b,
3305
0
        bool                  inplace) {
3306
0
    GGML_ASSERT(ggml_is_padded_1d(a));
3307
3308
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3309
3310
0
    float params[2] = { s, b };
3311
0
    ggml_set_op_params(result, &params, sizeof(params));
3312
3313
0
    result->op     = GGML_OP_SCALE;
3314
0
    result->src[0] = a;
3315
3316
0
    return result;
3317
0
}
3318
3319
struct ggml_tensor * ggml_scale(
3320
        struct ggml_context * ctx,
3321
        struct ggml_tensor  * a,
3322
0
        float                 s) {
3323
0
    return ggml_scale_impl(ctx, a, s, 0.0, false);
3324
0
}
3325
3326
struct ggml_tensor * ggml_scale_inplace(
3327
        struct ggml_context * ctx,
3328
        struct ggml_tensor  * a,
3329
0
        float                 s) {
3330
0
    return ggml_scale_impl(ctx, a, s, 0.0, true);
3331
0
}
3332
3333
struct ggml_tensor * ggml_scale_bias(
3334
        struct ggml_context * ctx,
3335
        struct ggml_tensor  * a,
3336
        float                 s,
3337
0
        float                 b) {
3338
0
    return ggml_scale_impl(ctx, a, s, b, false);
3339
0
}
3340
3341
struct ggml_tensor * ggml_scale_bias_inplace(
3342
        struct ggml_context * ctx,
3343
        struct ggml_tensor  * a,
3344
        float                 s,
3345
0
        float                 b) {
3346
0
    return ggml_scale_impl(ctx, a, s, b, true);
3347
0
}
3348
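Per the two packed op params, GGML_OP_SCALE computes an elementwise s*a + b, so the four wrappers above cover scale-only and fused scale-plus-bias, each with an in-place variant. A minimal sketch (ctx assumed initialized):

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 10);
    struct ggml_tensor * y = ggml_scale(ctx, x, 0.5f);            // y = 0.5 * x (bias 0)
    struct ggml_tensor * z = ggml_scale_bias(ctx, x, 2.0f, 1.0f); // z = 2.0 * x + 1.0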
3349
// ggml_set
3350
3351
static struct ggml_tensor * ggml_set_impl(
3352
        struct ggml_context * ctx,
3353
        struct ggml_tensor  * a,
3354
        struct ggml_tensor  * b,
3355
        size_t                nb1,
3356
        size_t                nb2,
3357
        size_t                nb3,
3358
        size_t                offset,
3359
0
        bool                  inplace) {
3360
0
    GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
3361
3362
    // make a view of the destination
3363
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3364
3365
0
    GGML_ASSERT(offset < (size_t)(1 << 30));
3366
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
3367
0
    ggml_set_op_params(result, params, sizeof(params));
3368
3369
0
    result->op     = GGML_OP_SET;
3370
0
    result->src[0] = a;
3371
0
    result->src[1] = b;
3372
3373
0
    return result;
3374
0
}
3375
3376
struct ggml_tensor * ggml_set(
3377
        struct ggml_context * ctx,
3378
        struct ggml_tensor  * a,
3379
        struct ggml_tensor  * b,
3380
        size_t                nb1,
3381
        size_t                nb2,
3382
        size_t                nb3,
3383
0
        size_t                offset) {
3384
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
3385
0
}
3386
3387
struct ggml_tensor * ggml_set_inplace(
3388
        struct ggml_context * ctx,
3389
        struct ggml_tensor  * a,
3390
        struct ggml_tensor  * b,
3391
        size_t                nb1,
3392
        size_t                nb2,
3393
        size_t                nb3,
3394
0
        size_t                offset) {
3395
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
3396
0
}
3397
3398
struct ggml_tensor * ggml_set_1d(
3399
        struct ggml_context * ctx,
3400
        struct ggml_tensor  * a,
3401
        struct ggml_tensor  * b,
3402
0
        size_t                offset) {
3403
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
3404
0
}
3405
3406
struct ggml_tensor * ggml_set_1d_inplace(
3407
        struct ggml_context * ctx,
3408
        struct ggml_tensor  * a,
3409
        struct ggml_tensor  * b,
3410
0
        size_t                offset) {
3411
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
3412
0
}
3413
3414
struct ggml_tensor * ggml_set_2d(
3415
        struct ggml_context * ctx,
3416
        struct ggml_tensor  * a,
3417
        struct ggml_tensor  * b,
3418
        size_t                nb1,
3419
0
        size_t                offset) {
3420
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
3421
0
}
3422
3423
struct ggml_tensor * ggml_set_2d_inplace(
3424
        struct ggml_context * ctx,
3425
        struct ggml_tensor  * a,
3426
        struct ggml_tensor  * b,
3427
        size_t                nb1,
3428
0
        size_t                offset) {
3429
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
3430
0
}
3431
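Note that ggml_set_impl packs the size_t strides and offset into int32_t op params, which is why the offset is asserted to fit below 1 << 30; offsets are in bytes, not elements. A minimal sketch of overwriting a sub-range with ggml_set_1d (hypothetical sizes, ctx assumed initialized):

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);

    // write b over elements [2, 6) of (a copy of) a; the offset is in bytes
    struct ggml_tensor * r = ggml_set_1d(ctx, a, b, 2 * ggml_element_size(a));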
3432
// ggml_cpy
3433
3434
static struct ggml_tensor * ggml_cpy_impl(
3435
        struct ggml_context * ctx,
3436
        struct ggml_tensor  * a,
3437
0
        struct ggml_tensor  * b) {
3438
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3439
3440
    // make a view of the destination
3441
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, b);
3442
0
    if (strlen(b->name) > 0) {
3443
0
        ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
3444
0
    } else {
3445
0
        ggml_format_name(result, "%s (copy)", a->name);
3446
0
    }
3447
3448
0
    result->op     = GGML_OP_CPY;
3449
0
    result->src[0] = a;
3450
0
    result->src[1] = b;
3451
3452
0
    return result;
3453
0
}
3454
3455
struct ggml_tensor * ggml_cpy(
3456
        struct ggml_context * ctx,
3457
        struct ggml_tensor * a,
3458
0
        struct ggml_tensor * b) {
3459
0
    return ggml_cpy_impl(ctx, a, b);
3460
0
}
3461
3462
struct ggml_tensor * ggml_cast(
3463
        struct ggml_context * ctx,
3464
        struct ggml_tensor  * a,
3465
0
        enum   ggml_type      type) {
3466
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
3467
0
    ggml_format_name(result, "%s (copy)", a->name);
3468
3469
0
    result->op     = GGML_OP_CPY;
3470
0
    result->src[0] = a;
3471
0
    result->src[1] = result; // note: this self-reference might seem redundant, but it's actually needed by some
3472
                             //       backends for consistency with ggml_cpy_impl() above
3473
3474
0
    return result;
3475
0
}
3476
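ggml_cast is the convenient way to get a type-converting copy without creating the destination tensor by hand, e.g. dropping an F32 tensor to F16; the conversion itself is carried out by the GGML_OP_CPY kernel. A minimal sketch (ctx assumed initialized):

    struct ggml_tensor * x   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 128, 4);
    struct ggml_tensor * x16 = ggml_cast(ctx, x, GGML_TYPE_F16); // same shape, F16 storage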
3477
// ggml_cont
3478
3479
static struct ggml_tensor * ggml_cont_impl(
3480
        struct ggml_context * ctx,
3481
0
        struct ggml_tensor  * a) {
3482
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3483
0
    ggml_format_name(result, "%s (cont)", a->name);
3484
3485
0
    result->op     = GGML_OP_CONT;
3486
0
    result->src[0] = a;
3487
3488
0
    return result;
3489
0
}
3490
3491
struct ggml_tensor * ggml_cont(
3492
        struct ggml_context * ctx,
3493
0
        struct ggml_tensor * a) {
3494
0
    return ggml_cont_impl(ctx, a);
3495
0
}
3496
3497
// make contiguous, with new shape
3498
GGML_API struct ggml_tensor * ggml_cont_1d(
3499
        struct ggml_context * ctx,
3500
        struct ggml_tensor  * a,
3501
0
        int64_t               ne0) {
3502
0
    return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
3503
0
}
3504
3505
GGML_API struct ggml_tensor * ggml_cont_2d(
3506
        struct ggml_context * ctx,
3507
        struct ggml_tensor  * a,
3508
        int64_t               ne0,
3509
0
        int64_t               ne1) {
3510
0
    return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
3511
0
}
3512
3513
GGML_API struct ggml_tensor * ggml_cont_3d(
3514
        struct ggml_context * ctx,
3515
        struct ggml_tensor  * a,
3516
        int64_t               ne0,
3517
        int64_t               ne1,
3518
0
        int64_t               ne2) {
3519
0
    return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
3520
0
}
3521
3522
struct ggml_tensor * ggml_cont_4d(
3523
        struct ggml_context * ctx,
3524
        struct ggml_tensor  * a,
3525
        int64_t               ne0,
3526
        int64_t               ne1,
3527
        int64_t               ne2,
3528
0
        int64_t               ne3) {
3529
0
    GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
3530
3531
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
3532
0
    ggml_format_name(result, "%s (cont)", a->name);
3533
3534
0
    result->op     = GGML_OP_CONT;
3535
0
    result->src[0] = a;
3536
3537
0
    return result;
3538
0
}
3539
3540
// ggml_reshape
3541
3542
struct ggml_tensor * ggml_reshape(
3543
        struct ggml_context * ctx,
3544
        struct ggml_tensor * a,
3545
0
        struct ggml_tensor * b) {
3546
0
    GGML_ASSERT(ggml_is_contiguous(a));
3547
    // as only the shape of b is relevant, and not its memory layout, b is allowed to be non-contiguous.
3548
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3549
3550
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
3551
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3552
3553
0
    result->op     = GGML_OP_RESHAPE;
3554
0
    result->src[0] = a;
3555
3556
0
    return result;
3557
0
}
3558
3559
struct ggml_tensor * ggml_reshape_1d(
3560
        struct ggml_context * ctx,
3561
        struct ggml_tensor  * a,
3562
0
        int64_t               ne0) {
3563
0
    GGML_ASSERT(ggml_is_contiguous(a));
3564
0
    GGML_ASSERT(ggml_nelements(a) == ne0);
3565
3566
0
    const int64_t ne[1] = { ne0 };
3567
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
3568
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3569
3570
0
    result->op     = GGML_OP_RESHAPE;
3571
0
    result->src[0] = a;
3572
3573
0
    return result;
3574
0
}
3575
3576
struct ggml_tensor * ggml_reshape_2d(
3577
        struct ggml_context * ctx,
3578
        struct ggml_tensor  * a,
3579
        int64_t               ne0,
3580
0
        int64_t               ne1) {
3581
0
    GGML_ASSERT(ggml_is_contiguous(a));
3582
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
3583
3584
0
    const int64_t ne[2] = { ne0, ne1 };
3585
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
3586
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3587
3588
0
    result->op     = GGML_OP_RESHAPE;
3589
0
    result->src[0] = a;
3590
3591
0
    return result;
3592
0
}
3593
3594
struct ggml_tensor * ggml_reshape_3d(
3595
        struct ggml_context * ctx,
3596
        struct ggml_tensor  * a,
3597
        int64_t               ne0,
3598
        int64_t               ne1,
3599
0
        int64_t               ne2) {
3600
0
    GGML_ASSERT(ggml_is_contiguous(a));
3601
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
3602
3603
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3604
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
3605
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3606
3607
0
    result->op     = GGML_OP_RESHAPE;
3608
0
    result->src[0] = a;
3609
3610
0
    return result;
3611
0
}
3612
3613
struct ggml_tensor * ggml_reshape_4d(
3614
        struct ggml_context * ctx,
3615
        struct ggml_tensor  * a,
3616
        int64_t               ne0,
3617
        int64_t               ne1,
3618
        int64_t               ne2,
3619
0
        int64_t               ne3) {
3620
0
    GGML_ASSERT(ggml_is_contiguous(a));
3621
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
3622
3623
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3624
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
3625
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3626
3627
0
    result->op     = GGML_OP_RESHAPE;
3628
0
    result->src[0] = a;
3629
3630
0
    return result;
3631
0
}
3632
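Since all the reshape variants only relabel the extents of a contiguous buffer (the result aliases a via ggml_new_tensor_impl with a as the view source), the element counts must match exactly. A minimal sketch (ctx assumed initialized):

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 12);
    struct ggml_tensor * m = ggml_reshape_2d(ctx, x, 4, 3); // 12 elements -> [4, 3], no copy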
3633
static struct ggml_tensor * ggml_view_impl(
3634
        struct ggml_context * ctx,
3635
        struct ggml_tensor  * a,
3636
        int                   n_dims,
3637
        const int64_t       * ne,
3638
0
        size_t                offset) {
3639
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
3640
0
    ggml_format_name(result, "%s (view)", a->name);
3641
3642
0
    ggml_set_op_params(result, &offset, sizeof(offset));
3643
3644
0
    result->op     = GGML_OP_VIEW;
3645
0
    result->src[0] = a;
3646
3647
0
    return result;
3648
0
}
3649
3650
// ggml_view_1d
3651
3652
struct ggml_tensor * ggml_view_1d(
3653
        struct ggml_context * ctx,
3654
        struct ggml_tensor  * a,
3655
        int64_t               ne0,
3656
0
        size_t                offset) {
3657
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
3658
3659
0
    return result;
3660
0
}
3661
3662
// ggml_view_2d
3663
3664
struct ggml_tensor * ggml_view_2d(
3665
        struct ggml_context * ctx,
3666
        struct ggml_tensor  * a,
3667
        int64_t               ne0,
3668
        int64_t               ne1,
3669
        size_t                nb1,
3670
0
        size_t                offset) {
3671
0
    const int64_t ne[2] = { ne0, ne1 };
3672
3673
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
3674
3675
0
    result->nb[1] = nb1;
3676
0
    result->nb[2] = result->nb[1]*ne1;
3677
0
    result->nb[3] = result->nb[2];
3678
3679
0
    return result;
3680
0
}
3681
3682
// ggml_view_3d
3683
3684
struct ggml_tensor * ggml_view_3d(
3685
        struct ggml_context * ctx,
3686
        struct ggml_tensor  * a,
3687
        int64_t               ne0,
3688
        int64_t               ne1,
3689
        int64_t               ne2,
3690
        size_t                nb1,
3691
        size_t                nb2,
3692
0
        size_t                offset) {
3693
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3694
3695
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
3696
3697
0
    result->nb[1] = nb1;
3698
0
    result->nb[2] = nb2;
3699
0
    result->nb[3] = result->nb[2]*ne2;
3700
3701
0
    return result;
3702
0
}
3703
3704
// ggml_view_4d
3705
3706
struct ggml_tensor * ggml_view_4d(
3707
        struct ggml_context * ctx,
3708
        struct ggml_tensor  * a,
3709
        int64_t               ne0,
3710
        int64_t               ne1,
3711
        int64_t               ne2,
3712
        int64_t               ne3,
3713
        size_t                nb1,
3714
        size_t                nb2,
3715
        size_t                nb3,
3716
0
        size_t                offset) {
3717
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3718
3719
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
3720
3721
0
    result->nb[1] = nb1;
3722
0
    result->nb[2] = nb2;
3723
0
    result->nb[3] = nb3;
3724
3725
0
    return result;
3726
0
}
3727
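The 2/3/4-D view constructors take explicit byte strides, so a view can walk the parent tensor with the parent's own strides; this is how sub-matrices are sliced without copying. A minimal sketch that views three rows of an 8x6 matrix starting at row 2 (ctx assumed initialized):

    struct ggml_tensor * m = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 6);
    struct ggml_tensor * v = ggml_view_2d(ctx, m,
            8, 3,           // ne0, ne1: full-width rows, 3 of them
            m->nb[1],       // keep the parent's row stride (bytes)
            2 * m->nb[1]);  // byte offset of row 2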
3728
// ggml_permute
3729
3730
struct ggml_tensor * ggml_permute(
3731
        struct ggml_context * ctx,
3732
        struct ggml_tensor  * a,
3733
        int                   axis0,
3734
        int                   axis1,
3735
        int                   axis2,
3736
0
        int                   axis3) {
3737
0
    GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
3738
0
    GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
3739
0
    GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
3740
0
    GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);
3741
3742
0
    GGML_ASSERT(axis0 != axis1);
3743
0
    GGML_ASSERT(axis0 != axis2);
3744
0
    GGML_ASSERT(axis0 != axis3);
3745
0
    GGML_ASSERT(axis1 != axis2);
3746
0
    GGML_ASSERT(axis1 != axis3);
3747
0
    GGML_ASSERT(axis2 != axis3);
3748
3749
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3750
0
    ggml_format_name(result, "%s (permuted)", a->name);
3751
3752
0
    int ne[GGML_MAX_DIMS];
3753
0
    int nb[GGML_MAX_DIMS];
3754
3755
0
    ne[axis0] = a->ne[0];
3756
0
    ne[axis1] = a->ne[1];
3757
0
    ne[axis2] = a->ne[2];
3758
0
    ne[axis3] = a->ne[3];
3759
3760
0
    nb[axis0] = a->nb[0];
3761
0
    nb[axis1] = a->nb[1];
3762
0
    nb[axis2] = a->nb[2];
3763
0
    nb[axis3] = a->nb[3];
3764
3765
0
    result->ne[0] = ne[0];
3766
0
    result->ne[1] = ne[1];
3767
0
    result->ne[2] = ne[2];
3768
0
    result->ne[3] = ne[3];
3769
3770
0
    result->nb[0] = nb[0];
3771
0
    result->nb[1] = nb[1];
3772
0
    result->nb[2] = nb[2];
3773
0
    result->nb[3] = nb[3];
3774
3775
0
    result->op     = GGML_OP_PERMUTE;
3776
0
    result->src[0] = a;
3777
3778
0
    int32_t params[] = { axis0, axis1, axis2, axis3 };
3779
0
    ggml_set_op_params(result, params, sizeof(params));
3780
3781
0
    return result;
3782
0
}
3783
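ggml_permute only rewires ne/nb, so the result is a strided view; a classic use in llama.cpp-style attention is reordering the head and token dims, followed by ggml_cont when a kernel requires contiguous memory. A minimal sketch (hypothetical sizes, ctx assumed initialized):

    // [head_dim, n_head, n_tokens] -> [head_dim, n_tokens, n_head]
    struct ggml_tensor * q  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 8, 32);
    struct ggml_tensor * qp = ggml_permute(ctx, q, 0, 2, 1, 3); // view, no data movement
    struct ggml_tensor * qc = ggml_cont(ctx, qp);               // materialize if needed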
3784
// ggml_transpose
3785
3786
struct ggml_tensor * ggml_transpose(
3787
        struct ggml_context * ctx,
3788
0
        struct ggml_tensor  * a) {
3789
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3790
0
    ggml_format_name(result, "%s (transposed)", a->name);
3791
3792
0
    result->ne[0] = a->ne[1];
3793
0
    result->ne[1] = a->ne[0];
3794
3795
0
    result->nb[0] = a->nb[1];
3796
0
    result->nb[1] = a->nb[0];
3797
3798
0
    result->op     = GGML_OP_TRANSPOSE;
3799
0
    result->src[0] = a;
3800
3801
0
    return result;
3802
0
}
3803
3804
// ggml_get_rows
3805
3806
struct ggml_tensor * ggml_get_rows(
3807
        struct ggml_context * ctx,
3808
        struct ggml_tensor  * a,
3809
0
        struct ggml_tensor  * b) {
3810
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
3811
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
3812
0
    GGML_ASSERT(b->ne[3] == 1);
3813
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
3814
3815
    // TODO: implement non F32 return
3816
0
    enum ggml_type type = GGML_TYPE_F32;
3817
0
    if (a->type == GGML_TYPE_I32) {
3818
0
        type = a->type;
3819
0
    }
3820
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
3821
3822
0
    result->op     = GGML_OP_GET_ROWS;
3823
0
    result->src[0] = a;
3824
0
    result->src[1] = b;
3825
3826
0
    return result;
3827
0
}
3828
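ggml_get_rows is the embedding-lookup primitive: each I32 index in b selects a row of a, and the gathered rows come back as F32 (per the TODO above, non-F32 output is only passed through for I32 inputs). A minimal sketch (hypothetical sizes, ctx assumed initialized):

    struct ggml_tensor * emb = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 512, 32000); // [n_embd, n_vocab]
    struct ggml_tensor * tok = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 8);          // 8 token ids
    struct ggml_tensor * x   = ggml_get_rows(ctx, emb, tok);                       // [512, 8]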
3829
// ggml_get_rows_back
3830
3831
struct ggml_tensor * ggml_get_rows_back(
3832
        struct ggml_context * ctx,
3833
        struct ggml_tensor  * a,
3834
        struct ggml_tensor  * b,
3835
0
        struct ggml_tensor  * c) {
3836
0
    GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
3837
0
    GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
3838
3839
    // TODO: implement non F32 return
3840
    //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
3841
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
3842
3843
0
    result->op     = GGML_OP_GET_ROWS_BACK;
3844
0
    result->src[0] = a;
3845
0
    result->src[1] = b;
3846
3847
0
    return result;
3848
0
}
3849
3850
// ggml_set_rows
3851
3852
struct ggml_tensor * ggml_set_rows(
3853
        struct ggml_context * ctx,
3854
        struct ggml_tensor  * a,
3855
        struct ggml_tensor  * b,
3856
0
        struct ggml_tensor  * c) {
3857
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
3858
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
3859
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
3860
0
    GGML_ASSERT(b->ne[1] == c->ne[0]);
3861
0
    GGML_ASSERT(b->ne[2] % c->ne[1] == 0);
3862
0
    GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
3863
0
    GGML_ASSERT(c->ne[3] == 1);
3864
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
3865
0
    GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32);
3866
3867
0
    GGML_ASSERT(ggml_is_contiguous_rows(a));
3868
0
    GGML_ASSERT(ggml_is_contiguous_rows(b));
3869
3870
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3871
3872
0
    result->op     = GGML_OP_SET_ROWS;
3873
0
    result->src[0] = b;
3874
0
    result->src[1] = c;
3875
0
    result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931)
3876
3877
0
    return result;
3878
0
}
3879
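ggml_set_rows is the scatter counterpart of ggml_get_rows: F32 rows from b are written into a at the row indices given in c, and the result is a view of a. The asserts leave a's type unconstrained (only b must be F32), which is what allows writing into an F16 or quantized destination, as in KV-cache updates. A minimal sketch (hypothetical sizes, ctx assumed initialized):

    struct ggml_tensor * dst = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 128, 1024); // destination
    struct ggml_tensor * src = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 128, 4);    // rows to write
    struct ggml_tensor * idx = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 4);         // target row ids
    struct ggml_tensor * out = ggml_set_rows(ctx, dst, src, idx);                 // view of dst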
3880
// ggml_diag
3881
3882
struct ggml_tensor * ggml_diag(
3883
        struct ggml_context * ctx,
3884
0
        struct ggml_tensor  * a) {
3885
0
    GGML_ASSERT(a->ne[1] == 1);
3886
3887
0
    const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
3888
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
3889
3890
0
    result->op     = GGML_OP_DIAG;
3891
0
    result->src[0] = a;
3892
3893
0
    return result;
3894
0
}
3895
3896
// ggml_diag_mask_inf
3897
3898
static struct ggml_tensor * ggml_diag_mask_inf_impl(
3899
        struct ggml_context * ctx,
3900
        struct ggml_tensor  * a,
3901
        int                   n_past,
3902
0
        bool                  inplace) {
3903
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3904
3905
0
    int32_t params[] = { n_past };
3906
0
    ggml_set_op_params(result, params, sizeof(params));
3907
3908
0
    result->op     = GGML_OP_DIAG_MASK_INF;
3909
0
    result->src[0] = a;
3910
3911
0
    return result;
3912
0
}
3913
3914
struct ggml_tensor * ggml_diag_mask_inf(
3915
        struct ggml_context * ctx,
3916
        struct ggml_tensor  * a,
3917
0
        int                   n_past) {
3918
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
3919
0
}
3920
3921
struct ggml_tensor * ggml_diag_mask_inf_inplace(
3922
        struct ggml_context * ctx,
3923
        struct ggml_tensor  * a,
3924
0
        int                   n_past) {
3925
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
3926
0
}
3927
3928
// ggml_diag_mask_zero
3929
3930
static struct ggml_tensor * ggml_diag_mask_zero_impl(
3931
        struct ggml_context * ctx,
3932
        struct ggml_tensor  * a,
3933
        int                   n_past,
3934
0
        bool                  inplace) {
3935
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3936
3937
0
    int32_t params[] = { n_past };
3938
0
    ggml_set_op_params(result, params, sizeof(params));
3939
3940
0
    result->op     = GGML_OP_DIAG_MASK_ZERO;
3941
0
    result->src[0] = a;
3942
3943
0
    return result;
3944
0
}
3945
3946
struct ggml_tensor * ggml_diag_mask_zero(
3947
        struct ggml_context * ctx,
3948
        struct ggml_tensor  * a,
3949
0
        int                   n_past) {
3950
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
3951
0
}
3952
3953
struct ggml_tensor * ggml_diag_mask_zero_inplace(
3954
        struct ggml_context * ctx,
3955
        struct ggml_tensor  * a,
3956
0
        int                   n_past) {
3957
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
3958
0
}
3959
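The inf/zero pair differ only in what masked entries become: -INF feeds a subsequent softmax (each row renormalizes over the allowed positions), while zero suits plain masking. A minimal causal-attention sketch (hypothetical sizes, ctx assumed initialized):

    struct ggml_tensor * scores = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 32, 32); // [n_kv, n_q]
    struct ggml_tensor * masked = ggml_diag_mask_inf(ctx, scores, 0 /*n_past*/);
    struct ggml_tensor * probs  = ggml_soft_max(ctx, masked);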
3960
// ggml_soft_max
3961
3962
static struct ggml_tensor * ggml_soft_max_impl(
3963
        struct ggml_context * ctx,
3964
        struct ggml_tensor  * a,
3965
        struct ggml_tensor  * mask,
3966
        float                 scale,
3967
        float                 max_bias,
3968
0
        bool                  inplace) {
3969
0
    GGML_ASSERT(ggml_is_contiguous(a));
3970
3971
0
    if (mask) {
3972
0
        GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
3973
0
        GGML_ASSERT(ggml_is_contiguous(mask));
3974
0
        GGML_ASSERT(mask->ne[0] == a->ne[0]);
3975
0
        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
3976
0
        GGML_ASSERT(a->ne[2]%mask->ne[2] == 0);
3977
0
        GGML_ASSERT(a->ne[3]%mask->ne[3] == 0);
3978
0
    }
3979
3980
0
    if (max_bias > 0.0f) {
3981
0
        GGML_ASSERT(mask);
3982
0
    }
3983
3984
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3985
3986
0
    float params[] = { scale, max_bias };
3987
0
    ggml_set_op_params(result, params, sizeof(params));
3988
3989
0
    result->op     = GGML_OP_SOFT_MAX;
3990
0
    result->src[0] = a;
3991
0
    result->src[1] = mask;
3992
3993
0
    return result;
3994
0
}
3995
3996
struct ggml_tensor * ggml_soft_max(
3997
        struct ggml_context * ctx,
3998
0
        struct ggml_tensor  * a) {
3999
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
4000
0
}
4001
4002
struct ggml_tensor * ggml_soft_max_inplace(
4003
        struct ggml_context * ctx,
4004
0
        struct ggml_tensor  * a) {
4005
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
4006
0
}
4007
4008
struct ggml_tensor * ggml_soft_max_ext(
4009
        struct ggml_context * ctx,
4010
        struct ggml_tensor  * a,
4011
        struct ggml_tensor  * mask,
4012
        float                 scale,
4013
0
        float                 max_bias) {
4014
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
4015
0
}
4016
4017
struct ggml_tensor * ggml_soft_max_ext_inplace(
4018
        struct ggml_context * ctx,
4019
        struct ggml_tensor  * a,
4020
        struct ggml_tensor  * mask,
4021
        float                 scale,
4022
0
        float                 max_bias) {
4023
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
4024
0
}
4025
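ggml_soft_max_ext fuses the attention epilogue: softmax(a*scale + mask), with max_bias > 0 enabling ALiBi-style positional slopes (hence the mask requirement above). A minimal sketch assuming a hypothetical head_dim of 128 (ctx assumed initialized; sqrtf from math.h):

    const int64_t n_kv = 64, n_q = 32, n_head = 8;
    struct ggml_tensor * scores = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_kv, n_q, n_head);
    struct ggml_tensor * mask   = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, n_kv, n_q);

    const float scale = 1.0f/sqrtf(128.0f); // 1/sqrt(head_dim)
    struct ggml_tensor * probs = ggml_soft_max_ext(ctx, scores, mask, scale, 0.0f /*max_bias*/);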
4026
void ggml_soft_max_add_sinks(
4027
        struct ggml_tensor * a,
4028
0
        struct ggml_tensor * sinks) {
4029
0
    if (!sinks) {
4030
0
        a->src[2] = NULL;
4031
0
        return;
4032
0
    }
4033
4034
0
    GGML_ASSERT(a->op == GGML_OP_SOFT_MAX);
4035
0
    GGML_ASSERT(a->src[2] == NULL);
4036
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
4037
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
4038
4039
0
    a->src[2] = sinks;
4040
0
}
4041
4042
// ggml_soft_max_ext_back
4043
4044
static struct ggml_tensor * ggml_soft_max_ext_back_impl(
4045
        struct ggml_context * ctx,
4046
        struct ggml_tensor  * a,
4047
        struct ggml_tensor  * b,
4048
        float                 scale,
4049
        float                 max_bias,
4050
0
        bool                  inplace) {
4051
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4052
4053
0
    result->op     = GGML_OP_SOFT_MAX_BACK;
4054
0
    result->src[0] = a;
4055
0
    result->src[1] = b;
4056
4057
0
    memcpy((float *) result->op_params + 0, &scale,    sizeof(float));
4058
0
    memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));
4059
4060
0
    return result;
4061
0
}
4062
4063
struct ggml_tensor * ggml_soft_max_ext_back(
4064
        struct ggml_context * ctx,
4065
        struct ggml_tensor  * a,
4066
        struct ggml_tensor  * b,
4067
        float                 scale,
4068
0
        float                 max_bias) {
4069
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
4070
0
}
4071
4072
struct ggml_tensor * ggml_soft_max_ext_back_inplace(
4073
        struct ggml_context * ctx,
4074
        struct ggml_tensor  * a,
4075
        struct ggml_tensor  * b,
4076
        float                 scale,
4077
0
        float                 max_bias) {
4078
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
4079
0
}
4080
4081
// ggml_rope
4082
4083
static struct ggml_tensor * ggml_rope_impl(
4084
        struct ggml_context * ctx,
4085
        struct ggml_tensor  * a,
4086
        struct ggml_tensor  * b,
4087
        struct ggml_tensor  * c,
4088
        int                   n_dims,
4089
        int                   sections[GGML_MROPE_SECTIONS],
4090
        int                   mode,
4091
        int                   n_ctx_orig,
4092
        float                 freq_base,
4093
        float                 freq_scale,
4094
        float                 ext_factor,
4095
        float                 attn_factor,
4096
        float                 beta_fast,
4097
        float                 beta_slow,
4098
0
        bool                  inplace) {
4099
0
    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
4100
4101
0
    GGML_ASSERT(ggml_is_vector(b));
4102
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
4103
4104
0
    bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;
4105
0
    if (mrope_used) {
4106
0
        GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expects 4 position ids per token
4107
0
    } else {
4108
0
        GGML_ASSERT(a->ne[2] == b->ne[0]);
4109
0
    }
4110
4111
0
    if (c) {
4112
0
        GGML_ASSERT(c->type == GGML_TYPE_F32);
4113
0
        GGML_ASSERT(c->ne[0] >= n_dims / 2);
4114
0
    }
4115
4116
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4117
4118
0
    int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
4119
0
    memcpy(params +  5, &freq_base,    sizeof(float));
4120
0
    memcpy(params +  6, &freq_scale,   sizeof(float));
4121
0
    memcpy(params +  7, &ext_factor,   sizeof(float));
4122
0
    memcpy(params +  8, &attn_factor,  sizeof(float));
4123
0
    memcpy(params +  9, &beta_fast,    sizeof(float));
4124
0
    memcpy(params + 10, &beta_slow,    sizeof(float));
4125
0
    if (mrope_used && sections) {
4126
0
        memcpy(params + 11, sections,  sizeof(int32_t) * GGML_MROPE_SECTIONS);
4127
0
    } else {
4128
0
        memset(params + 11, 0,         sizeof(int32_t) * GGML_MROPE_SECTIONS);
4129
0
    }
4130
0
    ggml_set_op_params(result, params, sizeof(params));
4131
4132
0
    result->op     = GGML_OP_ROPE;
4133
0
    result->src[0] = a;
4134
0
    result->src[1] = b;
4135
0
    result->src[2] = c;
4136
4137
0
    return result;
4138
0
}
4139
4140
struct ggml_tensor * ggml_rope(
4141
        struct ggml_context * ctx,
4142
        struct ggml_tensor  * a,
4143
        struct ggml_tensor  * b,
4144
        int                   n_dims,
4145
0
        int                   mode) {
4146
0
    return ggml_rope_impl(
4147
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
4148
0
    );
4149
0
}
4150
4151
struct ggml_tensor * ggml_rope_multi(
4152
        struct ggml_context * ctx,
4153
        struct ggml_tensor  * a,
4154
        struct ggml_tensor  * b,
4155
        struct ggml_tensor  * c,
4156
        int                   n_dims,
4157
        int                   sections[GGML_MROPE_SECTIONS],
4158
        int                   mode,
4159
        int                   n_ctx_orig,
4160
        float                 freq_base,
4161
        float                 freq_scale,
4162
        float                 ext_factor,
4163
        float                 attn_factor,
4164
        float                 beta_fast,
4165
0
        float                 beta_slow) {
4166
0
    return ggml_rope_impl(
4167
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4168
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4169
0
    );
4170
0
}
4171
4172
struct ggml_tensor * ggml_rope_multi_inplace(
4173
        struct ggml_context * ctx,
4174
        struct ggml_tensor  * a,
4175
        struct ggml_tensor  * b,
4176
        struct ggml_tensor  * c,
4177
        int                   n_dims,
4178
        int                   sections[GGML_MROPE_SECTIONS],
4179
        int                   mode,
4180
        int                   n_ctx_orig,
4181
        float                 freq_base,
4182
        float                 freq_scale,
4183
        float                 ext_factor,
4184
        float                 attn_factor,
4185
        float                 beta_fast,
4186
0
        float                 beta_slow) {
4187
0
    return ggml_rope_impl(
4188
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4189
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4190
0
    );
4191
0
}
4192
4193
struct ggml_tensor * ggml_rope_inplace(
4194
        struct ggml_context * ctx,
4195
        struct ggml_tensor  * a,
4196
        struct ggml_tensor  * b,
4197
        int                   n_dims,
4198
0
        int                   mode) {
4199
0
    return ggml_rope_impl(
4200
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
4201
0
    );
4202
0
}
4203
4204
struct ggml_tensor * ggml_rope_ext(
4205
        struct ggml_context * ctx,
4206
        struct ggml_tensor  * a,
4207
        struct ggml_tensor  * b,
4208
        struct ggml_tensor  * c,
4209
        int                   n_dims,
4210
        int                   mode,
4211
        int                   n_ctx_orig,
4212
        float                 freq_base,
4213
        float                 freq_scale,
4214
        float                 ext_factor,
4215
        float                 attn_factor,
4216
        float                 beta_fast,
4217
0
        float                 beta_slow) {
4218
0
    return ggml_rope_impl(
4219
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4220
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4221
0
    );
4222
0
}
4223
4224
struct ggml_tensor * ggml_rope_ext_inplace(
4225
        struct ggml_context * ctx,
4226
        struct ggml_tensor  * a,
4227
        struct ggml_tensor  * b,
4228
        struct ggml_tensor  * c,
4229
        int                   n_dims,
4230
        int                   mode,
4231
        int                   n_ctx_orig,
4232
        float                 freq_base,
4233
        float                 freq_scale,
4234
        float                 ext_factor,
4235
        float                 attn_factor,
4236
        float                 beta_fast,
4237
0
        float                 beta_slow) {
4238
0
    return ggml_rope_impl(
4239
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4240
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4241
0
    );
4242
0
}
4243
4244
struct ggml_tensor * ggml_rope_custom(
4245
        struct ggml_context * ctx,
4246
        struct ggml_tensor  * a,
4247
        struct ggml_tensor  * b,
4248
        int                   n_dims,
4249
        int                   mode,
4250
        int                   n_ctx_orig,
4251
        float                 freq_base,
4252
        float                 freq_scale,
4253
        float                 ext_factor,
4254
        float                 attn_factor,
4255
        float                 beta_fast,
4256
0
        float                 beta_slow) {
4257
0
    return ggml_rope_impl(
4258
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4259
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4260
0
    );
4261
0
}
4262
4263
struct ggml_tensor * ggml_rope_custom_inplace(
4264
        struct ggml_context * ctx,
4265
        struct ggml_tensor  * a,
4266
        struct ggml_tensor  * b,
4267
        int                   n_dims,
4268
        int                   mode,
4269
        int                   n_ctx_orig,
4270
        float                 freq_base,
4271
        float                 freq_scale,
4272
        float                 ext_factor,
4273
        float                 attn_factor,
4274
        float                 beta_fast,
4275
0
        float                 beta_slow) {
4276
0
    return ggml_rope_impl(
4277
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4278
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4279
0
    );
4280
0
}
4281
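All of these wrappers funnel into ggml_rope_impl; the common case is ggml_rope_ext with a NEOX-style mode and one I32 position id per token. A minimal sketch with hypothetical LLaMA-like defaults (ctx assumed initialized; beta_fast/beta_slow are the YaRN ramp parameters discussed below):

    struct ggml_tensor * q   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 128, 8, 32); // [head_dim, n_head, n_tokens]
    struct ggml_tensor * pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 32);         // one position id per token

    struct ggml_tensor * qr = ggml_rope_ext(ctx, q, pos, NULL /*freq_factors*/,
            128 /*n_dims*/, GGML_ROPE_TYPE_NEOX, 0 /*n_ctx_orig*/,
            10000.0f /*freq_base*/, 1.0f /*freq_scale*/, 0.0f /*ext_factor*/,
            1.0f /*attn_factor*/, 32.0f /*beta_fast*/, 1.0f /*beta_slow*/);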
4282
// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get
4283
// `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
4284
0
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
4285
0
    return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
4286
0
}
4287
4288
void ggml_rope_yarn_corr_dims(
4289
    int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
4290
0
) {
4291
    // start and end correction dims
4292
0
    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
4293
0
    float end   =  ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
4294
0
    dims[0] = MAX(0, start);
4295
0
    dims[1] = MIN(n_dims - 1, end);
4296
0
}
4297
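For concreteness, with n_dims = 128, n_ctx_orig = 4096, freq_base = 10000, beta_fast = 32 and beta_slow = 1, the corr_dim expression gives roughly 128·ln(4096/(32·2π))/(2·ln 10000) ≈ 20.9 for the start and ≈ 45.0 for the end, so the clamped ramp comes out near [20, 46]:

    float dims[2];
    ggml_rope_yarn_corr_dims(128, 4096, 10000.0f, 32.0f, 1.0f, dims);
    // dims[0] == floorf(~20.9) == 20.0f, dims[1] == ceilf(~45.0) == 46.0f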
4298
// ggml_rope_back
4299
4300
struct ggml_tensor * ggml_rope_ext_back(
4301
        struct ggml_context * ctx,
4302
        struct ggml_tensor  * a,
4303
        struct ggml_tensor  * b,
4304
        struct ggml_tensor  * c,
4305
        int                   n_dims,
4306
        int                   mode,
4307
        int                   n_ctx_orig,
4308
        float                 freq_base,
4309
        float                 freq_scale,
4310
        float                 ext_factor,
4311
        float                 attn_factor,
4312
        float                 beta_fast,
4313
0
        float                 beta_slow) {
4314
0
    struct ggml_tensor * result = ggml_rope_ext(
4315
0
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4316
0
    result->op = GGML_OP_ROPE_BACK;
4317
0
    return result;
4318
0
}
4319
4320
struct ggml_tensor * ggml_rope_multi_back(
4321
        struct ggml_context * ctx,
4322
        struct ggml_tensor  * a,
4323
        struct ggml_tensor  * b,
4324
        struct ggml_tensor  * c,
4325
        int                   n_dims,
4326
        int                   sections[4],
4327
        int                   mode,
4328
        int                   n_ctx_orig,
4329
        float                 freq_base,
4330
        float                 freq_scale,
4331
        float                 ext_factor,
4332
        float                 attn_factor,
4333
        float                 beta_fast,
4334
0
        float                 beta_slow) {
4335
0
    struct ggml_tensor * result = ggml_rope_multi(
4336
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4337
0
    result->op = GGML_OP_ROPE_BACK;
4338
0
    return result;
4339
0
}
4340
// ggml_clamp
4341
4342
struct ggml_tensor * ggml_clamp(
4343
        struct ggml_context * ctx,
4344
        struct ggml_tensor  * a,
4345
        float                 min,
4346
0
        float                 max) {
4347
    // TODO: when implementing backward, fix this:
4348
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
4349
4350
0
    float params[] = { min, max };
4351
0
    ggml_set_op_params(result, params, sizeof(params));
4352
4353
0
    result->op     = GGML_OP_CLAMP;
4354
0
    result->src[0] = a;
4355
4356
0
    return result;
4357
0
}
4358
4359
0
static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4360
0
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
4361
0
}
4362
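This is the standard dilated-convolution output-size formula, floor((ins + 2p − d·(ks − 1) − 1)/s) + 1. For example, ins = 224, ks = 3, s = 2, p = 1, d = 1 gives (224 + 2 − 2 − 1)/2 + 1 = 111 + 1 = 112, i.e. the usual stride-2 halving.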
4363
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
4364
// a: [OC,IC, KH, KW]
4365
// b: [N, IC, IH, IW]
4366
// result: [N, OH, OW, IC*KH*KW]
4367
struct ggml_tensor * ggml_im2col(
4368
        struct ggml_context * ctx,
4369
        struct ggml_tensor  * a,
4370
        struct ggml_tensor  * b,
4371
        int                   s0,
4372
        int                   s1,
4373
        int                   p0,
4374
        int                   p1,
4375
        int                   d0,
4376
        int                   d1,
4377
        bool                  is_2D,
4378
0
        enum ggml_type        dst_type) {
4379
0
    if (is_2D) {
4380
0
        GGML_ASSERT(a->ne[2] == b->ne[2]);
4381
0
    } else {
4382
        //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
4383
0
        GGML_ASSERT(b->ne[1] == a->ne[1]);
4384
0
        GGML_ASSERT(b->ne[3] == 1);
4385
0
    }
4386
4387
0
    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
4388
0
    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4389
4390
0
    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
4391
0
    GGML_ASSERT((OW > 0)           && "b too small compared to a");
4392
4393
0
    const int64_t ne[4] = {
4394
0
        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
4395
0
        OW,
4396
0
        is_2D ? OH : b->ne[2],
4397
0
        is_2D ?      b->ne[3] : 1,
4398
0
    };
4399
4400
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4401
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4402
0
    ggml_set_op_params(result, params, sizeof(params));
4403
4404
0
    result->op     = GGML_OP_IM2COL;
4405
0
    result->src[0] = a;
4406
0
    result->src[1] = b;
4407
4408
0
    return result;
4409
0
}
4410
4411
struct ggml_tensor * ggml_im2col_back(
4412
        struct ggml_context * ctx,
4413
        struct ggml_tensor  * a,
4414
        struct ggml_tensor  * b,
4415
        int64_t             * ne,
4416
        int                   s0,
4417
        int                   s1,
4418
        int                   p0,
4419
        int                   p1,
4420
        int                   d0,
4421
        int                   d1,
4422
0
        bool                  is_2D) {
4423
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4424
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4425
0
    ggml_set_op_params(result, params, sizeof(params));
4426
4427
0
    result->op     = GGML_OP_IM2COL_BACK;
4428
0
    result->src[0] = a;
4429
0
    result->src[1] = b;
4430
4431
0
    return result;
4432
0
}
4433
4434
// ggml_conv_1d
4435
4436
struct ggml_tensor * ggml_conv_1d(
4437
        struct ggml_context * ctx,
4438
        struct ggml_tensor  * a,
4439
        struct ggml_tensor  * b,
4440
        int                   s0,
4441
        int                   p0,
4442
0
        int                   d0) {
4443
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
4444
4445
0
    struct ggml_tensor * result =
4446
0
        ggml_mul_mat(ctx,
4447
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
4448
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2]));                    // [OC,IC, K] => [OC, IC * K]
4449
4450
0
    result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
4451
4452
0
    return result;
4453
0
}
4454
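So ggml_conv_1d is im2col followed by a single matmul against the flattened kernel. A minimal sketch: 16 input channels to 32 output channels, kernel width 3, stride 1, "same" padding (hypothetical sizes, ctx assumed initialized):

    struct ggml_tensor * w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 3, 16, 32);     // [K, IC, OC]
    struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 100, 16);       // [L, IC], N = 1
    struct ggml_tensor * y = ggml_conv_1d(ctx, w, x, 1 /*s0*/, 1 /*p0*/, 1 /*d0*/); // [100, 32]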
4455
// ggml_conv_1d_ph
4456
4457
struct ggml_tensor* ggml_conv_1d_ph(
4458
        struct ggml_context * ctx,
4459
        struct ggml_tensor  * a,
4460
        struct ggml_tensor  * b,
4461
        int                   s,
4462
0
        int                   d) {
4463
0
    return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
4464
0
}
4465
4466
// ggml_conv_1d_dw
4467
4468
struct ggml_tensor * ggml_conv_1d_dw(
4469
        struct ggml_context * ctx,
4470
        struct ggml_tensor  * a,
4471
        struct ggml_tensor  * b,
4472
        int                   s0,
4473
        int                   p0,
4474
0
        int                   d0) {
4475
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
4476
4477
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
4478
4479
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
4480
4481
0
    result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);
4482
4483
0
    return result;
4484
0
}
4485
4486
// ggml_conv_1d_dw_ph
4487
4488
struct ggml_tensor * ggml_conv_1d_dw_ph(
4489
        struct ggml_context * ctx,
4490
        struct ggml_tensor  * a,
4491
        struct ggml_tensor  * b,
4492
        int                   s0,
4493
0
        int                   d0) {
4494
0
    return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
4495
0
}
4496
4497
// ggml_conv_transpose_1d
4498
4499
0
static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4500
0
    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
4501
0
}
4502
4503
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
4504
        struct ggml_context * ctx,
4505
        struct ggml_tensor  * a,
4506
        struct ggml_tensor  * b,
4507
        int                   s0,
4508
        int                   p0,
4509
0
        int                   d0) {
4510
0
    GGML_ASSERT(ggml_is_matrix(b));
4511
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
4512
0
    GGML_ASSERT(a->ne[3] == 1);
4513
4514
0
    GGML_ASSERT(p0 == 0);
4515
0
    GGML_ASSERT(d0 == 1);
4516
4517
0
    const int64_t ne[4] = {
4518
0
        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
4519
0
        a->ne[1], b->ne[2], 1,
4520
0
    };
4521
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4522
4523
0
    int32_t params[] = { s0, p0, d0 };
4524
0
    ggml_set_op_params(result, params, sizeof(params));
4525
4526
0
    result->op     = GGML_OP_CONV_TRANSPOSE_1D;
4527
0
    result->src[0] = a;
4528
0
    result->src[1] = b;
4529
4530
0
    return result;
4531
0
}
4532
4533
// ggml_conv_2d
4534
4535
// a: [OC,IC, KH, KW]
4536
// b: [N, IC, IH, IW]
4537
// result: [N, OC, OH, OW]
4538
struct ggml_tensor * ggml_conv_2d(
4539
        struct ggml_context * ctx,
4540
        struct ggml_tensor  * a,
4541
        struct ggml_tensor  * b,
4542
        int                   s0,
4543
        int                   s1,
4544
        int                   p0,
4545
        int                   p1,
4546
        int                   d0,
4547
0
        int                   d1) {
4548
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
4549
4550
0
    struct ggml_tensor * result =
4551
0
        ggml_mul_mat(ctx,
4552
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0],  im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
4553
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]),  a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]
4554
4555
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
4556
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
4557
4558
4559
0
    return result;
4560
0
}
4561
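The same im2col-plus-matmul decomposition in 2-D, with a final permute/cont to restore [N, OC, OH, OW] order. A minimal sketch: a 3x3 kernel over a 64x64 3-channel image with stride 1 and padding 1, which preserves the spatial size (hypothetical sizes, ctx assumed initialized):

    struct ggml_tensor * w = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 3, 3, 3, 16);  // [KW, KH, IC, OC]
    struct ggml_tensor * x = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 3, 1); // [W, H, C, N]
    struct ggml_tensor * y = ggml_conv_2d(ctx, w, x, 1, 1, 1, 1, 1, 1);            // [64, 64, 16, 1]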
4562
// a: [OC*IC, KD, KH, KW]
4563
// b: [N*IC, ID, IH, IW]
4564
// result: [N*OD, OH, OW, IC * KD * KH * KW]
4565
struct ggml_tensor * ggml_im2col_3d(
4566
        struct ggml_context * ctx,
4567
        struct ggml_tensor  * a,
4568
        struct ggml_tensor  * b,
4569
        int64_t               IC,
4570
        int                   s0, // stride width
4571
        int                   s1, // stride height
4572
        int                   s2, // stride depth
4573
        int                   p0, // padding width
4574
        int                   p1, // padding height
4575
        int                   p2, // padding depth
4576
        int                   d0, // dilation width
4577
        int                   d1, // dilation height
4578
        int                   d2, // dilation depth
4579
0
        enum ggml_type        dst_type) {
4580
0
    const int64_t N = b->ne[3] / IC;
4581
0
    const int64_t ID = b->ne[2];
4582
0
    const int64_t IH = b->ne[1];
4583
0
    const int64_t IW = b->ne[0];
4584
4585
0
    const int64_t OC = a->ne[3] / IC;
4586
0
    UNUSED(OC);
4587
0
    const int64_t KD = a->ne[2];
4588
0
    const int64_t KH = a->ne[1];
4589
0
    const int64_t KW = a->ne[0];
4590
0
    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
4591
0
    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
4592
0
    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);
4593
4594
0
    GGML_ASSERT((OD > 0)  && "b too small compared to a");
4595
0
    GGML_ASSERT((OH > 0)  && "b too small compared to a");
4596
0
    GGML_ASSERT((OW > 0)  && "b too small compared to a");
4597
4598
4599
0
    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};
4600
4601
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4602
0
    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
4603
0
    ggml_set_op_params(result, params, sizeof(params));
4604
4605
0
    result->op     = GGML_OP_IM2COL_3D;
4606
0
    result->src[0] = a;
4607
0
    result->src[1] = b;
4608
4609
0
    return result;
4610
0
}
4611
4612
// a: [OC*IC, KD, KH, KW]
4613
// b: [N*IC, ID, IH, IW]
4614
// result: [N*OC, OD, OH, OW]
4615
struct ggml_tensor * ggml_conv_3d(
4616
        struct ggml_context * ctx,
4617
        struct ggml_tensor  * a,
4618
        struct ggml_tensor  * b,
4619
        int64_t               IC,
4620
        int                   s0, // stride width
4621
        int                   s1, // stride height
4622
        int                   s2, // stride depth
4623
        int                   p0, // padding width
4624
        int                   p1, // padding height
4625
        int                   p2, // padding depth
4626
        int                   d0, // dilation width
4627
        int                   d1, // dilation height
4628
        int                   d2  // dilation depth
4629
0
        ) {
4630
0
    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]
4631
4632
0
    int64_t OC = a->ne[3] / IC;
4633
0
    int64_t N = b->ne[3] / IC;
4634
0
    struct ggml_tensor * result =
4635
0
        ggml_mul_mat(ctx,
4636
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
4637
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                          // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]
4638
4639
0
    int64_t OD = im2col->ne[3] / N;
4640
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
4641
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
4642
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
4643
4644
0
    return result;
4645
0
}
4646
4647
// ggml_conv_2d_sk_p0
4648
4649
struct ggml_tensor * ggml_conv_2d_sk_p0(
4650
        struct ggml_context * ctx,
4651
        struct ggml_tensor  * a,
4652
0
        struct ggml_tensor  * b) {
4653
0
    return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
4654
0
}
4655
4656
// ggml_conv_2d_s1_ph
4657
4658
struct ggml_tensor * ggml_conv_2d_s1_ph(
4659
        struct ggml_context * ctx,
4660
        struct ggml_tensor  * a,
4661
0
        struct ggml_tensor  * b) {
4662
0
    return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
4663
0
}
4664
4665
// ggml_conv_2d_dw
4666
4667
struct ggml_tensor * ggml_conv_2d_dw(
4668
        struct ggml_context * ctx,
4669
        struct ggml_tensor  * a,
4670
        struct ggml_tensor  * b,
4671
        int                   s0,
4672
        int                   s1,
4673
        int                   p0,
4674
        int                   p1,
4675
        int                   d0,
4676
0
        int                   d1) {
4677
0
    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
4678
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
4679
0
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
4680
0
                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
4681
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
4682
4683
0
    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2],  new_a->ne[3], 1);                       // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
4684
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
4685
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
4686
4687
0
    return result;
4688
0
}
4689
4690
// ggml_conv_2d_dw_direct
4691
4692
struct ggml_tensor * ggml_conv_2d_dw_direct(
4693
        struct ggml_context * ctx,
4694
        struct ggml_tensor  * a,
4695
        struct ggml_tensor  * b,
4696
        int                   stride0,
4697
        int                   stride1,
4698
        int                   pad0,
4699
        int                   pad1,
4700
        int                   dilation0,
4701
0
        int                   dilation1) {
4702
0
    GGML_ASSERT(a->ne[2] == 1);
4703
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4704
0
    int64_t ne[4];
4705
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
4706
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
4707
0
    ne[2] = b->ne[2];
4708
0
    ne[3] = b->ne[3];
4709
4710
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4711
4712
0
    if (ggml_is_contiguous_channels(b)) {
4713
        // Result will be permuted the same way as input (CWHN order)
4714
0
        const int64_t type_size = ggml_type_size(result->type);
4715
0
        GGML_ASSERT(ggml_blck_size(result->type) == 1);
4716
0
        result->nb[0] = result->ne[2] * type_size;
4717
0
        result->nb[1] = result->ne[0] * result->nb[0];
4718
0
        result->nb[2] = type_size;
4719
0
    }
4720
4721
0
    int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
4722
0
    ggml_set_op_params(result, params, sizeof(params));
4723
4724
0
    result->op     = GGML_OP_CONV_2D_DW;
4725
0
    result->src[0] = a;
4726
0
    result->src[1] = b;
4727
0
    return result;
4728
0
}
4729
4730
// ggml_conv_2d_direct
4731
4732
struct ggml_tensor * ggml_conv_2d_direct(
4733
        struct ggml_context * ctx,
4734
        struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
4735
        struct ggml_tensor  * b,   // input data [W, H, C, N]
4736
        int                   s0,  // stride dimension 0
4737
        int                   s1,  // stride dimension 1
4738
        int                   p0,  // padding dimension 0
4739
        int                   p1,  // padding dimension 1
4740
        int                   d0,  // dilation dimension 0
4741
0
        int                   d1) {// dilation dimension 1
4742
4743
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
4744
    //GGML_ASSERT(a->type == b->type);
4745
4746
0
    int64_t ne[4];
4747
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4748
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4749
0
    ne[2] = a->ne[3];
4750
0
    ne[3] = b->ne[3];
4751
4752
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4753
4754
0
    ggml_set_op_params_i32(result, 0, s0);
4755
0
    ggml_set_op_params_i32(result, 1, s1);
4756
0
    ggml_set_op_params_i32(result, 2, p0);
4757
0
    ggml_set_op_params_i32(result, 3, p1);
4758
0
    ggml_set_op_params_i32(result, 4, d0);
4759
0
    ggml_set_op_params_i32(result, 5, d1);
4760
4761
0
    result->op = GGML_OP_CONV_2D;
4762
0
    result->src[0] = a;
4763
0
    result->src[1] = b;
4764
4765
0
    return result;
4766
0
}
4767
4768
// ggml_conv_3d_direct
4769
4770
struct ggml_tensor * ggml_conv_3d_direct(
4771
        struct ggml_context * ctx,
4772
        struct ggml_tensor  * a,
4773
        struct ggml_tensor  * b,
4774
        int                   s0,
4775
        int                   s1,
4776
        int                   s2,
4777
        int                   p0,
4778
        int                   p1,
4779
        int                   p2,
4780
        int                   d0,
4781
        int                   d1,
4782
        int                   d2,
4783
        int                   c,
4784
        int                   n,
4785
0
        int                   oc) {
4786
4787
0
    GGML_ASSERT(a->ne[3] == (int64_t) c * oc);
4788
0
    GGML_ASSERT(b->ne[3] == (int64_t) c * n);
4789
4790
0
    int64_t ne[4];
4791
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4792
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4793
0
    ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2);
4794
0
    ne[3] = (int64_t) oc * n;
4795
4796
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4797
4798
0
    ggml_set_op_params_i32(result, 0,  s0);
4799
0
    ggml_set_op_params_i32(result, 1,  s1);
4800
0
    ggml_set_op_params_i32(result, 2,  s2);
4801
0
    ggml_set_op_params_i32(result, 3,  p0);
4802
0
    ggml_set_op_params_i32(result, 4,  p1);
4803
0
    ggml_set_op_params_i32(result, 5,  p2);
4804
0
    ggml_set_op_params_i32(result, 6,  d0);
4805
0
    ggml_set_op_params_i32(result, 7,  d1);
4806
0
    ggml_set_op_params_i32(result, 8,  d2);
4807
0
    ggml_set_op_params_i32(result, 9,  c);
4808
0
    ggml_set_op_params_i32(result, 10, n);
4809
0
    ggml_set_op_params_i32(result, 11, oc);
4810
4811
0
    result->op = GGML_OP_CONV_3D;
4812
0
    result->src[0] = a;
4813
0
    result->src[1] = b;
4814
4815
0
    return result;
4816
0
}
4817
4818
// ggml_conv_transpose_2d_p0
4819
4820
0
static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
4821
0
    return (ins - 1) * s - 2 * p + ks;
4822
0
}
4823
4824
struct ggml_tensor * ggml_conv_transpose_2d_p0(
4825
        struct ggml_context * ctx,
4826
        struct ggml_tensor  * a,
4827
        struct ggml_tensor  * b,
4828
0
        int                   stride) {
4829
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4830
4831
0
    const int64_t ne[4] = {
4832
0
        ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
4833
0
        ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
4834
0
        a->ne[2], b->ne[3],
4835
0
    };
4836
4837
0
    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4838
4839
0
    ggml_set_op_params_i32(result, 0, stride);
4840
4841
0
    result->op     = GGML_OP_CONV_TRANSPOSE_2D;
4842
0
    result->src[0] = a;
4843
0
    result->src[1] = b;
4844
4845
0
    return result;
4846
0
}
4847
4848
// ggml_pool_*
4849
4850
0
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
4851
0
    return (ins + 2 * p - ks) / s + 1;
4852
0
}
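// worked example (illustrative): a 32-wide input pooled with ks = 2, s = 2 and
// p = 0 yields (32 + 2*0 - 2)/2 + 1 = 16 output elements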
4853
4854
// ggml_pool_1d
4855
4856
struct ggml_tensor * ggml_pool_1d(
4857
        struct ggml_context * ctx,
4858
        struct ggml_tensor  * a,
4859
        enum ggml_op_pool     op,
4860
        int                   k0,
4861
        int                   s0,
4862
0
        int                   p0) {
4863
0
    const int64_t ne[4] = {
4864
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4865
0
        a->ne[1],
4866
0
        a->ne[2],
4867
0
        a->ne[3],
4868
0
    };
4869
0
    GGML_ASSERT(ne[0] > 0);
4870
4871
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4872
4873
0
    int32_t params[] = { op, k0, s0, p0 };
4874
0
    ggml_set_op_params(result, params, sizeof(params));
4875
4876
0
    result->op     = GGML_OP_POOL_1D;
4877
0
    result->src[0] = a;
4878
4879
0
    return result;
4880
0
}
4881
4882
// ggml_pool_2d
4883
4884
struct ggml_tensor * ggml_pool_2d(
4885
        struct ggml_context * ctx,
4886
        struct ggml_tensor  * a,
4887
        enum ggml_op_pool     op,
4888
        int                   k0,
4889
        int                   k1,
4890
        int                   s0,
4891
        int                   s1,
4892
        float                 p0,
4893
0
        float                 p1) {
4894
0
    struct ggml_tensor * result;
4895
0
    const int64_t ne[4] = {
4896
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4897
0
        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
4898
0
        a->ne[2],
4899
0
        a->ne[3],
4900
0
    };
4901
0
    GGML_ASSERT(ne[0] > 0);
4902
0
    GGML_ASSERT(ne[1] > 0);
4903
4904
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4905
4906
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4907
0
    ggml_set_op_params(result, params, sizeof(params));
4908
4909
0
    result->op     = GGML_OP_POOL_2D;
4910
0
    result->src[0] = a;
4911
4912
0
    return result;
4913
0
}
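// usage sketch (illustrative; the context `ctx` and the F32 activation tensor `x`
// of shape [W, H, C, N] are assumptions, not part of this file):
//
//   struct ggml_tensor * y = ggml_pool_2d(ctx, x, GGML_OP_POOL_MAX,
//                                         /*k0=*/2, /*k1=*/2,
//                                         /*s0=*/2, /*s1=*/2,
//                                         /*p0=*/0.0f, /*p1=*/0.0f);
//
// this halves both spatial extents of `x`; pass GGML_OP_POOL_AVG for mean pooling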
4914
4915
struct ggml_tensor * ggml_pool_2d_back(
4916
        struct ggml_context * ctx,
4917
        struct ggml_tensor  * a,
4918
        struct ggml_tensor  * af,
4919
        enum ggml_op_pool     op,
4920
        int                   k0,
4921
        int                   k1,
4922
        int                   s0,
4923
        int                   s1,
4924
        float                 p0,
4925
0
        float                 p1) {
4926
0
    struct ggml_tensor * result;
4927
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
4928
4929
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4930
0
    ggml_set_op_params(result, params, sizeof(params));
4931
4932
0
    result->op     = GGML_OP_POOL_2D_BACK;
4933
0
    result->src[0] = a;
4934
0
    result->src[1] = af;
4935
4936
0
    return result;
4937
0
}
4938
4939
// ggml_upscale / ggml_interpolate
4940
4941
static struct ggml_tensor * ggml_interpolate_impl(
4942
        struct ggml_context * ctx,
4943
        struct ggml_tensor  * a,
4944
        int64_t               ne0,
4945
        int64_t               ne1,
4946
        int64_t               ne2,
4947
        int64_t               ne3,
4948
0
        uint32_t              mode) {
4949
0
    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
4950
    // TODO: implement antialias for modes other than bilinear
4951
0
    GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
4952
4953
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
4954
4955
0
    ggml_set_op_params_i32(result, 0, (int32_t)mode);
4956
4957
0
    result->op     = GGML_OP_UPSCALE;
4958
0
    result->src[0] = a;
4959
4960
0
    return result;
4961
0
}
4962
4963
struct ggml_tensor * ggml_upscale(
4964
        struct ggml_context * ctx,
4965
        struct ggml_tensor  * a,
4966
        int                   scale_factor,
4967
0
        enum ggml_scale_mode  mode) {
4968
0
    GGML_ASSERT(scale_factor > 1);
4969
0
    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
4970
0
}
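// usage sketch (illustrative; `ctx` and `x` are assumptions):
//
//   // double both spatial extents of x with nearest-neighbor filtering
//   struct ggml_tensor * up = ggml_upscale(ctx, x, 2, GGML_SCALE_MODE_NEAREST);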
4971
4972
struct ggml_tensor * ggml_upscale_ext(
4973
        struct ggml_context * ctx,
4974
        struct ggml_tensor  * a,
4975
        int                   ne0,
4976
        int                   ne1,
4977
        int                   ne2,
4978
        int                   ne3,
4979
0
        enum ggml_scale_mode  mode) {
4980
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4981
0
}
4982
4983
struct ggml_tensor * ggml_interpolate(
4984
        struct ggml_context * ctx,
4985
        struct ggml_tensor  * a,
4986
        int64_t               ne0,
4987
        int64_t               ne1,
4988
        int64_t               ne2,
4989
        int64_t               ne3,
4990
0
        uint32_t              mode) {
4991
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4992
0
}
4993
4994
// ggml_pad
4995
4996
struct ggml_tensor * ggml_pad(
4997
        struct ggml_context * ctx,
4998
        struct ggml_tensor  * a,
4999
        int                   p0,
5000
        int                   p1,
5001
        int                   p2,
5002
0
        int                   p3) {
5003
0
    return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
5004
0
}
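// usage sketch (illustrative; `ctx` and `x` are assumptions): ggml_pad() only
// appends at the high end of each dimension, so for an [8, 8, 1, 1] tensor `x`
//
//   struct ggml_tensor * padded = ggml_pad(ctx, x, 2, 2, 0, 0);
//
// yields a [10, 10, 1, 1] result; use ggml_pad_ext() for explicit left/right amounts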
5005
5006
// ggml_pad_circular
5007
5008
struct ggml_tensor * ggml_pad_circular(
5009
        struct ggml_context * ctx,
5010
        struct ggml_tensor  * a,
5011
        int                   p0,
5012
        int                   p1,
5013
        int                   p2,
5014
0
        int                   p3) {
5015
0
    return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
5016
0
}
5017
5018
struct ggml_tensor * ggml_pad_ext(
5019
            struct ggml_context * ctx,
5020
            struct ggml_tensor  * a,
5021
            int                  lp0,
5022
            int                  rp0,
5023
            int                  lp1,
5024
            int                  rp1,
5025
            int                  lp2,
5026
            int                  rp2,
5027
            int                  lp3,
5028
            int                  rp3
5029
0
            ) {
5030
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
5031
0
            a->ne[0] + lp0 + rp0,
5032
0
            a->ne[1] + lp1 + rp1,
5033
0
            a->ne[2] + lp2 + rp2,
5034
0
            a->ne[3] + lp3 + rp3);
5035
5036
0
    ggml_set_op_params_i32(result, 0, lp0);
5037
0
    ggml_set_op_params_i32(result, 1, rp0);
5038
0
    ggml_set_op_params_i32(result, 2, lp1);
5039
0
    ggml_set_op_params_i32(result, 3, rp1);
5040
0
    ggml_set_op_params_i32(result, 4, lp2);
5041
0
    ggml_set_op_params_i32(result, 5, rp2);
5042
0
    ggml_set_op_params_i32(result, 6, lp3);
5043
0
    ggml_set_op_params_i32(result, 7, rp3);
5044
0
    ggml_set_op_params_i32(result, 8, 0); // not circular by default
5045
5046
5047
0
    result->op     = GGML_OP_PAD;
5048
0
    result->src[0] = a;
5049
5050
0
    return result;
5051
0
}
5052
5053
// ggml_pad_ext_circular
5054
5055
struct ggml_tensor * ggml_pad_ext_circular(
5056
        struct ggml_context * ctx,
5057
        struct ggml_tensor  * a,
5058
        int                  lp0,
5059
        int                  rp0,
5060
        int                  lp1,
5061
        int                  rp1,
5062
        int                  lp2,
5063
        int                  rp2,
5064
        int                  lp3,
5065
        int                  rp3
5066
0
        ) {
5067
0
    struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
5068
0
    ggml_set_op_params_i32(result, 8, 1); // circular
5069
0
    return result;
5070
0
}
5071
5072
// ggml_pad_reflect_1d
5073
5074
struct ggml_tensor * ggml_pad_reflect_1d(
5075
        struct ggml_context * ctx,
5076
        struct ggml_tensor  * a,
5077
        int                   p0,
5078
0
        int                   p1) {
5079
0
    GGML_ASSERT(p0 >= 0);
5080
0
    GGML_ASSERT(p1 >= 0);
5081
5082
0
    GGML_ASSERT(p0 < a->ne[0]); // padding length on each side must be less than the
5083
0
    GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
5084
5085
0
    GGML_ASSERT(ggml_is_contiguous(a));
5086
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5087
5088
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
5089
0
            a->ne[0] + p0 + p1,
5090
0
            a->ne[1],
5091
0
            a->ne[2],
5092
0
            a->ne[3]);
5093
5094
0
    int32_t params[] = { p0, p1 };
5095
0
    ggml_set_op_params(result, params, sizeof(params));
5096
5097
0
    result->op     = GGML_OP_PAD_REFLECT_1D;
5098
0
    result->src[0] = a;
5099
5100
0
    return result;
5101
0
}
5102
5103
// ggml_roll
5104
5105
struct ggml_tensor * ggml_roll(
5106
        struct ggml_context * ctx,
5107
        struct ggml_tensor  * a,
5108
        int                   shift0,
5109
        int                   shift1,
5110
        int                   shift2,
5111
0
        int                   shift3) {
5112
0
    GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
5113
0
    GGML_ASSERT(abs(shift0) < a->ne[0]);
5114
0
    GGML_ASSERT(abs(shift1) < a->ne[1]);
5115
0
    GGML_ASSERT(abs(shift2) < a->ne[2]);
5116
0
    GGML_ASSERT(abs(shift3) < a->ne[3]);
5117
5118
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5119
5120
0
    ggml_set_op_params_i32(result, 0, shift0);
5121
0
    ggml_set_op_params_i32(result, 1, shift1);
5122
0
    ggml_set_op_params_i32(result, 2, shift2);
5123
0
    ggml_set_op_params_i32(result, 3, shift3);
5124
5125
0
    result->op     = GGML_OP_ROLL;
5126
0
    result->src[0] = a;
5127
5128
0
    return result;
5129
0
}
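// usage sketch (illustrative; `ctx` and `x` are assumptions):
//
//   struct ggml_tensor * rolled = ggml_roll(ctx, x, /*shift0=*/1, 0, 0, 0);
//
// rotates the data one slot along dim 0 with wrap-around, in the spirit of np.roll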
5130
5131
// ggml_timestep_embedding
5132
5133
struct ggml_tensor * ggml_timestep_embedding(
5134
        struct ggml_context * ctx,
5135
        struct ggml_tensor  * timesteps,
5136
        int                   dim,
5137
0
        int                   max_period) {
5138
5139
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);
5140
5141
0
    ggml_set_op_params_i32(result, 0, dim);
5142
0
    ggml_set_op_params_i32(result, 1, max_period);
5143
5144
0
    result->op     = GGML_OP_TIMESTEP_EMBEDDING;
5145
0
    result->src[0] = timesteps;
5146
5147
0
    return result;
5148
0
}
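// usage sketch (illustrative; `ctx` and a 1-D F32 `steps` tensor are assumptions):
//
//   struct ggml_tensor * emb = ggml_timestep_embedding(ctx, steps, 256, 10000);
//
// builds the [256, steps->ne[0]] sinusoidal timestep embedding commonly used by
// diffusion models, with max_period = 10000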
5149
5150
// ggml_tri
5151
5152
struct ggml_tensor * ggml_tri(
5153
    struct ggml_context * ctx,
5154
    struct ggml_tensor  * a,
5155
0
    enum ggml_tri_type    type) {
5156
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5157
5158
0
    GGML_ASSERT(ggml_is_contiguous(a));
5159
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
5160
5161
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5162
5163
0
    ggml_set_op_params_i32(result, 0, type);
5164
5165
0
    result->op = GGML_OP_TRI;
5166
0
    result->src[0] = a;
5167
5168
0
    return result;
5169
0
}
5170
5171
// ggml_fill
5172
5173
static struct ggml_tensor * ggml_fill_impl(
5174
    struct ggml_context * ctx,
5175
    struct ggml_tensor  * a,
5176
    float                 c,
5177
0
    bool                  inplace) {
5178
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5179
0
    GGML_ASSERT(ggml_is_contiguous(a));
5180
5181
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5182
5183
0
    ggml_set_op_params_f32(result, 0, c);
5184
5185
0
    result->op = GGML_OP_FILL;
5186
0
    result->src[0] = a;
5187
5188
0
    return result;
5189
0
}
5190
5191
struct ggml_tensor * ggml_fill(
5192
    struct ggml_context * ctx,
5193
    struct ggml_tensor  * a,
5194
0
    float                 c) {
5195
0
    return ggml_fill_impl(ctx, a, c, false);
5196
0
}
5197
5198
struct ggml_tensor * ggml_fill_inplace(
5199
    struct ggml_context * ctx,
5200
    struct ggml_tensor  * a,
5201
0
    float                 c) {
5202
0
    return ggml_fill_impl(ctx, a, c, true);
5203
0
}
5204
5205
// ggml_argsort
5206
5207
struct ggml_tensor * ggml_argsort(
5208
        struct ggml_context  * ctx,
5209
        struct ggml_tensor   * a,
5210
0
        enum ggml_sort_order   order) {
5211
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
5212
5213
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
5214
5215
0
    ggml_set_op_params_i32(result, 0, (int32_t) order);
5216
5217
0
    result->op     = GGML_OP_ARGSORT;
5218
0
    result->src[0] = a;
5219
5220
0
    return result;
5221
0
}
5222
5223
// ggml_argsort_top_k
5224
5225
struct ggml_tensor * ggml_argsort_top_k(
5226
        struct ggml_context * ctx,
5227
        struct ggml_tensor  * a,
5228
0
        int                   k) {
5229
0
    GGML_ASSERT(a->ne[0] >= k);
5230
5231
0
    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
5232
5233
0
    result = ggml_view_4d(ctx, result,
5234
0
                k, result->ne[1], result->ne[2], result->ne[3],
5235
0
                   result->nb[1], result->nb[2], result->nb[3],
5236
0
                0);
5237
5238
0
    return result;
5239
0
}
5240
5241
// ggml_top_k
5242
5243
struct ggml_tensor * ggml_top_k(
5244
        struct ggml_context * ctx,
5245
        struct ggml_tensor  * a,
5246
0
        int                   k) {
5247
0
    GGML_ASSERT(a->ne[0] >= k);
5248
5249
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
5250
5251
0
    result->op     = GGML_OP_TOP_K;
5252
0
    result->src[0] = a;
5253
5254
0
    return result;
5255
0
}
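// usage sketch (illustrative; `ctx` and an F32 `logits` tensor are assumptions):
//
//   struct ggml_tensor * idx = ggml_top_k(ctx, logits, 40);
//
// produces an I32 tensor of shape [40, ne1, ne2, ne3] holding, per row, the
// indices of the 40 largest values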
5256
5257
// ggml_arange
5258
5259
struct ggml_tensor * ggml_arange(
5260
        struct ggml_context * ctx,
5261
        float                 start,
5262
        float                 stop,
5263
0
        float                 step) {
5264
0
    GGML_ASSERT(stop > start);
5265
5266
0
    const int64_t steps = (int64_t) ceilf((stop - start) / step);
5267
5268
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
5269
5270
0
    ggml_set_op_params_f32(result, 0, start);
5271
0
    ggml_set_op_params_f32(result, 1, stop);
5272
0
    ggml_set_op_params_f32(result, 2, step);
5273
5274
0
    result->op = GGML_OP_ARANGE;
5275
5276
0
    return result;
5277
0
}
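// usage sketch (illustrative; `ctx` is an assumption): since
// steps = ceilf((stop - start)/step),
//
//   struct ggml_tensor * t = ggml_arange(ctx, 0.0f, 10.0f, 2.0f);
//
// yields a 1-D F32 tensor of 5 elements: { 0, 2, 4, 6, 8 }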
5278
5279
// ggml_flash_attn_ext
5280
5281
struct ggml_tensor * ggml_flash_attn_ext(
5282
        struct ggml_context * ctx,
5283
        struct ggml_tensor  * q,
5284
        struct ggml_tensor  * k,
5285
        struct ggml_tensor  * v,
5286
        struct ggml_tensor  * mask,
5287
        float                 scale,
5288
        float                 max_bias,
5289
0
        float                 logit_softcap) {
5290
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5291
    // TODO: check if vT can be multiplied by (k*qT)
5292
5293
0
    GGML_ASSERT(q->ne[3] == k->ne[3]);
5294
0
    GGML_ASSERT(q->ne[3] == v->ne[3]);
5295
5296
0
    if (mask) {
5297
0
        GGML_ASSERT(ggml_is_contiguous(mask));
5298
        //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
5299
5300
0
        GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
5301
0
        GGML_ASSERT(q->ne[3] % mask->ne[3] == 0);
5302
0
    }
5303
5304
0
    if (max_bias > 0.0f) {
5305
0
        GGML_ASSERT(mask);
5306
0
    }
5307
5308
    // permute(0, 2, 1, 3)
5309
0
    int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
5310
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5311
5312
0
    float params[] = { scale, max_bias, logit_softcap };
5313
0
    ggml_set_op_params(result, params, sizeof(params));
5314
5315
0
    result->op     = GGML_OP_FLASH_ATTN_EXT;
5316
0
    result->src[0] = q;
5317
0
    result->src[1] = k;
5318
0
    result->src[2] = v;
5319
0
    result->src[3] = mask;
5320
5321
0
    return result;
5322
0
}
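// usage sketch (illustrative; q, k, v and mask are assumptions laid out per the
// asserts above, with q of shape [D, n_q, n_head, n_batch]); the common choice
// of scale is 1/sqrt(D):
//
//   const float scale = 1.0f/sqrtf((float) q->ne[0]);
//   struct ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, mask, scale,
//                                                  /*max_bias=*/0.0f,
//                                                  /*logit_softcap=*/0.0f);
//
// note the permuted result shape [v->ne[0], n_head, n_q, n_batch] constructed above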
5323
5324
void ggml_flash_attn_ext_set_prec(
5325
        struct ggml_tensor * a,
5326
0
        enum ggml_prec       prec) {
5327
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5328
5329
0
    const int32_t prec_i32 = (int32_t) prec;
5330
5331
0
    ggml_set_op_params_i32(a, 3, prec_i32); // params 0..2 hold scale, max_bias and logit_softcap, so prec goes in slot 3
5332
0
}
5333
5334
enum ggml_prec ggml_flash_attn_ext_get_prec(
5335
0
        const struct ggml_tensor * a) {
5336
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5337
5338
0
    const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);
5339
5340
0
    return (enum ggml_prec) prec_i32;
5341
0
}
5342
5343
void ggml_flash_attn_ext_add_sinks(
5344
        struct ggml_tensor * a,
5345
0
        struct ggml_tensor * sinks) {
5346
0
    if (!sinks) {
5347
0
        a->src[4] = NULL;
5348
0
        return;
5349
0
    }
5350
5351
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5352
0
    GGML_ASSERT(a->src[4] == NULL);
5353
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
5354
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
5355
5356
0
    a->src[4] = sinks;
5357
0
}
5358
5359
// ggml_flash_attn_back
5360
5361
struct ggml_tensor * ggml_flash_attn_back(
5362
        struct ggml_context * ctx,
5363
        struct ggml_tensor  * q,
5364
        struct ggml_tensor  * k,
5365
        struct ggml_tensor  * v,
5366
        struct ggml_tensor  * d,
5367
0
        bool                  masked) {
5368
0
    GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
5369
5370
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5371
    // TODO: check if vT can be multiplied by (k*qT)
5372
5373
    // d shape [D,N,ne2,ne3]
5374
    // q shape [D,N,ne2,ne3]
5375
    // k shape [D,M,kvne2,ne3]
5376
    // v shape [M,D,kvne2,ne3]
5377
5378
0
    const int64_t     D = q->ne[0];
5379
0
    const int64_t     N = q->ne[1];
5380
0
    const int64_t     M = k->ne[1];
5381
0
    const int64_t   ne2 = q->ne[2];
5382
0
    const int64_t   ne3 = q->ne[3];
5383
0
    const int64_t kvne2 = k->ne[2];
5384
5385
0
    GGML_ASSERT(k->ne[0] == D);
5386
0
    GGML_ASSERT(v->ne[0] == M);
5387
0
    GGML_ASSERT(v->ne[1] == D);
5388
0
    GGML_ASSERT(d->ne[0] == D);
5389
0
    GGML_ASSERT(d->ne[1] == N);
5390
0
    GGML_ASSERT(k->ne[2] == kvne2);
5391
0
    GGML_ASSERT(k->ne[3] == ne3);
5392
0
    GGML_ASSERT(v->ne[2] == kvne2);
5393
0
    GGML_ASSERT(v->ne[3] == ne3);
5394
0
    GGML_ASSERT(d->ne[2] == ne2);
5395
0
    GGML_ASSERT(d->ne[3] == ne3);
5396
5397
0
    GGML_ASSERT(ne2 % kvne2 == 0);
5398
5399
    // store gradients of q, k and v as contiguous tensors concatenated in result.
5400
    // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
5401
0
    const int64_t elem_q = ggml_nelements(q);
5402
0
    const int64_t elem_k = ggml_nelements(k);
5403
0
    const int64_t elem_v = ggml_nelements(v);
5404
5405
0
    enum ggml_type result_type = GGML_TYPE_F32;
5406
0
    GGML_ASSERT(ggml_blck_size(result_type) == 1);
5407
0
    const size_t tsize = ggml_type_size(result_type);
5408
5409
0
    const size_t offs_q = 0;
5410
0
    const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
5411
0
    const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
5412
0
    const size_t end    = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);
5413
5414
0
    const size_t nelements = (end + tsize - 1)/tsize;
5415
5416
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);
5417
5418
0
    int32_t masked_i = masked ? 1 : 0;
5419
0
    ggml_set_op_params(result, &masked_i, sizeof(masked_i));
5420
5421
0
    result->op     = GGML_OP_FLASH_ATTN_BACK;
5422
0
    result->src[0] = q;
5423
0
    result->src[1] = k;
5424
0
    result->src[2] = v;
5425
0
    result->src[3] = d;
5426
5427
0
    return result;
5428
0
}
5429
5430
// ggml_ssm_conv
5431
5432
struct ggml_tensor * ggml_ssm_conv(
5433
        struct ggml_context * ctx,
5434
        struct ggml_tensor  * sx,
5435
0
        struct ggml_tensor  * c) {
5436
0
    GGML_ASSERT(ggml_is_3d(sx));
5437
0
    GGML_ASSERT(ggml_is_matrix(c));
5438
5439
0
    const int64_t d_conv  = c->ne[0];
5440
0
    const int64_t d_inner = c->ne[1];
5441
0
    const int64_t n_t     = sx->ne[0] - d_conv + 1; // tokens per sequence
5442
0
    const int64_t n_s     = sx->ne[2];
5443
5444
    // TODO: maybe support other strides than 1?
5445
0
    GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
5446
0
    GGML_ASSERT(sx->ne[1] == d_inner);
5447
0
    GGML_ASSERT(n_t >= 0);
5448
5449
0
    struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
5450
5451
0
    result->op     = GGML_OP_SSM_CONV;
5452
0
    result->src[0] = sx;
5453
0
    result->src[1] = c;
5454
5455
0
    return result;
5456
0
}
5457
5458
// ggml_ssm_scan
5459
5460
struct ggml_tensor * ggml_ssm_scan(
5461
        struct ggml_context * ctx,
5462
        struct ggml_tensor  * s,
5463
        struct ggml_tensor  * x,
5464
        struct ggml_tensor  * dt,
5465
        struct ggml_tensor  * A,
5466
        struct ggml_tensor  * B,
5467
        struct ggml_tensor  * C,
5468
0
        struct ggml_tensor  * ids) {
5469
0
    GGML_ASSERT(ggml_is_contiguous(s));
5470
0
    GGML_ASSERT(ggml_is_contiguous(dt));
5471
0
    GGML_ASSERT(ggml_is_contiguous(A));
5472
0
    GGML_ASSERT(x->nb[0] == ggml_type_size(x->type));
5473
0
    GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
5474
0
    GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
5475
0
    GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]);
5476
0
    GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]);
5477
0
    GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]);
5478
0
    GGML_ASSERT(ggml_are_same_shape(B, C));
5479
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
5480
5481
0
    {
5482
0
        const int64_t d_state      = s->ne[0];
5483
0
        const int64_t head_dim     = x->ne[0];
5484
0
        const int64_t n_head       = x->ne[1];
5485
0
        const int64_t n_seq_tokens = x->ne[2];
5486
0
        const int64_t n_seqs       = x->ne[3];
5487
5488
0
        GGML_ASSERT(dt->ne[0] == n_head);
5489
0
        GGML_ASSERT(dt->ne[1] == n_seq_tokens);
5490
0
        GGML_ASSERT(dt->ne[2] == n_seqs);
5491
0
        GGML_ASSERT(ggml_is_3d(dt));
5492
0
        GGML_ASSERT(s->ne[1] == head_dim);
5493
0
        GGML_ASSERT(s->ne[2] == n_head);
5494
0
        GGML_ASSERT(B->ne[0] == d_state);
5495
0
        GGML_ASSERT(B->ne[2] == n_seq_tokens);
5496
0
        GGML_ASSERT(B->ne[3] == n_seqs);
5497
0
        GGML_ASSERT(ids->ne[0] == n_seqs);
5498
0
        GGML_ASSERT(ggml_is_vector(ids));
5499
0
        GGML_ASSERT(A->ne[1] == n_head);
5500
0
        GGML_ASSERT(ggml_is_matrix(A));
5501
5502
0
        if (A->ne[0] != 1) {
5503
            // Mamba-1 has more granular decay factors
5504
0
            GGML_ASSERT(A->ne[0] == d_state);
5505
0
        }
5506
0
    }
5507
5508
    // concatenated y + ssm_states
5509
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]);
5510
5511
0
    result->op   = GGML_OP_SSM_SCAN;
5512
0
    result->src[0] = s;
5513
0
    result->src[1] = x;
5514
0
    result->src[2] = dt;
5515
0
    result->src[3] = A;
5516
0
    result->src[4] = B;
5517
0
    result->src[5] = C;
5518
0
    result->src[6] = ids;
5519
5520
0
    return result;
5521
0
}
5522
5523
// ggml_win_part
5524
5525
struct ggml_tensor * ggml_win_part(
5526
        struct ggml_context * ctx,
5527
        struct ggml_tensor  * a,
5528
0
        int                   w) {
5529
0
    GGML_ASSERT(a->ne[3] == 1);
5530
0
    GGML_ASSERT(a->type  == GGML_TYPE_F32);
5531
5532
    // padding
5533
0
    const int px = (w - a->ne[1]%w)%w;
5534
0
    const int py = (w - a->ne[2]%w)%w;
5535
5536
0
    const int npx = (px + a->ne[1])/w;
5537
0
    const int npy = (py + a->ne[2])/w;
5538
0
    const int np  = npx*npy;
5539
5540
0
    const int64_t ne[4] = { a->ne[0], w, w, np, };
5541
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5542
5543
0
    int32_t params[] = { npx, npy, w };
5544
0
    ggml_set_op_params(result, params, sizeof(params));
5545
5546
0
    result->op     = GGML_OP_WIN_PART;
5547
0
    result->src[0] = a;
5548
5549
0
    return result;
5550
0
}
5551
5552
// ggml_win_unpart
5553
5554
struct ggml_tensor * ggml_win_unpart(
5555
        struct ggml_context * ctx,
5556
        struct ggml_tensor  * a,
5557
        int                   w0,
5558
        int                   h0,
5559
0
        int                   w) {
5560
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5561
5562
0
    const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
5563
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
5564
5565
0
    int32_t params[] = { w };
5566
0
    ggml_set_op_params(result, params, sizeof(params));
5567
5568
0
    result->op     = GGML_OP_WIN_UNPART;
5569
0
    result->src[0] = a;
5570
5571
0
    return result;
5572
0
}
5573
5574
// ggml_get_rel_pos
5575
5576
struct ggml_tensor * ggml_get_rel_pos(
5577
        struct ggml_context * ctx,
5578
        struct ggml_tensor  * a,
5579
        int                   qh,
5580
0
        int                   kh) {
5581
0
    GGML_ASSERT(qh == kh);
5582
0
    GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
5583
5584
0
    const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
5585
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
5586
5587
0
    result->op     = GGML_OP_GET_REL_POS;
5588
0
    result->src[0] = a;
5589
5590
0
    return result;
5591
0
}
5592
5593
// ggml_add_rel_pos
5594
5595
static struct ggml_tensor * ggml_add_rel_pos_impl(
5596
        struct ggml_context * ctx,
5597
        struct ggml_tensor  * a,
5598
        struct ggml_tensor  * pw,
5599
        struct ggml_tensor  * ph,
5600
0
        bool                  inplace) {
5601
0
    GGML_ASSERT(ggml_are_same_shape(pw, ph));
5602
0
    GGML_ASSERT(ggml_is_contiguous(a));
5603
0
    GGML_ASSERT(ggml_is_contiguous(pw));
5604
0
    GGML_ASSERT(ggml_is_contiguous(ph));
5605
0
    GGML_ASSERT(ph->type == GGML_TYPE_F32);
5606
0
    GGML_ASSERT(pw->type == GGML_TYPE_F32);
5607
0
    GGML_ASSERT(pw->ne[3] == a->ne[2]);
5608
0
    GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
5609
0
    GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
5610
5611
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5612
0
    ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
5613
5614
0
    result->op     = GGML_OP_ADD_REL_POS;
5615
0
    result->src[0] = a;
5616
0
    result->src[1] = pw;
5617
0
    result->src[2] = ph;
5618
5619
0
    return result;
5620
0
}
5621
5622
struct ggml_tensor * ggml_add_rel_pos(
5623
        struct ggml_context * ctx,
5624
        struct ggml_tensor  * a,
5625
        struct ggml_tensor  * pw,
5626
0
        struct ggml_tensor  * ph) {
5627
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
5628
0
}
5629
5630
struct ggml_tensor * ggml_add_rel_pos_inplace(
5631
        struct ggml_context * ctx,
5632
        struct ggml_tensor  * a,
5633
        struct ggml_tensor  * pw,
5634
0
        struct ggml_tensor  * ph) {
5635
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
5636
0
}
5637
5638
// ggml_rwkv_wkv6
5639
5640
struct ggml_tensor * ggml_rwkv_wkv6(
5641
        struct ggml_context * ctx,
5642
        struct ggml_tensor  * k,
5643
        struct ggml_tensor  * v,
5644
        struct ggml_tensor  * r,
5645
        struct ggml_tensor  * tf,
5646
        struct ggml_tensor  * td,
5647
0
        struct ggml_tensor  * state) {
5648
0
    GGML_ASSERT(ggml_is_contiguous(k));
5649
0
    GGML_ASSERT(ggml_is_contiguous(v));
5650
0
    GGML_ASSERT(ggml_is_contiguous(r));
5651
0
    GGML_ASSERT(ggml_is_contiguous(tf));
5652
0
    GGML_ASSERT(ggml_is_contiguous(td));
5653
0
    GGML_ASSERT(ggml_is_contiguous(state));
5654
5655
0
    const int64_t S = k->ne[0];
5656
0
    const int64_t H = k->ne[1];
5657
0
    const int64_t n_tokens = k->ne[2];
5658
0
    const int64_t n_seqs = state->ne[1];
5659
0
    {
5660
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5661
0
        GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
5662
0
        GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
5663
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5664
0
    }
5665
5666
    // concat output and new_state
5667
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5668
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5669
5670
0
    result->op     = GGML_OP_RWKV_WKV6;
5671
0
    result->src[0] = k;
5672
0
    result->src[1] = v;
5673
0
    result->src[2] = r;
5674
0
    result->src[3] = tf;
5675
0
    result->src[4] = td;
5676
0
    result->src[5] = state;
5677
5678
0
    return result;
5679
0
}
5680
5681
// ggml_gated_linear_attn
5682
5683
struct ggml_tensor * ggml_gated_linear_attn(
5684
        struct ggml_context * ctx,
5685
        struct ggml_tensor  * k,
5686
        struct ggml_tensor  * v,
5687
        struct ggml_tensor  * q,
5688
        struct ggml_tensor  * g,
5689
        struct ggml_tensor  * state,
5690
0
        float scale) {
5691
0
    GGML_ASSERT(ggml_is_contiguous(k));
5692
0
    GGML_ASSERT(ggml_is_contiguous(v));
5693
0
    GGML_ASSERT(ggml_is_contiguous(q));
5694
0
    GGML_ASSERT(ggml_is_contiguous(g));
5695
0
    GGML_ASSERT(ggml_is_contiguous(state));
5696
5697
0
    const int64_t S = k->ne[0];
5698
0
    const int64_t H = k->ne[1];
5699
0
    const int64_t n_tokens = k->ne[2];
5700
0
    const int64_t n_seqs = state->ne[1];
5701
0
    {
5702
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5703
0
        GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
5704
0
        GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
5705
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5706
0
    }
5707
5708
    // concat output and new_state
5709
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5710
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5711
5712
0
    ggml_set_op_params_f32(result, 0, scale);
5713
5714
0
    result->op     = GGML_OP_GATED_LINEAR_ATTN;
5715
0
    result->src[0] = k;
5716
0
    result->src[1] = v;
5717
0
    result->src[2] = q;
5718
0
    result->src[3] = g;
5719
0
    result->src[4] = state;
5720
5721
0
    return result;
5722
0
}
5723
5724
// ggml_rwkv_wkv7
5725
5726
struct ggml_tensor * ggml_rwkv_wkv7(
5727
        struct ggml_context * ctx,
5728
        struct ggml_tensor  * r,
5729
        struct ggml_tensor  * w,
5730
        struct ggml_tensor  * k,
5731
        struct ggml_tensor  * v,
5732
        struct ggml_tensor  * a,
5733
        struct ggml_tensor  * b,
5734
0
        struct ggml_tensor  * state) {
5735
0
    GGML_ASSERT(ggml_is_contiguous(r));
5736
0
    GGML_ASSERT(ggml_is_contiguous(w));
5737
0
    GGML_ASSERT(ggml_is_contiguous(k));
5738
0
    GGML_ASSERT(ggml_is_contiguous(v));
5739
0
    GGML_ASSERT(ggml_is_contiguous(a));
5740
0
    GGML_ASSERT(ggml_is_contiguous(b));
5741
0
    GGML_ASSERT(ggml_is_contiguous(state));
5742
5743
0
    const int64_t S = k->ne[0];
5744
0
    const int64_t H = k->ne[1];
5745
0
    const int64_t n_tokens = k->ne[2];
5746
0
    const int64_t n_seqs = state->ne[1];
5747
0
    {
5748
0
        GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
5749
0
        GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
5750
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5751
0
        GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
5752
0
        GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
5753
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5754
0
    }
5755
5756
    // concat output and new_state
5757
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5758
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5759
5760
0
    result->op     = GGML_OP_RWKV_WKV7;
5761
0
    result->src[0] = r;
5762
0
    result->src[1] = w;
5763
0
    result->src[2] = k;
5764
0
    result->src[3] = v;
5765
0
    result->src[4] = a;
5766
0
    result->src[5] = b;
5767
0
    result->src[6] = state;
5768
5769
0
    return result;
5770
0
}
5771
5772
// ggml_unary
5773
5774
static struct ggml_tensor * ggml_unary_impl(
5775
        struct ggml_context * ctx,
5776
        struct ggml_tensor  * a,
5777
        enum ggml_unary_op    op,
5778
0
        bool                  inplace) {
5779
0
    GGML_ASSERT(ggml_is_contiguous_rows(a));
5780
5781
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5782
5783
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
5784
5785
0
    result->op     = GGML_OP_UNARY;
5786
0
    result->src[0] = a;
5787
5788
0
    return result;
5789
0
}
5790
5791
struct ggml_tensor * ggml_unary(
5792
        struct ggml_context * ctx,
5793
        struct ggml_tensor  * a,
5794
0
        enum ggml_unary_op    op) {
5795
0
    return ggml_unary_impl(ctx, a, op, false);
5796
0
}
5797
5798
struct ggml_tensor * ggml_unary_inplace(
5799
        struct ggml_context * ctx,
5800
        struct ggml_tensor  * a,
5801
0
        enum ggml_unary_op    op) {
5802
0
    return ggml_unary_impl(ctx, a, op, true);
5803
0
}
5804
5805
// ggml_map_custom1
5806
5807
static struct ggml_tensor * ggml_map_custom1_impl(
5808
        struct ggml_context      * ctx,
5809
        struct ggml_tensor       * a,
5810
        const  ggml_custom1_op_t   fun,
5811
        int                        n_tasks,
5812
        void                     * userdata,
5813
0
        bool                       inplace) {
5814
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5815
5816
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5817
5818
0
    struct ggml_map_custom1_op_params params = {
5819
0
        /*.fun      =*/ fun,
5820
0
        /*.n_tasks  =*/ n_tasks,
5821
0
        /*.userdata =*/ userdata
5822
0
    };
5823
0
    ggml_set_op_params(result, &params, sizeof(params));
5824
5825
0
    result->op     = GGML_OP_MAP_CUSTOM1;
5826
0
    result->src[0] = a;
5827
5828
0
    return result;
5829
0
}
5830
5831
struct ggml_tensor * ggml_map_custom1(
5832
        struct ggml_context      * ctx,
5833
        struct ggml_tensor       * a,
5834
        const  ggml_custom1_op_t   fun,
5835
        int                        n_tasks,
5836
0
        void                     * userdata) {
5837
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
5838
0
}
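// illustrative callback sketch (hypothetical helper `my_neg`; assumes the
// ggml_custom1_op_t signature from ggml.h and contiguous F32 rows): negate `a`
// into `dst`, splitting rows across the nth worker threads:
//
//   static void my_neg(struct ggml_tensor * dst, const struct ggml_tensor * a,
//                      int ith, int nth, void * userdata) {
//       (void) userdata;
//       const int64_t nr = ggml_nrows(dst);
//       for (int64_t r = ith; r < nr; r += nth) { // each thread takes every nth row
//           const float * src = (const float *)((const char *) a->data   + r*a->nb[1]);
//           float       * out = (float       *)((char       *) dst->data + r*dst->nb[1]);
//           for (int64_t i = 0; i < dst->ne[0]; ++i) {
//               out[i] = -src[i];
//           }
//       }
//   }
//
//   struct ggml_tensor * y = ggml_map_custom1(ctx, x, my_neg, GGML_N_TASKS_MAX, NULL);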
5839
5840
struct ggml_tensor * ggml_map_custom1_inplace(
5841
        struct ggml_context      * ctx,
5842
        struct ggml_tensor       * a,
5843
        const  ggml_custom1_op_t   fun,
5844
        int                        n_tasks,
5845
0
        void                     * userdata) {
5846
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
5847
0
}
5848
5849
// ggml_map_custom2
5850
5851
static struct ggml_tensor * ggml_map_custom2_impl(
5852
        struct ggml_context      * ctx,
5853
        struct ggml_tensor       * a,
5854
        struct ggml_tensor       * b,
5855
        const  ggml_custom2_op_t   fun,
5856
        int                        n_tasks,
5857
        void                     * userdata,
5858
0
        bool                       inplace) {
5859
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5860
5861
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5862
5863
0
    struct ggml_map_custom2_op_params params = {
5864
0
        /*.fun      =*/ fun,
5865
0
        /*.n_tasks  =*/ n_tasks,
5866
0
        /*.userdata =*/ userdata
5867
0
    };
5868
0
    ggml_set_op_params(result, &params, sizeof(params));
5869
5870
0
    result->op     = GGML_OP_MAP_CUSTOM2;
5871
0
    result->src[0] = a;
5872
0
    result->src[1] = b;
5873
5874
0
    return result;
5875
0
}
5876
5877
struct ggml_tensor * ggml_map_custom2(
5878
        struct ggml_context      * ctx,
5879
        struct ggml_tensor       * a,
5880
        struct ggml_tensor       * b,
5881
        const  ggml_custom2_op_t   fun,
5882
        int                        n_tasks,
5883
0
        void                     * userdata) {
5884
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
5885
0
}
5886
5887
struct ggml_tensor * ggml_map_custom2_inplace(
5888
        struct ggml_context      * ctx,
5889
        struct ggml_tensor       * a,
5890
        struct ggml_tensor       * b,
5891
        const  ggml_custom2_op_t   fun,
5892
        int                        n_tasks,
5893
0
        void                     * userdata) {
5894
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
5895
0
}
5896
5897
// ggml_map_custom3
5898
5899
static struct ggml_tensor * ggml_map_custom3_impl(
5900
        struct ggml_context      * ctx,
5901
        struct ggml_tensor       * a,
5902
        struct ggml_tensor       * b,
5903
        struct ggml_tensor       * c,
5904
        const  ggml_custom3_op_t   fun,
5905
        int                        n_tasks,
5906
        void                     * userdata,
5907
0
        bool                       inplace) {
5908
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5909
5910
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5911
5912
0
    struct ggml_map_custom3_op_params params = {
5913
0
        /*.fun      =*/ fun,
5914
0
        /*.n_tasks  =*/ n_tasks,
5915
0
        /*.userdata =*/ userdata
5916
0
    };
5917
0
    ggml_set_op_params(result, &params, sizeof(params));
5918
5919
0
    result->op     = GGML_OP_MAP_CUSTOM3;
5920
0
    result->src[0] = a;
5921
0
    result->src[1] = b;
5922
0
    result->src[2] = c;
5923
5924
0
    return result;
5925
0
}
5926
5927
struct ggml_tensor * ggml_map_custom3(
5928
        struct ggml_context      * ctx,
5929
        struct ggml_tensor       * a,
5930
        struct ggml_tensor       * b,
5931
        struct ggml_tensor       * c,
5932
        const  ggml_custom3_op_t   fun,
5933
        int                        n_tasks,
5934
0
        void                     * userdata) {
5935
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
5936
0
}
5937
5938
struct ggml_tensor * ggml_map_custom3_inplace(
5939
        struct ggml_context      * ctx,
5940
        struct ggml_tensor       * a,
5941
        struct ggml_tensor       * b,
5942
        struct ggml_tensor       * c,
5943
        const  ggml_custom3_op_t   fun,
5944
        int                        n_tasks,
5945
0
        void                     * userdata) {
5946
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
5947
0
}
5948
5949
struct ggml_tensor * ggml_custom_4d(
5950
        struct ggml_context * ctx,
5951
        enum ggml_type        type,
5952
        int64_t               ne0,
5953
        int64_t               ne1,
5954
        int64_t               ne2,
5955
        int64_t               ne3,
5956
        struct ggml_tensor ** args,
5957
        int                   n_args,
5958
        ggml_custom_op_t      fun,
5959
        int                   n_tasks,
5960
0
        void                * userdata) {
5961
5962
0
    GGML_ASSERT(n_args < GGML_MAX_SRC);
5963
5964
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
5965
5966
0
    struct ggml_custom_op_params params = {
5967
0
        /*.fun      =*/ fun,
5968
0
        /*.n_tasks  =*/ n_tasks,
5969
0
        /*.userdata =*/ userdata
5970
0
    };
5971
0
    ggml_set_op_params(result, &params, sizeof(params));
5972
5973
0
    result->op = GGML_OP_CUSTOM;
5974
0
    for (int i = 0; i < n_args; i++) {
5975
0
        result->src[i] = args[i];
5976
0
    }
5977
5978
0
    return result;
5979
0
}
5980
5981
struct ggml_tensor * ggml_custom_inplace(
5982
        struct ggml_context * ctx,
5983
        struct ggml_tensor  * a,
5984
        struct ggml_tensor ** args,
5985
        int                   n_args,
5986
        ggml_custom_op_t      fun,
5987
        int                   n_tasks,
5988
0
        void                * userdata) {
5989
5990
0
    GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
5991
5992
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
5993
5994
0
    struct ggml_custom_op_params params = {
5995
0
        /*.fun      =*/ fun,
5996
0
        /*.n_tasks  =*/ n_tasks,
5997
0
        /*.userdata =*/ userdata
5998
0
    };
5999
0
    ggml_set_op_params(result, &params, sizeof(params));
6000
6001
0
    result->op = GGML_OP_CUSTOM;
6002
0
    result->src[0] = a;
6003
0
    for (int i = 0; i < n_args; i++) {
6004
0
        result->src[i + 1] = args[i];
6005
0
    }
6006
6007
0
    return result;
6008
0
}
6009
// ggml_cross_entropy_loss
6010
6011
struct ggml_tensor * ggml_cross_entropy_loss(
6012
        struct ggml_context * ctx,
6013
        struct ggml_tensor  * a,
6014
0
        struct ggml_tensor  * b) {
6015
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
6016
6017
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
6018
6019
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS;
6020
0
    result->src[0] = a;
6021
0
    result->src[1] = b;
6022
6023
0
    return result;
6024
0
}
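// usage sketch (illustrative; `logits` and `labels` are assumed tensors of equal
// shape):
//
//   struct ggml_tensor * loss = ggml_cross_entropy_loss(ctx, logits, labels);
//
// the result is a single-element tensor suitable as an optimization target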
6025
6026
// ggml_cross_entropy_loss_back
6027
6028
struct ggml_tensor * ggml_cross_entropy_loss_back(
6029
        struct ggml_context * ctx,
6030
        struct ggml_tensor  * a,
6031
        struct ggml_tensor  * b,
6032
0
        struct ggml_tensor  * c) {
6033
0
    GGML_ASSERT(ggml_is_scalar(a));
6034
0
    GGML_ASSERT(ggml_are_same_shape(b, c));
6035
6036
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, b);
6037
6038
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
6039
0
    result->src[0] = a;
6040
0
    result->src[1] = b;
6041
0
    result->src[2] = c;
6042
6043
0
    return result;
6044
0
}
6045
6046
// opt_step_adamw
6047
6048
struct ggml_tensor * ggml_opt_step_adamw(
6049
        struct ggml_context * ctx,
6050
        struct ggml_tensor  * a,
6051
        struct ggml_tensor  * grad,
6052
        struct ggml_tensor  * m,
6053
        struct ggml_tensor  * v,
6054
0
        struct ggml_tensor  * adamw_params) {
6055
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6056
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6057
0
    GGML_ASSERT(ggml_are_same_shape(a, m));
6058
0
    GGML_ASSERT(ggml_are_same_shape(a, v));
6059
0
    GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
6060
0
    GGML_ASSERT(ggml_nelements(adamw_params) == 7);
6061
6062
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6063
6064
0
    result->op     = GGML_OP_OPT_STEP_ADAMW;
6065
0
    result->src[0] = a;
6066
0
    result->src[1] = grad;
6067
0
    result->src[2] = m;
6068
0
    result->src[3] = v;
6069
0
    result->src[4] = adamw_params;
6070
6071
0
    return result;
6072
0
}
6073
6074
// opt_step_sgd
6075
6076
struct ggml_tensor * ggml_opt_step_sgd(
6077
        struct ggml_context * ctx,
6078
        struct ggml_tensor  * a,
6079
        struct ggml_tensor  * grad,
6080
0
        struct ggml_tensor  * params) {
6081
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6082
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6083
0
    GGML_ASSERT(params->type == GGML_TYPE_F32);
6084
0
    GGML_ASSERT(ggml_nelements(params) == 2);
6085
6086
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6087
6088
0
    result->op     = GGML_OP_OPT_STEP_SGD;
6089
0
    result->src[0] = a;
6090
0
    result->src[1] = grad;
6091
0
    result->src[2] = params;
6092
6093
0
    return result;
6094
0
}
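// note (assumption inferred from the asserts above): `params` is a 2-element F32
// tensor carrying the SGD hyperparameters (learning rate and weight decay),
// mirroring the 7-element `adamw_params` tensor required by ggml_opt_step_adamw()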
6095
6096
// solve_tri
6097
6098
struct ggml_tensor * ggml_solve_tri(
6099
        struct ggml_context * ctx,
6100
        struct ggml_tensor  * a,
6101
        struct ggml_tensor  * b,
6102
        bool                  left,
6103
        bool                  lower,
6104
0
        bool                  uni) {
6105
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
6106
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
6107
6108
    // A must be square and lower triangular
6109
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
6110
    // B must have same outer dimension as A
6111
0
    GGML_ASSERT(a->ne[1] == b->ne[1]);
6112
6113
    // batch dimensions must be equal
6114
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
6115
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
6116
6117
0
    GGML_ASSERT(ggml_is_contiguous(a));
6118
0
    GGML_ASSERT(ggml_is_contiguous(b));
6119
6120
0
    GGML_ASSERT(lower && left && !uni); // TODO: support other variants
6121
6122
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
6123
6124
0
    result->op     = GGML_OP_SOLVE_TRI;
6125
0
    result->src[0] = a;
6126
0
    result->src[1] = b;
6127
6128
0
    return result;
6129
0
}
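// usage sketch (illustrative; `A` is an assumed lower-triangular F32 matrix and
// `B` a conforming right-hand side): solve A*X = B, the only variant the asserts
// above currently allow:
//
//   struct ggml_tensor * X = ggml_solve_tri(ctx, A, B,
//                                           /*left=*/true, /*lower=*/true,
//                                           /*uni=*/false);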
6130
6131
////////////////////////////////////////////////////////////////////////////////
6132
6133
0
struct ggml_hash_set ggml_hash_set_new(size_t size) {
6134
0
    size = ggml_hash_size(size);
6135
0
    struct ggml_hash_set result;
6136
0
    result.size = size;
6137
0
    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
6138
0
    result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
6139
0
    return result;
6140
0
}
6141
6142
0
void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
6143
0
    memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
6144
0
}
6145
6146
0
void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
6147
0
    GGML_FREE(hash_set->used);
6148
0
    GGML_FREE(hash_set->keys);
6149
0
}
6150
6151
0
size_t ggml_hash_size(size_t min_sz) {
6152
    // next primes after powers of two
6153
0
    static const size_t primes[] = {
6154
0
        2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
6155
0
        2053, 4099, 8209, 16411, 32771, 65537, 131101,
6156
0
        262147, 524309, 1048583, 2097169, 4194319, 8388617,
6157
0
        16777259, 33554467, 67108879, 134217757, 268435459,
6158
0
        536870923, 1073741827, 2147483659
6159
0
    };
6160
0
    static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
6161
6162
    // find the smallest prime that is larger than or equal to min_sz
6163
0
    size_t l = 0;
6164
0
    size_t r = n_primes;
6165
0
    while (l < r) {
6166
0
        size_t m = (l + r)/2;
6167
0
        if (primes[m] < min_sz) {
6168
0
            l = m + 1;
6169
0
        } else {
6170
0
            r = m;
6171
0
        }
6172
0
    }
6173
0
    size_t sz = l < n_primes ? primes[l] : min_sz | 1;
6174
0
    return sz;
6175
0
}
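// worked example (illustrative): ggml_hash_size(1000) binary-searches the prime
// table and returns 1031, the smallest listed prime >= 1000; sizes beyond the
// table fall back to min_sz | 1 (forced odd)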
6176
6177
struct hash_map {
6178
    struct ggml_hash_set set;
6179
    struct ggml_tensor ** vals;
6180
};
6181
6182
0
static struct hash_map * ggml_new_hash_map(size_t size) {
6183
0
    struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
6184
0
    result->set = ggml_hash_set_new(size);
6185
0
    result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
6186
0
    return result;
6187
0
}
6188
6189
0
static void ggml_hash_map_free(struct hash_map * map) {
6190
0
    ggml_hash_set_free(&map->set);
6191
0
    GGML_FREE(map->vals);
6192
0
    GGML_FREE(map);
6193
0
}
6194
6195
// utility functions to change gradients
6196
// isrc is the index of tensor in cgraph->visited_hash_set.keys
6197
// the corresponding gradient (accumulators) are also at position isrc
6198
// if tensor has a gradient accumulator, modify that accumulator in-place
6199
// else if there is no gradient for tensor, set the corresponding value
6200
// else, just add/subtract/etc. the gradients
6201
6202
static void ggml_add_or_set(
6203
        struct ggml_context * ctx,
6204
        struct ggml_cgraph  * cgraph,
6205
        size_t                isrc,
6206
0
        struct ggml_tensor  * tensor) {
6207
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6208
0
    GGML_ASSERT(src);
6209
0
    if (cgraph->grads[isrc]) {
6210
0
        cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
6211
0
    } else {
6212
0
        cgraph->grads[isrc] = tensor;
6213
0
    }
6214
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6215
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6216
0
}
6217
6218
static void ggml_acc_or_set(
6219
        struct ggml_context * ctx,
6220
        struct ggml_cgraph  * cgraph,
6221
        size_t                isrc,
6222
        struct ggml_tensor  * tensor,
6223
        const  size_t         nb1,
6224
        const  size_t         nb2,
6225
        const  size_t         nb3,
6226
0
        const  size_t         offset) {
6227
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6228
0
    GGML_ASSERT(src);
6229
0
    if (cgraph->grads[isrc]) {
6230
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
6231
0
    } else {
6232
0
        struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
6233
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
6234
0
    }
6235
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
6236
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6237
0
}
6238
6239
static void ggml_add1_or_set(
6240
        struct ggml_context * ctx,
6241
        struct ggml_cgraph  * cgraph,
6242
        size_t                isrc,
6243
0
        struct ggml_tensor  * tensor) {
6244
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6245
0
    GGML_ASSERT(src);
6246
0
    if (cgraph->grads[isrc]) {
6247
0
        cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6248
0
    } else {
6249
0
        cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
6250
0
    }
6251
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6252
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6253
0
}
6254
6255
static void ggml_sub_or_set(
6256
        struct ggml_context * ctx,
6257
        struct ggml_cgraph  * cgraph,
6258
        size_t                isrc,
6259
0
        struct ggml_tensor  * tensor) {
6260
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6261
0
    GGML_ASSERT(src);
6262
0
    if (cgraph->grads[isrc]) {
6263
0
        cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6264
0
    } else {
6265
0
        cgraph->grads[isrc] = ggml_neg(ctx, tensor);
6266
0
    }
6267
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6268
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6269
0
}
6270
6271
static void ggml_compute_backward(
6272
0
        struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
6273
0
    struct ggml_tensor * tensor = cgraph->nodes[i];
6274
0
    struct ggml_tensor * grad   = ggml_graph_get_grad(cgraph, tensor);
6275
6276
0
    if (!grad) {
6277
0
        return;
6278
0
    }
6279
6280
0
    struct ggml_tensor * src0 = tensor->src[0];
6281
0
    struct ggml_tensor * src1 = tensor->src[1];
6282
0
    struct ggml_tensor * src2 = tensor->src[2];
6283
0
    struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
6284
0
    const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
6285
0
    const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
6286
0
    const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
6287
0
    const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
6288
0
    const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
6289
0
    const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
6290
6291
0
    switch (tensor->op) {
6292
0
        case GGML_OP_DUP: {
6293
0
            if (src0_needs_grads) {
6294
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6295
0
            }
6296
0
        } break;
6297
0
        case GGML_OP_ADD: {
6298
0
            if (src0_needs_grads) {
6299
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6300
0
            }
6301
0
            if (src1_needs_grads) {
6302
0
                struct ggml_tensor * tmp = grad;
6303
0
                if (!ggml_are_same_shape(src0, src1)) {
6304
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6305
0
                }
6306
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6307
0
            }
6308
0
        } break;
6309
0
        case GGML_OP_ADD1: {
6310
0
            if (src0_needs_grads) {
6311
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6312
0
            }
6313
0
            if (src1_needs_grads) {
6314
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
6315
0
            }
6316
0
        } break;
6317
0
        case GGML_OP_ACC: {
6318
0
            if (src0_needs_grads) {
6319
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6320
0
            }
6321
0
            if (src1_needs_grads) {
6322
0
                const size_t nb1    = ((int32_t *) tensor->op_params)[0];
6323
0
                const size_t nb2    = ((int32_t *) tensor->op_params)[1];
6324
0
                const size_t nb3    = ((int32_t *) tensor->op_params)[2];
6325
0
                const size_t offset = ((int32_t *) tensor->op_params)[3];
6326
6327
0
                struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
6328
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6329
0
                    nb1, nb2, nb3, offset);
6330
6331
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6332
0
            }
6333
0
        } break;
6334
0
        case GGML_OP_SUB: {
6335
0
            if (src0_needs_grads) {
6336
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6337
0
            }
6338
0
            if (src1_needs_grads) {
6339
0
                ggml_sub_or_set(ctx, cgraph, isrc1, grad);
6340
0
            }
6341
0
        } break;
6342
0
        case GGML_OP_MUL: {
6343
0
            if (src0_needs_grads) {
6344
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
6345
0
            }
6346
0
            if (src1_needs_grads) {
6347
0
                struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
6348
0
                if (!ggml_are_same_shape(src0, src1)) {
6349
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6350
0
                }
6351
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6352
0
            }
6353
0
        } break;
6354
0
        case GGML_OP_DIV: {
6355
0
            if (src0_needs_grads) {
6356
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
6357
0
            }
6358
0
            if (src1_needs_grads) {
6359
0
                ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
6360
0
            }
6361
0
        } break;
6362
0
        case GGML_OP_SQR: {
6363
0
            if (src0_needs_grads) {
6364
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
6365
0
            }
6366
0
        } break;
6367
0
        case GGML_OP_SQRT: {
6368
0
            if (src0_needs_grads) {
6369
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
6370
0
            }
6371
0
        } break;
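        // d/dx sqrt(x) = 1/(2*sqrt(x)); since tensor == sqrt(src0) here, the
        // incoming gradient is propagated as grad * 0.5/tensor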
6372
0
        case GGML_OP_LOG: {
6373
0
            if (src0_needs_grads) {
6374
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
6375
0
            }
6376
0
        } break;
6377
0
        case GGML_OP_SIN: {
6378
0
            if (src0_needs_grads) {
6379
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
6380
0
            }
6381
0
        } break;
6382
0
        case GGML_OP_COS: {
6383
0
            if (src0_needs_grads) {
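                // d(cos(x))/dx = -sin(x), hence the subtraction.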
6384
0
                ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
6385
0
            }
6386
0
        } break;
6387
0
        case GGML_OP_SUM: {
6388
0
            if (src0_needs_grads) {
6389
0
                ggml_add1_or_set(ctx, cgraph, isrc0, grad);
6390
0
            }
6391
0
        } break;
6392
0
        case GGML_OP_SUM_ROWS: {
6393
0
            if (src0_needs_grads) {
6394
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6395
0
            }
6396
0
        } break;
6397
0
        case GGML_OP_MEAN: {
6398
0
            if (src0_needs_grads) {
6399
0
                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false));
6400
0
            }
6401
0
        } break;
6402
0
        case GGML_OP_REPEAT: {
6403
0
            if (src0_needs_grads) {
6404
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
6405
0
            }
6406
0
        } break;
6407
0
        case GGML_OP_REPEAT_BACK: {
6408
0
            if (src0_needs_grads) {
6409
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6410
0
            }
6411
0
        } break;
6412
0
        case GGML_OP_RMS_NORM: {
6413
0
            if (src0_needs_grads) {
6414
0
                float eps;
6415
0
                memcpy(&eps, tensor->op_params, sizeof(float));
6416
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
6417
0
            }
6418
0
        } break;
6419
0
        case GGML_OP_MUL_MAT: {
6420
            // https://cs231n.github.io/optimization-2/#staged
6421
            // # forward pass
6422
            // s0 = np.random.randn(5, 10)
6423
            // s1 = np.random.randn(10, 3)
6424
            // t = s0.dot(s1)
6425
6426
            // # now suppose we had the gradient on t from above in the circuit
6427
            // dt = np.random.randn(*t.shape) # same shape as t
6428
            // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
6429
            // ds1 = t.T.dot(dt)
6430
6431
            // tensor.shape [m,p,qq,rr]
6432
            // src0.shape   [n,m,q1,r1]
6433
            // src1.shape   [n,p,qq,rr]
6434
6435
0
            if (src0_needs_grads) {
6436
0
                GGML_ASSERT(grad->ne[2] == src1->ne[2]);
6437
0
                GGML_ASSERT(grad->ne[3] == src1->ne[3]);
6438
0
                struct ggml_tensor * tmp =
6439
0
                    ggml_out_prod(ctx, // [n,m,qq,rr]
6440
0
                        src1,          // [n,p,qq,rr]
6441
0
                        grad);         // [m,p,qq,rr]
6442
0
                if (!ggml_are_same_shape(tmp, src0)) {
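                    // src0 was broadcast along dims 2/3 in the forward pass; regroup the
                    // nr2 repetitions of tmp into dim 3 with the view below so that
                    // ggml_repeat_back can sum them into src0's shape.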
6443
0
                    GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
6444
0
                    GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
6445
0
                    GGML_ASSERT(tmp->ne[3] == 1);
6446
6447
0
                    const int64_t nr2 = tmp->ne[2] / src0->ne[2];
6448
0
                    const size_t nb2 = tmp->nb[2] * nr2;
6449
0
                    const size_t nb3 = tmp->nb[2];
6450
6451
0
                    tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
6452
0
                    tmp = ggml_repeat_back(ctx, tmp, src0);
6453
0
                }
6454
0
                ggml_add_or_set(ctx, cgraph, isrc0, tmp);
6455
0
            }
6456
0
            if (src1_needs_grads) {
6457
0
                ggml_add_or_set(ctx, cgraph, isrc1,
6458
                        // ggml_mul_mat(ctx,                   // [n,p,qq,rr]
6459
                        //     ggml_cont(ctx,                  // [m,n,q1,r1]
6460
                        //         ggml_transpose(ctx, src0)), // [m,n,q1,r1]
6461
                        //     grad),                          // [m,p,qq,rr]
6462
6463
                        // when src0 is bigger than tensor->grad (this is mostly the case in llama),
6464
                        // avoid transposing src0 and instead transpose the smaller tensor->grad,
6465
                        // and then use ggml_out_prod
6466
0
                        ggml_out_prod(ctx,      // [n,p,qq,rr]
6467
0
                            src0,               // [n,m,q1,r1]
6468
0
                            ggml_transpose(ctx, // [p,m,qq,rr]
6469
0
                                grad)));        // [m,p,qq,rr]
6470
0
            }
6471
0
        } break;
6472
0
        case GGML_OP_SCALE: {
6473
0
            if (src0_needs_grads) {
6474
0
                float s;
6475
0
                memcpy(&s, tensor->op_params, sizeof(float));
6476
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false));
6477
0
            }
6478
0
        } break;
6479
0
        case GGML_OP_SET: {
6480
0
            const size_t nb1    = ((const int32_t *) tensor->op_params)[0];
6481
0
            const size_t nb2    = ((const int32_t *) tensor->op_params)[1];
6482
0
            const size_t nb3    = ((const int32_t *) tensor->op_params)[2];
6483
0
            const size_t offset = ((const int32_t *) tensor->op_params)[3];
6484
6485
0
            struct ggml_tensor * tensor_grad_view = NULL;
6486
6487
0
            if (src0_needs_grads || src1_needs_grads) {
6488
0
                GGML_ASSERT(src0->type == tensor->type);
6489
0
                GGML_ASSERT(!cgraph->grads[isrc0] ||                      cgraph->grads[isrc0]->type == grad->type);
6490
0
                GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);
6491
6492
0
                tensor_grad_view = ggml_view_4d(ctx,
6493
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6494
0
                    nb1, nb2, nb3, offset);
6495
0
            }
6496
6497
0
            if (src0_needs_grads) {
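                // src0's values in the written region were discarded by SET, so no
                // gradient may flow there: take grad everywhere and cancel the region
                // by accumulating its negated view.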
6498
0
                struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
6499
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
6500
0
            }
6501
6502
0
            if (src1_needs_grads) {
6503
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6504
0
            }
6505
0
        } break;
6506
0
        case GGML_OP_CPY: {
6507
            // cpy overwrites the value of src1 with src0 and returns view(src1)
6508
            // the overwriting is mathematically equivalent to:
6509
            // tensor = src0 * 1 + src1 * 0
6510
0
            if (src0_needs_grads) {
6511
                // dsrc0 = dtensor * 1
6512
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
6513
0
            }
6514
0
            if (src1_needs_grads) {
6515
                // dsrc1 = dtensor * 0 -> noop
6516
0
            }
6517
0
        } break;
6518
0
        case GGML_OP_CONT: {
6519
            // same as cpy
6520
0
            if (src0_needs_grads) {
6521
0
                GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
6522
0
                GGML_ASSERT(ggml_is_contiguous(grad));
6523
0
                GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
6524
0
                ggml_add_or_set(ctx, cgraph, isrc0,
6525
0
                    ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
6526
0
            }
6527
0
        } break;
6528
0
        case GGML_OP_RESHAPE: {
6529
0
            if (src0_needs_grads) {
6530
0
                struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
6531
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
6532
0
            }
6533
0
        } break;
6534
0
        case GGML_OP_VIEW: {
6535
0
            if (src0_needs_grads) {
6536
0
                size_t offset;
6537
6538
0
                memcpy(&offset, tensor->op_params, sizeof(offset));
6539
6540
0
                size_t nb1 = tensor->nb[1];
6541
0
                size_t nb2 = tensor->nb[2];
6542
0
                size_t nb3 = tensor->nb[3];
6543
6544
0
                if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
6545
                    // the gradient is typically F32, but src0 may have another type
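                    // nb1/nb2/nb3 and offset are byte strides/offsets expressed in
                    // src0's element size; rescale them to the gradient's element size
                    // so the accumulation addresses the same logical elements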
6546
0
                    size_t ng = ggml_element_size(cgraph->grads[isrc0]);
6547
0
                    size_t n0 = ggml_element_size(src0);
6548
0
                    GGML_ASSERT(offset % n0 == 0);
6549
0
                    GGML_ASSERT(nb1 % n0 == 0);
6550
0
                    GGML_ASSERT(nb2 % n0 == 0);
6551
0
                    GGML_ASSERT(nb3 % n0 == 0);
6552
0
                    offset = (offset / n0) * ng;
6553
0
                    nb1 = (nb1 / n0) * ng;
6554
0
                    nb2 = (nb2 / n0) * ng;
6555
0
                    nb3 = (nb3 / n0) * ng;
6556
0
                }
6557
6558
0
                ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
6559
0
            }
6560
0
        } break;
6561
0
        case GGML_OP_PERMUTE: {
6562
0
            if (src0_needs_grads) {
6563
0
                const int32_t * axes = (const int32_t *) tensor->op_params;
6564
0
                const int axis0 = axes[0] & 0x3;
6565
0
                const int axis1 = axes[1] & 0x3;
6566
0
                const int axis2 = axes[2] & 0x3;
6567
0
                const int axis3 = axes[3] & 0x3;
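                // invert the permutation: axb[axes[i]] = i, so permuting grad by axb
                // undoes the forward permute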
6568
0
                int axb[4] = {0,0,0,0}; // axes backward
6569
0
                axb[axis0] = 0;
6570
0
                axb[axis1] = 1;
6571
0
                axb[axis2] = 2;
6572
0
                axb[axis3] = 3;
6573
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
6574
0
            }
6575
0
        } break;
6576
0
        case GGML_OP_TRANSPOSE: {
6577
0
            if (src0_needs_grads) {
6578
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
6579
0
            }
6580
0
        } break;
6581
0
        case GGML_OP_GET_ROWS: {
6582
0
            if (src0_needs_grads) {
6583
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
6584
0
            }
6585
0
            if (src1_needs_grads) {
6586
                // noop
6587
0
            }
6588
0
        } break;
6589
0
        case GGML_OP_DIAG_MASK_INF: {
6590
0
            if (src0_needs_grads) {
6591
                /* ggml_diag_mask_inf_impl() shouldn't be here */
6592
                /* ref:  https://github.com/ggml-org/llama.cpp/pull/4203#discussion_r1412377992 */
6593
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6594
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6595
0
            }
6596
0
        } break;
6597
0
        case GGML_OP_DIAG_MASK_ZERO: {
6598
0
            if (src0_needs_grads) {
6599
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6600
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6601
0
            }
6602
0
        } break;
6603
0
        case GGML_OP_SOFT_MAX: {
6604
0
            if (src0_needs_grads) {
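                // the softmax Jacobian-vector product only needs the output:
                // dx = (dy - rowsum(dy*y)) * y (up to scale and max_bias), which is
                // why tensor rather than src0 is passed to ggml_soft_max_ext_back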
6605
0
                float scale    = 1.0f;
6606
0
                float max_bias = 0.0f;
6607
6608
0
                memcpy(&scale,    (const float *) tensor->op_params + 0, sizeof(float));
6609
0
                memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
6610
6611
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
6612
0
            }
6613
0
            GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
6614
0
        } break;
6615
0
        case GGML_OP_ROPE: {
6616
0
            if (src0_needs_grads) {
6617
                //const int n_past = ((int32_t *) tensor->op_params)[0];
6618
0
                const int n_dims     = ((const int32_t *) tensor->op_params)[1];
6619
0
                const int mode       = ((const int32_t *) tensor->op_params)[2];
6620
                //const int n_ctx      = ((int32_t *) tensor->op_params)[3];
6621
0
                const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
6622
0
                float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
6623
0
                int sections[4] = {0, 0, 0, 0};
6624
6625
0
                memcpy(&freq_base,   (const float *) tensor->op_params +  5, sizeof(float));
6626
0
                memcpy(&freq_scale,  (const float *) tensor->op_params +  6, sizeof(float));
6627
0
                memcpy(&ext_factor,  (const float *) tensor->op_params +  7, sizeof(float));
6628
0
                memcpy(&attn_factor, (const float *) tensor->op_params +  8, sizeof(float));
6629
0
                memcpy(&beta_fast,   (const float *) tensor->op_params +  9, sizeof(float));
6630
0
                memcpy(&beta_slow,   (const float *) tensor->op_params + 10, sizeof(float));
6631
0
                memcpy(&sections,                    tensor->op_params + 11, sizeof(sections));
6632
6633
0
                struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
6634
0
                    ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
6635
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
6636
0
                    ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
6637
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
6638
0
                ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
6639
0
            }
6640
0
            GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
6641
0
        } break;
6642
0
        case GGML_OP_IM2COL: {
6643
0
            if (src1_needs_grads) {
6644
0
                const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
6645
0
                const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
6646
0
                const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
6647
0
                const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
6648
0
                const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
6649
0
                const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
6650
0
                const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
6651
6652
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
6653
0
            }
6654
0
        } break;
6655
0
        case GGML_OP_POOL_2D: {
6656
0
            if (src0_needs_grads) {
6657
0
                const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
6658
0
                const      int32_t      k0 = ggml_get_op_params_i32(tensor, 1);
6659
0
                const      int32_t      k1 = ggml_get_op_params_i32(tensor, 2);
6660
0
                const      int32_t      s0 = ggml_get_op_params_i32(tensor, 3);
6661
0
                const      int32_t      s1 = ggml_get_op_params_i32(tensor, 4);
6662
0
                const      int32_t      p0 = ggml_get_op_params_i32(tensor, 5);
6663
0
                const      int32_t      p1 = ggml_get_op_params_i32(tensor, 6);
6664
6665
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
6666
0
            }
6667
0
        } break;
6668
0
        case GGML_OP_WIN_PART:
6669
0
        case GGML_OP_WIN_UNPART:
6670
0
        case GGML_OP_UNARY: {
6671
0
            switch (ggml_get_unary_op(tensor)) {
6672
0
                case GGML_UNARY_OP_ABS: {
6673
0
                    if (src0_needs_grads) {
6674
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
6675
0
                    }
6676
0
                } break;
6677
0
                case GGML_UNARY_OP_SGN: {
6678
                    // noop
6679
0
                } break;
6680
0
                case GGML_UNARY_OP_NEG: {
6681
0
                    if (src0_needs_grads) {
6682
0
                        ggml_sub_or_set(ctx, cgraph, isrc0, grad);
6683
0
                    }
6684
0
                } break;
6685
0
                case GGML_UNARY_OP_STEP: {
6686
                    // noop
6687
0
                } break;
6688
0
                case GGML_UNARY_OP_RELU: {
6689
0
                    if (src0_needs_grads) {
6690
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
6691
0
                    }
6692
0
                } break;
6693
0
                case GGML_UNARY_OP_SILU: {
6694
0
                    if (src0_needs_grads) {
6695
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
6696
0
                    }
6697
0
                } break;
6698
0
                case GGML_UNARY_OP_EXP: {
6699
0
                    if (src0_needs_grads) {
6700
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
6701
0
                    }
6702
0
                } break;
6703
0
                case GGML_UNARY_OP_EXPM1: {
6704
0
                    if (src0_needs_grads) {
6705
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
6706
0
                    }
6707
0
                } break;
6708
0
                case GGML_UNARY_OP_SOFTPLUS: {
6709
0
                    if (src0_needs_grads) {
6710
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
6711
0
                    }
6712
0
                } break;
6713
0
                default: {
6714
0
                    fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
6715
0
                        __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
6716
0
                    GGML_ABORT("fatal error");
6717
0
                } //break;
6718
0
            }
6719
0
        } break;
6720
0
        case GGML_OP_CROSS_ENTROPY_LOSS: {
6721
0
            if (src0_needs_grads) {
6722
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
6723
0
            }
6724
0
            GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
6725
0
        } break;
6726
0
        case GGML_OP_GLU: {
6727
0
            switch (ggml_get_glu_op(tensor)) {
6728
0
                case GGML_GLU_OP_SWIGLU: {
6729
0
                    if (src0_needs_grads) {
6730
0
                        GGML_ASSERT(src1 && "backward pass only implemented for split swiglu");
6731
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0));
6732
0
                    }
6733
0
                    if (src1_needs_grads) {
6734
0
                        ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad));
6735
0
                    }
6736
0
                } break;
6737
0
                default: {
6738
0
                    GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor)));
6739
0
                } //break;
6740
0
            }
6741
0
        } break;
6742
0
        case GGML_OP_NONE: {
6743
            // noop
6744
0
        } break;
6745
0
        case GGML_OP_COUNT:
6746
0
        default: {
6747
0
            GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
6748
0
        } //break;
6749
0
    }
6750
6751
0
    GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
6752
0
    GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
6753
0
    GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
6754
0
}
6755
6756
0
static size_t ggml_visit_parents_graph(struct ggml_cgraph * cgraph, struct ggml_tensor * node, bool compute) {
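    // registers node and, recursively, all of its sources in cgraph; returns the
    // node's slot in the visited hash set, while use_counts accumulates how many
    // graph edges consume each tensor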
6757
0
    if (node->op != GGML_OP_NONE && compute) {
6758
0
        node->flags |= GGML_TENSOR_FLAG_COMPUTE;
6759
0
    }
6760
6761
0
    const size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
6762
0
    GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
6763
6764
0
    if (ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
6765
        // already visited
6766
6767
0
        if (compute) {
6768
            // even so, propagate the compute flag to any sources that do not have it yet
6769
0
            for (int i = 0; i < GGML_MAX_SRC; ++i) {
6770
0
                struct ggml_tensor * src = node->src[i];
6771
0
                if (src && ((src->flags & GGML_TENSOR_FLAG_COMPUTE) == 0)) {
6772
0
                    ggml_visit_parents_graph(cgraph, src, true);
6773
0
                }
6774
0
            }
6775
0
        }
6776
6777
0
        return node_hash_pos;
6778
0
    }
6779
6780
    // This is the first time we see this node in the current graph.
6781
0
    cgraph->visited_hash_set.keys[node_hash_pos] = node;
6782
0
    ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
6783
0
    cgraph->use_counts[node_hash_pos] = 0;
6784
6785
0
    for (int i = 0; i < GGML_MAX_SRC; ++i) {
6786
0
        const int k =
6787
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
6788
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
6789
0
            /* unknown order, just fall back to using i */ i;
6790
6791
0
        struct ggml_tensor * src = node->src[k];
6792
0
        if (src) {
6793
0
            const size_t src_hash_pos = ggml_visit_parents_graph(cgraph, src, compute);
6794
6795
            // Update the use count for this operand.
6796
0
            cgraph->use_counts[src_hash_pos]++;
6797
0
        }
6798
0
    }
6799
6800
0
    if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
6801
        // reached a leaf node, not part of the gradient graph (e.g. a constant)
6802
0
        GGML_ASSERT(cgraph->n_leafs < cgraph->size);
6803
6804
0
        if (strlen(node->name) == 0) {
6805
0
            ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
6806
0
        }
6807
6808
0
        cgraph->leafs[cgraph->n_leafs] = node;
6809
0
        cgraph->n_leafs++;
6810
0
    } else {
6811
0
        GGML_ASSERT(cgraph->n_nodes < cgraph->size);
6812
6813
0
        if (strlen(node->name) == 0) {
6814
0
            ggml_format_name(node, "node_%d", cgraph->n_nodes);
6815
0
        }
6816
6817
0
        cgraph->nodes[cgraph->n_nodes] = node;
6818
0
        cgraph->n_nodes++;
6819
0
    }
6820
6821
0
    return node_hash_pos;
6822
0
}
6823
6824
0
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand, bool compute) {
6825
0
    if (!expand) {
6826
        // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
6827
0
        ggml_graph_clear(cgraph);
6828
0
    }
6829
6830
0
    const int n_old = cgraph->n_nodes;
6831
6832
0
    ggml_visit_parents_graph(cgraph, tensor, compute);
6833
6834
0
    const int n_new = cgraph->n_nodes - n_old;
6835
0
    GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
6836
6837
0
    if (n_new > 0) {
6838
        // the last added node should always be the starting point
6839
0
        GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
6840
0
    }
6841
0
}
6842
6843
struct ggml_tensor * ggml_build_forward_select(
6844
        struct ggml_cgraph  * cgraph,
6845
        struct ggml_tensor ** tensors,
6846
        int                   n_tensors,
6847
0
        int                   idx) {
6848
0
    GGML_ASSERT(idx >= 0 && idx < n_tensors);
6849
6850
0
    for (int i = 0; i < n_tensors; i++) {
6851
0
        ggml_build_forward_impl(cgraph, tensors[i], true, i == idx ? true : false);
6852
0
    }
6853
6854
0
    return tensors[idx];
6855
0
}
6856
6857
0
void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
6858
0
    ggml_build_forward_impl(cgraph, tensor, true, true);
6859
0
}
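// A minimal training-setup sketch (assumes a ggml_context `ctx` with enough memory;
// `weights` and `loss` are illustrative tensor names, not part of the API):
//
//   struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
//   ggml_set_param(weights);                   // mark trainable tensors
//   ggml_set_loss(loss);                       // mark the scalar F32 loss
//   ggml_build_forward_expand(gf, loss);       // record the forward graph
//   ggml_build_backward_expand(ctx, gf, NULL); // append the backward graph
//   ggml_graph_reset(gf);                      // seed d(loss) = 1, zero all other grads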
6860
6861
void ggml_build_backward_expand(
6862
        struct ggml_context *  ctx,
6863
        struct ggml_cgraph  *  cgraph,
6864
0
        struct ggml_tensor  ** grad_accs) {
6865
0
    GGML_ASSERT(cgraph->n_nodes > 0);
6866
0
    GGML_ASSERT(cgraph->grads);
6867
0
    GGML_ASSERT(cgraph->grad_accs);
6868
6869
0
    const int n_nodes_f = cgraph->n_nodes;
6870
6871
0
    memset(cgraph->grads,     0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6872
0
    memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6873
0
    bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
6874
6875
0
    {
6876
0
        bool any_params = false;
6877
0
        bool any_loss   = false;
6878
0
        for (int i = 0; i < n_nodes_f; ++i) {
6879
0
            struct ggml_tensor * node = cgraph->nodes[i];
6880
0
            any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
6881
0
            any_loss   = any_loss   || (node->flags & GGML_TENSOR_FLAG_LOSS);
6882
0
        }
6883
0
        GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
6884
0
        GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
6885
0
    }
6886
6887
0
    for (int i = 0; i < n_nodes_f; ++i) {
6888
0
        struct ggml_tensor * node = cgraph->nodes[i];
6889
6890
0
        if (node->type == GGML_TYPE_I32) {
6891
0
            continue;
6892
0
        }
6893
6894
0
        bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
6895
0
        bool ignore_src[GGML_MAX_SRC] = {false};
6896
0
        switch (node->op) {
6897
            // gradients in node->src[0] for one reason or another have no effect on output gradients
6898
0
            case GGML_OP_IM2COL:      // only used for its shape
6899
0
            case GGML_OP_IM2COL_BACK: // same as IM2COL
6900
0
                ignore_src[0] = true;
6901
0
                break;
6902
0
            case GGML_OP_UNARY: {
6903
0
                const enum ggml_unary_op uop = ggml_get_unary_op(node);
6904
                // SGN and STEP unary ops are piecewise constant
6905
0
                if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
6906
0
                    ignore_src[0] = true;
6907
0
                }
6908
0
            } break;
6909
6910
            // gradients in node->src[1] for one reason or another have no effect on output gradients
6911
0
            case GGML_OP_CPY:           // gradients in CPY target are irrelevant
6912
0
            case GGML_OP_GET_ROWS:      // row indices not differentiable
6913
0
            case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
6914
0
            case GGML_OP_ROPE:          // positions not differentiable
6915
0
                ignore_src[1] = true;
6916
0
                break;
6917
6918
0
            default:
6919
0
                break;
6920
0
        }
6921
0
        for (int j = 0; j < GGML_MAX_SRC; ++j) {
6922
0
            if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
6923
0
                continue;
6924
0
            }
6925
0
            GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
6926
0
            node_needs_grad = true;
6927
0
            break;
6928
0
        }
6929
0
        if (!node_needs_grad) {
6930
0
            continue;
6931
0
        }
6932
6933
        // inplace operations are currently not supported
6934
0
        GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
6935
0
            node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
6936
6937
0
        const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
6938
0
        GGML_ASSERT(ihash != GGML_HASHSET_FULL);
6939
0
        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
6940
0
        if (grad_accs && grad_accs[i]) {
6941
0
            cgraph->grad_accs[ihash] = grad_accs[i];
6942
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6943
0
        } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
6944
            // loss tensors always need a gradient accumulator
6945
0
            cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
6946
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6947
0
        }
6948
0
        grads_needed[ihash] = true;
6949
0
    }
6950
6951
0
    for (int i = n_nodes_f - 1; i >= 0; --i) {
6952
        // inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation
6953
        // use allocator to automatically make inplace operations
6954
0
        ggml_compute_backward(ctx, cgraph, i, grads_needed);
6955
0
    }
6956
6957
0
    free(grads_needed);
6958
0
}
6959
6960
0
static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
6961
0
    void * ptr = *p;
6962
0
    ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
6963
0
    *p = (void *) ((char *) ptr + size);
6964
0
    return ptr;
6965
0
}
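// ggml_graph_nbytes measures the graph layout by replaying the same bump-pointer
// sequence as ggml_new_graph_custom over a NULL base pointer; the final pointer
// value is therefore the total number of bytes required.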
6966
6967
0
static size_t ggml_graph_nbytes(size_t size, bool grads) {
6968
0
    size_t hash_size = ggml_hash_size(size * 2);
6969
0
    void * p = 0;
6970
0
    incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
6971
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
6972
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
6973
0
    incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts
6974
0
    incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
6975
0
    if (grads) {
6976
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
6977
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
6978
0
    }
6979
0
    incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
6980
6981
0
    size_t nbytes = (size_t) p;
6982
0
    return nbytes;
6983
0
}
6984
6985
0
size_t ggml_graph_overhead_custom(size_t size, bool grads) {
6986
0
    return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
6987
0
}
6988
6989
0
size_t ggml_graph_overhead(void) {
6990
0
    return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
6991
0
}
6992
6993
0
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
6994
0
    const size_t obj_size = ggml_graph_nbytes(size, grads);
6995
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
6996
0
    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
6997
6998
    // the size of the hash table is doubled since it needs to hold both nodes and leafs
6999
0
    size_t hash_size = ggml_hash_size(size * 2);
7000
7001
0
    void * p = cgraph + 1;
7002
7003
0
    struct ggml_tensor ** nodes_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7004
0
    struct ggml_tensor ** leafs_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7005
0
    int32_t             * use_counts_ptr =         incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t));
7006
0
    struct ggml_tensor ** hash_keys_ptr  =         incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7007
0
    struct ggml_tensor ** grads_ptr      = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
7008
0
    struct ggml_tensor ** grad_accs_ptr  = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
7009
7010
0
    ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
7011
7012
    // check that we allocated the correct amount of memory
7013
0
    assert(obj_size == (size_t)((char *)p - (char *)cgraph));
7014
7015
0
    *cgraph = (struct ggml_cgraph) {
7016
0
        /*.size         =*/ size,
7017
0
        /*.n_nodes      =*/ 0,
7018
0
        /*.n_leafs      =*/ 0,
7019
0
        /*.nodes        =*/ nodes_ptr,
7020
0
        /*.grads        =*/ grads_ptr,
7021
0
        /*.grad_accs    =*/ grad_accs_ptr,
7022
0
        /*.leafs        =*/ leafs_ptr,
7023
0
        /*.use_counts   =*/ use_counts_ptr,
7024
0
        /*.hash_table   =*/ { hash_size, hash_used, hash_keys_ptr },
7025
0
        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
7026
0
    };
7027
7028
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7029
0
    if (grads) {
7030
0
        memset(cgraph->grads,     0, hash_size*sizeof(struct ggml_tensor *));
7031
0
        memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
7032
0
    }
7033
7034
0
    return cgraph;
7035
0
}
7036
7037
0
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
7038
0
    return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
7039
0
}
7040
7041
0
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
7042
0
    struct ggml_cgraph cgraph = {
7043
0
        /*.size             =*/ 0,
7044
0
        /*.n_nodes          =*/ i1 - i0,
7045
0
        /*.n_leafs          =*/ 0,
7046
0
        /*.nodes            =*/ cgraph0->nodes + i0,
7047
0
        /*.grads            =*/ NULL, // gradients would need visited_hash_set
7048
0
        /*.grad_accs        =*/ NULL,
7049
0
        /*.leafs            =*/ NULL,
7050
0
        /*.use_counts       =*/ cgraph0->use_counts,
7051
0
        /*.visited_hash_set =*/ cgraph0->visited_hash_set,
7052
0
        /*.order            =*/ cgraph0->order,
7053
0
    };
7054
7055
0
    return cgraph;
7056
0
}
7057
7058
0
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
7059
0
    GGML_ASSERT(dst->size >= src->n_leafs);
7060
0
    GGML_ASSERT(dst->size >= src->n_nodes);
7061
0
    GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
7062
7063
0
    dst->n_leafs = src->n_leafs;
7064
0
    dst->n_nodes = src->n_nodes;
7065
0
    dst->order   = src->order;
7066
7067
0
    for (int i = 0; i < src->n_leafs; ++i) {
7068
0
        dst->leafs[i] = src->leafs[i];
7069
0
    }
7070
7071
0
    for (int i = 0; i < src->n_nodes; ++i) {
7072
0
        dst->nodes[i] = src->nodes[i];
7073
0
    }
7074
7075
0
    for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
7076
        // copy all hashset keys (tensors) that are in use
7077
0
        if (ggml_bitset_get(src->visited_hash_set.used, i)) {
7078
0
            size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
7079
0
            dst->use_counts[new_hash_pos] = src->use_counts[i];
7080
0
        }
7081
0
    }
7082
7083
0
    if (dst->grads) {
7084
0
        memset(dst->grads,     0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7085
0
        memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7086
0
    }
7087
0
    if (src->grads) {
7088
0
        GGML_ASSERT(dst->grads     != NULL);
7089
0
        GGML_ASSERT(dst->grad_accs != NULL);
7090
0
        for (int i = 0; i < src->n_nodes; ++i) {
7091
0
            const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
7092
0
            const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
7093
7094
0
            GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
7095
0
            GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
7096
0
            GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
7097
0
            GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
7098
7099
0
            dst->grads[igrad_dst]     = src->grads[igrad_src];
7100
0
            dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
7101
0
        }
7102
0
    }
7103
0
}
7104
7105
0
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
7106
0
    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
7107
0
    ggml_graph_cpy(cgraph, result);
7108
0
    return result;
7109
0
}
7110
7111
0
struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
7112
0
    if (ggml_is_empty(tensor)) {
7113
0
        return tensor;
7114
0
    }
7115
0
    if (tensor->buffer) {
7116
0
        ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
7117
0
    } else {
7118
0
        GGML_ASSERT(tensor->data);
7119
0
        memset(tensor->data, 0, ggml_nbytes(tensor));
7120
0
    }
7121
0
    return tensor;
7122
0
}
7123
7124
0
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
7125
0
    if (!cgraph) {
7126
0
        return;
7127
0
    }
7128
0
    GGML_ASSERT(cgraph->grads != NULL);
7129
7130
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7131
0
        struct ggml_tensor * node     = cgraph->nodes[i];
7132
0
        struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);
7133
7134
0
        if (node->op == GGML_OP_OPT_STEP_ADAMW) {
7135
            // clear momenta
7136
0
            ggml_set_zero(node->src[2]);
7137
0
            ggml_set_zero(node->src[3]);
7138
0
        }
7139
7140
        // the initial gradient of a loss tensor should be 1, all other gradients 0
7141
0
        if (grad_acc) {
7142
0
            if (node->flags & GGML_TENSOR_FLAG_LOSS) {
7143
0
                GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
7144
0
                GGML_ASSERT(ggml_is_scalar(grad_acc));
7145
7146
0
                const float onef = 1.0f;
7147
0
                if (grad_acc->buffer) {
7148
0
                    ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
7149
0
                } else {
7150
0
                    GGML_ASSERT(grad_acc->data);
7151
0
                    *((float *) grad_acc->data) = onef;
7152
0
                }
7153
0
            } else {
7154
0
                ggml_set_zero(grad_acc);
7155
0
            }
7156
0
        }
7157
0
    }
7158
0
}
7159
7160
0
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
7161
0
    cgraph->n_leafs = 0;
7162
0
    cgraph->n_nodes = 0;
7163
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7164
0
}
7165
7166
0
int ggml_graph_size(struct ggml_cgraph * cgraph) {
7167
0
    return cgraph->size;
7168
0
}
7169
7170
0
struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
7171
0
    if (i < 0) {
7172
0
        GGML_ASSERT(cgraph->n_nodes + i >= 0);
7173
0
        return cgraph->nodes[cgraph->n_nodes + i];
7174
0
    }
7175
7176
0
    GGML_ASSERT(i < cgraph->n_nodes);
7177
0
    return cgraph->nodes[i];
7178
0
}
7179
7180
0
struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
7181
0
    return cgraph->nodes;
7182
0
}
7183
7184
0
int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
7185
0
    return cgraph->n_nodes;
7186
0
}
7187
7188
0
void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
7189
0
    GGML_ASSERT(cgraph->size > cgraph->n_nodes);
7190
0
    cgraph->nodes[cgraph->n_nodes] = tensor;
7191
0
    cgraph->n_nodes++;
7192
0
}
7193
7194
0
struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
7195
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7196
0
        struct ggml_tensor * leaf = cgraph->leafs[i];
7197
7198
0
        if (strcmp(leaf->name, name) == 0) {
7199
0
            return leaf;
7200
0
        }
7201
0
    }
7202
7203
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7204
0
        struct ggml_tensor * node = cgraph->nodes[i];
7205
7206
0
        if (strcmp(node->name, name) == 0) {
7207
0
            return node;
7208
0
        }
7209
0
    }
7210
7211
0
    return NULL;
7212
0
}
7213
7214
0
struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7215
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7216
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
7217
0
}
7218
7219
0
struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7220
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7221
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
7222
0
}
7223
7224
0
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
7225
0
    GGML_LOG_INFO("=== GRAPH ===\n");
7226
7227
0
    GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
7228
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7229
0
        struct ggml_tensor * node = cgraph->nodes[i];
7230
7231
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
7232
0
                i,
7233
0
                node->ne[0], node->ne[1], node->ne[2],
7234
0
                ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
7235
0
                      ggml_graph_get_grad(cgraph, node) ? "g" : " ");
7236
0
    }
7237
7238
0
    GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
7239
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7240
0
        struct ggml_tensor * node = cgraph->leafs[i];
7241
7242
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
7243
0
                i,
7244
0
                node->ne[0], node->ne[1],
7245
0
                ggml_op_name(node->op),
7246
0
                ggml_get_name(node));
7247
0
    }
7248
7249
0
    GGML_LOG_INFO("========================================\n");
7250
0
}
7251
7252
static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
7253
                                      const int *                idxs,
7254
                                      int                        count,
7255
0
                                      const struct ggml_tensor * tensor) {
7256
0
    GGML_ASSERT(cgraph && idxs);
7257
0
    for (int i = 0; i < count; ++i) {
7258
0
        const int node_idx = idxs[i];
7259
7260
0
        if (node_idx >= cgraph->n_nodes) {
7261
0
            return -1;
7262
0
        }
7263
0
        if (cgraph->nodes[node_idx] == tensor) {
7264
0
            return i;
7265
0
        }
7266
0
    }
7267
0
    return -1;
7268
0
}
7269
7270
bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
7271
                                const int *                node_idxs,
7272
                                int                        count,
7273
                                const enum ggml_op *       ops,
7274
                                const int *                outputs,
7275
0
                                int                        num_outputs) {
7276
0
    GGML_ASSERT(outputs && num_outputs > 0);
7277
7278
0
    for (int i = 0; i < count; ++i) {
7279
0
        if (node_idxs[i] >= cgraph->n_nodes) {
7280
0
            return false;
7281
0
        }
7282
7283
0
        const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
7284
7285
0
        if (node->op != ops[i]) {
7286
0
            return false;
7287
0
        }
7288
7289
0
        if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
7290
0
            return false;
7291
0
        }
7292
7293
0
        if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
7294
0
            continue;
7295
0
        }
7296
7297
0
        if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
7298
0
            return false;
7299
0
        }
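        // an intermediate node may only be consumed inside the fused subgraph: count
        // its uses by the remaining subgraph nodes and require that to match its
        // total use count in the graph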
7300
7301
0
        int subgraph_uses = 0;
7302
0
        for (int j = i + 1; j < count; ++j) {
7303
0
            const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
7304
0
            for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
7305
0
                if (other_node->src[src_idx] == node) {
7306
0
                    subgraph_uses++;
7307
0
                }
7308
0
            }
7309
0
        }
7310
7311
0
        if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
7312
0
            return false;
7313
0
        }
7314
7315
        // if node is a view, check if the view_src and all its parent view_srcs are within the subgraph
7316
0
        struct ggml_tensor * view_src = node->view_src;
7317
0
        while (view_src) {
7318
0
            if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
7319
0
                return false;
7320
0
            }
7321
0
            view_src = view_src->view_src;
7322
0
        }
7323
0
    }
7324
7325
0
    return true;
7326
0
}
7327
7328
// check if node is part of the graph
7329
0
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7330
0
    if (cgraph == NULL) {
7331
0
        return true;
7332
0
    }
7333
7334
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7335
0
        if (cgraph->nodes[i] == node) {
7336
0
            return true;
7337
0
        }
7338
0
    }
7339
7340
0
    return false;
7341
0
}
7342
7343
0
static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7344
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7345
0
        struct ggml_tensor * parent = cgraph->nodes[i];
7346
0
        struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);
7347
7348
0
        if (grad == node) {
7349
0
            return parent;
7350
0
        }
7351
0
    }
7352
7353
0
    return NULL;
7354
0
}
7355
7356
0
static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7357
0
    struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
7358
0
    struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
7359
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
7360
0
            gparent0 ? (void *) gparent0 : (void *) parent,
7361
0
            gparent ? (void *) gparent : (void *) node,
7362
0
            gparent ? "empty" : "vee",
7363
0
            gparent ? "dashed" : "solid",
7364
0
            label);
7365
0
}
7366
7367
0
static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7368
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ label = \"%s\"; ]\n",
7369
0
            (void *) parent,
7370
0
            (void *) node,
7371
0
            label);
7372
0
}
7373
7374
0
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * cgraph, const char * filename) {
7375
0
    char color[16];
7376
7377
0
    FILE * fp = ggml_fopen(filename, "w");
7378
0
    GGML_ASSERT(fp);
7379
7380
0
    fprintf(fp, "digraph G {\n");
7381
0
    fprintf(fp, "  newrank = true;\n");
7382
0
    fprintf(fp, "  rankdir = TB;\n");
7383
7384
0
    for (int i = 0; i < gb->n_nodes; i++) {
7385
0
        struct ggml_tensor * node = gb->nodes[i];
7386
0
        struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);
7387
7388
0
        if (ggml_graph_get_parent(gb, node) != NULL) {
7389
0
            continue;
7390
0
        }
7391
7392
0
        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
7393
0
            snprintf(color, sizeof(color), "yellow");
7394
0
        } else if (grad) {
7395
0
            if (ggml_graph_find(cgraph, node)) {
7396
0
                snprintf(color, sizeof(color), "green");
7397
0
            } else {
7398
0
                snprintf(color, sizeof(color), "lightblue");
7399
0
            }
7400
0
        } else {
7401
0
            snprintf(color, sizeof(color), "white");
7402
0
        }
7403
7404
0
        fprintf(fp, "  \"%p\" [ "
7405
0
                    "style = filled; fillcolor = %s; shape = record; "
7406
0
                    "label=\"",
7407
0
                (void *) node, color);
7408
7409
0
        if (strlen(node->name) > 0) {
7410
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7411
0
        } else {
7412
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7413
0
        }
7414
7415
0
        if (ggml_is_matrix(node)) {
7416
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
7417
0
        } else {
7418
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
7419
0
        }
7420
7421
0
        if (grad) {
7422
0
            fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
7423
0
        } else {
7424
0
            fprintf(fp, "\"; ]\n");
7425
0
        }
7426
0
    }
7427
7428
0
    for (int i = 0; i < gb->n_leafs; i++) {
7429
0
        struct ggml_tensor * node = gb->leafs[i];
7430
7431
0
        snprintf(color, sizeof(color), "pink");
7432
7433
0
        fprintf(fp, "  \"%p\" [ "
7434
0
                    "style = filled; fillcolor = %s; shape = record; "
7435
0
                    "label=\"<x>",
7436
0
                (void *) node, color);
7437
7438
0
        if (strlen(node->name) > 0) {
7439
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7440
0
        } else {
7441
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7442
0
        }
7443
7444
0
        fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
7445
0
        if (ggml_nelements(node) < 5 && node->data != NULL) {
7446
0
            fprintf(fp, " | (");
7447
0
            for (int j = 0; j < ggml_nelements(node); j++) {
7448
                // FIXME: use ggml-backend to obtain the tensor data
7449
                //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
7450
                //    fprintf(fp, "%d", ggml_get_i32_1d(node, j));
7451
                //}
7452
                //else if (node->type == GGML_TYPE_F32 ||
7453
                //         node->type == GGML_TYPE_F16 ||
7454
                //         node->type == GGML_TYPE_BF16) {
7455
                //    fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
7456
                //}
7457
                //else
7458
0
                {
7459
0
                    fprintf(fp, "#");
7460
0
                }
7461
0
                if (j < ggml_nelements(node) - 1) {
7462
0
                    fprintf(fp, ", ");
7463
0
                }
7464
0
            }
7465
0
            fprintf(fp, ")");
7466
0
        }
7467
0
        fprintf(fp, "\"; ]\n");
7468
0
    }
7469
7470
0
    for (int i = 0; i < gb->n_nodes; i++) {
7471
0
        struct ggml_tensor * node = gb->nodes[i];
7472
7473
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7474
0
            if (node->src[j]) {
7475
0
                char label[16];
7476
0
                snprintf(label, sizeof(label), "src %d", j);
7477
0
                ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
7478
0
            }
7479
0
        }
7480
0
    }
7481
7482
0
    for (int i = 0; i < gb->n_leafs; i++) {
7483
0
        struct ggml_tensor * node = gb->leafs[i];
7484
7485
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7486
0
            if (node->src[j]) {
7487
0
                char label[16];
7488
0
                snprintf(label, sizeof(label), "src %d", j);
7489
0
                ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
7490
0
            }
7491
0
        }
7492
0
    }
7493
7494
0
    fprintf(fp, "}\n");
7495
7496
0
    fclose(fp);
7497
7498
0
    GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
7499
0
}
7500
7501
////////////////////////////////////////////////////////////////////////////////
7502
7503
0
void ggml_set_input(struct ggml_tensor * tensor) {
7504
0
    tensor->flags |= GGML_TENSOR_FLAG_INPUT;
7505
0
}
7506
7507
0
void ggml_set_output(struct ggml_tensor * tensor) {
7508
0
    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
7509
0
}
7510
7511
0
void ggml_set_param(struct ggml_tensor * tensor) {
7512
0
    GGML_ASSERT(tensor->op == GGML_OP_NONE);
7513
0
    tensor->flags |= GGML_TENSOR_FLAG_PARAM;
7514
0
}
7515
7516
0
void ggml_set_loss(struct ggml_tensor * tensor) {
7517
0
    GGML_ASSERT(ggml_is_scalar(tensor));
7518
0
    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
7519
0
    tensor->flags |= GGML_TENSOR_FLAG_LOSS;
7520
0
}
7521
7522
////////////////////////////////////////////////////////////////////////////////
7523
7524
0
void ggml_quantize_init(enum ggml_type type) {
7525
0
    ggml_critical_section_start();
7526
7527
0
    switch (type) {
7528
0
        case GGML_TYPE_IQ2_XXS:
7529
0
        case GGML_TYPE_IQ2_XS:
7530
0
        case GGML_TYPE_IQ2_S:
7531
0
        case GGML_TYPE_IQ1_S:
7532
0
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
7533
0
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
7534
0
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
7535
0
        default: // nothing
7536
0
            break;
7537
0
    }
7538
7539
0
    ggml_critical_section_end();
7540
0
}
7541
7542
4.18k
void ggml_quantize_free(void) {
7543
4.18k
    ggml_critical_section_start();
7544
7545
4.18k
    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
7546
4.18k
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
7547
4.18k
    iq2xs_free_impl(GGML_TYPE_IQ2_S);
7548
4.18k
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
7549
4.18k
    iq2xs_free_impl(GGML_TYPE_IQ1_M);
7550
4.18k
    iq3xs_free_impl(256);
7551
4.18k
    iq3xs_free_impl(512);
7552
7553
4.18k
    ggml_critical_section_end();
7554
4.18k
}
7555
7556
0
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
7557
0
    return
7558
0
        type == GGML_TYPE_IQ2_XXS ||
7559
0
        type == GGML_TYPE_IQ2_XS  ||
7560
0
        type == GGML_TYPE_IQ1_S;//   ||
7561
        //type == GGML_TYPE_IQ1_M;
7562
0
}
7563
7564
size_t ggml_quantize_chunk(
7565
        enum ggml_type   type,
7566
           const float * src,
7567
                  void * dst,
7568
               int64_t   start,
7569
               int64_t   nrows,
7570
               int64_t   n_per_row,
7571
0
           const float * imatrix) {
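    // start is an element offset into src; the asserts below require it to be a
    // multiple of both the block size and n_per_row, so the destination offset can
    // be computed in whole rows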
7572
0
    const int64_t n = (int64_t) nrows * n_per_row;
7573
7574
0
    if (ggml_quantize_requires_imatrix(type)) {
7575
0
        GGML_ASSERT(imatrix != NULL);
7576
0
    }
7577
7578
0
    GGML_ASSERT(start % type_traits[type].blck_size == 0);
7579
0
    GGML_ASSERT(start % n_per_row == 0);
7580
7581
0
    ggml_quantize_init(type); // this is noop if already initialized
7582
7583
0
    const size_t start_row = start / n_per_row;
7584
0
    const size_t row_size  = ggml_row_size(type, n_per_row);
7585
7586
0
    size_t result = 0;
7587
7588
0
    switch (type) {
7589
0
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7590
0
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7591
0
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7592
0
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7593
0
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7594
0
        case GGML_TYPE_MXFP4:   result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7595
0
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7596
0
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7597
0
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7598
0
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7599
0
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7600
0
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7601
0
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7602
0
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7603
0
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7604
0
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7605
0
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7606
0
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7607
0
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7608
0
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7609
0
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7610
0
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7611
0
        case GGML_TYPE_F16:
7612
0
            {
7613
0
                size_t elemsize = sizeof(ggml_fp16_t);
7614
0
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
7615
0
                result = n * elemsize;
7616
0
            } break;
7617
0
        case GGML_TYPE_BF16:
7618
0
            {
7619
0
                size_t elemsize = sizeof(ggml_bf16_t);
7620
0
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
7621
0
                result = n * elemsize;
7622
0
            } break;
7623
0
        case GGML_TYPE_F32:
7624
0
            {
7625
0
                size_t elemsize = sizeof(float);
7626
0
                result = n * elemsize;
7627
0
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
7628
0
            } break;
7629
0
        default:
7630
0
            assert(false);
7631
0
    }
7632
7633
0
    GGML_ASSERT(result == nrows * row_size);
7634
7635
0
    return result;
7636
0
}
7637
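Annotation: ggml_quantize_chunk() quantizes nrows full rows of n_per_row floats, starting at element offset `start`, which must be both block-aligned and row-aligned (the asserts at 7578-7579); it returns the number of bytes written, which must equal nrows * row_size. The F16/BF16/F32 cases are plain conversions or a memcpy rather than quantization proper. A self-contained sketch, under the assumption that Q8_0's block size (32) divides n_per_row:

    #include <assert.h>
    #include <stdlib.h>
    #include "ggml.h"

    static void quantize_q8_0_example(void) {
        const enum ggml_type type = GGML_TYPE_Q8_0;   // block size 32 divides n_per_row
        const int64_t nrows = 4, n_per_row = 64;
        float src[4 * 64] = {0};                      // fill with real data in practice
        const size_t row_size = ggml_row_size(type, n_per_row);
        void * dst = malloc(nrows * row_size);
        const size_t written = ggml_quantize_chunk(type, src, dst,
                                                   /*start=*/0, nrows, n_per_row,
                                                   /*imatrix=*/NULL); // Q8_0 needs none
        assert(written == nrows * row_size);          // mirrors the GGML_ASSERT at 7633
        free(dst);
    }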
 7638 |        | ////////////////////////////////////////////////////////////////////////////////
 7639 |        |
 7640 |      0 | void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) {
 7641 |      0 |     *log_callback = g_logger_state.log_callback;
 7642 |      0 |     *user_data    = g_logger_state.log_callback_user_data;
 7643 |      0 | }
 7644 |        |
 7645 |      0 | void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
 7646 |      0 |     g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
 7647 |      0 |     g_logger_state.log_callback_user_data = user_data;
 7648 |      0 | }
 7649 |        |
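Annotation: ggml_log_set() falls back to ggml_log_callback_default when passed NULL (line 7646), so NULL restores the default sink, and ggml_log_get() lets a caller save the current pair before overriding it. A hedged sketch with a hypothetical callback `my_log`:

    #include <stdio.h>
    #include "ggml.h"

    // hypothetical callback, not part of ggml
    static void my_log(enum ggml_log_level level, const char * text, void * user_data) {
        (void) level; (void) user_data;
        fputs(text, stderr);           // route every ggml log line to stderr
    }

    static void with_custom_logger(void) {
        ggml_log_callback prev; void * prev_ud;
        ggml_log_get(&prev, &prev_ud); // save the current sink
        ggml_log_set(my_log, NULL);    // install ours
        /* ... */
        ggml_log_set(prev, prev_ud);   // restore (NULL would select ggml_log_callback_default)
    }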
 7650 |      0 | void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
 7651 |      0 |     p->n_threads  = n_threads;
 7652 |      0 |     p->prio       = 0;     // default priority (usually means normal or inherited)
 7653 |      0 |     p->poll       = 50;    // hybrid-polling enabled
 7654 |      0 |     p->strict_cpu = false; // no strict placement (all threads share same cpumask)
 7655 |      0 |     p->paused     = false; // threads are ready to go
 7656 |      0 |     memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
 7657 |      0 | }
 7658 |        |
 7659 |      0 | struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
 7660 |      0 |     struct ggml_threadpool_params p;
 7661 |      0 |     ggml_threadpool_params_init(&p, n_threads);
 7662 |      0 |     return p;
 7663 |      0 | }
 7664 |        |
 7665 |      0 | bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
 7666 |      0 |     if (p0->n_threads      != p1->n_threads  )    return false;
 7667 |      0 |     if (p0->prio           != p1->prio       )    return false;
 7668 |      0 |     if (p0->poll           != p1->poll       )    return false;
 7669 |      0 |     if (p0->strict_cpu     != p1->strict_cpu )    return false;
 7670 |      0 |     return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
 7671 |      0 | }
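Annotation: ggml_threadpool_params_match() compares every field except `paused`, so two parameter sets that differ only in their start-paused state are treated as equivalent. A small sketch exercising the three functions (the wrapper function name is ours):

    #include <assert.h>
    #include "ggml.h"

    static void threadpool_params_example(void) {
        struct ggml_threadpool_params a = ggml_threadpool_params_default(8);
        struct ggml_threadpool_params b;
        ggml_threadpool_params_init(&b, 8); // field-for-field the same as the default above
        b.paused = true;                    // `paused` is not inspected by the comparison
        assert(ggml_threadpool_params_match(&a, &b)); // still true
    }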