Coverage Report

Created: 2026-01-11 07:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/ggml/src/ggml.c
Line
Count
Source
1
#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
2
#define _USE_MATH_DEFINES // For M_PI on MSVC
3
4
#include "ggml-backend.h"
5
#include "ggml-impl.h"
6
#include "ggml-threading.h"
7
#include "ggml-cpu.h"
8
#include "ggml.h"
9
10
// FIXME: required here for quantization functions
11
#include "ggml-quants.h"
12
13
#ifdef GGML_USE_CPU_HBM
14
#include <hbwmalloc.h>
15
#endif
16
17
#if defined(_MSC_VER) || defined(__MINGW32__)
18
#include <malloc.h> // using malloc.h with MSC/MINGW
19
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
20
#include <alloca.h>
21
#endif
22
23
#include <assert.h>
24
#include <errno.h>
25
#include <time.h>
26
#include <math.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include <stdint.h>
30
#include <inttypes.h>
31
#include <stdio.h>
32
#include <float.h>
33
#include <limits.h>
34
#include <stdarg.h>
35
#include <signal.h>
36
#if defined(__gnu_linux__)
37
#include <syscall.h>
38
#endif
39
40
#if defined(__APPLE__)
41
#include <unistd.h>
42
#include <mach/mach.h>
43
#include <TargetConditionals.h>
44
#endif
45
46
#if defined(_WIN32)
47
#define WIN32_LEAN_AND_MEAN
48
#ifndef NOMINMAX
49
    #define NOMINMAX
50
#endif
51
#include <windows.h>
52
#endif
53
54
0
#define UNUSED GGML_UNUSED
55
56
// Needed for ggml_fp32_to_bf16_row()
57
#if defined(__AVX512BF16__)
58
#if defined(_MSC_VER)
59
#define m512i(p) p
60
#else
61
#include <immintrin.h>
62
#define m512i(p) (__m512i)(p)
63
#endif // defined(_MSC_VER)
64
#endif // defined(__AVX512BF16__)
65
66
#if defined(__linux__) || \
67
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
68
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
69
70
#include <unistd.h>
71
#include <sys/types.h>
72
#include <sys/stat.h>
73
#include <sys/wait.h>
74
#if defined(__linux__)
75
#include <sys/prctl.h>
76
#endif
77
78
#if defined(__ANDROID__)
79
#include <unwind.h>
80
#include <dlfcn.h>
81
#include <stdio.h>
82
83
struct backtrace_state {
84
    void ** current;
85
    void ** end;
86
};
87
88
static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
89
    struct backtrace_state * state = (struct backtrace_state *)arg;
90
    uintptr_t pc = _Unwind_GetIP(context);
91
    if (pc) {
92
        if (state->current == state->end) {
93
            return _URC_END_OF_STACK;
94
        } else {
95
            *state->current++ = (void*)pc;
96
        }
97
    }
98
    return _URC_NO_REASON;
99
}
100
101
static void ggml_print_backtrace_symbols(void) {
102
    const int max = 100;
103
    void* buffer[max];
104
105
    struct backtrace_state state = {buffer, buffer + max};
106
    _Unwind_Backtrace(unwind_callback, &state);
107
108
    int count = state.current - buffer;
109
110
    for (int idx = 0; idx < count; ++idx) {
111
        const void * addr = buffer[idx];
112
        const char * symbol = "";
113
114
        Dl_info info;
115
        if (dladdr(addr, &info) && info.dli_sname) {
116
            symbol = info.dli_sname;
117
        }
118
119
        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
120
    }
121
}
122
#elif defined(__linux__) && defined(__GLIBC__)
123
#include <execinfo.h>
124
0
static void ggml_print_backtrace_symbols(void) {
125
0
    void * trace[100];
126
0
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
127
0
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
128
0
}
129
#elif defined(__APPLE__)
130
#include <execinfo.h>
131
static void ggml_print_backtrace_symbols(void) {
132
    void * trace[100];
133
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
134
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
135
}
136
#else
137
static void ggml_print_backtrace_symbols(void) {
138
    // platform not supported
139
}
140
#endif
141
142
0
void ggml_print_backtrace(void) {
143
0
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
144
0
    if (GGML_NO_BACKTRACE) {
145
0
        return;
146
0
    }
147
#if defined(__APPLE__)
148
    // On macOS, fork+debugger attachment is problematic due to:
149
    // 1. libdispatch "poisons" forked child processes
150
    // 2. lldb has issues attaching to parent from forked child
151
    // Use simple backtrace() instead to avoid Terminal.app crashes
152
    const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
153
    if (!GGML_BACKTRACE_LLDB) {
154
        fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
155
        fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
156
        fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
157
        ggml_print_backtrace_symbols();
158
        return;
159
    }
160
#endif
161
0
#if defined(__linux__)
162
0
    FILE * f = fopen("/proc/self/status", "r");
163
0
    size_t size = 0;
164
0
    char * line = NULL;
165
0
    ssize_t length = 0;
166
0
    while ((length = getline(&line, &size, f)) > 0) {
167
0
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
168
0
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
169
            // Already being debugged, and the breakpoint is the later abort()
170
0
            free(line);
171
0
            fclose(f);
172
0
            return;
173
0
        }
174
0
    }
175
0
    free(line);
176
0
    fclose(f);
177
0
    int lock[2] = { -1, -1 };
178
0
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
179
0
#endif
180
0
    const int parent_pid = getpid();
181
0
    const int child_pid = fork();
182
0
    if (child_pid < 0) { // error
183
0
#if defined(__linux__)
184
0
        close(lock[1]);
185
0
        close(lock[0]);
186
0
#endif
187
0
        return;
188
0
    } else if (child_pid == 0) { // child
189
0
        char attach[32];
190
0
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
191
0
#if defined(__linux__)
192
0
        close(lock[1]);
193
0
        (void) !read(lock[0], lock, 1);
194
0
        close(lock[0]);
195
0
#endif
196
        // try gdb
197
0
        execlp("gdb", "gdb", "--batch",
198
0
            "-ex", "set style enabled on",
199
0
            "-ex", attach,
200
0
            "-ex", "bt -frame-info source-and-location",
201
0
            "-ex", "detach",
202
0
            "-ex", "quit",
203
0
            (char *) NULL);
204
        // try lldb
205
0
        execlp("lldb", "lldb", "--batch",
206
0
            "-o", "bt",
207
0
            "-o", "quit",
208
0
            "-p", &attach[sizeof("attach ") - 1],
209
0
            (char *) NULL);
210
        // gdb failed, fallback to backtrace_symbols
211
0
        ggml_print_backtrace_symbols();
212
0
        _Exit(0);
213
0
    } else { // parent
214
0
#if defined(__linux__)
215
0
        prctl(PR_SET_PTRACER, child_pid);
216
0
        close(lock[1]);
217
0
        close(lock[0]);
218
0
#endif
219
0
        waitpid(child_pid, NULL, 0);
220
0
    }
221
0
}
222
#else
223
void ggml_print_backtrace(void) {
224
    // platform not supported
225
}
226
#endif
227
228
static ggml_abort_callback_t g_abort_callback = NULL;
229
230
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
231
0
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) {
232
0
    ggml_abort_callback_t ret_val = g_abort_callback;
233
0
    g_abort_callback = callback;
234
0
    return ret_val;
235
0
}
236
237
10
void ggml_abort(const char * file, int line, const char * fmt, ...) {
238
10
    fflush(stdout);
239
240
10
    char message[2048];
241
10
    int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line);
242
243
10
    va_list args;
244
10
    va_start(args, fmt);
245
10
    vsnprintf(message + offset, sizeof(message) - offset, fmt, args);
246
10
    va_end(args);
247
248
10
    if (g_abort_callback) {
249
0
        g_abort_callback(message);
250
10
    } else {
251
        // default: print error and backtrace to stderr
252
10
        fprintf(stderr, "%s\n", message);
253
        
254
10
    }
255
256
10
    abort();
257
10
}
258
259
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
260
261
//
262
// logging
263
//
264
265
struct ggml_logger_state {
266
    ggml_log_callback log_callback;
267
    void * log_callback_user_data;
268
};
269
static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};
270
271
262
static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
272
262
    if (format == NULL) {
273
0
        return;
274
0
    }
275
262
    va_list args_copy;
276
262
    va_copy(args_copy, args);
277
262
    char buffer[128];
278
262
    int len = vsnprintf(buffer, 128, format, args);
279
262
    if (len < 128) {
280
252
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
281
252
    } else {
282
10
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
283
10
        vsnprintf(buffer2, len + 1, format, args_copy);
284
10
        buffer2[len] = 0;
285
10
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
286
10
        free(buffer2);
287
10
    }
288
262
    va_end(args_copy);
289
262
}
290
291
262
void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
292
262
    va_list args;
293
262
    va_start(args, format);
294
262
    ggml_log_internal_v(level, format, args);
295
262
    va_end(args);
296
262
}
297
298
262
void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
299
262
    (void) level;
300
262
    (void) user_data;
301
262
    fputs(text, stderr);
302
262
    fflush(stderr);
303
262
}
304
305
//
306
// end of logging block
307
//
308
309
#ifdef GGML_USE_ACCELERATE
310
// uncomment to use vDSP for soft max computation
311
// note: not sure if it is actually faster
312
//#define GGML_SOFT_MAX_ACCELERATE
313
#endif
314
315
316
547
void * ggml_aligned_malloc(size_t size) {
317
#if defined(__s390x__)
318
    const int alignment = 256;
319
#else
320
547
    const int alignment = 64;
321
547
#endif
322
323
#if defined(_MSC_VER) || defined(__MINGW32__)
324
    return _aligned_malloc(size, alignment);
325
#else
326
547
    if (size == 0) {
327
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
328
0
        return NULL;
329
0
    }
330
547
    void * aligned_memory = NULL;
331
  #ifdef GGML_USE_CPU_HBM
332
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
333
  #elif TARGET_OS_OSX
334
    GGML_UNUSED(alignment);
335
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
336
    int result = EFAULT;
337
    switch (alloc_status) {
338
        case KERN_SUCCESS:
339
            result = 0;
340
            break;
341
        case KERN_INVALID_ADDRESS:
342
            result = EINVAL;
343
            break;
344
        case KERN_NO_SPACE:
345
            result = ENOMEM;
346
            break;
347
        default:
348
            result = EFAULT;
349
            break;
350
    }
351
  #else
352
547
    int result = posix_memalign(&aligned_memory, alignment, size);
353
547
  #endif
354
547
    if (result != 0) {
355
        // Handle allocation failure
356
0
        const char *error_desc = "unknown allocation error";
357
0
        switch (result) {
358
0
            case EINVAL:
359
0
                error_desc = "invalid alignment value";
360
0
                break;
361
0
            case ENOMEM:
362
0
                error_desc = "insufficient memory";
363
0
                break;
364
0
        }
365
0
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
366
0
        return NULL;
367
0
    }
368
547
    return aligned_memory;
369
547
#endif
370
547
}
371
372
547
void ggml_aligned_free(void * ptr, size_t size) {
373
547
    GGML_UNUSED(size);
374
#if defined(_MSC_VER) || defined(__MINGW32__)
375
    _aligned_free(ptr);
376
#elif GGML_USE_CPU_HBM
377
    if (ptr != NULL) {
378
        hbw_free(ptr);
379
    }
380
#elif TARGET_OS_OSX
381
    if (ptr != NULL) {
382
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
383
    }
384
#else
385
547
    free(ptr);
386
547
#endif
387
547
}
388
389
390
547
inline static void * ggml_malloc(size_t size) {
391
547
    if (size == 0) {
392
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
393
0
        return NULL;
394
0
    }
395
547
    void * result = malloc(size);
396
547
    if (result == NULL) {
397
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
398
0
        GGML_ABORT("fatal error");
399
0
    }
400
547
    return result;
401
547
}
402
403
// calloc
404
0
inline static void * ggml_calloc(size_t num, size_t size) {
405
0
if ((num * size) > 9000000) {GGML_ABORT("calloc err");}
406
407
0
    if (num == 0 || size == 0) {
408
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
409
0
        return NULL;
410
0
    }
411
0
    void * result = calloc(num, size);
412
0
    if (result == NULL) {
413
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
414
0
        GGML_ABORT("fatal error");
415
0
    }
416
0
    return result;
417
0
}
418
419
547
#define GGML_MALLOC(size)      ggml_malloc(size)
420
0
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
421
422
547
#define GGML_FREE(ptr) free(ptr)
423
424
0
const char * ggml_status_to_string(enum ggml_status status) {
425
0
    switch (status) {
426
0
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
427
0
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
428
0
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
429
0
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
430
0
    }
431
432
0
    return "GGML status: unknown";
433
0
}
434
435
0
float ggml_fp16_to_fp32(ggml_fp16_t x) {
436
0
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
437
0
    return GGML_FP16_TO_FP32(x);
438
0
}
439
440
0
ggml_fp16_t ggml_fp32_to_fp16(float x) {
441
0
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
442
0
    return GGML_FP32_TO_FP16(x);
443
0
}
444
445
0
float ggml_bf16_to_fp32(ggml_bf16_t x) {
446
0
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
447
0
    return GGML_BF16_TO_FP32(x);  // it just left shifts
448
0
}
449
450
0
ggml_bf16_t ggml_fp32_to_bf16(float x) {
451
0
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
452
0
    return GGML_FP32_TO_BF16(x);
453
0
}
454
455
0
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
456
0
    for (int64_t i = 0; i < n; i++) {
457
0
        y[i] = GGML_FP16_TO_FP32(x[i]);
458
0
    }
459
0
}
460
461
0
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
462
0
    int i = 0;
463
0
    for (; i < n; ++i) {
464
0
        y[i] = GGML_FP32_TO_FP16(x[i]);
465
0
    }
466
0
}
467
468
0
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
469
0
    int i = 0;
470
0
    for (; i < n; ++i) {
471
0
        y[i] = GGML_BF16_TO_FP32(x[i]);
472
0
    }
473
0
}
474
475
0
void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
476
0
    for (int i = 0; i < n; i++) {
477
0
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
478
0
    }
479
0
}
480
481
0
void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
482
0
  int i = 0;
483
#if defined(__AVX512BF16__)
484
  // subnormals are flushed to zero on this platform
485
  for (; i + 32 <= n; i += 32) {
486
        _mm512_storeu_si512(
487
            (__m512i *)(y + i),
488
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
489
                                _mm512_loadu_ps(x + i))));
490
  }
491
#endif
492
0
    for (; i < n; i++) {
493
0
        y[i] = GGML_FP32_TO_BF16(x[i]);
494
0
    }
495
0
}
496
497
0
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
498
0
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
499
0
}
500
501
0
const char * ggml_version(void) {
502
0
    return GGML_VERSION;
503
0
}
504
505
0
const char * ggml_commit(void) {
506
0
    return GGML_COMMIT;
507
0
}
508
509
//
510
// timing
511
//
512
513
#if defined(_MSC_VER) || defined(__MINGW32__)
514
static int64_t timer_freq, timer_start;
515
void ggml_time_init(void) {
516
    LARGE_INTEGER t;
517
    QueryPerformanceFrequency(&t);
518
    timer_freq = t.QuadPart;
519
520
    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
521
    // and the uptime is high enough.
522
    // We subtract the program start time to reduce the likelihood of that happening.
523
    QueryPerformanceCounter(&t);
524
    timer_start = t.QuadPart;
525
}
526
int64_t ggml_time_ms(void) {
527
    LARGE_INTEGER t;
528
    QueryPerformanceCounter(&t);
529
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
530
}
531
int64_t ggml_time_us(void) {
532
    LARGE_INTEGER t;
533
    QueryPerformanceCounter(&t);
534
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
535
}
536
#else
537
1.25k
void ggml_time_init(void) {}
538
0
int64_t ggml_time_ms(void) {
539
0
    struct timespec ts;
540
0
    clock_gettime(CLOCK_MONOTONIC, &ts);
541
0
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
542
0
}
543
544
674
int64_t ggml_time_us(void) {
545
674
    struct timespec ts;
546
674
    clock_gettime(CLOCK_MONOTONIC, &ts);
547
674
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
548
674
}
549
#endif
550
551
0
int64_t ggml_cycles(void) {
552
0
    return clock();
553
0
}
554
555
0
int64_t ggml_cycles_per_ms(void) {
556
0
    return CLOCKS_PER_SEC/1000;
557
0
}
558
559
//
560
// cross-platform UTF-8 file paths
561
//
562
563
#ifdef _WIN32
564
static wchar_t * ggml_mbstowcs(const char * mbs) {
565
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
566
    if (!wlen) {
567
        errno = EINVAL;
568
        return NULL;
569
    }
570
571
    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
572
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
573
    if (!wlen) {
574
        GGML_FREE(wbuf);
575
        errno = EINVAL;
576
        return NULL;
577
    }
578
579
    return wbuf;
580
}
581
#endif
582
583
339
FILE * ggml_fopen(const char * fname, const char * mode) {
584
#ifdef _WIN32
585
    FILE * file = NULL;
586
587
    // convert fname (UTF-8)
588
    wchar_t * wfname = ggml_mbstowcs(fname);
589
    if (wfname) {
590
        // convert mode (ANSI)
591
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
592
        wchar_t * wmode_p = wmode;
593
        do {
594
            *wmode_p++ = (wchar_t)*mode;
595
        } while (*mode++);
596
597
        // open file
598
        file = _wfopen(wfname, wmode);
599
600
        GGML_FREE(wfname);
601
        GGML_FREE(wmode);
602
    }
603
604
    return file;
605
#else
606
339
    return fopen(fname, mode);
607
339
#endif
608
609
339
}
610
611
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
612
    [GGML_TYPE_I8] = {
613
        .type_name                = "i8",
614
        .blck_size                = 1,
615
        .type_size                = sizeof(int8_t),
616
        .is_quantized             = false,
617
    },
618
    [GGML_TYPE_I16] = {
619
        .type_name                = "i16",
620
        .blck_size                = 1,
621
        .type_size                = sizeof(int16_t),
622
        .is_quantized             = false,
623
    },
624
    [GGML_TYPE_I32] = {
625
        .type_name                = "i32",
626
        .blck_size                = 1,
627
        .type_size                = sizeof(int32_t),
628
        .is_quantized             = false,
629
    },
630
    [GGML_TYPE_I64] = {
631
        .type_name                = "i64",
632
        .blck_size                = 1,
633
        .type_size                = sizeof(int64_t),
634
        .is_quantized             = false,
635
    },
636
    [GGML_TYPE_F64] = {
637
        .type_name                = "f64",
638
        .blck_size                = 1,
639
        .type_size                = sizeof(double),
640
        .is_quantized             = false,
641
    },
642
    [GGML_TYPE_F32] = {
643
        .type_name                = "f32",
644
        .blck_size                = 1,
645
        .type_size                = sizeof(float),
646
        .is_quantized             = false,
647
    },
648
    [GGML_TYPE_F16] = {
649
        .type_name                = "f16",
650
        .blck_size                = 1,
651
        .type_size                = sizeof(ggml_fp16_t),
652
        .is_quantized             = false,
653
        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
654
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_fp16_row,
655
    },
656
    [GGML_TYPE_Q4_0] = {
657
        .type_name                = "q4_0",
658
        .blck_size                = QK4_0,
659
        .type_size                = sizeof(block_q4_0),
660
        .is_quantized             = true,
661
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_0,
662
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_0_ref,
663
    },
664
    [GGML_TYPE_Q4_1] = {
665
        .type_name                = "q4_1",
666
        .blck_size                = QK4_1,
667
        .type_size                = sizeof(block_q4_1),
668
        .is_quantized             = true,
669
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_1,
670
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_1_ref,
671
    },
672
    [4] = { // GGML_TYPE_Q4_2
673
        .type_name                = "DEPRECATED",
674
        .blck_size                = 0,
675
        .type_size                = 0,
676
        .is_quantized             = false,
677
    },
678
    [5] = { // GGML_TYPE_Q4_3
679
        .type_name                = "DEPRECATED",
680
        .blck_size                = 0,
681
        .type_size                = 0,
682
        .is_quantized             = false,
683
    },
684
    [GGML_TYPE_Q5_0] = {
685
        .type_name                = "q5_0",
686
        .blck_size                = QK5_0,
687
        .type_size                = sizeof(block_q5_0),
688
        .is_quantized             = true,
689
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_0,
690
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_0_ref,
691
    },
692
    [GGML_TYPE_Q5_1] = {
693
        .type_name                = "q5_1",
694
        .blck_size                = QK5_1,
695
        .type_size                = sizeof(block_q5_1),
696
        .is_quantized             = true,
697
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_1,
698
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_1_ref,
699
    },
700
    [GGML_TYPE_Q8_0] = {
701
        .type_name                = "q8_0",
702
        .blck_size                = QK8_0,
703
        .type_size                = sizeof(block_q8_0),
704
        .is_quantized             = true,
705
        .to_float                 = (ggml_to_float_t) dequantize_row_q8_0,
706
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_0_ref,
707
    },
708
    [GGML_TYPE_Q8_1] = {
709
        .type_name                = "q8_1",
710
        .blck_size                = QK8_1,
711
        .type_size                = sizeof(block_q8_1),
712
        .is_quantized             = true,
713
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_1_ref,
714
    },
715
    [GGML_TYPE_MXFP4] = {
716
        .type_name                = "mxfp4",
717
        .blck_size                = QK_MXFP4,
718
        .type_size                = sizeof(block_mxfp4),
719
        .is_quantized             = true,
720
        .to_float                 = (ggml_to_float_t) dequantize_row_mxfp4,
721
        .from_float_ref           = (ggml_from_float_t)quantize_row_mxfp4_ref,
722
    },
723
    [GGML_TYPE_Q2_K] = {
724
        .type_name                = "q2_K",
725
        .blck_size                = QK_K,
726
        .type_size                = sizeof(block_q2_K),
727
        .is_quantized             = true,
728
        .to_float                 = (ggml_to_float_t) dequantize_row_q2_K,
729
        .from_float_ref           = (ggml_from_float_t) quantize_row_q2_K_ref,
730
    },
731
    [GGML_TYPE_Q3_K] = {
732
        .type_name                = "q3_K",
733
        .blck_size                = QK_K,
734
        .type_size                = sizeof(block_q3_K),
735
        .is_quantized             = true,
736
        .to_float                 = (ggml_to_float_t) dequantize_row_q3_K,
737
        .from_float_ref           = (ggml_from_float_t) quantize_row_q3_K_ref,
738
    },
739
    [GGML_TYPE_Q4_K] = {
740
        .type_name                = "q4_K",
741
        .blck_size                = QK_K,
742
        .type_size                = sizeof(block_q4_K),
743
        .is_quantized             = true,
744
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_K,
745
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_K_ref,
746
    },
747
    [GGML_TYPE_Q5_K] = {
748
        .type_name                = "q5_K",
749
        .blck_size                = QK_K,
750
        .type_size                = sizeof(block_q5_K),
751
        .is_quantized             = true,
752
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_K,
753
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_K_ref,
754
    },
755
    [GGML_TYPE_Q6_K] = {
756
        .type_name                = "q6_K",
757
        .blck_size                = QK_K,
758
        .type_size                = sizeof(block_q6_K),
759
        .is_quantized             = true,
760
        .to_float                 = (ggml_to_float_t) dequantize_row_q6_K,
761
        .from_float_ref           = (ggml_from_float_t) quantize_row_q6_K_ref,
762
    },
763
    [GGML_TYPE_IQ2_XXS] = {
764
        .type_name                = "iq2_xxs",
765
        .blck_size                = QK_K,
766
        .type_size                = sizeof(block_iq2_xxs),
767
        .is_quantized             = true,
768
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xxs,
769
        .from_float_ref           = NULL,
770
    },
771
    [GGML_TYPE_IQ2_XS] = {
772
        .type_name                = "iq2_xs",
773
        .blck_size                = QK_K,
774
        .type_size                = sizeof(block_iq2_xs),
775
        .is_quantized             = true,
776
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xs,
777
        .from_float_ref           = NULL,
778
    },
779
    [GGML_TYPE_IQ3_XXS] = {
780
        .type_name                = "iq3_xxs",
781
        .blck_size                = QK_K,
782
        .type_size                = sizeof(block_iq3_xxs),
783
        .is_quantized             = true,
784
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_xxs,
785
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
786
    },
787
    [GGML_TYPE_IQ3_S] = {
788
        .type_name                = "iq3_s",
789
        .blck_size                = QK_K,
790
        .type_size                = sizeof(block_iq3_s),
791
        .is_quantized             = true,
792
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_s,
793
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_s_ref,
794
    },
795
    [GGML_TYPE_IQ2_S] = {
796
        .type_name                = "iq2_s",
797
        .blck_size                = QK_K,
798
        .type_size                = sizeof(block_iq2_s),
799
        .is_quantized             = true,
800
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_s,
801
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq2_s_ref,
802
    },
803
    [GGML_TYPE_IQ1_S] = {
804
        .type_name                = "iq1_s",
805
        .blck_size                = QK_K,
806
        .type_size                = sizeof(block_iq1_s),
807
        .is_quantized             = true,
808
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_s,
809
        .from_float_ref           = NULL,
810
    },
811
    [GGML_TYPE_IQ1_M] = {
812
        .type_name                = "iq1_m",
813
        .blck_size                = QK_K,
814
        .type_size                = sizeof(block_iq1_m),
815
        .is_quantized             = true,
816
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_m,
817
        .from_float_ref           = NULL,
818
    },
819
    [GGML_TYPE_IQ4_NL] = {
820
        .type_name                = "iq4_nl",
821
        .blck_size                = QK4_NL,
822
        .type_size                = sizeof(block_iq4_nl),
823
        .is_quantized             = true,
824
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_nl,
825
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_nl_ref,
826
    },
827
    [GGML_TYPE_IQ4_XS] = {
828
        .type_name                = "iq4_xs",
829
        .blck_size                = QK_K,
830
        .type_size                = sizeof(block_iq4_xs),
831
        .is_quantized             = true,
832
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_xs,
833
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_xs_ref,
834
    },
835
    [GGML_TYPE_Q8_K] = {
836
        .type_name                = "q8_K",
837
        .blck_size                = QK_K,
838
        .type_size                = sizeof(block_q8_K),
839
        .is_quantized             = true,
840
    },
841
    [GGML_TYPE_BF16] = {
842
        .type_name                = "bf16",
843
        .blck_size                = 1,
844
        .type_size                = sizeof(ggml_bf16_t),
845
        .is_quantized             = false,
846
        .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
847
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
848
    },
849
    [31] = { // GGML_TYPE_Q4_0_4_4
850
        .type_name                = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
851
        .blck_size                = 0,
852
        .type_size                = 0,
853
        .is_quantized             = false,
854
    },
855
    [32] = { // GGML_TYPE_Q4_0_4_8
856
        .type_name                = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
857
        .blck_size                = 0,
858
        .type_size                = 0,
859
        .is_quantized             = false,
860
    },
861
    [33] = { // GGML_TYPE_Q4_0_8_8
862
        .type_name                = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
863
        .blck_size                = 0,
864
        .type_size                = 0,
865
        .is_quantized             = false,
866
    },
867
    [GGML_TYPE_TQ1_0] = {
868
        .type_name                = "tq1_0",
869
        .blck_size                = QK_K,
870
        .type_size                = sizeof(block_tq1_0),
871
        .is_quantized             = true,
872
        .to_float                 = (ggml_to_float_t) dequantize_row_tq1_0,
873
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq1_0_ref,
874
    },
875
    [GGML_TYPE_TQ2_0] = {
876
        .type_name                = "tq2_0",
877
        .blck_size                = QK_K,
878
        .type_size                = sizeof(block_tq2_0),
879
        .is_quantized             = true,
880
        .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
881
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
882
    },
883
    [36] = { // GGML_TYPE_IQ4_NL_4_4
884
        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
885
        .blck_size                = 0,
886
        .type_size                = 0,
887
        .is_quantized             = false,
888
    },
889
    [37] = { // GGML_TYPE_IQ4_NL_4_8
890
        .type_name                = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
891
        .blck_size                = 0,
892
        .type_size                = 0,
893
        .is_quantized             = false,
894
    },
895
    [38] = { // GGML_TYPE_IQ4_NL_8_8
896
        .type_name                = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
897
        .blck_size                = 0,
898
        .type_size                = 0,
899
        .is_quantized             = false,
900
    },
901
};
902
903
0
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
904
0
    GGML_ASSERT(type < GGML_TYPE_COUNT);
905
0
    return &type_traits[type];
906
0
}
907
908
//
909
// ggml object
910
//
911
912
struct ggml_object {
913
    size_t offs;
914
    size_t size;
915
916
    struct ggml_object * next;
917
918
    enum ggml_object_type type;
919
920
    char padding[4];
921
};
922
923
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
924
925
//
926
// ggml context
927
//
928
929
struct ggml_context {
930
    size_t mem_size;
931
    void * mem_buffer;
932
    bool   mem_buffer_owned;
933
    bool   no_alloc;
934
935
    int    n_objects;
936
937
    struct ggml_object * objects_begin;
938
    struct ggml_object * objects_end;
939
};
940
941
//
942
// data types
943
//
944
945
static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
946
    "NONE",
947
948
    "DUP",
949
    "ADD",
950
    "ADD_ID",
951
    "ADD1",
952
    "ACC",
953
    "SUB",
954
    "MUL",
955
    "DIV",
956
    "SQR",
957
    "SQRT",
958
    "LOG",
959
    "SIN",
960
    "COS",
961
    "SUM",
962
    "SUM_ROWS",
963
    "CUMSUM",
964
    "MEAN",
965
    "ARGMAX",
966
    "COUNT_EQUAL",
967
    "REPEAT",
968
    "REPEAT_BACK",
969
    "CONCAT",
970
    "SILU_BACK",
971
    "NORM",
972
    "RMS_NORM",
973
    "RMS_NORM_BACK",
974
    "GROUP_NORM",
975
    "L2_NORM",
976
977
    "MUL_MAT",
978
    "MUL_MAT_ID",
979
    "OUT_PROD",
980
981
    "SCALE",
982
    "SET",
983
    "CPY",
984
    "CONT",
985
    "RESHAPE",
986
    "VIEW",
987
    "PERMUTE",
988
    "TRANSPOSE",
989
    "GET_ROWS",
990
    "GET_ROWS_BACK",
991
    "SET_ROWS",
992
    "DIAG",
993
    "DIAG_MASK_INF",
994
    "DIAG_MASK_ZERO",
995
    "SOFT_MAX",
996
    "SOFT_MAX_BACK",
997
    "ROPE",
998
    "ROPE_BACK",
999
    "CLAMP",
1000
    "CONV_TRANSPOSE_1D",
1001
    "IM2COL",
1002
    "IM2COL_BACK",
1003
    "IM2COL_3D",
1004
    "CONV_2D",
1005
    "CONV_3D",
1006
    "CONV_2D_DW",
1007
    "CONV_TRANSPOSE_2D",
1008
    "POOL_1D",
1009
    "POOL_2D",
1010
    "POOL_2D_BACK",
1011
    "UPSCALE",
1012
    "PAD",
1013
    "PAD_REFLECT_1D",
1014
    "ROLL",
1015
    "ARANGE",
1016
    "TIMESTEP_EMBEDDING",
1017
    "ARGSORT",
1018
    "TOP_K",
1019
    "LEAKY_RELU",
1020
    "TRI",
1021
    "FILL",
1022
1023
    "FLASH_ATTN_EXT",
1024
    "FLASH_ATTN_BACK",
1025
    "SSM_CONV",
1026
    "SSM_SCAN",
1027
    "WIN_PART",
1028
    "WIN_UNPART",
1029
    "GET_REL_POS",
1030
    "ADD_REL_POS",
1031
    "RWKV_WKV6",
1032
    "GATED_LINEAR_ATTN",
1033
    "RWKV_WKV7",
1034
    "SOLVE_TRI",
1035
1036
    "UNARY",
1037
1038
    "MAP_CUSTOM1",
1039
    "MAP_CUSTOM2",
1040
    "MAP_CUSTOM3",
1041
1042
    "CUSTOM",
1043
1044
    "CROSS_ENTROPY_LOSS",
1045
    "CROSS_ENTROPY_LOSS_BACK",
1046
    "OPT_STEP_ADAMW",
1047
    "OPT_STEP_SGD",
1048
1049
    "GLU",
1050
};
1051
1052
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1053
1054
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1055
    "none",
1056
1057
    "x",
1058
    "x+y",
1059
    "x[i]+y",
1060
    "x+y",
1061
    "view(x,nb,offset)+=y->x",
1062
    "x-y",
1063
    "x*y",
1064
    "x/y",
1065
    "x^2",
1066
    "√x",
1067
    "log(x)",
1068
    "sin(x)",
1069
    "cos(x)",
1070
    "Σx",
1071
    "Σx_k",
1072
    "cumsum(x)",
1073
    "Σx/n",
1074
    "argmax(x)",
1075
    "count_equal(x)",
1076
    "repeat(x)",
1077
    "repeat_back(x)",
1078
    "concat(x, y)",
1079
    "silu_back(x)",
1080
    "norm(x)",
1081
    "rms_norm(x)",
1082
    "rms_norm_back(x)",
1083
    "group_norm(x)",
1084
    "l2_norm(x)",
1085
1086
    "X*Y",
1087
    "X[i]*Y",
1088
    "X*Y",
1089
1090
    "x*v",
1091
    "y-\\>view(x)",
1092
    "x-\\>y",
1093
    "cont(x)",
1094
    "reshape(x)",
1095
    "view(x)",
1096
    "permute(x)",
1097
    "transpose(x)",
1098
    "get_rows(x)",
1099
    "get_rows_back(x)",
1100
    "set_rows(x)",
1101
    "diag(x)",
1102
    "diag_mask_inf(x)",
1103
    "diag_mask_zero(x)",
1104
    "soft_max(x)",
1105
    "soft_max_back(x)",
1106
    "rope(x)",
1107
    "rope_back(x)",
1108
    "clamp(x)",
1109
    "conv_transpose_1d(x)",
1110
    "im2col(x)",
1111
    "im2col_back(x)",
1112
    "im2col_3d(x)",
1113
    "conv_2d(x)",
1114
    "conv_3d(x)",
1115
    "conv_2d_dw(x)",
1116
    "conv_transpose_2d(x)",
1117
    "pool_1d(x)",
1118
    "pool_2d(x)",
1119
    "pool_2d_back(x)",
1120
    "upscale(x)",
1121
    "pad(x)",
1122
    "pad_reflect_1d(x)",
1123
    "roll(x)",
1124
    "arange(start, stop, step)",
1125
    "timestep_embedding(timesteps, dim, max_period)",
1126
    "argsort(x)",
1127
    "top_k(x)",
1128
    "leaky_relu(x)",
1129
    "tri(x)",
1130
    "fill(x, c)",
1131
1132
    "flash_attn_ext(x)",
1133
    "flash_attn_back(x)",
1134
    "ssm_conv(x)",
1135
    "ssm_scan(x)",
1136
    "win_part(x)",
1137
    "win_unpart(x)",
1138
    "get_rel_pos(x)",
1139
    "add_rel_pos(x)",
1140
    "rwkv_wkv6(k, v, r, tf, td, s)",
1141
    "gated_linear_attn(k, v, q, gate, s)",
1142
    "rwkv_wkv7(r, w, k, v, a, b, s)",
1143
    "A X = B, A triangular, solve X",
1144
1145
    "unary(x)",
1146
1147
    "map_custom(x)",
1148
    "map_custom(x,y)",
1149
    "map_custom(x,y,z)",
1150
1151
    "custom(x)",
1152
1153
    "cross_entropy_loss(x,y)",
1154
    "cross_entropy_loss_back(x,y)",
1155
    "adamw(x)",
1156
    "sgd(x)",
1157
1158
    "glu(x)",
1159
};
1160
1161
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1162
1163
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
1164
1165
static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1166
    "ABS",
1167
    "SGN",
1168
    "NEG",
1169
    "STEP",
1170
    "TANH",
1171
    "ELU",
1172
    "RELU",
1173
    "SIGMOID",
1174
    "GELU",
1175
    "GELU_QUICK",
1176
    "SILU",
1177
    "HARDSWISH",
1178
    "HARDSIGMOID",
1179
    "EXP",
1180
    "EXPM1",
1181
    "SOFTPLUS",
1182
    "GELU_ERF",
1183
    "XIELU",
1184
    "FLOOR",
1185
    "CEIL",
1186
    "ROUND",
1187
    "TRUNC",
1188
};
1189
1190
static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
1191
1192
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
1193
    "REGLU",
1194
    "GEGLU",
1195
    "SWIGLU",
1196
    "SWIGLU_OAI",
1197
    "GEGLU_ERF",
1198
    "GEGLU_QUICK",
1199
};
1200
1201
static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6");
1202
1203
1204
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
1205
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
1206
1207
1208
////////////////////////////////////////////////////////////////////////////////
1209
1210
0
void ggml_print_object(const struct ggml_object * obj) {
1211
0
    GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
1212
0
            obj->type, obj->offs, obj->size, (const void *) obj->next);
1213
0
}
1214
1215
0
void ggml_print_objects(const struct ggml_context * ctx) {
1216
0
    struct ggml_object * obj = ctx->objects_begin;
1217
1218
0
    GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
1219
1220
0
    while (obj != NULL) {
1221
0
        ggml_print_object(obj);
1222
0
        obj = obj->next;
1223
0
    }
1224
1225
0
    GGML_LOG_INFO("%s: --- end ---\n", __func__);
1226
0
}
1227
1228
926
int64_t ggml_nelements(const struct ggml_tensor * tensor) {
1229
926
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1230
1231
926
    return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1232
926
}
1233
1234
0
int64_t ggml_nrows(const struct ggml_tensor * tensor) {
1235
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1236
1237
0
    return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1238
0
}
1239
1240
4.06k
size_t ggml_nbytes(const struct ggml_tensor * tensor) {
1241
20.3k
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1242
16.2k
        if (tensor->ne[i] <= 0) {
1243
0
            return 0;
1244
0
        }
1245
16.2k
    }
1246
1247
4.06k
    size_t nbytes;
1248
4.06k
    const size_t blck_size = ggml_blck_size(tensor->type);
1249
4.06k
    if (blck_size == 1) {
1250
4.06k
        nbytes = ggml_type_size(tensor->type);
1251
20.3k
        for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1252
16.2k
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1253
16.2k
        }
1254
4.06k
    }
1255
1
    else {
1256
1
        nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
1257
4
        for (int i = 1; i < GGML_MAX_DIMS; ++i) {
1258
3
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1259
3
        }
1260
1
    }
1261
1262
4.06k
    return nbytes;
1263
4.06k
}
1264
1265
0
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
1266
0
    return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
1267
0
}
1268
1269
7.93k
int64_t ggml_blck_size(enum ggml_type type) {
1270
7.93k
    return type_traits[type].blck_size;
1271
7.93k
}
1272
1273
7.93k
size_t ggml_type_size(enum ggml_type type) {
1274
7.93k
    return type_traits[type].type_size;
1275
7.93k
}
1276
1277
1.23k
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
1278
1.23k
    assert(ne % ggml_blck_size(type) == 0);
1279
1.23k
    return ggml_type_size(type)*ne/ggml_blck_size(type);
1280
1.23k
}
1281
1282
0
double ggml_type_sizef(enum ggml_type type) {
1283
0
    return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
1284
0
}
1285
1286
127
const char * ggml_type_name(enum ggml_type type) {
1287
127
    return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
1288
127
}
1289
1290
0
bool ggml_is_quantized(enum ggml_type type) {
1291
0
    return type_traits[type].is_quantized;
1292
0
}
1293
1294
0
const char * ggml_op_name(enum ggml_op op) {
1295
0
    return GGML_OP_NAME[op];
1296
0
}
1297
1298
0
const char * ggml_op_symbol(enum ggml_op op) {
1299
0
    return GGML_OP_SYMBOL[op];
1300
0
}
1301
1302
0
const char * ggml_unary_op_name(enum ggml_unary_op op) {
1303
0
    return GGML_UNARY_OP_NAME[op];
1304
0
}
1305
1306
0
const char * ggml_glu_op_name(enum ggml_glu_op op) {
1307
0
    return GGML_GLU_OP_NAME[op];
1308
0
}
1309
1310
0
const char * ggml_op_desc(const struct ggml_tensor * t) {
1311
0
    if (t->op == GGML_OP_UNARY) {
1312
0
        enum ggml_unary_op uop = ggml_get_unary_op(t);
1313
0
        return ggml_unary_op_name(uop);
1314
0
    }
1315
0
    if (t->op == GGML_OP_GLU) {
1316
0
        enum ggml_glu_op gop = ggml_get_glu_op(t);
1317
0
        return ggml_glu_op_name(gop);
1318
0
    }
1319
0
    return ggml_op_name(t->op);
1320
0
}
1321
1322
0
size_t ggml_element_size(const struct ggml_tensor * tensor) {
1323
0
    return ggml_type_size(tensor->type);
1324
0
}
1325
1326
0
bool ggml_is_scalar(const struct ggml_tensor * tensor) {
1327
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1328
1329
0
    return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1330
0
}
1331
1332
0
bool ggml_is_vector(const struct ggml_tensor * tensor) {
1333
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1334
1335
0
    return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1336
0
}
1337
1338
0
bool ggml_is_matrix(const struct ggml_tensor * tensor) {
1339
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1340
1341
0
    return tensor->ne[2] == 1 && tensor->ne[3] == 1;
1342
0
}
1343
1344
0
bool ggml_is_3d(const struct ggml_tensor * tensor) {
1345
0
    return tensor->ne[3] == 1;
1346
0
}
1347
1348
0
int ggml_n_dims(const struct ggml_tensor * tensor) {
1349
0
    for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
1350
0
        if (tensor->ne[i] > 1) {
1351
0
            return i + 1;
1352
0
        }
1353
0
    }
1354
0
    return 1;
1355
0
}
1356
1357
0
enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
1358
0
    enum ggml_type wtype = GGML_TYPE_COUNT;
1359
1360
0
    switch (ftype) {
1361
0
        case GGML_FTYPE_ALL_F32:              wtype = GGML_TYPE_F32;   break;
1362
0
        case GGML_FTYPE_MOSTLY_F16:           wtype = GGML_TYPE_F16;   break;
1363
0
        case GGML_FTYPE_MOSTLY_BF16:          wtype = GGML_TYPE_BF16;  break;
1364
0
        case GGML_FTYPE_MOSTLY_Q4_0:          wtype = GGML_TYPE_Q4_0;  break;
1365
0
        case GGML_FTYPE_MOSTLY_Q4_1:          wtype = GGML_TYPE_Q4_1;  break;
1366
0
        case GGML_FTYPE_MOSTLY_Q5_0:          wtype = GGML_TYPE_Q5_0;  break;
1367
0
        case GGML_FTYPE_MOSTLY_Q5_1:          wtype = GGML_TYPE_Q5_1;  break;
1368
0
        case GGML_FTYPE_MOSTLY_Q8_0:          wtype = GGML_TYPE_Q8_0;  break;
1369
0
        case GGML_FTYPE_MOSTLY_MXFP4:         wtype = GGML_TYPE_MXFP4; break;
1370
0
        case GGML_FTYPE_MOSTLY_Q2_K:          wtype = GGML_TYPE_Q2_K;  break;
1371
0
        case GGML_FTYPE_MOSTLY_Q3_K:          wtype = GGML_TYPE_Q3_K;  break;
1372
0
        case GGML_FTYPE_MOSTLY_Q4_K:          wtype = GGML_TYPE_Q4_K;  break;
1373
0
        case GGML_FTYPE_MOSTLY_Q5_K:          wtype = GGML_TYPE_Q5_K;  break;
1374
0
        case GGML_FTYPE_MOSTLY_Q6_K:          wtype = GGML_TYPE_Q6_K;  break;
1375
0
        case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS;  break;
1376
0
        case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;   break;
1377
0
        case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS;  break;
1378
0
        case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;    break;
1379
0
        case GGML_FTYPE_MOSTLY_IQ1_M:         wtype = GGML_TYPE_IQ1_M;    break;
1380
0
        case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;   break;
1381
0
        case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;   break;
1382
0
        case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;    break;
1383
0
        case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;    break;
1384
0
        case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
1385
0
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
1386
0
    }
1387
1388
0
    GGML_ASSERT(wtype != GGML_TYPE_COUNT);
1389
1390
0
    return wtype;
1391
0
}
1392
1393
187
size_t ggml_tensor_overhead(void) {
1394
187
    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
1395
187
}
1396
1397
0
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
1398
0
    return tensor->nb[0] > tensor->nb[1];
1399
0
}
1400
1401
0
static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
1402
0
    size_t next_nb = ggml_type_size(tensor->type);
1403
0
    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
1404
0
        return false;
1405
0
    }
1406
0
    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
1407
0
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
1408
0
        if (tensor->ne[i] != 1) {
1409
0
            if (i > n) {
1410
0
                if (tensor->nb[i] != next_nb) {
1411
0
                    return false;
1412
0
                }
1413
0
                next_nb *= tensor->ne[i];
1414
0
            } else {
1415
                // this dimension does not need to be contiguous
1416
0
                next_nb = tensor->ne[i]*tensor->nb[i];
1417
0
            }
1418
0
        }
1419
0
    }
1420
0
    return true;
1421
0
}
1422
1423
0
bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
1424
0
    return ggml_is_contiguous_0(tensor);
1425
0
}
1426
1427
0
bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
1428
0
    return ggml_is_contiguous_n(tensor, 0);
1429
0
}
1430
1431
0
bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
1432
0
    return ggml_is_contiguous_n(tensor, 1);
1433
0
}
1434
1435
0
bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
1436
0
    return ggml_is_contiguous_n(tensor, 2);
1437
0
}
1438
1439
0
bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
1440
0
    return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
1441
0
}
1442
1443
0
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
1444
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1445
1446
0
    return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
1447
0
}
1448
1449
0
bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
1450
0
    return
1451
0
        tensor->nb[0] > tensor->nb[2] &&
1452
0
        tensor->nb[1] > tensor->nb[0] &&
1453
0
        tensor->nb[2] == ggml_type_size(tensor->type);
1454
0
}
1455
1456
0
bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) {
1457
0
    return
1458
0
        tensor->ne[0] == ggml_blck_size(tensor->type) ||
1459
0
        tensor->nb[0] == ggml_type_size(tensor->type);
1460
0
}
1461
1462
0
static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
1463
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1464
1465
0
    return
1466
0
        tensor->nb[0] == ggml_type_size(tensor->type) &&
1467
0
        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
1468
0
        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
1469
0
}
1470
1471
0
bool ggml_is_empty(const struct ggml_tensor * tensor) {
1472
0
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1473
0
        if (tensor->ne[i] == 0) {
1474
            // empty if any dimension has no elements
1475
0
            return true;
1476
0
        }
1477
0
    }
1478
0
    return false;
1479
0
}
1480
1481
0
bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1482
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1483
1484
0
    return
1485
0
        (t0->ne[0] == t1->ne[0]) &&
1486
0
        (t0->ne[1] == t1->ne[1]) &&
1487
0
        (t0->ne[2] == t1->ne[2]) &&
1488
0
        (t0->ne[3] == t1->ne[3]);
1489
0
}
1490
1491
0
bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1492
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1493
1494
0
    return
1495
0
        (t0->nb[0] == t1->nb[0]) &&
1496
0
        (t0->nb[1] == t1->nb[1]) &&
1497
0
        (t0->nb[2] == t1->nb[2]) &&
1498
0
        (t0->nb[3] == t1->nb[3]);
1499
0
}
1500
1501
// check if t1 can be represented as a repetition of t0
1502
0
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1503
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1504
1505
0
    return ggml_is_empty(t0) ? ggml_is_empty(t1) :
1506
0
        (t1->ne[0]%t0->ne[0] == 0) &&
1507
0
        (t1->ne[1]%t0->ne[1] == 0) &&
1508
0
        (t1->ne[2]%t0->ne[2] == 0) &&
1509
0
        (t1->ne[3]%t0->ne[3] == 0);
1510
0
}
1511
1512
0
static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1513
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1514
1515
0
    return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
1516
0
}
1517
1518
// assert that pointer is aligned to GGML_MEM_ALIGN
1519
#define GGML_ASSERT_ALIGNED(ptr) \
1520
1.77k
    GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
1521
1522
////////////////////////////////////////////////////////////////////////////////
1523
1524
547
struct ggml_context * ggml_init(struct ggml_init_params params) {
1525
547
    bool is_first_call = true;
1526
1527
547
    ggml_critical_section_start();
1528
1529
547
    if (is_first_call) {
1530
        // initialize time system (required on Windows)
1531
547
        ggml_time_init();
1532
1533
547
        is_first_call = false;
1534
547
    }
1535
1536
547
    ggml_critical_section_end();
1537
1538
547
    struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
1539
1540
    // allow to call ggml_init with 0 size
1541
547
    if (params.mem_size == 0) {
1542
372
        params.mem_size = GGML_MEM_ALIGN;
1543
372
    }
1544
1545
547
    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
1546
1547
547
    *ctx = (struct ggml_context) {
1548
547
        /*.mem_size           =*/ mem_size,
1549
547
        /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
1550
547
        /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
1551
547
        /*.no_alloc           =*/ params.no_alloc,
1552
547
        /*.n_objects          =*/ 0,
1553
547
        /*.objects_begin      =*/ NULL,
1554
547
        /*.objects_end        =*/ NULL,
1555
547
    };
1556
1557
547
    GGML_ASSERT(ctx->mem_buffer != NULL);
1558
1559
547
    GGML_ASSERT_ALIGNED(ctx->mem_buffer);
1560
1561
547
    GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
1562
1563
547
    return ctx;
1564
547
}
1565
1566
0
void ggml_reset(struct ggml_context * ctx) {
1567
0
    if (ctx == NULL) {
1568
0
        return;
1569
0
    }
1570
1571
0
    ctx->n_objects     = 0;
1572
0
    ctx->objects_begin = NULL;
1573
0
    ctx->objects_end   = NULL;
1574
0
}
1575
1576
547
void ggml_free(struct ggml_context * ctx) {
1577
547
    if (ctx == NULL) {
1578
0
        return;
1579
0
    }
1580
1581
547
    if (ctx->mem_buffer_owned) {
1582
547
        ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
1583
547
    }
1584
1585
547
    GGML_FREE(ctx);
1586
547
}
1587
1588
0
size_t ggml_used_mem(const struct ggml_context * ctx) {
1589
0
    return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
1590
0
}
1591
1592
0
bool ggml_get_no_alloc(struct ggml_context * ctx) {
1593
0
    return ctx->no_alloc;
1594
0
}
1595
1596
374
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
1597
374
    ctx->no_alloc = no_alloc;
1598
374
}
1599
1600
0
void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
1601
0
    return ctx->mem_buffer;
1602
0
}
1603
1604
0
size_t ggml_get_mem_size(const struct ggml_context * ctx) {
1605
0
    return ctx->mem_size;
1606
0
}
1607
1608
0
size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
1609
0
    size_t max_size = 0;
1610
1611
0
    for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
1612
0
        size_t bytes = ggml_nbytes(tensor);
1613
0
        max_size = MAX(max_size, bytes);
1614
0
    }
1615
1616
0
    return max_size;
1617
0
}
1618
1619
////////////////////////////////////////////////////////////////////////////////
1620
1621
1.23k
static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
1622
    // always insert objects at the end of the context's memory pool
1623
1.23k
    struct ggml_object * obj_cur = ctx->objects_end;
1624
1625
1.23k
    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
1626
1.23k
    const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
1627
1.23k
    const size_t cur_end  = cur_offs + cur_size;
1628
1629
    // align to GGML_MEM_ALIGN
1630
1.23k
    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
1631
1632
1.23k
    char * const mem_buffer = ctx->mem_buffer;
1633
1.23k
    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
1634
1635
1.23k
    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
1636
0
        GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
1637
0
                __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
1638
#ifndef NDEBUG
1639
        GGML_ABORT("not enough space in the context's memory pool");
1640
#endif
1641
0
        return NULL;
1642
0
    }
1643
1644
1.23k
    *obj_new = (struct ggml_object) {
1645
1.23k
        .offs = cur_end + GGML_OBJECT_SIZE,
1646
1.23k
        .size = size_needed,
1647
1.23k
        .next = NULL,
1648
1.23k
        .type = type,
1649
1.23k
    };
1650
1651
1.23k
    GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
1652
1653
1.23k
    if (obj_cur != NULL) {
1654
1.05k
        obj_cur->next = obj_new;
1655
1.05k
    } else {
1656
        // this is the first object in this context
1657
175
        ctx->objects_begin = obj_new;
1658
175
    }
1659
1660
1.23k
    ctx->objects_end = obj_new;
1661
1662
    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
1663
1664
1.23k
    return obj_new;
1665
1.23k
}
1666
1667
static struct ggml_tensor * ggml_new_tensor_impl(
1668
        struct ggml_context * ctx,
1669
        enum   ggml_type      type,
1670
        int                   n_dims,
1671
        const int64_t       * ne,
1672
        struct ggml_tensor  * view_src,
1673
1.23k
        size_t                view_offs) {
1674
1675
1.23k
    GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
1676
1.23k
    GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
1677
1678
    // find the base tensor and absolute offset
1679
1.23k
    if (view_src != NULL && view_src->view_src != NULL) {
1680
0
        view_offs += view_src->view_offs;
1681
0
        view_src   = view_src->view_src;
1682
0
    }
1683
1684
1.23k
    size_t data_size = ggml_row_size(type, ne[0]);
1685
4.92k
    for (int i = 1; i < n_dims; i++) {
1686
3.69k
        data_size *= ne[i];
1687
3.69k
    }
1688
1689
1.23k
    GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
1690
1691
1.23k
    void * data = view_src != NULL ? view_src->data : NULL;
1692
1.23k
    if (data != NULL) {
1693
0
        data = (char *) data + view_offs;
1694
0
    }
1695
1696
1.23k
    size_t obj_alloc_size = 0;
1697
1698
1.23k
    if (view_src == NULL && !ctx->no_alloc) {
1699
        // allocate tensor data in the context's memory pool
1700
0
        obj_alloc_size = data_size;
1701
0
    }
1702
1703
1.23k
    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
1704
1.23k
    GGML_ASSERT(obj_new);
1705
1706
1.23k
    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
1707
1708
1.23k
    *result = (struct ggml_tensor) {
1709
1.23k
        /*.type         =*/ type,
1710
1.23k
        /*.buffer       =*/ NULL,
1711
1.23k
        /*.ne           =*/ { 1, 1, 1, 1 },
1712
1.23k
        /*.nb           =*/ { 0, 0, 0, 0 },
1713
1.23k
        /*.op           =*/ GGML_OP_NONE,
1714
1.23k
        /*.op_params    =*/ { 0 },
1715
1.23k
        /*.flags        =*/ 0,
1716
1.23k
        /*.src          =*/ { NULL },
1717
1.23k
        /*.view_src     =*/ view_src,
1718
1.23k
        /*.view_offs    =*/ view_offs,
1719
1.23k
        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
1720
1.23k
        /*.name         =*/ { 0 },
1721
1.23k
        /*.extra        =*/ NULL,
1722
1.23k
        /*.padding      =*/ { 0 },
1723
1.23k
    };
1724
1725
    // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
1726
    //GGML_ASSERT_ALIGNED(result->data);
1727
1728
6.16k
    for (int i = 0; i < n_dims; i++) {
1729
4.92k
        result->ne[i] = ne[i];
1730
4.92k
    }
1731
1732
1.23k
    result->nb[0] = ggml_type_size(type);
1733
1.23k
    result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
1734
3.69k
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
1735
2.46k
        result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
1736
2.46k
    }
1737
1738
1.23k
    ctx->n_objects++;
1739
1740
1.23k
    return result;
1741
1.23k
}
1742
1743
struct ggml_tensor * ggml_new_tensor(
1744
        struct ggml_context * ctx,
1745
        enum   ggml_type      type,
1746
        int                   n_dims,
1747
1.23k
        const int64_t       * ne) {
1748
1.23k
    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
1749
1.23k
}
1750
1751
struct ggml_tensor * ggml_new_tensor_1d(
1752
        struct ggml_context * ctx,
1753
        enum   ggml_type      type,
1754
0
        int64_t ne0) {
1755
0
    return ggml_new_tensor(ctx, type, 1, &ne0);
1756
0
}
1757
1758
struct ggml_tensor * ggml_new_tensor_2d(
1759
        struct ggml_context * ctx,
1760
        enum   ggml_type      type,
1761
        int64_t ne0,
1762
0
        int64_t ne1) {
1763
0
    const int64_t ne[2] = { ne0, ne1 };
1764
0
    return ggml_new_tensor(ctx, type, 2, ne);
1765
0
}
1766
1767
struct ggml_tensor * ggml_new_tensor_3d(
1768
        struct ggml_context * ctx,
1769
        enum   ggml_type      type,
1770
        int64_t ne0,
1771
        int64_t ne1,
1772
0
        int64_t ne2) {
1773
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
1774
0
    return ggml_new_tensor(ctx, type, 3, ne);
1775
0
}
1776
1777
struct ggml_tensor * ggml_new_tensor_4d(
1778
        struct ggml_context * ctx,
1779
        enum   ggml_type type,
1780
        int64_t ne0,
1781
        int64_t ne1,
1782
        int64_t ne2,
1783
0
        int64_t ne3) {
1784
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
1785
0
    return ggml_new_tensor(ctx, type, 4, ne);
1786
0
}
1787
1788
0
void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
1789
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
1790
1791
0
    return (uint8_t *)ctx->mem_buffer + obj->offs;
1792
0
}
1793
1794
0
struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
1795
0
    return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
1796
0
}
1797
1798
0
void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
1799
0
    const int64_t ne2 = tensor->ne[2];
1800
0
    const int64_t ne1 = tensor->ne[1];
1801
0
    const int64_t ne0 = tensor->ne[0];
1802
1803
0
    const int64_t i3_ = (i/(ne2*ne1*ne0));
1804
0
    const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
1805
0
    const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
1806
0
    const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
1807
1808
0
    if (i0) {
1809
0
        * i0 = i0_;
1810
0
    }
1811
0
    if (i1) {
1812
0
        * i1 = i1_;
1813
0
    }
1814
0
    if (i2) {
1815
0
        * i2 = i2_;
1816
0
    }
1817
0
    if (i3) {
1818
0
        * i3 = i3_;
1819
0
    }
1820
0
}
1821
1822
0
void * ggml_get_data(const struct ggml_tensor * tensor) {
1823
0
    return tensor->data;
1824
0
}
1825
1826
0
float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
1827
0
    assert(tensor->type == GGML_TYPE_F32);
1828
0
    return (float *)(tensor->data);
1829
0
}
1830
1831
0
enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
1832
0
    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
1833
0
    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
1834
0
}
1835
1836
0
enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) {
1837
0
    GGML_ASSERT(tensor->op == GGML_OP_GLU);
1838
0
    return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0);
1839
0
}
1840
1841
1.04k
const char * ggml_get_name(const struct ggml_tensor * tensor) {
1842
1.04k
    return tensor->name;
1843
1.04k
}
1844
1845
2.65k
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
1846
2.65k
    size_t i;
1847
21.7k
    for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
1848
19.0k
        tensor->name[i] = name[i];
1849
19.0k
    }
1850
2.65k
    tensor->name[i] = '\0';
1851
2.65k
    return tensor;
1852
2.65k
}
1853
1854
0
struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
1855
0
    va_list args;
1856
0
    va_start(args, fmt);
1857
0
    vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
1858
0
    va_end(args);
1859
0
    return tensor;
1860
0
}
1861
1862
struct ggml_tensor * ggml_view_tensor(
1863
        struct ggml_context * ctx,
1864
0
        struct ggml_tensor  * src) {
1865
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
1866
0
    ggml_format_name(result, "%s (view)", src->name);
1867
1868
0
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
1869
0
        result->nb[i] = src->nb[i];
1870
0
    }
1871
1872
0
    return result;
1873
0
}
1874
1875
187
struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
1876
187
    struct ggml_object * obj = ctx->objects_begin;
1877
1878
187
    char * const mem_buffer = ctx->mem_buffer;
1879
1880
187
    while (obj != NULL) {
1881
175
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1882
175
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1883
175
        }
1884
1885
0
        obj = obj->next;
1886
0
    }
1887
1888
12
    return NULL;
1889
187
}
1890
1891
806
struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
1892
806
    struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
1893
806
    obj = obj->next;
1894
1895
806
    char * const mem_buffer = ctx->mem_buffer;
1896
1897
806
    while (obj != NULL) {
1898
751
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1899
751
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1900
751
        }
1901
1902
0
        obj = obj->next;
1903
0
    }
1904
1905
55
    return NULL;
1906
806
}
1907
1908
0
struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
1909
0
    struct ggml_object * obj = ctx->objects_begin;
1910
1911
0
    char * const mem_buffer = ctx->mem_buffer;
1912
1913
0
    while (obj != NULL) {
1914
0
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1915
0
            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
1916
0
            if (strcmp(cur->name, name) == 0) {
1917
0
                return cur;
1918
0
            }
1919
0
        }
1920
1921
0
        obj = obj->next;
1922
0
    }
1923
1924
0
    return NULL;
1925
0
}
1926
1927
////////////////////////////////////////////////////////////////////////////////
1928
1929
// ggml_dup
1930
1931
static struct ggml_tensor * ggml_dup_impl(
1932
        struct ggml_context * ctx,
1933
        struct ggml_tensor  * a,
1934
0
        bool                  inplace) {
1935
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1936
1937
0
    result->op     = GGML_OP_DUP;
1938
0
    result->src[0] = a;
1939
1940
0
    return result;
1941
0
}
1942
1943
struct ggml_tensor * ggml_dup(
1944
        struct ggml_context * ctx,
1945
0
        struct ggml_tensor  * a) {
1946
0
    return ggml_dup_impl(ctx, a, false);
1947
0
}
1948
1949
struct ggml_tensor * ggml_dup_inplace(
1950
        struct ggml_context * ctx,
1951
0
        struct ggml_tensor  * a) {
1952
0
    return ggml_dup_impl(ctx, a, true);
1953
0
}
1954
1955
// ggml_add
1956
1957
static struct ggml_tensor * ggml_add_impl(
1958
        struct ggml_context * ctx,
1959
        struct ggml_tensor  * a,
1960
        struct ggml_tensor  * b,
1961
0
        bool                  inplace) {
1962
0
    GGML_ASSERT(ggml_can_repeat(b, a));
1963
1964
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1965
1966
0
    result->op     = GGML_OP_ADD;
1967
0
    result->src[0] = a;
1968
0
    result->src[1] = b;
1969
1970
0
    return result;
1971
0
}
1972
1973
struct ggml_tensor * ggml_add(
1974
        struct ggml_context * ctx,
1975
        struct ggml_tensor  * a,
1976
0
        struct ggml_tensor  * b) {
1977
0
    return ggml_add_impl(ctx, a, b, false);
1978
0
}
1979
1980
struct ggml_tensor * ggml_add_inplace(
1981
        struct ggml_context * ctx,
1982
        struct ggml_tensor  * a,
1983
0
        struct ggml_tensor  * b) {
1984
0
    return ggml_add_impl(ctx, a, b, true);
1985
0
}
1986
1987
// ggml_add_cast
1988
1989
static struct ggml_tensor * ggml_add_cast_impl(
1990
        struct ggml_context * ctx,
1991
        struct ggml_tensor  * a,
1992
        struct ggml_tensor  * b,
1993
0
        enum   ggml_type      type) {
1994
    // TODO: support less-strict constraint
1995
    //       GGML_ASSERT(ggml_can_repeat(b, a));
1996
0
    GGML_ASSERT(ggml_can_repeat_rows(b, a));
1997
1998
    // currently only supported for quantized input and f16
1999
0
    GGML_ASSERT(ggml_is_quantized(a->type) ||
2000
0
                a->type == GGML_TYPE_F16 ||
2001
0
                a->type == GGML_TYPE_BF16);
2002
2003
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
2004
2005
0
    result->op     = GGML_OP_ADD;
2006
0
    result->src[0] = a;
2007
0
    result->src[1] = b;
2008
2009
0
    return result;
2010
0
}
2011
2012
struct ggml_tensor * ggml_add_cast(
2013
        struct ggml_context * ctx,
2014
        struct ggml_tensor  * a,
2015
        struct ggml_tensor  * b,
2016
0
        enum   ggml_type      type) {
2017
0
    return ggml_add_cast_impl(ctx, a, b, type);
2018
0
}
2019
2020
struct ggml_tensor * ggml_add_id(
2021
            struct ggml_context * ctx,
2022
            struct ggml_tensor  * a,
2023
            struct ggml_tensor  * b,
2024
0
            struct ggml_tensor  * ids) {
2025
2026
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
2027
0
    GGML_ASSERT(a->ne[1] == ids->ne[0]);
2028
0
    GGML_ASSERT(a->ne[2] == ids->ne[1]);
2029
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
2030
2031
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2032
2033
0
    result->op     = GGML_OP_ADD_ID;
2034
0
    result->src[0] = a;
2035
0
    result->src[1] = b;
2036
0
    result->src[2] = ids;
2037
2038
0
    return result;
2039
0
}
2040
2041
// ggml_add1
2042
2043
static struct ggml_tensor * ggml_add1_impl(
2044
        struct ggml_context * ctx,
2045
        struct ggml_tensor  * a,
2046
        struct ggml_tensor  * b,
2047
0
        bool                  inplace) {
2048
0
    GGML_ASSERT(ggml_is_scalar(b));
2049
0
    GGML_ASSERT(ggml_is_padded_1d(a));
2050
2051
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2052
2053
0
    result->op     = GGML_OP_ADD1;
2054
0
    result->src[0] = a;
2055
0
    result->src[1] = b;
2056
2057
0
    return result;
2058
0
}
2059
2060
struct ggml_tensor * ggml_add1(
2061
        struct ggml_context * ctx,
2062
        struct ggml_tensor  * a,
2063
0
        struct ggml_tensor  * b) {
2064
0
    return ggml_add1_impl(ctx, a, b, false);
2065
0
}
2066
2067
struct ggml_tensor * ggml_add1_inplace(
2068
        struct ggml_context * ctx,
2069
        struct ggml_tensor  * a,
2070
0
        struct ggml_tensor  * b) {
2071
0
    return ggml_add1_impl(ctx, a, b, true);
2072
0
}
2073
2074
// ggml_acc
2075
2076
static struct ggml_tensor * ggml_acc_impl(
2077
        struct ggml_context * ctx,
2078
        struct ggml_tensor  * a,
2079
        struct ggml_tensor  * b,
2080
        size_t                nb1,
2081
        size_t                nb2,
2082
        size_t                nb3,
2083
        size_t                offset,
2084
0
        bool                  inplace) {
2085
0
    GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
2086
0
    GGML_ASSERT(ggml_is_contiguous(a));
2087
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2088
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
2089
2090
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2091
2092
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
2093
0
    ggml_set_op_params(result, params, sizeof(params));
2094
2095
0
    result->op     = GGML_OP_ACC;
2096
0
    result->src[0] = a;
2097
0
    result->src[1] = b;
2098
2099
0
    return result;
2100
0
}
2101
2102
struct ggml_tensor * ggml_acc(
2103
        struct ggml_context * ctx,
2104
        struct ggml_tensor  * a,
2105
        struct ggml_tensor  * b,
2106
        size_t                nb1,
2107
        size_t                nb2,
2108
        size_t                nb3,
2109
0
        size_t                offset) {
2110
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
2111
0
}
2112
2113
struct ggml_tensor * ggml_acc_inplace(
2114
        struct ggml_context * ctx,
2115
        struct ggml_tensor  * a,
2116
        struct ggml_tensor  * b,
2117
        size_t                nb1,
2118
        size_t                nb2,
2119
        size_t                nb3,
2120
0
        size_t                offset) {
2121
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
2122
0
}
2123
2124
// ggml_sub
2125
2126
static struct ggml_tensor * ggml_sub_impl(
2127
        struct ggml_context * ctx,
2128
        struct ggml_tensor  * a,
2129
        struct ggml_tensor  * b,
2130
0
        bool                  inplace) {
2131
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2132
2133
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2134
2135
0
    result->op     = GGML_OP_SUB;
2136
0
    result->src[0] = a;
2137
0
    result->src[1] = b;
2138
2139
0
    return result;
2140
0
}
2141
2142
struct ggml_tensor * ggml_sub(
2143
        struct ggml_context * ctx,
2144
        struct ggml_tensor  * a,
2145
0
        struct ggml_tensor  * b) {
2146
0
    return ggml_sub_impl(ctx, a, b, false);
2147
0
}
2148
2149
struct ggml_tensor * ggml_sub_inplace(
2150
        struct ggml_context * ctx,
2151
        struct ggml_tensor  * a,
2152
0
        struct ggml_tensor  * b) {
2153
0
    return ggml_sub_impl(ctx, a, b, true);
2154
0
}
2155
2156
// ggml_mul
2157
2158
static struct ggml_tensor * ggml_mul_impl(
2159
        struct ggml_context * ctx,
2160
        struct ggml_tensor  * a,
2161
        struct ggml_tensor  * b,
2162
0
        bool                  inplace) {
2163
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2164
2165
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2166
2167
0
    result->op     = GGML_OP_MUL;
2168
0
    result->src[0] = a;
2169
0
    result->src[1] = b;
2170
2171
0
    return result;
2172
0
}
2173
2174
struct ggml_tensor * ggml_mul(
2175
        struct ggml_context * ctx,
2176
        struct ggml_tensor  * a,
2177
0
        struct ggml_tensor  * b) {
2178
0
    return ggml_mul_impl(ctx, a, b, false);
2179
0
}
2180
2181
struct ggml_tensor * ggml_mul_inplace(
2182
        struct ggml_context * ctx,
2183
        struct ggml_tensor  * a,
2184
0
        struct ggml_tensor  * b) {
2185
0
    return ggml_mul_impl(ctx, a, b, true);
2186
0
}
2187
2188
// ggml_div
2189
2190
static struct ggml_tensor * ggml_div_impl(
2191
        struct ggml_context * ctx,
2192
        struct ggml_tensor  * a,
2193
        struct ggml_tensor  * b,
2194
0
        bool                  inplace) {
2195
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2196
2197
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2198
2199
0
    result->op     = GGML_OP_DIV;
2200
0
    result->src[0] = a;
2201
0
    result->src[1] = b;
2202
2203
0
    return result;
2204
0
}
2205
2206
struct ggml_tensor * ggml_div(
2207
        struct ggml_context * ctx,
2208
        struct ggml_tensor  * a,
2209
0
        struct ggml_tensor  * b) {
2210
0
    return ggml_div_impl(ctx, a, b, false);
2211
0
}
2212
2213
struct ggml_tensor * ggml_div_inplace(
2214
        struct ggml_context * ctx,
2215
        struct ggml_tensor  * a,
2216
0
        struct ggml_tensor  * b) {
2217
0
    return ggml_div_impl(ctx, a, b, true);
2218
0
}
2219
2220
// ggml_sqr
2221
2222
static struct ggml_tensor * ggml_sqr_impl(
2223
        struct ggml_context * ctx,
2224
        struct ggml_tensor  * a,
2225
0
        bool                  inplace) {
2226
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2227
2228
0
    result->op     = GGML_OP_SQR;
2229
0
    result->src[0] = a;
2230
2231
0
    return result;
2232
0
}
2233
2234
struct ggml_tensor * ggml_sqr(
2235
        struct ggml_context * ctx,
2236
0
        struct ggml_tensor  * a) {
2237
0
    return ggml_sqr_impl(ctx, a, false);
2238
0
}
2239
2240
struct ggml_tensor * ggml_sqr_inplace(
2241
        struct ggml_context * ctx,
2242
0
        struct ggml_tensor  * a) {
2243
0
    return ggml_sqr_impl(ctx, a, true);
2244
0
}
2245
2246
// ggml_sqrt
2247
2248
static struct ggml_tensor * ggml_sqrt_impl(
2249
        struct ggml_context * ctx,
2250
        struct ggml_tensor  * a,
2251
0
        bool                  inplace) {
2252
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2253
2254
0
    result->op     = GGML_OP_SQRT;
2255
0
    result->src[0] = a;
2256
2257
0
    return result;
2258
0
}
2259
2260
struct ggml_tensor * ggml_sqrt(
2261
        struct ggml_context * ctx,
2262
0
        struct ggml_tensor  * a) {
2263
0
    return ggml_sqrt_impl(ctx, a, false);
2264
0
}
2265
2266
struct ggml_tensor * ggml_sqrt_inplace(
2267
        struct ggml_context * ctx,
2268
0
        struct ggml_tensor  * a) {
2269
0
    return ggml_sqrt_impl(ctx, a, true);
2270
0
}
2271
2272
// ggml_log
2273
2274
static struct ggml_tensor * ggml_log_impl(
2275
        struct ggml_context * ctx,
2276
        struct ggml_tensor  * a,
2277
0
        bool                  inplace) {
2278
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2279
2280
0
    result->op     = GGML_OP_LOG;
2281
0
    result->src[0] = a;
2282
2283
0
    return result;
2284
0
}
2285
2286
struct ggml_tensor * ggml_log(
2287
        struct ggml_context * ctx,
2288
0
        struct ggml_tensor  * a) {
2289
0
    return ggml_log_impl(ctx, a, false);
2290
0
}
2291
2292
struct ggml_tensor * ggml_log_inplace(
2293
        struct ggml_context * ctx,
2294
0
        struct ggml_tensor  * a) {
2295
0
    return ggml_log_impl(ctx, a, true);
2296
0
}
2297
2298
struct ggml_tensor * ggml_expm1(
2299
        struct ggml_context * ctx,
2300
0
        struct ggml_tensor  * a) {
2301
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
2302
0
}
2303
2304
struct ggml_tensor * ggml_expm1_inplace(
2305
        struct ggml_context * ctx,
2306
0
        struct ggml_tensor  * a) {
2307
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
2308
0
}
2309
2310
struct ggml_tensor * ggml_softplus(
2311
        struct ggml_context * ctx,
2312
0
        struct ggml_tensor  * a) {
2313
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2314
0
}
2315
2316
struct ggml_tensor * ggml_softplus_inplace(
2317
        struct ggml_context * ctx,
2318
0
        struct ggml_tensor  * a) {
2319
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2320
0
}
2321
2322
// ggml_sin
2323
2324
static struct ggml_tensor * ggml_sin_impl(
2325
        struct ggml_context * ctx,
2326
        struct ggml_tensor  * a,
2327
0
        bool                  inplace) {
2328
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2329
2330
0
    result->op     = GGML_OP_SIN;
2331
0
    result->src[0] = a;
2332
2333
0
    return result;
2334
0
}
2335
2336
struct ggml_tensor * ggml_sin(
2337
        struct ggml_context * ctx,
2338
0
        struct ggml_tensor  * a) {
2339
0
    return ggml_sin_impl(ctx, a, false);
2340
0
}
2341
2342
struct ggml_tensor * ggml_sin_inplace(
2343
        struct ggml_context * ctx,
2344
0
        struct ggml_tensor  * a) {
2345
0
    return ggml_sin_impl(ctx, a, true);
2346
0
}
2347
2348
// ggml_cos
2349
2350
static struct ggml_tensor * ggml_cos_impl(
2351
        struct ggml_context * ctx,
2352
        struct ggml_tensor  * a,
2353
0
        bool                  inplace) {
2354
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2355
2356
0
    result->op     = GGML_OP_COS;
2357
0
    result->src[0] = a;
2358
2359
0
    return result;
2360
0
}
2361
2362
struct ggml_tensor * ggml_cos(
2363
        struct ggml_context * ctx,
2364
0
        struct ggml_tensor  * a) {
2365
0
    return ggml_cos_impl(ctx, a, false);
2366
0
}
2367
2368
struct ggml_tensor * ggml_cos_inplace(
2369
        struct ggml_context * ctx,
2370
0
        struct ggml_tensor  * a) {
2371
0
    return ggml_cos_impl(ctx, a, true);
2372
0
}
2373
2374
// ggml_sum
2375
2376
struct ggml_tensor * ggml_sum(
2377
        struct ggml_context * ctx,
2378
0
        struct ggml_tensor  * a) {
2379
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
2380
2381
0
    result->op     = GGML_OP_SUM;
2382
0
    result->src[0] = a;
2383
2384
0
    return result;
2385
0
}
2386
2387
// ggml_sum_rows
2388
2389
struct ggml_tensor * ggml_sum_rows(
2390
        struct ggml_context * ctx,
2391
0
        struct ggml_tensor  * a) {
2392
0
    int64_t ne[GGML_MAX_DIMS] = { 1 };
2393
0
    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
2394
0
        ne[i] = a->ne[i];
2395
0
    }
2396
2397
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2398
2399
0
    result->op     = GGML_OP_SUM_ROWS;
2400
0
    result->src[0] = a;
2401
2402
0
    return result;
2403
0
}
2404
2405
// ggml_cumsum
2406
2407
struct ggml_tensor * ggml_cumsum(
2408
        struct ggml_context * ctx,
2409
0
        struct ggml_tensor  * a) {
2410
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2411
2412
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2413
2414
0
    result->op     = GGML_OP_CUMSUM;
2415
0
    result->src[0] = a;
2416
2417
0
    return result;
2418
0
}
2419
2420
// ggml_mean
2421
2422
struct ggml_tensor * ggml_mean(
2423
        struct ggml_context * ctx,
2424
0
        struct ggml_tensor  * a) {
2425
0
    int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
2426
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
2427
2428
0
    result->op     = GGML_OP_MEAN;
2429
0
    result->src[0] = a;
2430
2431
0
    return result;
2432
0
}
2433
2434
// ggml_argmax
2435
2436
struct ggml_tensor * ggml_argmax(
2437
        struct ggml_context * ctx,
2438
0
        struct ggml_tensor  * a) {
2439
0
    GGML_ASSERT(ggml_is_matrix(a));
2440
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
2441
2442
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
2443
2444
0
    result->op     = GGML_OP_ARGMAX;
2445
0
    result->src[0] = a;
2446
2447
0
    return result;
2448
0
}
2449
2450
// ggml_count_equal
2451
2452
struct ggml_tensor * ggml_count_equal(
2453
        struct ggml_context * ctx,
2454
        struct ggml_tensor  * a,
2455
0
        struct ggml_tensor  * b) {
2456
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
2457
2458
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
2459
2460
0
    result->op     = GGML_OP_COUNT_EQUAL;
2461
0
    result->src[0] = a;
2462
0
    result->src[1] = b;
2463
2464
0
    return result;
2465
0
}
2466
2467
// ggml_repeat
2468
2469
struct ggml_tensor * ggml_repeat(
2470
        struct ggml_context * ctx,
2471
        struct ggml_tensor  * a,
2472
0
        struct ggml_tensor  * b) {
2473
0
    GGML_ASSERT(ggml_can_repeat(a, b));
2474
2475
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2476
2477
0
    result->op     = GGML_OP_REPEAT;
2478
0
    result->src[0] = a;
2479
2480
0
    return result;
2481
0
}
2482
2483
struct ggml_tensor * ggml_repeat_4d(
2484
        struct ggml_context * ctx,
2485
        struct ggml_tensor * a,
2486
0
        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
2487
0
    const bool can_repeat = ggml_is_empty(a) || (
2488
0
        (ne0 % a->ne[0] == 0) &&
2489
0
        (ne1 % a->ne[1] == 0) &&
2490
0
        (ne2 % a->ne[2] == 0) &&
2491
0
        (ne3 % a->ne[3] == 0)
2492
0
    );
2493
0
    GGML_ASSERT(can_repeat);
2494
2495
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
2496
2497
0
    result->op     = GGML_OP_REPEAT;
2498
0
    result->src[0] = a;
2499
2500
0
    return result;
2501
0
}
2502
2503
// ggml_repeat_back
2504
2505
struct ggml_tensor * ggml_repeat_back(
2506
        struct ggml_context * ctx,
2507
        struct ggml_tensor  * a,
2508
0
        struct ggml_tensor  * b) {
2509
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2510
2511
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2512
2513
0
    result->op     = GGML_OP_REPEAT_BACK;
2514
0
    result->src[0] = a;
2515
2516
0
    return result;
2517
0
}
2518
2519
// ggml_concat
2520
2521
struct ggml_tensor * ggml_concat(
2522
    struct ggml_context * ctx,
2523
    struct ggml_tensor  * a,
2524
    struct ggml_tensor  * b,
2525
0
    int                   dim) {
2526
0
    GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
2527
0
    GGML_ASSERT(a->type == b->type);
2528
2529
0
    int64_t ne[GGML_MAX_DIMS];
2530
0
    for (int d = 0; d < GGML_MAX_DIMS; ++d) {
2531
0
        if (d == dim) {
2532
0
            ne[d] = a->ne[d] + b->ne[d];
2533
0
            continue;
2534
0
        }
2535
0
        GGML_ASSERT(a->ne[d] == b->ne[d]);
2536
0
        ne[d] = a->ne[d];
2537
0
    }
2538
2539
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2540
2541
0
    ggml_set_op_params_i32(result, 0, dim);
2542
2543
0
    result->op     = GGML_OP_CONCAT;
2544
0
    result->src[0] = a;
2545
0
    result->src[1] = b;
2546
2547
0
    return result;
2548
0
}
2549
2550
// ggml_abs
2551
2552
struct ggml_tensor * ggml_abs(
2553
        struct ggml_context * ctx,
2554
0
        struct ggml_tensor  * a) {
2555
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
2556
0
}
2557
2558
struct ggml_tensor * ggml_abs_inplace(
2559
        struct ggml_context * ctx,
2560
0
        struct ggml_tensor  * a) {
2561
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
2562
0
}
2563
2564
// ggml_sgn
2565
2566
struct ggml_tensor * ggml_sgn(
2567
        struct ggml_context * ctx,
2568
0
        struct ggml_tensor  * a) {
2569
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
2570
0
}
2571
2572
struct ggml_tensor * ggml_sgn_inplace(
2573
        struct ggml_context * ctx,
2574
0
        struct ggml_tensor  * a) {
2575
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
2576
0
}
2577
2578
// ggml_neg
2579
2580
struct ggml_tensor * ggml_neg(
2581
        struct ggml_context * ctx,
2582
0
        struct ggml_tensor  * a) {
2583
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
2584
0
}
2585
2586
struct ggml_tensor * ggml_neg_inplace(
2587
        struct ggml_context * ctx,
2588
0
        struct ggml_tensor  * a) {
2589
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
2590
0
}
2591
2592
// ggml_step
2593
2594
struct ggml_tensor * ggml_step(
2595
        struct ggml_context * ctx,
2596
0
        struct ggml_tensor  * a) {
2597
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
2598
0
}
2599
2600
struct ggml_tensor * ggml_step_inplace(
2601
        struct ggml_context * ctx,
2602
0
        struct ggml_tensor  * a) {
2603
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
2604
0
}
2605
2606
// ggml_tanh
2607
2608
struct ggml_tensor * ggml_tanh(
2609
        struct ggml_context * ctx,
2610
0
        struct ggml_tensor  * a) {
2611
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
2612
0
}
2613
2614
struct ggml_tensor * ggml_tanh_inplace(
2615
        struct ggml_context * ctx,
2616
0
        struct ggml_tensor  * a) {
2617
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
2618
0
}
2619
2620
// ggml_elu
2621
2622
struct ggml_tensor * ggml_elu(
2623
    struct ggml_context * ctx,
2624
0
    struct ggml_tensor  * a) {
2625
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
2626
0
}
2627
2628
struct ggml_tensor * ggml_elu_inplace(
2629
    struct ggml_context * ctx,
2630
0
    struct ggml_tensor  * a) {
2631
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
2632
0
}
2633
2634
// ggml_relu
2635
2636
struct ggml_tensor * ggml_relu(
2637
        struct ggml_context * ctx,
2638
0
        struct ggml_tensor  * a) {
2639
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
2640
0
}
2641
2642
struct ggml_tensor * ggml_relu_inplace(
2643
        struct ggml_context * ctx,
2644
0
        struct ggml_tensor  * a) {
2645
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
2646
0
}
2647
2648
// ggml_leaky_relu
2649
2650
struct ggml_tensor * ggml_leaky_relu(
2651
        struct ggml_context * ctx,
2652
        struct ggml_tensor  * a,
2653
        float                 negative_slope,
2654
0
        bool                  inplace) {
2655
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2656
2657
0
    ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
2658
2659
0
    result->op     = GGML_OP_LEAKY_RELU;
2660
0
    result->src[0] = a;
2661
2662
0
    return result;
2663
0
}
2664
2665
// ggml_sigmoid
2666
2667
struct ggml_tensor * ggml_sigmoid(
2668
        struct ggml_context * ctx,
2669
0
        struct ggml_tensor  * a) {
2670
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
2671
0
}
2672
2673
struct ggml_tensor * ggml_sigmoid_inplace(
2674
        struct ggml_context * ctx,
2675
0
        struct ggml_tensor  * a) {
2676
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
2677
0
}
2678
2679
// ggml_gelu
2680
2681
struct ggml_tensor * ggml_gelu(
2682
        struct ggml_context * ctx,
2683
0
        struct ggml_tensor  * a) {
2684
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
2685
0
}
2686
2687
struct ggml_tensor * ggml_gelu_inplace(
2688
        struct ggml_context * ctx,
2689
0
        struct ggml_tensor  * a) {
2690
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
2691
0
}
2692
2693
// ggml_gelu_erf
2694
2695
struct ggml_tensor * ggml_gelu_erf(
2696
        struct ggml_context * ctx,
2697
0
        struct ggml_tensor  * a) {
2698
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
2699
0
}
2700
2701
struct ggml_tensor * ggml_gelu_erf_inplace(
2702
        struct ggml_context * ctx,
2703
0
        struct ggml_tensor  * a) {
2704
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
2705
0
}
2706
2707
// ggml_gelu_quick
2708
2709
struct ggml_tensor * ggml_gelu_quick(
2710
        struct ggml_context * ctx,
2711
0
        struct ggml_tensor  * a) {
2712
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2713
0
}
2714
2715
struct ggml_tensor * ggml_gelu_quick_inplace(
2716
        struct ggml_context * ctx,
2717
0
        struct ggml_tensor  * a) {
2718
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2719
0
}
2720
2721
// ggml_silu
2722
2723
struct ggml_tensor * ggml_silu(
2724
        struct ggml_context * ctx,
2725
0
        struct ggml_tensor  * a) {
2726
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
2727
0
}
2728
2729
struct ggml_tensor * ggml_silu_inplace(
2730
        struct ggml_context * ctx,
2731
0
        struct ggml_tensor  * a) {
2732
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
2733
0
}
2734
2735
// ggml_xielu
2736
2737
struct ggml_tensor * ggml_xielu(
2738
        struct ggml_context * ctx,
2739
        struct ggml_tensor  * a,
2740
        float alpha_n,
2741
        float alpha_p,
2742
        float beta,
2743
0
        float eps) {
2744
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2745
2746
0
    ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
2747
0
    ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
2748
0
    ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
2749
0
    ggml_set_op_params_f32(result, 3, beta);
2750
0
    ggml_set_op_params_f32(result, 4, eps);
2751
2752
0
    result->op     = GGML_OP_UNARY;
2753
0
    result->src[0] = a;
2754
2755
0
    return result;
2756
0
}
2757
2758
// ggml_silu_back
2759
2760
struct ggml_tensor * ggml_silu_back(
2761
        struct ggml_context * ctx,
2762
        struct ggml_tensor  * a,
2763
0
        struct ggml_tensor  * b) {
2764
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2765
2766
0
    result->op     = GGML_OP_SILU_BACK;
2767
0
    result->src[0] = a;
2768
0
    result->src[1] = b;
2769
2770
0
    return result;
2771
0
}
2772
2773
// ggml hardswish
2774
2775
struct ggml_tensor * ggml_hardswish(
2776
        struct ggml_context * ctx,
2777
0
        struct ggml_tensor  * a) {
2778
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
2779
0
}
2780
2781
// ggml hardsigmoid
2782
2783
struct ggml_tensor * ggml_hardsigmoid(
2784
        struct ggml_context * ctx,
2785
0
        struct ggml_tensor  * a) {
2786
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
2787
0
}
2788
2789
// ggml exp
2790
2791
struct ggml_tensor * ggml_exp(
2792
        struct ggml_context * ctx,
2793
0
        struct ggml_tensor  * a) {
2794
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
2795
0
}
2796
2797
struct ggml_tensor * ggml_exp_inplace(
2798
        struct ggml_context * ctx,
2799
0
        struct ggml_tensor  * a) {
2800
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
2801
0
}
2802
2803
// ggml_glu
2804
2805
static struct ggml_tensor * ggml_glu_impl(
2806
        struct ggml_context * ctx,
2807
        struct ggml_tensor  * a,
2808
        struct ggml_tensor  * b,
2809
        enum ggml_glu_op      op,
2810
0
        bool                  swapped) {
2811
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
2812
2813
0
    if (b) {
2814
0
        GGML_ASSERT(ggml_is_contiguous_1(b));
2815
0
        GGML_ASSERT(ggml_are_same_shape(a, b));
2816
0
        GGML_ASSERT(a->type == b->type);
2817
0
    }
2818
2819
0
    int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i];
2820
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0);
2821
2822
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
2823
0
    ggml_set_op_params_i32(result, 1, (int32_t) swapped);
2824
2825
0
    result->op     = GGML_OP_GLU;
2826
0
    result->src[0] = a;
2827
0
    result->src[1] = b;
2828
2829
0
    return result;
2830
0
}
2831
2832
// ggml_floor
2833
2834
struct ggml_tensor * ggml_floor(
2835
        struct ggml_context * ctx,
2836
0
        struct ggml_tensor  * a) {
2837
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2838
0
}
2839
2840
struct ggml_tensor * ggml_floor_inplace(
2841
        struct ggml_context * ctx,
2842
0
        struct ggml_tensor  * a) {
2843
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2844
0
}
2845
2846
// ggml_ceil
2847
2848
struct ggml_tensor * ggml_ceil(
2849
        struct ggml_context * ctx,
2850
0
        struct ggml_tensor  * a) {
2851
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2852
0
}
2853
2854
struct ggml_tensor * ggml_ceil_inplace(
2855
        struct ggml_context * ctx,
2856
0
        struct ggml_tensor  * a) {
2857
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2858
0
}
2859
2860
//ggml_round
2861
2862
struct ggml_tensor * ggml_round(
2863
        struct ggml_context * ctx,
2864
0
        struct ggml_tensor  * a) {
2865
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2866
0
}
2867
2868
struct ggml_tensor * ggml_round_inplace(
2869
        struct ggml_context * ctx,
2870
0
        struct ggml_tensor  * a) {
2871
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2872
0
}
2873
2874
//ggml_trunc
2875
2876
struct ggml_tensor * ggml_trunc(
2877
        struct ggml_context * ctx,
2878
0
        struct ggml_tensor  * a) {
2879
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2880
0
}
2881
2882
struct ggml_tensor * ggml_trunc_inplace(
2883
        struct ggml_context * ctx,
2884
0
        struct ggml_tensor  * a) {
2885
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2886
0
}
2887
2888
struct ggml_tensor * ggml_glu(
2889
        struct ggml_context * ctx,
2890
        struct ggml_tensor  * a,
2891
        enum ggml_glu_op      op,
2892
0
        bool                  swapped) {
2893
0
    return ggml_glu_impl(ctx, a, NULL, op, swapped);
2894
0
}
2895
2896
struct ggml_tensor * ggml_glu_split(
2897
        struct ggml_context * ctx,
2898
        struct ggml_tensor  * a,
2899
        struct ggml_tensor  * b,
2900
0
        enum ggml_glu_op      op) {
2901
0
    return ggml_glu_impl(ctx, a, b, op, false);
2902
0
}
2903
2904
// ggml_reglu
2905
2906
struct ggml_tensor * ggml_reglu(
2907
        struct ggml_context * ctx,
2908
0
        struct ggml_tensor  * a) {
2909
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false);
2910
0
}
2911
2912
struct ggml_tensor * ggml_reglu_swapped(
2913
        struct ggml_context * ctx,
2914
0
        struct ggml_tensor  * a) {
2915
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true);
2916
0
}
2917
2918
struct ggml_tensor * ggml_reglu_split(
2919
        struct ggml_context * ctx,
2920
        struct ggml_tensor  * a,
2921
0
        struct ggml_tensor  * b) {
2922
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false);
2923
0
}
2924
2925
// ggml_geglu
2926
2927
struct ggml_tensor * ggml_geglu(
2928
        struct ggml_context * ctx,
2929
0
        struct ggml_tensor  * a) {
2930
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false);
2931
0
}
2932
2933
struct ggml_tensor * ggml_geglu_swapped(
2934
        struct ggml_context * ctx,
2935
0
        struct ggml_tensor  * a) {
2936
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true);
2937
0
}
2938
2939
struct ggml_tensor * ggml_geglu_split(
2940
        struct ggml_context * ctx,
2941
        struct ggml_tensor  * a,
2942
0
        struct ggml_tensor  * b) {
2943
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false);
2944
0
}
2945
2946
// ggml_swiglu
2947
2948
struct ggml_tensor * ggml_swiglu(
2949
        struct ggml_context * ctx,
2950
0
        struct ggml_tensor  * a) {
2951
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false);
2952
0
}
2953
2954
struct ggml_tensor * ggml_swiglu_swapped(
2955
        struct ggml_context * ctx,
2956
0
        struct ggml_tensor  * a) {
2957
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true);
2958
0
}
2959
2960
struct ggml_tensor * ggml_swiglu_split(
2961
        struct ggml_context * ctx,
2962
        struct ggml_tensor  * a,
2963
0
        struct ggml_tensor  * b) {
2964
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
2965
0
}
2966
2967
// ggml_geglu_erf
2968
2969
struct ggml_tensor * ggml_geglu_erf(
2970
        struct ggml_context * ctx,
2971
0
        struct ggml_tensor  * a) {
2972
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
2973
0
}
2974
2975
struct ggml_tensor * ggml_geglu_erf_swapped(
2976
        struct ggml_context * ctx,
2977
0
        struct ggml_tensor  * a) {
2978
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
2979
0
}
2980
2981
struct ggml_tensor * ggml_geglu_erf_split(
2982
        struct ggml_context * ctx,
2983
        struct ggml_tensor  * a,
2984
0
        struct ggml_tensor  * b) {
2985
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
2986
0
}
2987
2988
// ggml_geglu_quick
2989
2990
struct ggml_tensor * ggml_geglu_quick(
2991
        struct ggml_context * ctx,
2992
0
        struct ggml_tensor  * a) {
2993
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
2994
0
}
2995
2996
struct ggml_tensor * ggml_geglu_quick_swapped(
2997
        struct ggml_context * ctx,
2998
0
        struct ggml_tensor  * a) {
2999
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
3000
0
}
3001
3002
struct ggml_tensor * ggml_geglu_quick_split(
3003
        struct ggml_context * ctx,
3004
        struct ggml_tensor  * a,
3005
0
        struct ggml_tensor  * b) {
3006
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
3007
0
}
3008
3009
struct ggml_tensor * ggml_swiglu_oai(
3010
        struct ggml_context * ctx,
3011
        struct ggml_tensor  * a,
3012
        struct ggml_tensor  * b,
3013
        float                 alpha,
3014
0
        float                 limit) {
3015
0
    struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false);
3016
0
    ggml_set_op_params_f32(result, 2, alpha);
3017
0
    ggml_set_op_params_f32(result, 3, limit);
3018
3019
0
    return result;
3020
0
}
3021
3022
// ggml_norm
3023
3024
static struct ggml_tensor * ggml_norm_impl(
3025
        struct ggml_context * ctx,
3026
        struct ggml_tensor  * a,
3027
        float                 eps,
3028
0
        bool                  inplace) {
3029
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3030
3031
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3032
3033
0
    result->op     = GGML_OP_NORM;
3034
0
    result->src[0] = a;
3035
3036
0
    return result;
3037
0
}
3038
3039
struct ggml_tensor * ggml_norm(
3040
        struct ggml_context * ctx,
3041
        struct ggml_tensor  * a,
3042
0
        float                 eps) {
3043
0
    return ggml_norm_impl(ctx, a, eps, false);
3044
0
}
3045
3046
struct ggml_tensor * ggml_norm_inplace(
3047
        struct ggml_context * ctx,
3048
        struct ggml_tensor  * a,
3049
0
        float                 eps) {
3050
0
    return ggml_norm_impl(ctx, a, eps, true);
3051
0
}
3052
3053
// ggml_rms_norm
3054
3055
static struct ggml_tensor * ggml_rms_norm_impl(
3056
        struct ggml_context * ctx,
3057
        struct ggml_tensor  * a,
3058
        float                 eps,
3059
0
        bool                  inplace) {
3060
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3061
3062
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3063
3064
0
    result->op     = GGML_OP_RMS_NORM;
3065
0
    result->src[0] = a;
3066
3067
0
    return result;
3068
0
}
3069
3070
struct ggml_tensor * ggml_rms_norm(
3071
        struct ggml_context * ctx,
3072
        struct ggml_tensor  * a,
3073
0
        float                 eps) {
3074
0
    return ggml_rms_norm_impl(ctx, a, eps, false);
3075
0
}
3076
3077
struct ggml_tensor * ggml_rms_norm_inplace(
3078
        struct ggml_context * ctx,
3079
        struct ggml_tensor  * a,
3080
0
        float                 eps) {
3081
0
    return ggml_rms_norm_impl(ctx, a, eps, true);
3082
0
}
3083
3084
// ggml_rms_norm_back
3085
3086
struct ggml_tensor * ggml_rms_norm_back(
3087
        struct ggml_context * ctx,
3088
        struct ggml_tensor  * a,
3089
        struct ggml_tensor  * b,
3090
0
        float                 eps) {
3091
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3092
3093
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3094
3095
0
    result->op     = GGML_OP_RMS_NORM_BACK;
3096
0
    result->src[0] = a;
3097
0
    result->src[1] = b;
3098
3099
0
    return result;
3100
0
}
3101
3102
// ggml_group_norm
3103
3104
static struct ggml_tensor * ggml_group_norm_impl(
3105
        struct ggml_context * ctx,
3106
        struct ggml_tensor  * a,
3107
        int                   n_groups,
3108
        float                 eps,
3109
0
        bool                  inplace) {
3110
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3111
3112
0
    ggml_set_op_params_i32(result, 0, n_groups);
3113
0
    ggml_set_op_params_f32(result, 1, eps);
3114
3115
0
    result->op     = GGML_OP_GROUP_NORM;
3116
0
    result->src[0] = a;
3117
3118
0
    return result;
3119
0
}
3120
3121
struct ggml_tensor * ggml_group_norm(
3122
        struct ggml_context * ctx,
3123
        struct ggml_tensor  * a,
3124
        int                   n_groups,
3125
0
        float                 eps) {
3126
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
3127
0
}
3128
3129
struct ggml_tensor * ggml_group_norm_inplace(
3130
        struct ggml_context * ctx,
3131
        struct ggml_tensor  * a,
3132
        int                   n_groups,
3133
0
        float                 eps) {
3134
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
3135
0
}
3136
3137
// ggml_l2_norm
3138
3139
static struct ggml_tensor * ggml_l2_norm_impl(
3140
        struct ggml_context * ctx,
3141
        struct ggml_tensor  * a,
3142
        float                 eps,
3143
0
        bool                  inplace) {
3144
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3145
3146
0
    ggml_set_op_params_f32(result, 0, eps);
3147
3148
0
    result->op     = GGML_OP_L2_NORM;
3149
0
    result->src[0] = a;
3150
3151
0
    return result;
3152
0
}
3153
3154
struct ggml_tensor * ggml_l2_norm(
3155
        struct ggml_context * ctx,
3156
        struct ggml_tensor  * a,
3157
0
        float                 eps) {
3158
0
    return ggml_l2_norm_impl(ctx, a, eps, false);
3159
0
}
3160
3161
struct ggml_tensor * ggml_l2_norm_inplace(
3162
        struct ggml_context * ctx,
3163
        struct ggml_tensor  * a,
3164
0
        float                 eps) {
3165
0
    return ggml_l2_norm_impl(ctx, a, eps, true);
3166
0
}
3167
3168
// ggml_mul_mat
3169
3170
0
static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3171
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3172
3173
0
    return (t0->ne[0]           == t1->ne[0])  &&
3174
0
           (t1->ne[2]%t0->ne[2] == 0)          && // verify t0 is broadcastable
3175
0
           (t1->ne[3]%t0->ne[3] == 0);
3176
0
}
3177
3178
struct ggml_tensor * ggml_mul_mat(
3179
        struct ggml_context * ctx,
3180
        struct ggml_tensor  * a,
3181
0
        struct ggml_tensor  * b) {
3182
0
    GGML_ASSERT(ggml_can_mul_mat(a, b));
3183
0
    GGML_ASSERT(!ggml_is_transposed(a));
3184
3185
0
    const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
3186
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3187
3188
0
    result->op     = GGML_OP_MUL_MAT;
3189
0
    result->src[0] = a;
3190
0
    result->src[1] = b;
3191
3192
0
    return result;
3193
0
}
3194
3195
void ggml_mul_mat_set_prec(
3196
        struct ggml_tensor * a,
3197
0
        enum ggml_prec       prec) {
3198
0
    GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
3199
3200
0
    const int32_t prec_i32 = (int32_t) prec;
3201
3202
0
    ggml_set_op_params_i32(a, 0, prec_i32);
3203
0
}
3204
3205
// ggml_mul_mat_id
3206
3207
/*
3208
    c = ggml_mul_mat_id(ctx, as, b, ids);
3209
3210
    as  -> [cols, rows, n_expert]
3211
    b   -> [cols, n_expert_used, n_tokens]
3212
    ids -> [n_expert_used, n_tokens] (i32)
3213
    c   -> [rows, n_expert_used, n_tokens]
3214
3215
    in b, n_expert_used can be broadcasted to match the n_expert_used of ids
3216
3217
    c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
3218
*/
3219
struct ggml_tensor * ggml_mul_mat_id(
3220
        struct ggml_context * ctx,
3221
        struct ggml_tensor  * as,
3222
        struct ggml_tensor  * b,
3223
0
        struct ggml_tensor  * ids) {
3224
0
    GGML_ASSERT(!ggml_is_transposed(as));
3225
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
3226
3227
0
    GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
3228
0
    GGML_ASSERT(b->ne[3] == 1); // b is 3d
3229
0
    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
3230
0
    GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
3231
0
    GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
3232
0
    GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
3233
3234
0
    const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
3235
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3236
3237
0
    result->op     = GGML_OP_MUL_MAT_ID;
3238
0
    result->src[0] = as;
3239
0
    result->src[1] = b;
3240
0
    result->src[2] = ids;
3241
3242
0
    return result;
3243
0
}
3244
3245
// ggml_out_prod
3246
3247
0
static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3248
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3249
3250
0
    return (t0->ne[1] == t1->ne[1])   &&
3251
0
           (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
3252
0
           (t1->ne[3]%t0->ne[3] == 0);
3253
0
}
3254
3255
struct ggml_tensor * ggml_out_prod(
3256
        struct ggml_context * ctx,
3257
        struct ggml_tensor  * a,
3258
0
        struct ggml_tensor  * b) {
3259
0
    GGML_ASSERT(ggml_can_out_prod(a, b));
3260
0
    GGML_ASSERT(!ggml_is_transposed(a));
3261
3262
    // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
3263
0
    const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
3264
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3265
3266
0
    result->op     = GGML_OP_OUT_PROD;
3267
0
    result->src[0] = a;
3268
0
    result->src[1] = b;
3269
3270
0
    return result;
3271
0
}
3272
3273
// ggml_scale
3274
3275
static struct ggml_tensor * ggml_scale_impl(
3276
        struct ggml_context * ctx,
3277
        struct ggml_tensor  * a,
3278
        float                 s,
3279
        float                 b,
3280
0
        bool                  inplace) {
3281
0
    GGML_ASSERT(ggml_is_padded_1d(a));
3282
3283
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3284
3285
0
    float params[2] = { s, b };
3286
0
    ggml_set_op_params(result, &params, sizeof(params));
3287
3288
0
    result->op     = GGML_OP_SCALE;
3289
0
    result->src[0] = a;
3290
3291
0
    return result;
3292
0
}
3293
3294
struct ggml_tensor * ggml_scale(
3295
        struct ggml_context * ctx,
3296
        struct ggml_tensor  * a,
3297
0
        float                 s) {
3298
0
    return ggml_scale_impl(ctx, a, s, 0.0, false);
3299
0
}
3300
3301
struct ggml_tensor * ggml_scale_inplace(
3302
        struct ggml_context * ctx,
3303
        struct ggml_tensor  * a,
3304
0
        float                 s) {
3305
0
    return ggml_scale_impl(ctx, a, s, 0.0, true);
3306
0
}
3307
3308
struct ggml_tensor * ggml_scale_bias(
3309
        struct ggml_context * ctx,
3310
        struct ggml_tensor  * a,
3311
        float                 s,
3312
0
        float                 b) {
3313
0
    return ggml_scale_impl(ctx, a, s, b, false);
3314
0
}
3315
3316
struct ggml_tensor * ggml_scale_bias_inplace(
3317
        struct ggml_context * ctx,
3318
        struct ggml_tensor  * a,
3319
        float                 s,
3320
0
        float                 b) {
3321
0
    return ggml_scale_impl(ctx, a, s, b, true);
3322
0
}
3323
3324
// ggml_set
3325
3326
static struct ggml_tensor * ggml_set_impl(
3327
        struct ggml_context * ctx,
3328
        struct ggml_tensor  * a,
3329
        struct ggml_tensor  * b,
3330
        size_t                nb1,
3331
        size_t                nb2,
3332
        size_t                nb3,
3333
        size_t                offset,
3334
0
        bool                  inplace) {
3335
0
    GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
3336
3337
    // make a view of the destination
3338
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3339
3340
0
    GGML_ASSERT(offset < (size_t)(1 << 30));
3341
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
3342
0
    ggml_set_op_params(result, params, sizeof(params));
3343
3344
0
    result->op     = GGML_OP_SET;
3345
0
    result->src[0] = a;
3346
0
    result->src[1] = b;
3347
3348
0
    return result;
3349
0
}
3350
3351
struct ggml_tensor * ggml_set(
3352
        struct ggml_context * ctx,
3353
        struct ggml_tensor  * a,
3354
        struct ggml_tensor  * b,
3355
        size_t                nb1,
3356
        size_t                nb2,
3357
        size_t                nb3,
3358
0
        size_t                offset) {
3359
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
3360
0
}
3361
3362
struct ggml_tensor * ggml_set_inplace(
3363
        struct ggml_context * ctx,
3364
        struct ggml_tensor  * a,
3365
        struct ggml_tensor  * b,
3366
        size_t                nb1,
3367
        size_t                nb2,
3368
        size_t                nb3,
3369
0
        size_t                offset) {
3370
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
3371
0
}
3372
3373
struct ggml_tensor * ggml_set_1d(
3374
        struct ggml_context * ctx,
3375
        struct ggml_tensor  * a,
3376
        struct ggml_tensor  * b,
3377
0
        size_t                offset) {
3378
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
3379
0
}
3380
3381
struct ggml_tensor * ggml_set_1d_inplace(
3382
        struct ggml_context * ctx,
3383
        struct ggml_tensor  * a,
3384
        struct ggml_tensor  * b,
3385
0
        size_t                offset) {
3386
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
3387
0
}
3388
3389
struct ggml_tensor * ggml_set_2d(
3390
        struct ggml_context * ctx,
3391
        struct ggml_tensor  * a,
3392
        struct ggml_tensor  * b,
3393
        size_t                nb1,
3394
0
        size_t                offset) {
3395
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
3396
0
}
3397
3398
struct ggml_tensor * ggml_set_2d_inplace(
3399
        struct ggml_context * ctx,
3400
        struct ggml_tensor  * a,
3401
        struct ggml_tensor  * b,
3402
        size_t                nb1,
3403
0
        size_t                offset) {
3404
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
3405
0
}
3406
3407
// ggml_cpy
3408
3409
static struct ggml_tensor * ggml_cpy_impl(
3410
        struct ggml_context * ctx,
3411
        struct ggml_tensor  * a,
3412
0
        struct ggml_tensor  * b) {
3413
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3414
3415
    // make a view of the destination
3416
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, b);
3417
0
    if (strlen(b->name) > 0) {
3418
0
        ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
3419
0
    } else {
3420
0
        ggml_format_name(result, "%s (copy)", a->name);
3421
0
    }
3422
3423
0
    result->op     = GGML_OP_CPY;
3424
0
    result->src[0] = a;
3425
0
    result->src[1] = b;
3426
3427
0
    return result;
3428
0
}
3429
3430
struct ggml_tensor * ggml_cpy(
3431
        struct ggml_context * ctx,
3432
        struct ggml_tensor * a,
3433
0
        struct ggml_tensor * b) {
3434
0
    return ggml_cpy_impl(ctx, a, b);
3435
0
}
3436
3437
struct ggml_tensor * ggml_cast(
3438
        struct ggml_context * ctx,
3439
        struct ggml_tensor  * a,
3440
0
        enum   ggml_type      type) {
3441
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
3442
0
    ggml_format_name(result, "%s (copy)", a->name);
3443
3444
0
    result->op     = GGML_OP_CPY;
3445
0
    result->src[0] = a;
3446
0
    result->src[1] = result;
3447
3448
0
    return result;
3449
0
}
3450
3451
// ggml_cont
3452
3453
static struct ggml_tensor * ggml_cont_impl(
3454
        struct ggml_context * ctx,
3455
0
        struct ggml_tensor  * a) {
3456
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3457
0
    ggml_format_name(result, "%s (cont)", a->name);
3458
3459
0
    result->op     = GGML_OP_CONT;
3460
0
    result->src[0] = a;
3461
3462
0
    return result;
3463
0
}
3464
3465
struct ggml_tensor * ggml_cont(
3466
        struct ggml_context * ctx,
3467
0
        struct ggml_tensor * a) {
3468
0
    return ggml_cont_impl(ctx, a);
3469
0
}
3470
3471
// make contiguous, with new shape
3472
GGML_API struct ggml_tensor * ggml_cont_1d(
3473
        struct ggml_context * ctx,
3474
        struct ggml_tensor  * a,
3475
0
        int64_t               ne0) {
3476
0
    return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
3477
0
}
3478
3479
GGML_API struct ggml_tensor * ggml_cont_2d(
3480
        struct ggml_context * ctx,
3481
        struct ggml_tensor  * a,
3482
        int64_t               ne0,
3483
0
        int64_t               ne1) {
3484
0
    return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
3485
0
}
3486
3487
GGML_API struct ggml_tensor * ggml_cont_3d(
3488
        struct ggml_context * ctx,
3489
        struct ggml_tensor  * a,
3490
        int64_t               ne0,
3491
        int64_t               ne1,
3492
0
        int64_t               ne2) {
3493
0
    return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
3494
0
}
3495
3496
struct ggml_tensor * ggml_cont_4d(
3497
        struct ggml_context * ctx,
3498
        struct ggml_tensor  * a,
3499
        int64_t               ne0,
3500
        int64_t               ne1,
3501
        int64_t               ne2,
3502
0
        int64_t               ne3) {
3503
0
    GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
3504
3505
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
3506
0
    ggml_format_name(result, "%s (cont)", a->name);
3507
3508
0
    result->op     = GGML_OP_CONT;
3509
0
    result->src[0] = a;
3510
3511
0
    return result;
3512
0
}
3513
3514
// ggml_reshape
3515
3516
struct ggml_tensor * ggml_reshape(
3517
        struct ggml_context * ctx,
3518
        struct ggml_tensor * a,
3519
0
        struct ggml_tensor * b) {
3520
0
    GGML_ASSERT(ggml_is_contiguous(a));
3521
    // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
3522
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3523
3524
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
3525
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3526
3527
0
    result->op     = GGML_OP_RESHAPE;
3528
0
    result->src[0] = a;
3529
3530
0
    return result;
3531
0
}
3532
3533
struct ggml_tensor * ggml_reshape_1d(
3534
        struct ggml_context * ctx,
3535
        struct ggml_tensor  * a,
3536
0
        int64_t               ne0) {
3537
0
    GGML_ASSERT(ggml_is_contiguous(a));
3538
0
    GGML_ASSERT(ggml_nelements(a) == ne0);
3539
3540
0
    const int64_t ne[1] = { ne0 };
3541
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
3542
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3543
3544
0
    result->op     = GGML_OP_RESHAPE;
3545
0
    result->src[0] = a;
3546
3547
0
    return result;
3548
0
}
3549
3550
struct ggml_tensor * ggml_reshape_2d(
3551
        struct ggml_context * ctx,
3552
        struct ggml_tensor  * a,
3553
        int64_t               ne0,
3554
0
        int64_t               ne1) {
3555
0
    GGML_ASSERT(ggml_is_contiguous(a));
3556
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
3557
3558
0
    const int64_t ne[2] = { ne0, ne1 };
3559
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
3560
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3561
3562
0
    result->op     = GGML_OP_RESHAPE;
3563
0
    result->src[0] = a;
3564
3565
0
    return result;
3566
0
}
3567
3568
struct ggml_tensor * ggml_reshape_3d(
3569
        struct ggml_context * ctx,
3570
        struct ggml_tensor  * a,
3571
        int64_t               ne0,
3572
        int64_t               ne1,
3573
0
        int64_t               ne2) {
3574
0
    GGML_ASSERT(ggml_is_contiguous(a));
3575
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
3576
3577
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3578
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
3579
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3580
3581
0
    result->op     = GGML_OP_RESHAPE;
3582
0
    result->src[0] = a;
3583
3584
0
    return result;
3585
0
}
3586
3587
struct ggml_tensor * ggml_reshape_4d(
3588
        struct ggml_context * ctx,
3589
        struct ggml_tensor  * a,
3590
        int64_t               ne0,
3591
        int64_t               ne1,
3592
        int64_t               ne2,
3593
0
        int64_t               ne3) {
3594
0
    GGML_ASSERT(ggml_is_contiguous(a));
3595
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
3596
3597
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3598
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
3599
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3600
3601
0
    result->op     = GGML_OP_RESHAPE;
3602
0
    result->src[0] = a;
3603
3604
0
    return result;
3605
0
}
3606
3607
static struct ggml_tensor * ggml_view_impl(
3608
        struct ggml_context * ctx,
3609
        struct ggml_tensor  * a,
3610
        int                   n_dims,
3611
        const int64_t       * ne,
3612
0
        size_t                offset) {
3613
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
3614
0
    ggml_format_name(result, "%s (view)", a->name);
3615
3616
0
    ggml_set_op_params(result, &offset, sizeof(offset));
3617
3618
0
    result->op     = GGML_OP_VIEW;
3619
0
    result->src[0] = a;
3620
3621
0
    return result;
3622
0
}
3623
3624
// ggml_view_1d
3625
3626
struct ggml_tensor * ggml_view_1d(
3627
        struct ggml_context * ctx,
3628
        struct ggml_tensor  * a,
3629
        int64_t               ne0,
3630
0
        size_t                offset) {
3631
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
3632
3633
0
    return result;
3634
0
}
3635
3636
// ggml_view_2d
3637
3638
struct ggml_tensor * ggml_view_2d(
3639
        struct ggml_context * ctx,
3640
        struct ggml_tensor  * a,
3641
        int64_t               ne0,
3642
        int64_t               ne1,
3643
        size_t                nb1,
3644
0
        size_t                offset) {
3645
0
    const int64_t ne[2] = { ne0, ne1 };
3646
3647
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
3648
3649
0
    result->nb[1] = nb1;
3650
0
    result->nb[2] = result->nb[1]*ne1;
3651
0
    result->nb[3] = result->nb[2];
3652
3653
0
    return result;
3654
0
}
3655
3656
// ggml_view_3d
3657
3658
struct ggml_tensor * ggml_view_3d(
3659
        struct ggml_context * ctx,
3660
        struct ggml_tensor  * a,
3661
        int64_t               ne0,
3662
        int64_t               ne1,
3663
        int64_t               ne2,
3664
        size_t                nb1,
3665
        size_t                nb2,
3666
0
        size_t                offset) {
3667
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3668
3669
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
3670
3671
0
    result->nb[1] = nb1;
3672
0
    result->nb[2] = nb2;
3673
0
    result->nb[3] = result->nb[2]*ne2;
3674
3675
0
    return result;
3676
0
}
3677
3678
// ggml_view_4d
3679
3680
struct ggml_tensor * ggml_view_4d(
3681
        struct ggml_context * ctx,
3682
        struct ggml_tensor  * a,
3683
        int64_t               ne0,
3684
        int64_t               ne1,
3685
        int64_t               ne2,
3686
        int64_t               ne3,
3687
        size_t                nb1,
3688
        size_t                nb2,
3689
        size_t                nb3,
3690
0
        size_t                offset) {
3691
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3692
3693
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
3694
3695
0
    result->nb[1] = nb1;
3696
0
    result->nb[2] = nb2;
3697
0
    result->nb[3] = nb3;
3698
3699
0
    return result;
3700
0
}
3701
3702
// ggml_permute
3703
3704
struct ggml_tensor * ggml_permute(
3705
        struct ggml_context * ctx,
3706
        struct ggml_tensor  * a,
3707
        int                   axis0,
3708
        int                   axis1,
3709
        int                   axis2,
3710
0
        int                   axis3) {
3711
0
    GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
3712
0
    GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
3713
0
    GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
3714
0
    GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);
3715
3716
0
    GGML_ASSERT(axis0 != axis1);
3717
0
    GGML_ASSERT(axis0 != axis2);
3718
0
    GGML_ASSERT(axis0 != axis3);
3719
0
    GGML_ASSERT(axis1 != axis2);
3720
0
    GGML_ASSERT(axis1 != axis3);
3721
0
    GGML_ASSERT(axis2 != axis3);
3722
3723
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3724
0
    ggml_format_name(result, "%s (permuted)", a->name);
3725
3726
0
    int ne[GGML_MAX_DIMS];
3727
0
    int nb[GGML_MAX_DIMS];
3728
3729
0
    ne[axis0] = a->ne[0];
3730
0
    ne[axis1] = a->ne[1];
3731
0
    ne[axis2] = a->ne[2];
3732
0
    ne[axis3] = a->ne[3];
3733
3734
0
    nb[axis0] = a->nb[0];
3735
0
    nb[axis1] = a->nb[1];
3736
0
    nb[axis2] = a->nb[2];
3737
0
    nb[axis3] = a->nb[3];
3738
3739
0
    result->ne[0] = ne[0];
3740
0
    result->ne[1] = ne[1];
3741
0
    result->ne[2] = ne[2];
3742
0
    result->ne[3] = ne[3];
3743
3744
0
    result->nb[0] = nb[0];
3745
0
    result->nb[1] = nb[1];
3746
0
    result->nb[2] = nb[2];
3747
0
    result->nb[3] = nb[3];
3748
3749
0
    result->op     = GGML_OP_PERMUTE;
3750
0
    result->src[0] = a;
3751
3752
0
    int32_t params[] = { axis0, axis1, axis2, axis3 };
3753
0
    ggml_set_op_params(result, params, sizeof(params));
3754
3755
0
    return result;
3756
0
}
3757
3758
// ggml_transpose
3759
3760
struct ggml_tensor * ggml_transpose(
3761
        struct ggml_context * ctx,
3762
0
        struct ggml_tensor  * a) {
3763
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3764
0
    ggml_format_name(result, "%s (transposed)", a->name);
3765
3766
0
    result->ne[0] = a->ne[1];
3767
0
    result->ne[1] = a->ne[0];
3768
3769
0
    result->nb[0] = a->nb[1];
3770
0
    result->nb[1] = a->nb[0];
3771
3772
0
    result->op     = GGML_OP_TRANSPOSE;
3773
0
    result->src[0] = a;
3774
3775
0
    return result;
3776
0
}
3777
3778
// ggml_get_rows
3779
3780
struct ggml_tensor * ggml_get_rows(
3781
        struct ggml_context * ctx,
3782
        struct ggml_tensor  * a,
3783
0
        struct ggml_tensor  * b) {
3784
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
3785
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
3786
0
    GGML_ASSERT(b->ne[3] == 1);
3787
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
3788
3789
    // TODO: implement non F32 return
3790
0
    enum ggml_type type = GGML_TYPE_F32;
3791
0
    if (a->type == GGML_TYPE_I32) {
3792
0
        type = a->type;
3793
0
    }
3794
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
3795
3796
0
    result->op     = GGML_OP_GET_ROWS;
3797
0
    result->src[0] = a;
3798
0
    result->src[1] = b;
3799
3800
0
    return result;
3801
0
}
3802
3803
// ggml_get_rows_back
3804
3805
struct ggml_tensor * ggml_get_rows_back(
3806
        struct ggml_context * ctx,
3807
        struct ggml_tensor  * a,
3808
        struct ggml_tensor  * b,
3809
0
        struct ggml_tensor  * c) {
3810
0
    GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
3811
0
    GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
3812
3813
    // TODO: implement non F32 return
3814
    //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
3815
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
3816
3817
0
    result->op     = GGML_OP_GET_ROWS_BACK;
3818
0
    result->src[0] = a;
3819
0
    result->src[1] = b;
3820
3821
0
    return result;
3822
0
}
3823
3824
// ggml_set_rows
3825
3826
struct ggml_tensor * ggml_set_rows(
3827
        struct ggml_context * ctx,
3828
        struct ggml_tensor  * a,
3829
        struct ggml_tensor  * b,
3830
0
        struct ggml_tensor  * c) {
3831
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
3832
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
3833
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
3834
0
    GGML_ASSERT(b->ne[1] == c->ne[0]);
3835
0
    GGML_ASSERT(b->ne[2] % c->ne[1] == 0);
3836
0
    GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
3837
0
    GGML_ASSERT(c->ne[3] == 1);
3838
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
3839
0
    GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32);
3840
3841
0
    GGML_ASSERT(ggml_is_contiguous_rows(a));
3842
0
    GGML_ASSERT(ggml_is_contiguous_rows(b));
3843
3844
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3845
3846
0
    result->op     = GGML_OP_SET_ROWS;
3847
0
    result->src[0] = b;
3848
0
    result->src[1] = c;
3849
0
    result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931)
3850
3851
0
    return result;
3852
0
}
3853
3854
// ggml_diag
3855
3856
struct ggml_tensor * ggml_diag(
3857
        struct ggml_context * ctx,
3858
0
        struct ggml_tensor  * a) {
3859
0
    GGML_ASSERT(a->ne[1] == 1);
3860
3861
0
    const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
3862
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
3863
3864
0
    result->op     = GGML_OP_DIAG;
3865
0
    result->src[0] = a;
3866
3867
0
    return result;
3868
0
}
3869
3870
// ggml_diag_mask_inf
3871
3872
static struct ggml_tensor * ggml_diag_mask_inf_impl(
3873
        struct ggml_context * ctx,
3874
        struct ggml_tensor  * a,
3875
        int                   n_past,
3876
0
        bool                  inplace) {
3877
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3878
3879
0
    int32_t params[] = { n_past };
3880
0
    ggml_set_op_params(result, params, sizeof(params));
3881
3882
0
    result->op     = GGML_OP_DIAG_MASK_INF;
3883
0
    result->src[0] = a;
3884
3885
0
    return result;
3886
0
}
3887
3888
struct ggml_tensor * ggml_diag_mask_inf(
3889
        struct ggml_context * ctx,
3890
        struct ggml_tensor  * a,
3891
0
        int                   n_past) {
3892
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
3893
0
}
3894
3895
struct ggml_tensor * ggml_diag_mask_inf_inplace(
3896
        struct ggml_context * ctx,
3897
        struct ggml_tensor  * a,
3898
0
        int                   n_past) {
3899
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
3900
0
}
3901
3902
// ggml_diag_mask_zero
3903
3904
static struct ggml_tensor * ggml_diag_mask_zero_impl(
3905
        struct ggml_context * ctx,
3906
        struct ggml_tensor  * a,
3907
        int                   n_past,
3908
0
        bool                  inplace) {
3909
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3910
3911
0
    int32_t params[] = { n_past };
3912
0
    ggml_set_op_params(result, params, sizeof(params));
3913
3914
0
    result->op     = GGML_OP_DIAG_MASK_ZERO;
3915
0
    result->src[0] = a;
3916
3917
0
    return result;
3918
0
}
3919
3920
struct ggml_tensor * ggml_diag_mask_zero(
3921
        struct ggml_context * ctx,
3922
        struct ggml_tensor  * a,
3923
0
        int                   n_past) {
3924
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
3925
0
}
3926
3927
struct ggml_tensor * ggml_diag_mask_zero_inplace(
3928
        struct ggml_context * ctx,
3929
        struct ggml_tensor  * a,
3930
0
        int                   n_past) {
3931
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
3932
0
}
3933
3934
// ggml_soft_max
3935
3936
static struct ggml_tensor * ggml_soft_max_impl(
3937
        struct ggml_context * ctx,
3938
        struct ggml_tensor  * a,
3939
        struct ggml_tensor  * mask,
3940
        float                 scale,
3941
        float                 max_bias,
3942
0
        bool                  inplace) {
3943
0
    GGML_ASSERT(ggml_is_contiguous(a));
3944
3945
0
    if (mask) {
3946
0
        GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
3947
0
        GGML_ASSERT(ggml_is_contiguous(mask));
3948
0
        GGML_ASSERT(mask->ne[0] == a->ne[0]);
3949
0
        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
3950
0
        GGML_ASSERT(a->ne[2]%mask->ne[2] == 0);
3951
0
        GGML_ASSERT(a->ne[3]%mask->ne[3] == 0);
3952
0
    }
3953
3954
0
    if (max_bias > 0.0f) {
3955
0
        GGML_ASSERT(mask);
3956
0
    }
3957
3958
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3959
3960
0
    float params[] = { scale, max_bias };
3961
0
    ggml_set_op_params(result, params, sizeof(params));
3962
3963
0
    result->op     = GGML_OP_SOFT_MAX;
3964
0
    result->src[0] = a;
3965
0
    result->src[1] = mask;
3966
3967
0
    return result;
3968
0
}
3969
3970
struct ggml_tensor * ggml_soft_max(
3971
        struct ggml_context * ctx,
3972
0
        struct ggml_tensor  * a) {
3973
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
3974
0
}
3975
3976
struct ggml_tensor * ggml_soft_max_inplace(
3977
        struct ggml_context * ctx,
3978
0
        struct ggml_tensor  * a) {
3979
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
3980
0
}
3981
3982
struct ggml_tensor * ggml_soft_max_ext(
3983
        struct ggml_context * ctx,
3984
        struct ggml_tensor  * a,
3985
        struct ggml_tensor  * mask,
3986
        float                 scale,
3987
0
        float                 max_bias) {
3988
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
3989
0
}
3990
3991
struct ggml_tensor * ggml_soft_max_ext_inplace(
3992
        struct ggml_context * ctx,
3993
        struct ggml_tensor  * a,
3994
        struct ggml_tensor  * mask,
3995
        float                 scale,
3996
0
        float                 max_bias) {
3997
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
3998
0
}
3999
4000
void ggml_soft_max_add_sinks(
4001
        struct ggml_tensor * a,
4002
0
        struct ggml_tensor * sinks) {
4003
0
    if (!sinks) {
4004
0
        a->src[2] = NULL;
4005
0
        return;
4006
0
    }
4007
4008
0
    GGML_ASSERT(a->op == GGML_OP_SOFT_MAX);
4009
0
    GGML_ASSERT(a->src[2] == NULL);
4010
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
4011
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
4012
4013
0
    a->src[2] = sinks;
4014
0
}
4015
4016
// ggml_soft_max_ext_back
4017
4018
static struct ggml_tensor * ggml_soft_max_ext_back_impl(
4019
        struct ggml_context * ctx,
4020
        struct ggml_tensor  * a,
4021
        struct ggml_tensor  * b,
4022
        float                 scale,
4023
        float                 max_bias,
4024
0
        bool                  inplace) {
4025
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4026
4027
0
    result->op     = GGML_OP_SOFT_MAX_BACK;
4028
0
    result->src[0] = a;
4029
0
    result->src[1] = b;
4030
4031
0
    memcpy((float *) result->op_params + 0, &scale,    sizeof(float));
4032
0
    memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));
4033
4034
0
    return result;
4035
0
}
4036
4037
struct ggml_tensor * ggml_soft_max_ext_back(
4038
        struct ggml_context * ctx,
4039
        struct ggml_tensor  * a,
4040
        struct ggml_tensor  * b,
4041
        float                 scale,
4042
0
        float                 max_bias) {
4043
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
4044
0
}
4045
4046
struct ggml_tensor * ggml_soft_max_ext_back_inplace(
4047
        struct ggml_context * ctx,
4048
        struct ggml_tensor  * a,
4049
        struct ggml_tensor  * b,
4050
        float                 scale,
4051
0
        float                 max_bias) {
4052
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
4053
0
}
4054
4055
// ggml_rope
4056
4057
static struct ggml_tensor * ggml_rope_impl(
4058
        struct ggml_context * ctx,
4059
        struct ggml_tensor  * a,
4060
        struct ggml_tensor  * b,
4061
        struct ggml_tensor  * c,
4062
        int                   n_dims,
4063
        int                   sections[GGML_MROPE_SECTIONS],
4064
        int                   mode,
4065
        int                   n_ctx_orig,
4066
        float                 freq_base,
4067
        float                 freq_scale,
4068
        float                 ext_factor,
4069
        float                 attn_factor,
4070
        float                 beta_fast,
4071
        float                 beta_slow,
4072
0
        bool                  inplace) {
4073
0
    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
4074
4075
0
    GGML_ASSERT(ggml_is_vector(b));
4076
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
4077
4078
0
    bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;
4079
0
    if (mrope_used) {
4080
0
        GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
4081
0
    } else {
4082
0
        GGML_ASSERT(a->ne[2] == b->ne[0]);
4083
0
    }
4084
4085
0
    if (c) {
4086
0
        GGML_ASSERT(c->type == GGML_TYPE_F32);
4087
0
        GGML_ASSERT(c->ne[0] >= n_dims / 2);
4088
0
    }
4089
4090
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4091
4092
0
    int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
4093
0
    memcpy(params +  5, &freq_base,    sizeof(float));
4094
0
    memcpy(params +  6, &freq_scale,   sizeof(float));
4095
0
    memcpy(params +  7, &ext_factor,   sizeof(float));
4096
0
    memcpy(params +  8, &attn_factor,  sizeof(float));
4097
0
    memcpy(params +  9, &beta_fast,    sizeof(float));
4098
0
    memcpy(params + 10, &beta_slow,    sizeof(float));
4099
0
    if (mrope_used && sections) {
4100
0
        memcpy(params + 11, sections,  sizeof(int32_t) * GGML_MROPE_SECTIONS);
4101
0
    } else {
4102
0
        memset(params + 11, 0,         sizeof(int32_t) * GGML_MROPE_SECTIONS);
4103
0
    }
4104
0
    ggml_set_op_params(result, params, sizeof(params));
4105
4106
0
    result->op     = GGML_OP_ROPE;
4107
0
    result->src[0] = a;
4108
0
    result->src[1] = b;
4109
0
    result->src[2] = c;
4110
4111
0
    return result;
4112
0
}
4113
4114
struct ggml_tensor * ggml_rope(
4115
        struct ggml_context * ctx,
4116
        struct ggml_tensor  * a,
4117
        struct ggml_tensor  * b,
4118
        int                   n_dims,
4119
0
        int                   mode) {
4120
0
    return ggml_rope_impl(
4121
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
4122
0
    );
4123
0
}
4124
4125
struct ggml_tensor * ggml_rope_multi(
4126
        struct ggml_context * ctx,
4127
        struct ggml_tensor  * a,
4128
        struct ggml_tensor  * b,
4129
        struct ggml_tensor  * c,
4130
        int                   n_dims,
4131
        int                   sections[GGML_MROPE_SECTIONS],
4132
        int                   mode,
4133
        int                   n_ctx_orig,
4134
        float                 freq_base,
4135
        float                 freq_scale,
4136
        float                 ext_factor,
4137
        float                 attn_factor,
4138
        float                 beta_fast,
4139
0
        float                 beta_slow) {
4140
0
    return ggml_rope_impl(
4141
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4142
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4143
0
    );
4144
0
}
4145
4146
struct ggml_tensor * ggml_rope_multi_inplace(
4147
        struct ggml_context * ctx,
4148
        struct ggml_tensor  * a,
4149
        struct ggml_tensor  * b,
4150
        struct ggml_tensor  * c,
4151
        int                   n_dims,
4152
        int                   sections[GGML_MROPE_SECTIONS],
4153
        int                   mode,
4154
        int                   n_ctx_orig,
4155
        float                 freq_base,
4156
        float                 freq_scale,
4157
        float                 ext_factor,
4158
        float                 attn_factor,
4159
        float                 beta_fast,
4160
0
        float                 beta_slow) {
4161
0
    return ggml_rope_impl(
4162
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4163
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4164
0
    );
4165
0
}
4166
4167
struct ggml_tensor * ggml_rope_inplace(
4168
        struct ggml_context * ctx,
4169
        struct ggml_tensor  * a,
4170
        struct ggml_tensor  * b,
4171
        int                   n_dims,
4172
0
        int                   mode) {
4173
0
    return ggml_rope_impl(
4174
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
4175
0
    );
4176
0
}
4177
4178
struct ggml_tensor * ggml_rope_ext(
4179
        struct ggml_context * ctx,
4180
        struct ggml_tensor  * a,
4181
        struct ggml_tensor  * b,
4182
        struct ggml_tensor  * c,
4183
        int                   n_dims,
4184
        int                   mode,
4185
        int                   n_ctx_orig,
4186
        float                 freq_base,
4187
        float                 freq_scale,
4188
        float                 ext_factor,
4189
        float                 attn_factor,
4190
        float                 beta_fast,
4191
0
        float                 beta_slow) {
4192
0
    return ggml_rope_impl(
4193
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4194
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4195
0
    );
4196
0
}
4197
4198
struct ggml_tensor * ggml_rope_ext_inplace(
4199
        struct ggml_context * ctx,
4200
        struct ggml_tensor  * a,
4201
        struct ggml_tensor  * b,
4202
        struct ggml_tensor  * c,
4203
        int                   n_dims,
4204
        int                   mode,
4205
        int                   n_ctx_orig,
4206
        float                 freq_base,
4207
        float                 freq_scale,
4208
        float                 ext_factor,
4209
        float                 attn_factor,
4210
        float                 beta_fast,
4211
0
        float                 beta_slow) {
4212
0
    return ggml_rope_impl(
4213
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4214
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4215
0
    );
4216
0
}
4217
4218
struct ggml_tensor * ggml_rope_custom(
4219
        struct ggml_context * ctx,
4220
        struct ggml_tensor  * a,
4221
        struct ggml_tensor  * b,
4222
        int                   n_dims,
4223
        int                   mode,
4224
        int                   n_ctx_orig,
4225
        float                 freq_base,
4226
        float                 freq_scale,
4227
        float                 ext_factor,
4228
        float                 attn_factor,
4229
        float                 beta_fast,
4230
0
        float                 beta_slow) {
4231
0
    return ggml_rope_impl(
4232
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4233
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4234
0
    );
4235
0
}
4236
4237
struct ggml_tensor * ggml_rope_custom_inplace(
4238
        struct ggml_context * ctx,
4239
        struct ggml_tensor  * a,
4240
        struct ggml_tensor  * b,
4241
        int                   n_dims,
4242
        int                   mode,
4243
        int                   n_ctx_orig,
4244
        float                 freq_base,
4245
        float                 freq_scale,
4246
        float                 ext_factor,
4247
        float                 attn_factor,
4248
        float                 beta_fast,
4249
0
        float                 beta_slow) {
4250
0
    return ggml_rope_impl(
4251
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4252
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4253
0
    );
4254
0
}
4255
4256
// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get
4257
// `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
4258
0
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
4259
0
    return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
4260
0
}
4261
4262
void ggml_rope_yarn_corr_dims(
4263
    int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
4264
0
) {
4265
    // start and end correction dims
4266
0
    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
4267
0
    float end   =  ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
4268
0
    dims[0] = MAX(0, start);
4269
0
    dims[1] = MIN(n_dims - 1, end);
4270
0
}
4271
4272
// ggml_rope_back
4273
4274
struct ggml_tensor * ggml_rope_ext_back(
4275
        struct ggml_context * ctx,
4276
        struct ggml_tensor  * a,
4277
        struct ggml_tensor  * b,
4278
        struct ggml_tensor  * c,
4279
        int                   n_dims,
4280
        int                   mode,
4281
        int                   n_ctx_orig,
4282
        float                 freq_base,
4283
        float                 freq_scale,
4284
        float                 ext_factor,
4285
        float                 attn_factor,
4286
        float                 beta_fast,
4287
0
        float                 beta_slow) {
4288
0
    struct ggml_tensor * result = ggml_rope_ext(
4289
0
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4290
0
    result->op = GGML_OP_ROPE_BACK;
4291
0
    return result;
4292
0
}
4293
4294
struct ggml_tensor * ggml_rope_multi_back(
4295
        struct ggml_context * ctx,
4296
        struct ggml_tensor  * a,
4297
        struct ggml_tensor  * b,
4298
        struct ggml_tensor  * c,
4299
        int                   n_dims,
4300
        int                   sections[4],
4301
        int                   mode,
4302
        int                   n_ctx_orig,
4303
        float                 freq_base,
4304
        float                 freq_scale,
4305
        float                 ext_factor,
4306
        float                 attn_factor,
4307
        float                 beta_fast,
4308
0
        float                 beta_slow) {
4309
0
    struct ggml_tensor * result = ggml_rope_multi(
4310
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4311
0
    result->op = GGML_OP_ROPE_BACK;
4312
0
    return result;
4313
0
}
4314
// ggml_clamp
4315
4316
struct ggml_tensor * ggml_clamp(
4317
        struct ggml_context * ctx,
4318
        struct ggml_tensor  * a,
4319
        float                 min,
4320
0
        float                 max) {
4321
    // TODO: when implement backward, fix this:
4322
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
4323
4324
0
    float params[] = { min, max };
4325
0
    ggml_set_op_params(result, params, sizeof(params));
4326
4327
0
    result->op     = GGML_OP_CLAMP;
4328
0
    result->src[0] = a;
4329
4330
0
    return result;
4331
0
}
4332
4333
0
static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4334
0
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
4335
0
}
4336
4337
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
4338
// a: [OC,IC, KH, KW]
4339
// b: [N, IC, IH, IW]
4340
// result: [N, OH, OW, IC*KH*KW]
4341
struct ggml_tensor * ggml_im2col(
4342
        struct ggml_context * ctx,
4343
        struct ggml_tensor  * a,
4344
        struct ggml_tensor  * b,
4345
        int                   s0,
4346
        int                   s1,
4347
        int                   p0,
4348
        int                   p1,
4349
        int                   d0,
4350
        int                   d1,
4351
        bool                  is_2D,
4352
0
        enum ggml_type        dst_type) {
4353
0
    if (is_2D) {
4354
0
        GGML_ASSERT(a->ne[2] == b->ne[2]);
4355
0
    } else {
4356
        //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
4357
0
        GGML_ASSERT(b->ne[1] == a->ne[1]);
4358
0
        GGML_ASSERT(b->ne[3] == 1);
4359
0
    }
4360
4361
0
    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
4362
0
    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4363
4364
0
    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
4365
0
    GGML_ASSERT((OW > 0)           && "b too small compared to a");
4366
4367
0
    const int64_t ne[4] = {
4368
0
        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
4369
0
        OW,
4370
0
        is_2D ? OH : b->ne[2],
4371
0
        is_2D ?      b->ne[3] : 1,
4372
0
    };
4373
4374
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4375
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4376
0
    ggml_set_op_params(result, params, sizeof(params));
4377
4378
0
    result->op     = GGML_OP_IM2COL;
4379
0
    result->src[0] = a;
4380
0
    result->src[1] = b;
4381
4382
0
    return result;
4383
0
}
4384
4385
struct ggml_tensor * ggml_im2col_back(
4386
        struct ggml_context * ctx,
4387
        struct ggml_tensor  * a,
4388
        struct ggml_tensor  * b,
4389
        int64_t             * ne,
4390
        int                   s0,
4391
        int                   s1,
4392
        int                   p0,
4393
        int                   p1,
4394
        int                   d0,
4395
        int                   d1,
4396
0
        bool                  is_2D) {
4397
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4398
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4399
0
    ggml_set_op_params(result, params, sizeof(params));
4400
4401
0
    result->op     = GGML_OP_IM2COL_BACK;
4402
0
    result->src[0] = a;
4403
0
    result->src[1] = b;
4404
4405
0
    return result;
4406
0
}
4407
4408
// ggml_conv_1d
4409
4410
struct ggml_tensor * ggml_conv_1d(
4411
        struct ggml_context * ctx,
4412
        struct ggml_tensor  * a,
4413
        struct ggml_tensor  * b,
4414
        int                   s0,
4415
        int                   p0,
4416
0
        int                   d0) {
4417
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
4418
4419
0
    struct ggml_tensor * result =
4420
0
        ggml_mul_mat(ctx,
4421
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
4422
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2]));                    // [OC,IC, K] => [OC, IC * K]
4423
4424
0
    result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
4425
4426
0
    return result;
4427
0
}
4428
4429
// ggml_conv_1d_ph
4430
4431
struct ggml_tensor* ggml_conv_1d_ph(
4432
        struct ggml_context * ctx,
4433
        struct ggml_tensor  * a,
4434
        struct ggml_tensor  * b,
4435
        int                   s,
4436
0
        int                   d) {
4437
0
    return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
4438
0
}
4439
4440
// ggml_conv_1d_dw
4441
4442
struct ggml_tensor * ggml_conv_1d_dw(
4443
        struct ggml_context * ctx,
4444
        struct ggml_tensor  * a,
4445
        struct ggml_tensor  * b,
4446
        int                   s0,
4447
        int                   p0,
4448
0
        int                   d0) {
4449
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
4450
4451
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
4452
4453
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
4454
4455
0
    result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);
4456
4457
0
    return result;
4458
0
}
4459
4460
// ggml_conv_1d_dw_ph
4461
4462
struct ggml_tensor * ggml_conv_1d_dw_ph(
4463
        struct ggml_context * ctx,
4464
        struct ggml_tensor  * a,
4465
        struct ggml_tensor  * b,
4466
        int                   s0,
4467
0
        int                   d0) {
4468
0
    return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
4469
0
}
4470
4471
// ggml_conv_transpose_1d
4472
4473
0
static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4474
0
    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
4475
0
}
4476
4477
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
4478
        struct ggml_context * ctx,
4479
        struct ggml_tensor  * a,
4480
        struct ggml_tensor  * b,
4481
        int                   s0,
4482
        int                   p0,
4483
0
        int                   d0) {
4484
0
    GGML_ASSERT(ggml_is_matrix(b));
4485
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
4486
0
    GGML_ASSERT(a->ne[3] == 1);
4487
4488
0
    GGML_ASSERT(p0 == 0);
4489
0
    GGML_ASSERT(d0 == 1);
4490
4491
0
    const int64_t ne[4] = {
4492
0
        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
4493
0
        a->ne[1], b->ne[2], 1,
4494
0
    };
4495
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4496
4497
0
    int32_t params[] = { s0, p0, d0 };
4498
0
    ggml_set_op_params(result, params, sizeof(params));
4499
4500
0
    result->op     = GGML_OP_CONV_TRANSPOSE_1D;
4501
0
    result->src[0] = a;
4502
0
    result->src[1] = b;
4503
4504
0
    return result;
4505
0
}
4506
4507
// ggml_conv_2d
4508
4509
// a: [OC,IC, KH, KW]
4510
// b: [N, IC, IH, IW]
4511
// result: [N, OC, OH, OW]
4512
struct ggml_tensor * ggml_conv_2d(
4513
        struct ggml_context * ctx,
4514
        struct ggml_tensor  * a,
4515
        struct ggml_tensor  * b,
4516
        int                   s0,
4517
        int                   s1,
4518
        int                   p0,
4519
        int                   p1,
4520
        int                   d0,
4521
0
        int                   d1) {
4522
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
4523
4524
0
    struct ggml_tensor * result =
4525
0
        ggml_mul_mat(ctx,
4526
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0],  im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
4527
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]),  a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]
4528
4529
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
4530
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
4531
4532
4533
0
    return result;
4534
0
}
4535
4536
// a: [OC*IC, KD, KH, KW]
4537
// b: [N*IC, ID, IH, IW]
4538
// result: [N*OD, OH, OW, IC * KD * KH * KW]
4539
struct ggml_tensor * ggml_im2col_3d(
4540
        struct ggml_context * ctx,
4541
        struct ggml_tensor  * a,
4542
        struct ggml_tensor  * b,
4543
        int64_t               IC,
4544
        int                   s0, // stride width
4545
        int                   s1, // stride height
4546
        int                   s2, // stride depth
4547
        int                   p0, // padding width
4548
        int                   p1, // padding height
4549
        int                   p2, // padding depth
4550
        int                   d0, // dilation width
4551
        int                   d1, // dilation height
4552
        int                   d2, // dilation depth
4553
0
        enum ggml_type        dst_type) {
4554
0
    const int64_t N = b->ne[3] / IC;
4555
0
    const int64_t ID = b->ne[2];
4556
0
    const int64_t IH = b->ne[1];
4557
0
    const int64_t IW = b->ne[0];
4558
4559
0
    const int64_t OC = a->ne[3] / IC;
4560
0
    UNUSED(OC);
4561
0
    const int64_t KD = a->ne[2];
4562
0
    const int64_t KH = a->ne[1];
4563
0
    const int64_t KW = a->ne[0];
4564
0
    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
4565
0
    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
4566
0
    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);
4567
4568
0
    GGML_ASSERT((OD > 0)  && "b too small compared to a");
4569
0
    GGML_ASSERT((OH > 0)  && "b too small compared to a");
4570
0
    GGML_ASSERT((OW > 0)  && "b too small compared to a");
4571
4572
4573
0
    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};
4574
4575
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4576
0
    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
4577
0
    ggml_set_op_params(result, params, sizeof(params));
4578
4579
0
    result->op     = GGML_OP_IM2COL_3D;
4580
0
    result->src[0] = a;
4581
0
    result->src[1] = b;
4582
4583
0
    return result;
4584
0
}
4585
4586
// a: [OC*IC, KD, KH, KW]
4587
// b: [N*IC, ID, IH, IW]
4588
// result: [N*OC, OD, OH, OW]
4589
struct ggml_tensor * ggml_conv_3d(
4590
        struct ggml_context * ctx,
4591
        struct ggml_tensor  * a,
4592
        struct ggml_tensor  * b,
4593
        int64_t               IC,
4594
        int                   s0, // stride width
4595
        int                   s1, // stride height
4596
        int                   s2, // stride depth
4597
        int                   p0, // padding width
4598
        int                   p1, // padding height
4599
        int                   p2, // padding depth
4600
        int                   d0, // dilation width
4601
        int                   d1, // dilation height
4602
        int                   d2  // dilation depth
4603
0
        ) {
4604
0
    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]
4605
4606
0
    int64_t OC = a->ne[3] / IC;
4607
0
    int64_t N = b->ne[3] / IC;
4608
0
    struct ggml_tensor * result =
4609
0
        ggml_mul_mat(ctx,
4610
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
4611
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                          // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]
4612
4613
0
    int64_t OD = im2col->ne[3] / N;
4614
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
4615
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
4616
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
4617
4618
0
    return result;
4619
0
}
4620
4621
// ggml_conv_2d_sk_p0
4622
4623
struct ggml_tensor * ggml_conv_2d_sk_p0(
4624
        struct ggml_context * ctx,
4625
        struct ggml_tensor  * a,
4626
0
        struct ggml_tensor  * b) {
4627
0
    return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
4628
0
}
4629
4630
// ggml_conv_2d_s1_ph
4631
4632
struct ggml_tensor * ggml_conv_2d_s1_ph(
4633
        struct ggml_context * ctx,
4634
        struct ggml_tensor  * a,
4635
0
        struct ggml_tensor  * b) {
4636
0
    return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
4637
0
}
4638
4639
// ggml_conv_2d_dw
4640
4641
struct ggml_tensor * ggml_conv_2d_dw(
4642
        struct ggml_context * ctx,
4643
        struct ggml_tensor  * a,
4644
        struct ggml_tensor  * b,
4645
        int                   s0,
4646
        int                   s1,
4647
        int                   p0,
4648
        int                   p1,
4649
        int                   d0,
4650
0
        int                   d1) {
4651
0
    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
4652
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
4653
0
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
4654
0
                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
4655
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
4656
4657
0
    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2],  new_a->ne[3], 1);                       // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
4658
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
4659
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
4660
4661
0
    return result;
4662
0
}
4663
4664
// ggml_conv_2d_dw_direct
4665
4666
struct ggml_tensor * ggml_conv_2d_dw_direct(
4667
        struct ggml_context * ctx,
4668
        struct ggml_tensor  * a,
4669
        struct ggml_tensor  * b,
4670
        int                   stride0,
4671
        int                   stride1,
4672
        int                   pad0,
4673
        int                   pad1,
4674
        int                   dilation0,
4675
0
        int                   dilation1) {
4676
0
    GGML_ASSERT(a->ne[2] == 1);
4677
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4678
0
    int64_t ne[4];
4679
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
4680
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
4681
0
    ne[2] = b->ne[2];
4682
0
    ne[3] = b->ne[3];
4683
4684
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4685
4686
0
    if (ggml_is_contiguous_channels(b)) {
4687
        // Result will be permuted the same way as input (CWHN order)
4688
0
        const int64_t type_size = ggml_type_size(result->type);
4689
0
        GGML_ASSERT(ggml_blck_size(result->type) == 1);
4690
0
        result->nb[0] = result->ne[2] * type_size;
4691
0
        result->nb[1] = result->ne[0] * result->nb[0];
4692
0
        result->nb[2] = type_size;
4693
0
    }
4694
4695
0
    int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
4696
0
    ggml_set_op_params(result, params, sizeof(params));
4697
4698
0
    result->op     = GGML_OP_CONV_2D_DW;
4699
0
    result->src[0] = a;
4700
0
    result->src[1] = b;
4701
0
    return result;
4702
0
}
4703
4704
// ggml_conv_2d_direct
4705
4706
struct ggml_tensor * ggml_conv_2d_direct(
4707
        struct ggml_context * ctx,
4708
        struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
4709
        struct ggml_tensor  * b,   // input data [W, H, C, N]
4710
        int                   s0,  // stride dimension 0
4711
        int                   s1,  // stride dimension 1
4712
        int                   p0,  // padding dimension 0
4713
        int                   p1,  // padding dimension 1
4714
        int                   d0,  // dilation dimension 0
4715
0
        int                   d1) {// dilation dimension 1
4716
4717
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
4718
    //GGML_ASSERT(a->type == b->type);
4719
4720
0
    int64_t ne[4];
4721
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4722
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4723
0
    ne[2] = a->ne[3];
4724
0
    ne[3] = b->ne[3];
4725
4726
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4727
4728
0
    ggml_set_op_params_i32(result, 0, s0);
4729
0
    ggml_set_op_params_i32(result, 1, s1);
4730
0
    ggml_set_op_params_i32(result, 2, p0);
4731
0
    ggml_set_op_params_i32(result, 3, p1);
4732
0
    ggml_set_op_params_i32(result, 4, d0);
4733
0
    ggml_set_op_params_i32(result, 5, d1);
4734
4735
0
    result->op = GGML_OP_CONV_2D;
4736
0
    result->src[0] = a;
4737
0
    result->src[1] = b;
4738
4739
0
    return result;
4740
0
}
4741
4742
// ggml_conv_3d_direct
4743
4744
struct ggml_tensor * ggml_conv_3d_direct(
4745
        struct ggml_context * ctx,
4746
        struct ggml_tensor  * a,
4747
        struct ggml_tensor  * b,
4748
        int                   s0,
4749
        int                   s1,
4750
        int                   s2,
4751
        int                   p0,
4752
        int                   p1,
4753
        int                   p2,
4754
        int                   d0,
4755
        int                   d1,
4756
        int                   d2,
4757
        int                   c,
4758
        int                   n,
4759
0
        int                   oc) {
4760
4761
0
    GGML_ASSERT(a->ne[3] == (int64_t) c * oc);
4762
0
    GGML_ASSERT(b->ne[3] == (int64_t) c * n);
4763
4764
0
    int64_t ne[4];
4765
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4766
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4767
0
    ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2);
4768
0
    ne[3] = (int64_t) oc * n;
4769
4770
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4771
4772
0
    ggml_set_op_params_i32(result, 0,  s0);
4773
0
    ggml_set_op_params_i32(result, 1,  s1);
4774
0
    ggml_set_op_params_i32(result, 2,  s2);
4775
0
    ggml_set_op_params_i32(result, 3,  p0);
4776
0
    ggml_set_op_params_i32(result, 4,  p1);
4777
0
    ggml_set_op_params_i32(result, 5,  p2);
4778
0
    ggml_set_op_params_i32(result, 6,  d0);
4779
0
    ggml_set_op_params_i32(result, 7,  d1);
4780
0
    ggml_set_op_params_i32(result, 8,  d2);
4781
0
    ggml_set_op_params_i32(result, 9,  c);
4782
0
    ggml_set_op_params_i32(result, 10, n);
4783
0
    ggml_set_op_params_i32(result, 11, oc);
4784
4785
0
    result->op = GGML_OP_CONV_3D;
4786
0
    result->src[0] = a;
4787
0
    result->src[1] = b;
4788
4789
0
    return result;
4790
0
}
4791
4792
// ggml_conv_transpose_2d_p0
4793
4794
0
static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
4795
0
    return (ins - 1) * s - 2 * p + ks;
4796
0
}
4797
4798
struct ggml_tensor * ggml_conv_transpose_2d_p0(
4799
        struct ggml_context * ctx,
4800
        struct ggml_tensor  * a,
4801
        struct ggml_tensor  * b,
4802
0
        int                   stride) {
4803
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4804
4805
0
    const int64_t ne[4] = {
4806
0
        ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
4807
0
        ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
4808
0
        a->ne[2], b->ne[3],
4809
0
    };
4810
4811
0
    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4812
4813
0
    ggml_set_op_params_i32(result, 0, stride);
4814
4815
0
    result->op     = GGML_OP_CONV_TRANSPOSE_2D;
4816
0
    result->src[0] = a;
4817
0
    result->src[1] = b;
4818
4819
0
    return result;
4820
0
}
4821
4822
// ggml_pool_*
4823
4824
0
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
4825
0
    return (ins + 2 * p - ks) / s + 1;
4826
0
}
4827
4828
// ggml_pool_1d
4829
4830
struct ggml_tensor * ggml_pool_1d(
4831
        struct ggml_context * ctx,
4832
        struct ggml_tensor  * a,
4833
        enum ggml_op_pool     op,
4834
        int                   k0,
4835
        int                   s0,
4836
0
        int                   p0) {
4837
0
    const int64_t ne[4] = {
4838
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4839
0
        a->ne[1],
4840
0
        a->ne[2],
4841
0
        a->ne[3],
4842
0
    };
4843
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4844
4845
0
    int32_t params[] = { op, k0, s0, p0 };
4846
0
    ggml_set_op_params(result, params, sizeof(params));
4847
4848
0
    result->op     = GGML_OP_POOL_1D;
4849
0
    result->src[0] = a;
4850
4851
0
    return result;
4852
0
}
4853
4854
// ggml_pool_2d
4855
4856
struct ggml_tensor * ggml_pool_2d(
4857
        struct ggml_context * ctx,
4858
        struct ggml_tensor  * a,
4859
        enum ggml_op_pool     op,
4860
        int                   k0,
4861
        int                   k1,
4862
        int                   s0,
4863
        int                   s1,
4864
        float                 p0,
4865
0
        float                 p1) {
4866
0
    struct ggml_tensor * result;
4867
0
    const int64_t ne[4] = {
4868
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4869
0
        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
4870
0
        a->ne[2],
4871
0
        a->ne[3],
4872
0
    };
4873
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4874
4875
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4876
0
    ggml_set_op_params(result, params, sizeof(params));
4877
4878
0
    result->op     = GGML_OP_POOL_2D;
4879
0
    result->src[0] = a;
4880
4881
0
    return result;
4882
0
}
4883
4884
struct ggml_tensor * ggml_pool_2d_back(
4885
        struct ggml_context * ctx,
4886
        struct ggml_tensor  * a,
4887
        struct ggml_tensor  * af,
4888
        enum ggml_op_pool     op,
4889
        int                   k0,
4890
        int                   k1,
4891
        int                   s0,
4892
        int                   s1,
4893
        float                 p0,
4894
0
        float                 p1) {
4895
0
    struct ggml_tensor * result;
4896
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
4897
4898
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4899
0
    ggml_set_op_params(result, params, sizeof(params));
4900
4901
0
    result->op     = GGML_OP_POOL_2D_BACK;
4902
0
    result->src[0] = a;
4903
0
    result->src[1] = af;
4904
4905
0
    return result;
4906
0
}
4907
4908
// ggml_upscale / ggml_interpolate
4909
4910
static struct ggml_tensor * ggml_interpolate_impl(
4911
        struct ggml_context * ctx,
4912
        struct ggml_tensor  * a,
4913
        int64_t               ne0,
4914
        int64_t               ne1,
4915
        int64_t               ne2,
4916
        int64_t               ne3,
4917
0
        uint32_t              mode) {
4918
0
    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
4919
    // TODO: implement antialias for modes other than bilinear
4920
0
    GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
4921
4922
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
4923
4924
0
    ggml_set_op_params_i32(result, 0, (int32_t)mode);
4925
4926
0
    result->op     = GGML_OP_UPSCALE;
4927
0
    result->src[0] = a;
4928
4929
0
    return result;
4930
0
}
4931
4932
struct ggml_tensor * ggml_upscale(
4933
        struct ggml_context * ctx,
4934
        struct ggml_tensor  * a,
4935
        int                   scale_factor,
4936
0
        enum ggml_scale_mode  mode) {
4937
0
    GGML_ASSERT(scale_factor > 1);
4938
0
    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
4939
0
}
4940
4941
struct ggml_tensor * ggml_upscale_ext(
4942
        struct ggml_context * ctx,
4943
        struct ggml_tensor  * a,
4944
        int                   ne0,
4945
        int                   ne1,
4946
        int                   ne2,
4947
        int                   ne3,
4948
0
        enum ggml_scale_mode  mode) {
4949
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4950
0
}
4951
4952
struct ggml_tensor * ggml_interpolate(
4953
        struct ggml_context * ctx,
4954
        struct ggml_tensor  * a,
4955
        int64_t               ne0,
4956
        int64_t               ne1,
4957
        int64_t               ne2,
4958
        int64_t               ne3,
4959
0
        uint32_t              mode) {
4960
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4961
0
}
4962
4963
// ggml_pad
4964
4965
struct ggml_tensor * ggml_pad(
4966
        struct ggml_context * ctx,
4967
        struct ggml_tensor  * a,
4968
        int                   p0,
4969
        int                   p1,
4970
        int                   p2,
4971
0
        int                   p3) {
4972
0
    return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
4973
0
}
4974
4975
// ggml_pad_circular
4976
4977
struct ggml_tensor * ggml_pad_circular(
4978
        struct ggml_context * ctx,
4979
        struct ggml_tensor  * a,
4980
        int                   p0,
4981
        int                   p1,
4982
        int                   p2,
4983
0
        int                   p3) {
4984
0
    return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
4985
0
}
4986
4987
struct ggml_tensor * ggml_pad_ext(
4988
            struct ggml_context * ctx,
4989
            struct ggml_tensor  * a,
4990
            int                  lp0,
4991
            int                  rp0,
4992
            int                  lp1,
4993
            int                  rp1,
4994
            int                  lp2,
4995
            int                  rp2,
4996
            int                  lp3,
4997
            int                  rp3
4998
0
            ) {
4999
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
5000
0
            a->ne[0] + lp0 + rp0,
5001
0
            a->ne[1] + lp1 + rp1,
5002
0
            a->ne[2] + lp2 + rp2,
5003
0
            a->ne[3] + lp3 + rp3);
5004
5005
0
    ggml_set_op_params_i32(result, 0, lp0);
5006
0
    ggml_set_op_params_i32(result, 1, rp0);
5007
0
    ggml_set_op_params_i32(result, 2, lp1);
5008
0
    ggml_set_op_params_i32(result, 3, rp1);
5009
0
    ggml_set_op_params_i32(result, 4, lp2);
5010
0
    ggml_set_op_params_i32(result, 5, rp2);
5011
0
    ggml_set_op_params_i32(result, 6, lp3);
5012
0
    ggml_set_op_params_i32(result, 7, rp3);
5013
0
    ggml_set_op_params_i32(result, 8, 0); // not circular by default
5014
5015
5016
0
    result->op     = GGML_OP_PAD;
5017
0
    result->src[0] = a;
5018
5019
0
    return result;
5020
0
}
5021
5022
// ggml_pad_ext_circular
5023
5024
struct ggml_tensor * ggml_pad_ext_circular(
5025
        struct ggml_context * ctx,
5026
        struct ggml_tensor  * a,
5027
        int                  lp0,
5028
        int                  rp0,
5029
        int                  lp1,
5030
        int                  rp1,
5031
        int                  lp2,
5032
        int                  rp2,
5033
        int                  lp3,
5034
        int                  rp3
5035
0
        ) {
5036
0
    struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
5037
0
    ggml_set_op_params_i32(result, 8, 1); // circular
5038
0
    return result;
5039
0
}
5040
5041
// ggml_pad_reflect_1d
5042
5043
struct ggml_tensor * ggml_pad_reflect_1d(
5044
        struct ggml_context * ctx,
5045
        struct ggml_tensor  * a,
5046
        int                   p0,
5047
0
        int                   p1) {
5048
0
    GGML_ASSERT(p0 >= 0);
5049
0
    GGML_ASSERT(p1 >= 0);
5050
5051
0
    GGML_ASSERT(p0 < a->ne[0]); // padding length on each size must be less than the
5052
0
    GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
5053
5054
0
    GGML_ASSERT(ggml_is_contiguous(a));
5055
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5056
5057
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
5058
0
            a->ne[0] + p0 + p1,
5059
0
            a->ne[1],
5060
0
            a->ne[2],
5061
0
            a->ne[3]);
5062
5063
0
    int32_t params[] = { p0, p1 };
5064
0
    ggml_set_op_params(result, params, sizeof(params));
5065
5066
0
    result->op     = GGML_OP_PAD_REFLECT_1D;
5067
0
    result->src[0] = a;
5068
5069
0
    return result;
5070
0
}
5071
5072
// ggml_roll
5073
5074
struct ggml_tensor * ggml_roll(
5075
        struct ggml_context * ctx,
5076
        struct ggml_tensor  * a,
5077
        int                   shift0,
5078
        int                   shift1,
5079
        int                   shift2,
5080
0
        int                   shift3) {
5081
0
    GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
5082
0
    GGML_ASSERT(abs(shift0) < a->ne[0]);
5083
0
    GGML_ASSERT(abs(shift1) < a->ne[1]);
5084
0
    GGML_ASSERT(abs(shift2) < a->ne[2]);
5085
0
    GGML_ASSERT(abs(shift3) < a->ne[3]);
5086
5087
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5088
5089
0
    ggml_set_op_params_i32(result, 0, shift0);
5090
0
    ggml_set_op_params_i32(result, 1, shift1);
5091
0
    ggml_set_op_params_i32(result, 2, shift2);
5092
0
    ggml_set_op_params_i32(result, 3, shift3);
5093
5094
0
    result->op     = GGML_OP_ROLL;
5095
0
    result->src[0] = a;
5096
5097
0
    return result;
5098
0
}
5099
5100
// ggml_timestep_embedding
5101
5102
struct ggml_tensor * ggml_timestep_embedding(
5103
        struct ggml_context * ctx,
5104
        struct ggml_tensor  * timesteps,
5105
        int                   dim,
5106
0
        int                   max_period) {
5107
5108
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);
5109
5110
0
    ggml_set_op_params_i32(result, 0, dim);
5111
0
    ggml_set_op_params_i32(result, 1, max_period);
5112
5113
0
    result->op     = GGML_OP_TIMESTEP_EMBEDDING;
5114
0
    result->src[0] = timesteps;
5115
5116
0
    return result;
5117
0
}
5118
5119
// ggml_tri
5120
5121
struct ggml_tensor * ggml_tri(
5122
    struct ggml_context * ctx,
5123
    struct ggml_tensor  * a,
5124
0
    enum ggml_tri_type    type) {
5125
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5126
5127
0
    GGML_ASSERT(ggml_is_contiguous(a));
5128
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
5129
5130
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5131
5132
0
    ggml_set_op_params_i32(result, 0, type);
5133
5134
0
    result->op = GGML_OP_TRI;
5135
0
    result->src[0] = a;
5136
5137
0
    return result;
5138
0
}
5139
5140
// ggml_fill
5141
5142
static struct ggml_tensor * ggml_fill_impl(
5143
    struct ggml_context * ctx,
5144
    struct ggml_tensor  * a,
5145
    float                 c,
5146
0
    bool                  inplace) {
5147
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5148
0
    GGML_ASSERT(ggml_is_contiguous(a));
5149
5150
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5151
5152
0
    ggml_set_op_params_f32(result, 0, c);
5153
5154
0
    result->op = GGML_OP_FILL;
5155
0
    result->src[0] = a;
5156
5157
0
    return result;
5158
0
}
5159
5160
struct ggml_tensor * ggml_fill(
5161
    struct ggml_context * ctx,
5162
    struct ggml_tensor  * a,
5163
0
    float                 c) {
5164
0
    return ggml_fill_impl(ctx, a, c, false);
5165
0
}
5166
5167
struct ggml_tensor * ggml_fill_inplace(
5168
    struct ggml_context * ctx,
5169
    struct ggml_tensor  * a,
5170
0
    float                 c) {
5171
0
    return ggml_fill_impl(ctx, a, c, true);
5172
0
}
5173
5174
// ggml_argsort
5175
5176
struct ggml_tensor * ggml_argsort(
5177
        struct ggml_context  * ctx,
5178
        struct ggml_tensor   * a,
5179
0
        enum ggml_sort_order   order) {
5180
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
5181
5182
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
5183
5184
0
    ggml_set_op_params_i32(result, 0, (int32_t) order);
5185
5186
0
    result->op     = GGML_OP_ARGSORT;
5187
0
    result->src[0] = a;
5188
5189
0
    return result;
5190
0
}
5191
5192
// ggml_argsort_top_k
5193
5194
struct ggml_tensor * ggml_argsort_top_k(
5195
        struct ggml_context * ctx,
5196
        struct ggml_tensor  * a,
5197
0
        int                   k) {
5198
0
    GGML_ASSERT(a->ne[0] >= k);
5199
5200
0
    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
5201
5202
0
    result = ggml_view_4d(ctx, result,
5203
0
                k, result->ne[1], result->ne[2], result->ne[3],
5204
0
                   result->nb[1], result->nb[2], result->nb[3],
5205
0
                0);
5206
5207
0
    return result;
5208
0
}
5209
5210
// ggml_top_k
5211
5212
struct ggml_tensor * ggml_top_k(
5213
        struct ggml_context * ctx,
5214
        struct ggml_tensor  * a,
5215
0
        int                   k) {
5216
0
    GGML_ASSERT(a->ne[0] >= k);
5217
5218
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
5219
5220
0
    result->op     = GGML_OP_TOP_K;
5221
0
    result->src[0] = a;
5222
5223
0
    return result;
5224
0
}
5225
5226
// ggml_arange
5227
5228
struct ggml_tensor * ggml_arange(
5229
        struct ggml_context * ctx,
5230
        float                 start,
5231
        float                 stop,
5232
0
        float                 step) {
5233
0
    GGML_ASSERT(stop > start);
5234
5235
0
    const int64_t steps = (int64_t) ceilf((stop - start) / step);
5236
5237
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
5238
5239
0
    ggml_set_op_params_f32(result, 0, start);
5240
0
    ggml_set_op_params_f32(result, 1, stop);
5241
0
    ggml_set_op_params_f32(result, 2, step);
5242
5243
0
    result->op = GGML_OP_ARANGE;
5244
5245
0
    return result;
5246
0
}
5247
5248
// ggml_flash_attn_ext
5249
5250
struct ggml_tensor * ggml_flash_attn_ext(
5251
        struct ggml_context * ctx,
5252
        struct ggml_tensor  * q,
5253
        struct ggml_tensor  * k,
5254
        struct ggml_tensor  * v,
5255
        struct ggml_tensor  * mask,
5256
        float                 scale,
5257
        float                 max_bias,
5258
0
        float                 logit_softcap) {
5259
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5260
    // TODO: check if vT can be multiplied by (k*qT)
5261
5262
0
    GGML_ASSERT(q->ne[3] == k->ne[3]);
5263
0
    GGML_ASSERT(q->ne[3] == v->ne[3]);
5264
5265
0
    if (mask) {
5266
0
        GGML_ASSERT(ggml_is_contiguous(mask));
5267
        //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
5268
5269
0
        GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
5270
0
        GGML_ASSERT(q->ne[3] % mask->ne[3] == 0);
5271
0
    }
5272
5273
0
    if (max_bias > 0.0f) {
5274
0
        GGML_ASSERT(mask);
5275
0
    }
5276
5277
    // permute(0, 2, 1, 3)
5278
0
    int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
5279
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5280
5281
0
    float params[] = { scale, max_bias, logit_softcap };
5282
0
    ggml_set_op_params(result, params, sizeof(params));
5283
5284
0
    result->op     = GGML_OP_FLASH_ATTN_EXT;
5285
0
    result->src[0] = q;
5286
0
    result->src[1] = k;
5287
0
    result->src[2] = v;
5288
0
    result->src[3] = mask;
5289
5290
0
    return result;
5291
0
}
5292
5293
void ggml_flash_attn_ext_set_prec(
5294
        struct ggml_tensor * a,
5295
0
        enum ggml_prec       prec) {
5296
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5297
5298
0
    const int32_t prec_i32 = (int32_t) prec;
5299
5300
0
    ggml_set_op_params_i32(a, 3, prec_i32); // scale is on first pos, max_bias on second
5301
0
}
5302
5303
enum ggml_prec ggml_flash_attn_ext_get_prec(
5304
0
        const struct ggml_tensor * a) {
5305
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5306
5307
0
    const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);
5308
5309
0
    return (enum ggml_prec) prec_i32;
5310
0
}
5311
5312
void ggml_flash_attn_ext_add_sinks(
5313
        struct ggml_tensor * a,
5314
0
        struct ggml_tensor * sinks) {
5315
0
    if (!sinks) {
5316
0
        a->src[4] = NULL;
5317
0
        return;
5318
0
    }
5319
5320
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5321
0
    GGML_ASSERT(a->src[4] == NULL);
5322
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
5323
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
5324
5325
0
    a->src[4] = sinks;
5326
0
}
5327
5328
// ggml_flash_attn_back
5329
5330
struct ggml_tensor * ggml_flash_attn_back(
5331
        struct ggml_context * ctx,
5332
        struct ggml_tensor  * q,
5333
        struct ggml_tensor  * k,
5334
        struct ggml_tensor  * v,
5335
        struct ggml_tensor  * d,
5336
0
        bool                  masked) {
5337
0
    GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
5338
5339
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5340
    // TODO: check if vT can be multiplied by (k*qT)
5341
5342
    // d shape [D,N,ne2,ne3]
5343
    // q shape [D,N,ne2,ne3]
5344
    // k shape [D,M,kvne2,ne3]
5345
    // v shape [M,D,kvne2,ne3]
5346
5347
0
    const int64_t     D = q->ne[0];
5348
0
    const int64_t     N = q->ne[1];
5349
0
    const int64_t     M = k->ne[1];
5350
0
    const int64_t   ne2 = q->ne[2];
5351
0
    const int64_t   ne3 = q->ne[3];
5352
0
    const int64_t kvne2 = k->ne[2];
5353
5354
0
    GGML_ASSERT(k->ne[0] == D);
5355
0
    GGML_ASSERT(v->ne[0] == M);
5356
0
    GGML_ASSERT(v->ne[1] == D);
5357
0
    GGML_ASSERT(d->ne[0] == D);
5358
0
    GGML_ASSERT(d->ne[1] == N);
5359
0
    GGML_ASSERT(k->ne[2] == kvne2);
5360
0
    GGML_ASSERT(k->ne[3] == ne3);
5361
0
    GGML_ASSERT(v->ne[2] == kvne2);
5362
0
    GGML_ASSERT(v->ne[3] == ne3);
5363
0
    GGML_ASSERT(d->ne[2] == ne2);
5364
0
    GGML_ASSERT(d->ne[3] == ne3);
5365
5366
0
    GGML_ASSERT(ne2 % kvne2 == 0);
5367
5368
    // store gradients of q, k and v as continuous tensors concatenated in result.
5369
    // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
5370
0
    const int64_t elem_q = ggml_nelements(q);
5371
0
    const int64_t elem_k = ggml_nelements(k);
5372
0
    const int64_t elem_v = ggml_nelements(v);
5373
5374
0
    enum ggml_type result_type = GGML_TYPE_F32;
5375
0
    GGML_ASSERT(ggml_blck_size(result_type) == 1);
5376
0
    const size_t tsize = ggml_type_size(result_type);
5377
5378
0
    const size_t offs_q = 0;
5379
0
    const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
5380
0
    const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
5381
0
    const size_t end    = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);
5382
5383
0
    const size_t nelements = (end + tsize - 1)/tsize;
5384
5385
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);
5386
5387
0
    int32_t masked_i = masked ? 1 : 0;
5388
0
    ggml_set_op_params(result, &masked_i, sizeof(masked_i));
5389
5390
0
    result->op     = GGML_OP_FLASH_ATTN_BACK;
5391
0
    result->src[0] = q;
5392
0
    result->src[1] = k;
5393
0
    result->src[2] = v;
5394
0
    result->src[3] = d;
5395
5396
0
    return result;
5397
0
}
5398
5399
// ggml_ssm_conv
5400
5401
struct ggml_tensor * ggml_ssm_conv(
5402
        struct ggml_context * ctx,
5403
        struct ggml_tensor  * sx,
5404
0
        struct ggml_tensor  * c) {
5405
0
    GGML_ASSERT(ggml_is_3d(sx));
5406
0
    GGML_ASSERT(ggml_is_matrix(c));
5407
5408
0
    const int64_t d_conv  = c->ne[0];
5409
0
    const int64_t d_inner = c->ne[1];
5410
0
    const int64_t n_t     = sx->ne[0] - d_conv + 1; // tokens per sequence
5411
0
    const int64_t n_s     = sx->ne[2];
5412
5413
    // TODO: maybe support other strides than 1?
5414
0
    GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
5415
0
    GGML_ASSERT(sx->ne[1] == d_inner);
5416
0
    GGML_ASSERT(n_t >= 0);
5417
5418
0
    struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
5419
5420
0
    result->op     = GGML_OP_SSM_CONV;
5421
0
    result->src[0] = sx;
5422
0
    result->src[1] = c;
5423
5424
0
    return result;
5425
0
}
5426
5427
// ggml_ssm_scan
5428
5429
struct ggml_tensor * ggml_ssm_scan(
5430
        struct ggml_context * ctx,
5431
        struct ggml_tensor  * s,
5432
        struct ggml_tensor  * x,
5433
        struct ggml_tensor  * dt,
5434
        struct ggml_tensor  * A,
5435
        struct ggml_tensor  * B,
5436
        struct ggml_tensor  * C,
5437
0
        struct ggml_tensor  * ids) {
5438
0
    GGML_ASSERT(ggml_is_contiguous(s));
5439
0
    GGML_ASSERT(ggml_is_contiguous(dt));
5440
0
    GGML_ASSERT(ggml_is_contiguous(A));
5441
0
    GGML_ASSERT(x->nb[0] == ggml_type_size(x->type));
5442
0
    GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
5443
0
    GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
5444
0
    GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]);
5445
0
    GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]);
5446
0
    GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]);
5447
0
    GGML_ASSERT(ggml_are_same_shape(B, C));
5448
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
5449
5450
0
    {
5451
0
        const int64_t d_state      = s->ne[0];
5452
0
        const int64_t head_dim     = x->ne[0];
5453
0
        const int64_t n_head       = x->ne[1];
5454
0
        const int64_t n_seq_tokens = x->ne[2];
5455
0
        const int64_t n_seqs       = x->ne[3];
5456
5457
0
        GGML_ASSERT(dt->ne[0] == n_head);
5458
0
        GGML_ASSERT(dt->ne[1] == n_seq_tokens);
5459
0
        GGML_ASSERT(dt->ne[2] == n_seqs);
5460
0
        GGML_ASSERT(ggml_is_3d(dt));
5461
0
        GGML_ASSERT(s->ne[1] == head_dim);
5462
0
        GGML_ASSERT(s->ne[2] == n_head);
5463
0
        GGML_ASSERT(B->ne[0] == d_state);
5464
0
        GGML_ASSERT(B->ne[2] == n_seq_tokens);
5465
0
        GGML_ASSERT(B->ne[3] == n_seqs);
5466
0
        GGML_ASSERT(ids->ne[0] == n_seqs);
5467
0
        GGML_ASSERT(ggml_is_vector(ids));
5468
0
        GGML_ASSERT(A->ne[1] == n_head);
5469
0
        GGML_ASSERT(ggml_is_matrix(A));
5470
5471
0
        if (A->ne[0] != 1) {
5472
            // Mamba-1 has more granular decay factors
5473
0
            GGML_ASSERT(A->ne[0] == d_state);
5474
0
        }
5475
0
    }
5476
5477
    // concatenated y + ssm_states
5478
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]);
5479
5480
0
    result->op   = GGML_OP_SSM_SCAN;
5481
0
    result->src[0] = s;
5482
0
    result->src[1] = x;
5483
0
    result->src[2] = dt;
5484
0
    result->src[3] = A;
5485
0
    result->src[4] = B;
5486
0
    result->src[5] = C;
5487
0
    result->src[6] = ids;
5488
5489
0
    return result;
5490
0
}
5491
5492
// ggml_win_part
5493
5494
struct ggml_tensor * ggml_win_part(
5495
        struct ggml_context * ctx,
5496
        struct ggml_tensor  * a,
5497
0
        int                   w) {
5498
0
    GGML_ASSERT(a->ne[3] == 1);
5499
0
    GGML_ASSERT(a->type  == GGML_TYPE_F32);
5500
5501
    // padding
5502
0
    const int px = (w - a->ne[1]%w)%w;
5503
0
    const int py = (w - a->ne[2]%w)%w;
5504
5505
0
    const int npx = (px + a->ne[1])/w;
5506
0
    const int npy = (py + a->ne[2])/w;
5507
0
    const int np  = npx*npy;
5508
5509
0
    const int64_t ne[4] = { a->ne[0], w, w, np, };
5510
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5511
5512
0
    int32_t params[] = { npx, npy, w };
5513
0
    ggml_set_op_params(result, params, sizeof(params));
5514
5515
0
    result->op     = GGML_OP_WIN_PART;
5516
0
    result->src[0] = a;
5517
5518
0
    return result;
5519
0
}
5520
5521
// ggml_win_unpart
5522
5523
struct ggml_tensor * ggml_win_unpart(
5524
        struct ggml_context * ctx,
5525
        struct ggml_tensor  * a,
5526
        int                   w0,
5527
        int                   h0,
5528
0
        int                   w) {
5529
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5530
5531
0
    const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
5532
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
5533
5534
0
    int32_t params[] = { w };
5535
0
    ggml_set_op_params(result, params, sizeof(params));
5536
5537
0
    result->op     = GGML_OP_WIN_UNPART;
5538
0
    result->src[0] = a;
5539
5540
0
    return result;
5541
0
}
5542
5543
// ggml_get_rel_pos
5544
5545
struct ggml_tensor * ggml_get_rel_pos(
5546
        struct ggml_context * ctx,
5547
        struct ggml_tensor  * a,
5548
        int                   qh,
5549
0
        int                   kh) {
5550
0
    GGML_ASSERT(qh == kh);
5551
0
    GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
5552
5553
0
    const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
5554
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
5555
5556
0
    result->op     = GGML_OP_GET_REL_POS;
5557
0
    result->src[0] = a;
5558
5559
0
    return result;
5560
0
}
5561
5562
// ggml_add_rel_pos
5563
5564
static struct ggml_tensor * ggml_add_rel_pos_impl(
5565
        struct ggml_context * ctx,
5566
        struct ggml_tensor  * a,
5567
        struct ggml_tensor  * pw,
5568
        struct ggml_tensor  * ph,
5569
0
        bool                  inplace) {
5570
0
    GGML_ASSERT(ggml_are_same_shape(pw, ph));
5571
0
    GGML_ASSERT(ggml_is_contiguous(a));
5572
0
    GGML_ASSERT(ggml_is_contiguous(pw));
5573
0
    GGML_ASSERT(ggml_is_contiguous(ph));
5574
0
    GGML_ASSERT(ph->type == GGML_TYPE_F32);
5575
0
    GGML_ASSERT(pw->type == GGML_TYPE_F32);
5576
0
    GGML_ASSERT(pw->ne[3] == a->ne[2]);
5577
0
    GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
5578
0
    GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
5579
5580
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5581
0
    ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
5582
5583
0
    result->op     = GGML_OP_ADD_REL_POS;
5584
0
    result->src[0] = a;
5585
0
    result->src[1] = pw;
5586
0
    result->src[2] = ph;
5587
5588
0
    return result;
5589
0
}
5590
5591
struct ggml_tensor * ggml_add_rel_pos(
5592
        struct ggml_context * ctx,
5593
        struct ggml_tensor  * a,
5594
        struct ggml_tensor  * pw,
5595
0
        struct ggml_tensor  * ph) {
5596
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
5597
0
}
5598
5599
struct ggml_tensor * ggml_add_rel_pos_inplace(
5600
        struct ggml_context * ctx,
5601
        struct ggml_tensor  * a,
5602
        struct ggml_tensor  * pw,
5603
0
        struct ggml_tensor  * ph) {
5604
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
5605
0
}
5606
5607
// ggml_rwkv_wkv6
5608
5609
struct ggml_tensor * ggml_rwkv_wkv6(
5610
        struct ggml_context * ctx,
5611
        struct ggml_tensor  * k,
5612
        struct ggml_tensor  * v,
5613
        struct ggml_tensor  * r,
5614
        struct ggml_tensor  * tf,
5615
        struct ggml_tensor  * td,
5616
0
        struct ggml_tensor  * state) {
5617
0
    GGML_ASSERT(ggml_is_contiguous(k));
5618
0
    GGML_ASSERT(ggml_is_contiguous(v));
5619
0
    GGML_ASSERT(ggml_is_contiguous(r));
5620
0
    GGML_ASSERT(ggml_is_contiguous(tf));
5621
0
    GGML_ASSERT(ggml_is_contiguous(td));
5622
0
    GGML_ASSERT(ggml_is_contiguous(state));
5623
5624
0
    const int64_t S = k->ne[0];
5625
0
    const int64_t H = k->ne[1];
5626
0
    const int64_t n_tokens = k->ne[2];
5627
0
    const int64_t n_seqs = state->ne[1];
5628
0
    {
5629
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5630
0
        GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
5631
0
        GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
5632
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5633
0
    }
5634
5635
    // concat output and new_state
5636
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5637
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5638
5639
0
    result->op     = GGML_OP_RWKV_WKV6;
5640
0
    result->src[0] = k;
5641
0
    result->src[1] = v;
5642
0
    result->src[2] = r;
5643
0
    result->src[3] = tf;
5644
0
    result->src[4] = td;
5645
0
    result->src[5] = state;
5646
5647
0
    return result;
5648
0
}
5649
5650
// ggml_gated_linear_attn
5651
5652
struct ggml_tensor * ggml_gated_linear_attn(
5653
        struct ggml_context * ctx,
5654
        struct ggml_tensor  * k,
5655
        struct ggml_tensor  * v,
5656
        struct ggml_tensor  * q,
5657
        struct ggml_tensor  * g,
5658
        struct ggml_tensor  * state,
5659
0
        float scale) {
5660
0
    GGML_ASSERT(ggml_is_contiguous(k));
5661
0
    GGML_ASSERT(ggml_is_contiguous(v));
5662
0
    GGML_ASSERT(ggml_is_contiguous(q));
5663
0
    GGML_ASSERT(ggml_is_contiguous(g));
5664
0
    GGML_ASSERT(ggml_is_contiguous(state));
5665
5666
0
    const int64_t S = k->ne[0];
5667
0
    const int64_t H = k->ne[1];
5668
0
    const int64_t n_tokens = k->ne[2];
5669
0
    const int64_t n_seqs = state->ne[1];
5670
0
    {
5671
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5672
0
        GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
5673
0
        GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
5674
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5675
0
    }
5676
5677
    // concat output and new_state
5678
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5679
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5680
5681
0
    ggml_set_op_params_f32(result, 0, scale);
5682
5683
0
    result->op     = GGML_OP_GATED_LINEAR_ATTN;
5684
0
    result->src[0] = k;
5685
0
    result->src[1] = v;
5686
0
    result->src[2] = q;
5687
0
    result->src[3] = g;
5688
0
    result->src[4] = state;
5689
5690
0
    return result;
5691
0
}
5692
5693
// ggml_rwkv_wkv7
5694
5695
struct ggml_tensor * ggml_rwkv_wkv7(
5696
        struct ggml_context * ctx,
5697
        struct ggml_tensor  * r,
5698
        struct ggml_tensor  * w,
5699
        struct ggml_tensor  * k,
5700
        struct ggml_tensor  * v,
5701
        struct ggml_tensor  * a,
5702
        struct ggml_tensor  * b,
5703
0
        struct ggml_tensor  * state) {
5704
0
    GGML_ASSERT(ggml_is_contiguous(r));
5705
0
    GGML_ASSERT(ggml_is_contiguous(w));
5706
0
    GGML_ASSERT(ggml_is_contiguous(k));
5707
0
    GGML_ASSERT(ggml_is_contiguous(v));
5708
0
    GGML_ASSERT(ggml_is_contiguous(a));
5709
0
    GGML_ASSERT(ggml_is_contiguous(b));
5710
0
    GGML_ASSERT(ggml_is_contiguous(state));
5711
5712
0
    const int64_t S = k->ne[0];
5713
0
    const int64_t H = k->ne[1];
5714
0
    const int64_t n_tokens = k->ne[2];
5715
0
    const int64_t n_seqs = state->ne[1];
5716
0
    {
5717
0
        GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
5718
0
        GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
5719
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5720
0
        GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
5721
0
        GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
5722
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5723
0
    }
5724
5725
    // concat output and new_state
5726
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5727
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5728
5729
0
    result->op     = GGML_OP_RWKV_WKV7;
5730
0
    result->src[0] = r;
5731
0
    result->src[1] = w;
5732
0
    result->src[2] = k;
5733
0
    result->src[3] = v;
5734
0
    result->src[4] = a;
5735
0
    result->src[5] = b;
5736
0
    result->src[6] = state;
5737
5738
0
    return result;
5739
0
}
5740
5741
// ggml_unary
5742
5743
static struct ggml_tensor * ggml_unary_impl(
5744
        struct ggml_context * ctx,
5745
        struct ggml_tensor  * a,
5746
        enum ggml_unary_op    op,
5747
0
        bool                  inplace) {
5748
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
5749
5750
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5751
5752
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
5753
5754
0
    result->op     = GGML_OP_UNARY;
5755
0
    result->src[0] = a;
5756
5757
0
    return result;
5758
0
}
5759
5760
struct ggml_tensor * ggml_unary(
5761
        struct ggml_context * ctx,
5762
        struct ggml_tensor  * a,
5763
0
        enum ggml_unary_op    op) {
5764
0
    return ggml_unary_impl(ctx, a, op, false);
5765
0
}
5766
5767
struct ggml_tensor * ggml_unary_inplace(
5768
        struct ggml_context * ctx,
5769
        struct ggml_tensor  * a,
5770
0
        enum ggml_unary_op    op) {
5771
0
    return ggml_unary_impl(ctx, a, op, true);
5772
0
}
5773
5774
// ggml_map_custom1
5775
5776
static struct ggml_tensor * ggml_map_custom1_impl(
5777
        struct ggml_context      * ctx,
5778
        struct ggml_tensor       * a,
5779
        const  ggml_custom1_op_t   fun,
5780
        int                        n_tasks,
5781
        void                     * userdata,
5782
0
        bool                       inplace) {
5783
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5784
5785
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5786
5787
0
    struct ggml_map_custom1_op_params params = {
5788
0
        /*.fun      =*/ fun,
5789
0
        /*.n_tasks  =*/ n_tasks,
5790
0
        /*.userdata =*/ userdata
5791
0
    };
5792
0
    ggml_set_op_params(result, &params, sizeof(params));
5793
5794
0
    result->op     = GGML_OP_MAP_CUSTOM1;
5795
0
    result->src[0] = a;
5796
5797
0
    return result;
5798
0
}
5799
5800
struct ggml_tensor * ggml_map_custom1(
5801
        struct ggml_context      * ctx,
5802
        struct ggml_tensor       * a,
5803
        const  ggml_custom1_op_t   fun,
5804
        int                        n_tasks,
5805
0
        void                     * userdata) {
5806
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
5807
0
}
5808
5809
struct ggml_tensor * ggml_map_custom1_inplace(
5810
        struct ggml_context      * ctx,
5811
        struct ggml_tensor       * a,
5812
        const  ggml_custom1_op_t   fun,
5813
        int                        n_tasks,
5814
0
        void                     * userdata) {
5815
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
5816
0
}
5817
5818
// ggml_map_custom2
5819
5820
static struct ggml_tensor * ggml_map_custom2_impl(
5821
        struct ggml_context      * ctx,
5822
        struct ggml_tensor       * a,
5823
        struct ggml_tensor       * b,
5824
        const  ggml_custom2_op_t   fun,
5825
        int                        n_tasks,
5826
        void                     * userdata,
5827
0
        bool                       inplace) {
5828
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5829
5830
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5831
5832
0
    struct ggml_map_custom2_op_params params = {
5833
0
        /*.fun      =*/ fun,
5834
0
        /*.n_tasks  =*/ n_tasks,
5835
0
        /*.userdata =*/ userdata
5836
0
    };
5837
0
    ggml_set_op_params(result, &params, sizeof(params));
5838
5839
0
    result->op     = GGML_OP_MAP_CUSTOM2;
5840
0
    result->src[0] = a;
5841
0
    result->src[1] = b;
5842
5843
0
    return result;
5844
0
}
5845
5846
struct ggml_tensor * ggml_map_custom2(
5847
        struct ggml_context      * ctx,
5848
        struct ggml_tensor       * a,
5849
        struct ggml_tensor       * b,
5850
        const  ggml_custom2_op_t   fun,
5851
        int                        n_tasks,
5852
0
        void                     * userdata) {
5853
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
5854
0
}
5855
5856
struct ggml_tensor * ggml_map_custom2_inplace(
5857
        struct ggml_context      * ctx,
5858
        struct ggml_tensor       * a,
5859
        struct ggml_tensor       * b,
5860
        const  ggml_custom2_op_t   fun,
5861
        int                        n_tasks,
5862
0
        void                     * userdata) {
5863
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
5864
0
}
5865
5866
// ggml_map_custom3
5867
5868
static struct ggml_tensor * ggml_map_custom3_impl(
5869
        struct ggml_context      * ctx,
5870
        struct ggml_tensor       * a,
5871
        struct ggml_tensor       * b,
5872
        struct ggml_tensor       * c,
5873
        const  ggml_custom3_op_t   fun,
5874
        int                        n_tasks,
5875
        void                     * userdata,
5876
0
        bool                       inplace) {
5877
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5878
5879
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5880
5881
0
    struct ggml_map_custom3_op_params params = {
5882
0
        /*.fun      =*/ fun,
5883
0
        /*.n_tasks  =*/ n_tasks,
5884
0
        /*.userdata =*/ userdata
5885
0
    };
5886
0
    ggml_set_op_params(result, &params, sizeof(params));
5887
5888
0
    result->op     = GGML_OP_MAP_CUSTOM3;
5889
0
    result->src[0] = a;
5890
0
    result->src[1] = b;
5891
0
    result->src[2] = c;
5892
5893
0
    return result;
5894
0
}
5895
5896
struct ggml_tensor * ggml_map_custom3(
5897
        struct ggml_context      * ctx,
5898
        struct ggml_tensor       * a,
5899
        struct ggml_tensor       * b,
5900
        struct ggml_tensor       * c,
5901
        const  ggml_custom3_op_t   fun,
5902
        int                        n_tasks,
5903
0
        void                     * userdata) {
5904
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
5905
0
}
5906
5907
struct ggml_tensor * ggml_map_custom3_inplace(
5908
        struct ggml_context      * ctx,
5909
        struct ggml_tensor       * a,
5910
        struct ggml_tensor       * b,
5911
        struct ggml_tensor       * c,
5912
        const  ggml_custom3_op_t   fun,
5913
        int                        n_tasks,
5914
0
        void                     * userdata) {
5915
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
5916
0
}
5917
5918
struct ggml_tensor * ggml_custom_4d(
5919
        struct ggml_context * ctx,
5920
        enum ggml_type        type,
5921
        int64_t               ne0,
5922
        int64_t               ne1,
5923
        int64_t               ne2,
5924
        int64_t               ne3,
5925
        struct ggml_tensor ** args,
5926
        int                   n_args,
5927
        ggml_custom_op_t      fun,
5928
        int                   n_tasks,
5929
0
        void                * userdata) {
5930
5931
0
    GGML_ASSERT(n_args < GGML_MAX_SRC);
5932
5933
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
5934
5935
0
    struct ggml_custom_op_params params = {
5936
0
        /*.fun      =*/ fun,
5937
0
        /*.n_tasks  =*/ n_tasks,
5938
0
        /*.userdata =*/ userdata
5939
0
    };
5940
0
    ggml_set_op_params(result, &params, sizeof(params));
5941
5942
0
    result->op = GGML_OP_CUSTOM;
5943
0
    for (int i = 0; i < n_args; i++) {
5944
0
        result->src[i] = args[i];
5945
0
    }
5946
5947
0
    return result;
5948
0
}
5949
5950
struct ggml_tensor * ggml_custom_inplace(
5951
        struct ggml_context * ctx,
5952
        struct ggml_tensor  * a,
5953
        struct ggml_tensor ** args,
5954
        int                   n_args,
5955
        ggml_custom_op_t      fun,
5956
        int                   n_tasks,
5957
0
        void                * userdata) {
5958
5959
0
    GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
5960
5961
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
5962
5963
0
    struct ggml_custom_op_params params = {
5964
0
        /*.fun      =*/ fun,
5965
0
        /*.n_tasks  =*/ n_tasks,
5966
0
        /*.userdata =*/ userdata
5967
0
    };
5968
0
    ggml_set_op_params(result, &params, sizeof(params));
5969
5970
0
    result->op = GGML_OP_CUSTOM;
5971
0
    result->src[0] = a;
5972
0
    for (int i = 0; i < n_args; i++) {
5973
0
        result->src[i + 1] = args[i];
5974
0
    }
5975
5976
0
    return result;
5977
0
}
5978
// ggml_cross_entropy_loss
5979
5980
struct ggml_tensor * ggml_cross_entropy_loss(
5981
        struct ggml_context * ctx,
5982
        struct ggml_tensor  * a,
5983
0
        struct ggml_tensor  * b) {
5984
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
5985
5986
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
5987
5988
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS;
5989
0
    result->src[0] = a;
5990
0
    result->src[1] = b;
5991
5992
0
    return result;
5993
0
}
5994
5995
// ggml_cross_entropy_loss_back
5996
5997
struct ggml_tensor * ggml_cross_entropy_loss_back(
5998
        struct ggml_context * ctx,
5999
        struct ggml_tensor  * a,
6000
        struct ggml_tensor  * b,
6001
0
        struct ggml_tensor  * c) {
6002
0
    GGML_ASSERT(ggml_is_scalar(a));
6003
0
    GGML_ASSERT(ggml_are_same_shape(b, c));
6004
6005
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, b);
6006
6007
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
6008
0
    result->src[0] = a;
6009
0
    result->src[1] = b;
6010
0
    result->src[2] = c;
6011
6012
0
    return result;
6013
0
}
6014
6015
// opt_step_adamw
6016
6017
struct ggml_tensor * ggml_opt_step_adamw(
6018
        struct ggml_context * ctx,
6019
        struct ggml_tensor  * a,
6020
        struct ggml_tensor  * grad,
6021
        struct ggml_tensor  * m,
6022
        struct ggml_tensor  * v,
6023
0
        struct ggml_tensor  * adamw_params) {
6024
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6025
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6026
0
    GGML_ASSERT(ggml_are_same_shape(a, m));
6027
0
    GGML_ASSERT(ggml_are_same_shape(a, v));
6028
0
    GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
6029
0
    GGML_ASSERT(ggml_nelements(adamw_params) == 7);
6030
6031
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6032
6033
0
    result->op     = GGML_OP_OPT_STEP_ADAMW;
6034
0
    result->src[0] = a;
6035
0
    result->src[1] = grad;
6036
0
    result->src[2] = m;
6037
0
    result->src[3] = v;
6038
0
    result->src[4] = adamw_params;
6039
6040
0
    return result;
6041
0
}
6042
6043
// opt_step_sgd
6044
6045
struct ggml_tensor * ggml_opt_step_sgd(
6046
        struct ggml_context * ctx,
6047
        struct ggml_tensor  * a,
6048
        struct ggml_tensor  * grad,
6049
0
        struct ggml_tensor  * params) {
6050
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6051
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6052
0
    GGML_ASSERT(params->type == GGML_TYPE_F32);
6053
0
    GGML_ASSERT(ggml_nelements(params) == 2);
6054
6055
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6056
6057
0
    result->op     = GGML_OP_OPT_STEP_SGD;
6058
0
    result->src[0] = a;
6059
0
    result->src[1] = grad;
6060
0
    result->src[2] = params;
6061
6062
0
    return result;
6063
0
}
6064
6065
// solve_tri
6066
6067
struct ggml_tensor * ggml_solve_tri(
6068
        struct ggml_context * ctx,
6069
        struct ggml_tensor  * a,
6070
        struct ggml_tensor  * b,
6071
        bool                  left,
6072
        bool                  lower,
6073
0
        bool                  uni) {
6074
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
6075
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
6076
6077
    // A must be square and lower diagonal
6078
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
6079
    // B must have same outer dimension as A
6080
0
    GGML_ASSERT(a->ne[1] == b->ne[1]);
6081
6082
    // batch dimensions must be equal
6083
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
6084
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
6085
6086
0
    GGML_ASSERT(ggml_is_contiguous(a));
6087
0
    GGML_ASSERT(ggml_is_contiguous(b));
6088
6089
0
    GGML_ASSERT(lower && left && !uni); // TODO: support other variants
6090
6091
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
6092
6093
0
    result->op     = GGML_OP_SOLVE_TRI;
6094
0
    result->src[0] = a;
6095
0
    result->src[1] = b;
6096
6097
0
    return result;
6098
0
}
6099
6100
////////////////////////////////////////////////////////////////////////////////
6101
6102
0
struct ggml_hash_set ggml_hash_set_new(size_t size) {
6103
0
    size = ggml_hash_size(size);
6104
0
    struct ggml_hash_set result;
6105
0
    result.size = size;
6106
0
    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
6107
0
    result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
6108
0
    return result;
6109
0
}
6110
6111
0
void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
6112
0
    memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
6113
0
}
6114
6115
0
void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
6116
0
    GGML_FREE(hash_set->used);
6117
0
    GGML_FREE(hash_set->keys);
6118
0
}
6119
6120
0
size_t ggml_hash_size(size_t min_sz) {
6121
    // next primes after powers of two
6122
0
    static const size_t primes[] = {
6123
0
        2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
6124
0
        2053, 4099, 8209, 16411, 32771, 65537, 131101,
6125
0
        262147, 524309, 1048583, 2097169, 4194319, 8388617,
6126
0
        16777259, 33554467, 67108879, 134217757, 268435459,
6127
0
        536870923, 1073741827, 2147483659
6128
0
    };
6129
0
    static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
6130
6131
    // find the smallest prime that is larger or equal than min_sz
6132
0
    size_t l = 0;
6133
0
    size_t r = n_primes;
6134
0
    while (l < r) {
6135
0
        size_t m = (l + r)/2;
6136
0
        if (primes[m] < min_sz) {
6137
0
            l = m + 1;
6138
0
        } else {
6139
0
            r = m;
6140
0
        }
6141
0
    }
6142
0
    size_t sz = l < n_primes ? primes[l] : min_sz | 1;
6143
0
    return sz;
6144
0
}
6145
6146
struct hash_map {
6147
    struct ggml_hash_set set;
6148
    struct ggml_tensor ** vals;
6149
};
6150
6151
0
static struct hash_map * ggml_new_hash_map(size_t size) {
6152
0
    struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
6153
0
    result->set = ggml_hash_set_new(size);
6154
0
    result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
6155
0
    return result;
6156
0
}
6157
6158
0
static void ggml_hash_map_free(struct hash_map * map) {
6159
0
    ggml_hash_set_free(&map->set);
6160
0
    GGML_FREE(map->vals);
6161
0
    GGML_FREE(map);
6162
0
}
6163
6164
// utility functions to change gradients
6165
// isrc is the index of tensor in cgraph->visited_has_set.keys
6166
// the corresponding gradient (accumulators) are also at position isrc
6167
// if tensor has a gradient accumulator, modify that accumulator in-place
6168
// else if there is no gradient for tensor, set the corresponding value
6169
// else, just add/subtract/etc. the gradients
6170
6171
static void ggml_add_or_set(
6172
        struct ggml_context * ctx,
6173
        struct ggml_cgraph  * cgraph,
6174
        size_t                isrc,
6175
0
        struct ggml_tensor  * tensor) {
6176
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6177
0
    GGML_ASSERT(src);
6178
0
    if (cgraph->grads[isrc]) {
6179
0
        cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
6180
0
    } else {
6181
0
        cgraph->grads[isrc] = tensor;
6182
0
    }
6183
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6184
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6185
0
}
6186
6187
static void ggml_acc_or_set(
6188
        struct ggml_context * ctx,
6189
        struct ggml_cgraph  * cgraph,
6190
        size_t                isrc,
6191
        struct ggml_tensor  * tensor,
6192
        const  size_t         nb1,
6193
        const  size_t         nb2,
6194
        const  size_t         nb3,
6195
0
        const  size_t         offset) {
6196
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6197
0
    GGML_ASSERT(src);
6198
0
    if (cgraph->grads[isrc]) {
6199
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
6200
0
    } else {
6201
0
        struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
6202
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
6203
0
    }
6204
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
6205
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6206
0
}
6207
6208
static void ggml_add1_or_set(
6209
        struct ggml_context * ctx,
6210
        struct ggml_cgraph  * cgraph,
6211
        size_t                isrc,
6212
0
        struct ggml_tensor  * tensor) {
6213
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6214
0
    GGML_ASSERT(src);
6215
0
    if (cgraph->grads[isrc]) {
6216
0
        cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6217
0
    } else {
6218
0
        cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
6219
0
    }
6220
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6221
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6222
0
}
6223
6224
static void ggml_sub_or_set(
6225
        struct ggml_context * ctx,
6226
        struct ggml_cgraph  * cgraph,
6227
        size_t                isrc,
6228
0
        struct ggml_tensor  * tensor) {
6229
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6230
0
    GGML_ASSERT(src);
6231
0
    if (cgraph->grads[isrc]) {
6232
0
        cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6233
0
    } else {
6234
0
        cgraph->grads[isrc] = ggml_neg(ctx, tensor);
6235
0
    }
6236
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6237
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6238
0
}
6239
6240
static void ggml_compute_backward(
6241
0
        struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
6242
0
    struct ggml_tensor * tensor = cgraph->nodes[i];
6243
0
    struct ggml_tensor * grad   = ggml_graph_get_grad(cgraph, tensor);
6244
6245
0
    if (!grad) {
6246
0
        return;
6247
0
    }
6248
6249
0
    struct ggml_tensor * src0 = tensor->src[0];
6250
0
    struct ggml_tensor * src1 = tensor->src[1];
6251
0
    struct ggml_tensor * src2 = tensor->src[2];
6252
0
    struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
6253
0
    const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
6254
0
    const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
6255
0
    const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
6256
0
    const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
6257
0
    const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
6258
0
    const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
6259
6260
0
    switch (tensor->op) {
6261
0
        case GGML_OP_DUP: {
6262
0
            if (src0_needs_grads) {
6263
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6264
0
            }
6265
0
        } break;
6266
0
        case GGML_OP_ADD: {
6267
0
            if (src0_needs_grads) {
6268
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6269
0
            }
6270
0
            if (src1_needs_grads) {
6271
0
                struct ggml_tensor * tmp = grad;
6272
0
                if (!ggml_are_same_shape(src0, src1)) {
6273
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6274
0
                }
6275
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6276
0
            }
6277
0
        } break;
6278
0
        case GGML_OP_ADD1: {
6279
0
            if (src0_needs_grads) {
6280
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6281
0
            }
6282
0
            if (src1_needs_grads) {
6283
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
6284
0
            }
6285
0
        } break;
6286
0
        case GGML_OP_ACC: {
6287
0
            if (src0_needs_grads) {
6288
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6289
0
            }
6290
0
            if (src1_needs_grads) {
6291
0
                const size_t nb1    = ((int32_t *) tensor->op_params)[0];
6292
0
                const size_t nb2    = ((int32_t *) tensor->op_params)[1];
6293
0
                const size_t nb3    = ((int32_t *) tensor->op_params)[2];
6294
0
                const size_t offset = ((int32_t *) tensor->op_params)[3];
6295
6296
0
                struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
6297
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6298
0
                    nb1, nb2, nb3, offset);
6299
6300
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6301
0
            }
6302
0
        } break;
6303
0
        case GGML_OP_SUB: {
6304
0
            if (src0_needs_grads) {
6305
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6306
0
            }
6307
0
            if (src1_needs_grads) {
6308
0
                ggml_sub_or_set(ctx, cgraph, isrc1, grad);
6309
0
            }
6310
0
        } break;
6311
0
        case GGML_OP_MUL: {
6312
0
            if (src0_needs_grads) {
6313
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
6314
0
            }
6315
0
            if (src1_needs_grads) {
6316
0
                struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
6317
0
                if (!ggml_are_same_shape(src0, src1)) {
6318
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6319
0
                }
6320
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6321
0
            }
6322
0
        } break;
6323
0
        case GGML_OP_DIV: {
6324
0
            if (src0_needs_grads) {
6325
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
6326
0
            }
6327
0
            if (src1_needs_grads) {
6328
0
                ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
6329
0
            }
6330
0
        } break;
6331
0
        case GGML_OP_SQR: {
6332
0
            if (src0_needs_grads) {
6333
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
6334
0
            }
6335
0
        } break;
6336
0
        case GGML_OP_SQRT: {
6337
0
            if (src0_needs_grads) {
6338
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
6339
0
            }
6340
0
        } break;
6341
0
        case GGML_OP_LOG: {
6342
0
            if (src0_needs_grads) {
6343
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
6344
0
            }
6345
0
        } break;
6346
0
        case GGML_OP_SIN: {
6347
0
            if (src0_needs_grads) {
6348
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
6349
0
            }
6350
0
        } break;
6351
0
        case GGML_OP_COS: {
6352
0
            if (src0_needs_grads) {
6353
0
                ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
6354
0
            }
6355
0
        } break;
6356
0
        case GGML_OP_SUM: {
6357
0
            if (src0_needs_grads) {
6358
0
                ggml_add1_or_set(ctx, cgraph, isrc0, grad);
6359
0
            }
6360
0
        } break;
6361
0
        case GGML_OP_SUM_ROWS: {
6362
0
            if (src0_needs_grads) {
6363
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6364
0
            }
6365
0
        } break;
6366
0
        case GGML_OP_MEAN: {
6367
0
            if (src0_needs_grads) {
6368
0
                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false));
6369
0
            }
6370
0
        } break;
6371
0
        case GGML_OP_REPEAT: {
6372
0
            if (src0_needs_grads) {
6373
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
6374
0
            }
6375
0
        } break;
6376
0
        case GGML_OP_REPEAT_BACK: {
6377
0
            if (src0_needs_grads) {
6378
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6379
0
            }
6380
0
        } break;
6381
0
        case GGML_OP_RMS_NORM: {
6382
0
            if (src0_needs_grads) {
6383
0
                float eps;
6384
0
                memcpy(&eps, tensor->op_params, sizeof(float));
6385
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
6386
0
            }
6387
0
        } break;
6388
0
        case GGML_OP_MUL_MAT: {
6389
            // https://cs231n.github.io/optimization-2/#staged
6390
            // # forward pass
6391
            // s0 = np.random.randn(5, 10)
6392
            // s1 = np.random.randn(10, 3)
6393
            // t = s0.dot(s1)
6394
6395
            // # now suppose we had the gradient on t from above in the circuit
6396
            // dt = np.random.randn(*t.shape) # same shape as t
6397
            // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
6398
            // ds1 = t.T.dot(dt)
6399
6400
            // tensor.shape [m,p,qq,rr]
6401
            // src0.shape   [n,m,q1,r1]
6402
            // src1.shape   [n,p,qq,rr]
6403
6404
0
            if (src0_needs_grads) {
6405
0
                GGML_ASSERT(grad->ne[2] == src1->ne[2]);
6406
0
                GGML_ASSERT(grad->ne[3] == src1->ne[3]);
6407
0
                struct ggml_tensor * tmp =
6408
0
                    ggml_out_prod(ctx, // [n,m,qq,rr]
6409
0
                        src1,          // [n,p,qq,rr]
6410
0
                        grad);         // [m,p,qq,rr]
6411
0
                if (!ggml_are_same_shape(tmp, src0)) {
6412
0
                    GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
6413
0
                    GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
6414
0
                    GGML_ASSERT(tmp->ne[3] == 1);
6415
6416
0
                    const int64_t nr2 = tmp->ne[2] / src0->ne[2];
6417
0
                    const size_t nb2 = tmp->nb[2] * nr2;
6418
0
                    const size_t nb3 = tmp->nb[2];
6419
6420
0
                    tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
6421
0
                    tmp = ggml_repeat_back(ctx, tmp, src0);
6422
0
                }
6423
0
                ggml_add_or_set(ctx, cgraph, isrc0, tmp);
6424
0
            }
6425
0
            if (src1_needs_grads) {
6426
0
                ggml_add_or_set(ctx, cgraph, isrc1,
6427
                        // ggml_mul_mat(ctx,                   // [n,p,qq,rr]
6428
                        //     ggml_cont(ctx,                  // [m,n,q1,r1]
6429
                        //         ggml_transpose(ctx, src0)), // [m,n,q1,r1]
6430
                        //     grad),                          // [m,p,qq,rr]
6431
6432
                        // when src0 is bigger than tensor->grad (this is mostly the case in llama),
6433
                        // avoid transpose of src0, rather transpose smaller tensor->grad
6434
                        // and then use ggml_out_prod
6435
0
                        ggml_out_prod(ctx,      // [n,p,qq,rr]
6436
0
                            src0,               // [n,m,q1,r1]
6437
0
                            ggml_transpose(ctx, // [p,m,qq,rr]
6438
0
                                grad)));        // [m,p,qq,rr]
6439
0
            }
6440
0
        } break;
6441
0
        case GGML_OP_SCALE: {
6442
0
            if (src0_needs_grads) {
6443
0
                float s;
6444
0
                memcpy(&s, tensor->op_params, sizeof(float));
6445
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false));
6446
0
            }
6447
0
        } break;
6448
0
        case GGML_OP_SET: {
6449
0
            const size_t nb1    = ((const int32_t *) tensor->op_params)[0];
6450
0
            const size_t nb2    = ((const int32_t *) tensor->op_params)[1];
6451
0
            const size_t nb3    = ((const int32_t *) tensor->op_params)[2];
6452
0
            const size_t offset = ((const int32_t *) tensor->op_params)[3];
6453
6454
0
            struct ggml_tensor * tensor_grad_view = NULL;
6455
6456
0
            if (src0_needs_grads || src1_needs_grads) {
6457
0
                GGML_ASSERT(src0->type == tensor->type);
6458
0
                GGML_ASSERT(!cgraph->grads[isrc0] ||                      cgraph->grads[isrc0]->type == grad->type);
6459
0
                GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);
6460
6461
0
                tensor_grad_view = ggml_view_4d(ctx,
6462
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6463
0
                    nb1, nb2, nb3, offset);
6464
0
            }
6465
6466
0
            if (src0_needs_grads) {
6467
0
                struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
6468
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
6469
0
            }
6470
6471
0
            if (src1_needs_grads) {
6472
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6473
0
            }
6474
0
        } break;
6475
0
        case GGML_OP_CPY: {
6476
            // cpy overwrites value of src1 by src0 and returns view(src1)
6477
            // the overwriting is mathematically equivalent to:
6478
            // tensor = src0 * 1 + src1 * 0
6479
0
            if (src0_needs_grads) {
6480
                // dsrc0 = dtensor * 1
6481
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
6482
0
            }
6483
0
            if (src1_needs_grads) {
6484
                // dsrc1 = dtensor * 0 -> noop
6485
0
            }
6486
0
        } break;
6487
0
        case GGML_OP_CONT: {
6488
            // same as cpy
6489
0
            if (src0_needs_grads) {
6490
0
                GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
6491
0
                GGML_ASSERT(ggml_is_contiguous(grad));
6492
0
                GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
6493
0
                ggml_add_or_set(ctx, cgraph, isrc0,
6494
0
                    ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
6495
0
            }
6496
0
        } break;
6497
0
        case GGML_OP_RESHAPE: {
6498
0
            if (src0_needs_grads) {
6499
0
                struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
6500
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
6501
0
            }
6502
0
        } break;
6503
0
        case GGML_OP_VIEW: {
6504
0
            if (src0_needs_grads) {
6505
0
                size_t offset;
6506
6507
0
                memcpy(&offset, tensor->op_params, sizeof(offset));
6508
6509
0
                size_t nb1 = tensor->nb[1];
6510
0
                size_t nb2 = tensor->nb[2];
6511
0
                size_t nb3 = tensor->nb[3];
6512
6513
0
                if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
6514
                    // gradient is typically F32, but src0 could be other type
6515
0
                    size_t ng = ggml_element_size(cgraph->grads[isrc0]);
6516
0
                    size_t n0 = ggml_element_size(src0);
6517
0
                    GGML_ASSERT(offset % n0 == 0);
6518
0
                    GGML_ASSERT(nb1 % n0 == 0);
6519
0
                    GGML_ASSERT(nb2 % n0 == 0);
6520
0
                    GGML_ASSERT(nb3 % n0 == 0);
6521
0
                    offset = (offset / n0) * ng;
6522
0
                    nb1 = (nb1 / n0) * ng;
6523
0
                    nb2 = (nb2 / n0) * ng;
6524
0
                    nb3 = (nb3 / n0) * ng;
6525
0
                }
6526
6527
0
                ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
6528
0
            }
6529
0
        } break;
6530
0
        case GGML_OP_PERMUTE: {
6531
0
            if (src0_needs_grads) {
6532
0
                const int32_t * axes = (const int32_t *) tensor->op_params;
6533
0
                const int axis0 = axes[0] & 0x3;
6534
0
                const int axis1 = axes[1] & 0x3;
6535
0
                const int axis2 = axes[2] & 0x3;
6536
0
                const int axis3 = axes[3] & 0x3;
6537
0
                int axb[4] = {0,0,0,0}; // axes backward
6538
0
                axb[axis0] = 0;
6539
0
                axb[axis1] = 1;
6540
0
                axb[axis2] = 2;
6541
0
                axb[axis3] = 3;
6542
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
6543
0
            }
6544
0
        } break;
6545
0
        case GGML_OP_TRANSPOSE: {
6546
0
            if (src0_needs_grads) {
6547
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
6548
0
            }
6549
0
        } break;
6550
0
        case GGML_OP_GET_ROWS: {
6551
0
            if (src0_needs_grads) {
6552
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
6553
0
            }
6554
0
            if (src1_needs_grads) {
6555
                // noop
6556
0
            }
6557
0
        } break;
6558
0
        case GGML_OP_DIAG_MASK_INF: {
6559
0
            if (src0_needs_grads) {
6560
                /* ggml_diag_mask_inf_impl() shouldn't be here */
6561
                /* ref:  https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */
6562
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6563
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6564
0
            }
6565
0
        } break;
6566
0
        case GGML_OP_DIAG_MASK_ZERO: {
6567
0
            if (src0_needs_grads) {
6568
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6569
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6570
0
            }
6571
0
        } break;
6572
0
        case GGML_OP_SOFT_MAX: {
6573
0
            if (src0_needs_grads) {
6574
0
                float scale    = 1.0f;
6575
0
                float max_bias = 0.0f;
6576
6577
0
                memcpy(&scale,    (const float *) tensor->op_params + 0, sizeof(float));
6578
0
                memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
6579
6580
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
6581
0
            }
6582
0
            GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
6583
0
        } break;
6584
0
        case GGML_OP_ROPE: {
6585
0
            if (src0_needs_grads) {
6586
                //const int n_past = ((int32_t *) tensor->op_params)[0];
6587
0
                const int n_dims     = ((const int32_t *) tensor->op_params)[1];
6588
0
                const int mode       = ((const int32_t *) tensor->op_params)[2];
6589
                //const int n_ctx      = ((int32_t *) tensor->op_params)[3];
6590
0
                const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
6591
0
                float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
6592
0
                int sections[4] = {0, 0, 0, 0};
6593
6594
0
                memcpy(&freq_base,   (const float *) tensor->op_params +  5, sizeof(float));
6595
0
                memcpy(&freq_scale,  (const float *) tensor->op_params +  6, sizeof(float));
6596
0
                memcpy(&ext_factor,  (const float *) tensor->op_params +  7, sizeof(float));
6597
0
                memcpy(&attn_factor, (const float *) tensor->op_params +  8, sizeof(float));
6598
0
                memcpy(&beta_fast,   (const float *) tensor->op_params +  9, sizeof(float));
6599
0
                memcpy(&beta_slow,   (const float *) tensor->op_params + 10, sizeof(float));
6600
0
                memcpy(&sections,                    tensor->op_params + 11, sizeof(sections));
6601
6602
0
                struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
6603
0
                    ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
6604
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
6605
0
                    ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
6606
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
6607
0
                ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
6608
0
            }
6609
0
            GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
6610
0
        } break;
6611
0
        case GGML_OP_IM2COL: {
6612
0
            if (src1_needs_grads) {
6613
0
                const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
6614
0
                const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
6615
0
                const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
6616
0
                const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
6617
0
                const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
6618
0
                const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
6619
0
                const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
6620
6621
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
6622
0
            }
6623
0
        } break;
6624
0
        case GGML_OP_POOL_2D: {
6625
0
            if (src0_needs_grads) {
6626
0
                const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
6627
0
                const      int32_t      k0 = ggml_get_op_params_i32(tensor, 1);
6628
0
                const      int32_t      k1 = ggml_get_op_params_i32(tensor, 2);
6629
0
                const      int32_t      s0 = ggml_get_op_params_i32(tensor, 3);
6630
0
                const      int32_t      s1 = ggml_get_op_params_i32(tensor, 4);
6631
0
                const      int32_t      p0 = ggml_get_op_params_i32(tensor, 5);
6632
0
                const      int32_t      p1 = ggml_get_op_params_i32(tensor, 6);
6633
6634
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
6635
0
            }
6636
0
        } break;
6637
0
        case GGML_OP_WIN_PART:
6638
0
        case GGML_OP_WIN_UNPART:
6639
0
        case GGML_OP_UNARY: {
6640
0
            switch (ggml_get_unary_op(tensor)) {
6641
0
                case GGML_UNARY_OP_ABS: {
6642
0
                    if (src0_needs_grads) {
6643
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
6644
0
                    }
6645
0
                } break;
6646
0
                case GGML_UNARY_OP_SGN: {
6647
                    // noop
6648
0
                } break;
6649
0
                case GGML_UNARY_OP_NEG: {
6650
0
                    if (src0_needs_grads) {
6651
0
                        ggml_sub_or_set(ctx, cgraph, isrc0, grad);
6652
0
                    }
6653
0
                } break;
6654
0
                case GGML_UNARY_OP_STEP: {
6655
                    // noop
6656
0
                } break;
6657
0
                case GGML_UNARY_OP_RELU: {
6658
0
                    if (src0_needs_grads) {
6659
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
6660
0
                    }
6661
0
                } break;
6662
0
                case GGML_UNARY_OP_SILU: {
6663
0
                    if (src0_needs_grads) {
6664
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
6665
0
                    }
6666
0
                } break;
6667
0
                case GGML_UNARY_OP_EXP: {
6668
0
                    if (src0_needs_grads) {
6669
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
6670
0
                    }
6671
0
                } break;
6672
0
                case GGML_UNARY_OP_EXPM1: {
6673
0
                    if (src0_needs_grads) {
6674
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
6675
0
                    }
6676
0
                } break;
6677
0
                case GGML_UNARY_OP_SOFTPLUS: {
6678
0
                    if (src0_needs_grads) {
6679
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
6680
0
                    }
6681
0
                } break;
6682
0
                default: {
6683
0
                    fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
6684
0
                        __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
6685
0
                    GGML_ABORT("fatal error");
6686
0
                } //break;
6687
0
            }
6688
0
        } break;
6689
0
        case GGML_OP_CROSS_ENTROPY_LOSS: {
6690
0
            if (src0_needs_grads) {
6691
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
6692
0
            }
6693
0
            GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
6694
0
        } break;
6695
0
        case GGML_OP_GLU: {
6696
0
            switch (ggml_get_glu_op(tensor)) {
6697
0
                case GGML_GLU_OP_SWIGLU: {
6698
0
                    if (src0_needs_grads) {
6699
0
                        GGML_ASSERT(src1 && "backward pass only implemented for split swiglu");
6700
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0));
6701
0
                    }
6702
0
                    if (src1_needs_grads) {
6703
0
                        ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad));
6704
0
                    }
6705
0
                } break;
6706
0
                default: {
6707
0
                    GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor)));
6708
0
                } //break;
6709
0
            }
6710
0
        } break;
6711
0
        case GGML_OP_NONE: {
6712
            // noop
6713
0
        } break;
6714
0
        case GGML_OP_COUNT:
6715
0
        default: {
6716
0
            GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
6717
0
        } //break;
6718
0
    }
6719
6720
0
    GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
6721
0
    GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
6722
0
    GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
6723
0
}
6724
6725
0
static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
6726
    // check if already visited
6727
0
    size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
6728
0
    GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
6729
0
    if (!ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
6730
        // This is the first time we see this node in the current graph.
6731
0
        cgraph->visited_hash_set.keys[node_hash_pos] = node;
6732
0
        ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
6733
0
        cgraph->use_counts[node_hash_pos] = 0;
6734
0
    } else {
6735
        // already visited
6736
0
        return node_hash_pos;
6737
0
    }
6738
6739
0
    for (int i = 0; i < GGML_MAX_SRC; ++i) {
6740
0
        const int k =
6741
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
6742
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
6743
0
            /* unknown order, just fall back to using i */ i;
6744
6745
0
        struct ggml_tensor * src = node->src[k];
6746
0
        if (src) {
6747
0
            size_t src_hash_pos = ggml_visit_parents(cgraph, src);
6748
6749
            // Update the use count for this operand.
6750
0
            cgraph->use_counts[src_hash_pos]++;
6751
0
        }
6752
0
    }
6753
6754
0
    if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
6755
        // reached a leaf node, not part of the gradient graph (e.g. a constant)
6756
0
        GGML_ASSERT(cgraph->n_leafs < cgraph->size);
6757
6758
0
        if (strlen(node->name) == 0) {
6759
0
            ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
6760
0
        }
6761
6762
0
        cgraph->leafs[cgraph->n_leafs] = node;
6763
0
        cgraph->n_leafs++;
6764
0
    } else {
6765
0
        GGML_ASSERT(cgraph->n_nodes < cgraph->size);
6766
6767
0
        if (strlen(node->name) == 0) {
6768
0
            ggml_format_name(node, "node_%d", cgraph->n_nodes);
6769
0
        }
6770
6771
0
        cgraph->nodes[cgraph->n_nodes] = node;
6772
0
        cgraph->n_nodes++;
6773
0
    }
6774
6775
0
    return node_hash_pos;
6776
0
}
6777
6778
0
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
6779
0
    if (!expand) {
6780
        // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
6781
0
        ggml_graph_clear(cgraph);
6782
0
    }
6783
6784
0
    const int n0 = cgraph->n_nodes;
6785
6786
0
    ggml_visit_parents(cgraph, tensor);
6787
6788
0
    const int n_new = cgraph->n_nodes - n0;
6789
0
    GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
6790
6791
0
    if (n_new > 0) {
6792
        // the last added node should always be starting point
6793
0
        GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
6794
0
    }
6795
0
}
6796
6797
0
void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
6798
0
    ggml_build_forward_impl(cgraph, tensor, true);
6799
0
}
6800
6801
void ggml_build_backward_expand(
6802
        struct ggml_context *  ctx,
6803
        struct ggml_cgraph  *  cgraph,
6804
0
        struct ggml_tensor  ** grad_accs) {
6805
0
    GGML_ASSERT(cgraph->n_nodes > 0);
6806
0
    GGML_ASSERT(cgraph->grads);
6807
0
    GGML_ASSERT(cgraph->grad_accs);
6808
6809
0
    const int n_nodes_f = cgraph->n_nodes;
6810
6811
0
    memset(cgraph->grads,     0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6812
0
    memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6813
0
    bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
6814
6815
0
    {
6816
0
        bool any_params = false;
6817
0
        bool any_loss   = false;
6818
0
        for (int i = 0; i < n_nodes_f; ++i) {
6819
0
            struct ggml_tensor * node = cgraph->nodes[i];
6820
0
            any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
6821
0
            any_loss   = any_loss   || (node->flags & GGML_TENSOR_FLAG_LOSS);
6822
0
        }
6823
0
        GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
6824
0
        GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
6825
0
    }
6826
6827
0
    for (int i = 0; i < n_nodes_f; ++i) {
6828
0
        struct ggml_tensor * node = cgraph->nodes[i];
6829
6830
0
        if (node->type == GGML_TYPE_I32) {
6831
0
            continue;
6832
0
        }
6833
6834
0
        bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
6835
0
        bool ignore_src[GGML_MAX_SRC] = {false};
6836
0
        switch (node->op) {
6837
            // gradients in node->src[0] for one reason or another have no effect on output gradients
6838
0
            case GGML_OP_IM2COL:      // only used for its shape
6839
0
            case GGML_OP_IM2COL_BACK: // same as IM2COL
6840
0
                ignore_src[0] = true;
6841
0
                break;
6842
0
            case GGML_OP_UNARY: {
6843
0
                const enum ggml_unary_op uop = ggml_get_unary_op(node);
6844
                // SGN and STEP unary ops are piecewise constant
6845
0
                if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
6846
0
                    ignore_src[0] = true;
6847
0
                }
6848
0
            } break;
6849
6850
            // gradients in node->src[1] for one reason or another have no effect on output gradients
6851
0
            case GGML_OP_CPY:           // gradients in CPY target are irrelevant
6852
0
            case GGML_OP_GET_ROWS:      // row indices not differentiable
6853
0
            case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
6854
0
            case GGML_OP_ROPE:          // positions not differentiable
6855
0
                ignore_src[1] = true;
6856
0
                break;
6857
6858
0
            default:
6859
0
                break;
6860
0
        }
6861
0
        for (int j = 0; j < GGML_MAX_SRC; ++j) {
6862
0
            if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
6863
0
                continue;
6864
0
            }
6865
0
            GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
6866
0
            node_needs_grad = true;
6867
0
            break;
6868
0
        }
6869
0
        if (!node_needs_grad) {
6870
0
            continue;
6871
0
        }
6872
6873
        // inplace operations are currently not supported
6874
0
        GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
6875
0
            node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
6876
6877
0
        const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
6878
0
        GGML_ASSERT(ihash != GGML_HASHSET_FULL);
6879
0
        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
6880
0
        if (grad_accs && grad_accs[i]) {
6881
0
            cgraph->grad_accs[ihash] = grad_accs[i];
6882
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6883
0
        } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
6884
            // loss tensors always need a gradient accumulator
6885
0
            cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
6886
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6887
0
        }
6888
0
        grads_needed[ihash] = true;
6889
0
    }
6890
6891
0
    for (int i = n_nodes_f - 1; i >= 0; --i) {
6892
        // inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation
6893
        // use allocator to automatically make inplace operations
6894
0
        ggml_compute_backward(ctx, cgraph, i, grads_needed);
6895
0
    }
6896
6897
0
    free(grads_needed);
6898
0
}
6899
6900
0
static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
6901
0
    void * ptr = *p;
6902
0
    ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
6903
0
    *p = (void *) ((char *) ptr + size);
6904
0
    return ptr;
6905
0
}
6906
6907
0
static size_t ggml_graph_nbytes(size_t size, bool grads) {
6908
0
    size_t hash_size = ggml_hash_size(size * 2);
6909
0
    void * p = 0;
6910
0
    incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
6911
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
6912
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
6913
0
    incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts
6914
0
    incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
6915
0
    if (grads) {
6916
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
6917
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
6918
0
    }
6919
0
    incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
6920
6921
0
    size_t nbytes = (size_t) p;
6922
0
    return nbytes;
6923
0
}
6924
6925
0
size_t ggml_graph_overhead_custom(size_t size, bool grads) {
6926
0
    return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
6927
0
}
6928
6929
0
size_t ggml_graph_overhead(void) {
6930
0
    return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
6931
0
}
6932
6933
0
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
6934
0
    const size_t obj_size = ggml_graph_nbytes(size, grads);
6935
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
6936
0
    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
6937
6938
    // the size of the hash table is doubled since it needs to hold both nodes and leafs
6939
0
    size_t hash_size = ggml_hash_size(size * 2);
6940
6941
0
    void * p = cgraph + 1;
6942
6943
0
    struct ggml_tensor ** nodes_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6944
0
    struct ggml_tensor ** leafs_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6945
0
    int32_t             * use_counts_ptr =         incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t));
6946
0
    struct ggml_tensor ** hash_keys_ptr  =         incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6947
0
    struct ggml_tensor ** grads_ptr      = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
6948
0
    struct ggml_tensor ** grad_accs_ptr  = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
6949
6950
0
    ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
6951
6952
    // check that we allocated the correct amount of memory
6953
0
    assert(obj_size == (size_t)((char *)p - (char *)cgraph));
6954
6955
0
    *cgraph = (struct ggml_cgraph) {
6956
0
        /*.size         =*/ size,
6957
0
        /*.n_nodes      =*/ 0,
6958
0
        /*.n_leafs      =*/ 0,
6959
0
        /*.nodes        =*/ nodes_ptr,
6960
0
        /*.grads        =*/ grads_ptr,
6961
0
        /*.grad_accs    =*/ grad_accs_ptr,
6962
0
        /*.leafs        =*/ leafs_ptr,
6963
0
        /*.use_counts   =*/ use_counts_ptr,
6964
0
        /*.hash_table   =*/ { hash_size, hash_used, hash_keys_ptr },
6965
0
        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
6966
0
    };
6967
6968
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
6969
0
    if (grads) {
6970
0
        memset(cgraph->grads,     0, hash_size*sizeof(struct ggml_tensor *));
6971
0
        memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
6972
0
    }
6973
6974
0
    return cgraph;
6975
0
}
6976
6977
0
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
6978
0
    return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
6979
0
}
6980
6981
0
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
6982
0
    struct ggml_cgraph cgraph = {
6983
0
        /*.size             =*/ 0,
6984
0
        /*.n_nodes          =*/ i1 - i0,
6985
0
        /*.n_leafs          =*/ 0,
6986
0
        /*.nodes            =*/ cgraph0->nodes + i0,
6987
0
        /*.grads            =*/ NULL, // gradients would need visited_hash_set
6988
0
        /*.grad_accs        =*/ NULL,
6989
0
        /*.leafs            =*/ NULL,
6990
0
        /*.use_counts       =*/ cgraph0->use_counts,
6991
0
        /*.visited_hash_set =*/ cgraph0->visited_hash_set,
6992
0
        /*.order            =*/ cgraph0->order,
6993
0
    };
6994
6995
0
    return cgraph;
6996
0
}
6997
6998
0
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
6999
0
    GGML_ASSERT(dst->size >= src->n_leafs);
7000
0
    GGML_ASSERT(dst->size >= src->n_nodes);
7001
0
    GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
7002
7003
0
    dst->n_leafs = src->n_leafs;
7004
0
    dst->n_nodes = src->n_nodes;
7005
0
    dst->order   = src->order;
7006
7007
0
    for (int i = 0; i < src->n_leafs; ++i) {
7008
0
        dst->leafs[i] = src->leafs[i];
7009
0
    }
7010
7011
0
    for (int i = 0; i < src->n_nodes; ++i) {
7012
0
        dst->nodes[i] = src->nodes[i];
7013
0
    }
7014
7015
0
    for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
7016
        // copy all hashset keys (tensors) that are in use
7017
0
        if (ggml_bitset_get(src->visited_hash_set.used, i)) {
7018
0
            size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
7019
0
            dst->use_counts[new_hash_pos] = src->use_counts[i];
7020
0
        }
7021
0
    }
7022
7023
0
    if (dst->grads) {
7024
0
        memset(dst->grads,     0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7025
0
        memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7026
0
    }
7027
0
    if (src->grads) {
7028
0
        GGML_ASSERT(dst->grads     != NULL);
7029
0
        GGML_ASSERT(dst->grad_accs != NULL);
7030
0
        for (int i = 0; i < src->n_nodes; ++i) {
7031
0
            const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
7032
0
            const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
7033
7034
0
            GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
7035
0
            GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
7036
0
            GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
7037
0
            GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
7038
7039
0
            dst->grads[igrad_dst]     = src->grads[igrad_src];
7040
0
            dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
7041
0
        }
7042
0
    }
7043
0
}
7044
7045
0
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
7046
0
    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
7047
0
    ggml_graph_cpy(cgraph, result);
7048
0
    return result;
7049
0
}
7050
7051
0
struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
7052
0
    if (ggml_is_empty(tensor)) {
7053
0
        return tensor;
7054
0
    }
7055
0
    if (tensor->buffer) {
7056
0
        ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
7057
0
    } else {
7058
0
        GGML_ASSERT(tensor->data);
7059
0
        memset(tensor->data, 0, ggml_nbytes(tensor));
7060
0
    }
7061
0
    return tensor;
7062
0
}
7063
7064
0
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
7065
0
    if (!cgraph) {
7066
0
        return;
7067
0
    }
7068
0
    GGML_ASSERT(cgraph->grads != NULL);
7069
7070
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7071
0
        struct ggml_tensor * node     = cgraph->nodes[i];
7072
0
        struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);
7073
7074
0
        if (node->op == GGML_OP_OPT_STEP_ADAMW) {
7075
            // clear momenta
7076
0
            ggml_set_zero(node->src[2]);
7077
0
            ggml_set_zero(node->src[3]);
7078
0
        }
7079
7080
        // initial gradients of loss should be 1, 0 otherwise
7081
0
        if (grad_acc) {
7082
0
            if (node->flags & GGML_TENSOR_FLAG_LOSS) {
7083
0
                GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
7084
0
                GGML_ASSERT(ggml_is_scalar(grad_acc));
7085
7086
0
                const float onef = 1.0f;
7087
0
                if (grad_acc->buffer) {
7088
0
                    ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
7089
0
                } else {
7090
0
                    GGML_ASSERT(grad_acc->data);
7091
0
                    *((float *) grad_acc->data) = onef;
7092
0
                }
7093
0
            } else {
7094
0
                ggml_set_zero(grad_acc);
7095
0
            }
7096
0
        }
7097
0
    }
7098
0
}
7099
7100
0
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
7101
0
    cgraph->n_leafs = 0;
7102
0
    cgraph->n_nodes = 0;
7103
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7104
0
}
7105
7106
0
int ggml_graph_size(struct ggml_cgraph * cgraph) {
7107
0
    return cgraph->size;
7108
0
}
7109
7110
0
struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
7111
0
    if (i < 0) {
7112
0
        GGML_ASSERT(cgraph->n_nodes + i >= 0);
7113
0
        return cgraph->nodes[cgraph->n_nodes + i];
7114
0
    }
7115
7116
0
    GGML_ASSERT(i < cgraph->n_nodes);
7117
0
    return cgraph->nodes[i];
7118
0
}
7119
7120
0
struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
7121
0
    return cgraph->nodes;
7122
0
}
7123
7124
0
int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
7125
0
    return cgraph->n_nodes;
7126
0
}
7127
7128
0
void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
7129
0
    GGML_ASSERT(cgraph->size > cgraph->n_nodes);
7130
0
    cgraph->nodes[cgraph->n_nodes] = tensor;
7131
0
    cgraph->n_nodes++;
7132
0
}
7133
7134
0
struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
7135
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7136
0
        struct ggml_tensor * leaf = cgraph->leafs[i];
7137
7138
0
        if (strcmp(leaf->name, name) == 0) {
7139
0
            return leaf;
7140
0
        }
7141
0
    }
7142
7143
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7144
0
        struct ggml_tensor * node = cgraph->nodes[i];
7145
7146
0
        if (strcmp(node->name, name) == 0) {
7147
0
            return node;
7148
0
        }
7149
0
    }
7150
7151
0
    return NULL;
7152
0
}
7153
7154
0
struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7155
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7156
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
7157
0
}
7158
7159
0
struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7160
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7161
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
7162
0
}
7163
7164
0
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
7165
0
    GGML_LOG_INFO("=== GRAPH ===\n");
7166
7167
0
    GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
7168
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7169
0
        struct ggml_tensor * node = cgraph->nodes[i];
7170
7171
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
7172
0
                i,
7173
0
                node->ne[0], node->ne[1], node->ne[2],
7174
0
                ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
7175
0
                      ggml_graph_get_grad(cgraph, node) ? "g" : " ");
7176
0
    }
7177
7178
0
    GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
7179
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7180
0
        struct ggml_tensor * node = cgraph->leafs[i];
7181
7182
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
7183
0
                i,
7184
0
                node->ne[0], node->ne[1],
7185
0
                ggml_op_name(node->op),
7186
0
                ggml_get_name(node));
7187
0
    }
7188
7189
0
    GGML_LOG_INFO("========================================\n");
7190
0
}
7191
7192
static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
7193
                                      const int *                idxs,
7194
                                      int                        count,
7195
0
                                      const struct ggml_tensor * tensor) {
7196
0
    GGML_ASSERT(cgraph && idxs);
7197
0
    for (int i = 0; i < count; ++i) {
7198
0
        const int node_idx = idxs[i];
7199
7200
0
        if (node_idx >= cgraph->n_nodes) {
7201
0
            return -1;
7202
0
        }
7203
0
        if (cgraph->nodes[node_idx] == tensor) {
7204
0
            return i;
7205
0
        }
7206
0
    }
7207
0
    return -1;
7208
0
}
7209
7210
bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
7211
                                const int *                node_idxs,
7212
                                int                        count,
7213
                                const enum ggml_op *       ops,
7214
                                const int *                outputs,
7215
0
                                int                        num_outputs) {
7216
0
    GGML_ASSERT(outputs && num_outputs > 0);
7217
7218
0
    for (int i = 0; i < count; ++i) {
7219
0
        if (node_idxs[i] >= cgraph->n_nodes) {
7220
0
            return false;
7221
0
        }
7222
7223
0
        const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
7224
7225
0
        if (node->op != ops[i]) {
7226
0
            return false;
7227
0
        }
7228
7229
0
        if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
7230
0
            continue;
7231
0
        }
7232
7233
0
        if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
7234
0
            return false;
7235
0
        }
7236
7237
0
        int subgraph_uses = 0;
7238
0
        for (int j = i + 1; j < count; ++j) {
7239
0
            const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
7240
0
            for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
7241
0
                if (other_node->src[src_idx] == node) {
7242
0
                    subgraph_uses++;
7243
0
                }
7244
0
            }
7245
0
        }
7246
7247
0
        if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
7248
0
            return false;
7249
0
        }
7250
7251
        // if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph
7252
0
        struct ggml_tensor * view_src = node->view_src;
7253
0
        while (view_src) {
7254
0
            if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
7255
0
                return false;
7256
0
            }
7257
0
            view_src = view_src->view_src;
7258
0
        }
7259
0
    }
7260
7261
0
    return true;
7262
0
}
7263
7264
// check if node is part of the graph
7265
0
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7266
0
    if (cgraph == NULL) {
7267
0
        return true;
7268
0
    }
7269
7270
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7271
0
        if (cgraph->nodes[i] == node) {
7272
0
            return true;
7273
0
        }
7274
0
    }
7275
7276
0
    return false;
7277
0
}
7278
7279
0
static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7280
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7281
0
        struct ggml_tensor * parent = cgraph->nodes[i];
7282
0
        struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);
7283
7284
0
        if (grad == node) {
7285
0
            return parent;
7286
0
        }
7287
0
    }
7288
7289
0
    return NULL;
7290
0
}
7291
7292
0
static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7293
0
    struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
7294
0
    struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
7295
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
7296
0
            gparent0 ? (void *) gparent0 : (void *) parent,
7297
0
            gparent ? (void *) gparent : (void *) node,
7298
0
            gparent ? "empty" : "vee",
7299
0
            gparent ? "dashed" : "solid",
7300
0
            label);
7301
0
}
7302
7303
0
static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7304
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ label = \"%s\"; ]\n",
7305
0
            (void *) parent,
7306
0
            (void *) node,
7307
0
            label);
7308
0
}
7309
7310
0
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
7311
0
    char color[16];
7312
7313
0
    FILE * fp = ggml_fopen(filename, "w");
7314
0
    GGML_ASSERT(fp);
7315
7316
0
    fprintf(fp, "digraph G {\n");
7317
0
    fprintf(fp, "  newrank = true;\n");
7318
0
    fprintf(fp, "  rankdir = TB;\n");
7319
7320
0
    for (int i = 0; i < gb->n_nodes; i++) {
7321
0
        struct ggml_tensor * node = gb->nodes[i];
7322
0
        struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);
7323
7324
0
        if (ggml_graph_get_parent(gb, node) != NULL) {
7325
0
            continue;
7326
0
        }
7327
7328
0
        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
7329
0
            snprintf(color, sizeof(color), "yellow");
7330
0
        } else if (grad) {
7331
0
            if (ggml_graph_find(gf, node)) {
7332
0
                snprintf(color, sizeof(color), "green");
7333
0
            } else {
7334
0
                snprintf(color, sizeof(color), "lightblue");
7335
0
            }
7336
0
        } else {
7337
0
            snprintf(color, sizeof(color), "white");
7338
0
        }
7339
7340
0
        fprintf(fp, "  \"%p\" [ "
7341
0
                    "style = filled; fillcolor = %s; shape = record; "
7342
0
                    "label=\"",
7343
0
                (void *) node, color);
7344
7345
0
        if (strlen(node->name) > 0) {
7346
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7347
0
        } else {
7348
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7349
0
        }
7350
7351
0
        if (ggml_is_matrix(node)) {
7352
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
7353
0
        } else {
7354
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
7355
0
        }
7356
7357
0
        if (grad) {
7358
0
            fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
7359
0
        } else {
7360
0
            fprintf(fp, "\"; ]\n");
7361
0
        }
7362
0
    }
7363
7364
0
    for (int i = 0; i < gb->n_leafs; i++) {
7365
0
        struct ggml_tensor * node = gb->leafs[i];
7366
7367
0
        snprintf(color, sizeof(color), "pink");
7368
7369
0
        fprintf(fp, "  \"%p\" [ "
7370
0
                    "style = filled; fillcolor = %s; shape = record; "
7371
0
                    "label=\"<x>",
7372
0
                (void *) node, color);
7373
7374
0
        if (strlen(node->name) > 0) {
7375
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7376
0
        } else {
7377
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7378
0
        }
7379
7380
0
        fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
7381
0
        if (ggml_nelements(node) < 5 && node->data != NULL) {
7382
0
            fprintf(fp, " | (");
7383
0
            for (int j = 0; j < ggml_nelements(node); j++) {
7384
                // FIXME: use ggml-backend to obtain the tensor data
7385
                //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
7386
                //    fprintf(fp, "%d", ggml_get_i32_1d(node, j));
7387
                //}
7388
                //else if (node->type == GGML_TYPE_F32 ||
7389
                //         node->type == GGML_TYPE_F16 ||
7390
                //         node->type == GGML_TYPE_BF16) {
7391
                //    fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
7392
                //}
7393
                //else
7394
0
                {
7395
0
                    fprintf(fp, "#");
7396
0
                }
7397
0
                if (j < ggml_nelements(node) - 1) {
7398
0
                    fprintf(fp, ", ");
7399
0
                }
7400
0
            }
7401
0
            fprintf(fp, ")");
7402
0
        }
7403
0
        fprintf(fp, "\"; ]\n");
7404
0
    }
7405
7406
0
    for (int i = 0; i < gb->n_nodes; i++) {
7407
0
        struct ggml_tensor * node = gb->nodes[i];
7408
7409
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7410
0
            if (node->src[j]) {
7411
0
                char label[16];
7412
0
                snprintf(label, sizeof(label), "src %d", j);
7413
0
                ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
7414
0
            }
7415
0
        }
7416
0
    }
7417
7418
0
    for (int i = 0; i < gb->n_leafs; i++) {
7419
0
        struct ggml_tensor * node = gb->leafs[i];
7420
7421
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7422
0
            if (node->src[j]) {
7423
0
                char label[16];
7424
0
                snprintf(label, sizeof(label), "src %d", j);
7425
0
                ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
7426
0
            }
7427
0
        }
7428
0
    }
7429
7430
0
    fprintf(fp, "}\n");
7431
7432
0
    fclose(fp);
7433
7434
0
    GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
7435
0
}
7436
7437
////////////////////////////////////////////////////////////////////////////////
7438
7439
0
void ggml_set_input(struct ggml_tensor * tensor) {
7440
0
    tensor->flags |= GGML_TENSOR_FLAG_INPUT;
7441
0
}
7442
7443
0
void ggml_set_output(struct ggml_tensor * tensor) {
7444
0
    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
7445
0
}
7446
7447
0
void ggml_set_param(struct ggml_tensor * tensor) {
7448
0
    GGML_ASSERT(tensor->op == GGML_OP_NONE);
7449
0
    tensor->flags |= GGML_TENSOR_FLAG_PARAM;
7450
0
}
7451
7452
0
void ggml_set_loss(struct ggml_tensor * tensor) {
7453
0
    GGML_ASSERT(ggml_is_scalar(tensor));
7454
0
    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
7455
0
    tensor->flags |= GGML_TENSOR_FLAG_LOSS;
7456
0
}
7457
7458
////////////////////////////////////////////////////////////////////////////////
7459
7460
0
void ggml_quantize_init(enum ggml_type type) {
7461
0
    ggml_critical_section_start();
7462
7463
0
    switch (type) {
7464
0
        case GGML_TYPE_IQ2_XXS:
7465
0
        case GGML_TYPE_IQ2_XS:
7466
0
        case GGML_TYPE_IQ2_S:
7467
0
        case GGML_TYPE_IQ1_S:
7468
0
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
7469
0
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
7470
0
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
7471
0
        default: // nothing
7472
0
            break;
7473
0
    }
7474
7475
0
    ggml_critical_section_end();
7476
0
}
7477
7478
349
void ggml_quantize_free(void) {
7479
349
    ggml_critical_section_start();
7480
7481
349
    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
7482
349
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
7483
349
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
7484
349
    iq3xs_free_impl(256);
7485
7486
349
    ggml_critical_section_end();
7487
349
}
7488
7489
0
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
7490
0
    return
7491
0
        type == GGML_TYPE_IQ2_XXS ||
7492
0
        type == GGML_TYPE_IQ2_XS  ||
7493
0
        type == GGML_TYPE_IQ1_S;//   ||
7494
        //type == GGML_TYPE_IQ1_M;
7495
0
}
7496
7497
size_t ggml_quantize_chunk(
7498
        enum ggml_type   type,
7499
           const float * src,
7500
                  void * dst,
7501
               int64_t   start,
7502
               int64_t   nrows,
7503
               int64_t   n_per_row,
7504
0
           const float * imatrix) {
7505
0
    const int64_t n = (int64_t) nrows * n_per_row;
7506
7507
0
    if (ggml_quantize_requires_imatrix(type)) {
7508
0
        GGML_ASSERT(imatrix != NULL);
7509
0
    }
7510
7511
0
    GGML_ASSERT(start % type_traits[type].blck_size == 0);
7512
0
    GGML_ASSERT(start % n_per_row == 0);
7513
7514
0
    ggml_quantize_init(type); // this is noop if already initialized
7515
7516
0
    const size_t start_row = start / n_per_row;
7517
0
    const size_t row_size  = ggml_row_size(type, n_per_row);
7518
7519
0
    size_t result = 0;
7520
7521
0
    switch (type) {
7522
0
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7523
0
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7524
0
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7525
0
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7526
0
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7527
0
        case GGML_TYPE_MXFP4:   result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7528
0
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7529
0
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7530
0
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7531
0
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7532
0
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7533
0
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7534
0
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7535
0
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7536
0
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7537
0
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7538
0
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7539
0
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7540
0
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7541
0
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7542
0
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7543
0
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7544
0
        case GGML_TYPE_F16:
7545
0
            {
7546
0
                size_t elemsize = sizeof(ggml_fp16_t);
7547
0
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
7548
0
                result = n * elemsize;
7549
0
            } break;
7550
0
        case GGML_TYPE_BF16:
7551
0
            {
7552
0
                size_t elemsize = sizeof(ggml_bf16_t);
7553
0
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
7554
0
                result = n * elemsize;
7555
0
            } break;
7556
0
        case GGML_TYPE_F32:
7557
0
            {
7558
0
                size_t elemsize = sizeof(float);
7559
0
                result = n * elemsize;
7560
0
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
7561
0
            } break;
7562
0
        default:
7563
0
            assert(false);
7564
0
    }
7565
7566
0
    GGML_ASSERT(result == nrows * row_size);
7567
7568
0
    return result;
7569
0
}
7570
7571
////////////////////////////////////////////////////////////////////////////////
7572
7573
0
void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) {
7574
0
    *log_callback = g_logger_state.log_callback;
7575
0
    *user_data    = g_logger_state.log_callback_user_data;
7576
0
}
7577
7578
0
void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
7579
0
    g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
7580
0
    g_logger_state.log_callback_user_data = user_data;
7581
0
}
7582
7583
0
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
7584
0
    p->n_threads  = n_threads;
7585
0
    p->prio       = 0;     // default priority (usually means normal or inherited)
7586
0
    p->poll       = 50;    // hybrid-polling enabled
7587
0
    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
7588
0
    p->paused     = false; // threads are ready to go
7589
0
    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
7590
0
}
7591
7592
0
struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
7593
0
    struct ggml_threadpool_params p;
7594
0
    ggml_threadpool_params_init(&p, n_threads);
7595
0
    return p;
7596
0
}
7597
7598
0
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
7599
0
    if (p0->n_threads      != p1->n_threads  )    return false;
7600
0
    if (p0->prio           != p1->prio       )    return false;
7601
0
    if (p0->poll           != p1->poll       )    return false;
7602
0
    if (p0->strict_cpu     != p1->strict_cpu )    return false;
7603
0
    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
7604
0
}