Coverage Report

Created: 2026-04-12 06:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/ggml/src/ggml.c
Line
Count
Source
1
#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
2
#define _USE_MATH_DEFINES // For M_PI on MSVC
3
4
#include "ggml-backend.h"
5
#include "ggml-impl.h"
6
#include "ggml-threading.h"
7
#include "ggml-cpu.h"
8
#include "ggml.h"
9
10
// FIXME: required here for quantization functions
11
#include "ggml-quants.h"
12
13
#ifdef GGML_USE_CPU_HBM
14
#include <hbwmalloc.h>
15
#endif
16
17
#if defined(_MSC_VER) || defined(__MINGW32__)
18
#include <malloc.h> // using malloc.h with MSC/MINGW
19
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
20
#include <alloca.h>
21
#endif
22
23
#include <assert.h>
24
#include <errno.h>
25
#include <time.h>
26
#include <math.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include <stdint.h>
30
#include <inttypes.h>
31
#include <stdio.h>
32
#include <float.h>
33
#include <limits.h>
34
#include <stdarg.h>
35
#include <signal.h>
36
#if defined(__gnu_linux__)
37
#include <syscall.h>
38
#endif
39
40
#if defined(__APPLE__)
41
#include <unistd.h>
42
#include <mach/mach.h>
43
#include <TargetConditionals.h>
44
#endif
45
46
#if defined(_WIN32)
47
#define WIN32_LEAN_AND_MEAN
48
#ifndef NOMINMAX
49
    #define NOMINMAX
50
#endif
51
#include <windows.h>
52
#endif
53
54
0
#define UNUSED GGML_UNUSED
55
56
// Needed for ggml_fp32_to_bf16_row()
57
#if defined(__AVX512BF16__)
58
#if defined(_MSC_VER)
59
#define m512i(p) p
60
#else
61
#include <immintrin.h>
62
#define m512i(p) (__m512i)(p)
63
#endif // defined(_MSC_VER)
64
#endif // defined(__AVX512BF16__)
65
66
#if defined(__linux__) || \
67
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
68
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
69
70
#include <unistd.h>
71
#include <sys/types.h>
72
#include <sys/stat.h>
73
#include <sys/wait.h>
74
#if defined(__linux__)
75
#include <sys/prctl.h>
76
#endif
77
78
#if defined(__ANDROID__)
79
#include <unwind.h>
80
#include <dlfcn.h>
81
#include <stdio.h>
82
83
struct backtrace_state {
84
    void ** current;
85
    void ** end;
86
};
87
88
static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
89
    struct backtrace_state * state = (struct backtrace_state *)arg;
90
    uintptr_t pc = _Unwind_GetIP(context);
91
    if (pc) {
92
        if (state->current == state->end) {
93
            return _URC_END_OF_STACK;
94
        } else {
95
            *state->current++ = (void*)pc;
96
        }
97
    }
98
    return _URC_NO_REASON;
99
}
100
101
static void ggml_print_backtrace_symbols(void) {
102
    const int max = 100;
103
    void* buffer[max];
104
105
    struct backtrace_state state = {buffer, buffer + max};
106
    _Unwind_Backtrace(unwind_callback, &state);
107
108
    int count = state.current - buffer;
109
110
    for (int idx = 0; idx < count; ++idx) {
111
        const void * addr = buffer[idx];
112
        const char * symbol = "";
113
114
        Dl_info info;
115
        if (dladdr(addr, &info) && info.dli_sname) {
116
            symbol = info.dli_sname;
117
        }
118
119
        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
120
    }
121
}
122
#elif defined(__linux__) && defined(__GLIBC__)
123
#include <execinfo.h>
124
0
static void ggml_print_backtrace_symbols(void) {
125
0
    void * trace[100];
126
0
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
127
0
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
128
0
}
129
#elif defined(__APPLE__)
130
#include <execinfo.h>
131
static void ggml_print_backtrace_symbols(void) {
132
    void * trace[100];
133
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
134
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
135
}
136
#else
137
static void ggml_print_backtrace_symbols(void) {
138
    // platform not supported
139
}
140
#endif
141
142
0
void ggml_print_backtrace(void) {
143
0
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
144
0
    if (GGML_NO_BACKTRACE) {
145
0
        return;
146
0
    }
147
#if defined(__APPLE__)
148
    // On macOS, fork+debugger attachment is problematic due to:
149
    // 1. libdispatch "poisons" forked child processes
150
    // 2. lldb has issues attaching to parent from forked child
151
    // Use simple backtrace() instead to avoid Terminal.app crashes
152
    const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
153
    if (!GGML_BACKTRACE_LLDB) {
154
        fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
155
        fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
156
        fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
157
        ggml_print_backtrace_symbols();
158
        return;
159
    }
160
#endif
161
0
#if defined(__linux__)
162
0
    FILE * f = fopen("/proc/self/status", "r");
163
0
    size_t size = 0;
164
0
    char * line = NULL;
165
0
    ssize_t length = 0;
166
0
    while ((length = getline(&line, &size, f)) > 0) {
167
0
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
168
0
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
169
            // Already being debugged, and the breakpoint is the later abort()
170
0
            free(line);
171
0
            fclose(f);
172
0
            return;
173
0
        }
174
0
    }
175
0
    free(line);
176
0
    fclose(f);
177
0
    int lock[2] = { -1, -1 };
178
0
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
179
0
#endif
180
0
    const int parent_pid = getpid();
181
0
    const int child_pid = fork();
182
0
    if (child_pid < 0) { // error
183
0
#if defined(__linux__)
184
0
        close(lock[1]);
185
0
        close(lock[0]);
186
0
#endif
187
0
        return;
188
0
    } else if (child_pid == 0) { // child
189
0
        char attach[32];
190
0
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
191
0
#if defined(__linux__)
192
0
        close(lock[1]);
193
0
        (void) !read(lock[0], lock, 1);
194
0
        close(lock[0]);
195
0
#endif
196
        // try gdb
197
0
        execlp("gdb", "gdb", "--batch",
198
0
            "-ex", "set style enabled on",
199
0
            "-ex", attach,
200
0
            "-ex", "bt -frame-info source-and-location",
201
0
            "-ex", "detach",
202
0
            "-ex", "quit",
203
0
            (char *) NULL);
204
        // try lldb
205
0
        execlp("lldb", "lldb", "--batch",
206
0
            "-o", "bt",
207
0
            "-o", "quit",
208
0
            "-p", &attach[sizeof("attach ") - 1],
209
0
            (char *) NULL);
210
        // gdb failed, fallback to backtrace_symbols
211
0
        ggml_print_backtrace_symbols();
212
0
        _Exit(0);
213
0
    } else { // parent
214
0
#if defined(__linux__)
215
0
        prctl(PR_SET_PTRACER, child_pid);
216
0
        close(lock[1]);
217
0
        close(lock[0]);
218
0
#endif
219
0
        waitpid(child_pid, NULL, 0);
220
0
    }
221
0
}
222
#else
223
void ggml_print_backtrace(void) {
224
    // platform not supported
225
}
226
#endif
227
228
static ggml_abort_callback_t g_abort_callback = NULL;
229
230
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
231
0
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) {
232
0
    ggml_abort_callback_t ret_val = g_abort_callback;
233
0
    g_abort_callback = callback;
234
0
    return ret_val;
235
0
}
236
237
250
void ggml_abort(const char * file, int line, const char * fmt, ...) {
238
250
    fflush(stdout);
239
240
250
    char message[2048];
241
250
    int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line);
242
243
250
    va_list args;
244
250
    va_start(args, fmt);
245
250
    vsnprintf(message + offset, sizeof(message) - offset, fmt, args);
246
250
    va_end(args);
247
248
250
    if (g_abort_callback) {
249
0
        g_abort_callback(message);
250
250
    } else {
251
        // default: print error and backtrace to stderr
252
250
        fprintf(stderr, "%s\n", message);
253
        
254
250
    }
255
256
250
    abort();
257
250
}
258
259
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
260
261
//
262
// logging
263
//
264
265
struct ggml_logger_state {
266
    ggml_log_callback log_callback;
267
    void * log_callback_user_data;
268
};
269
static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};
270
271
4.71k
static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
272
4.71k
    if (format == NULL) {
273
0
        return;
274
0
    }
275
4.71k
    va_list args_copy;
276
4.71k
    va_copy(args_copy, args);
277
4.71k
    char buffer[128];
278
4.71k
    int len = vsnprintf(buffer, 128, format, args);
279
4.71k
    if (len < 128) {
280
4.62k
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
281
4.62k
    } else {
282
92
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
283
92
        vsnprintf(buffer2, len + 1, format, args_copy);
284
92
        buffer2[len] = 0;
285
92
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
286
92
        free(buffer2);
287
92
    }
288
4.71k
    va_end(args_copy);
289
4.71k
}
290
291
4.71k
void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
292
4.71k
    va_list args;
293
4.71k
    va_start(args, format);
294
4.71k
    ggml_log_internal_v(level, format, args);
295
4.71k
    va_end(args);
296
4.71k
}
297
298
4.71k
void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
299
4.71k
    (void) level;
300
4.71k
    (void) user_data;
301
4.71k
    fputs(text, stderr);
302
4.71k
    fflush(stderr);
303
4.71k
}
304
305
//
306
// end of logging block
307
//
308
309
#ifdef GGML_USE_ACCELERATE
310
// uncomment to use vDSP for soft max computation
311
// note: not sure if it is actually faster
312
//#define GGML_SOFT_MAX_ACCELERATE
313
#endif
314
315
316
5.05k
void * ggml_aligned_malloc(size_t size) {
317
#if defined(__s390x__)
318
    const int alignment = 256;
319
#else
320
5.05k
    const int alignment = 64;
321
5.05k
#endif
322
323
#if defined(_MSC_VER) || defined(__MINGW32__)
324
    return _aligned_malloc(size, alignment);
325
#else
326
5.05k
    if (size == 0) {
327
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
328
0
        return NULL;
329
0
    }
330
5.05k
    void * aligned_memory = NULL;
331
  #ifdef GGML_USE_CPU_HBM
332
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
333
  #elif TARGET_OS_OSX
334
    GGML_UNUSED(alignment);
335
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
336
    int result = EFAULT;
337
    switch (alloc_status) {
338
        case KERN_SUCCESS:
339
            result = 0;
340
            break;
341
        case KERN_INVALID_ADDRESS:
342
            result = EINVAL;
343
            break;
344
        case KERN_NO_SPACE:
345
            result = ENOMEM;
346
            break;
347
        default:
348
            result = EFAULT;
349
            break;
350
    }
351
  #else
352
5.05k
    int result = posix_memalign(&aligned_memory, alignment, size);
353
5.05k
  #endif
354
5.05k
    if (result != 0) {
355
        // Handle allocation failure
356
0
        const char *error_desc = "unknown allocation error";
357
0
        switch (result) {
358
0
            case EINVAL:
359
0
                error_desc = "invalid alignment value";
360
0
                break;
361
0
            case ENOMEM:
362
0
                error_desc = "insufficient memory";
363
0
                break;
364
0
        }
365
0
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
366
0
        return NULL;
367
0
    }
368
5.05k
    return aligned_memory;
369
5.05k
#endif
370
5.05k
}
371
372
5.04k
void ggml_aligned_free(void * ptr, size_t size) {
373
5.04k
    GGML_UNUSED(size);
374
#if defined(_MSC_VER) || defined(__MINGW32__)
375
    _aligned_free(ptr);
376
#elif GGML_USE_CPU_HBM
377
    if (ptr != NULL) {
378
        hbw_free(ptr);
379
    }
380
#elif TARGET_OS_OSX
381
    if (ptr != NULL) {
382
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
383
    }
384
#else
385
5.04k
    free(ptr);
386
5.04k
#endif
387
5.04k
}
388
389
390
5.05k
inline static void * ggml_malloc(size_t size) {
391
5.05k
    if (size == 0) {
392
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
393
0
        return NULL;
394
0
    }
395
5.05k
    void * result = malloc(size);
396
5.05k
    if (result == NULL) {
397
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
398
0
        GGML_ABORT("fatal error");
399
0
    }
400
5.05k
    return result;
401
5.05k
}
402
403
// calloc
404
0
inline static void * ggml_calloc(size_t num, size_t size) {
405
0
if ((num * size) > 9000000) {GGML_ABORT("calloc err");}
406
407
0
    if (num == 0 || size == 0) {
408
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
409
0
        return NULL;
410
0
    }
411
0
    void * result = calloc(num, size);
412
0
    if (result == NULL) {
413
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
414
0
        GGML_ABORT("fatal error");
415
0
    }
416
0
    return result;
417
0
}
418
419
5.05k
#define GGML_MALLOC(size)      ggml_malloc(size)
420
0
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
421
422
5.04k
#define GGML_FREE(ptr) free(ptr)
423
424
0
const char * ggml_status_to_string(enum ggml_status status) {
425
0
    switch (status) {
426
0
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
427
0
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
428
0
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
429
0
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
430
0
    }
431
432
0
    return "GGML status: unknown";
433
0
}
434
435
0
float ggml_fp16_to_fp32(ggml_fp16_t x) {
436
0
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
437
0
    return GGML_FP16_TO_FP32(x);
438
0
}
439
440
0
ggml_fp16_t ggml_fp32_to_fp16(float x) {
441
0
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
442
0
    return GGML_FP32_TO_FP16(x);
443
0
}
444
445
0
float ggml_bf16_to_fp32(ggml_bf16_t x) {
446
0
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
447
0
    return GGML_BF16_TO_FP32(x);  // it just left shifts
448
0
}
449
450
0
ggml_bf16_t ggml_fp32_to_bf16(float x) {
451
0
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
452
0
    return GGML_FP32_TO_BF16(x);
453
0
}
454
455
0
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
456
0
    for (int64_t i = 0; i < n; i++) {
457
0
        y[i] = GGML_FP16_TO_FP32(x[i]);
458
0
    }
459
0
}
460
461
0
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
462
0
    int i = 0;
463
0
    for (; i < n; ++i) {
464
0
        y[i] = GGML_FP32_TO_FP16(x[i]);
465
0
    }
466
0
}
467
468
0
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
469
0
    int i = 0;
470
0
    for (; i < n; ++i) {
471
0
        y[i] = GGML_BF16_TO_FP32(x[i]);
472
0
    }
473
0
}
474
475
0
void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
476
0
    for (int i = 0; i < n; i++) {
477
0
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
478
0
    }
479
0
}
480
481
0
void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
482
0
  int i = 0;
483
#if defined(__AVX512BF16__)
484
  // subnormals are flushed to zero on this platform
485
  for (; i + 32 <= n; i += 32) {
486
        _mm512_storeu_si512(
487
            (__m512i *)(y + i),
488
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
489
                                _mm512_loadu_ps(x + i))));
490
  }
491
#endif
492
0
    for (; i < n; i++) {
493
0
        y[i] = GGML_FP32_TO_BF16(x[i]);
494
0
    }
495
0
}
496
497
0
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
498
0
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
499
0
}
500
501
0
const char * ggml_version(void) {
502
0
    return GGML_VERSION;
503
0
}
504
505
0
const char * ggml_commit(void) {
506
0
    return GGML_COMMIT;
507
0
}
508
509
//
510
// timing
511
//
512
513
#if defined(_MSC_VER) || defined(__MINGW32__)
514
static int64_t timer_freq, timer_start;
515
void ggml_time_init(void) {
516
    LARGE_INTEGER t;
517
    QueryPerformanceFrequency(&t);
518
    timer_freq = t.QuadPart;
519
520
    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
521
    // and the uptime is high enough.
522
    // We subtract the program start time to reduce the likelihood of that happening.
523
    QueryPerformanceCounter(&t);
524
    timer_start = t.QuadPart;
525
}
526
int64_t ggml_time_ms(void) {
527
    LARGE_INTEGER t;
528
    QueryPerformanceCounter(&t);
529
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
530
}
531
int64_t ggml_time_us(void) {
532
    LARGE_INTEGER t;
533
    QueryPerformanceCounter(&t);
534
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
535
}
536
#else
537
13.3k
void ggml_time_init(void) {}
538
0
int64_t ggml_time_ms(void) {
539
0
    struct timespec ts;
540
0
    clock_gettime(CLOCK_MONOTONIC, &ts);
541
0
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
542
0
}
543
544
8.04k
int64_t ggml_time_us(void) {
545
8.04k
    struct timespec ts;
546
8.04k
    clock_gettime(CLOCK_MONOTONIC, &ts);
547
8.04k
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
548
8.04k
}
549
#endif
550
551
0
int64_t ggml_cycles(void) {
552
0
    return clock();
553
0
}
554
555
0
int64_t ggml_cycles_per_ms(void) {
556
0
    return CLOCKS_PER_SEC/1000;
557
0
}
558
559
//
560
// cross-platform UTF-8 file paths
561
//
562
563
#ifdef _WIN32
564
static wchar_t * ggml_mbstowcs(const char * mbs) {
565
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
566
    if (!wlen) {
567
        errno = EINVAL;
568
        return NULL;
569
    }
570
571
    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
572
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
573
    if (!wlen) {
574
        GGML_FREE(wbuf);
575
        errno = EINVAL;
576
        return NULL;
577
    }
578
579
    return wbuf;
580
}
581
#endif
582
583
5.04k
FILE * ggml_fopen(const char * fname, const char * mode) {
584
#ifdef _WIN32
585
    FILE * file = NULL;
586
587
    // convert fname (UTF-8)
588
    wchar_t * wfname = ggml_mbstowcs(fname);
589
    if (wfname) {
590
        // convert mode (ANSI)
591
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
592
        wchar_t * wmode_p = wmode;
593
        do {
594
            *wmode_p++ = (wchar_t)*mode;
595
        } while (*mode++);
596
597
        // open file
598
        file = _wfopen(wfname, wmode);
599
600
        GGML_FREE(wfname);
601
        GGML_FREE(wmode);
602
    }
603
604
    return file;
605
#else
606
5.04k
    return fopen(fname, mode);
607
5.04k
#endif
608
609
5.04k
}
610
611
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
612
    [GGML_TYPE_I8] = {
613
        .type_name                = "i8",
614
        .blck_size                = 1,
615
        .type_size                = sizeof(int8_t),
616
        .is_quantized             = false,
617
    },
618
    [GGML_TYPE_I16] = {
619
        .type_name                = "i16",
620
        .blck_size                = 1,
621
        .type_size                = sizeof(int16_t),
622
        .is_quantized             = false,
623
    },
624
    [GGML_TYPE_I32] = {
625
        .type_name                = "i32",
626
        .blck_size                = 1,
627
        .type_size                = sizeof(int32_t),
628
        .is_quantized             = false,
629
    },
630
    [GGML_TYPE_I64] = {
631
        .type_name                = "i64",
632
        .blck_size                = 1,
633
        .type_size                = sizeof(int64_t),
634
        .is_quantized             = false,
635
    },
636
    [GGML_TYPE_F64] = {
637
        .type_name                = "f64",
638
        .blck_size                = 1,
639
        .type_size                = sizeof(double),
640
        .is_quantized             = false,
641
    },
642
    [GGML_TYPE_F32] = {
643
        .type_name                = "f32",
644
        .blck_size                = 1,
645
        .type_size                = sizeof(float),
646
        .is_quantized             = false,
647
    },
648
    [GGML_TYPE_F16] = {
649
        .type_name                = "f16",
650
        .blck_size                = 1,
651
        .type_size                = sizeof(ggml_fp16_t),
652
        .is_quantized             = false,
653
        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
654
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_fp16_row,
655
    },
656
    [GGML_TYPE_Q1_0] = {
657
        .type_name                = "q1_0",
658
        .blck_size                = QK1_0,
659
        .type_size                = sizeof(block_q1_0),
660
        .is_quantized             = true,
661
        .to_float                 = (ggml_to_float_t) dequantize_row_q1_0,
662
        .from_float_ref           = (ggml_from_float_t) quantize_row_q1_0_ref,
663
    },
664
    [GGML_TYPE_Q4_0] = {
665
        .type_name                = "q4_0",
666
        .blck_size                = QK4_0,
667
        .type_size                = sizeof(block_q4_0),
668
        .is_quantized             = true,
669
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_0,
670
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_0_ref,
671
    },
672
    [GGML_TYPE_Q4_1] = {
673
        .type_name                = "q4_1",
674
        .blck_size                = QK4_1,
675
        .type_size                = sizeof(block_q4_1),
676
        .is_quantized             = true,
677
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_1,
678
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_1_ref,
679
    },
680
    [4] = { // GGML_TYPE_Q4_2
681
        .type_name                = "DEPRECATED",
682
        .blck_size                = 0,
683
        .type_size                = 0,
684
        .is_quantized             = false,
685
    },
686
    [5] = { // GGML_TYPE_Q4_3
687
        .type_name                = "DEPRECATED",
688
        .blck_size                = 0,
689
        .type_size                = 0,
690
        .is_quantized             = false,
691
    },
692
    [GGML_TYPE_Q5_0] = {
693
        .type_name                = "q5_0",
694
        .blck_size                = QK5_0,
695
        .type_size                = sizeof(block_q5_0),
696
        .is_quantized             = true,
697
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_0,
698
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_0_ref,
699
    },
700
    [GGML_TYPE_Q5_1] = {
701
        .type_name                = "q5_1",
702
        .blck_size                = QK5_1,
703
        .type_size                = sizeof(block_q5_1),
704
        .is_quantized             = true,
705
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_1,
706
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_1_ref,
707
    },
708
    [GGML_TYPE_Q8_0] = {
709
        .type_name                = "q8_0",
710
        .blck_size                = QK8_0,
711
        .type_size                = sizeof(block_q8_0),
712
        .is_quantized             = true,
713
        .to_float                 = (ggml_to_float_t) dequantize_row_q8_0,
714
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_0_ref,
715
    },
716
    [GGML_TYPE_Q8_1] = {
717
        .type_name                = "q8_1",
718
        .blck_size                = QK8_1,
719
        .type_size                = sizeof(block_q8_1),
720
        .is_quantized             = true,
721
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_1_ref,
722
    },
723
    [GGML_TYPE_MXFP4] = {
724
        .type_name                = "mxfp4",
725
        .blck_size                = QK_MXFP4,
726
        .type_size                = sizeof(block_mxfp4),
727
        .is_quantized             = true,
728
        .to_float                 = (ggml_to_float_t) dequantize_row_mxfp4,
729
        .from_float_ref           = (ggml_from_float_t)quantize_row_mxfp4_ref,
730
    },
731
    [GGML_TYPE_NVFP4] = {
732
        .type_name                = "nvfp4",
733
        .blck_size                = QK_NVFP4,
734
        .type_size                = sizeof(block_nvfp4),
735
        .is_quantized             = true,
736
        .to_float                 = (ggml_to_float_t) dequantize_row_nvfp4,
737
        .from_float_ref           = (ggml_from_float_t)quantize_row_nvfp4_ref,
738
    },
739
    [GGML_TYPE_Q2_K] = {
740
        .type_name                = "q2_K",
741
        .blck_size                = QK_K,
742
        .type_size                = sizeof(block_q2_K),
743
        .is_quantized             = true,
744
        .to_float                 = (ggml_to_float_t) dequantize_row_q2_K,
745
        .from_float_ref           = (ggml_from_float_t) quantize_row_q2_K_ref,
746
    },
747
    [GGML_TYPE_Q3_K] = {
748
        .type_name                = "q3_K",
749
        .blck_size                = QK_K,
750
        .type_size                = sizeof(block_q3_K),
751
        .is_quantized             = true,
752
        .to_float                 = (ggml_to_float_t) dequantize_row_q3_K,
753
        .from_float_ref           = (ggml_from_float_t) quantize_row_q3_K_ref,
754
    },
755
    [GGML_TYPE_Q4_K] = {
756
        .type_name                = "q4_K",
757
        .blck_size                = QK_K,
758
        .type_size                = sizeof(block_q4_K),
759
        .is_quantized             = true,
760
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_K,
761
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_K_ref,
762
    },
763
    [GGML_TYPE_Q5_K] = {
764
        .type_name                = "q5_K",
765
        .blck_size                = QK_K,
766
        .type_size                = sizeof(block_q5_K),
767
        .is_quantized             = true,
768
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_K,
769
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_K_ref,
770
    },
771
    [GGML_TYPE_Q6_K] = {
772
        .type_name                = "q6_K",
773
        .blck_size                = QK_K,
774
        .type_size                = sizeof(block_q6_K),
775
        .is_quantized             = true,
776
        .to_float                 = (ggml_to_float_t) dequantize_row_q6_K,
777
        .from_float_ref           = (ggml_from_float_t) quantize_row_q6_K_ref,
778
    },
779
    [GGML_TYPE_IQ2_XXS] = {
780
        .type_name                = "iq2_xxs",
781
        .blck_size                = QK_K,
782
        .type_size                = sizeof(block_iq2_xxs),
783
        .is_quantized             = true,
784
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xxs,
785
        .from_float_ref           = NULL,
786
    },
787
    [GGML_TYPE_IQ2_XS] = {
788
        .type_name                = "iq2_xs",
789
        .blck_size                = QK_K,
790
        .type_size                = sizeof(block_iq2_xs),
791
        .is_quantized             = true,
792
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xs,
793
        .from_float_ref           = NULL,
794
    },
795
    [GGML_TYPE_IQ3_XXS] = {
796
        .type_name                = "iq3_xxs",
797
        .blck_size                = QK_K,
798
        .type_size                = sizeof(block_iq3_xxs),
799
        .is_quantized             = true,
800
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_xxs,
801
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
802
    },
803
    [GGML_TYPE_IQ3_S] = {
804
        .type_name                = "iq3_s",
805
        .blck_size                = QK_K,
806
        .type_size                = sizeof(block_iq3_s),
807
        .is_quantized             = true,
808
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_s,
809
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_s_ref,
810
    },
811
    [GGML_TYPE_IQ2_S] = {
812
        .type_name                = "iq2_s",
813
        .blck_size                = QK_K,
814
        .type_size                = sizeof(block_iq2_s),
815
        .is_quantized             = true,
816
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_s,
817
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq2_s_ref,
818
    },
819
    [GGML_TYPE_IQ1_S] = {
820
        .type_name                = "iq1_s",
821
        .blck_size                = QK_K,
822
        .type_size                = sizeof(block_iq1_s),
823
        .is_quantized             = true,
824
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_s,
825
        .from_float_ref           = NULL,
826
    },
827
    [GGML_TYPE_IQ1_M] = {
828
        .type_name                = "iq1_m",
829
        .blck_size                = QK_K,
830
        .type_size                = sizeof(block_iq1_m),
831
        .is_quantized             = true,
832
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_m,
833
        .from_float_ref           = NULL,
834
    },
835
    [GGML_TYPE_IQ4_NL] = {
836
        .type_name                = "iq4_nl",
837
        .blck_size                = QK4_NL,
838
        .type_size                = sizeof(block_iq4_nl),
839
        .is_quantized             = true,
840
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_nl,
841
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_nl_ref,
842
    },
843
    [GGML_TYPE_IQ4_XS] = {
844
        .type_name                = "iq4_xs",
845
        .blck_size                = QK_K,
846
        .type_size                = sizeof(block_iq4_xs),
847
        .is_quantized             = true,
848
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_xs,
849
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_xs_ref,
850
    },
851
    [GGML_TYPE_Q8_K] = {
852
        .type_name                = "q8_K",
853
        .blck_size                = QK_K,
854
        .type_size                = sizeof(block_q8_K),
855
        .is_quantized             = true,
856
    },
857
    [GGML_TYPE_BF16] = {
858
        .type_name                = "bf16",
859
        .blck_size                = 1,
860
        .type_size                = sizeof(ggml_bf16_t),
861
        .is_quantized             = false,
862
        .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
863
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
864
    },
865
    [31] = { // GGML_TYPE_Q4_0_4_4
866
        .type_name                = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
867
        .blck_size                = 0,
868
        .type_size                = 0,
869
        .is_quantized             = false,
870
    },
871
    [32] = { // GGML_TYPE_Q4_0_4_8
872
        .type_name                = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
873
        .blck_size                = 0,
874
        .type_size                = 0,
875
        .is_quantized             = false,
876
    },
877
    [33] = { // GGML_TYPE_Q4_0_8_8
878
        .type_name                = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
879
        .blck_size                = 0,
880
        .type_size                = 0,
881
        .is_quantized             = false,
882
    },
883
    [GGML_TYPE_TQ1_0] = {
884
        .type_name                = "tq1_0",
885
        .blck_size                = QK_K,
886
        .type_size                = sizeof(block_tq1_0),
887
        .is_quantized             = true,
888
        .to_float                 = (ggml_to_float_t) dequantize_row_tq1_0,
889
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq1_0_ref,
890
    },
891
    [GGML_TYPE_TQ2_0] = {
892
        .type_name                = "tq2_0",
893
        .blck_size                = QK_K,
894
        .type_size                = sizeof(block_tq2_0),
895
        .is_quantized             = true,
896
        .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
897
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
898
    },
899
    [36] = { // GGML_TYPE_IQ4_NL_4_4
900
        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
901
        .blck_size                = 0,
902
        .type_size                = 0,
903
        .is_quantized             = false,
904
    },
905
    [37] = { // GGML_TYPE_IQ4_NL_4_8
906
        .type_name                = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
907
        .blck_size                = 0,
908
        .type_size                = 0,
909
        .is_quantized             = false,
910
    },
911
    [38] = { // GGML_TYPE_IQ4_NL_8_8
912
        .type_name                = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
913
        .blck_size                = 0,
914
        .type_size                = 0,
915
        .is_quantized             = false,
916
    },
917
};
918
919
0
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
920
0
    assert(type >= 0);
921
0
    assert(type < GGML_TYPE_COUNT);
922
0
    return &type_traits[type];
923
0
}
924
925
//
926
// ggml object
927
//
928
929
struct ggml_object {
930
    size_t offs;
931
    size_t size;
932
933
    struct ggml_object * next;
934
935
    enum ggml_object_type type;
936
937
    char padding[4];
938
};
939
940
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
941
942
//
943
// ggml context
944
//
945
946
struct ggml_context {
947
    size_t mem_size;
948
    void * mem_buffer;
949
    bool   mem_buffer_owned;
950
    bool   no_alloc;
951
952
    int    n_objects;
953
954
    struct ggml_object * objects_begin;
955
    struct ggml_object * objects_end;
956
};
957
958
//
959
// data types
960
//
961
962
static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
963
    "NONE",
964
965
    "DUP",
966
    "ADD",
967
    "ADD_ID",
968
    "ADD1",
969
    "ACC",
970
    "SUB",
971
    "MUL",
972
    "DIV",
973
    "SQR",
974
    "SQRT",
975
    "LOG",
976
    "SIN",
977
    "COS",
978
    "SUM",
979
    "SUM_ROWS",
980
    "CUMSUM",
981
    "MEAN",
982
    "ARGMAX",
983
    "COUNT_EQUAL",
984
    "REPEAT",
985
    "REPEAT_BACK",
986
    "CONCAT",
987
    "SILU_BACK",
988
    "NORM",
989
    "RMS_NORM",
990
    "RMS_NORM_BACK",
991
    "GROUP_NORM",
992
    "L2_NORM",
993
994
    "MUL_MAT",
995
    "MUL_MAT_ID",
996
    "OUT_PROD",
997
998
    "SCALE",
999
    "SET",
1000
    "CPY",
1001
    "CONT",
1002
    "RESHAPE",
1003
    "VIEW",
1004
    "PERMUTE",
1005
    "TRANSPOSE",
1006
    "GET_ROWS",
1007
    "GET_ROWS_BACK",
1008
    "SET_ROWS",
1009
    "DIAG",
1010
    "DIAG_MASK_INF",
1011
    "DIAG_MASK_ZERO",
1012
    "SOFT_MAX",
1013
    "SOFT_MAX_BACK",
1014
    "ROPE",
1015
    "ROPE_BACK",
1016
    "CLAMP",
1017
    "CONV_TRANSPOSE_1D",
1018
    "IM2COL",
1019
    "IM2COL_BACK",
1020
    "IM2COL_3D",
1021
    "CONV_2D",
1022
    "CONV_3D",
1023
    "CONV_2D_DW",
1024
    "CONV_TRANSPOSE_2D",
1025
    "POOL_1D",
1026
    "POOL_2D",
1027
    "POOL_2D_BACK",
1028
    "UPSCALE",
1029
    "PAD",
1030
    "PAD_REFLECT_1D",
1031
    "ROLL",
1032
    "ARANGE",
1033
    "TIMESTEP_EMBEDDING",
1034
    "ARGSORT",
1035
    "TOP_K",
1036
    "LEAKY_RELU",
1037
    "TRI",
1038
    "FILL",
1039
1040
    "FLASH_ATTN_EXT",
1041
    "FLASH_ATTN_BACK",
1042
    "SSM_CONV",
1043
    "SSM_SCAN",
1044
    "WIN_PART",
1045
    "WIN_UNPART",
1046
    "GET_REL_POS",
1047
    "ADD_REL_POS",
1048
    "RWKV_WKV6",
1049
    "GATED_LINEAR_ATTN",
1050
    "RWKV_WKV7",
1051
    "SOLVE_TRI",
1052
    "GATED_DELTA_NET",
1053
1054
    "UNARY",
1055
1056
    "MAP_CUSTOM1",
1057
    "MAP_CUSTOM2",
1058
    "MAP_CUSTOM3",
1059
1060
    "CUSTOM",
1061
1062
    "CROSS_ENTROPY_LOSS",
1063
    "CROSS_ENTROPY_LOSS_BACK",
1064
    "OPT_STEP_ADAMW",
1065
    "OPT_STEP_SGD",
1066
1067
    "GLU",
1068
};
1069
1070
static_assert(GGML_OP_COUNT == 96, "GGML_OP_COUNT != 96");
1071
1072
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1073
    "none",
1074
1075
    "x",
1076
    "x+y",
1077
    "x[i]+y",
1078
    "x+y",
1079
    "view(x,nb,offset)+=y->x",
1080
    "x-y",
1081
    "x*y",
1082
    "x/y",
1083
    "x^2",
1084
    "√x",
1085
    "log(x)",
1086
    "sin(x)",
1087
    "cos(x)",
1088
    "Σx",
1089
    "Σx_k",
1090
    "cumsum(x)",
1091
    "Σx/n",
1092
    "argmax(x)",
1093
    "count_equal(x)",
1094
    "repeat(x)",
1095
    "repeat_back(x)",
1096
    "concat(x, y)",
1097
    "silu_back(x)",
1098
    "norm(x)",
1099
    "rms_norm(x)",
1100
    "rms_norm_back(x)",
1101
    "group_norm(x)",
1102
    "l2_norm(x)",
1103
1104
    "X*Y",
1105
    "X[i]*Y",
1106
    "X*Y",
1107
1108
    "x*v",
1109
    "y-\\>view(x)",
1110
    "x-\\>y",
1111
    "cont(x)",
1112
    "reshape(x)",
1113
    "view(x)",
1114
    "permute(x)",
1115
    "transpose(x)",
1116
    "get_rows(x)",
1117
    "get_rows_back(x)",
1118
    "set_rows(x)",
1119
    "diag(x)",
1120
    "diag_mask_inf(x)",
1121
    "diag_mask_zero(x)",
1122
    "soft_max(x)",
1123
    "soft_max_back(x)",
1124
    "rope(x)",
1125
    "rope_back(x)",
1126
    "clamp(x)",
1127
    "conv_transpose_1d(x)",
1128
    "im2col(x)",
1129
    "im2col_back(x)",
1130
    "im2col_3d(x)",
1131
    "conv_2d(x)",
1132
    "conv_3d(x)",
1133
    "conv_2d_dw(x)",
1134
    "conv_transpose_2d(x)",
1135
    "pool_1d(x)",
1136
    "pool_2d(x)",
1137
    "pool_2d_back(x)",
1138
    "upscale(x)",
1139
    "pad(x)",
1140
    "pad_reflect_1d(x)",
1141
    "roll(x)",
1142
    "arange(start, stop, step)",
1143
    "timestep_embedding(timesteps, dim, max_period)",
1144
    "argsort(x)",
1145
    "top_k(x)",
1146
    "leaky_relu(x)",
1147
    "tri(x)",
1148
    "fill(x, c)",
1149
1150
    "flash_attn_ext(x)",
1151
    "flash_attn_back(x)",
1152
    "ssm_conv(x)",
1153
    "ssm_scan(x)",
1154
    "win_part(x)",
1155
    "win_unpart(x)",
1156
    "get_rel_pos(x)",
1157
    "add_rel_pos(x)",
1158
    "rwkv_wkv6(k, v, r, tf, td, s)",
1159
    "gated_linear_attn(k, v, q, gate, s)",
1160
    "rwkv_wkv7(r, w, k, v, a, b, s)",
1161
    "A X = B, A triangular, solve X",
1162
    "gated_delta_net(q, k, v, g, beta, s)",
1163
1164
    "unary(x)",
1165
1166
    "map_custom(x)",
1167
    "map_custom(x,y)",
1168
    "map_custom(x,y,z)",
1169
1170
    "custom(x)",
1171
1172
    "cross_entropy_loss(x,y)",
1173
    "cross_entropy_loss_back(x,y)",
1174
    "adamw(x)",
1175
    "sgd(x)",
1176
1177
    "glu(x)",
1178
};
1179
1180
static_assert(GGML_OP_COUNT == 96, "GGML_OP_COUNT != 96");
1181
1182
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
1183
1184
static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1185
    "ABS",
1186
    "SGN",
1187
    "NEG",
1188
    "STEP",
1189
    "TANH",
1190
    "ELU",
1191
    "RELU",
1192
    "SIGMOID",
1193
    "GELU",
1194
    "GELU_QUICK",
1195
    "SILU",
1196
    "HARDSWISH",
1197
    "HARDSIGMOID",
1198
    "EXP",
1199
    "EXPM1",
1200
    "SOFTPLUS",
1201
    "GELU_ERF",
1202
    "XIELU",
1203
    "FLOOR",
1204
    "CEIL",
1205
    "ROUND",
1206
    "TRUNC",
1207
};
1208
1209
static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
1210
1211
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
1212
    "REGLU",
1213
    "GEGLU",
1214
    "SWIGLU",
1215
    "SWIGLU_OAI",
1216
    "GEGLU_ERF",
1217
    "GEGLU_QUICK",
1218
};
1219
1220
static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6");
1221
1222
1223
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
1224
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
1225
1226
1227
////////////////////////////////////////////////////////////////////////////////
1228
1229
0
void ggml_print_object(const struct ggml_object * obj) {
1230
0
    GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
1231
0
            obj->type, obj->offs, obj->size, (const void *) obj->next);
1232
0
}
1233
1234
0
void ggml_print_objects(const struct ggml_context * ctx) {
1235
0
    struct ggml_object * obj = ctx->objects_begin;
1236
1237
0
    GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
1238
1239
0
    while (obj != NULL) {
1240
0
        ggml_print_object(obj);
1241
0
        obj = obj->next;
1242
0
    }
1243
1244
0
    GGML_LOG_INFO("%s: --- end ---\n", __func__);
1245
0
}
1246
1247
3.75k
int64_t ggml_nelements(const struct ggml_tensor * tensor) {
1248
3.75k
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1249
1250
3.75k
    return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1251
3.75k
}
1252
1253
0
int64_t ggml_nrows(const struct ggml_tensor * tensor) {
1254
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1255
1256
0
    return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1257
0
}
1258
1259
5.17k
size_t ggml_nbytes(const struct ggml_tensor * tensor) {
1260
25.4k
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1261
20.3k
        if (tensor->ne[i] <= 0) {
1262
123
            return 0;
1263
123
        }
1264
20.3k
    }
1265
1266
5.05k
    size_t nbytes;
1267
5.05k
    const size_t blck_size = ggml_blck_size(tensor->type);
1268
5.05k
    if (blck_size == 1) {
1269
4.96k
        nbytes = ggml_type_size(tensor->type);
1270
24.8k
        for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1271
19.8k
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1272
19.8k
        }
1273
4.96k
    }
1274
89
    else {
1275
89
        nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
1276
356
        for (int i = 1; i < GGML_MAX_DIMS; ++i) {
1277
267
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1278
267
        }
1279
89
    }
1280
1281
5.05k
    return nbytes;
1282
5.17k
}
1283
1284
0
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
1285
0
    return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
1286
0
}
1287
1288
13.3k
int64_t ggml_blck_size(enum ggml_type type) {
1289
13.3k
    assert(type >= 0);
1290
13.3k
    assert(type < GGML_TYPE_COUNT);
1291
13.3k
    return type_traits[type].blck_size;
1292
13.3k
}
1293
1294
13.2k
size_t ggml_type_size(enum ggml_type type) {
1295
13.2k
    assert(type >= 0);
1296
13.2k
    assert(type < GGML_TYPE_COUNT);
1297
13.2k
    return type_traits[type].type_size;
1298
13.2k
}
1299
1300
1.53k
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
1301
1.53k
    assert(type >= 0);
1302
1.53k
    assert(type < GGML_TYPE_COUNT);
1303
1.53k
    assert(ne % ggml_blck_size(type) == 0);
1304
1.53k
    return ggml_type_size(type)*ne/ggml_blck_size(type);
1305
1.53k
}
1306
1307
0
double ggml_type_sizef(enum ggml_type type) {
1308
0
    assert(type >= 0);
1309
0
    assert(type < GGML_TYPE_COUNT);
1310
0
    return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
1311
0
}
1312
1313
288
const char * ggml_type_name(enum ggml_type type) {
1314
288
    assert(type >= 0);
1315
288
    assert(type < GGML_TYPE_COUNT);
1316
288
    return type_traits[type].type_name;
1317
288
}
1318
1319
0
bool ggml_is_quantized(enum ggml_type type) {
1320
0
    assert(type >= 0);
1321
0
    assert(type < GGML_TYPE_COUNT);
1322
0
    return type_traits[type].is_quantized;
1323
0
}
1324
1325
0
const char * ggml_op_name(enum ggml_op op) {
1326
0
    return GGML_OP_NAME[op];
1327
0
}
1328
1329
0
const char * ggml_op_symbol(enum ggml_op op) {
1330
0
    return GGML_OP_SYMBOL[op];
1331
0
}
1332
1333
0
const char * ggml_unary_op_name(enum ggml_unary_op op) {
1334
0
    return GGML_UNARY_OP_NAME[op];
1335
0
}
1336
1337
0
const char * ggml_glu_op_name(enum ggml_glu_op op) {
1338
0
    return GGML_GLU_OP_NAME[op];
1339
0
}
1340
1341
0
const char * ggml_op_desc(const struct ggml_tensor * t) {
1342
0
    if (t->op == GGML_OP_UNARY) {
1343
0
        enum ggml_unary_op uop = ggml_get_unary_op(t);
1344
0
        return ggml_unary_op_name(uop);
1345
0
    }
1346
0
    if (t->op == GGML_OP_GLU) {
1347
0
        enum ggml_glu_op gop = ggml_get_glu_op(t);
1348
0
        return ggml_glu_op_name(gop);
1349
0
    }
1350
0
    return ggml_op_name(t->op);
1351
0
}
1352
1353
0
size_t ggml_element_size(const struct ggml_tensor * tensor) {
1354
0
    return ggml_type_size(tensor->type);
1355
0
}
1356
1357
0
bool ggml_is_scalar(const struct ggml_tensor * tensor) {
1358
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1359
1360
0
    return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1361
0
}
1362
1363
0
bool ggml_is_vector(const struct ggml_tensor * tensor) {
1364
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1365
1366
0
    return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1367
0
}
1368
1369
0
bool ggml_is_matrix(const struct ggml_tensor * tensor) {
1370
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1371
1372
0
    return tensor->ne[2] == 1 && tensor->ne[3] == 1;
1373
0
}
1374
1375
0
bool ggml_is_3d(const struct ggml_tensor * tensor) {
1376
0
    return tensor->ne[3] == 1;
1377
0
}
1378
1379
0
int ggml_n_dims(const struct ggml_tensor * tensor) {
1380
0
    for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
1381
0
        if (tensor->ne[i] > 1) {
1382
0
            return i + 1;
1383
0
        }
1384
0
    }
1385
0
    return 1;
1386
0
}
1387
1388
0
enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
1389
0
    enum ggml_type wtype = GGML_TYPE_COUNT;
1390
1391
0
    switch (ftype) {
1392
0
        case GGML_FTYPE_ALL_F32:              wtype = GGML_TYPE_F32;   break;
1393
0
        case GGML_FTYPE_MOSTLY_F16:           wtype = GGML_TYPE_F16;   break;
1394
0
        case GGML_FTYPE_MOSTLY_BF16:          wtype = GGML_TYPE_BF16;  break;
1395
0
        case GGML_FTYPE_MOSTLY_Q4_0:          wtype = GGML_TYPE_Q4_0;  break;
1396
0
        case GGML_FTYPE_MOSTLY_Q4_1:          wtype = GGML_TYPE_Q4_1;  break;
1397
0
        case GGML_FTYPE_MOSTLY_Q1_0:          wtype = GGML_TYPE_Q1_0;  break;
1398
0
        case GGML_FTYPE_MOSTLY_Q5_0:          wtype = GGML_TYPE_Q5_0;  break;
1399
0
        case GGML_FTYPE_MOSTLY_Q5_1:          wtype = GGML_TYPE_Q5_1;  break;
1400
0
        case GGML_FTYPE_MOSTLY_Q8_0:          wtype = GGML_TYPE_Q8_0;  break;
1401
0
        case GGML_FTYPE_MOSTLY_MXFP4:         wtype = GGML_TYPE_MXFP4; break;
1402
0
        case GGML_FTYPE_MOSTLY_NVFP4:         wtype = GGML_TYPE_NVFP4; break;
1403
0
        case GGML_FTYPE_MOSTLY_Q2_K:          wtype = GGML_TYPE_Q2_K;  break;
1404
0
        case GGML_FTYPE_MOSTLY_Q3_K:          wtype = GGML_TYPE_Q3_K;  break;
1405
0
        case GGML_FTYPE_MOSTLY_Q4_K:          wtype = GGML_TYPE_Q4_K;  break;
1406
0
        case GGML_FTYPE_MOSTLY_Q5_K:          wtype = GGML_TYPE_Q5_K;  break;
1407
0
        case GGML_FTYPE_MOSTLY_Q6_K:          wtype = GGML_TYPE_Q6_K;  break;
1408
0
        case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS;  break;
1409
0
        case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;   break;
1410
0
        case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS;  break;
1411
0
        case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;    break;
1412
0
        case GGML_FTYPE_MOSTLY_IQ1_M:         wtype = GGML_TYPE_IQ1_M;    break;
1413
0
        case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;   break;
1414
0
        case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;   break;
1415
0
        case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;    break;
1416
0
        case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;    break;
1417
0
        case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
1418
0
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
1419
0
    }
1420
1421
0
    GGML_ASSERT(wtype != GGML_TYPE_COUNT);
1422
1423
0
    return wtype;
1424
0
}
1425
1426
1.27k
size_t ggml_tensor_overhead(void) {
1427
1.27k
    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
1428
1.27k
}
1429
1430
0
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
1431
0
    return tensor->nb[0] > tensor->nb[1];
1432
0
}
1433
1434
0
static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
1435
0
    size_t next_nb = ggml_type_size(tensor->type);
1436
0
    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
1437
0
        return false;
1438
0
    }
1439
0
    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
1440
0
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
1441
0
        if (i > n) {
1442
0
            if (tensor->ne[i] != 1 && tensor->nb[i] != next_nb) {
1443
0
                return false;
1444
0
            }
1445
0
            next_nb *= tensor->ne[i];
1446
0
        } else {
1447
            // this dimension does not need to be contiguous
1448
0
            next_nb = tensor->ne[i]*tensor->nb[i];
1449
0
        }
1450
0
    }
1451
0
    return true;
1452
0
}
1453
1454
0
bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
1455
0
    return ggml_is_contiguous_0(tensor);
1456
0
}
1457
1458
0
bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
1459
0
    return ggml_is_contiguous_n(tensor, 0);
1460
0
}
1461
1462
0
bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
1463
0
    return ggml_is_contiguous_n(tensor, 1);
1464
0
}
1465
1466
0
bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
1467
0
    return ggml_is_contiguous_n(tensor, 2);
1468
0
}
1469
1470
0
bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
1471
0
    return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
1472
0
}
1473
1474
0
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
1475
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1476
1477
0
    return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
1478
0
}
1479
1480
0
bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
1481
0
    return
1482
0
        tensor->nb[0] > tensor->nb[2] &&
1483
0
        tensor->nb[1] > tensor->nb[0] &&
1484
0
        tensor->nb[2] == ggml_type_size(tensor->type);
1485
0
}
1486
1487
0
bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) {
1488
0
    return
1489
0
        tensor->ne[0] == ggml_blck_size(tensor->type) ||
1490
0
        tensor->nb[0] == ggml_type_size(tensor->type);
1491
0
}
1492
1493
0
static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
1494
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1495
1496
0
    return
1497
0
        tensor->nb[0] == ggml_type_size(tensor->type) &&
1498
0
        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
1499
0
        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
1500
0
}
1501
1502
0
bool ggml_is_empty(const struct ggml_tensor * tensor) {
1503
0
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1504
0
        if (tensor->ne[i] == 0) {
1505
            // empty if any dimension has no elements
1506
0
            return true;
1507
0
        }
1508
0
    }
1509
0
    return false;
1510
0
}
1511
1512
0
bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1513
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1514
1515
0
    return
1516
0
        (t0->ne[0] == t1->ne[0]) &&
1517
0
        (t0->ne[1] == t1->ne[1]) &&
1518
0
        (t0->ne[2] == t1->ne[2]) &&
1519
0
        (t0->ne[3] == t1->ne[3]);
1520
0
}
1521
1522
0
bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1523
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1524
1525
0
    return
1526
0
        (t0->nb[0] == t1->nb[0]) &&
1527
0
        (t0->nb[1] == t1->nb[1]) &&
1528
0
        (t0->nb[2] == t1->nb[2]) &&
1529
0
        (t0->nb[3] == t1->nb[3]);
1530
0
}
1531
1532
0
bool ggml_is_view(const struct ggml_tensor * t) {
1533
0
    return ggml_impl_is_view(t);
1534
0
}
1535
1536
// check if t1 can be represented as a repetition of t0
1537
0
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1538
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1539
1540
0
    return ggml_is_empty(t0) ? ggml_is_empty(t1) :
1541
0
        (t1->ne[0]%t0->ne[0] == 0) &&
1542
0
        (t1->ne[1]%t0->ne[1] == 0) &&
1543
0
        (t1->ne[2]%t0->ne[2] == 0) &&
1544
0
        (t1->ne[3]%t0->ne[3] == 0);
1545
0
}
1546
1547
0
static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1548
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1549
1550
0
    return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
1551
0
}
1552
1553
// assert that pointer is aligned to GGML_MEM_ALIGN
1554
#define GGML_ASSERT_ALIGNED(ptr) \
1555
6.58k
    GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
1556
1557
////////////////////////////////////////////////////////////////////////////////
1558
1559
5.05k
struct ggml_context * ggml_init(struct ggml_init_params params) {
1560
5.05k
    bool is_first_call = true;
1561
1562
5.05k
    ggml_critical_section_start();
1563
1564
5.05k
    if (is_first_call) {
1565
        // initialize time system (required on Windows)
1566
5.05k
        ggml_time_init();
1567
1568
5.05k
        is_first_call = false;
1569
5.05k
    }
1570
1571
5.05k
    ggml_critical_section_end();
1572
1573
5.05k
    struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
1574
1575
    // allow to call ggml_init with 0 size
1576
5.05k
    if (params.mem_size == 0) {
1577
4.68k
        params.mem_size = GGML_MEM_ALIGN;
1578
4.68k
    }
1579
1580
5.05k
    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
1581
1582
5.05k
    *ctx = (struct ggml_context) {
1583
5.05k
        /*.mem_size           =*/ mem_size,
1584
5.05k
        /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
1585
5.05k
        /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
1586
5.05k
        /*.no_alloc           =*/ params.no_alloc,
1587
5.05k
        /*.n_objects          =*/ 0,
1588
5.05k
        /*.objects_begin      =*/ NULL,
1589
5.05k
        /*.objects_end        =*/ NULL,
1590
5.05k
    };
1591
1592
5.05k
    GGML_ASSERT(ctx->mem_buffer != NULL);
1593
1594
5.05k
    GGML_ASSERT_ALIGNED(ctx->mem_buffer);
1595
1596
5.05k
    GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
1597
1598
5.05k
    return ctx;
1599
5.05k
}
1600
1601
0
void ggml_reset(struct ggml_context * ctx) {
1602
0
    if (ctx == NULL) {
1603
0
        return;
1604
0
    }
1605
1606
0
    ctx->n_objects     = 0;
1607
0
    ctx->objects_begin = NULL;
1608
0
    ctx->objects_end   = NULL;
1609
0
}
1610
1611
5.04k
void ggml_free(struct ggml_context * ctx) {
1612
5.04k
    if (ctx == NULL) {
1613
0
        return;
1614
0
    }
1615
1616
5.04k
    if (ctx->mem_buffer_owned) {
1617
5.04k
        ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
1618
5.04k
    }
1619
1620
5.04k
    GGML_FREE(ctx);
1621
5.04k
}
1622
1623
0
size_t ggml_used_mem(const struct ggml_context * ctx) {
1624
0
    return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
1625
0
}
1626
1627
0
bool ggml_get_no_alloc(struct ggml_context * ctx) {
1628
0
    return ctx->no_alloc;
1629
0
}
1630
1631
1.80k
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
1632
1.80k
    ctx->no_alloc = no_alloc;
1633
1.80k
}
1634
1635
0
void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
1636
0
    return ctx->mem_buffer;
1637
0
}
1638
1639
0
size_t ggml_get_mem_size(const struct ggml_context * ctx) {
1640
0
    return ctx->mem_size;
1641
0
}
1642
1643
0
size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
1644
0
    size_t max_size = 0;
1645
1646
0
    for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
1647
0
        size_t bytes = ggml_nbytes(tensor);
1648
0
        max_size = MAX(max_size, bytes);
1649
0
    }
1650
1651
0
    return max_size;
1652
0
}
1653
1654
////////////////////////////////////////////////////////////////////////////////
1655
1656
1.53k
static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
1657
    // always insert objects at the end of the context's memory pool
1658
1.53k
    struct ggml_object * obj_cur = ctx->objects_end;
1659
1660
1.53k
    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
1661
1.53k
    const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
1662
1.53k
    const size_t cur_end  = cur_offs + cur_size;
1663
1664
    // align to GGML_MEM_ALIGN
1665
1.53k
    GGML_ASSERT(size <= SIZE_MAX - (GGML_MEM_ALIGN - 1));
1666
1.53k
    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
1667
1668
1.53k
    char * const mem_buffer = ctx->mem_buffer;
1669
1.53k
    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
1670
1671
    // integer overflow checks
1672
1.53k
    if (cur_end > SIZE_MAX - size_needed) {
1673
0
        GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu)\n", __func__, cur_end, size_needed);
1674
0
        return NULL;
1675
0
    }
1676
1.53k
    if (cur_end + size_needed > SIZE_MAX - GGML_OBJECT_SIZE) {
1677
0
        GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu) + GGML_OBJECT_SIZE (%zu)\n", __func__,
1678
0
                cur_end, size_needed, (size_t) GGML_OBJECT_SIZE);
1679
0
        return NULL;
1680
0
    }
1681
1682
1.53k
    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
1683
0
        GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
1684
0
                __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
1685
#ifndef NDEBUG
1686
        GGML_ABORT("not enough space in the context's memory pool");
1687
#endif
1688
0
        return NULL;
1689
0
    }
1690
1691
1.53k
    *obj_new = (struct ggml_object) {
1692
1.53k
        .offs = cur_end + GGML_OBJECT_SIZE,
1693
1.53k
        .size = size_needed,
1694
1.53k
        .next = NULL,
1695
1.53k
        .type = type,
1696
1.53k
    };
1697
1698
1.53k
    GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
1699
1700
1.53k
    if (obj_cur != NULL) {
1701
1.16k
        obj_cur->next = obj_new;
1702
1.16k
    } else {
1703
        // this is the first object in this context
1704
370
        ctx->objects_begin = obj_new;
1705
370
    }
1706
1707
1.53k
    ctx->objects_end = obj_new;
1708
1709
    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
1710
1711
1.53k
    return obj_new;
1712
1.53k
}
1713
1714
static struct ggml_tensor * ggml_new_tensor_impl(
1715
        struct ggml_context * ctx,
1716
        enum   ggml_type      type,
1717
        int                   n_dims,
1718
        const int64_t       * ne,
1719
        struct ggml_tensor  * view_src,
1720
1.53k
        size_t                view_offs) {
1721
1722
1.53k
    GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
1723
1.53k
    GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
1724
1725
    // find the base tensor and absolute offset
1726
1.53k
    if (view_src != NULL && view_src->view_src != NULL) {
1727
0
        view_offs += view_src->view_offs;
1728
0
        view_src   = view_src->view_src;
1729
0
    }
1730
1731
1.53k
    size_t data_size = ggml_row_size(type, ne[0]);
1732
6.12k
    for (int i = 1; i < n_dims; i++) {
1733
4.59k
        data_size *= ne[i];
1734
4.59k
    }
1735
1736
1.53k
    GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
1737
1738
1.53k
    void * data = view_src != NULL ? view_src->data : NULL;
1739
1.53k
    if (data != NULL) {
1740
0
        data = (char *) data + view_offs;
1741
0
    }
1742
1743
1.53k
    size_t obj_alloc_size = 0;
1744
1745
1.53k
    if (view_src == NULL && !ctx->no_alloc) {
1746
        // allocate tensor data in the context's memory pool
1747
0
        obj_alloc_size = data_size;
1748
0
    }
1749
1750
1.53k
    GGML_ASSERT(GGML_TENSOR_SIZE <= SIZE_MAX - obj_alloc_size);
1751
1752
1.53k
    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
1753
1.53k
    GGML_ASSERT(obj_new);
1754
1755
1.53k
    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
1756
1757
1.53k
    *result = (struct ggml_tensor) {
1758
1.53k
        /*.type         =*/ type,
1759
1.53k
        /*.buffer       =*/ NULL,
1760
1.53k
        /*.ne           =*/ { 1, 1, 1, 1 },
1761
1.53k
        /*.nb           =*/ { 0, 0, 0, 0 },
1762
1.53k
        /*.op           =*/ GGML_OP_NONE,
1763
1.53k
        /*.op_params    =*/ { 0 },
1764
1.53k
        /*.flags        =*/ 0,
1765
1.53k
        /*.src          =*/ { NULL },
1766
1.53k
        /*.view_src     =*/ view_src,
1767
1.53k
        /*.view_offs    =*/ view_offs,
1768
1.53k
        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
1769
1.53k
        /*.name         =*/ { 0 },
1770
1.53k
        /*.extra        =*/ NULL,
1771
1.53k
        /*.padding      =*/ { 0 },
1772
1.53k
    };
1773
1774
    // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
1775
    //GGML_ASSERT_ALIGNED(result->data);
1776
1777
7.65k
    for (int i = 0; i < n_dims; i++) {
1778
6.12k
        result->ne[i] = ne[i];
1779
6.12k
    }
1780
1781
1.53k
    result->nb[0] = ggml_type_size(type);
1782
1.53k
    result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
1783
4.59k
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
1784
3.06k
        result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
1785
3.06k
    }
1786
1787
1.53k
    ctx->n_objects++;
1788
1789
1.53k
    return result;
1790
1.53k
}
1791
1792
struct ggml_tensor * ggml_new_tensor(
1793
        struct ggml_context * ctx,
1794
        enum   ggml_type      type,
1795
        int                   n_dims,
1796
1.53k
        const int64_t       * ne) {
1797
1.53k
    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
1798
1.53k
}
1799
1800
struct ggml_tensor * ggml_new_tensor_1d(
1801
        struct ggml_context * ctx,
1802
        enum   ggml_type      type,
1803
0
        int64_t ne0) {
1804
0
    return ggml_new_tensor(ctx, type, 1, &ne0);
1805
0
}
1806
1807
struct ggml_tensor * ggml_new_tensor_2d(
1808
        struct ggml_context * ctx,
1809
        enum   ggml_type      type,
1810
        int64_t ne0,
1811
0
        int64_t ne1) {
1812
0
    const int64_t ne[2] = { ne0, ne1 };
1813
0
    return ggml_new_tensor(ctx, type, 2, ne);
1814
0
}
1815
1816
struct ggml_tensor * ggml_new_tensor_3d(
1817
        struct ggml_context * ctx,
1818
        enum   ggml_type      type,
1819
        int64_t ne0,
1820
        int64_t ne1,
1821
0
        int64_t ne2) {
1822
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
1823
0
    return ggml_new_tensor(ctx, type, 3, ne);
1824
0
}
1825
1826
struct ggml_tensor * ggml_new_tensor_4d(
1827
        struct ggml_context * ctx,
1828
        enum   ggml_type type,
1829
        int64_t ne0,
1830
        int64_t ne1,
1831
        int64_t ne2,
1832
0
        int64_t ne3) {
1833
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
1834
0
    return ggml_new_tensor(ctx, type, 4, ne);
1835
0
}
1836
1837
0
void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
1838
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
1839
1840
0
    return (uint8_t *)ctx->mem_buffer + obj->offs;
1841
0
}
1842
1843
0
struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
1844
0
    return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
1845
0
}
1846
1847
0
void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
1848
0
    const int64_t ne2 = tensor->ne[2];
1849
0
    const int64_t ne1 = tensor->ne[1];
1850
0
    const int64_t ne0 = tensor->ne[0];
1851
1852
0
    const int64_t i3_ = (i/(ne2*ne1*ne0));
1853
0
    const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
1854
0
    const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
1855
0
    const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
1856
1857
0
    if (i0) {
1858
0
        * i0 = i0_;
1859
0
    }
1860
0
    if (i1) {
1861
0
        * i1 = i1_;
1862
0
    }
1863
0
    if (i2) {
1864
0
        * i2 = i2_;
1865
0
    }
1866
0
    if (i3) {
1867
0
        * i3 = i3_;
1868
0
    }
1869
0
}
1870
1871
0
void * ggml_get_data(const struct ggml_tensor * tensor) {
1872
0
    return tensor->data;
1873
0
}
1874
1875
0
float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
1876
0
    assert(tensor->type == GGML_TYPE_F32);
1877
0
    return (float *)(tensor->data);
1878
0
}
1879
1880
0
enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
1881
0
    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
1882
0
    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
1883
0
}
1884
1885
0
enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) {
1886
0
    GGML_ASSERT(tensor->op == GGML_OP_GLU);
1887
0
    return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0);
1888
0
}
1889
1890
1.38k
const char * ggml_get_name(const struct ggml_tensor * tensor) {
1891
1.38k
    return tensor->name;
1892
1.38k
}
1893
1894
4.85k
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
1895
4.85k
    size_t i;
1896
49.6k
    for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
1897
44.8k
        tensor->name[i] = name[i];
1898
44.8k
    }
1899
4.85k
    tensor->name[i] = '\0';
1900
4.85k
    return tensor;
1901
4.85k
}
1902
1903
0
struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
1904
0
    va_list args;
1905
0
    va_start(args, fmt);
1906
0
    vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
1907
0
    va_end(args);
1908
0
    return tensor;
1909
0
}
1910
1911
struct ggml_tensor * ggml_view_tensor(
1912
        struct ggml_context * ctx,
1913
0
        struct ggml_tensor  * src) {
1914
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
1915
0
    ggml_format_name(result, "%s (view)", src->name);
1916
1917
0
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
1918
0
        result->nb[i] = src->nb[i];
1919
0
    }
1920
1921
0
    return result;
1922
0
}
1923
1924
899
struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
1925
899
    struct ggml_object * obj = ctx->objects_begin;
1926
1927
899
    char * const mem_buffer = ctx->mem_buffer;
1928
1929
899
    while (obj != NULL) {
1930
370
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1931
370
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1932
370
        }
1933
1934
0
        obj = obj->next;
1935
0
    }
1936
1937
529
    return NULL;
1938
899
}
1939
1940
904
struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
1941
904
    struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
1942
904
    obj = obj->next;
1943
1944
904
    char * const mem_buffer = ctx->mem_buffer;
1945
1946
904
    while (obj != NULL) {
1947
772
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1948
772
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1949
772
        }
1950
1951
0
        obj = obj->next;
1952
0
    }
1953
1954
132
    return NULL;
1955
904
}
1956
1957
0
struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
1958
0
    struct ggml_object * obj = ctx->objects_begin;
1959
1960
0
    char * const mem_buffer = ctx->mem_buffer;
1961
1962
0
    while (obj != NULL) {
1963
0
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1964
0
            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
1965
0
            if (strcmp(cur->name, name) == 0) {
1966
0
                return cur;
1967
0
            }
1968
0
        }
1969
1970
0
        obj = obj->next;
1971
0
    }
1972
1973
0
    return NULL;
1974
0
}
1975
1976
////////////////////////////////////////////////////////////////////////////////
1977
1978
// ggml_dup
1979
1980
static struct ggml_tensor * ggml_dup_impl(
1981
        struct ggml_context * ctx,
1982
        struct ggml_tensor  * a,
1983
0
        bool                  inplace) {
1984
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1985
1986
0
    result->op     = GGML_OP_DUP;
1987
0
    result->src[0] = a;
1988
1989
0
    return result;
1990
0
}
1991
1992
struct ggml_tensor * ggml_dup(
1993
        struct ggml_context * ctx,
1994
0
        struct ggml_tensor  * a) {
1995
0
    return ggml_dup_impl(ctx, a, false);
1996
0
}
1997
1998
struct ggml_tensor * ggml_dup_inplace(
1999
        struct ggml_context * ctx,
2000
0
        struct ggml_tensor  * a) {
2001
0
    return ggml_dup_impl(ctx, a, true);
2002
0
}
2003
2004
// ggml_add
2005
2006
static struct ggml_tensor * ggml_add_impl(
2007
        struct ggml_context * ctx,
2008
        struct ggml_tensor  * a,
2009
        struct ggml_tensor  * b,
2010
0
        bool                  inplace) {
2011
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2012
2013
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2014
2015
0
    result->op     = GGML_OP_ADD;
2016
0
    result->src[0] = a;
2017
0
    result->src[1] = b;
2018
2019
0
    return result;
2020
0
}
2021
2022
struct ggml_tensor * ggml_add(
2023
        struct ggml_context * ctx,
2024
        struct ggml_tensor  * a,
2025
0
        struct ggml_tensor  * b) {
2026
0
    return ggml_add_impl(ctx, a, b, false);
2027
0
}
2028
2029
struct ggml_tensor * ggml_add_inplace(
2030
        struct ggml_context * ctx,
2031
        struct ggml_tensor  * a,
2032
0
        struct ggml_tensor  * b) {
2033
0
    return ggml_add_impl(ctx, a, b, true);
2034
0
}
2035
2036
// ggml_add_cast
2037
2038
static struct ggml_tensor * ggml_add_cast_impl(
2039
        struct ggml_context * ctx,
2040
        struct ggml_tensor  * a,
2041
        struct ggml_tensor  * b,
2042
0
        enum   ggml_type      type) {
2043
    // TODO: support less-strict constraint
2044
    //       GGML_ASSERT(ggml_can_repeat(b, a));
2045
0
    GGML_ASSERT(ggml_can_repeat_rows(b, a));
2046
2047
    // currently only supported for quantized input and f16
2048
0
    GGML_ASSERT(ggml_is_quantized(a->type) ||
2049
0
                a->type == GGML_TYPE_F16 ||
2050
0
                a->type == GGML_TYPE_BF16);
2051
2052
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
2053
2054
0
    result->op     = GGML_OP_ADD;
2055
0
    result->src[0] = a;
2056
0
    result->src[1] = b;
2057
2058
0
    return result;
2059
0
}
2060
2061
struct ggml_tensor * ggml_add_cast(
2062
        struct ggml_context * ctx,
2063
        struct ggml_tensor  * a,
2064
        struct ggml_tensor  * b,
2065
0
        enum   ggml_type      type) {
2066
0
    return ggml_add_cast_impl(ctx, a, b, type);
2067
0
}
2068
2069
struct ggml_tensor * ggml_add_id(
2070
            struct ggml_context * ctx,
2071
            struct ggml_tensor  * a,
2072
            struct ggml_tensor  * b,
2073
0
            struct ggml_tensor  * ids) {
2074
2075
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
2076
0
    GGML_ASSERT(a->ne[1] == ids->ne[0]);
2077
0
    GGML_ASSERT(a->ne[2] == ids->ne[1]);
2078
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
2079
2080
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2081
2082
0
    result->op     = GGML_OP_ADD_ID;
2083
0
    result->src[0] = a;
2084
0
    result->src[1] = b;
2085
0
    result->src[2] = ids;
2086
2087
0
    return result;
2088
0
}
2089
2090
// ggml_add1
2091
2092
static struct ggml_tensor * ggml_add1_impl(
2093
        struct ggml_context * ctx,
2094
        struct ggml_tensor  * a,
2095
        struct ggml_tensor  * b,
2096
0
        bool                  inplace) {
2097
0
    GGML_ASSERT(ggml_is_scalar(b));
2098
0
    GGML_ASSERT(ggml_is_padded_1d(a));
2099
2100
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2101
2102
0
    result->op     = GGML_OP_ADD1;
2103
0
    result->src[0] = a;
2104
0
    result->src[1] = b;
2105
2106
0
    return result;
2107
0
}
2108
2109
struct ggml_tensor * ggml_add1(
2110
        struct ggml_context * ctx,
2111
        struct ggml_tensor  * a,
2112
0
        struct ggml_tensor  * b) {
2113
0
    return ggml_add1_impl(ctx, a, b, false);
2114
0
}
2115
2116
struct ggml_tensor * ggml_add1_inplace(
2117
        struct ggml_context * ctx,
2118
        struct ggml_tensor  * a,
2119
0
        struct ggml_tensor  * b) {
2120
0
    return ggml_add1_impl(ctx, a, b, true);
2121
0
}
2122
2123
// ggml_acc
2124
2125
static struct ggml_tensor * ggml_acc_impl(
2126
        struct ggml_context * ctx,
2127
        struct ggml_tensor  * a,
2128
        struct ggml_tensor  * b,
2129
        size_t                nb1,
2130
        size_t                nb2,
2131
        size_t                nb3,
2132
        size_t                offset,
2133
0
        bool                  inplace) {
2134
0
    GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
2135
0
    GGML_ASSERT(ggml_is_contiguous(a));
2136
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2137
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
2138
2139
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2140
2141
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
2142
0
    ggml_set_op_params(result, params, sizeof(params));
2143
2144
0
    result->op     = GGML_OP_ACC;
2145
0
    result->src[0] = a;
2146
0
    result->src[1] = b;
2147
2148
0
    return result;
2149
0
}
2150
2151
struct ggml_tensor * ggml_acc(
2152
        struct ggml_context * ctx,
2153
        struct ggml_tensor  * a,
2154
        struct ggml_tensor  * b,
2155
        size_t                nb1,
2156
        size_t                nb2,
2157
        size_t                nb3,
2158
0
        size_t                offset) {
2159
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
2160
0
}
2161
2162
struct ggml_tensor * ggml_acc_inplace(
2163
        struct ggml_context * ctx,
2164
        struct ggml_tensor  * a,
2165
        struct ggml_tensor  * b,
2166
        size_t                nb1,
2167
        size_t                nb2,
2168
        size_t                nb3,
2169
0
        size_t                offset) {
2170
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
2171
0
}
2172
2173
// ggml_sub
2174
2175
static struct ggml_tensor * ggml_sub_impl(
2176
        struct ggml_context * ctx,
2177
        struct ggml_tensor  * a,
2178
        struct ggml_tensor  * b,
2179
0
        bool                  inplace) {
2180
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2181
2182
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2183
2184
0
    result->op     = GGML_OP_SUB;
2185
0
    result->src[0] = a;
2186
0
    result->src[1] = b;
2187
2188
0
    return result;
2189
0
}
2190
2191
struct ggml_tensor * ggml_sub(
2192
        struct ggml_context * ctx,
2193
        struct ggml_tensor  * a,
2194
0
        struct ggml_tensor  * b) {
2195
0
    return ggml_sub_impl(ctx, a, b, false);
2196
0
}
2197
2198
struct ggml_tensor * ggml_sub_inplace(
2199
        struct ggml_context * ctx,
2200
        struct ggml_tensor  * a,
2201
0
        struct ggml_tensor  * b) {
2202
0
    return ggml_sub_impl(ctx, a, b, true);
2203
0
}
2204
2205
// ggml_mul
2206
2207
static struct ggml_tensor * ggml_mul_impl(
2208
        struct ggml_context * ctx,
2209
        struct ggml_tensor  * a,
2210
        struct ggml_tensor  * b,
2211
0
        bool                  inplace) {
2212
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2213
2214
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2215
2216
0
    result->op     = GGML_OP_MUL;
2217
0
    result->src[0] = a;
2218
0
    result->src[1] = b;
2219
2220
0
    return result;
2221
0
}
2222
2223
struct ggml_tensor * ggml_mul(
2224
        struct ggml_context * ctx,
2225
        struct ggml_tensor  * a,
2226
0
        struct ggml_tensor  * b) {
2227
0
    return ggml_mul_impl(ctx, a, b, false);
2228
0
}
2229
2230
struct ggml_tensor * ggml_mul_inplace(
2231
        struct ggml_context * ctx,
2232
        struct ggml_tensor  * a,
2233
0
        struct ggml_tensor  * b) {
2234
0
    return ggml_mul_impl(ctx, a, b, true);
2235
0
}
2236
2237
// ggml_div
2238
2239
static struct ggml_tensor * ggml_div_impl(
2240
        struct ggml_context * ctx,
2241
        struct ggml_tensor  * a,
2242
        struct ggml_tensor  * b,
2243
0
        bool                  inplace) {
2244
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2245
2246
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2247
2248
0
    result->op     = GGML_OP_DIV;
2249
0
    result->src[0] = a;
2250
0
    result->src[1] = b;
2251
2252
0
    return result;
2253
0
}
2254
2255
struct ggml_tensor * ggml_div(
2256
        struct ggml_context * ctx,
2257
        struct ggml_tensor  * a,
2258
0
        struct ggml_tensor  * b) {
2259
0
    return ggml_div_impl(ctx, a, b, false);
2260
0
}
2261
2262
struct ggml_tensor * ggml_div_inplace(
2263
        struct ggml_context * ctx,
2264
        struct ggml_tensor  * a,
2265
0
        struct ggml_tensor  * b) {
2266
0
    return ggml_div_impl(ctx, a, b, true);
2267
0
}
2268
2269
// ggml_sqr
2270
2271
static struct ggml_tensor * ggml_sqr_impl(
2272
        struct ggml_context * ctx,
2273
        struct ggml_tensor  * a,
2274
0
        bool                  inplace) {
2275
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2276
2277
0
    result->op     = GGML_OP_SQR;
2278
0
    result->src[0] = a;
2279
2280
0
    return result;
2281
0
}
2282
2283
struct ggml_tensor * ggml_sqr(
2284
        struct ggml_context * ctx,
2285
0
        struct ggml_tensor  * a) {
2286
0
    return ggml_sqr_impl(ctx, a, false);
2287
0
}
2288
2289
struct ggml_tensor * ggml_sqr_inplace(
2290
        struct ggml_context * ctx,
2291
0
        struct ggml_tensor  * a) {
2292
0
    return ggml_sqr_impl(ctx, a, true);
2293
0
}
2294
2295
// ggml_sqrt
2296
2297
static struct ggml_tensor * ggml_sqrt_impl(
2298
        struct ggml_context * ctx,
2299
        struct ggml_tensor  * a,
2300
0
        bool                  inplace) {
2301
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2302
2303
0
    result->op     = GGML_OP_SQRT;
2304
0
    result->src[0] = a;
2305
2306
0
    return result;
2307
0
}
2308
2309
struct ggml_tensor * ggml_sqrt(
2310
        struct ggml_context * ctx,
2311
0
        struct ggml_tensor  * a) {
2312
0
    return ggml_sqrt_impl(ctx, a, false);
2313
0
}
2314
2315
struct ggml_tensor * ggml_sqrt_inplace(
2316
        struct ggml_context * ctx,
2317
0
        struct ggml_tensor  * a) {
2318
0
    return ggml_sqrt_impl(ctx, a, true);
2319
0
}
2320
2321
// ggml_log
2322
2323
static struct ggml_tensor * ggml_log_impl(
2324
        struct ggml_context * ctx,
2325
        struct ggml_tensor  * a,
2326
0
        bool                  inplace) {
2327
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2328
2329
0
    result->op     = GGML_OP_LOG;
2330
0
    result->src[0] = a;
2331
2332
0
    return result;
2333
0
}
2334
2335
struct ggml_tensor * ggml_log(
2336
        struct ggml_context * ctx,
2337
0
        struct ggml_tensor  * a) {
2338
0
    return ggml_log_impl(ctx, a, false);
2339
0
}
2340
2341
struct ggml_tensor * ggml_log_inplace(
2342
        struct ggml_context * ctx,
2343
0
        struct ggml_tensor  * a) {
2344
0
    return ggml_log_impl(ctx, a, true);
2345
0
}
2346
2347
struct ggml_tensor * ggml_expm1(
2348
        struct ggml_context * ctx,
2349
0
        struct ggml_tensor  * a) {
2350
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
2351
0
}
2352
2353
struct ggml_tensor * ggml_expm1_inplace(
2354
        struct ggml_context * ctx,
2355
0
        struct ggml_tensor  * a) {
2356
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
2357
0
}
2358
2359
struct ggml_tensor * ggml_softplus(
2360
        struct ggml_context * ctx,
2361
0
        struct ggml_tensor  * a) {
2362
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2363
0
}
2364
2365
struct ggml_tensor * ggml_softplus_inplace(
2366
        struct ggml_context * ctx,
2367
0
        struct ggml_tensor  * a) {
2368
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2369
0
}
2370
2371
// ggml_sin
2372
2373
static struct ggml_tensor * ggml_sin_impl(
2374
        struct ggml_context * ctx,
2375
        struct ggml_tensor  * a,
2376
0
        bool                  inplace) {
2377
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2378
2379
0
    result->op     = GGML_OP_SIN;
2380
0
    result->src[0] = a;
2381
2382
0
    return result;
2383
0
}
2384
2385
struct ggml_tensor * ggml_sin(
2386
        struct ggml_context * ctx,
2387
0
        struct ggml_tensor  * a) {
2388
0
    return ggml_sin_impl(ctx, a, false);
2389
0
}
2390
2391
struct ggml_tensor * ggml_sin_inplace(
2392
        struct ggml_context * ctx,
2393
0
        struct ggml_tensor  * a) {
2394
0
    return ggml_sin_impl(ctx, a, true);
2395
0
}
2396
2397
// ggml_cos
2398
2399
static struct ggml_tensor * ggml_cos_impl(
2400
        struct ggml_context * ctx,
2401
        struct ggml_tensor  * a,
2402
0
        bool                  inplace) {
2403
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2404
2405
0
    result->op     = GGML_OP_COS;
2406
0
    result->src[0] = a;
2407
2408
0
    return result;
2409
0
}
2410
2411
struct ggml_tensor * ggml_cos(
2412
        struct ggml_context * ctx,
2413
0
        struct ggml_tensor  * a) {
2414
0
    return ggml_cos_impl(ctx, a, false);
2415
0
}
2416
2417
struct ggml_tensor * ggml_cos_inplace(
2418
        struct ggml_context * ctx,
2419
0
        struct ggml_tensor  * a) {
2420
0
    return ggml_cos_impl(ctx, a, true);
2421
0
}
2422
2423
// ggml_sum
2424
2425
struct ggml_tensor * ggml_sum(
2426
        struct ggml_context * ctx,
2427
0
        struct ggml_tensor  * a) {
2428
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
2429
2430
0
    result->op     = GGML_OP_SUM;
2431
0
    result->src[0] = a;
2432
2433
0
    return result;
2434
0
}
2435
2436
// ggml_sum_rows
2437
2438
struct ggml_tensor * ggml_sum_rows(
2439
        struct ggml_context * ctx,
2440
0
        struct ggml_tensor  * a) {
2441
0
    int64_t ne[GGML_MAX_DIMS] = { 1 };
2442
0
    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
2443
0
        ne[i] = a->ne[i];
2444
0
    }
2445
2446
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2447
2448
0
    result->op     = GGML_OP_SUM_ROWS;
2449
0
    result->src[0] = a;
2450
2451
0
    return result;
2452
0
}
2453
2454
// ggml_cumsum
2455
2456
struct ggml_tensor * ggml_cumsum(
2457
        struct ggml_context * ctx,
2458
0
        struct ggml_tensor  * a) {
2459
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2460
2461
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2462
2463
0
    result->op     = GGML_OP_CUMSUM;
2464
0
    result->src[0] = a;
2465
2466
0
    return result;
2467
0
}
2468
2469
// ggml_mean
2470
2471
struct ggml_tensor * ggml_mean(
2472
        struct ggml_context * ctx,
2473
0
        struct ggml_tensor  * a) {
2474
0
    int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
2475
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
2476
2477
0
    result->op     = GGML_OP_MEAN;
2478
0
    result->src[0] = a;
2479
2480
0
    return result;
2481
0
}
2482
2483
// ggml_argmax
2484
2485
struct ggml_tensor * ggml_argmax(
2486
        struct ggml_context * ctx,
2487
0
        struct ggml_tensor  * a) {
2488
0
    GGML_ASSERT(ggml_is_matrix(a));
2489
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
2490
2491
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
2492
2493
0
    result->op     = GGML_OP_ARGMAX;
2494
0
    result->src[0] = a;
2495
2496
0
    return result;
2497
0
}
2498
2499
// ggml_count_equal
2500
2501
struct ggml_tensor * ggml_count_equal(
2502
        struct ggml_context * ctx,
2503
        struct ggml_tensor  * a,
2504
0
        struct ggml_tensor  * b) {
2505
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
2506
2507
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
2508
2509
0
    result->op     = GGML_OP_COUNT_EQUAL;
2510
0
    result->src[0] = a;
2511
0
    result->src[1] = b;
2512
2513
0
    return result;
2514
0
}
2515
2516
// ggml_repeat
2517
2518
struct ggml_tensor * ggml_repeat(
2519
        struct ggml_context * ctx,
2520
        struct ggml_tensor  * a,
2521
0
        struct ggml_tensor  * b) {
2522
0
    GGML_ASSERT(ggml_can_repeat(a, b));
2523
2524
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2525
2526
0
    result->op     = GGML_OP_REPEAT;
2527
0
    result->src[0] = a;
2528
2529
0
    return result;
2530
0
}
2531
2532
struct ggml_tensor * ggml_repeat_4d(
2533
        struct ggml_context * ctx,
2534
        struct ggml_tensor * a,
2535
0
        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
2536
0
    const bool can_repeat = ggml_is_empty(a) || (
2537
0
        (ne0 % a->ne[0] == 0) &&
2538
0
        (ne1 % a->ne[1] == 0) &&
2539
0
        (ne2 % a->ne[2] == 0) &&
2540
0
        (ne3 % a->ne[3] == 0)
2541
0
    );
2542
0
    GGML_ASSERT(can_repeat);
2543
2544
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
2545
2546
0
    result->op     = GGML_OP_REPEAT;
2547
0
    result->src[0] = a;
2548
2549
0
    return result;
2550
0
}
2551
2552
// ggml_repeat_back
2553
2554
struct ggml_tensor * ggml_repeat_back(
2555
        struct ggml_context * ctx,
2556
        struct ggml_tensor  * a,
2557
0
        struct ggml_tensor  * b) {
2558
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2559
2560
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2561
2562
0
    result->op     = GGML_OP_REPEAT_BACK;
2563
0
    result->src[0] = a;
2564
2565
0
    return result;
2566
0
}
2567
2568
// ggml_concat
2569
2570
struct ggml_tensor * ggml_concat(
2571
    struct ggml_context * ctx,
2572
    struct ggml_tensor  * a,
2573
    struct ggml_tensor  * b,
2574
0
    int                   dim) {
2575
0
    GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
2576
0
    GGML_ASSERT(a->type == b->type);
2577
2578
0
    int64_t ne[GGML_MAX_DIMS];
2579
0
    for (int d = 0; d < GGML_MAX_DIMS; ++d) {
2580
0
        if (d == dim) {
2581
0
            ne[d] = a->ne[d] + b->ne[d];
2582
0
            continue;
2583
0
        }
2584
0
        GGML_ASSERT(a->ne[d] == b->ne[d]);
2585
0
        ne[d] = a->ne[d];
2586
0
    }
2587
2588
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2589
2590
0
    ggml_set_op_params_i32(result, 0, dim);
2591
2592
0
    result->op     = GGML_OP_CONCAT;
2593
0
    result->src[0] = a;
2594
0
    result->src[1] = b;
2595
2596
0
    return result;
2597
0
}
2598
2599
// ggml_abs
2600
2601
struct ggml_tensor * ggml_abs(
2602
        struct ggml_context * ctx,
2603
0
        struct ggml_tensor  * a) {
2604
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
2605
0
}
2606
2607
struct ggml_tensor * ggml_abs_inplace(
2608
        struct ggml_context * ctx,
2609
0
        struct ggml_tensor  * a) {
2610
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
2611
0
}
2612
2613
// ggml_sgn
2614
2615
struct ggml_tensor * ggml_sgn(
2616
        struct ggml_context * ctx,
2617
0
        struct ggml_tensor  * a) {
2618
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
2619
0
}
2620
2621
struct ggml_tensor * ggml_sgn_inplace(
2622
        struct ggml_context * ctx,
2623
0
        struct ggml_tensor  * a) {
2624
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
2625
0
}
2626
2627
// ggml_neg
2628
2629
struct ggml_tensor * ggml_neg(
2630
        struct ggml_context * ctx,
2631
0
        struct ggml_tensor  * a) {
2632
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
2633
0
}
2634
2635
struct ggml_tensor * ggml_neg_inplace(
2636
        struct ggml_context * ctx,
2637
0
        struct ggml_tensor  * a) {
2638
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
2639
0
}
2640
2641
// ggml_step
2642
2643
struct ggml_tensor * ggml_step(
2644
        struct ggml_context * ctx,
2645
0
        struct ggml_tensor  * a) {
2646
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
2647
0
}
2648
2649
struct ggml_tensor * ggml_step_inplace(
2650
        struct ggml_context * ctx,
2651
0
        struct ggml_tensor  * a) {
2652
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
2653
0
}
2654
2655
// ggml_tanh
2656
2657
struct ggml_tensor * ggml_tanh(
2658
        struct ggml_context * ctx,
2659
0
        struct ggml_tensor  * a) {
2660
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
2661
0
}
2662
2663
struct ggml_tensor * ggml_tanh_inplace(
2664
        struct ggml_context * ctx,
2665
0
        struct ggml_tensor  * a) {
2666
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
2667
0
}
2668
2669
// ggml_elu
2670
2671
struct ggml_tensor * ggml_elu(
2672
    struct ggml_context * ctx,
2673
0
    struct ggml_tensor  * a) {
2674
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
2675
0
}
2676
2677
struct ggml_tensor * ggml_elu_inplace(
2678
    struct ggml_context * ctx,
2679
0
    struct ggml_tensor  * a) {
2680
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
2681
0
}
2682
2683
// ggml_relu
2684
2685
struct ggml_tensor * ggml_relu(
2686
        struct ggml_context * ctx,
2687
0
        struct ggml_tensor  * a) {
2688
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
2689
0
}
2690
2691
struct ggml_tensor * ggml_relu_inplace(
2692
        struct ggml_context * ctx,
2693
0
        struct ggml_tensor  * a) {
2694
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
2695
0
}
2696
2697
// ggml_leaky_relu
2698
2699
struct ggml_tensor * ggml_leaky_relu(
2700
        struct ggml_context * ctx,
2701
        struct ggml_tensor  * a,
2702
        float                 negative_slope,
2703
0
        bool                  inplace) {
2704
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2705
2706
0
    ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
2707
2708
0
    result->op     = GGML_OP_LEAKY_RELU;
2709
0
    result->src[0] = a;
2710
2711
0
    return result;
2712
0
}
2713
2714
// ggml_sigmoid
2715
2716
struct ggml_tensor * ggml_sigmoid(
2717
        struct ggml_context * ctx,
2718
0
        struct ggml_tensor  * a) {
2719
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
2720
0
}
2721
2722
struct ggml_tensor * ggml_sigmoid_inplace(
2723
        struct ggml_context * ctx,
2724
0
        struct ggml_tensor  * a) {
2725
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
2726
0
}
2727
2728
// ggml_gelu
2729
2730
struct ggml_tensor * ggml_gelu(
2731
        struct ggml_context * ctx,
2732
0
        struct ggml_tensor  * a) {
2733
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
2734
0
}
2735
2736
struct ggml_tensor * ggml_gelu_inplace(
2737
        struct ggml_context * ctx,
2738
0
        struct ggml_tensor  * a) {
2739
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
2740
0
}
2741
2742
// ggml_gelu_erf
2743
2744
struct ggml_tensor * ggml_gelu_erf(
2745
        struct ggml_context * ctx,
2746
0
        struct ggml_tensor  * a) {
2747
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
2748
0
}
2749
2750
struct ggml_tensor * ggml_gelu_erf_inplace(
2751
        struct ggml_context * ctx,
2752
0
        struct ggml_tensor  * a) {
2753
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
2754
0
}
2755
2756
// ggml_gelu_quick
2757
2758
struct ggml_tensor * ggml_gelu_quick(
2759
        struct ggml_context * ctx,
2760
0
        struct ggml_tensor  * a) {
2761
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2762
0
}
2763
2764
struct ggml_tensor * ggml_gelu_quick_inplace(
2765
        struct ggml_context * ctx,
2766
0
        struct ggml_tensor  * a) {
2767
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2768
0
}
2769
2770
// ggml_silu
2771
2772
struct ggml_tensor * ggml_silu(
2773
        struct ggml_context * ctx,
2774
0
        struct ggml_tensor  * a) {
2775
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
2776
0
}
2777
2778
struct ggml_tensor * ggml_silu_inplace(
2779
        struct ggml_context * ctx,
2780
0
        struct ggml_tensor  * a) {
2781
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
2782
0
}
2783
2784
// ggml_xielu
2785
2786
struct ggml_tensor * ggml_xielu(
2787
        struct ggml_context * ctx,
2788
        struct ggml_tensor  * a,
2789
        float alpha_n,
2790
        float alpha_p,
2791
        float beta,
2792
0
        float eps) {
2793
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2794
2795
0
    ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
2796
0
    ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
2797
0
    ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
2798
0
    ggml_set_op_params_f32(result, 3, beta);
2799
0
    ggml_set_op_params_f32(result, 4, eps);
2800
2801
0
    result->op     = GGML_OP_UNARY;
2802
0
    result->src[0] = a;
2803
2804
0
    return result;
2805
0
}
2806
2807
// ggml_silu_back
2808
2809
struct ggml_tensor * ggml_silu_back(
2810
        struct ggml_context * ctx,
2811
        struct ggml_tensor  * a,
2812
0
        struct ggml_tensor  * b) {
2813
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2814
2815
0
    result->op     = GGML_OP_SILU_BACK;
2816
0
    result->src[0] = a;
2817
0
    result->src[1] = b;
2818
2819
0
    return result;
2820
0
}
2821
2822
// ggml hardswish
2823
2824
struct ggml_tensor * ggml_hardswish(
2825
        struct ggml_context * ctx,
2826
0
        struct ggml_tensor  * a) {
2827
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
2828
0
}
2829
2830
// ggml hardsigmoid
2831
2832
struct ggml_tensor * ggml_hardsigmoid(
2833
        struct ggml_context * ctx,
2834
0
        struct ggml_tensor  * a) {
2835
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
2836
0
}
2837
2838
// ggml exp
2839
2840
struct ggml_tensor * ggml_exp(
2841
        struct ggml_context * ctx,
2842
0
        struct ggml_tensor  * a) {
2843
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
2844
0
}
2845
2846
struct ggml_tensor * ggml_exp_inplace(
2847
        struct ggml_context * ctx,
2848
0
        struct ggml_tensor  * a) {
2849
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
2850
0
}
2851
2852
// ggml_glu
2853
2854
static struct ggml_tensor * ggml_glu_impl(
2855
        struct ggml_context * ctx,
2856
        struct ggml_tensor  * a,
2857
        struct ggml_tensor  * b,
2858
        enum ggml_glu_op      op,
2859
0
        bool                  swapped) {
2860
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
2861
2862
0
    if (b) {
2863
0
        GGML_ASSERT(ggml_is_contiguous_1(b));
2864
0
        GGML_ASSERT(ggml_are_same_shape(a, b));
2865
0
        GGML_ASSERT(a->type == b->type);
2866
0
    }
2867
2868
0
    int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i];
2869
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0);
2870
2871
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
2872
0
    ggml_set_op_params_i32(result, 1, (int32_t) swapped);
2873
2874
0
    result->op     = GGML_OP_GLU;
2875
0
    result->src[0] = a;
2876
0
    result->src[1] = b;
2877
2878
0
    return result;
2879
0
}
2880
2881
// ggml_floor
2882
2883
struct ggml_tensor * ggml_floor(
2884
        struct ggml_context * ctx,
2885
0
        struct ggml_tensor  * a) {
2886
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2887
0
}
2888
2889
struct ggml_tensor * ggml_floor_inplace(
2890
        struct ggml_context * ctx,
2891
0
        struct ggml_tensor  * a) {
2892
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2893
0
}
2894
2895
// ggml_ceil
2896
2897
struct ggml_tensor * ggml_ceil(
2898
        struct ggml_context * ctx,
2899
0
        struct ggml_tensor  * a) {
2900
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2901
0
}
2902
2903
struct ggml_tensor * ggml_ceil_inplace(
2904
        struct ggml_context * ctx,
2905
0
        struct ggml_tensor  * a) {
2906
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2907
0
}
2908
2909
//ggml_round
2910
2911
struct ggml_tensor * ggml_round(
2912
        struct ggml_context * ctx,
2913
0
        struct ggml_tensor  * a) {
2914
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2915
0
}
2916
2917
struct ggml_tensor * ggml_round_inplace(
2918
        struct ggml_context * ctx,
2919
0
        struct ggml_tensor  * a) {
2920
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2921
0
}
2922
2923
//ggml_trunc
2924
2925
struct ggml_tensor * ggml_trunc(
2926
        struct ggml_context * ctx,
2927
0
        struct ggml_tensor  * a) {
2928
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2929
0
}
2930
2931
struct ggml_tensor * ggml_trunc_inplace(
2932
        struct ggml_context * ctx,
2933
0
        struct ggml_tensor  * a) {
2934
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2935
0
}
2936
2937
struct ggml_tensor * ggml_glu(
2938
        struct ggml_context * ctx,
2939
        struct ggml_tensor  * a,
2940
        enum ggml_glu_op      op,
2941
0
        bool                  swapped) {
2942
0
    return ggml_glu_impl(ctx, a, NULL, op, swapped);
2943
0
}
2944
2945
struct ggml_tensor * ggml_glu_split(
2946
        struct ggml_context * ctx,
2947
        struct ggml_tensor  * a,
2948
        struct ggml_tensor  * b,
2949
0
        enum ggml_glu_op      op) {
2950
0
    return ggml_glu_impl(ctx, a, b, op, false);
2951
0
}
2952
2953
// ggml_reglu
2954
2955
struct ggml_tensor * ggml_reglu(
2956
        struct ggml_context * ctx,
2957
0
        struct ggml_tensor  * a) {
2958
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false);
2959
0
}
2960
2961
struct ggml_tensor * ggml_reglu_swapped(
2962
        struct ggml_context * ctx,
2963
0
        struct ggml_tensor  * a) {
2964
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true);
2965
0
}
2966
2967
struct ggml_tensor * ggml_reglu_split(
2968
        struct ggml_context * ctx,
2969
        struct ggml_tensor  * a,
2970
0
        struct ggml_tensor  * b) {
2971
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false);
2972
0
}
2973
2974
// ggml_geglu
2975
2976
struct ggml_tensor * ggml_geglu(
2977
        struct ggml_context * ctx,
2978
0
        struct ggml_tensor  * a) {
2979
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false);
2980
0
}
2981
2982
struct ggml_tensor * ggml_geglu_swapped(
2983
        struct ggml_context * ctx,
2984
0
        struct ggml_tensor  * a) {
2985
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true);
2986
0
}
2987
2988
struct ggml_tensor * ggml_geglu_split(
2989
        struct ggml_context * ctx,
2990
        struct ggml_tensor  * a,
2991
0
        struct ggml_tensor  * b) {
2992
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false);
2993
0
}
2994
2995
// ggml_swiglu
2996
2997
struct ggml_tensor * ggml_swiglu(
2998
        struct ggml_context * ctx,
2999
0
        struct ggml_tensor  * a) {
3000
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false);
3001
0
}
3002
3003
struct ggml_tensor * ggml_swiglu_swapped(
3004
        struct ggml_context * ctx,
3005
0
        struct ggml_tensor  * a) {
3006
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true);
3007
0
}
3008
3009
struct ggml_tensor * ggml_swiglu_split(
3010
        struct ggml_context * ctx,
3011
        struct ggml_tensor  * a,
3012
0
        struct ggml_tensor  * b) {
3013
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
3014
0
}
3015
3016
// ggml_geglu_erf
3017
3018
struct ggml_tensor * ggml_geglu_erf(
3019
        struct ggml_context * ctx,
3020
0
        struct ggml_tensor  * a) {
3021
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
3022
0
}
3023
3024
struct ggml_tensor * ggml_geglu_erf_swapped(
3025
        struct ggml_context * ctx,
3026
0
        struct ggml_tensor  * a) {
3027
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
3028
0
}
3029
3030
struct ggml_tensor * ggml_geglu_erf_split(
3031
        struct ggml_context * ctx,
3032
        struct ggml_tensor  * a,
3033
0
        struct ggml_tensor  * b) {
3034
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
3035
0
}
3036
3037
// ggml_geglu_quick
3038
3039
struct ggml_tensor * ggml_geglu_quick(
3040
        struct ggml_context * ctx,
3041
0
        struct ggml_tensor  * a) {
3042
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
3043
0
}
3044
3045
struct ggml_tensor * ggml_geglu_quick_swapped(
3046
        struct ggml_context * ctx,
3047
0
        struct ggml_tensor  * a) {
3048
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
3049
0
}
3050
3051
struct ggml_tensor * ggml_geglu_quick_split(
3052
        struct ggml_context * ctx,
3053
        struct ggml_tensor  * a,
3054
0
        struct ggml_tensor  * b) {
3055
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
3056
0
}
3057
3058
struct ggml_tensor * ggml_swiglu_oai(
3059
        struct ggml_context * ctx,
3060
        struct ggml_tensor  * a,
3061
        struct ggml_tensor  * b,
3062
        float                 alpha,
3063
0
        float                 limit) {
3064
0
    struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false);
3065
0
    ggml_set_op_params_f32(result, 2, alpha);
3066
0
    ggml_set_op_params_f32(result, 3, limit);
3067
3068
0
    return result;
3069
0
}
3070
3071
// ggml_norm
3072
3073
static struct ggml_tensor * ggml_norm_impl(
3074
        struct ggml_context * ctx,
3075
        struct ggml_tensor  * a,
3076
        float                 eps,
3077
0
        bool                  inplace) {
3078
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3079
3080
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3081
3082
0
    result->op     = GGML_OP_NORM;
3083
0
    result->src[0] = a;
3084
3085
0
    return result;
3086
0
}
3087
3088
struct ggml_tensor * ggml_norm(
3089
        struct ggml_context * ctx,
3090
        struct ggml_tensor  * a,
3091
0
        float                 eps) {
3092
0
    return ggml_norm_impl(ctx, a, eps, false);
3093
0
}
3094
3095
struct ggml_tensor * ggml_norm_inplace(
3096
        struct ggml_context * ctx,
3097
        struct ggml_tensor  * a,
3098
0
        float                 eps) {
3099
0
    return ggml_norm_impl(ctx, a, eps, true);
3100
0
}
3101
3102
// ggml_rms_norm
3103
3104
static struct ggml_tensor * ggml_rms_norm_impl(
3105
        struct ggml_context * ctx,
3106
        struct ggml_tensor  * a,
3107
        float                 eps,
3108
0
        bool                  inplace) {
3109
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3110
3111
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3112
3113
0
    result->op     = GGML_OP_RMS_NORM;
3114
0
    result->src[0] = a;
3115
3116
0
    return result;
3117
0
}
3118
3119
struct ggml_tensor * ggml_rms_norm(
3120
        struct ggml_context * ctx,
3121
        struct ggml_tensor  * a,
3122
0
        float                 eps) {
3123
0
    return ggml_rms_norm_impl(ctx, a, eps, false);
3124
0
}
3125
3126
struct ggml_tensor * ggml_rms_norm_inplace(
3127
        struct ggml_context * ctx,
3128
        struct ggml_tensor  * a,
3129
0
        float                 eps) {
3130
0
    return ggml_rms_norm_impl(ctx, a, eps, true);
3131
0
}
3132
3133
// ggml_rms_norm_back
3134
3135
struct ggml_tensor * ggml_rms_norm_back(
3136
        struct ggml_context * ctx,
3137
        struct ggml_tensor  * a,
3138
        struct ggml_tensor  * b,
3139
0
        float                 eps) {
3140
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3141
3142
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3143
3144
0
    result->op     = GGML_OP_RMS_NORM_BACK;
3145
0
    result->src[0] = a;
3146
0
    result->src[1] = b;
3147
3148
0
    return result;
3149
0
}
3150
3151
// ggml_group_norm
3152
3153
static struct ggml_tensor * ggml_group_norm_impl(
3154
        struct ggml_context * ctx,
3155
        struct ggml_tensor  * a,
3156
        int                   n_groups,
3157
        float                 eps,
3158
0
        bool                  inplace) {
3159
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3160
3161
0
    ggml_set_op_params_i32(result, 0, n_groups);
3162
0
    ggml_set_op_params_f32(result, 1, eps);
3163
3164
0
    result->op     = GGML_OP_GROUP_NORM;
3165
0
    result->src[0] = a;
3166
3167
0
    return result;
3168
0
}
3169
3170
struct ggml_tensor * ggml_group_norm(
3171
        struct ggml_context * ctx,
3172
        struct ggml_tensor  * a,
3173
        int                   n_groups,
3174
0
        float                 eps) {
3175
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
3176
0
}
3177
3178
struct ggml_tensor * ggml_group_norm_inplace(
3179
        struct ggml_context * ctx,
3180
        struct ggml_tensor  * a,
3181
        int                   n_groups,
3182
0
        float                 eps) {
3183
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
3184
0
}
3185
3186
// ggml_l2_norm
3187
3188
static struct ggml_tensor * ggml_l2_norm_impl(
3189
        struct ggml_context * ctx,
3190
        struct ggml_tensor  * a,
3191
        float                 eps,
3192
0
        bool                  inplace) {
3193
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3194
3195
0
    ggml_set_op_params_f32(result, 0, eps);
3196
3197
0
    result->op     = GGML_OP_L2_NORM;
3198
0
    result->src[0] = a;
3199
3200
0
    return result;
3201
0
}
3202
3203
struct ggml_tensor * ggml_l2_norm(
3204
        struct ggml_context * ctx,
3205
        struct ggml_tensor  * a,
3206
0
        float                 eps) {
3207
0
    return ggml_l2_norm_impl(ctx, a, eps, false);
3208
0
}
3209
3210
struct ggml_tensor * ggml_l2_norm_inplace(
3211
        struct ggml_context * ctx,
3212
        struct ggml_tensor  * a,
3213
0
        float                 eps) {
3214
0
    return ggml_l2_norm_impl(ctx, a, eps, true);
3215
0
}
3216
3217
// ggml_mul_mat
3218
3219
0
static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3220
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3221
3222
0
    return (t0->ne[0]           == t1->ne[0])  &&
3223
0
           (t1->ne[2]%t0->ne[2] == 0)          && // verify t0 is broadcastable
3224
0
           (t1->ne[3]%t0->ne[3] == 0);
3225
0
}
3226
3227
struct ggml_tensor * ggml_mul_mat(
3228
        struct ggml_context * ctx,
3229
        struct ggml_tensor  * a,
3230
0
        struct ggml_tensor  * b) {
3231
0
    GGML_ASSERT(ggml_can_mul_mat(a, b));
3232
0
    GGML_ASSERT(!ggml_is_transposed(a));
3233
3234
0
    const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
3235
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3236
3237
0
    result->op     = GGML_OP_MUL_MAT;
3238
0
    result->src[0] = a;
3239
0
    result->src[1] = b;
3240
3241
0
    return result;
3242
0
}
3243
3244
void ggml_mul_mat_set_prec(
3245
        struct ggml_tensor * a,
3246
0
        enum ggml_prec       prec) {
3247
0
    GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
3248
3249
0
    const int32_t prec_i32 = (int32_t) prec;
3250
3251
0
    ggml_set_op_params_i32(a, 0, prec_i32);
3252
0
}
3253
3254
// ggml_mul_mat_id
3255
3256
/*
3257
    c = ggml_mul_mat_id(ctx, as, b, ids);
3258
3259
    as  -> [cols, rows, n_expert]
3260
    b   -> [cols, n_expert_used, n_tokens]
3261
    ids -> [n_expert_used, n_tokens] (i32)
3262
    c   -> [rows, n_expert_used, n_tokens]
3263
3264
    in b, n_expert_used can be broadcasted to match the n_expert_used of ids
3265
3266
    c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
3267
*/
3268
struct ggml_tensor * ggml_mul_mat_id(
3269
        struct ggml_context * ctx,
3270
        struct ggml_tensor  * as,
3271
        struct ggml_tensor  * b,
3272
0
        struct ggml_tensor  * ids) {
3273
0
    GGML_ASSERT(!ggml_is_transposed(as));
3274
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
3275
3276
0
    GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
3277
0
    GGML_ASSERT(b->ne[3] == 1); // b is 3d
3278
0
    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
3279
0
    GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
3280
0
    GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
3281
0
    GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
3282
3283
0
    const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
3284
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3285
3286
0
    result->op     = GGML_OP_MUL_MAT_ID;
3287
0
    result->src[0] = as;
3288
0
    result->src[1] = b;
3289
0
    result->src[2] = ids;
3290
3291
0
    return result;
3292
0
}
3293
3294
// ggml_out_prod
3295
3296
0
static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3297
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3298
3299
0
    return (t0->ne[1] == t1->ne[1])   &&
3300
0
           (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
3301
0
           (t1->ne[3]%t0->ne[3] == 0);
3302
0
}
3303
3304
struct ggml_tensor * ggml_out_prod(
3305
        struct ggml_context * ctx,
3306
        struct ggml_tensor  * a,
3307
0
        struct ggml_tensor  * b) {
3308
0
    GGML_ASSERT(ggml_can_out_prod(a, b));
3309
0
    GGML_ASSERT(!ggml_is_transposed(a));
3310
3311
    // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
3312
0
    const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
3313
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3314
3315
0
    result->op     = GGML_OP_OUT_PROD;
3316
0
    result->src[0] = a;
3317
0
    result->src[1] = b;
3318
3319
0
    return result;
3320
0
}
3321
3322
// ggml_scale
3323
3324
static struct ggml_tensor * ggml_scale_impl(
3325
        struct ggml_context * ctx,
3326
        struct ggml_tensor  * a,
3327
        float                 s,
3328
        float                 b,
3329
0
        bool                  inplace) {
3330
0
    GGML_ASSERT(ggml_is_padded_1d(a));
3331
3332
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3333
3334
0
    float params[2] = { s, b };
3335
0
    ggml_set_op_params(result, &params, sizeof(params));
3336
3337
0
    result->op     = GGML_OP_SCALE;
3338
0
    result->src[0] = a;
3339
3340
0
    return result;
3341
0
}
3342
3343
struct ggml_tensor * ggml_scale(
3344
        struct ggml_context * ctx,
3345
        struct ggml_tensor  * a,
3346
0
        float                 s) {
3347
0
    return ggml_scale_impl(ctx, a, s, 0.0, false);
3348
0
}
3349
3350
struct ggml_tensor * ggml_scale_inplace(
3351
        struct ggml_context * ctx,
3352
        struct ggml_tensor  * a,
3353
0
        float                 s) {
3354
0
    return ggml_scale_impl(ctx, a, s, 0.0, true);
3355
0
}
3356
3357
struct ggml_tensor * ggml_scale_bias(
3358
        struct ggml_context * ctx,
3359
        struct ggml_tensor  * a,
3360
        float                 s,
3361
0
        float                 b) {
3362
0
    return ggml_scale_impl(ctx, a, s, b, false);
3363
0
}
3364
3365
struct ggml_tensor * ggml_scale_bias_inplace(
3366
        struct ggml_context * ctx,
3367
        struct ggml_tensor  * a,
3368
        float                 s,
3369
0
        float                 b) {
3370
0
    return ggml_scale_impl(ctx, a, s, b, true);
3371
0
}
3372
3373
// ggml_set
3374
3375
static struct ggml_tensor * ggml_set_impl(
3376
        struct ggml_context * ctx,
3377
        struct ggml_tensor  * a,
3378
        struct ggml_tensor  * b,
3379
        size_t                nb1,
3380
        size_t                nb2,
3381
        size_t                nb3,
3382
        size_t                offset,
3383
0
        bool                  inplace) {
3384
0
    GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
3385
3386
    // make a view of the destination
3387
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3388
3389
0
    GGML_ASSERT(offset < (size_t)(1 << 30));
3390
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
3391
0
    ggml_set_op_params(result, params, sizeof(params));
3392
3393
0
    result->op     = GGML_OP_SET;
3394
0
    result->src[0] = a;
3395
0
    result->src[1] = b;
3396
3397
0
    return result;
3398
0
}
3399
3400
struct ggml_tensor * ggml_set(
3401
        struct ggml_context * ctx,
3402
        struct ggml_tensor  * a,
3403
        struct ggml_tensor  * b,
3404
        size_t                nb1,
3405
        size_t                nb2,
3406
        size_t                nb3,
3407
0
        size_t                offset) {
3408
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
3409
0
}
3410
3411
struct ggml_tensor * ggml_set_inplace(
3412
        struct ggml_context * ctx,
3413
        struct ggml_tensor  * a,
3414
        struct ggml_tensor  * b,
3415
        size_t                nb1,
3416
        size_t                nb2,
3417
        size_t                nb3,
3418
0
        size_t                offset) {
3419
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
3420
0
}
3421
3422
struct ggml_tensor * ggml_set_1d(
3423
        struct ggml_context * ctx,
3424
        struct ggml_tensor  * a,
3425
        struct ggml_tensor  * b,
3426
0
        size_t                offset) {
3427
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
3428
0
}
3429
3430
struct ggml_tensor * ggml_set_1d_inplace(
3431
        struct ggml_context * ctx,
3432
        struct ggml_tensor  * a,
3433
        struct ggml_tensor  * b,
3434
0
        size_t                offset) {
3435
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
3436
0
}
3437
3438
struct ggml_tensor * ggml_set_2d(
3439
        struct ggml_context * ctx,
3440
        struct ggml_tensor  * a,
3441
        struct ggml_tensor  * b,
3442
        size_t                nb1,
3443
0
        size_t                offset) {
3444
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
3445
0
}
3446
3447
struct ggml_tensor * ggml_set_2d_inplace(
3448
        struct ggml_context * ctx,
3449
        struct ggml_tensor  * a,
3450
        struct ggml_tensor  * b,
3451
        size_t                nb1,
3452
0
        size_t                offset) {
3453
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
3454
0
}
3455
3456
// ggml_cpy
3457
3458
static struct ggml_tensor * ggml_cpy_impl(
3459
        struct ggml_context * ctx,
3460
        struct ggml_tensor  * a,
3461
0
        struct ggml_tensor  * b) {
3462
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3463
3464
    // make a view of the destination
3465
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, b);
3466
0
    if (strlen(b->name) > 0) {
3467
0
        ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
3468
0
    } else {
3469
0
        ggml_format_name(result, "%s (copy)", a->name);
3470
0
    }
3471
3472
0
    result->op     = GGML_OP_CPY;
3473
0
    result->src[0] = a;
3474
0
    result->src[1] = b;
3475
3476
0
    return result;
3477
0
}
3478
3479
struct ggml_tensor * ggml_cpy(
3480
        struct ggml_context * ctx,
3481
        struct ggml_tensor * a,
3482
0
        struct ggml_tensor * b) {
3483
0
    return ggml_cpy_impl(ctx, a, b);
3484
0
}
3485
3486
struct ggml_tensor * ggml_cast(
3487
        struct ggml_context * ctx,
3488
        struct ggml_tensor  * a,
3489
0
        enum   ggml_type      type) {
3490
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
3491
0
    ggml_format_name(result, "%s (copy)", a->name);
3492
3493
0
    result->op     = GGML_OP_CPY;
3494
0
    result->src[0] = a;
3495
0
    result->src[1] = result; // note: this self-reference might seem redundant, but it's actually needed by some
3496
                             //       backends for consistency with ggml_cpy_impl() above
3497
3498
0
    return result;
3499
0
}
3500
3501
// ggml_cont
3502
3503
static struct ggml_tensor * ggml_cont_impl(
3504
        struct ggml_context * ctx,
3505
0
        struct ggml_tensor  * a) {
3506
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3507
0
    ggml_format_name(result, "%s (cont)", a->name);
3508
3509
0
    result->op     = GGML_OP_CONT;
3510
0
    result->src[0] = a;
3511
3512
0
    return result;
3513
0
}
3514
3515
struct ggml_tensor * ggml_cont(
3516
        struct ggml_context * ctx,
3517
0
        struct ggml_tensor * a) {
3518
0
    return ggml_cont_impl(ctx, a);
3519
0
}
3520
3521
// make contiguous, with new shape
3522
GGML_API struct ggml_tensor * ggml_cont_1d(
3523
        struct ggml_context * ctx,
3524
        struct ggml_tensor  * a,
3525
0
        int64_t               ne0) {
3526
0
    return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
3527
0
}
3528
3529
GGML_API struct ggml_tensor * ggml_cont_2d(
3530
        struct ggml_context * ctx,
3531
        struct ggml_tensor  * a,
3532
        int64_t               ne0,
3533
0
        int64_t               ne1) {
3534
0
    return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
3535
0
}
3536
3537
GGML_API struct ggml_tensor * ggml_cont_3d(
3538
        struct ggml_context * ctx,
3539
        struct ggml_tensor  * a,
3540
        int64_t               ne0,
3541
        int64_t               ne1,
3542
0
        int64_t               ne2) {
3543
0
    return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
3544
0
}
3545
3546
struct ggml_tensor * ggml_cont_4d(
3547
        struct ggml_context * ctx,
3548
        struct ggml_tensor  * a,
3549
        int64_t               ne0,
3550
        int64_t               ne1,
3551
        int64_t               ne2,
3552
0
        int64_t               ne3) {
3553
0
    GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
3554
3555
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
3556
0
    ggml_format_name(result, "%s (cont)", a->name);
3557
3558
0
    result->op     = GGML_OP_CONT;
3559
0
    result->src[0] = a;
3560
3561
0
    return result;
3562
0
}
3563
3564
// ggml_reshape
3565
3566
struct ggml_tensor * ggml_reshape(
3567
        struct ggml_context * ctx,
3568
        struct ggml_tensor * a,
3569
0
        struct ggml_tensor * b) {
3570
0
    GGML_ASSERT(ggml_is_contiguous(a));
3571
    // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
3572
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3573
3574
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
3575
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3576
3577
0
    result->op     = GGML_OP_RESHAPE;
3578
0
    result->src[0] = a;
3579
3580
0
    return result;
3581
0
}
3582
3583
struct ggml_tensor * ggml_reshape_1d(
3584
        struct ggml_context * ctx,
3585
        struct ggml_tensor  * a,
3586
0
        int64_t               ne0) {
3587
0
    GGML_ASSERT(ggml_is_contiguous(a));
3588
0
    GGML_ASSERT(ggml_nelements(a) == ne0);
3589
3590
0
    const int64_t ne[1] = { ne0 };
3591
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
3592
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3593
3594
0
    result->op     = GGML_OP_RESHAPE;
3595
0
    result->src[0] = a;
3596
3597
0
    return result;
3598
0
}
3599
3600
struct ggml_tensor * ggml_reshape_2d(
3601
        struct ggml_context * ctx,
3602
        struct ggml_tensor  * a,
3603
        int64_t               ne0,
3604
0
        int64_t               ne1) {
3605
0
    GGML_ASSERT(ggml_is_contiguous(a));
3606
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
3607
3608
0
    const int64_t ne[2] = { ne0, ne1 };
3609
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
3610
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3611
3612
0
    result->op     = GGML_OP_RESHAPE;
3613
0
    result->src[0] = a;
3614
3615
0
    return result;
3616
0
}
3617
3618
struct ggml_tensor * ggml_reshape_3d(
3619
        struct ggml_context * ctx,
3620
        struct ggml_tensor  * a,
3621
        int64_t               ne0,
3622
        int64_t               ne1,
3623
0
        int64_t               ne2) {
3624
0
    GGML_ASSERT(ggml_is_contiguous(a));
3625
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
3626
3627
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3628
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
3629
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3630
3631
0
    result->op     = GGML_OP_RESHAPE;
3632
0
    result->src[0] = a;
3633
3634
0
    return result;
3635
0
}
3636
3637
struct ggml_tensor * ggml_reshape_4d(
3638
        struct ggml_context * ctx,
3639
        struct ggml_tensor  * a,
3640
        int64_t               ne0,
3641
        int64_t               ne1,
3642
        int64_t               ne2,
3643
0
        int64_t               ne3) {
3644
0
    GGML_ASSERT(ggml_is_contiguous(a));
3645
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
3646
3647
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3648
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
3649
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3650
3651
0
    result->op     = GGML_OP_RESHAPE;
3652
0
    result->src[0] = a;
3653
3654
0
    return result;
3655
0
}
3656
3657
static struct ggml_tensor * ggml_view_impl(
3658
        struct ggml_context * ctx,
3659
        struct ggml_tensor  * a,
3660
        int                   n_dims,
3661
        const int64_t       * ne,
3662
0
        size_t                offset) {
3663
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
3664
0
    ggml_format_name(result, "%s (view)", a->name);
3665
3666
0
    ggml_set_op_params(result, &offset, sizeof(offset));
3667
3668
0
    result->op     = GGML_OP_VIEW;
3669
0
    result->src[0] = a;
3670
3671
0
    return result;
3672
0
}
3673
3674
// ggml_view_1d
3675
3676
struct ggml_tensor * ggml_view_1d(
3677
        struct ggml_context * ctx,
3678
        struct ggml_tensor  * a,
3679
        int64_t               ne0,
3680
0
        size_t                offset) {
3681
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
3682
3683
0
    return result;
3684
0
}
3685
3686
// ggml_view_2d
3687
3688
struct ggml_tensor * ggml_view_2d(
3689
        struct ggml_context * ctx,
3690
        struct ggml_tensor  * a,
3691
        int64_t               ne0,
3692
        int64_t               ne1,
3693
        size_t                nb1,
3694
0
        size_t                offset) {
3695
0
    const int64_t ne[2] = { ne0, ne1 };
3696
3697
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
3698
3699
0
    result->nb[1] = nb1;
3700
0
    result->nb[2] = result->nb[1]*ne1;
3701
0
    result->nb[3] = result->nb[2];
3702
3703
0
    return result;
3704
0
}
3705
3706
// ggml_view_3d
3707
3708
struct ggml_tensor * ggml_view_3d(
3709
        struct ggml_context * ctx,
3710
        struct ggml_tensor  * a,
3711
        int64_t               ne0,
3712
        int64_t               ne1,
3713
        int64_t               ne2,
3714
        size_t                nb1,
3715
        size_t                nb2,
3716
0
        size_t                offset) {
3717
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3718
3719
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
3720
3721
0
    result->nb[1] = nb1;
3722
0
    result->nb[2] = nb2;
3723
0
    result->nb[3] = result->nb[2]*ne2;
3724
3725
0
    return result;
3726
0
}
3727
3728
// ggml_view_4d
3729
3730
struct ggml_tensor * ggml_view_4d(
3731
        struct ggml_context * ctx,
3732
        struct ggml_tensor  * a,
3733
        int64_t               ne0,
3734
        int64_t               ne1,
3735
        int64_t               ne2,
3736
        int64_t               ne3,
3737
        size_t                nb1,
3738
        size_t                nb2,
3739
        size_t                nb3,
3740
0
        size_t                offset) {
3741
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3742
3743
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
3744
3745
0
    result->nb[1] = nb1;
3746
0
    result->nb[2] = nb2;
3747
0
    result->nb[3] = nb3;
3748
3749
0
    return result;
3750
0
}
3751
3752
// ggml_permute
3753
3754
struct ggml_tensor * ggml_permute(
3755
        struct ggml_context * ctx,
3756
        struct ggml_tensor  * a,
3757
        int                   axis0,
3758
        int                   axis1,
3759
        int                   axis2,
3760
0
        int                   axis3) {
3761
0
    GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
3762
0
    GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
3763
0
    GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
3764
0
    GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);
3765
3766
0
    GGML_ASSERT(axis0 != axis1);
3767
0
    GGML_ASSERT(axis0 != axis2);
3768
0
    GGML_ASSERT(axis0 != axis3);
3769
0
    GGML_ASSERT(axis1 != axis2);
3770
0
    GGML_ASSERT(axis1 != axis3);
3771
0
    GGML_ASSERT(axis2 != axis3);
3772
3773
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3774
0
    ggml_format_name(result, "%s (permuted)", a->name);
3775
3776
0
    int ne[GGML_MAX_DIMS];
3777
0
    int nb[GGML_MAX_DIMS];
3778
3779
0
    ne[axis0] = a->ne[0];
3780
0
    ne[axis1] = a->ne[1];
3781
0
    ne[axis2] = a->ne[2];
3782
0
    ne[axis3] = a->ne[3];
3783
3784
0
    nb[axis0] = a->nb[0];
3785
0
    nb[axis1] = a->nb[1];
3786
0
    nb[axis2] = a->nb[2];
3787
0
    nb[axis3] = a->nb[3];
3788
3789
0
    result->ne[0] = ne[0];
3790
0
    result->ne[1] = ne[1];
3791
0
    result->ne[2] = ne[2];
3792
0
    result->ne[3] = ne[3];
3793
3794
0
    result->nb[0] = nb[0];
3795
0
    result->nb[1] = nb[1];
3796
0
    result->nb[2] = nb[2];
3797
0
    result->nb[3] = nb[3];
3798
3799
0
    result->op     = GGML_OP_PERMUTE;
3800
0
    result->src[0] = a;
3801
3802
0
    int32_t params[] = { axis0, axis1, axis2, axis3 };
3803
0
    ggml_set_op_params(result, params, sizeof(params));
3804
3805
0
    return result;
3806
0
}
3807
3808
// ggml_transpose
3809
3810
struct ggml_tensor * ggml_transpose(
3811
        struct ggml_context * ctx,
3812
0
        struct ggml_tensor  * a) {
3813
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3814
0
    ggml_format_name(result, "%s (transposed)", a->name);
3815
3816
0
    result->ne[0] = a->ne[1];
3817
0
    result->ne[1] = a->ne[0];
3818
3819
0
    result->nb[0] = a->nb[1];
3820
0
    result->nb[1] = a->nb[0];
3821
3822
0
    result->op     = GGML_OP_TRANSPOSE;
3823
0
    result->src[0] = a;
3824
3825
0
    return result;
3826
0
}
3827
3828
// ggml_get_rows
3829
3830
struct ggml_tensor * ggml_get_rows(
3831
        struct ggml_context * ctx,
3832
        struct ggml_tensor  * a,
3833
0
        struct ggml_tensor  * b) {
3834
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
3835
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
3836
0
    GGML_ASSERT(b->ne[3] == 1);
3837
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
3838
3839
    // TODO: implement non F32 return
3840
0
    enum ggml_type type = GGML_TYPE_F32;
3841
0
    if (a->type == GGML_TYPE_I32) {
3842
0
        type = a->type;
3843
0
    }
3844
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
3845
3846
0
    result->op     = GGML_OP_GET_ROWS;
3847
0
    result->src[0] = a;
3848
0
    result->src[1] = b;
3849
3850
0
    return result;
3851
0
}
3852
3853
// ggml_get_rows_back
3854
3855
struct ggml_tensor * ggml_get_rows_back(
3856
        struct ggml_context * ctx,
3857
        struct ggml_tensor  * a,
3858
        struct ggml_tensor  * b,
3859
0
        struct ggml_tensor  * c) {
3860
0
    GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
3861
0
    GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
3862
3863
    // TODO: implement non F32 return
3864
    //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
3865
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
3866
3867
0
    result->op     = GGML_OP_GET_ROWS_BACK;
3868
0
    result->src[0] = a;
3869
0
    result->src[1] = b;
3870
3871
0
    return result;
3872
0
}
3873
3874
// ggml_set_rows
3875
3876
struct ggml_tensor * ggml_set_rows(
3877
        struct ggml_context * ctx,
3878
        struct ggml_tensor  * a,
3879
        struct ggml_tensor  * b,
3880
0
        struct ggml_tensor  * c) {
3881
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
3882
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
3883
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
3884
0
    GGML_ASSERT(b->ne[1] == c->ne[0]);
3885
0
    GGML_ASSERT(b->ne[2] % c->ne[1] == 0);
3886
0
    GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
3887
0
    GGML_ASSERT(c->ne[3] == 1);
3888
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
3889
0
    GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32);
3890
3891
0
    GGML_ASSERT(ggml_is_contiguous_rows(a));
3892
0
    GGML_ASSERT(ggml_is_contiguous_rows(b));
3893
3894
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3895
3896
0
    result->op     = GGML_OP_SET_ROWS;
3897
0
    result->src[0] = b;
3898
0
    result->src[1] = c;
3899
0
    result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931)
3900
3901
0
    return result;
3902
0
}
3903
3904
// ggml_diag
3905
3906
struct ggml_tensor * ggml_diag(
3907
        struct ggml_context * ctx,
3908
0
        struct ggml_tensor  * a) {
3909
0
    GGML_ASSERT(a->ne[1] == 1);
3910
3911
0
    const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
3912
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
3913
3914
0
    result->op     = GGML_OP_DIAG;
3915
0
    result->src[0] = a;
3916
3917
0
    return result;
3918
0
}
3919
3920
// ggml_diag_mask_inf
3921
3922
static struct ggml_tensor * ggml_diag_mask_inf_impl(
3923
        struct ggml_context * ctx,
3924
        struct ggml_tensor  * a,
3925
        int                   n_past,
3926
0
        bool                  inplace) {
3927
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3928
3929
0
    int32_t params[] = { n_past };
3930
0
    ggml_set_op_params(result, params, sizeof(params));
3931
3932
0
    result->op     = GGML_OP_DIAG_MASK_INF;
3933
0
    result->src[0] = a;
3934
3935
0
    return result;
3936
0
}
3937
3938
struct ggml_tensor * ggml_diag_mask_inf(
3939
        struct ggml_context * ctx,
3940
        struct ggml_tensor  * a,
3941
0
        int                   n_past) {
3942
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
3943
0
}
3944
3945
struct ggml_tensor * ggml_diag_mask_inf_inplace(
3946
        struct ggml_context * ctx,
3947
        struct ggml_tensor  * a,
3948
0
        int                   n_past) {
3949
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
3950
0
}
3951
3952
// ggml_diag_mask_zero
3953
3954
static struct ggml_tensor * ggml_diag_mask_zero_impl(
3955
        struct ggml_context * ctx,
3956
        struct ggml_tensor  * a,
3957
        int                   n_past,
3958
0
        bool                  inplace) {
3959
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3960
3961
0
    int32_t params[] = { n_past };
3962
0
    ggml_set_op_params(result, params, sizeof(params));
3963
3964
0
    result->op     = GGML_OP_DIAG_MASK_ZERO;
3965
0
    result->src[0] = a;
3966
3967
0
    return result;
3968
0
}
3969
3970
struct ggml_tensor * ggml_diag_mask_zero(
3971
        struct ggml_context * ctx,
3972
        struct ggml_tensor  * a,
3973
0
        int                   n_past) {
3974
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
3975
0
}
3976
3977
struct ggml_tensor * ggml_diag_mask_zero_inplace(
3978
        struct ggml_context * ctx,
3979
        struct ggml_tensor  * a,
3980
0
        int                   n_past) {
3981
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
3982
0
}
3983
3984
// ggml_soft_max
3985
3986
static struct ggml_tensor * ggml_soft_max_impl(
3987
        struct ggml_context * ctx,
3988
        struct ggml_tensor  * a,
3989
        struct ggml_tensor  * mask,
3990
        float                 scale,
3991
        float                 max_bias,
3992
0
        bool                  inplace) {
3993
0
    GGML_ASSERT(ggml_is_contiguous(a));
3994
3995
0
    if (mask) {
3996
0
        GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
3997
0
        GGML_ASSERT(ggml_is_contiguous(mask));
3998
0
        GGML_ASSERT(mask->ne[0] == a->ne[0]);
3999
0
        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
4000
0
        GGML_ASSERT(a->ne[2]%mask->ne[2] == 0);
4001
0
        GGML_ASSERT(a->ne[3]%mask->ne[3] == 0);
4002
0
    }
4003
4004
0
    if (max_bias > 0.0f) {
4005
0
        GGML_ASSERT(mask);
4006
0
    }
4007
4008
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4009
4010
0
    float params[] = { scale, max_bias };
4011
0
    ggml_set_op_params(result, params, sizeof(params));
4012
4013
0
    result->op     = GGML_OP_SOFT_MAX;
4014
0
    result->src[0] = a;
4015
0
    result->src[1] = mask;
4016
4017
0
    return result;
4018
0
}
4019
4020
struct ggml_tensor * ggml_soft_max(
4021
        struct ggml_context * ctx,
4022
0
        struct ggml_tensor  * a) {
4023
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
4024
0
}
4025
4026
struct ggml_tensor * ggml_soft_max_inplace(
4027
        struct ggml_context * ctx,
4028
0
        struct ggml_tensor  * a) {
4029
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
4030
0
}
4031
4032
struct ggml_tensor * ggml_soft_max_ext(
4033
        struct ggml_context * ctx,
4034
        struct ggml_tensor  * a,
4035
        struct ggml_tensor  * mask,
4036
        float                 scale,
4037
0
        float                 max_bias) {
4038
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
4039
0
}
4040
4041
struct ggml_tensor * ggml_soft_max_ext_inplace(
4042
        struct ggml_context * ctx,
4043
        struct ggml_tensor  * a,
4044
        struct ggml_tensor  * mask,
4045
        float                 scale,
4046
0
        float                 max_bias) {
4047
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
4048
0
}
4049
4050
void ggml_soft_max_add_sinks(
4051
        struct ggml_tensor * a,
4052
0
        struct ggml_tensor * sinks) {
4053
0
    if (!sinks) {
4054
0
        a->src[2] = NULL;
4055
0
        return;
4056
0
    }
4057
4058
0
    GGML_ASSERT(a->op == GGML_OP_SOFT_MAX);
4059
0
    GGML_ASSERT(a->src[2] == NULL);
4060
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
4061
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
4062
4063
0
    a->src[2] = sinks;
4064
0
}
4065
4066
// ggml_soft_max_ext_back
4067
4068
static struct ggml_tensor * ggml_soft_max_ext_back_impl(
4069
        struct ggml_context * ctx,
4070
        struct ggml_tensor  * a,
4071
        struct ggml_tensor  * b,
4072
        float                 scale,
4073
        float                 max_bias,
4074
0
        bool                  inplace) {
4075
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4076
4077
0
    result->op     = GGML_OP_SOFT_MAX_BACK;
4078
0
    result->src[0] = a;
4079
0
    result->src[1] = b;
4080
4081
0
    memcpy((float *) result->op_params + 0, &scale,    sizeof(float));
4082
0
    memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));
4083
4084
0
    return result;
4085
0
}
4086
4087
struct ggml_tensor * ggml_soft_max_ext_back(
4088
        struct ggml_context * ctx,
4089
        struct ggml_tensor  * a,
4090
        struct ggml_tensor  * b,
4091
        float                 scale,
4092
0
        float                 max_bias) {
4093
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
4094
0
}
4095
4096
struct ggml_tensor * ggml_soft_max_ext_back_inplace(
4097
        struct ggml_context * ctx,
4098
        struct ggml_tensor  * a,
4099
        struct ggml_tensor  * b,
4100
        float                 scale,
4101
0
        float                 max_bias) {
4102
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
4103
0
}
4104
4105
// ggml_rope
4106
4107
static struct ggml_tensor * ggml_rope_impl(
4108
        struct ggml_context * ctx,
4109
        struct ggml_tensor  * a,
4110
        struct ggml_tensor  * b,
4111
        struct ggml_tensor  * c,
4112
        int                   n_dims,
4113
        int                   sections[GGML_MROPE_SECTIONS],
4114
        int                   mode,
4115
        int                   n_ctx_orig,
4116
        float                 freq_base,
4117
        float                 freq_scale,
4118
        float                 ext_factor,
4119
        float                 attn_factor,
4120
        float                 beta_fast,
4121
        float                 beta_slow,
4122
0
        bool                  inplace) {
4123
0
    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
4124
4125
0
    GGML_ASSERT(ggml_is_vector(b));
4126
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
4127
4128
0
    bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;
4129
0
    if (mrope_used) {
4130
0
        GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
4131
0
    } else {
4132
0
        GGML_ASSERT(a->ne[2] == b->ne[0]);
4133
0
    }
4134
4135
0
    if (c) {
4136
0
        GGML_ASSERT(c->type == GGML_TYPE_F32);
4137
0
        GGML_ASSERT(c->ne[0] >= n_dims / 2);
4138
0
    }
4139
4140
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4141
4142
0
    int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
4143
0
    memcpy(params +  5, &freq_base,    sizeof(float));
4144
0
    memcpy(params +  6, &freq_scale,   sizeof(float));
4145
0
    memcpy(params +  7, &ext_factor,   sizeof(float));
4146
0
    memcpy(params +  8, &attn_factor,  sizeof(float));
4147
0
    memcpy(params +  9, &beta_fast,    sizeof(float));
4148
0
    memcpy(params + 10, &beta_slow,    sizeof(float));
4149
0
    if (mrope_used && sections) {
4150
0
        memcpy(params + 11, sections,  sizeof(int32_t) * GGML_MROPE_SECTIONS);
4151
0
    } else {
4152
0
        memset(params + 11, 0,         sizeof(int32_t) * GGML_MROPE_SECTIONS);
4153
0
    }
4154
0
    ggml_set_op_params(result, params, sizeof(params));
4155
4156
0
    result->op     = GGML_OP_ROPE;
4157
0
    result->src[0] = a;
4158
0
    result->src[1] = b;
4159
0
    result->src[2] = c;
4160
4161
0
    return result;
4162
0
}
4163
4164
struct ggml_tensor * ggml_rope(
4165
        struct ggml_context * ctx,
4166
        struct ggml_tensor  * a,
4167
        struct ggml_tensor  * b,
4168
        int                   n_dims,
4169
0
        int                   mode) {
4170
0
    return ggml_rope_impl(
4171
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
4172
0
    );
4173
0
}
4174
4175
struct ggml_tensor * ggml_rope_multi(
4176
        struct ggml_context * ctx,
4177
        struct ggml_tensor  * a,
4178
        struct ggml_tensor  * b,
4179
        struct ggml_tensor  * c,
4180
        int                   n_dims,
4181
        int                   sections[GGML_MROPE_SECTIONS],
4182
        int                   mode,
4183
        int                   n_ctx_orig,
4184
        float                 freq_base,
4185
        float                 freq_scale,
4186
        float                 ext_factor,
4187
        float                 attn_factor,
4188
        float                 beta_fast,
4189
0
        float                 beta_slow) {
4190
0
    return ggml_rope_impl(
4191
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4192
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4193
0
    );
4194
0
}
4195
4196
struct ggml_tensor * ggml_rope_multi_inplace(
4197
        struct ggml_context * ctx,
4198
        struct ggml_tensor  * a,
4199
        struct ggml_tensor  * b,
4200
        struct ggml_tensor  * c,
4201
        int                   n_dims,
4202
        int                   sections[GGML_MROPE_SECTIONS],
4203
        int                   mode,
4204
        int                   n_ctx_orig,
4205
        float                 freq_base,
4206
        float                 freq_scale,
4207
        float                 ext_factor,
4208
        float                 attn_factor,
4209
        float                 beta_fast,
4210
0
        float                 beta_slow) {
4211
0
    return ggml_rope_impl(
4212
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4213
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4214
0
    );
4215
0
}
4216
4217
struct ggml_tensor * ggml_rope_inplace(
4218
        struct ggml_context * ctx,
4219
        struct ggml_tensor  * a,
4220
        struct ggml_tensor  * b,
4221
        int                   n_dims,
4222
0
        int                   mode) {
4223
0
    return ggml_rope_impl(
4224
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
4225
0
    );
4226
0
}
4227
4228
struct ggml_tensor * ggml_rope_ext(
4229
        struct ggml_context * ctx,
4230
        struct ggml_tensor  * a,
4231
        struct ggml_tensor  * b,
4232
        struct ggml_tensor  * c,
4233
        int                   n_dims,
4234
        int                   mode,
4235
        int                   n_ctx_orig,
4236
        float                 freq_base,
4237
        float                 freq_scale,
4238
        float                 ext_factor,
4239
        float                 attn_factor,
4240
        float                 beta_fast,
4241
0
        float                 beta_slow) {
4242
0
    return ggml_rope_impl(
4243
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4244
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4245
0
    );
4246
0
}
4247
4248
struct ggml_tensor * ggml_rope_ext_inplace(
4249
        struct ggml_context * ctx,
4250
        struct ggml_tensor  * a,
4251
        struct ggml_tensor  * b,
4252
        struct ggml_tensor  * c,
4253
        int                   n_dims,
4254
        int                   mode,
4255
        int                   n_ctx_orig,
4256
        float                 freq_base,
4257
        float                 freq_scale,
4258
        float                 ext_factor,
4259
        float                 attn_factor,
4260
        float                 beta_fast,
4261
0
        float                 beta_slow) {
4262
0
    return ggml_rope_impl(
4263
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4264
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4265
0
    );
4266
0
}
4267
4268
struct ggml_tensor * ggml_rope_custom(
4269
        struct ggml_context * ctx,
4270
        struct ggml_tensor  * a,
4271
        struct ggml_tensor  * b,
4272
        int                   n_dims,
4273
        int                   mode,
4274
        int                   n_ctx_orig,
4275
        float                 freq_base,
4276
        float                 freq_scale,
4277
        float                 ext_factor,
4278
        float                 attn_factor,
4279
        float                 beta_fast,
4280
0
        float                 beta_slow) {
4281
0
    return ggml_rope_impl(
4282
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4283
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4284
0
    );
4285
0
}
4286
4287
struct ggml_tensor * ggml_rope_custom_inplace(
4288
        struct ggml_context * ctx,
4289
        struct ggml_tensor  * a,
4290
        struct ggml_tensor  * b,
4291
        int                   n_dims,
4292
        int                   mode,
4293
        int                   n_ctx_orig,
4294
        float                 freq_base,
4295
        float                 freq_scale,
4296
        float                 ext_factor,
4297
        float                 attn_factor,
4298
        float                 beta_fast,
4299
0
        float                 beta_slow) {
4300
0
    return ggml_rope_impl(
4301
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4302
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4303
0
    );
4304
0
}
4305
4306
// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get
4307
// `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
4308
0
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
4309
0
    return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
4310
0
}
4311
4312
void ggml_rope_yarn_corr_dims(
4313
    int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
4314
0
) {
4315
    // start and end correction dims
4316
0
    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
4317
0
    float end   =  ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
4318
0
    dims[0] = MAX(0, start);
4319
0
    dims[1] = MIN(n_dims - 1, end);
4320
0
}
4321
4322
// ggml_rope_back
4323
4324
struct ggml_tensor * ggml_rope_ext_back(
4325
        struct ggml_context * ctx,
4326
        struct ggml_tensor  * a,
4327
        struct ggml_tensor  * b,
4328
        struct ggml_tensor  * c,
4329
        int                   n_dims,
4330
        int                   mode,
4331
        int                   n_ctx_orig,
4332
        float                 freq_base,
4333
        float                 freq_scale,
4334
        float                 ext_factor,
4335
        float                 attn_factor,
4336
        float                 beta_fast,
4337
0
        float                 beta_slow) {
4338
0
    struct ggml_tensor * result = ggml_rope_ext(
4339
0
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4340
0
    result->op = GGML_OP_ROPE_BACK;
4341
0
    return result;
4342
0
}
4343
4344
struct ggml_tensor * ggml_rope_multi_back(
4345
        struct ggml_context * ctx,
4346
        struct ggml_tensor  * a,
4347
        struct ggml_tensor  * b,
4348
        struct ggml_tensor  * c,
4349
        int                   n_dims,
4350
        int                   sections[4],
4351
        int                   mode,
4352
        int                   n_ctx_orig,
4353
        float                 freq_base,
4354
        float                 freq_scale,
4355
        float                 ext_factor,
4356
        float                 attn_factor,
4357
        float                 beta_fast,
4358
0
        float                 beta_slow) {
4359
0
    struct ggml_tensor * result = ggml_rope_multi(
4360
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4361
0
    result->op = GGML_OP_ROPE_BACK;
4362
0
    return result;
4363
0
}
4364
// ggml_clamp
4365
4366
struct ggml_tensor * ggml_clamp(
4367
        struct ggml_context * ctx,
4368
        struct ggml_tensor  * a,
4369
        float                 min,
4370
0
        float                 max) {
4371
    // TODO: when implement backward, fix this:
4372
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
4373
4374
0
    float params[] = { min, max };
4375
0
    ggml_set_op_params(result, params, sizeof(params));
4376
4377
0
    result->op     = GGML_OP_CLAMP;
4378
0
    result->src[0] = a;
4379
4380
0
    return result;
4381
0
}
4382
4383
0
static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4384
0
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
4385
0
}
4386
4387
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
4388
// a: [OC,IC, KH, KW]
4389
// b: [N, IC, IH, IW]
4390
// result: [N, OH, OW, IC*KH*KW]
4391
struct ggml_tensor * ggml_im2col(
4392
        struct ggml_context * ctx,
4393
        struct ggml_tensor  * a,
4394
        struct ggml_tensor  * b,
4395
        int                   s0,
4396
        int                   s1,
4397
        int                   p0,
4398
        int                   p1,
4399
        int                   d0,
4400
        int                   d1,
4401
        bool                  is_2D,
4402
0
        enum ggml_type        dst_type) {
4403
0
    if (is_2D) {
4404
0
        GGML_ASSERT(a->ne[2] == b->ne[2]);
4405
0
    } else {
4406
        //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
4407
0
        GGML_ASSERT(b->ne[1] == a->ne[1]);
4408
0
        GGML_ASSERT(b->ne[3] == 1);
4409
0
    }
4410
4411
0
    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
4412
0
    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4413
4414
0
    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
4415
0
    GGML_ASSERT((OW > 0)           && "b too small compared to a");
4416
4417
0
    const int64_t ne[4] = {
4418
0
        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
4419
0
        OW,
4420
0
        is_2D ? OH : b->ne[2],
4421
0
        is_2D ?      b->ne[3] : 1,
4422
0
    };
4423
4424
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4425
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4426
0
    ggml_set_op_params(result, params, sizeof(params));
4427
4428
0
    result->op     = GGML_OP_IM2COL;
4429
0
    result->src[0] = a;
4430
0
    result->src[1] = b;
4431
4432
0
    return result;
4433
0
}
4434
4435
struct ggml_tensor * ggml_im2col_back(
4436
        struct ggml_context * ctx,
4437
        struct ggml_tensor  * a,
4438
        struct ggml_tensor  * b,
4439
        int64_t             * ne,
4440
        int                   s0,
4441
        int                   s1,
4442
        int                   p0,
4443
        int                   p1,
4444
        int                   d0,
4445
        int                   d1,
4446
0
        bool                  is_2D) {
4447
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4448
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4449
0
    ggml_set_op_params(result, params, sizeof(params));
4450
4451
0
    result->op     = GGML_OP_IM2COL_BACK;
4452
0
    result->src[0] = a;
4453
0
    result->src[1] = b;
4454
4455
0
    return result;
4456
0
}
4457
4458
// ggml_conv_1d
4459
4460
struct ggml_tensor * ggml_conv_1d(
4461
        struct ggml_context * ctx,
4462
        struct ggml_tensor  * a,
4463
        struct ggml_tensor  * b,
4464
        int                   s0,
4465
        int                   p0,
4466
0
        int                   d0) {
4467
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
4468
4469
0
    struct ggml_tensor * result =
4470
0
        ggml_mul_mat(ctx,
4471
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
4472
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2]));                    // [OC,IC, K] => [OC, IC * K]
4473
4474
0
    result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
4475
4476
0
    return result;
4477
0
}
4478
4479
// ggml_conv_1d_ph
4480
4481
struct ggml_tensor* ggml_conv_1d_ph(
4482
        struct ggml_context * ctx,
4483
        struct ggml_tensor  * a,
4484
        struct ggml_tensor  * b,
4485
        int                   s,
4486
0
        int                   d) {
4487
0
    return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
4488
0
}
4489
4490
// ggml_conv_1d_dw
4491
4492
struct ggml_tensor * ggml_conv_1d_dw(
4493
        struct ggml_context * ctx,
4494
        struct ggml_tensor  * a,
4495
        struct ggml_tensor  * b,
4496
        int                   s0,
4497
        int                   p0,
4498
0
        int                   d0) {
4499
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
4500
4501
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
4502
4503
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
4504
4505
0
    result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);
4506
4507
0
    return result;
4508
0
}
4509
4510
// ggml_conv_1d_dw_ph
4511
4512
struct ggml_tensor * ggml_conv_1d_dw_ph(
4513
        struct ggml_context * ctx,
4514
        struct ggml_tensor  * a,
4515
        struct ggml_tensor  * b,
4516
        int                   s0,
4517
0
        int                   d0) {
4518
0
    return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
4519
0
}
4520
4521
// ggml_conv_transpose_1d
4522
4523
0
static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4524
0
    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
4525
0
}
4526
4527
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
4528
        struct ggml_context * ctx,
4529
        struct ggml_tensor  * a,
4530
        struct ggml_tensor  * b,
4531
        int                   s0,
4532
        int                   p0,
4533
0
        int                   d0) {
4534
0
    GGML_ASSERT(ggml_is_matrix(b));
4535
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
4536
0
    GGML_ASSERT(a->ne[3] == 1);
4537
4538
0
    GGML_ASSERT(p0 == 0);
4539
0
    GGML_ASSERT(d0 == 1);
4540
4541
0
    const int64_t ne[4] = {
4542
0
        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
4543
0
        a->ne[1], b->ne[2], 1,
4544
0
    };
4545
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4546
4547
0
    int32_t params[] = { s0, p0, d0 };
4548
0
    ggml_set_op_params(result, params, sizeof(params));
4549
4550
0
    result->op     = GGML_OP_CONV_TRANSPOSE_1D;
4551
0
    result->src[0] = a;
4552
0
    result->src[1] = b;
4553
4554
0
    return result;
4555
0
}
4556
4557
// ggml_conv_2d
4558
4559
// a: [OC,IC, KH, KW]
4560
// b: [N, IC, IH, IW]
4561
// result: [N, OC, OH, OW]
4562
struct ggml_tensor * ggml_conv_2d(
4563
        struct ggml_context * ctx,
4564
        struct ggml_tensor  * a,
4565
        struct ggml_tensor  * b,
4566
        int                   s0,
4567
        int                   s1,
4568
        int                   p0,
4569
        int                   p1,
4570
        int                   d0,
4571
0
        int                   d1) {
4572
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
4573
4574
0
    struct ggml_tensor * result =
4575
0
        ggml_mul_mat(ctx,
4576
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0],  im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
4577
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]),  a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]
4578
4579
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
4580
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
4581
4582
4583
0
    return result;
4584
0
}
4585
4586
// a: [OC*IC, KD, KH, KW]
4587
// b: [N*IC, ID, IH, IW]
4588
// result: [N*OD, OH, OW, IC * KD * KH * KW]
4589
struct ggml_tensor * ggml_im2col_3d(
4590
        struct ggml_context * ctx,
4591
        struct ggml_tensor  * a,
4592
        struct ggml_tensor  * b,
4593
        int64_t               IC,
4594
        int                   s0, // stride width
4595
        int                   s1, // stride height
4596
        int                   s2, // stride depth
4597
        int                   p0, // padding width
4598
        int                   p1, // padding height
4599
        int                   p2, // padding depth
4600
        int                   d0, // dilation width
4601
        int                   d1, // dilation height
4602
        int                   d2, // dilation depth
4603
0
        enum ggml_type        dst_type) {
4604
0
    const int64_t N = b->ne[3] / IC;
4605
0
    const int64_t ID = b->ne[2];
4606
0
    const int64_t IH = b->ne[1];
4607
0
    const int64_t IW = b->ne[0];
4608
4609
0
    const int64_t OC = a->ne[3] / IC;
4610
0
    UNUSED(OC);
4611
0
    const int64_t KD = a->ne[2];
4612
0
    const int64_t KH = a->ne[1];
4613
0
    const int64_t KW = a->ne[0];
4614
0
    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
4615
0
    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
4616
0
    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);
4617
4618
0
    GGML_ASSERT((OD > 0)  && "b too small compared to a");
4619
0
    GGML_ASSERT((OH > 0)  && "b too small compared to a");
4620
0
    GGML_ASSERT((OW > 0)  && "b too small compared to a");
4621
4622
4623
0
    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};
4624
4625
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4626
0
    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
4627
0
    ggml_set_op_params(result, params, sizeof(params));
4628
4629
0
    result->op     = GGML_OP_IM2COL_3D;
4630
0
    result->src[0] = a;
4631
0
    result->src[1] = b;
4632
4633
0
    return result;
4634
0
}
4635
4636
// a: [OC*IC, KD, KH, KW]
4637
// b: [N*IC, ID, IH, IW]
4638
// result: [N*OC, OD, OH, OW]
4639
struct ggml_tensor * ggml_conv_3d(
4640
        struct ggml_context * ctx,
4641
        struct ggml_tensor  * a,
4642
        struct ggml_tensor  * b,
4643
        int64_t               IC,
4644
        int                   s0, // stride width
4645
        int                   s1, // stride height
4646
        int                   s2, // stride depth
4647
        int                   p0, // padding width
4648
        int                   p1, // padding height
4649
        int                   p2, // padding depth
4650
        int                   d0, // dilation width
4651
        int                   d1, // dilation height
4652
        int                   d2  // dilation depth
4653
0
        ) {
4654
0
    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]
4655
4656
0
    int64_t OC = a->ne[3] / IC;
4657
0
    int64_t N = b->ne[3] / IC;
4658
0
    struct ggml_tensor * result =
4659
0
        ggml_mul_mat(ctx,
4660
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
4661
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                          // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]
4662
4663
0
    int64_t OD = im2col->ne[3] / N;
4664
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
4665
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
4666
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
4667
4668
0
    return result;
4669
0
}
4670
4671
// ggml_conv_2d_sk_p0
4672
4673
struct ggml_tensor * ggml_conv_2d_sk_p0(
4674
        struct ggml_context * ctx,
4675
        struct ggml_tensor  * a,
4676
0
        struct ggml_tensor  * b) {
4677
0
    return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
4678
0
}
4679
4680
// ggml_conv_2d_s1_ph
4681
4682
struct ggml_tensor * ggml_conv_2d_s1_ph(
4683
        struct ggml_context * ctx,
4684
        struct ggml_tensor  * a,
4685
0
        struct ggml_tensor  * b) {
4686
0
    return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
4687
0
}
4688
4689
// ggml_conv_2d_dw
4690
4691
struct ggml_tensor * ggml_conv_2d_dw(
4692
        struct ggml_context * ctx,
4693
        struct ggml_tensor  * a,
4694
        struct ggml_tensor  * b,
4695
        int                   s0,
4696
        int                   s1,
4697
        int                   p0,
4698
        int                   p1,
4699
        int                   d0,
4700
0
        int                   d1) {
4701
0
    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
4702
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
4703
0
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
4704
0
                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
4705
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
4706
4707
0
    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2],  new_a->ne[3], 1);                       // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
4708
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
4709
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
4710
4711
0
    return result;
4712
0
}
4713
4714
// ggml_conv_2d_dw_direct
4715
4716
struct ggml_tensor * ggml_conv_2d_dw_direct(
4717
        struct ggml_context * ctx,
4718
        struct ggml_tensor  * a,
4719
        struct ggml_tensor  * b,
4720
        int                   stride0,
4721
        int                   stride1,
4722
        int                   pad0,
4723
        int                   pad1,
4724
        int                   dilation0,
4725
0
        int                   dilation1) {
4726
0
    GGML_ASSERT(a->ne[2] == 1);
4727
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4728
0
    int64_t ne[4];
4729
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
4730
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
4731
0
    ne[2] = b->ne[2];
4732
0
    ne[3] = b->ne[3];
4733
4734
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4735
4736
0
    if (ggml_is_contiguous_channels(b)) {
4737
        // Result will be permuted the same way as input (CWHN order)
4738
0
        const int64_t type_size = ggml_type_size(result->type);
4739
0
        GGML_ASSERT(ggml_blck_size(result->type) == 1);
4740
0
        result->nb[0] = result->ne[2] * type_size;
4741
0
        result->nb[1] = result->ne[0] * result->nb[0];
4742
0
        result->nb[2] = type_size;
4743
0
    }
4744
4745
0
    int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
4746
0
    ggml_set_op_params(result, params, sizeof(params));
4747
4748
0
    result->op     = GGML_OP_CONV_2D_DW;
4749
0
    result->src[0] = a;
4750
0
    result->src[1] = b;
4751
0
    return result;
4752
0
}
4753
4754
// ggml_conv_2d_direct
4755
4756
struct ggml_tensor * ggml_conv_2d_direct(
4757
        struct ggml_context * ctx,
4758
        struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
4759
        struct ggml_tensor  * b,   // input data [W, H, C, N]
4760
        int                   s0,  // stride dimension 0
4761
        int                   s1,  // stride dimension 1
4762
        int                   p0,  // padding dimension 0
4763
        int                   p1,  // padding dimension 1
4764
        int                   d0,  // dilation dimension 0
4765
0
        int                   d1) {// dilation dimension 1
4766
4767
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
4768
    //GGML_ASSERT(a->type == b->type);
4769
4770
0
    int64_t ne[4];
4771
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4772
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4773
0
    ne[2] = a->ne[3];
4774
0
    ne[3] = b->ne[3];
4775
4776
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4777
4778
0
    ggml_set_op_params_i32(result, 0, s0);
4779
0
    ggml_set_op_params_i32(result, 1, s1);
4780
0
    ggml_set_op_params_i32(result, 2, p0);
4781
0
    ggml_set_op_params_i32(result, 3, p1);
4782
0
    ggml_set_op_params_i32(result, 4, d0);
4783
0
    ggml_set_op_params_i32(result, 5, d1);
4784
4785
0
    result->op = GGML_OP_CONV_2D;
4786
0
    result->src[0] = a;
4787
0
    result->src[1] = b;
4788
4789
0
    return result;
4790
0
}
4791
4792
// ggml_conv_3d_direct
4793
4794
struct ggml_tensor * ggml_conv_3d_direct(
4795
        struct ggml_context * ctx,
4796
        struct ggml_tensor  * a,
4797
        struct ggml_tensor  * b,
4798
        int                   s0,
4799
        int                   s1,
4800
        int                   s2,
4801
        int                   p0,
4802
        int                   p1,
4803
        int                   p2,
4804
        int                   d0,
4805
        int                   d1,
4806
        int                   d2,
4807
        int                   c,
4808
        int                   n,
4809
0
        int                   oc) {
4810
4811
0
    GGML_ASSERT(a->ne[3] == (int64_t) c * oc);
4812
0
    GGML_ASSERT(b->ne[3] == (int64_t) c * n);
4813
4814
0
    int64_t ne[4];
4815
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4816
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4817
0
    ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2);
4818
0
    ne[3] = (int64_t) oc * n;
4819
4820
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4821
4822
0
    ggml_set_op_params_i32(result, 0,  s0);
4823
0
    ggml_set_op_params_i32(result, 1,  s1);
4824
0
    ggml_set_op_params_i32(result, 2,  s2);
4825
0
    ggml_set_op_params_i32(result, 3,  p0);
4826
0
    ggml_set_op_params_i32(result, 4,  p1);
4827
0
    ggml_set_op_params_i32(result, 5,  p2);
4828
0
    ggml_set_op_params_i32(result, 6,  d0);
4829
0
    ggml_set_op_params_i32(result, 7,  d1);
4830
0
    ggml_set_op_params_i32(result, 8,  d2);
4831
0
    ggml_set_op_params_i32(result, 9,  c);
4832
0
    ggml_set_op_params_i32(result, 10, n);
4833
0
    ggml_set_op_params_i32(result, 11, oc);
4834
4835
0
    result->op = GGML_OP_CONV_3D;
4836
0
    result->src[0] = a;
4837
0
    result->src[1] = b;
4838
4839
0
    return result;
4840
0
}
4841
4842
// ggml_conv_transpose_2d_p0
4843
4844
0
static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
4845
0
    return (ins - 1) * s - 2 * p + ks;
4846
0
}
4847
4848
struct ggml_tensor * ggml_conv_transpose_2d_p0(
4849
        struct ggml_context * ctx,
4850
        struct ggml_tensor  * a,
4851
        struct ggml_tensor  * b,
4852
0
        int                   stride) {
4853
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4854
4855
0
    const int64_t ne[4] = {
4856
0
        ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
4857
0
        ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
4858
0
        a->ne[2], b->ne[3],
4859
0
    };
4860
4861
0
    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4862
4863
0
    ggml_set_op_params_i32(result, 0, stride);
4864
4865
0
    result->op     = GGML_OP_CONV_TRANSPOSE_2D;
4866
0
    result->src[0] = a;
4867
0
    result->src[1] = b;
4868
4869
0
    return result;
4870
0
}
4871
4872
// ggml_pool_*
4873
4874
0
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
4875
0
    return (ins + 2 * p - ks) / s + 1;
4876
0
}
4877
4878
// ggml_pool_1d
4879
4880
struct ggml_tensor * ggml_pool_1d(
4881
        struct ggml_context * ctx,
4882
        struct ggml_tensor  * a,
4883
        enum ggml_op_pool     op,
4884
        int                   k0,
4885
        int                   s0,
4886
0
        int                   p0) {
4887
0
    const int64_t ne[4] = {
4888
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4889
0
        a->ne[1],
4890
0
        a->ne[2],
4891
0
        a->ne[3],
4892
0
    };
4893
0
    GGML_ASSERT(ne[0] > 0);
4894
4895
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4896
4897
0
    int32_t params[] = { op, k0, s0, p0 };
4898
0
    ggml_set_op_params(result, params, sizeof(params));
4899
4900
0
    result->op     = GGML_OP_POOL_1D;
4901
0
    result->src[0] = a;
4902
4903
0
    return result;
4904
0
}
4905
4906
// ggml_pool_2d
4907
4908
struct ggml_tensor * ggml_pool_2d(
4909
        struct ggml_context * ctx,
4910
        struct ggml_tensor  * a,
4911
        enum ggml_op_pool     op,
4912
        int                   k0,
4913
        int                   k1,
4914
        int                   s0,
4915
        int                   s1,
4916
        float                 p0,
4917
0
        float                 p1) {
4918
0
    struct ggml_tensor * result;
4919
0
    const int64_t ne[4] = {
4920
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4921
0
        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
4922
0
        a->ne[2],
4923
0
        a->ne[3],
4924
0
    };
4925
0
    GGML_ASSERT(ne[0] > 0);
4926
0
    GGML_ASSERT(ne[1] > 0);
4927
4928
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4929
4930
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4931
0
    ggml_set_op_params(result, params, sizeof(params));
4932
4933
0
    result->op     = GGML_OP_POOL_2D;
4934
0
    result->src[0] = a;
4935
4936
0
    return result;
4937
0
}
4938
4939
struct ggml_tensor * ggml_pool_2d_back(
4940
        struct ggml_context * ctx,
4941
        struct ggml_tensor  * a,
4942
        struct ggml_tensor  * af,
4943
        enum ggml_op_pool     op,
4944
        int                   k0,
4945
        int                   k1,
4946
        int                   s0,
4947
        int                   s1,
4948
        float                 p0,
4949
0
        float                 p1) {
4950
0
    struct ggml_tensor * result;
4951
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
4952
4953
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4954
0
    ggml_set_op_params(result, params, sizeof(params));
4955
4956
0
    result->op     = GGML_OP_POOL_2D_BACK;
4957
0
    result->src[0] = a;
4958
0
    result->src[1] = af;
4959
4960
0
    return result;
4961
0
}
4962
4963
// ggml_upscale / ggml_interpolate
4964
4965
static struct ggml_tensor * ggml_interpolate_impl(
4966
        struct ggml_context * ctx,
4967
        struct ggml_tensor  * a,
4968
        int64_t               ne0,
4969
        int64_t               ne1,
4970
        int64_t               ne2,
4971
        int64_t               ne3,
4972
0
        uint32_t              mode) {
4973
0
    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
4974
    // TODO: implement antialias for modes other than bilinear
4975
0
    GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
4976
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
4977
4978
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
4979
4980
0
    ggml_set_op_params_i32(result, 0, (int32_t)mode);
4981
4982
0
    result->op     = GGML_OP_UPSCALE;
4983
0
    result->src[0] = a;
4984
4985
0
    return result;
4986
0
}
4987
4988
struct ggml_tensor * ggml_upscale(
4989
        struct ggml_context * ctx,
4990
        struct ggml_tensor  * a,
4991
        int                   scale_factor,
4992
0
        enum ggml_scale_mode  mode) {
4993
0
    GGML_ASSERT(scale_factor > 1);
4994
0
    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
4995
0
}
4996
4997
struct ggml_tensor * ggml_upscale_ext(
4998
        struct ggml_context * ctx,
4999
        struct ggml_tensor  * a,
5000
        int                   ne0,
5001
        int                   ne1,
5002
        int                   ne2,
5003
        int                   ne3,
5004
0
        enum ggml_scale_mode  mode) {
5005
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
5006
0
}
5007
5008
struct ggml_tensor * ggml_interpolate(
5009
        struct ggml_context * ctx,
5010
        struct ggml_tensor  * a,
5011
        int64_t               ne0,
5012
        int64_t               ne1,
5013
        int64_t               ne2,
5014
        int64_t               ne3,
5015
0
        uint32_t              mode) {
5016
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
5017
0
}
5018
5019
// ggml_pad
5020
5021
struct ggml_tensor * ggml_pad(
5022
        struct ggml_context * ctx,
5023
        struct ggml_tensor  * a,
5024
        int                   p0,
5025
        int                   p1,
5026
        int                   p2,
5027
0
        int                   p3) {
5028
0
    return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
5029
0
}
5030
5031
// ggml_pad_circular
5032
5033
struct ggml_tensor * ggml_pad_circular(
5034
        struct ggml_context * ctx,
5035
        struct ggml_tensor  * a,
5036
        int                   p0,
5037
        int                   p1,
5038
        int                   p2,
5039
0
        int                   p3) {
5040
0
    return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
5041
0
}
5042
5043
struct ggml_tensor * ggml_pad_ext(
5044
            struct ggml_context * ctx,
5045
            struct ggml_tensor  * a,
5046
            int                  lp0,
5047
            int                  rp0,
5048
            int                  lp1,
5049
            int                  rp1,
5050
            int                  lp2,
5051
            int                  rp2,
5052
            int                  lp3,
5053
            int                  rp3
5054
0
            ) {
5055
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
5056
0
            a->ne[0] + lp0 + rp0,
5057
0
            a->ne[1] + lp1 + rp1,
5058
0
            a->ne[2] + lp2 + rp2,
5059
0
            a->ne[3] + lp3 + rp3);
5060
5061
0
    ggml_set_op_params_i32(result, 0, lp0);
5062
0
    ggml_set_op_params_i32(result, 1, rp0);
5063
0
    ggml_set_op_params_i32(result, 2, lp1);
5064
0
    ggml_set_op_params_i32(result, 3, rp1);
5065
0
    ggml_set_op_params_i32(result, 4, lp2);
5066
0
    ggml_set_op_params_i32(result, 5, rp2);
5067
0
    ggml_set_op_params_i32(result, 6, lp3);
5068
0
    ggml_set_op_params_i32(result, 7, rp3);
5069
0
    ggml_set_op_params_i32(result, 8, 0); // not circular by default
5070
5071
5072
0
    result->op     = GGML_OP_PAD;
5073
0
    result->src[0] = a;
5074
5075
0
    return result;
5076
0
}
5077
5078
// ggml_pad_ext_circular
5079
5080
struct ggml_tensor * ggml_pad_ext_circular(
5081
        struct ggml_context * ctx,
5082
        struct ggml_tensor  * a,
5083
        int                  lp0,
5084
        int                  rp0,
5085
        int                  lp1,
5086
        int                  rp1,
5087
        int                  lp2,
5088
        int                  rp2,
5089
        int                  lp3,
5090
        int                  rp3
5091
0
        ) {
5092
0
    struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
5093
0
    ggml_set_op_params_i32(result, 8, 1); // circular
5094
0
    return result;
5095
0
}
5096
5097
// ggml_pad_reflect_1d
5098
5099
struct ggml_tensor * ggml_pad_reflect_1d(
5100
        struct ggml_context * ctx,
5101
        struct ggml_tensor  * a,
5102
        int                   p0,
5103
0
        int                   p1) {
5104
0
    GGML_ASSERT(p0 >= 0);
5105
0
    GGML_ASSERT(p1 >= 0);
5106
5107
0
    GGML_ASSERT(p0 < a->ne[0]); // padding length on each size must be less than the
5108
0
    GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
5109
5110
0
    GGML_ASSERT(ggml_is_contiguous(a));
5111
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5112
5113
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
5114
0
            a->ne[0] + p0 + p1,
5115
0
            a->ne[1],
5116
0
            a->ne[2],
5117
0
            a->ne[3]);
5118
5119
0
    int32_t params[] = { p0, p1 };
5120
0
    ggml_set_op_params(result, params, sizeof(params));
5121
5122
0
    result->op     = GGML_OP_PAD_REFLECT_1D;
5123
0
    result->src[0] = a;
5124
5125
0
    return result;
5126
0
}
5127
5128
// ggml_roll
5129
5130
struct ggml_tensor * ggml_roll(
5131
        struct ggml_context * ctx,
5132
        struct ggml_tensor  * a,
5133
        int                   shift0,
5134
        int                   shift1,
5135
        int                   shift2,
5136
0
        int                   shift3) {
5137
0
    GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
5138
0
    GGML_ASSERT(abs(shift0) < a->ne[0]);
5139
0
    GGML_ASSERT(abs(shift1) < a->ne[1]);
5140
0
    GGML_ASSERT(abs(shift2) < a->ne[2]);
5141
0
    GGML_ASSERT(abs(shift3) < a->ne[3]);
5142
5143
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5144
5145
0
    ggml_set_op_params_i32(result, 0, shift0);
5146
0
    ggml_set_op_params_i32(result, 1, shift1);
5147
0
    ggml_set_op_params_i32(result, 2, shift2);
5148
0
    ggml_set_op_params_i32(result, 3, shift3);
5149
5150
0
    result->op     = GGML_OP_ROLL;
5151
0
    result->src[0] = a;
5152
5153
0
    return result;
5154
0
}
5155
5156
// ggml_timestep_embedding
5157
5158
struct ggml_tensor * ggml_timestep_embedding(
5159
        struct ggml_context * ctx,
5160
        struct ggml_tensor  * timesteps,
5161
        int                   dim,
5162
0
        int                   max_period) {
5163
5164
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);
5165
5166
0
    ggml_set_op_params_i32(result, 0, dim);
5167
0
    ggml_set_op_params_i32(result, 1, max_period);
5168
5169
0
    result->op     = GGML_OP_TIMESTEP_EMBEDDING;
5170
0
    result->src[0] = timesteps;
5171
5172
0
    return result;
5173
0
}
5174
5175
// ggml_tri
5176
5177
struct ggml_tensor * ggml_tri(
5178
    struct ggml_context * ctx,
5179
    struct ggml_tensor  * a,
5180
0
    enum ggml_tri_type    type) {
5181
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5182
5183
0
    GGML_ASSERT(ggml_is_contiguous(a));
5184
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
5185
5186
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5187
5188
0
    ggml_set_op_params_i32(result, 0, type);
5189
5190
0
    result->op = GGML_OP_TRI;
5191
0
    result->src[0] = a;
5192
5193
0
    return result;
5194
0
}
5195
5196
// ggml_fill
5197
5198
static struct ggml_tensor * ggml_fill_impl(
5199
    struct ggml_context * ctx,
5200
    struct ggml_tensor  * a,
5201
    float                 c,
5202
0
    bool                  inplace) {
5203
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5204
0
    GGML_ASSERT(ggml_is_contiguous(a));
5205
5206
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5207
5208
0
    ggml_set_op_params_f32(result, 0, c);
5209
5210
0
    result->op = GGML_OP_FILL;
5211
0
    result->src[0] = a;
5212
5213
0
    return result;
5214
0
}
5215
5216
struct ggml_tensor * ggml_fill(
5217
    struct ggml_context * ctx,
5218
    struct ggml_tensor  * a,
5219
0
    float                 c) {
5220
0
    return ggml_fill_impl(ctx, a, c, false);
5221
0
}
5222
5223
struct ggml_tensor * ggml_fill_inplace(
5224
    struct ggml_context * ctx,
5225
    struct ggml_tensor  * a,
5226
0
    float                 c) {
5227
0
    return ggml_fill_impl(ctx, a, c, true);
5228
0
}
5229
5230
// ggml_argsort
5231
5232
struct ggml_tensor * ggml_argsort(
5233
        struct ggml_context  * ctx,
5234
        struct ggml_tensor   * a,
5235
0
        enum ggml_sort_order   order) {
5236
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
5237
5238
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
5239
5240
0
    ggml_set_op_params_i32(result, 0, (int32_t) order);
5241
5242
0
    result->op     = GGML_OP_ARGSORT;
5243
0
    result->src[0] = a;
5244
5245
0
    return result;
5246
0
}
5247
5248
// ggml_argsort_top_k
5249
5250
struct ggml_tensor * ggml_argsort_top_k(
5251
        struct ggml_context * ctx,
5252
        struct ggml_tensor  * a,
5253
0
        int                   k) {
5254
0
    GGML_ASSERT(a->ne[0] >= k);
5255
5256
0
    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
5257
5258
0
    result = ggml_view_4d(ctx, result,
5259
0
                k, result->ne[1], result->ne[2], result->ne[3],
5260
0
                   result->nb[1], result->nb[2], result->nb[3],
5261
0
                0);
5262
5263
0
    return result;
5264
0
}
5265
5266
// ggml_top_k
5267
5268
struct ggml_tensor * ggml_top_k(
5269
        struct ggml_context * ctx,
5270
        struct ggml_tensor  * a,
5271
0
        int                   k) {
5272
0
    GGML_ASSERT(a->ne[0] >= k);
5273
5274
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
5275
5276
0
    result->op     = GGML_OP_TOP_K;
5277
0
    result->src[0] = a;
5278
5279
0
    return result;
5280
0
}
5281
5282
// ggml_arange
5283
5284
struct ggml_tensor * ggml_arange(
5285
        struct ggml_context * ctx,
5286
        float                 start,
5287
        float                 stop,
5288
0
        float                 step) {
5289
0
    GGML_ASSERT(stop > start);
5290
5291
0
    const int64_t steps = (int64_t) ceilf((stop - start) / step);
5292
5293
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
5294
5295
0
    ggml_set_op_params_f32(result, 0, start);
5296
0
    ggml_set_op_params_f32(result, 1, stop);
5297
0
    ggml_set_op_params_f32(result, 2, step);
5298
5299
0
    result->op = GGML_OP_ARANGE;
5300
5301
0
    return result;
5302
0
}
5303
5304
// ggml_flash_attn_ext
5305
5306
struct ggml_tensor * ggml_flash_attn_ext(
5307
        struct ggml_context * ctx,
5308
        struct ggml_tensor  * q,
5309
        struct ggml_tensor  * k,
5310
        struct ggml_tensor  * v,
5311
        struct ggml_tensor  * mask,
5312
        float                 scale,
5313
        float                 max_bias,
5314
0
        float                 logit_softcap) {
5315
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5316
    // TODO: check if vT can be multiplied by (k*qT)
5317
5318
0
    GGML_ASSERT(q->ne[3] == k->ne[3]);
5319
0
    GGML_ASSERT(q->ne[3] == v->ne[3]);
5320
5321
0
    if (mask) {
5322
0
        GGML_ASSERT(mask->type == GGML_TYPE_F16);
5323
0
        GGML_ASSERT(ggml_is_contiguous(mask));
5324
        //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
5325
5326
0
        GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
5327
0
        GGML_ASSERT(q->ne[3] % mask->ne[3] == 0);
5328
0
    }
5329
5330
0
    if (max_bias > 0.0f) {
5331
0
        GGML_ASSERT(mask);
5332
0
    }
5333
5334
    // permute(0, 2, 1, 3)
5335
0
    int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
5336
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5337
5338
0
    float params[] = { scale, max_bias, logit_softcap };
5339
0
    ggml_set_op_params(result, params, sizeof(params));
5340
5341
0
    result->op     = GGML_OP_FLASH_ATTN_EXT;
5342
0
    result->src[0] = q;
5343
0
    result->src[1] = k;
5344
0
    result->src[2] = v;
5345
0
    result->src[3] = mask;
5346
5347
0
    return result;
5348
0
}
5349
5350
void ggml_flash_attn_ext_set_prec(
5351
        struct ggml_tensor * a,
5352
0
        enum ggml_prec       prec) {
5353
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5354
5355
0
    const int32_t prec_i32 = (int32_t) prec;
5356
5357
0
    ggml_set_op_params_i32(a, 3, prec_i32); // scale is on first pos, max_bias on second
5358
0
}
5359
5360
enum ggml_prec ggml_flash_attn_ext_get_prec(
5361
0
        const struct ggml_tensor * a) {
5362
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5363
5364
0
    const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);
5365
5366
0
    return (enum ggml_prec) prec_i32;
5367
0
}
5368
5369
void ggml_flash_attn_ext_add_sinks(
5370
        struct ggml_tensor * a,
5371
0
        struct ggml_tensor * sinks) {
5372
0
    if (!sinks) {
5373
0
        a->src[4] = NULL;
5374
0
        return;
5375
0
    }
5376
5377
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5378
0
    GGML_ASSERT(a->src[4] == NULL);
5379
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
5380
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
5381
5382
0
    a->src[4] = sinks;
5383
0
}
5384
5385
// ggml_flash_attn_back
5386
5387
struct ggml_tensor * ggml_flash_attn_back(
5388
        struct ggml_context * ctx,
5389
        struct ggml_tensor  * q,
5390
        struct ggml_tensor  * k,
5391
        struct ggml_tensor  * v,
5392
        struct ggml_tensor  * d,
5393
0
        bool                  masked) {
5394
0
    GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
5395
5396
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5397
    // TODO: check if vT can be multiplied by (k*qT)
5398
5399
    // d shape [D,N,ne2,ne3]
5400
    // q shape [D,N,ne2,ne3]
5401
    // k shape [D,M,kvne2,ne3]
5402
    // v shape [M,D,kvne2,ne3]
5403
5404
0
    const int64_t     D = q->ne[0];
5405
0
    const int64_t     N = q->ne[1];
5406
0
    const int64_t     M = k->ne[1];
5407
0
    const int64_t   ne2 = q->ne[2];
5408
0
    const int64_t   ne3 = q->ne[3];
5409
0
    const int64_t kvne2 = k->ne[2];
5410
5411
0
    GGML_ASSERT(k->ne[0] == D);
5412
0
    GGML_ASSERT(v->ne[0] == M);
5413
0
    GGML_ASSERT(v->ne[1] == D);
5414
0
    GGML_ASSERT(d->ne[0] == D);
5415
0
    GGML_ASSERT(d->ne[1] == N);
5416
0
    GGML_ASSERT(k->ne[2] == kvne2);
5417
0
    GGML_ASSERT(k->ne[3] == ne3);
5418
0
    GGML_ASSERT(v->ne[2] == kvne2);
5419
0
    GGML_ASSERT(v->ne[3] == ne3);
5420
0
    GGML_ASSERT(d->ne[2] == ne2);
5421
0
    GGML_ASSERT(d->ne[3] == ne3);
5422
5423
0
    GGML_ASSERT(ne2 % kvne2 == 0);
5424
5425
    // store gradients of q, k and v as continuous tensors concatenated in result.
5426
    // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
5427
0
    const int64_t elem_q = ggml_nelements(q);
5428
0
    const int64_t elem_k = ggml_nelements(k);
5429
0
    const int64_t elem_v = ggml_nelements(v);
5430
5431
0
    enum ggml_type result_type = GGML_TYPE_F32;
5432
0
    GGML_ASSERT(ggml_blck_size(result_type) == 1);
5433
0
    const size_t tsize = ggml_type_size(result_type);
5434
5435
0
    const size_t offs_q = 0;
5436
0
    const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
5437
0
    const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
5438
0
    const size_t end    = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);
5439
5440
0
    const size_t nelements = (end + tsize - 1)/tsize;
5441
5442
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);
5443
5444
0
    int32_t masked_i = masked ? 1 : 0;
5445
0
    ggml_set_op_params(result, &masked_i, sizeof(masked_i));
5446
5447
0
    result->op     = GGML_OP_FLASH_ATTN_BACK;
5448
0
    result->src[0] = q;
5449
0
    result->src[1] = k;
5450
0
    result->src[2] = v;
5451
0
    result->src[3] = d;
5452
5453
0
    return result;
5454
0
}
5455
5456
// ggml_ssm_conv
5457
5458
struct ggml_tensor * ggml_ssm_conv(
5459
        struct ggml_context * ctx,
5460
        struct ggml_tensor  * sx,
5461
0
        struct ggml_tensor  * c) {
5462
0
    GGML_ASSERT(ggml_is_3d(sx));
5463
0
    GGML_ASSERT(ggml_is_matrix(c));
5464
5465
0
    const int64_t d_conv  = c->ne[0];
5466
0
    const int64_t d_inner = c->ne[1];
5467
0
    const int64_t n_t     = sx->ne[0] - d_conv + 1; // tokens per sequence
5468
0
    const int64_t n_s     = sx->ne[2];
5469
5470
    // TODO: maybe support other strides than 1?
5471
0
    GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
5472
0
    GGML_ASSERT(sx->ne[1] == d_inner);
5473
0
    GGML_ASSERT(n_t >= 0);
5474
5475
0
    struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
5476
5477
0
    result->op     = GGML_OP_SSM_CONV;
5478
0
    result->src[0] = sx;
5479
0
    result->src[1] = c;
5480
5481
0
    return result;
5482
0
}
5483
5484
// ggml_ssm_scan
5485
5486
struct ggml_tensor * ggml_ssm_scan(
5487
        struct ggml_context * ctx,
5488
        struct ggml_tensor  * s,
5489
        struct ggml_tensor  * x,
5490
        struct ggml_tensor  * dt,
5491
        struct ggml_tensor  * A,
5492
        struct ggml_tensor  * B,
5493
        struct ggml_tensor  * C,
5494
0
        struct ggml_tensor  * ids) {
5495
0
    GGML_ASSERT(ggml_is_contiguous(s));
5496
0
    GGML_ASSERT(ggml_is_contiguous(dt));
5497
0
    GGML_ASSERT(ggml_is_contiguous(A));
5498
0
    GGML_ASSERT(x->nb[0] == ggml_type_size(x->type));
5499
0
    GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
5500
0
    GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
5501
0
    GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]);
5502
0
    GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]);
5503
0
    GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]);
5504
0
    GGML_ASSERT(ggml_are_same_shape(B, C));
5505
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
5506
5507
0
    {
5508
0
        const int64_t d_state      = s->ne[0];
5509
0
        const int64_t head_dim     = x->ne[0];
5510
0
        const int64_t n_head       = x->ne[1];
5511
0
        const int64_t n_seq_tokens = x->ne[2];
5512
0
        const int64_t n_seqs       = x->ne[3];
5513
5514
0
        GGML_ASSERT(dt->ne[0] == n_head);
5515
0
        GGML_ASSERT(dt->ne[1] == n_seq_tokens);
5516
0
        GGML_ASSERT(dt->ne[2] == n_seqs);
5517
0
        GGML_ASSERT(ggml_is_3d(dt));
5518
0
        GGML_ASSERT(s->ne[1] == head_dim);
5519
0
        GGML_ASSERT(s->ne[2] == n_head);
5520
0
        GGML_ASSERT(B->ne[0] == d_state);
5521
0
        GGML_ASSERT(B->ne[2] == n_seq_tokens);
5522
0
        GGML_ASSERT(B->ne[3] == n_seqs);
5523
0
        GGML_ASSERT(ids->ne[0] == n_seqs);
5524
0
        GGML_ASSERT(ggml_is_vector(ids));
5525
0
        GGML_ASSERT(A->ne[1] == n_head);
5526
0
        GGML_ASSERT(ggml_is_matrix(A));
5527
5528
0
        if (A->ne[0] != 1) {
5529
            // Mamba-1 has more granular decay factors
5530
0
            GGML_ASSERT(A->ne[0] == d_state);
5531
0
        }
5532
0
    }
5533
5534
    // concatenated y + ssm_states
5535
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]);
5536
5537
0
    result->op   = GGML_OP_SSM_SCAN;
5538
0
    result->src[0] = s;
5539
0
    result->src[1] = x;
5540
0
    result->src[2] = dt;
5541
0
    result->src[3] = A;
5542
0
    result->src[4] = B;
5543
0
    result->src[5] = C;
5544
0
    result->src[6] = ids;
5545
5546
0
    return result;
5547
0
}
5548
5549
// ggml_win_part
5550
5551
struct ggml_tensor * ggml_win_part(
5552
        struct ggml_context * ctx,
5553
        struct ggml_tensor  * a,
5554
0
        int                   w) {
5555
0
    GGML_ASSERT(a->ne[3] == 1);
5556
0
    GGML_ASSERT(a->type  == GGML_TYPE_F32);
5557
5558
    // padding
5559
0
    const int px = (w - a->ne[1]%w)%w;
5560
0
    const int py = (w - a->ne[2]%w)%w;
5561
5562
0
    const int npx = (px + a->ne[1])/w;
5563
0
    const int npy = (py + a->ne[2])/w;
5564
0
    const int np  = npx*npy;
5565
5566
0
    const int64_t ne[4] = { a->ne[0], w, w, np, };
5567
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5568
5569
0
    int32_t params[] = { npx, npy, w };
5570
0
    ggml_set_op_params(result, params, sizeof(params));
5571
5572
0
    result->op     = GGML_OP_WIN_PART;
5573
0
    result->src[0] = a;
5574
5575
0
    return result;
5576
0
}
5577
5578
// ggml_win_unpart
5579
5580
struct ggml_tensor * ggml_win_unpart(
5581
        struct ggml_context * ctx,
5582
        struct ggml_tensor  * a,
5583
        int                   w0,
5584
        int                   h0,
5585
0
        int                   w) {
5586
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5587
5588
0
    const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
5589
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
5590
5591
0
    int32_t params[] = { w };
5592
0
    ggml_set_op_params(result, params, sizeof(params));
5593
5594
0
    result->op     = GGML_OP_WIN_UNPART;
5595
0
    result->src[0] = a;
5596
5597
0
    return result;
5598
0
}
5599
5600
// ggml_get_rel_pos
5601
5602
struct ggml_tensor * ggml_get_rel_pos(
5603
        struct ggml_context * ctx,
5604
        struct ggml_tensor  * a,
5605
        int                   qh,
5606
0
        int                   kh) {
5607
0
    GGML_ASSERT(qh == kh);
5608
0
    GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
5609
5610
0
    const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
5611
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
5612
5613
0
    result->op     = GGML_OP_GET_REL_POS;
5614
0
    result->src[0] = a;
5615
5616
0
    return result;
5617
0
}
5618
5619
// ggml_add_rel_pos
5620
5621
static struct ggml_tensor * ggml_add_rel_pos_impl(
5622
        struct ggml_context * ctx,
5623
        struct ggml_tensor  * a,
5624
        struct ggml_tensor  * pw,
5625
        struct ggml_tensor  * ph,
5626
0
        bool                  inplace) {
5627
0
    GGML_ASSERT(ggml_are_same_shape(pw, ph));
5628
0
    GGML_ASSERT(ggml_is_contiguous(a));
5629
0
    GGML_ASSERT(ggml_is_contiguous(pw));
5630
0
    GGML_ASSERT(ggml_is_contiguous(ph));
5631
0
    GGML_ASSERT(ph->type == GGML_TYPE_F32);
5632
0
    GGML_ASSERT(pw->type == GGML_TYPE_F32);
5633
0
    GGML_ASSERT(pw->ne[3] == a->ne[2]);
5634
0
    GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
5635
0
    GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
5636
5637
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5638
0
    ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
5639
5640
0
    result->op     = GGML_OP_ADD_REL_POS;
5641
0
    result->src[0] = a;
5642
0
    result->src[1] = pw;
5643
0
    result->src[2] = ph;
5644
5645
0
    return result;
5646
0
}
5647
5648
struct ggml_tensor * ggml_add_rel_pos(
5649
        struct ggml_context * ctx,
5650
        struct ggml_tensor  * a,
5651
        struct ggml_tensor  * pw,
5652
0
        struct ggml_tensor  * ph) {
5653
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
5654
0
}
5655
5656
struct ggml_tensor * ggml_add_rel_pos_inplace(
5657
        struct ggml_context * ctx,
5658
        struct ggml_tensor  * a,
5659
        struct ggml_tensor  * pw,
5660
0
        struct ggml_tensor  * ph) {
5661
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
5662
0
}
5663
5664
// ggml_rwkv_wkv6
5665
5666
struct ggml_tensor * ggml_rwkv_wkv6(
5667
        struct ggml_context * ctx,
5668
        struct ggml_tensor  * k,
5669
        struct ggml_tensor  * v,
5670
        struct ggml_tensor  * r,
5671
        struct ggml_tensor  * tf,
5672
        struct ggml_tensor  * td,
5673
0
        struct ggml_tensor  * state) {
5674
0
    GGML_ASSERT(ggml_is_contiguous(k));
5675
0
    GGML_ASSERT(ggml_is_contiguous(v));
5676
0
    GGML_ASSERT(ggml_is_contiguous(r));
5677
0
    GGML_ASSERT(ggml_is_contiguous(tf));
5678
0
    GGML_ASSERT(ggml_is_contiguous(td));
5679
0
    GGML_ASSERT(ggml_is_contiguous(state));
5680
5681
0
    const int64_t S = k->ne[0];
5682
0
    const int64_t H = k->ne[1];
5683
0
    const int64_t n_tokens = k->ne[2];
5684
0
    const int64_t n_seqs = state->ne[1];
5685
0
    {
5686
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5687
0
        GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
5688
0
        GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
5689
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5690
0
    }
5691
5692
    // concat output and new_state
5693
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5694
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5695
5696
0
    result->op     = GGML_OP_RWKV_WKV6;
5697
0
    result->src[0] = k;
5698
0
    result->src[1] = v;
5699
0
    result->src[2] = r;
5700
0
    result->src[3] = tf;
5701
0
    result->src[4] = td;
5702
0
    result->src[5] = state;
5703
5704
0
    return result;
5705
0
}
5706
5707
// ggml_gated_linear_attn
5708
5709
struct ggml_tensor * ggml_gated_linear_attn(
5710
        struct ggml_context * ctx,
5711
        struct ggml_tensor  * k,
5712
        struct ggml_tensor  * v,
5713
        struct ggml_tensor  * q,
5714
        struct ggml_tensor  * g,
5715
        struct ggml_tensor  * state,
5716
0
        float scale) {
5717
0
    GGML_ASSERT(ggml_is_contiguous(k));
5718
0
    GGML_ASSERT(ggml_is_contiguous(v));
5719
0
    GGML_ASSERT(ggml_is_contiguous(q));
5720
0
    GGML_ASSERT(ggml_is_contiguous(g));
5721
0
    GGML_ASSERT(ggml_is_contiguous(state));
5722
5723
0
    const int64_t S = k->ne[0];
5724
0
    const int64_t H = k->ne[1];
5725
0
    const int64_t n_tokens = k->ne[2];
5726
0
    const int64_t n_seqs = state->ne[1];
5727
0
    {
5728
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5729
0
        GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
5730
0
        GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
5731
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5732
0
    }
5733
5734
    // concat output and new_state
5735
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5736
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5737
5738
0
    ggml_set_op_params_f32(result, 0, scale);
5739
5740
0
    result->op     = GGML_OP_GATED_LINEAR_ATTN;
5741
0
    result->src[0] = k;
5742
0
    result->src[1] = v;
5743
0
    result->src[2] = q;
5744
0
    result->src[3] = g;
5745
0
    result->src[4] = state;
5746
5747
0
    return result;
5748
0
}
5749
5750
// ggml_rwkv_wkv7
5751
5752
struct ggml_tensor * ggml_rwkv_wkv7(
5753
        struct ggml_context * ctx,
5754
        struct ggml_tensor  * r,
5755
        struct ggml_tensor  * w,
5756
        struct ggml_tensor  * k,
5757
        struct ggml_tensor  * v,
5758
        struct ggml_tensor  * a,
5759
        struct ggml_tensor  * b,
5760
0
        struct ggml_tensor  * state) {
5761
0
    GGML_ASSERT(ggml_is_contiguous(r));
5762
0
    GGML_ASSERT(ggml_is_contiguous(w));
5763
0
    GGML_ASSERT(ggml_is_contiguous(k));
5764
0
    GGML_ASSERT(ggml_is_contiguous(v));
5765
0
    GGML_ASSERT(ggml_is_contiguous(a));
5766
0
    GGML_ASSERT(ggml_is_contiguous(b));
5767
0
    GGML_ASSERT(ggml_is_contiguous(state));
5768
5769
0
    const int64_t S = k->ne[0];
5770
0
    const int64_t H = k->ne[1];
5771
0
    const int64_t n_tokens = k->ne[2];
5772
0
    const int64_t n_seqs = state->ne[1];
5773
0
    {
5774
0
        GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
5775
0
        GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
5776
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5777
0
        GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
5778
0
        GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
5779
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5780
0
    }
5781
5782
    // concat output and new_state
5783
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5784
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5785
5786
0
    result->op     = GGML_OP_RWKV_WKV7;
5787
0
    result->src[0] = r;
5788
0
    result->src[1] = w;
5789
0
    result->src[2] = k;
5790
0
    result->src[3] = v;
5791
0
    result->src[4] = a;
5792
0
    result->src[5] = b;
5793
0
    result->src[6] = state;
5794
5795
0
    return result;
5796
0
}
5797
5798
// ggml_unary
5799
5800
static struct ggml_tensor * ggml_unary_impl(
5801
        struct ggml_context * ctx,
5802
        struct ggml_tensor  * a,
5803
        enum ggml_unary_op    op,
5804
0
        bool                  inplace) {
5805
0
    GGML_ASSERT(ggml_is_contiguous_rows(a));
5806
5807
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5808
5809
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
5810
5811
0
    result->op     = GGML_OP_UNARY;
5812
0
    result->src[0] = a;
5813
5814
0
    return result;
5815
0
}
5816
5817
struct ggml_tensor * ggml_unary(
5818
        struct ggml_context * ctx,
5819
        struct ggml_tensor  * a,
5820
0
        enum ggml_unary_op    op) {
5821
0
    return ggml_unary_impl(ctx, a, op, false);
5822
0
}
5823
5824
struct ggml_tensor * ggml_unary_inplace(
5825
        struct ggml_context * ctx,
5826
        struct ggml_tensor  * a,
5827
0
        enum ggml_unary_op    op) {
5828
0
    return ggml_unary_impl(ctx, a, op, true);
5829
0
}
5830
5831
// ggml_map_custom1
5832
5833
static struct ggml_tensor * ggml_map_custom1_impl(
5834
        struct ggml_context      * ctx,
5835
        struct ggml_tensor       * a,
5836
        const  ggml_custom1_op_t   fun,
5837
        int                        n_tasks,
5838
        void                     * userdata,
5839
0
        bool                       inplace) {
5840
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5841
5842
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5843
5844
0
    struct ggml_map_custom1_op_params params = {
5845
0
        /*.fun      =*/ fun,
5846
0
        /*.n_tasks  =*/ n_tasks,
5847
0
        /*.userdata =*/ userdata
5848
0
    };
5849
0
    ggml_set_op_params(result, &params, sizeof(params));
5850
5851
0
    result->op     = GGML_OP_MAP_CUSTOM1;
5852
0
    result->src[0] = a;
5853
5854
0
    return result;
5855
0
}
5856
5857
struct ggml_tensor * ggml_map_custom1(
5858
        struct ggml_context      * ctx,
5859
        struct ggml_tensor       * a,
5860
        const  ggml_custom1_op_t   fun,
5861
        int                        n_tasks,
5862
0
        void                     * userdata) {
5863
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
5864
0
}
5865
5866
struct ggml_tensor * ggml_map_custom1_inplace(
5867
        struct ggml_context      * ctx,
5868
        struct ggml_tensor       * a,
5869
        const  ggml_custom1_op_t   fun,
5870
        int                        n_tasks,
5871
0
        void                     * userdata) {
5872
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
5873
0
}
5874
5875
// ggml_map_custom2
5876
5877
static struct ggml_tensor * ggml_map_custom2_impl(
5878
        struct ggml_context      * ctx,
5879
        struct ggml_tensor       * a,
5880
        struct ggml_tensor       * b,
5881
        const  ggml_custom2_op_t   fun,
5882
        int                        n_tasks,
5883
        void                     * userdata,
5884
0
        bool                       inplace) {
5885
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5886
5887
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5888
5889
0
    struct ggml_map_custom2_op_params params = {
5890
0
        /*.fun      =*/ fun,
5891
0
        /*.n_tasks  =*/ n_tasks,
5892
0
        /*.userdata =*/ userdata
5893
0
    };
5894
0
    ggml_set_op_params(result, &params, sizeof(params));
5895
5896
0
    result->op     = GGML_OP_MAP_CUSTOM2;
5897
0
    result->src[0] = a;
5898
0
    result->src[1] = b;
5899
5900
0
    return result;
5901
0
}
5902
5903
struct ggml_tensor * ggml_map_custom2(
5904
        struct ggml_context      * ctx,
5905
        struct ggml_tensor       * a,
5906
        struct ggml_tensor       * b,
5907
        const  ggml_custom2_op_t   fun,
5908
        int                        n_tasks,
5909
0
        void                     * userdata) {
5910
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
5911
0
}
5912
5913
struct ggml_tensor * ggml_map_custom2_inplace(
5914
        struct ggml_context      * ctx,
5915
        struct ggml_tensor       * a,
5916
        struct ggml_tensor       * b,
5917
        const  ggml_custom2_op_t   fun,
5918
        int                        n_tasks,
5919
0
        void                     * userdata) {
5920
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
5921
0
}
5922
5923
// ggml_map_custom3
5924
5925
static struct ggml_tensor * ggml_map_custom3_impl(
5926
        struct ggml_context      * ctx,
5927
        struct ggml_tensor       * a,
5928
        struct ggml_tensor       * b,
5929
        struct ggml_tensor       * c,
5930
        const  ggml_custom3_op_t   fun,
5931
        int                        n_tasks,
5932
        void                     * userdata,
5933
0
        bool                       inplace) {
5934
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5935
5936
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5937
5938
0
    struct ggml_map_custom3_op_params params = {
5939
0
        /*.fun      =*/ fun,
5940
0
        /*.n_tasks  =*/ n_tasks,
5941
0
        /*.userdata =*/ userdata
5942
0
    };
5943
0
    ggml_set_op_params(result, &params, sizeof(params));
5944
5945
0
    result->op     = GGML_OP_MAP_CUSTOM3;
5946
0
    result->src[0] = a;
5947
0
    result->src[1] = b;
5948
0
    result->src[2] = c;
5949
5950
0
    return result;
5951
0
}
5952
5953
struct ggml_tensor * ggml_map_custom3(
5954
        struct ggml_context      * ctx,
5955
        struct ggml_tensor       * a,
5956
        struct ggml_tensor       * b,
5957
        struct ggml_tensor       * c,
5958
        const  ggml_custom3_op_t   fun,
5959
        int                        n_tasks,
5960
0
        void                     * userdata) {
5961
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
5962
0
}
5963
5964
struct ggml_tensor * ggml_map_custom3_inplace(
5965
        struct ggml_context      * ctx,
5966
        struct ggml_tensor       * a,
5967
        struct ggml_tensor       * b,
5968
        struct ggml_tensor       * c,
5969
        const  ggml_custom3_op_t   fun,
5970
        int                        n_tasks,
5971
0
        void                     * userdata) {
5972
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
5973
0
}
5974
5975
struct ggml_tensor * ggml_custom_4d(
5976
        struct ggml_context * ctx,
5977
        enum ggml_type        type,
5978
        int64_t               ne0,
5979
        int64_t               ne1,
5980
        int64_t               ne2,
5981
        int64_t               ne3,
5982
        struct ggml_tensor ** args,
5983
        int                   n_args,
5984
        ggml_custom_op_t      fun,
5985
        int                   n_tasks,
5986
0
        void                * userdata) {
5987
5988
0
    GGML_ASSERT(n_args < GGML_MAX_SRC);
5989
5990
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
5991
5992
0
    struct ggml_custom_op_params params = {
5993
0
        /*.fun      =*/ fun,
5994
0
        /*.n_tasks  =*/ n_tasks,
5995
0
        /*.userdata =*/ userdata
5996
0
    };
5997
0
    ggml_set_op_params(result, &params, sizeof(params));
5998
5999
0
    result->op = GGML_OP_CUSTOM;
6000
0
    for (int i = 0; i < n_args; i++) {
6001
0
        result->src[i] = args[i];
6002
0
    }
6003
6004
0
    return result;
6005
0
}
6006
6007
struct ggml_tensor * ggml_custom_inplace(
6008
        struct ggml_context * ctx,
6009
        struct ggml_tensor  * a,
6010
        struct ggml_tensor ** args,
6011
        int                   n_args,
6012
        ggml_custom_op_t      fun,
6013
        int                   n_tasks,
6014
0
        void                * userdata) {
6015
6016
0
    GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
6017
6018
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6019
6020
0
    struct ggml_custom_op_params params = {
6021
0
        /*.fun      =*/ fun,
6022
0
        /*.n_tasks  =*/ n_tasks,
6023
0
        /*.userdata =*/ userdata
6024
0
    };
6025
0
    ggml_set_op_params(result, &params, sizeof(params));
6026
6027
0
    result->op = GGML_OP_CUSTOM;
6028
0
    result->src[0] = a;
6029
0
    for (int i = 0; i < n_args; i++) {
6030
0
        result->src[i + 1] = args[i];
6031
0
    }
6032
6033
0
    return result;
6034
0
}
6035
// ggml_cross_entropy_loss
6036
6037
struct ggml_tensor * ggml_cross_entropy_loss(
6038
        struct ggml_context * ctx,
6039
        struct ggml_tensor  * a,
6040
0
        struct ggml_tensor  * b) {
6041
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
6042
6043
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
6044
6045
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS;
6046
0
    result->src[0] = a;
6047
0
    result->src[1] = b;
6048
6049
0
    return result;
6050
0
}
6051
6052
// ggml_cross_entropy_loss_back
6053
6054
struct ggml_tensor * ggml_cross_entropy_loss_back(
6055
        struct ggml_context * ctx,
6056
        struct ggml_tensor  * a,
6057
        struct ggml_tensor  * b,
6058
0
        struct ggml_tensor  * c) {
6059
0
    GGML_ASSERT(ggml_is_scalar(a));
6060
0
    GGML_ASSERT(ggml_are_same_shape(b, c));
6061
6062
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, b);
6063
6064
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
6065
0
    result->src[0] = a;
6066
0
    result->src[1] = b;
6067
0
    result->src[2] = c;
6068
6069
0
    return result;
6070
0
}
6071
6072
// opt_step_adamw
6073
6074
struct ggml_tensor * ggml_opt_step_adamw(
6075
        struct ggml_context * ctx,
6076
        struct ggml_tensor  * a,
6077
        struct ggml_tensor  * grad,
6078
        struct ggml_tensor  * m,
6079
        struct ggml_tensor  * v,
6080
0
        struct ggml_tensor  * adamw_params) {
6081
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6082
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6083
0
    GGML_ASSERT(ggml_are_same_shape(a, m));
6084
0
    GGML_ASSERT(ggml_are_same_shape(a, v));
6085
0
    GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
6086
0
    GGML_ASSERT(ggml_nelements(adamw_params) == 7);
6087
6088
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6089
6090
0
    result->op     = GGML_OP_OPT_STEP_ADAMW;
6091
0
    result->src[0] = a;
6092
0
    result->src[1] = grad;
6093
0
    result->src[2] = m;
6094
0
    result->src[3] = v;
6095
0
    result->src[4] = adamw_params;
6096
6097
0
    return result;
6098
0
}
6099
6100
// opt_step_sgd
6101
6102
struct ggml_tensor * ggml_opt_step_sgd(
6103
        struct ggml_context * ctx,
6104
        struct ggml_tensor  * a,
6105
        struct ggml_tensor  * grad,
6106
0
        struct ggml_tensor  * params) {
6107
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6108
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6109
0
    GGML_ASSERT(params->type == GGML_TYPE_F32);
6110
0
    GGML_ASSERT(ggml_nelements(params) == 2);
6111
6112
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6113
6114
0
    result->op     = GGML_OP_OPT_STEP_SGD;
6115
0
    result->src[0] = a;
6116
0
    result->src[1] = grad;
6117
0
    result->src[2] = params;
6118
6119
0
    return result;
6120
0
}
6121
6122
// solve_tri
6123
6124
struct ggml_tensor * ggml_solve_tri(
6125
        struct ggml_context * ctx,
6126
        struct ggml_tensor  * a,
6127
        struct ggml_tensor  * b,
6128
        bool                  left,
6129
        bool                  lower,
6130
0
        bool                  uni) {
6131
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
6132
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
6133
6134
    // A must be square and lower diagonal
6135
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
6136
    // B must have same outer dimension as A
6137
0
    GGML_ASSERT(a->ne[1] == b->ne[1]);
6138
6139
    // batch dimensions must be equal
6140
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
6141
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
6142
6143
0
    GGML_ASSERT(ggml_is_contiguous(a));
6144
0
    GGML_ASSERT(ggml_is_contiguous(b));
6145
6146
0
    GGML_ASSERT(lower && left && !uni); // TODO: support other variants
6147
6148
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
6149
6150
0
    result->op     = GGML_OP_SOLVE_TRI;
6151
0
    result->src[0] = a;
6152
0
    result->src[1] = b;
6153
6154
0
    return result;
6155
0
}
6156
6157
// ggml_gated_delta_net
6158
6159
struct ggml_tensor * ggml_gated_delta_net(
6160
        struct ggml_context * ctx,
6161
        struct ggml_tensor  * q,
6162
        struct ggml_tensor  * k,
6163
        struct ggml_tensor  * v,
6164
        struct ggml_tensor  * g,
6165
        struct ggml_tensor  * beta,
6166
0
        struct ggml_tensor  * state) {
6167
0
    GGML_ASSERT(ggml_is_contiguous_rows(q));
6168
0
    GGML_ASSERT(ggml_is_contiguous_rows(k));
6169
0
    GGML_ASSERT(ggml_is_contiguous_rows(v));
6170
0
    GGML_ASSERT(ggml_is_contiguous(g));
6171
0
    GGML_ASSERT(ggml_is_contiguous(beta));
6172
0
    GGML_ASSERT(ggml_is_contiguous(state));
6173
6174
0
    GGML_ASSERT(q->type == GGML_TYPE_F32);
6175
0
    GGML_ASSERT(k->type == GGML_TYPE_F32);
6176
0
    GGML_ASSERT(v->type == GGML_TYPE_F32);
6177
0
    GGML_ASSERT(g->type == GGML_TYPE_F32);
6178
0
    GGML_ASSERT(beta->type == GGML_TYPE_F32);
6179
0
    GGML_ASSERT(state->type == GGML_TYPE_F32);
6180
6181
0
    const int64_t S_v      = v->ne[0];
6182
0
    const int64_t H        = v->ne[1];
6183
0
    const int64_t n_tokens = v->ne[2];
6184
0
    const int64_t n_seqs   = v->ne[3];
6185
6186
    // gate: scalar [1, H, T, B] or vector [S_v, H, T, B] (KDA)
6187
0
    GGML_ASSERT(g->ne[0] == 1 || g->ne[0] == S_v);
6188
0
    GGML_ASSERT(beta->ne[0] == 1);
6189
6190
0
    GGML_ASSERT(ggml_nelements(state) == S_v * S_v * H * n_seqs);
6191
6192
    // concat output and new_state into a single tensor
6193
    // output: S_v * H * n_tokens * n_seqs, state: S_v * S_v * H * n_seqs
6194
0
    const int64_t ne[4] = { S_v * H, n_tokens * n_seqs + S_v * n_seqs, 1, 1 };
6195
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
6196
6197
0
    result->op     = GGML_OP_GATED_DELTA_NET;
6198
0
    result->src[0] = q;
6199
0
    result->src[1] = k;
6200
0
    result->src[2] = v;
6201
0
    result->src[3] = g;
6202
0
    result->src[4] = beta;
6203
0
    result->src[5] = state;
6204
6205
0
    return result;
6206
0
}
6207
6208
////////////////////////////////////////////////////////////////////////////////
6209
6210
0
struct ggml_hash_set ggml_hash_set_new(size_t size) {
6211
0
    size = ggml_hash_size(size);
6212
0
    struct ggml_hash_set result;
6213
0
    result.size = size;
6214
0
    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
6215
0
    result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
6216
0
    return result;
6217
0
}
6218
6219
0
void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
6220
0
    memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
6221
0
}
6222
6223
0
void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
6224
0
    GGML_FREE(hash_set->used);
6225
0
    GGML_FREE(hash_set->keys);
6226
0
}
6227
6228
0
size_t ggml_hash_size(size_t min_sz) {
6229
    // next primes after powers of two
6230
0
    static const size_t primes[] = {
6231
0
        2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
6232
0
        2053, 4099, 8209, 16411, 32771, 65537, 131101,
6233
0
        262147, 524309, 1048583, 2097169, 4194319, 8388617,
6234
0
        16777259, 33554467, 67108879, 134217757, 268435459,
6235
0
        536870923, 1073741827, 2147483659
6236
0
    };
6237
0
    static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
6238
6239
    // find the smallest prime that is larger or equal than min_sz
6240
0
    size_t l = 0;
6241
0
    size_t r = n_primes;
6242
0
    while (l < r) {
6243
0
        size_t m = (l + r)/2;
6244
0
        if (primes[m] < min_sz) {
6245
0
            l = m + 1;
6246
0
        } else {
6247
0
            r = m;
6248
0
        }
6249
0
    }
6250
0
    size_t sz = l < n_primes ? primes[l] : min_sz | 1;
6251
0
    return sz;
6252
0
}
6253
6254
struct hash_map {
6255
    struct ggml_hash_set set;
6256
    struct ggml_tensor ** vals;
6257
};
6258
6259
0
static struct hash_map * ggml_new_hash_map(size_t size) {
6260
0
    struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
6261
0
    result->set = ggml_hash_set_new(size);
6262
0
    result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
6263
0
    return result;
6264
0
}
6265
6266
0
static void ggml_hash_map_free(struct hash_map * map) {
6267
0
    ggml_hash_set_free(&map->set);
6268
0
    GGML_FREE(map->vals);
6269
0
    GGML_FREE(map);
6270
0
}
6271
6272
// utility functions to change gradients
6273
// isrc is the index of tensor in cgraph->visited_has_set.keys
6274
// the corresponding gradient (accumulators) are also at position isrc
6275
// if tensor has a gradient accumulator, modify that accumulator in-place
6276
// else if there is no gradient for tensor, set the corresponding value
6277
// else, just add/subtract/etc. the gradients
6278
6279
static void ggml_add_or_set(
6280
        struct ggml_context * ctx,
6281
        struct ggml_cgraph  * cgraph,
6282
        size_t                isrc,
6283
0
        struct ggml_tensor  * tensor) {
6284
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6285
0
    GGML_ASSERT(src);
6286
0
    if (cgraph->grads[isrc]) {
6287
0
        cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
6288
0
    } else {
6289
0
        cgraph->grads[isrc] = tensor;
6290
0
    }
6291
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6292
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6293
0
}
6294
6295
static void ggml_acc_or_set(
6296
        struct ggml_context * ctx,
6297
        struct ggml_cgraph  * cgraph,
6298
        size_t                isrc,
6299
        struct ggml_tensor  * tensor,
6300
        const  size_t         nb1,
6301
        const  size_t         nb2,
6302
        const  size_t         nb3,
6303
0
        const  size_t         offset) {
6304
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6305
0
    GGML_ASSERT(src);
6306
0
    if (cgraph->grads[isrc]) {
6307
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
6308
0
    } else {
6309
0
        struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
6310
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
6311
0
    }
6312
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
6313
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6314
0
}
6315
6316
static void ggml_add1_or_set(
6317
        struct ggml_context * ctx,
6318
        struct ggml_cgraph  * cgraph,
6319
        size_t                isrc,
6320
0
        struct ggml_tensor  * tensor) {
6321
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6322
0
    GGML_ASSERT(src);
6323
0
    if (cgraph->grads[isrc]) {
6324
0
        cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6325
0
    } else {
6326
0
        cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
6327
0
    }
6328
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6329
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6330
0
}
6331
6332
static void ggml_sub_or_set(
6333
        struct ggml_context * ctx,
6334
        struct ggml_cgraph  * cgraph,
6335
        size_t                isrc,
6336
0
        struct ggml_tensor  * tensor) {
6337
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6338
0
    GGML_ASSERT(src);
6339
0
    if (cgraph->grads[isrc]) {
6340
0
        cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6341
0
    } else {
6342
0
        cgraph->grads[isrc] = ggml_neg(ctx, tensor);
6343
0
    }
6344
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6345
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6346
0
}
6347
6348
static void ggml_compute_backward(
6349
0
        struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
6350
0
    struct ggml_tensor * tensor = cgraph->nodes[i];
6351
0
    struct ggml_tensor * grad   = ggml_graph_get_grad(cgraph, tensor);
6352
6353
0
    if (!grad) {
6354
0
        return;
6355
0
    }
6356
6357
0
    struct ggml_tensor * src0 = tensor->src[0];
6358
0
    struct ggml_tensor * src1 = tensor->src[1];
6359
0
    struct ggml_tensor * src2 = tensor->src[2];
6360
0
    struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
6361
0
    const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
6362
0
    const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
6363
0
    const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
6364
0
    const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
6365
0
    const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
6366
0
    const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
6367
6368
0
    switch (tensor->op) {
6369
0
        case GGML_OP_DUP: {
6370
0
            if (src0_needs_grads) {
6371
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6372
0
            }
6373
0
        } break;
6374
0
        case GGML_OP_ADD: {
6375
0
            if (src0_needs_grads) {
6376
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6377
0
            }
6378
0
            if (src1_needs_grads) {
6379
0
                struct ggml_tensor * tmp = grad;
6380
0
                if (!ggml_are_same_shape(src0, src1)) {
6381
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6382
0
                }
6383
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6384
0
            }
6385
0
        } break;
6386
0
        case GGML_OP_ADD1: {
6387
0
            if (src0_needs_grads) {
6388
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6389
0
            }
6390
0
            if (src1_needs_grads) {
6391
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
6392
0
            }
6393
0
        } break;
6394
0
        case GGML_OP_ACC: {
6395
0
            if (src0_needs_grads) {
6396
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6397
0
            }
6398
0
            if (src1_needs_grads) {
6399
0
                const size_t nb1    = ((int32_t *) tensor->op_params)[0];
6400
0
                const size_t nb2    = ((int32_t *) tensor->op_params)[1];
6401
0
                const size_t nb3    = ((int32_t *) tensor->op_params)[2];
6402
0
                const size_t offset = ((int32_t *) tensor->op_params)[3];
6403
6404
0
                struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
6405
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6406
0
                    nb1, nb2, nb3, offset);
6407
6408
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6409
0
            }
6410
0
        } break;
6411
0
        case GGML_OP_SUB: {
6412
0
            if (src0_needs_grads) {
6413
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6414
0
            }
6415
0
            if (src1_needs_grads) {
6416
0
                ggml_sub_or_set(ctx, cgraph, isrc1, grad);
6417
0
            }
6418
0
        } break;
6419
0
        case GGML_OP_MUL: {
6420
0
            if (src0_needs_grads) {
6421
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
6422
0
            }
6423
0
            if (src1_needs_grads) {
6424
0
                struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
6425
0
                if (!ggml_are_same_shape(src0, src1)) {
6426
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6427
0
                }
6428
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6429
0
            }
6430
0
        } break;
6431
0
        case GGML_OP_DIV: {
6432
0
            if (src0_needs_grads) {
6433
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
6434
0
            }
6435
0
            if (src1_needs_grads) {
6436
0
                ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
6437
0
            }
6438
0
        } break;
6439
0
        case GGML_OP_SQR: {
6440
0
            if (src0_needs_grads) {
6441
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
6442
0
            }
6443
0
        } break;
6444
0
        case GGML_OP_SQRT: {
6445
0
            if (src0_needs_grads) {
6446
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
6447
0
            }
6448
0
        } break;
6449
0
        case GGML_OP_LOG: {
6450
0
            if (src0_needs_grads) {
6451
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
6452
0
            }
6453
0
        } break;
6454
0
        case GGML_OP_SIN: {
6455
0
            if (src0_needs_grads) {
6456
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
6457
0
            }
6458
0
        } break;
6459
0
        case GGML_OP_COS: {
6460
0
            if (src0_needs_grads) {
6461
0
                ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
6462
0
            }
6463
0
        } break;
6464
0
        case GGML_OP_SUM: {
6465
0
            if (src0_needs_grads) {
6466
0
                ggml_add1_or_set(ctx, cgraph, isrc0, grad);
6467
0
            }
6468
0
        } break;
6469
0
        case GGML_OP_SUM_ROWS: {
6470
0
            if (src0_needs_grads) {
6471
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6472
0
            }
6473
0
        } break;
6474
0
        case GGML_OP_MEAN: {
6475
0
            if (src0_needs_grads) {
6476
0
                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false));
6477
0
            }
6478
0
        } break;
6479
0
        case GGML_OP_REPEAT: {
6480
0
            if (src0_needs_grads) {
6481
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
6482
0
            }
6483
0
        } break;
6484
0
        case GGML_OP_REPEAT_BACK: {
6485
0
            if (src0_needs_grads) {
6486
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6487
0
            }
6488
0
        } break;
6489
0
        case GGML_OP_RMS_NORM: {
6490
0
            if (src0_needs_grads) {
6491
0
                float eps;
6492
0
                memcpy(&eps, tensor->op_params, sizeof(float));
6493
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
6494
0
            }
6495
0
        } break;
6496
0
        case GGML_OP_MUL_MAT: {
6497
            // https://cs231n.github.io/optimization-2/#staged
6498
            // # forward pass
6499
            // s0 = np.random.randn(5, 10)
6500
            // s1 = np.random.randn(10, 3)
6501
            // t = s0.dot(s1)
6502
6503
            // # now suppose we had the gradient on t from above in the circuit
6504
            // dt = np.random.randn(*t.shape) # same shape as t
6505
            // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
6506
            // ds1 = t.T.dot(dt)
6507
6508
            // tensor.shape [m,p,qq,rr]
6509
            // src0.shape   [n,m,q1,r1]
6510
            // src1.shape   [n,p,qq,rr]
6511
6512
0
            if (src0_needs_grads) {
6513
0
                GGML_ASSERT(grad->ne[2] == src1->ne[2]);
6514
0
                GGML_ASSERT(grad->ne[3] == src1->ne[3]);
6515
0
                struct ggml_tensor * tmp =
6516
0
                    ggml_out_prod(ctx, // [n,m,qq,rr]
6517
0
                        src1,          // [n,p,qq,rr]
6518
0
                        grad);         // [m,p,qq,rr]
6519
0
                if (!ggml_are_same_shape(tmp, src0)) {
6520
0
                    GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
6521
0
                    GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
6522
0
                    GGML_ASSERT(tmp->ne[3] == 1);
6523
6524
0
                    const int64_t nr2 = tmp->ne[2] / src0->ne[2];
6525
0
                    const size_t nb2 = tmp->nb[2] * nr2;
6526
0
                    const size_t nb3 = tmp->nb[2];
6527
6528
0
                    tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
6529
0
                    tmp = ggml_repeat_back(ctx, tmp, src0);
6530
0
                }
6531
0
                ggml_add_or_set(ctx, cgraph, isrc0, tmp);
6532
0
            }
6533
0
            if (src1_needs_grads) {
6534
0
                ggml_add_or_set(ctx, cgraph, isrc1,
6535
                        // ggml_mul_mat(ctx,                   // [n,p,qq,rr]
6536
                        //     ggml_cont(ctx,                  // [m,n,q1,r1]
6537
                        //         ggml_transpose(ctx, src0)), // [m,n,q1,r1]
6538
                        //     grad),                          // [m,p,qq,rr]
6539
6540
                        // when src0 is bigger than tensor->grad (this is mostly the case in llama),
6541
                        // avoid transpose of src0, rather transpose smaller tensor->grad
6542
                        // and then use ggml_out_prod
6543
0
                        ggml_out_prod(ctx,      // [n,p,qq,rr]
6544
0
                            src0,               // [n,m,q1,r1]
6545
0
                            ggml_transpose(ctx, // [p,m,qq,rr]
6546
0
                                grad)));        // [m,p,qq,rr]
6547
0
            }
6548
0
        } break;
6549
0
        case GGML_OP_SCALE: {
6550
0
            if (src0_needs_grads) {
6551
0
                float s;
6552
0
                memcpy(&s, tensor->op_params, sizeof(float));
6553
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false));
6554
0
            }
6555
0
        } break;
6556
0
        case GGML_OP_SET: {
6557
0
            const size_t nb1    = ((const int32_t *) tensor->op_params)[0];
6558
0
            const size_t nb2    = ((const int32_t *) tensor->op_params)[1];
6559
0
            const size_t nb3    = ((const int32_t *) tensor->op_params)[2];
6560
0
            const size_t offset = ((const int32_t *) tensor->op_params)[3];
6561
6562
0
            struct ggml_tensor * tensor_grad_view = NULL;
6563
6564
0
            if (src0_needs_grads || src1_needs_grads) {
6565
0
                GGML_ASSERT(src0->type == tensor->type);
6566
0
                GGML_ASSERT(!cgraph->grads[isrc0] ||                      cgraph->grads[isrc0]->type == grad->type);
6567
0
                GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);
6568
6569
0
                tensor_grad_view = ggml_view_4d(ctx,
6570
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6571
0
                    nb1, nb2, nb3, offset);
6572
0
            }
6573
6574
0
            if (src0_needs_grads) {
6575
0
                struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
6576
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
6577
0
            }
6578
6579
0
            if (src1_needs_grads) {
6580
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6581
0
            }
6582
0
        } break;
6583
0
        case GGML_OP_CPY: {
6584
            // cpy overwrites value of src1 by src0 and returns view(src1)
6585
            // the overwriting is mathematically equivalent to:
6586
            // tensor = src0 * 1 + src1 * 0
6587
0
            if (src0_needs_grads) {
6588
                // dsrc0 = dtensor * 1
6589
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
6590
0
            }
6591
0
            if (src1_needs_grads) {
6592
                // dsrc1 = dtensor * 0 -> noop
6593
0
            }
6594
0
        } break;
6595
0
        case GGML_OP_CONT: {
6596
            // same as cpy
6597
0
            if (src0_needs_grads) {
6598
0
                GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
6599
0
                GGML_ASSERT(ggml_is_contiguous(grad));
6600
0
                GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
6601
0
                ggml_add_or_set(ctx, cgraph, isrc0,
6602
0
                    ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
6603
0
            }
6604
0
        } break;
6605
0
        case GGML_OP_RESHAPE: {
6606
0
            if (src0_needs_grads) {
6607
0
                struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
6608
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
6609
0
            }
6610
0
        } break;
6611
0
        case GGML_OP_VIEW: {
6612
0
            if (src0_needs_grads) {
6613
0
                size_t offset;
6614
6615
0
                memcpy(&offset, tensor->op_params, sizeof(offset));
6616
6617
0
                size_t nb1 = tensor->nb[1];
6618
0
                size_t nb2 = tensor->nb[2];
6619
0
                size_t nb3 = tensor->nb[3];
6620
6621
0
                if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
6622
                    // gradient is typically F32, but src0 could be other type
6623
0
                    size_t ng = ggml_element_size(cgraph->grads[isrc0]);
6624
0
                    size_t n0 = ggml_element_size(src0);
6625
0
                    GGML_ASSERT(offset % n0 == 0);
6626
0
                    GGML_ASSERT(nb1 % n0 == 0);
6627
0
                    GGML_ASSERT(nb2 % n0 == 0);
6628
0
                    GGML_ASSERT(nb3 % n0 == 0);
6629
0
                    offset = (offset / n0) * ng;
6630
0
                    nb1 = (nb1 / n0) * ng;
6631
0
                    nb2 = (nb2 / n0) * ng;
6632
0
                    nb3 = (nb3 / n0) * ng;
6633
0
                }
6634
6635
0
                ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
6636
0
            }
6637
0
        } break;
6638
0
        case GGML_OP_PERMUTE: {
6639
0
            if (src0_needs_grads) {
6640
0
                const int32_t * axes = (const int32_t *) tensor->op_params;
6641
0
                const int axis0 = axes[0] & 0x3;
6642
0
                const int axis1 = axes[1] & 0x3;
6643
0
                const int axis2 = axes[2] & 0x3;
6644
0
                const int axis3 = axes[3] & 0x3;
6645
0
                int axb[4] = {0,0,0,0}; // axes backward
6646
0
                axb[axis0] = 0;
6647
0
                axb[axis1] = 1;
6648
0
                axb[axis2] = 2;
6649
0
                axb[axis3] = 3;
6650
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
6651
0
            }
6652
0
        } break;
6653
0
        case GGML_OP_TRANSPOSE: {
6654
0
            if (src0_needs_grads) {
6655
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
6656
0
            }
6657
0
        } break;
6658
0
        case GGML_OP_GET_ROWS: {
6659
0
            if (src0_needs_grads) {
6660
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
6661
0
            }
6662
0
            if (src1_needs_grads) {
6663
                // noop
6664
0
            }
6665
0
        } break;
6666
0
        case GGML_OP_DIAG_MASK_INF: {
6667
0
            if (src0_needs_grads) {
6668
                /* ggml_diag_mask_inf_impl() shouldn't be here */
6669
                /* ref:  https://github.com/ggml-org/llama.cpp/pull/4203#discussion_r1412377992 */
6670
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6671
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6672
0
            }
6673
0
        } break;
6674
0
        case GGML_OP_DIAG_MASK_ZERO: {
6675
0
            if (src0_needs_grads) {
6676
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6677
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6678
0
            }
6679
0
        } break;
6680
0
        case GGML_OP_SOFT_MAX: {
6681
0
            if (src0_needs_grads) {
6682
0
                float scale    = 1.0f;
6683
0
                float max_bias = 0.0f;
6684
6685
0
                memcpy(&scale,    (const float *) tensor->op_params + 0, sizeof(float));
6686
0
                memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
6687
6688
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
6689
0
            }
6690
0
            GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
6691
0
        } break;
6692
0
        case GGML_OP_ROPE: {
6693
0
            if (src0_needs_grads) {
6694
                //const int n_past = ((int32_t *) tensor->op_params)[0];
6695
0
                const int n_dims     = ((const int32_t *) tensor->op_params)[1];
6696
0
                const int mode       = ((const int32_t *) tensor->op_params)[2];
6697
                //const int n_ctx      = ((int32_t *) tensor->op_params)[3];
6698
0
                const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
6699
0
                float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
6700
0
                int sections[4] = {0, 0, 0, 0};
6701
6702
0
                memcpy(&freq_base,   (const float *) tensor->op_params +  5, sizeof(float));
6703
0
                memcpy(&freq_scale,  (const float *) tensor->op_params +  6, sizeof(float));
6704
0
                memcpy(&ext_factor,  (const float *) tensor->op_params +  7, sizeof(float));
6705
0
                memcpy(&attn_factor, (const float *) tensor->op_params +  8, sizeof(float));
6706
0
                memcpy(&beta_fast,   (const float *) tensor->op_params +  9, sizeof(float));
6707
0
                memcpy(&beta_slow,   (const float *) tensor->op_params + 10, sizeof(float));
6708
0
                memcpy(&sections,                    tensor->op_params + 11, sizeof(sections));
6709
6710
0
                struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
6711
0
                    ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
6712
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
6713
0
                    ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
6714
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
6715
0
                ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
6716
0
            }
6717
0
            GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
6718
0
        } break;
6719
0
        case GGML_OP_IM2COL: {
6720
0
            if (src1_needs_grads) {
6721
0
                const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
6722
0
                const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
6723
0
                const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
6724
0
                const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
6725
0
                const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
6726
0
                const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
6727
0
                const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
6728
6729
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
6730
0
            }
6731
0
        } break;
6732
0
        case GGML_OP_POOL_2D: {
6733
0
            if (src0_needs_grads) {
6734
0
                const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
6735
0
                const      int32_t      k0 = ggml_get_op_params_i32(tensor, 1);
6736
0
                const      int32_t      k1 = ggml_get_op_params_i32(tensor, 2);
6737
0
                const      int32_t      s0 = ggml_get_op_params_i32(tensor, 3);
6738
0
                const      int32_t      s1 = ggml_get_op_params_i32(tensor, 4);
6739
0
                const      int32_t      p0 = ggml_get_op_params_i32(tensor, 5);
6740
0
                const      int32_t      p1 = ggml_get_op_params_i32(tensor, 6);
6741
6742
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
6743
0
            }
6744
0
        } break;
6745
0
        case GGML_OP_WIN_PART:
6746
0
        case GGML_OP_WIN_UNPART:
6747
0
        case GGML_OP_UNARY: {
6748
0
            switch (ggml_get_unary_op(tensor)) {
6749
0
                case GGML_UNARY_OP_ABS: {
6750
0
                    if (src0_needs_grads) {
6751
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
6752
0
                    }
6753
0
                } break;
6754
0
                case GGML_UNARY_OP_SGN: {
6755
                    // noop
6756
0
                } break;
6757
0
                case GGML_UNARY_OP_NEG: {
6758
0
                    if (src0_needs_grads) {
6759
0
                        ggml_sub_or_set(ctx, cgraph, isrc0, grad);
6760
0
                    }
6761
0
                } break;
6762
0
                case GGML_UNARY_OP_STEP: {
6763
                    // noop
6764
0
                } break;
6765
0
                case GGML_UNARY_OP_RELU: {
6766
0
                    if (src0_needs_grads) {
6767
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
6768
0
                    }
6769
0
                } break;
6770
0
                case GGML_UNARY_OP_SILU: {
6771
0
                    if (src0_needs_grads) {
6772
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
6773
0
                    }
6774
0
                } break;
6775
0
                case GGML_UNARY_OP_EXP: {
6776
0
                    if (src0_needs_grads) {
6777
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
6778
0
                    }
6779
0
                } break;
6780
0
                case GGML_UNARY_OP_EXPM1: {
6781
0
                    if (src0_needs_grads) {
6782
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
6783
0
                    }
6784
0
                } break;
6785
0
                case GGML_UNARY_OP_SOFTPLUS: {
6786
0
                    if (src0_needs_grads) {
6787
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
6788
0
                    }
6789
0
                } break;
6790
0
                default: {
6791
0
                    fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
6792
0
                        __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
6793
0
                    GGML_ABORT("fatal error");
6794
0
                } //break;
6795
0
            }
6796
0
        } break;
6797
0
        case GGML_OP_CROSS_ENTROPY_LOSS: {
6798
0
            if (src0_needs_grads) {
6799
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
6800
0
            }
6801
0
            GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
6802
0
        } break;
6803
0
        case GGML_OP_GLU: {
6804
0
            switch (ggml_get_glu_op(tensor)) {
6805
0
                case GGML_GLU_OP_SWIGLU: {
6806
0
                    if (src0_needs_grads) {
6807
0
                        GGML_ASSERT(src1 && "backward pass only implemented for split swiglu");
6808
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0));
6809
0
                    }
6810
0
                    if (src1_needs_grads) {
6811
0
                        ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad));
6812
0
                    }
6813
0
                } break;
6814
0
                default: {
6815
0
                    GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor)));
6816
0
                } //break;
6817
0
            }
6818
0
        } break;
6819
0
        case GGML_OP_NONE: {
6820
            // noop
6821
0
        } break;
6822
0
        case GGML_OP_COUNT:
6823
0
        default: {
6824
0
            GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
6825
0
        } //break;
6826
0
    }
6827
6828
0
    GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
6829
0
    GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
6830
0
    GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
6831
0
}
6832
6833
0
static size_t ggml_visit_parents_graph(struct ggml_cgraph * cgraph, struct ggml_tensor * node, bool compute) {
6834
0
    if (node->op != GGML_OP_NONE && compute) {
6835
0
        node->flags |= GGML_TENSOR_FLAG_COMPUTE;
6836
0
    }
6837
6838
0
    const size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
6839
0
    GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
6840
6841
0
    if (ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
6842
        // already visited
6843
6844
0
        if (compute) {
6845
            // update the compute flag regardless
6846
0
            for (int i = 0; i < GGML_MAX_SRC; ++i) {
6847
0
                struct ggml_tensor * src = node->src[i];
6848
0
                if (src && ((src->flags & GGML_TENSOR_FLAG_COMPUTE) == 0)) {
6849
0
                    ggml_visit_parents_graph(cgraph, src, true);
6850
0
                }
6851
0
            }
6852
0
        }
6853
6854
0
        return node_hash_pos;
6855
0
    }
6856
6857
    // This is the first time we see this node in the current graph.
6858
0
    cgraph->visited_hash_set.keys[node_hash_pos] = node;
6859
0
    ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
6860
0
    cgraph->use_counts[node_hash_pos] = 0;
6861
6862
0
    for (int i = 0; i < GGML_MAX_SRC; ++i) {
6863
0
        const int k =
6864
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
6865
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
6866
0
            /* unknown order, just fall back to using i */ i;
6867
6868
0
        struct ggml_tensor * src = node->src[k];
6869
0
        if (src) {
6870
0
            const size_t src_hash_pos = ggml_visit_parents_graph(cgraph, src, compute);
6871
6872
            // Update the use count for this operand.
6873
0
            cgraph->use_counts[src_hash_pos]++;
6874
0
        }
6875
0
    }
6876
6877
0
    if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
6878
        // reached a leaf node, not part of the gradient graph (e.g. a constant)
6879
0
        GGML_ASSERT(cgraph->n_leafs < cgraph->size);
6880
6881
0
        if (strlen(node->name) == 0) {
6882
0
            ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
6883
0
        }
6884
6885
0
        cgraph->leafs[cgraph->n_leafs] = node;
6886
0
        cgraph->n_leafs++;
6887
0
    } else {
6888
0
        GGML_ASSERT(cgraph->n_nodes < cgraph->size);
6889
6890
0
        if (strlen(node->name) == 0) {
6891
0
            ggml_format_name(node, "node_%d", cgraph->n_nodes);
6892
0
        }
6893
6894
0
        cgraph->nodes[cgraph->n_nodes] = node;
6895
0
        cgraph->n_nodes++;
6896
0
    }
6897
6898
0
    return node_hash_pos;
6899
0
}
6900
6901
0
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand, bool compute) {
6902
0
    if (!expand) {
6903
        // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
6904
0
        ggml_graph_clear(cgraph);
6905
0
    }
6906
6907
0
    const int n_old = cgraph->n_nodes;
6908
6909
0
    ggml_visit_parents_graph(cgraph, tensor, compute);
6910
6911
0
    const int n_new = cgraph->n_nodes - n_old;
6912
0
    GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
6913
6914
0
    if (n_new > 0) {
6915
        // the last added node should always be starting point
6916
0
        GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
6917
0
    }
6918
0
}
6919
6920
struct ggml_tensor * ggml_build_forward_select(
6921
        struct ggml_cgraph  * cgraph,
6922
        struct ggml_tensor ** tensors,
6923
        int                   n_tensors,
6924
0
        int                   idx) {
6925
0
    GGML_ASSERT(idx >= 0 && idx < n_tensors);
6926
6927
0
    for (int i = 0; i < n_tensors; i++) {
6928
0
        ggml_build_forward_impl(cgraph, tensors[i], true, i == idx ? true : false);
6929
0
    }
6930
6931
0
    return tensors[idx];
6932
0
}
6933
6934
0
void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
6935
0
    ggml_build_forward_impl(cgraph, tensor, true, true);
6936
0
}
6937
6938
void ggml_build_backward_expand(
6939
        struct ggml_context *  ctx,
6940
        struct ggml_cgraph  *  cgraph,
6941
0
        struct ggml_tensor  ** grad_accs) {
6942
0
    GGML_ASSERT(cgraph->n_nodes > 0);
6943
0
    GGML_ASSERT(cgraph->grads);
6944
0
    GGML_ASSERT(cgraph->grad_accs);
6945
6946
0
    const int n_nodes_f = cgraph->n_nodes;
6947
6948
0
    memset(cgraph->grads,     0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6949
0
    memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6950
0
    bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
6951
6952
0
    {
6953
0
        bool any_params = false;
6954
0
        bool any_loss   = false;
6955
0
        for (int i = 0; i < n_nodes_f; ++i) {
6956
0
            struct ggml_tensor * node = cgraph->nodes[i];
6957
0
            any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
6958
0
            any_loss   = any_loss   || (node->flags & GGML_TENSOR_FLAG_LOSS);
6959
0
        }
6960
0
        GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
6961
0
        GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
6962
0
    }
6963
6964
0
    for (int i = 0; i < n_nodes_f; ++i) {
6965
0
        struct ggml_tensor * node = cgraph->nodes[i];
6966
6967
0
        if (node->type == GGML_TYPE_I32) {
6968
0
            continue;
6969
0
        }
6970
6971
0
        bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
6972
0
        bool ignore_src[GGML_MAX_SRC] = {false};
6973
0
        switch (node->op) {
6974
            // gradients in node->src[0] for one reason or another have no effect on output gradients
6975
0
            case GGML_OP_IM2COL:      // only used for its shape
6976
0
            case GGML_OP_IM2COL_BACK: // same as IM2COL
6977
0
                ignore_src[0] = true;
6978
0
                break;
6979
0
            case GGML_OP_UNARY: {
6980
0
                const enum ggml_unary_op uop = ggml_get_unary_op(node);
6981
                // SGN and STEP unary ops are piecewise constant
6982
0
                if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
6983
0
                    ignore_src[0] = true;
6984
0
                }
6985
0
            } break;
6986
6987
            // gradients in node->src[1] for one reason or another have no effect on output gradients
6988
0
            case GGML_OP_CPY:           // gradients in CPY target are irrelevant
6989
0
            case GGML_OP_GET_ROWS:      // row indices not differentiable
6990
0
            case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
6991
0
            case GGML_OP_ROPE:          // positions not differentiable
6992
0
                ignore_src[1] = true;
6993
0
                break;
6994
6995
0
            default:
6996
0
                break;
6997
0
        }
6998
0
        for (int j = 0; j < GGML_MAX_SRC; ++j) {
6999
0
            if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
7000
0
                continue;
7001
0
            }
7002
0
            GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
7003
0
            node_needs_grad = true;
7004
0
            break;
7005
0
        }
7006
0
        if (!node_needs_grad) {
7007
0
            continue;
7008
0
        }
7009
7010
        // inplace operations are currently not supported
7011
0
        GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
7012
0
            node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
7013
7014
0
        const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
7015
0
        GGML_ASSERT(ihash != GGML_HASHSET_FULL);
7016
0
        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
7017
0
        if (grad_accs && grad_accs[i]) {
7018
0
            cgraph->grad_accs[ihash] = grad_accs[i];
7019
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
7020
0
        } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
7021
            // loss tensors always need a gradient accumulator
7022
0
            cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
7023
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
7024
0
        }
7025
0
        grads_needed[ihash] = true;
7026
0
    }
7027
7028
0
    for (int i = n_nodes_f - 1; i >= 0; --i) {
7029
        // inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation
7030
        // use allocator to automatically make inplace operations
7031
0
        ggml_compute_backward(ctx, cgraph, i, grads_needed);
7032
0
    }
7033
7034
0
    free(grads_needed);
7035
0
}
7036
7037
0
static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
7038
0
    void * ptr = *p;
7039
0
    ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
7040
0
    *p = (void *) ((char *) ptr + size);
7041
0
    return ptr;
7042
0
}
7043
7044
0
static size_t ggml_graph_nbytes(size_t size, bool grads) {
7045
0
    size_t hash_size = ggml_hash_size(size * 2);
7046
0
    void * p = 0;
7047
0
    incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
7048
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
7049
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
7050
0
    incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts
7051
0
    incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
7052
0
    if (grads) {
7053
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
7054
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
7055
0
    }
7056
0
    incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
7057
7058
0
    size_t nbytes = (size_t) p;
7059
0
    return nbytes;
7060
0
}
7061
7062
0
size_t ggml_graph_overhead_custom(size_t size, bool grads) {
7063
0
    return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
7064
0
}
7065
7066
0
size_t ggml_graph_overhead(void) {
7067
0
    return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
7068
0
}
7069
7070
0
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
7071
0
    const size_t obj_size = ggml_graph_nbytes(size, grads);
7072
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
7073
0
    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
7074
7075
    // the size of the hash table is doubled since it needs to hold both nodes and leafs
7076
0
    size_t hash_size = ggml_hash_size(size * 2);
7077
7078
0
    void * p = cgraph + 1;
7079
7080
0
    struct ggml_tensor ** nodes_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7081
0
    struct ggml_tensor ** leafs_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7082
0
    int32_t             * use_counts_ptr =         incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t));
7083
0
    struct ggml_tensor ** hash_keys_ptr  =         incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7084
0
    struct ggml_tensor ** grads_ptr      = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
7085
0
    struct ggml_tensor ** grad_accs_ptr  = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
7086
7087
0
    ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
7088
7089
    // check that we allocated the correct amount of memory
7090
0
    assert(obj_size == (size_t)((char *)p - (char *)cgraph));
7091
7092
0
    *cgraph = (struct ggml_cgraph) {
7093
0
        /*.size         =*/ size,
7094
0
        /*.n_nodes      =*/ 0,
7095
0
        /*.n_leafs      =*/ 0,
7096
0
        /*.nodes        =*/ nodes_ptr,
7097
0
        /*.grads        =*/ grads_ptr,
7098
0
        /*.grad_accs    =*/ grad_accs_ptr,
7099
0
        /*.leafs        =*/ leafs_ptr,
7100
0
        /*.use_counts   =*/ use_counts_ptr,
7101
0
        /*.hash_table   =*/ { hash_size, hash_used, hash_keys_ptr },
7102
0
        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
7103
0
    };
7104
7105
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7106
0
    if (grads) {
7107
0
        memset(cgraph->grads,     0, hash_size*sizeof(struct ggml_tensor *));
7108
0
        memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
7109
0
    }
7110
7111
0
    return cgraph;
7112
0
}
7113
7114
0
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
7115
0
    return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
7116
0
}
7117
7118
0
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
7119
0
    struct ggml_cgraph cgraph = {
7120
0
        /*.size             =*/ 0,
7121
0
        /*.n_nodes          =*/ i1 - i0,
7122
0
        /*.n_leafs          =*/ 0,
7123
0
        /*.nodes            =*/ cgraph0->nodes + i0,
7124
0
        /*.grads            =*/ NULL, // gradients would need visited_hash_set
7125
0
        /*.grad_accs        =*/ NULL,
7126
0
        /*.leafs            =*/ NULL,
7127
0
        /*.use_counts       =*/ cgraph0->use_counts,
7128
0
        /*.visited_hash_set =*/ cgraph0->visited_hash_set,
7129
0
        /*.order            =*/ cgraph0->order,
7130
0
    };
7131
7132
0
    return cgraph;
7133
0
}
7134
7135
0
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
7136
0
    GGML_ASSERT(dst->size >= src->n_leafs);
7137
0
    GGML_ASSERT(dst->size >= src->n_nodes);
7138
0
    GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
7139
7140
0
    dst->n_leafs = src->n_leafs;
7141
0
    dst->n_nodes = src->n_nodes;
7142
0
    dst->order   = src->order;
7143
7144
0
    for (int i = 0; i < src->n_leafs; ++i) {
7145
0
        dst->leafs[i] = src->leafs[i];
7146
0
    }
7147
7148
0
    for (int i = 0; i < src->n_nodes; ++i) {
7149
0
        dst->nodes[i] = src->nodes[i];
7150
0
    }
7151
7152
0
    for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
7153
        // copy all hashset keys (tensors) that are in use
7154
0
        if (ggml_bitset_get(src->visited_hash_set.used, i)) {
7155
0
            size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
7156
0
            dst->use_counts[new_hash_pos] = src->use_counts[i];
7157
0
        }
7158
0
    }
7159
7160
0
    if (dst->grads) {
7161
0
        memset(dst->grads,     0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7162
0
        memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7163
0
    }
7164
0
    if (src->grads) {
7165
0
        GGML_ASSERT(dst->grads     != NULL);
7166
0
        GGML_ASSERT(dst->grad_accs != NULL);
7167
0
        for (int i = 0; i < src->n_nodes; ++i) {
7168
0
            const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
7169
0
            const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
7170
7171
0
            GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
7172
0
            GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
7173
0
            GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
7174
0
            GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
7175
7176
0
            dst->grads[igrad_dst]     = src->grads[igrad_src];
7177
0
            dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
7178
0
        }
7179
0
    }
7180
0
}
7181
7182
0
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
7183
0
    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
7184
0
    ggml_graph_cpy(cgraph, result);
7185
0
    return result;
7186
0
}
7187
7188
0
struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
7189
0
    if (ggml_is_empty(tensor)) {
7190
0
        return tensor;
7191
0
    }
7192
0
    if (tensor->buffer) {
7193
0
        ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
7194
0
    } else {
7195
0
        GGML_ASSERT(tensor->data);
7196
0
        memset(tensor->data, 0, ggml_nbytes(tensor));
7197
0
    }
7198
0
    return tensor;
7199
0
}
7200
7201
0
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
7202
0
    if (!cgraph) {
7203
0
        return;
7204
0
    }
7205
0
    GGML_ASSERT(cgraph->grads != NULL);
7206
7207
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7208
0
        struct ggml_tensor * node     = cgraph->nodes[i];
7209
0
        struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);
7210
7211
0
        if (node->op == GGML_OP_OPT_STEP_ADAMW) {
7212
            // clear momenta
7213
0
            ggml_set_zero(node->src[2]);
7214
0
            ggml_set_zero(node->src[3]);
7215
0
        }
7216
7217
        // initial gradients of loss should be 1, 0 otherwise
7218
0
        if (grad_acc) {
7219
0
            if (node->flags & GGML_TENSOR_FLAG_LOSS) {
7220
0
                GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
7221
0
                GGML_ASSERT(ggml_is_scalar(grad_acc));
7222
7223
0
                const float onef = 1.0f;
7224
0
                if (grad_acc->buffer) {
7225
0
                    ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
7226
0
                } else {
7227
0
                    GGML_ASSERT(grad_acc->data);
7228
0
                    *((float *) grad_acc->data) = onef;
7229
0
                }
7230
0
            } else {
7231
0
                ggml_set_zero(grad_acc);
7232
0
            }
7233
0
        }
7234
0
    }
7235
0
}
7236
7237
0
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
7238
0
    cgraph->n_leafs = 0;
7239
0
    cgraph->n_nodes = 0;
7240
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7241
0
}
7242
7243
0
int ggml_graph_size(struct ggml_cgraph * cgraph) {
7244
0
    return cgraph->size;
7245
0
}
7246
7247
0
struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
7248
0
    if (i < 0) {
7249
0
        GGML_ASSERT(cgraph->n_nodes + i >= 0);
7250
0
        return cgraph->nodes[cgraph->n_nodes + i];
7251
0
    }
7252
7253
0
    GGML_ASSERT(i < cgraph->n_nodes);
7254
0
    return cgraph->nodes[i];
7255
0
}
7256
7257
0
struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
7258
0
    return cgraph->nodes;
7259
0
}
7260
7261
0
int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
7262
0
    return cgraph->n_nodes;
7263
0
}
7264
7265
0
void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
7266
0
    GGML_ASSERT(cgraph->size > cgraph->n_nodes);
7267
0
    cgraph->nodes[cgraph->n_nodes] = tensor;
7268
0
    cgraph->n_nodes++;
7269
0
}
7270
7271
0
struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
7272
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7273
0
        struct ggml_tensor * leaf = cgraph->leafs[i];
7274
7275
0
        if (strcmp(leaf->name, name) == 0) {
7276
0
            return leaf;
7277
0
        }
7278
0
    }
7279
7280
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7281
0
        struct ggml_tensor * node = cgraph->nodes[i];
7282
7283
0
        if (strcmp(node->name, name) == 0) {
7284
0
            return node;
7285
0
        }
7286
0
    }
7287
7288
0
    return NULL;
7289
0
}
7290
7291
0
struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7292
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7293
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
7294
0
}
7295
7296
0
struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7297
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7298
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
7299
0
}
7300
7301
0
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
7302
0
    GGML_LOG_INFO("=== GRAPH ===\n");
7303
7304
0
    GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
7305
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7306
0
        struct ggml_tensor * node = cgraph->nodes[i];
7307
7308
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
7309
0
                i,
7310
0
                node->ne[0], node->ne[1], node->ne[2],
7311
0
                ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
7312
0
                      ggml_graph_get_grad(cgraph, node) ? "g" : " ");
7313
0
    }
7314
7315
0
    GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
7316
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7317
0
        struct ggml_tensor * node = cgraph->leafs[i];
7318
7319
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
7320
0
                i,
7321
0
                node->ne[0], node->ne[1],
7322
0
                ggml_op_name(node->op),
7323
0
                ggml_get_name(node));
7324
0
    }
7325
7326
0
    GGML_LOG_INFO("========================================\n");
7327
0
}
7328
7329
static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
7330
                                      const int *                idxs,
7331
                                      int                        count,
7332
0
                                      const struct ggml_tensor * tensor) {
7333
0
    GGML_ASSERT(cgraph && idxs);
7334
0
    for (int i = 0; i < count; ++i) {
7335
0
        const int node_idx = idxs[i];
7336
7337
0
        if (node_idx >= cgraph->n_nodes) {
7338
0
            return -1;
7339
0
        }
7340
0
        if (cgraph->nodes[node_idx] == tensor) {
7341
0
            return i;
7342
0
        }
7343
0
    }
7344
0
    return -1;
7345
0
}
7346
7347
bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
7348
                                const int *                node_idxs,
7349
                                int                        count,
7350
                                const enum ggml_op *       ops,
7351
                                const int *                outputs,
7352
0
                                int                        num_outputs) {
7353
0
    GGML_ASSERT(outputs && num_outputs > 0);
7354
7355
0
    for (int i = 0; i < count; ++i) {
7356
0
        if (node_idxs[i] >= cgraph->n_nodes) {
7357
0
            return false;
7358
0
        }
7359
7360
0
        const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
7361
7362
0
        if (node->op != ops[i]) {
7363
0
            return false;
7364
0
        }
7365
7366
0
        if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
7367
0
            return false;
7368
0
        }
7369
7370
0
        if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
7371
0
            continue;
7372
0
        }
7373
7374
0
        if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
7375
0
            return false;
7376
0
        }
7377
7378
0
        int subgraph_uses = 0;
7379
0
        for (int j = i + 1; j < count; ++j) {
7380
0
            const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
7381
0
            for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
7382
0
                if (other_node->src[src_idx] == node) {
7383
0
                    subgraph_uses++;
7384
0
                }
7385
0
            }
7386
0
        }
7387
7388
0
        if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
7389
0
            return false;
7390
0
        }
7391
7392
        // if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph
7393
0
        struct ggml_tensor * view_src = node->view_src;
7394
0
        while (view_src) {
7395
0
            if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
7396
0
                return false;
7397
0
            }
7398
0
            view_src = view_src->view_src;
7399
0
        }
7400
0
    }
7401
7402
0
    return true;
7403
0
}
7404
7405
// check if node is part of the graph
7406
0
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7407
0
    if (cgraph == NULL) {
7408
0
        return true;
7409
0
    }
7410
7411
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7412
0
        if (cgraph->nodes[i] == node) {
7413
0
            return true;
7414
0
        }
7415
0
    }
7416
7417
0
    return false;
7418
0
}
7419
7420
0
static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7421
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7422
0
        struct ggml_tensor * parent = cgraph->nodes[i];
7423
0
        struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);
7424
7425
0
        if (grad == node) {
7426
0
            return parent;
7427
0
        }
7428
0
    }
7429
7430
0
    return NULL;
7431
0
}
7432
7433
0
static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7434
0
    struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
7435
0
    struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
7436
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
7437
0
            gparent0 ? (void *) gparent0 : (void *) parent,
7438
0
            gparent ? (void *) gparent : (void *) node,
7439
0
            gparent ? "empty" : "vee",
7440
0
            gparent ? "dashed" : "solid",
7441
0
            label);
7442
0
}
7443
7444
0
static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7445
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ label = \"%s\"; ]\n",
7446
0
            (void *) parent,
7447
0
            (void *) node,
7448
0
            label);
7449
0
}
7450
7451
0
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * cgraph, const char * filename) {
7452
0
    char color[16];
7453
7454
0
    FILE * fp = ggml_fopen(filename, "w");
7455
0
    GGML_ASSERT(fp);
7456
7457
0
    fprintf(fp, "digraph G {\n");
7458
0
    fprintf(fp, "  newrank = true;\n");
7459
0
    fprintf(fp, "  rankdir = TB;\n");
7460
7461
0
    for (int i = 0; i < gb->n_nodes; i++) {
7462
0
        struct ggml_tensor * node = gb->nodes[i];
7463
0
        struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);
7464
7465
0
        if (ggml_graph_get_parent(gb, node) != NULL) {
7466
0
            continue;
7467
0
        }
7468
7469
0
        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
7470
0
            snprintf(color, sizeof(color), "yellow");
7471
0
        } else if (grad) {
7472
0
            if (ggml_graph_find(cgraph, node)) {
7473
0
                snprintf(color, sizeof(color), "green");
7474
0
            } else {
7475
0
                snprintf(color, sizeof(color), "lightblue");
7476
0
            }
7477
0
        } else {
7478
0
            snprintf(color, sizeof(color), "white");
7479
0
        }
7480
7481
0
        fprintf(fp, "  \"%p\" [ "
7482
0
                    "style = filled; fillcolor = %s; shape = record; "
7483
0
                    "label=\"",
7484
0
                (void *) node, color);
7485
7486
0
        if (strlen(node->name) > 0) {
7487
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7488
0
        } else {
7489
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7490
0
        }
7491
7492
0
        if (ggml_is_matrix(node)) {
7493
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
7494
0
        } else {
7495
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
7496
0
        }
7497
7498
0
        if (grad) {
7499
0
            fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
7500
0
        } else {
7501
0
            fprintf(fp, "\"; ]\n");
7502
0
        }
7503
0
    }
7504
7505
0
    for (int i = 0; i < gb->n_leafs; i++) {
7506
0
        struct ggml_tensor * node = gb->leafs[i];
7507
7508
0
        snprintf(color, sizeof(color), "pink");
7509
7510
0
        fprintf(fp, "  \"%p\" [ "
7511
0
                    "style = filled; fillcolor = %s; shape = record; "
7512
0
                    "label=\"<x>",
7513
0
                (void *) node, color);
7514
7515
0
        if (strlen(node->name) > 0) {
7516
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7517
0
        } else {
7518
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7519
0
        }
7520
7521
0
        fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
7522
0
        if (ggml_nelements(node) < 5 && node->data != NULL) {
7523
0
            fprintf(fp, " | (");
7524
0
            for (int j = 0; j < ggml_nelements(node); j++) {
7525
                // FIXME: use ggml-backend to obtain the tensor data
7526
                //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
7527
                //    fprintf(fp, "%d", ggml_get_i32_1d(node, j));
7528
                //}
7529
                //else if (node->type == GGML_TYPE_F32 ||
7530
                //         node->type == GGML_TYPE_F16 ||
7531
                //         node->type == GGML_TYPE_BF16) {
7532
                //    fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
7533
                //}
7534
                //else
7535
0
                {
7536
0
                    fprintf(fp, "#");
7537
0
                }
7538
0
                if (j < ggml_nelements(node) - 1) {
7539
0
                    fprintf(fp, ", ");
7540
0
                }
7541
0
            }
7542
0
            fprintf(fp, ")");
7543
0
        }
7544
0
        fprintf(fp, "\"; ]\n");
7545
0
    }
7546
7547
0
    for (int i = 0; i < gb->n_nodes; i++) {
7548
0
        struct ggml_tensor * node = gb->nodes[i];
7549
7550
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7551
0
            if (node->src[j]) {
7552
0
                char label[16];
7553
0
                snprintf(label, sizeof(label), "src %d", j);
7554
0
                ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
7555
0
            }
7556
0
        }
7557
0
    }
7558
7559
0
    for (int i = 0; i < gb->n_leafs; i++) {
7560
0
        struct ggml_tensor * node = gb->leafs[i];
7561
7562
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7563
0
            if (node->src[j]) {
7564
0
                char label[16];
7565
0
                snprintf(label, sizeof(label), "src %d", j);
7566
0
                ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
7567
0
            }
7568
0
        }
7569
0
    }
7570
7571
0
    fprintf(fp, "}\n");
7572
7573
0
    fclose(fp);
7574
7575
0
    GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
7576
0
}
7577
7578
////////////////////////////////////////////////////////////////////////////////
7579
7580
0
void ggml_set_input(struct ggml_tensor * tensor) {
7581
0
    tensor->flags |= GGML_TENSOR_FLAG_INPUT;
7582
0
}
7583
7584
0
void ggml_set_output(struct ggml_tensor * tensor) {
7585
0
    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
7586
0
}
7587
7588
0
void ggml_set_param(struct ggml_tensor * tensor) {
7589
0
    GGML_ASSERT(tensor->op == GGML_OP_NONE);
7590
0
    tensor->flags |= GGML_TENSOR_FLAG_PARAM;
7591
0
}
7592
7593
0
void ggml_set_loss(struct ggml_tensor * tensor) {
7594
0
    GGML_ASSERT(ggml_is_scalar(tensor));
7595
0
    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
7596
0
    tensor->flags |= GGML_TENSOR_FLAG_LOSS;
7597
0
}
7598
7599
////////////////////////////////////////////////////////////////////////////////
7600
7601
0
void ggml_quantize_init(enum ggml_type type) {
7602
0
    ggml_critical_section_start();
7603
7604
0
    switch (type) {
7605
0
        case GGML_TYPE_IQ2_XXS:
7606
0
        case GGML_TYPE_IQ2_XS:
7607
0
        case GGML_TYPE_IQ2_S:
7608
0
        case GGML_TYPE_IQ1_S:
7609
0
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
7610
0
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
7611
0
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
7612
0
        default: // nothing
7613
0
            break;
7614
0
    }
7615
7616
0
    ggml_critical_section_end();
7617
0
}
7618
7619
4.14k
void ggml_quantize_free(void) {
7620
4.14k
    ggml_critical_section_start();
7621
7622
4.14k
    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
7623
4.14k
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
7624
4.14k
    iq2xs_free_impl(GGML_TYPE_IQ2_S);
7625
4.14k
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
7626
4.14k
    iq2xs_free_impl(GGML_TYPE_IQ1_M);
7627
4.14k
    iq3xs_free_impl(256);
7628
4.14k
    iq3xs_free_impl(512);
7629
7630
4.14k
    ggml_critical_section_end();
7631
4.14k
}
7632
7633
0
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
7634
0
    return
7635
0
        type == GGML_TYPE_IQ2_XXS ||
7636
0
        type == GGML_TYPE_IQ2_XS  ||
7637
0
        type == GGML_TYPE_IQ1_S;//   ||
7638
        //type == GGML_TYPE_IQ1_M;
7639
0
}
7640
7641
size_t ggml_quantize_chunk(
7642
        enum ggml_type   type,
7643
           const float * src,
7644
                  void * dst,
7645
               int64_t   start,
7646
               int64_t   nrows,
7647
               int64_t   n_per_row,
7648
0
           const float * imatrix) {
7649
0
    const int64_t n = (int64_t) nrows * n_per_row;
7650
7651
0
    if (ggml_quantize_requires_imatrix(type)) {
7652
0
        GGML_ASSERT(imatrix != NULL);
7653
0
    }
7654
7655
0
    GGML_ASSERT(start % type_traits[type].blck_size == 0);
7656
0
    GGML_ASSERT(start % n_per_row == 0);
7657
7658
0
    ggml_quantize_init(type); // this is noop if already initialized
7659
7660
0
    const size_t start_row = start / n_per_row;
7661
0
    const size_t row_size  = ggml_row_size(type, n_per_row);
7662
7663
0
    size_t result = 0;
7664
7665
0
    switch (type) {
7666
0
        case GGML_TYPE_Q1_0:    result = quantize_q1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7667
0
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7668
0
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7669
0
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7670
0
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7671
0
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7672
0
        case GGML_TYPE_MXFP4:   result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7673
0
        case GGML_TYPE_NVFP4:   result = quantize_nvfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7674
0
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7675
0
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7676
0
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7677
0
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7678
0
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7679
0
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7680
0
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7681
0
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7682
0
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7683
0
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7684
0
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7685
0
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7686
0
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7687
0
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7688
0
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7689
0
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7690
0
        case GGML_TYPE_F16:
7691
0
            {
7692
0
                size_t elemsize = sizeof(ggml_fp16_t);
7693
0
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
7694
0
                result = n * elemsize;
7695
0
            } break;
7696
0
        case GGML_TYPE_BF16:
7697
0
            {
7698
0
                size_t elemsize = sizeof(ggml_bf16_t);
7699
0
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
7700
0
                result = n * elemsize;
7701
0
            } break;
7702
0
        case GGML_TYPE_F32:
7703
0
            {
7704
0
                size_t elemsize = sizeof(float);
7705
0
                result = n * elemsize;
7706
0
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
7707
0
            } break;
7708
0
        default:
7709
0
            assert(false);
7710
0
    }
7711
7712
0
    GGML_ASSERT(result == nrows * row_size);
7713
7714
0
    return result;
7715
0
}
7716
7717
////////////////////////////////////////////////////////////////////////////////
7718
7719
0
void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) {
7720
0
    *log_callback = g_logger_state.log_callback;
7721
0
    *user_data    = g_logger_state.log_callback_user_data;
7722
0
}
7723
7724
0
void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
7725
0
    g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
7726
0
    g_logger_state.log_callback_user_data = user_data;
7727
0
}
7728
7729
0
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
7730
0
    p->n_threads  = n_threads;
7731
0
    p->prio       = 0;     // default priority (usually means normal or inherited)
7732
0
    p->poll       = 50;    // hybrid-polling enabled
7733
0
    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
7734
0
    p->paused     = false; // threads are ready to go
7735
0
    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
7736
0
}
7737
7738
0
struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
7739
0
    struct ggml_threadpool_params p;
7740
0
    ggml_threadpool_params_init(&p, n_threads);
7741
0
    return p;
7742
0
}
7743
7744
0
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
7745
0
    if (p0->n_threads      != p1->n_threads  )    return false;
7746
0
    if (p0->prio           != p1->prio       )    return false;
7747
0
    if (p0->poll           != p1->poll       )    return false;
7748
0
    if (p0->strict_cpu     != p1->strict_cpu )    return false;
7749
0
    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
7750
0
}