Coverage Report

Created: 2026-03-21 06:50

/src/llama.cpp/ggml/src/ggml.c
Line | Count | Source
1
#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
2
#define _USE_MATH_DEFINES // For M_PI on MSVC
3
4
#include "ggml-backend.h"
5
#include "ggml-impl.h"
6
#include "ggml-threading.h"
7
#include "ggml-cpu.h"
8
#include "ggml.h"
9
10
// FIXME: required here for quantization functions
11
#include "ggml-quants.h"
12
13
#ifdef GGML_USE_CPU_HBM
14
#include <hbwmalloc.h>
15
#endif
16
17
#if defined(_MSC_VER) || defined(__MINGW32__)
18
#include <malloc.h> // using malloc.h with MSC/MINGW
19
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
20
#include <alloca.h>
21
#endif
22
23
#include <assert.h>
24
#include <errno.h>
25
#include <time.h>
26
#include <math.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include <stdint.h>
30
#include <inttypes.h>
31
#include <stdio.h>
32
#include <float.h>
33
#include <limits.h>
34
#include <stdarg.h>
35
#include <signal.h>
36
#if defined(__gnu_linux__)
37
#include <syscall.h>
38
#endif
39
40
#if defined(__APPLE__)
41
#include <unistd.h>
42
#include <mach/mach.h>
43
#include <TargetConditionals.h>
44
#endif
45
46
#if defined(_WIN32)
47
#define WIN32_LEAN_AND_MEAN
48
#ifndef NOMINMAX
49
    #define NOMINMAX
50
#endif
51
#include <windows.h>
52
#endif
53
54
0
#define UNUSED GGML_UNUSED
55
56
// Needed for ggml_fp32_to_bf16_row()
57
#if defined(__AVX512BF16__)
58
#if defined(_MSC_VER)
59
#define m512i(p) p
60
#else
61
#include <immintrin.h>
62
#define m512i(p) (__m512i)(p)
63
#endif // defined(_MSC_VER)
64
#endif // defined(__AVX512BF16__)
65
66
#if defined(__linux__) || \
67
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
68
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
69
70
#include <unistd.h>
71
#include <sys/types.h>
72
#include <sys/stat.h>
73
#include <sys/wait.h>
74
#if defined(__linux__)
75
#include <sys/prctl.h>
76
#endif
77
78
#if defined(__ANDROID__)
79
#include <unwind.h>
80
#include <dlfcn.h>
81
#include <stdio.h>
82
83
struct backtrace_state {
84
    void ** current;
85
    void ** end;
86
};
87
88
static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
89
    struct backtrace_state * state = (struct backtrace_state *)arg;
90
    uintptr_t pc = _Unwind_GetIP(context);
91
    if (pc) {
92
        if (state->current == state->end) {
93
            return _URC_END_OF_STACK;
94
        } else {
95
            *state->current++ = (void*)pc;
96
        }
97
    }
98
    return _URC_NO_REASON;
99
}
100
101
static void ggml_print_backtrace_symbols(void) {
102
    const int max = 100;
103
    void* buffer[max];
104
105
    struct backtrace_state state = {buffer, buffer + max};
106
    _Unwind_Backtrace(unwind_callback, &state);
107
108
    int count = state.current - buffer;
109
110
    for (int idx = 0; idx < count; ++idx) {
111
        const void * addr = buffer[idx];
112
        const char * symbol = "";
113
114
        Dl_info info;
115
        if (dladdr(addr, &info) && info.dli_sname) {
116
            symbol = info.dli_sname;
117
        }
118
119
        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
120
    }
121
}
122
#elif defined(__linux__) && defined(__GLIBC__)
123
#include <execinfo.h>
124
0
static void ggml_print_backtrace_symbols(void) {
125
0
    void * trace[100];
126
0
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
127
0
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
128
0
}
129
#elif defined(__APPLE__)
130
#include <execinfo.h>
131
static void ggml_print_backtrace_symbols(void) {
132
    void * trace[100];
133
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
134
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
135
}
136
#else
137
static void ggml_print_backtrace_symbols(void) {
138
    // platform not supported
139
}
140
#endif
141
142
0
void ggml_print_backtrace(void) {
143
0
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
144
0
    if (GGML_NO_BACKTRACE) {
145
0
        return;
146
0
    }
147
#if defined(__APPLE__)
148
    // On macOS, fork+debugger attachment is problematic due to:
149
    // 1. libdispatch "poisons" forked child processes
150
    // 2. lldb has issues attaching to parent from forked child
151
    // Use simple backtrace() instead to avoid Terminal.app crashes
152
    const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
153
    if (!GGML_BACKTRACE_LLDB) {
154
        fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
155
        fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
156
        fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
157
        ggml_print_backtrace_symbols();
158
        return;
159
    }
160
#endif
161
0
#if defined(__linux__)
162
0
    FILE * f = fopen("/proc/self/status", "r");
163
0
    size_t size = 0;
164
0
    char * line = NULL;
165
0
    ssize_t length = 0;
166
0
    while ((length = getline(&line, &size, f)) > 0) {
167
0
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
168
0
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
169
            // Already being debugged, and the breakpoint is the later abort()
170
0
            free(line);
171
0
            fclose(f);
172
0
            return;
173
0
        }
174
0
    }
175
0
    free(line);
176
0
    fclose(f);
177
0
    int lock[2] = { -1, -1 };
178
0
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
179
0
#endif
180
0
    const int parent_pid = getpid();
181
0
    const int child_pid = fork();
182
0
    if (child_pid < 0) { // error
183
0
#if defined(__linux__)
184
0
        close(lock[1]);
185
0
        close(lock[0]);
186
0
#endif
187
0
        return;
188
0
    } else if (child_pid == 0) { // child
189
0
        char attach[32];
190
0
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
191
0
#if defined(__linux__)
192
0
        close(lock[1]);
193
0
        (void) !read(lock[0], lock, 1);
194
0
        close(lock[0]);
195
0
#endif
196
        // try gdb
197
0
        execlp("gdb", "gdb", "--batch",
198
0
            "-ex", "set style enabled on",
199
0
            "-ex", attach,
200
0
            "-ex", "bt -frame-info source-and-location",
201
0
            "-ex", "detach",
202
0
            "-ex", "quit",
203
0
            (char *) NULL);
204
        // try lldb
205
0
        execlp("lldb", "lldb", "--batch",
206
0
            "-o", "bt",
207
0
            "-o", "quit",
208
0
            "-p", &attach[sizeof("attach ") - 1],
209
0
            (char *) NULL);
210
        // both gdb and lldb failed; fall back to backtrace_symbols
211
0
        ggml_print_backtrace_symbols();
212
0
        _Exit(0);
213
0
    } else { // parent
214
0
#if defined(__linux__)
215
0
        prctl(PR_SET_PTRACER, child_pid);
216
0
        close(lock[1]);
217
0
        close(lock[0]);
218
0
#endif
219
0
        waitpid(child_pid, NULL, 0);
220
0
    }
221
0
}
222
#else
223
void ggml_print_backtrace(void) {
224
    // platform not supported
225
}
226
#endif
227
228
static ggml_abort_callback_t g_abort_callback = NULL;
229
230
// Set the abort callback (passing NULL restores the default behavior: printing the message and a backtrace to stderr)
231
0
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) {
232
0
    ggml_abort_callback_t ret_val = g_abort_callback;
233
0
    g_abort_callback = callback;
234
0
    return ret_val;
235
0
}
236
237
308
void ggml_abort(const char * file, int line, const char * fmt, ...) {
238
308
    fflush(stdout);
239
240
308
    char message[2048];
241
308
    int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line);
242
243
308
    va_list args;
244
308
    va_start(args, fmt);
245
308
    vsnprintf(message + offset, sizeof(message) - offset, fmt, args);
246
308
    va_end(args);
247
248
308
    if (g_abort_callback) {
249
0
        g_abort_callback(message);
250
308
    } else {
251
        // default: print error and backtrace to stderr
252
308
        fprintf(stderr, "%s\n", message);
253
        ggml_print_backtrace();
254
308
    }
255
256
308
    abort();
257
308
}
258
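ggml_abort formats the message, hands it to the registered callback if one is installed (otherwise printing it and a backtrace to stderr), and always ends in abort(). A minimal sketch of installing a handler — assuming, per the callback invocation above, that ggml_abort_callback_t receives the formatted message string:

    #include "ggml.h"
    #include <stdio.h>

    // hypothetical handler: forward the formatted message to our own sink
    static void my_abort_handler(const char * message) {
        fprintf(stderr, "[fatal] %s\n", message);
        // note: ggml_abort() still calls abort() after the callback returns
    }

    static void install_abort_handler(void) {
        // the previously installed callback is returned so it can be restored
        ggml_abort_callback_t prev = ggml_set_abort_callback(my_abort_handler);
        // ... any GGML_ABORT() now reaches my_abort_handler first ...
        ggml_set_abort_callback(prev);
    }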
259
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
260
261
//
262
// logging
263
//
264
265
struct ggml_logger_state {
266
    ggml_log_callback log_callback;
267
    void * log_callback_user_data;
268
};
269
static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};
270
271
5.51k
static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
272
5.51k
    if (format == NULL) {
273
0
        return;
274
0
    }
275
5.51k
    va_list args_copy;
276
5.51k
    va_copy(args_copy, args);
277
5.51k
    char buffer[128];
278
5.51k
    int len = vsnprintf(buffer, 128, format, args);
279
5.51k
    if (len < 128) {
280
5.40k
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
281
5.40k
    } else {
282
111
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
283
111
        vsnprintf(buffer2, len + 1, format, args_copy);
284
111
        buffer2[len] = 0;
285
111
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
286
111
        free(buffer2);
287
111
    }
288
5.51k
    va_end(args_copy);
289
5.51k
}
290
291
5.51k
void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
292
5.51k
    va_list args;
293
5.51k
    va_start(args, format);
294
5.51k
    ggml_log_internal_v(level, format, args);
295
5.51k
    va_end(args);
296
5.51k
}
297
298
5.51k
void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
299
5.51k
    (void) level;
300
5.51k
    (void) user_data;
301
5.51k
    fputs(text, stderr);
302
5.51k
    fflush(stderr);
303
5.51k
}
304
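The logger state above is process-global: messages under 128 bytes go through a stack buffer, longer ones through a temporary heap allocation, and everything funnels into the installed callback. A sketch of swapping in a custom callback — assuming the public ggml_log_set() registration function from ggml.h (not shown in this file) and the usual ascending ordering of ggml_log_level values:

    #include "ggml.h"
    #include <stdio.h>

    // drop everything below WARN; forward the rest unchanged
    static void my_log_cb(enum ggml_log_level level, const char * text, void * user_data) {
        (void) user_data;
        if (level >= GGML_LOG_LEVEL_WARN) {
            fputs(text, stderr);
        }
    }

    static void install_logging(void) {
        ggml_log_set(my_log_cb, NULL);
    }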
305
//
306
// end of logging block
307
//
308
309
#ifdef GGML_USE_ACCELERATE
310
// uncomment to use vDSP for soft max computation
311
// note: not sure if it is actually faster
312
//#define GGML_SOFT_MAX_ACCELERATE
313
#endif
314
315
316
5.98k
void * ggml_aligned_malloc(size_t size) {
317
#if defined(__s390x__)
318
    const int alignment = 256;
319
#else
320
5.98k
    const int alignment = 64;
321
5.98k
#endif
322
323
#if defined(_MSC_VER) || defined(__MINGW32__)
324
    return _aligned_malloc(size, alignment);
325
#else
326
5.98k
    if (size == 0) {
327
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
328
0
        return NULL;
329
0
    }
330
5.98k
    void * aligned_memory = NULL;
331
  #ifdef GGML_USE_CPU_HBM
332
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
333
  #elif TARGET_OS_OSX
334
    GGML_UNUSED(alignment);
335
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
336
    int result = EFAULT;
337
    switch (alloc_status) {
338
        case KERN_SUCCESS:
339
            result = 0;
340
            break;
341
        case KERN_INVALID_ADDRESS:
342
            result = EINVAL;
343
            break;
344
        case KERN_NO_SPACE:
345
            result = ENOMEM;
346
            break;
347
        default:
348
            result = EFAULT;
349
            break;
350
    }
351
  #else
352
5.98k
    int result = posix_memalign(&aligned_memory, alignment, size);
353
5.98k
  #endif
354
5.98k
    if (result != 0) {
355
        // Handle allocation failure
356
0
        const char *error_desc = "unknown allocation error";
357
0
        switch (result) {
358
0
            case EINVAL:
359
0
                error_desc = "invalid alignment value";
360
0
                break;
361
0
            case ENOMEM:
362
0
                error_desc = "insufficient memory";
363
0
                break;
364
0
        }
365
0
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
366
0
        return NULL;
367
0
    }
368
5.98k
    return aligned_memory;
369
5.98k
#endif
370
5.98k
}
371
372
5.98k
void ggml_aligned_free(void * ptr, size_t size) {
373
5.98k
    GGML_UNUSED(size);
374
#if defined(_MSC_VER) || defined(__MINGW32__)
375
    _aligned_free(ptr);
376
#elif GGML_USE_CPU_HBM
377
    if (ptr != NULL) {
378
        hbw_free(ptr);
379
    }
380
#elif TARGET_OS_OSX
381
    if (ptr != NULL) {
382
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
383
    }
384
#else
385
5.98k
    free(ptr);
386
5.98k
#endif
387
5.98k
}
388
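ggml_aligned_malloc and ggml_aligned_free are internal helpers (declared in ggml-impl.h), but their contract is worth making explicit: the returned pointer is aligned to 64 bytes (256 on s390x), and the size passed to the free must match the allocation, because the macOS vm_deallocate path requires it. A small sketch under those assumptions:

    #include "ggml-impl.h" // internal header declaring the aligned allocators
    #include <assert.h>
    #include <stdint.h>

    static void aligned_roundtrip(void) {
        const size_t size = 1024;
        void * buf = ggml_aligned_malloc(size);  // NULL on failure or when size == 0
        assert(((uintptr_t) buf) % 64 == 0);     // 64-byte alignment (256 on s390x)
        ggml_aligned_free(buf, size);            // size must match the allocation
    }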
389
390
5.98k
inline static void * ggml_malloc(size_t size) {
391
5.98k
    if (size == 0) {
392
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
393
0
        return NULL;
394
0
    }
395
5.98k
    void * result = malloc(size);
396
5.98k
    if (result == NULL) {
397
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
398
0
        GGML_ABORT("fatal error");
399
0
    }
400
5.98k
    return result;
401
5.98k
}
402
403
// calloc
404
0
inline static void * ggml_calloc(size_t num, size_t size) {
405
0
    // guard against multiplication overflow before delegating to calloc
    if (size != 0 && num > SIZE_MAX / size) {
        GGML_ABORT("calloc size overflow");
    }
406
407
0
    if (num == 0 || size == 0) {
408
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
409
0
        return NULL;
410
0
    }
411
0
    void * result = calloc(num, size);
412
0
    if (result == NULL) {
413
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
414
0
        GGML_ABORT("fatal error");
415
0
    }
416
0
    return result;
417
0
}
418
419
5.98k
#define GGML_MALLOC(size)      ggml_malloc(size)
420
0
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
421
422
5.98k
#define GGML_FREE(ptr) free(ptr)
423
424
0
const char * ggml_status_to_string(enum ggml_status status) {
425
0
    switch (status) {
426
0
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
427
0
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
428
0
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
429
0
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
430
0
    }
431
432
0
    return "GGML status: unknown";
433
0
}
434
435
0
float ggml_fp16_to_fp32(ggml_fp16_t x) {
436
0
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
437
0
    return GGML_FP16_TO_FP32(x);
438
0
}
439
440
0
ggml_fp16_t ggml_fp32_to_fp16(float x) {
441
0
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
442
0
    return GGML_FP32_TO_FP16(x);
443
0
}
444
445
0
float ggml_bf16_to_fp32(ggml_bf16_t x) {
446
0
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
447
0
    return GGML_BF16_TO_FP32(x);  // it just left shifts
448
0
}
449
450
0
ggml_bf16_t ggml_fp32_to_bf16(float x) {
451
0
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
452
0
    return GGML_FP32_TO_BF16(x);
453
0
}
454
455
0
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
456
0
    for (int64_t i = 0; i < n; i++) {
457
0
        y[i] = GGML_FP16_TO_FP32(x[i]);
458
0
    }
459
0
}
460
461
0
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
462
0
    int i = 0;
463
0
    for (; i < n; ++i) {
464
0
        y[i] = GGML_FP32_TO_FP16(x[i]);
465
0
    }
466
0
}
467
468
0
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
469
0
    int i = 0;
470
0
    for (; i < n; ++i) {
471
0
        y[i] = GGML_BF16_TO_FP32(x[i]);
472
0
    }
473
0
}
474
475
0
void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
476
0
    for (int i = 0; i < n; i++) {
477
0
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
478
0
    }
479
0
}
480
481
0
void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
482
0
  int i = 0;
483
#if defined(__AVX512BF16__)
484
  // subnormals are flushed to zero on this platform
485
  for (; i + 32 <= n; i += 32) {
486
        _mm512_storeu_si512(
487
            (__m512i *)(y + i),
488
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
489
                                _mm512_loadu_ps(x + i))));
490
  }
491
#endif
492
0
    for (; i < n; i++) {
493
0
        y[i] = GGML_FP32_TO_BF16(x[i]);
494
0
    }
495
0
}
496
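The scalar converters above are macro-poisoned so they are not used inside ggml itself; callers are expected to convert whole rows at a time. A round-trip sketch with the public row helpers:

    #include "ggml.h"

    static void fp16_roundtrip(void) {
        const float src[4] = { 0.5f, 1.0f, -2.0f, 3.25f };
        ggml_fp16_t  half[4];
        float        dst[4];

        ggml_fp32_to_fp16_row(src, half, 4);  // narrow to fp16 (lossy beyond fp16 precision/range)
        ggml_fp16_to_fp32_row(half, dst, 4);  // widen back: dst holds the fp16-rounded values
    }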
497
0
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
498
0
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
499
0
}
500
501
0
const char * ggml_version(void) {
502
0
    return GGML_VERSION;
503
0
}
504
505
0
const char * ggml_commit(void) {
506
0
    return GGML_COMMIT;
507
0
}
508
509
//
510
// timing
511
//
512
513
#if defined(_MSC_VER) || defined(__MINGW32__)
514
static int64_t timer_freq, timer_start;
515
void ggml_time_init(void) {
516
    LARGE_INTEGER t;
517
    QueryPerformanceFrequency(&t);
518
    timer_freq = t.QuadPart;
519
520
    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
521
    // and the uptime are high enough.
522
    // We subtract the program start time to reduce the likelihood of that happening.
523
    QueryPerformanceCounter(&t);
524
    timer_start = t.QuadPart;
525
}
526
int64_t ggml_time_ms(void) {
527
    LARGE_INTEGER t;
528
    QueryPerformanceCounter(&t);
529
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
530
}
531
int64_t ggml_time_us(void) {
532
    LARGE_INTEGER t;
533
    QueryPerformanceCounter(&t);
534
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
535
}
536
#else
537
15.8k
void ggml_time_init(void) {}
538
0
int64_t ggml_time_ms(void) {
539
0
    struct timespec ts;
540
0
    clock_gettime(CLOCK_MONOTONIC, &ts);
541
0
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
542
0
}
543
544
9.52k
int64_t ggml_time_us(void) {
545
9.52k
    struct timespec ts;
546
9.52k
    clock_gettime(CLOCK_MONOTONIC, &ts);
547
9.52k
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
548
9.52k
}
549
#endif
550
551
0
int64_t ggml_cycles(void) {
552
0
    return clock();
553
0
}
554
555
0
int64_t ggml_cycles_per_ms(void) {
556
0
    return CLOCKS_PER_SEC/1000;
557
0
}
558
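On POSIX the wall-clock helpers read CLOCK_MONOTONIC directly and ggml_time_init() is a no-op; it only matters on Windows, where it anchors QueryPerformanceCounter readings to program start to reduce overflow risk. A typical measurement sketch:

    #include "ggml.h"
    #include <stdio.h>

    static void time_something(void) {
        ggml_time_init();  // required once on Windows, harmless elsewhere

        const int64_t t0 = ggml_time_us();
        // ... work to be measured ...
        const int64_t t1 = ggml_time_us();

        printf("elapsed: %.3f ms\n", (t1 - t0) / 1000.0);
    }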
559
//
560
// cross-platform UTF-8 file paths
561
//
562
563
#ifdef _WIN32
564
static wchar_t * ggml_mbstowcs(const char * mbs) {
565
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
566
    if (!wlen) {
567
        errno = EINVAL;
568
        return NULL;
569
    }
570
571
    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
572
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
573
    if (!wlen) {
574
        GGML_FREE(wbuf);
575
        errno = EINVAL;
576
        return NULL;
577
    }
578
579
    return wbuf;
580
}
581
#endif
582
583
5.97k
FILE * ggml_fopen(const char * fname, const char * mode) {
584
#ifdef _WIN32
585
    FILE * file = NULL;
586
587
    // convert fname (UTF-8)
588
    wchar_t * wfname = ggml_mbstowcs(fname);
589
    if (wfname) {
590
        // convert mode (ANSI)
591
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
592
        wchar_t * wmode_p = wmode;
593
        do {
594
            *wmode_p++ = (wchar_t)*mode;
595
        } while (*mode++);
596
597
        // open file
598
        file = _wfopen(wfname, wmode);
599
600
        GGML_FREE(wfname);
601
        GGML_FREE(wmode);
602
    }
603
604
    return file;
605
#else
606
5.97k
    return fopen(fname, mode);
607
5.97k
#endif
608
609
5.97k
}
610
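ggml_fopen exists so that file paths can always be passed as UTF-8: on Windows the name is converted to UTF-16 and opened with _wfopen, everywhere else it is a plain fopen. Usage sketch (the file name is a hypothetical example):

    #include "ggml.h"
    #include <stdio.h>

    static void open_model(void) {
        // a non-ASCII path also works on Windows thanks to the UTF-8 -> UTF-16 conversion
        FILE * f = ggml_fopen("модель.gguf", "rb");
        if (f != NULL) {
            // ... read the file ...
            fclose(f);
        }
    }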
611
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
612
    [GGML_TYPE_I8] = {
613
        .type_name                = "i8",
614
        .blck_size                = 1,
615
        .type_size                = sizeof(int8_t),
616
        .is_quantized             = false,
617
    },
618
    [GGML_TYPE_I16] = {
619
        .type_name                = "i16",
620
        .blck_size                = 1,
621
        .type_size                = sizeof(int16_t),
622
        .is_quantized             = false,
623
    },
624
    [GGML_TYPE_I32] = {
625
        .type_name                = "i32",
626
        .blck_size                = 1,
627
        .type_size                = sizeof(int32_t),
628
        .is_quantized             = false,
629
    },
630
    [GGML_TYPE_I64] = {
631
        .type_name                = "i64",
632
        .blck_size                = 1,
633
        .type_size                = sizeof(int64_t),
634
        .is_quantized             = false,
635
    },
636
    [GGML_TYPE_F64] = {
637
        .type_name                = "f64",
638
        .blck_size                = 1,
639
        .type_size                = sizeof(double),
640
        .is_quantized             = false,
641
    },
642
    [GGML_TYPE_F32] = {
643
        .type_name                = "f32",
644
        .blck_size                = 1,
645
        .type_size                = sizeof(float),
646
        .is_quantized             = false,
647
    },
648
    [GGML_TYPE_F16] = {
649
        .type_name                = "f16",
650
        .blck_size                = 1,
651
        .type_size                = sizeof(ggml_fp16_t),
652
        .is_quantized             = false,
653
        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
654
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_fp16_row,
655
    },
656
    [GGML_TYPE_Q4_0] = {
657
        .type_name                = "q4_0",
658
        .blck_size                = QK4_0,
659
        .type_size                = sizeof(block_q4_0),
660
        .is_quantized             = true,
661
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_0,
662
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_0_ref,
663
    },
664
    [GGML_TYPE_Q4_1] = {
665
        .type_name                = "q4_1",
666
        .blck_size                = QK4_1,
667
        .type_size                = sizeof(block_q4_1),
668
        .is_quantized             = true,
669
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_1,
670
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_1_ref,
671
    },
672
    [4] = { // GGML_TYPE_Q4_2
673
        .type_name                = "DEPRECATED",
674
        .blck_size                = 0,
675
        .type_size                = 0,
676
        .is_quantized             = false,
677
    },
678
    [5] = { // GGML_TYPE_Q4_3
679
        .type_name                = "DEPRECATED",
680
        .blck_size                = 0,
681
        .type_size                = 0,
682
        .is_quantized             = false,
683
    },
684
    [GGML_TYPE_Q5_0] = {
685
        .type_name                = "q5_0",
686
        .blck_size                = QK5_0,
687
        .type_size                = sizeof(block_q5_0),
688
        .is_quantized             = true,
689
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_0,
690
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_0_ref,
691
    },
692
    [GGML_TYPE_Q5_1] = {
693
        .type_name                = "q5_1",
694
        .blck_size                = QK5_1,
695
        .type_size                = sizeof(block_q5_1),
696
        .is_quantized             = true,
697
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_1,
698
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_1_ref,
699
    },
700
    [GGML_TYPE_Q8_0] = {
701
        .type_name                = "q8_0",
702
        .blck_size                = QK8_0,
703
        .type_size                = sizeof(block_q8_0),
704
        .is_quantized             = true,
705
        .to_float                 = (ggml_to_float_t) dequantize_row_q8_0,
706
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_0_ref,
707
    },
708
    [GGML_TYPE_Q8_1] = {
709
        .type_name                = "q8_1",
710
        .blck_size                = QK8_1,
711
        .type_size                = sizeof(block_q8_1),
712
        .is_quantized             = true,
713
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_1_ref,
714
    },
715
    [GGML_TYPE_MXFP4] = {
716
        .type_name                = "mxfp4",
717
        .blck_size                = QK_MXFP4,
718
        .type_size                = sizeof(block_mxfp4),
719
        .is_quantized             = true,
720
        .to_float                 = (ggml_to_float_t) dequantize_row_mxfp4,
721
        .from_float_ref           = (ggml_from_float_t)quantize_row_mxfp4_ref,
722
    },
723
    [GGML_TYPE_NVFP4] = {
724
        .type_name                = "nvfp4",
725
        .blck_size                = QK_NVFP4,
726
        .type_size                = sizeof(block_nvfp4),
727
        .is_quantized             = true,
728
        .to_float                 = (ggml_to_float_t) dequantize_row_nvfp4,
729
        .from_float_ref           = (ggml_from_float_t)quantize_row_nvfp4_ref,
730
    },
731
    [GGML_TYPE_Q2_K] = {
732
        .type_name                = "q2_K",
733
        .blck_size                = QK_K,
734
        .type_size                = sizeof(block_q2_K),
735
        .is_quantized             = true,
736
        .to_float                 = (ggml_to_float_t) dequantize_row_q2_K,
737
        .from_float_ref           = (ggml_from_float_t) quantize_row_q2_K_ref,
738
    },
739
    [GGML_TYPE_Q3_K] = {
740
        .type_name                = "q3_K",
741
        .blck_size                = QK_K,
742
        .type_size                = sizeof(block_q3_K),
743
        .is_quantized             = true,
744
        .to_float                 = (ggml_to_float_t) dequantize_row_q3_K,
745
        .from_float_ref           = (ggml_from_float_t) quantize_row_q3_K_ref,
746
    },
747
    [GGML_TYPE_Q4_K] = {
748
        .type_name                = "q4_K",
749
        .blck_size                = QK_K,
750
        .type_size                = sizeof(block_q4_K),
751
        .is_quantized             = true,
752
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_K,
753
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_K_ref,
754
    },
755
    [GGML_TYPE_Q5_K] = {
756
        .type_name                = "q5_K",
757
        .blck_size                = QK_K,
758
        .type_size                = sizeof(block_q5_K),
759
        .is_quantized             = true,
760
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_K,
761
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_K_ref,
762
    },
763
    [GGML_TYPE_Q6_K] = {
764
        .type_name                = "q6_K",
765
        .blck_size                = QK_K,
766
        .type_size                = sizeof(block_q6_K),
767
        .is_quantized             = true,
768
        .to_float                 = (ggml_to_float_t) dequantize_row_q6_K,
769
        .from_float_ref           = (ggml_from_float_t) quantize_row_q6_K_ref,
770
    },
771
    [GGML_TYPE_IQ2_XXS] = {
772
        .type_name                = "iq2_xxs",
773
        .blck_size                = QK_K,
774
        .type_size                = sizeof(block_iq2_xxs),
775
        .is_quantized             = true,
776
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xxs,
777
        .from_float_ref           = NULL,
778
    },
779
    [GGML_TYPE_IQ2_XS] = {
780
        .type_name                = "iq2_xs",
781
        .blck_size                = QK_K,
782
        .type_size                = sizeof(block_iq2_xs),
783
        .is_quantized             = true,
784
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xs,
785
        .from_float_ref           = NULL,
786
    },
787
    [GGML_TYPE_IQ3_XXS] = {
788
        .type_name                = "iq3_xxs",
789
        .blck_size                = QK_K,
790
        .type_size                = sizeof(block_iq3_xxs),
791
        .is_quantized             = true,
792
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_xxs,
793
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
794
    },
795
    [GGML_TYPE_IQ3_S] = {
796
        .type_name                = "iq3_s",
797
        .blck_size                = QK_K,
798
        .type_size                = sizeof(block_iq3_s),
799
        .is_quantized             = true,
800
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_s,
801
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_s_ref,
802
    },
803
    [GGML_TYPE_IQ2_S] = {
804
        .type_name                = "iq2_s",
805
        .blck_size                = QK_K,
806
        .type_size                = sizeof(block_iq2_s),
807
        .is_quantized             = true,
808
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_s,
809
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq2_s_ref,
810
    },
811
    [GGML_TYPE_IQ1_S] = {
812
        .type_name                = "iq1_s",
813
        .blck_size                = QK_K,
814
        .type_size                = sizeof(block_iq1_s),
815
        .is_quantized             = true,
816
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_s,
817
        .from_float_ref           = NULL,
818
    },
819
    [GGML_TYPE_IQ1_M] = {
820
        .type_name                = "iq1_m",
821
        .blck_size                = QK_K,
822
        .type_size                = sizeof(block_iq1_m),
823
        .is_quantized             = true,
824
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_m,
825
        .from_float_ref           = NULL,
826
    },
827
    [GGML_TYPE_IQ4_NL] = {
828
        .type_name                = "iq4_nl",
829
        .blck_size                = QK4_NL,
830
        .type_size                = sizeof(block_iq4_nl),
831
        .is_quantized             = true,
832
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_nl,
833
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_nl_ref,
834
    },
835
    [GGML_TYPE_IQ4_XS] = {
836
        .type_name                = "iq4_xs",
837
        .blck_size                = QK_K,
838
        .type_size                = sizeof(block_iq4_xs),
839
        .is_quantized             = true,
840
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_xs,
841
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_xs_ref,
842
    },
843
    [GGML_TYPE_Q8_K] = {
844
        .type_name                = "q8_K",
845
        .blck_size                = QK_K,
846
        .type_size                = sizeof(block_q8_K),
847
        .is_quantized             = true,
848
    },
849
    [GGML_TYPE_BF16] = {
850
        .type_name                = "bf16",
851
        .blck_size                = 1,
852
        .type_size                = sizeof(ggml_bf16_t),
853
        .is_quantized             = false,
854
        .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
855
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
856
    },
857
    [31] = { // GGML_TYPE_Q4_0_4_4
858
        .type_name                = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
859
        .blck_size                = 0,
860
        .type_size                = 0,
861
        .is_quantized             = false,
862
    },
863
    [32] = { // GGML_TYPE_Q4_0_4_8
864
        .type_name                = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
865
        .blck_size                = 0,
866
        .type_size                = 0,
867
        .is_quantized             = false,
868
    },
869
    [33] = { // GGML_TYPE_Q4_0_8_8
870
        .type_name                = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
871
        .blck_size                = 0,
872
        .type_size                = 0,
873
        .is_quantized             = false,
874
    },
875
    [GGML_TYPE_TQ1_0] = {
876
        .type_name                = "tq1_0",
877
        .blck_size                = QK_K,
878
        .type_size                = sizeof(block_tq1_0),
879
        .is_quantized             = true,
880
        .to_float                 = (ggml_to_float_t) dequantize_row_tq1_0,
881
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq1_0_ref,
882
    },
883
    [GGML_TYPE_TQ2_0] = {
884
        .type_name                = "tq2_0",
885
        .blck_size                = QK_K,
886
        .type_size                = sizeof(block_tq2_0),
887
        .is_quantized             = true,
888
        .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
889
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
890
    },
891
    [36] = { // GGML_TYPE_IQ4_NL_4_4
892
        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
893
        .blck_size                = 0,
894
        .type_size                = 0,
895
        .is_quantized             = false,
896
    },
897
    [37] = { // GGML_TYPE_IQ4_NL_4_8
898
        .type_name                = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
899
        .blck_size                = 0,
900
        .type_size                = 0,
901
        .is_quantized             = false,
902
    },
903
    [38] = { // GGML_TYPE_IQ4_NL_8_8
904
        .type_name                = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
905
        .blck_size                = 0,
906
        .type_size                = 0,
907
        .is_quantized             = false,
908
    },
909
};
910
911
0
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
912
0
    assert(type >= 0);
913
0
    assert(type < GGML_TYPE_COUNT);
914
0
    return &type_traits[type];
915
0
}
916
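Each entry of the table couples a type with its block size, the byte size of one block, and optional (de)quantization row functions; removed types keep their numeric slots so stored type ids remain stable. A lookup sketch:

    #include "ggml.h"
    #include <inttypes.h>
    #include <stdio.h>

    static void describe_type(enum ggml_type type) {
        const struct ggml_type_traits * tt = ggml_get_type_traits(type);
        // a row of n elements occupies (n / blck_size) blocks of type_size bytes each
        printf("%s: blck_size=%" PRId64 " type_size=%zu quantized=%d\n",
               tt->type_name, tt->blck_size, tt->type_size, tt->is_quantized);
    }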
917
//
918
// ggml object
919
//
920
921
struct ggml_object {
922
    size_t offs;
923
    size_t size;
924
925
    struct ggml_object * next;
926
927
    enum ggml_object_type type;
928
929
    char padding[4];
930
};
931
932
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
933
934
//
935
// ggml context
936
//
937
938
struct ggml_context {
939
    size_t mem_size;
940
    void * mem_buffer;
941
    bool   mem_buffer_owned;
942
    bool   no_alloc;
943
944
    int    n_objects;
945
946
    struct ggml_object * objects_begin;
947
    struct ggml_object * objects_end;
948
};
949
950
//
951
// data types
952
//
953
954
static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
955
    "NONE",
956
957
    "DUP",
958
    "ADD",
959
    "ADD_ID",
960
    "ADD1",
961
    "ACC",
962
    "SUB",
963
    "MUL",
964
    "DIV",
965
    "SQR",
966
    "SQRT",
967
    "LOG",
968
    "SIN",
969
    "COS",
970
    "SUM",
971
    "SUM_ROWS",
972
    "CUMSUM",
973
    "MEAN",
974
    "ARGMAX",
975
    "COUNT_EQUAL",
976
    "REPEAT",
977
    "REPEAT_BACK",
978
    "CONCAT",
979
    "SILU_BACK",
980
    "NORM",
981
    "RMS_NORM",
982
    "RMS_NORM_BACK",
983
    "GROUP_NORM",
984
    "L2_NORM",
985
986
    "MUL_MAT",
987
    "MUL_MAT_ID",
988
    "OUT_PROD",
989
990
    "SCALE",
991
    "SET",
992
    "CPY",
993
    "CONT",
994
    "RESHAPE",
995
    "VIEW",
996
    "PERMUTE",
997
    "TRANSPOSE",
998
    "GET_ROWS",
999
    "GET_ROWS_BACK",
1000
    "SET_ROWS",
1001
    "DIAG",
1002
    "DIAG_MASK_INF",
1003
    "DIAG_MASK_ZERO",
1004
    "SOFT_MAX",
1005
    "SOFT_MAX_BACK",
1006
    "ROPE",
1007
    "ROPE_BACK",
1008
    "CLAMP",
1009
    "CONV_TRANSPOSE_1D",
1010
    "IM2COL",
1011
    "IM2COL_BACK",
1012
    "IM2COL_3D",
1013
    "CONV_2D",
1014
    "CONV_3D",
1015
    "CONV_2D_DW",
1016
    "CONV_TRANSPOSE_2D",
1017
    "POOL_1D",
1018
    "POOL_2D",
1019
    "POOL_2D_BACK",
1020
    "UPSCALE",
1021
    "PAD",
1022
    "PAD_REFLECT_1D",
1023
    "ROLL",
1024
    "ARANGE",
1025
    "TIMESTEP_EMBEDDING",
1026
    "ARGSORT",
1027
    "TOP_K",
1028
    "LEAKY_RELU",
1029
    "TRI",
1030
    "FILL",
1031
1032
    "FLASH_ATTN_EXT",
1033
    "FLASH_ATTN_BACK",
1034
    "SSM_CONV",
1035
    "SSM_SCAN",
1036
    "WIN_PART",
1037
    "WIN_UNPART",
1038
    "GET_REL_POS",
1039
    "ADD_REL_POS",
1040
    "RWKV_WKV6",
1041
    "GATED_LINEAR_ATTN",
1042
    "RWKV_WKV7",
1043
    "SOLVE_TRI",
1044
    "GATED_DELTA_NET",
1045
1046
    "UNARY",
1047
1048
    "MAP_CUSTOM1",
1049
    "MAP_CUSTOM2",
1050
    "MAP_CUSTOM3",
1051
1052
    "CUSTOM",
1053
1054
    "CROSS_ENTROPY_LOSS",
1055
    "CROSS_ENTROPY_LOSS_BACK",
1056
    "OPT_STEP_ADAMW",
1057
    "OPT_STEP_SGD",
1058
1059
    "GLU",
1060
};
1061
1062
static_assert(GGML_OP_COUNT == 96, "GGML_OP_COUNT != 96");
1063
1064
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1065
    "none",
1066
1067
    "x",
1068
    "x+y",
1069
    "x[i]+y",
1070
    "x+y",
1071
    "view(x,nb,offset)+=y->x",
1072
    "x-y",
1073
    "x*y",
1074
    "x/y",
1075
    "x^2",
1076
    "√x",
1077
    "log(x)",
1078
    "sin(x)",
1079
    "cos(x)",
1080
    "Σx",
1081
    "Σx_k",
1082
    "cumsum(x)",
1083
    "Σx/n",
1084
    "argmax(x)",
1085
    "count_equal(x)",
1086
    "repeat(x)",
1087
    "repeat_back(x)",
1088
    "concat(x, y)",
1089
    "silu_back(x)",
1090
    "norm(x)",
1091
    "rms_norm(x)",
1092
    "rms_norm_back(x)",
1093
    "group_norm(x)",
1094
    "l2_norm(x)",
1095
1096
    "X*Y",
1097
    "X[i]*Y",
1098
    "X*Y",
1099
1100
    "x*v",
1101
    "y-\\>view(x)",
1102
    "x-\\>y",
1103
    "cont(x)",
1104
    "reshape(x)",
1105
    "view(x)",
1106
    "permute(x)",
1107
    "transpose(x)",
1108
    "get_rows(x)",
1109
    "get_rows_back(x)",
1110
    "set_rows(x)",
1111
    "diag(x)",
1112
    "diag_mask_inf(x)",
1113
    "diag_mask_zero(x)",
1114
    "soft_max(x)",
1115
    "soft_max_back(x)",
1116
    "rope(x)",
1117
    "rope_back(x)",
1118
    "clamp(x)",
1119
    "conv_transpose_1d(x)",
1120
    "im2col(x)",
1121
    "im2col_back(x)",
1122
    "im2col_3d(x)",
1123
    "conv_2d(x)",
1124
    "conv_3d(x)",
1125
    "conv_2d_dw(x)",
1126
    "conv_transpose_2d(x)",
1127
    "pool_1d(x)",
1128
    "pool_2d(x)",
1129
    "pool_2d_back(x)",
1130
    "upscale(x)",
1131
    "pad(x)",
1132
    "pad_reflect_1d(x)",
1133
    "roll(x)",
1134
    "arange(start, stop, step)",
1135
    "timestep_embedding(timesteps, dim, max_period)",
1136
    "argsort(x)",
1137
    "top_k(x)",
1138
    "leaky_relu(x)",
1139
    "tri(x)",
1140
    "fill(x, c)",
1141
1142
    "flash_attn_ext(x)",
1143
    "flash_attn_back(x)",
1144
    "ssm_conv(x)",
1145
    "ssm_scan(x)",
1146
    "win_part(x)",
1147
    "win_unpart(x)",
1148
    "get_rel_pos(x)",
1149
    "add_rel_pos(x)",
1150
    "rwkv_wkv6(k, v, r, tf, td, s)",
1151
    "gated_linear_attn(k, v, q, gate, s)",
1152
    "rwkv_wkv7(r, w, k, v, a, b, s)",
1153
    "A X = B, A triangular, solve X",
1154
    "gated_delta_net(q, k, v, g, beta, s)",
1155
1156
    "unary(x)",
1157
1158
    "map_custom(x)",
1159
    "map_custom(x,y)",
1160
    "map_custom(x,y,z)",
1161
1162
    "custom(x)",
1163
1164
    "cross_entropy_loss(x,y)",
1165
    "cross_entropy_loss_back(x,y)",
1166
    "adamw(x)",
1167
    "sgd(x)",
1168
1169
    "glu(x)",
1170
};
1171
1172
static_assert(GGML_OP_COUNT == 96, "GGML_OP_COUNT != 96");
1173
1174
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
1175
1176
static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1177
    "ABS",
1178
    "SGN",
1179
    "NEG",
1180
    "STEP",
1181
    "TANH",
1182
    "ELU",
1183
    "RELU",
1184
    "SIGMOID",
1185
    "GELU",
1186
    "GELU_QUICK",
1187
    "SILU",
1188
    "HARDSWISH",
1189
    "HARDSIGMOID",
1190
    "EXP",
1191
    "EXPM1",
1192
    "SOFTPLUS",
1193
    "GELU_ERF",
1194
    "XIELU",
1195
    "FLOOR",
1196
    "CEIL",
1197
    "ROUND",
1198
    "TRUNC",
1199
};
1200
1201
static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
1202
1203
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
1204
    "REGLU",
1205
    "GEGLU",
1206
    "SWIGLU",
1207
    "SWIGLU_OAI",
1208
    "GEGLU_ERF",
1209
    "GEGLU_QUICK",
1210
};
1211
1212
static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6");
1213
1214
1215
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
1216
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
1217
1218
1219
////////////////////////////////////////////////////////////////////////////////
1220
1221
0
void ggml_print_object(const struct ggml_object * obj) {
1222
0
    GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
1223
0
            obj->type, obj->offs, obj->size, (const void *) obj->next);
1224
0
}
1225
1226
0
void ggml_print_objects(const struct ggml_context * ctx) {
1227
0
    struct ggml_object * obj = ctx->objects_begin;
1228
1229
0
    GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
1230
1231
0
    while (obj != NULL) {
1232
0
        ggml_print_object(obj);
1233
0
        obj = obj->next;
1234
0
    }
1235
1236
0
    GGML_LOG_INFO("%s: --- end ---\n", __func__);
1237
0
}
1238
1239
4.75k
int64_t ggml_nelements(const struct ggml_tensor * tensor) {
1240
4.75k
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1241
1242
4.75k
    return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1243
4.75k
}
1244
1245
0
int64_t ggml_nrows(const struct ggml_tensor * tensor) {
1246
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1247
1248
0
    return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1249
0
}
1250
1251
6.84k
size_t ggml_nbytes(const struct ggml_tensor * tensor) {
1252
33.7k
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1253
27.0k
        if (tensor->ne[i] <= 0) {
1254
120
            return 0;
1255
120
        }
1256
27.0k
    }
1257
1258
6.72k
    size_t nbytes;
1259
6.72k
    const size_t blck_size = ggml_blck_size(tensor->type);
1260
6.72k
    if (blck_size == 1) {
1261
6.59k
        nbytes = ggml_type_size(tensor->type);
1262
32.9k
        for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1263
26.3k
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1264
26.3k
        }
1265
6.59k
    }
1266
129
    else {
1267
129
        nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
1268
516
        for (int i = 1; i < GGML_MAX_DIMS; ++i) {
1269
387
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1270
387
        }
1271
129
    }
1272
1273
6.72k
    return nbytes;
1274
6.84k
}
1275
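For a fully packed tensor the stride-based sum above collapses to the element count times the per-element size; a worked example, assuming the standard packed strides:

    // f32 tensor, ne = {4, 3, 2, 1}, packed strides nb = {4, 16, 48, 96}:
    //   nbytes = type_size + (ne[0]-1)*nb[0] + (ne[1]-1)*nb[1] + (ne[2]-1)*nb[2] + (ne[3]-1)*nb[3]
    //          = 4 + 3*4 + 2*16 + 1*48 + 0*96
    //          = 96 = 4*3*2*1 * sizeof(float)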
1276
0
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
1277
0
    return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
1278
0
}
1279
1280
17.4k
int64_t ggml_blck_size(enum ggml_type type) {
1281
17.4k
    assert(type >= 0);
1282
17.4k
    assert(type < GGML_TYPE_COUNT);
1283
17.4k
    return type_traits[type].blck_size;
1284
17.4k
}
1285
1286
17.2k
size_t ggml_type_size(enum ggml_type type) {
1287
17.2k
    assert(type >= 0);
1288
17.2k
    assert(type < GGML_TYPE_COUNT);
1289
17.2k
    return type_traits[type].type_size;
1290
17.2k
}
1291
1292
2.08k
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
1293
2.08k
    assert(type >= 0);
1294
2.08k
    assert(type < GGML_TYPE_COUNT);
1295
2.08k
    assert(ne % ggml_blck_size(type) == 0);
1296
2.08k
    return ggml_type_size(type)*ne/ggml_blck_size(type);
1297
2.08k
}
1298
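For quantized types ggml_row_size is the practical way to size buffers, since elements live in fixed-size blocks. A worked sketch, assuming the usual q4_0 layout of 32 elements per 18-byte block (one fp16 scale plus 16 bytes of packed 4-bit values):

    #include "ggml.h"

    static void size_q4_0_row(void) {
        // 4096 elements / 32 per block = 128 blocks; 128 blocks * 18 bytes = 2304 bytes
        const size_t row = ggml_row_size(GGML_TYPE_Q4_0, 4096);
        (void) row;
    }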
1299
0
double ggml_type_sizef(enum ggml_type type) {
1300
0
    assert(type >= 0);
1301
0
    assert(type < GGML_TYPE_COUNT);
1302
0
    return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
1303
0
}
1304
1305
278
const char * ggml_type_name(enum ggml_type type) {
1306
278
    assert(type >= 0);
1307
278
    assert(type < GGML_TYPE_COUNT);
1308
278
    return type_traits[type].type_name;
1309
278
}
1310
1311
0
bool ggml_is_quantized(enum ggml_type type) {
1312
0
    assert(type >= 0);
1313
0
    assert(type < GGML_TYPE_COUNT);
1314
0
    return type_traits[type].is_quantized;
1315
0
}
1316
1317
0
const char * ggml_op_name(enum ggml_op op) {
1318
0
    return GGML_OP_NAME[op];
1319
0
}
1320
1321
0
const char * ggml_op_symbol(enum ggml_op op) {
1322
0
    return GGML_OP_SYMBOL[op];
1323
0
}
1324
1325
0
const char * ggml_unary_op_name(enum ggml_unary_op op) {
1326
0
    return GGML_UNARY_OP_NAME[op];
1327
0
}
1328
1329
0
const char * ggml_glu_op_name(enum ggml_glu_op op) {
1330
0
    return GGML_GLU_OP_NAME[op];
1331
0
}
1332
1333
0
const char * ggml_op_desc(const struct ggml_tensor * t) {
1334
0
    if (t->op == GGML_OP_UNARY) {
1335
0
        enum ggml_unary_op uop = ggml_get_unary_op(t);
1336
0
        return ggml_unary_op_name(uop);
1337
0
    }
1338
0
    if (t->op == GGML_OP_GLU) {
1339
0
        enum ggml_glu_op gop = ggml_get_glu_op(t);
1340
0
        return ggml_glu_op_name(gop);
1341
0
    }
1342
0
    return ggml_op_name(t->op);
1343
0
}
1344
1345
0
size_t ggml_element_size(const struct ggml_tensor * tensor) {
1346
0
    return ggml_type_size(tensor->type);
1347
0
}
1348
1349
0
bool ggml_is_scalar(const struct ggml_tensor * tensor) {
1350
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1351
1352
0
    return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1353
0
}
1354
1355
0
bool ggml_is_vector(const struct ggml_tensor * tensor) {
1356
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1357
1358
0
    return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1359
0
}
1360
1361
0
bool ggml_is_matrix(const struct ggml_tensor * tensor) {
1362
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1363
1364
0
    return tensor->ne[2] == 1 && tensor->ne[3] == 1;
1365
0
}
1366
1367
0
bool ggml_is_3d(const struct ggml_tensor * tensor) {
1368
0
    return tensor->ne[3] == 1;
1369
0
}
1370
1371
0
int ggml_n_dims(const struct ggml_tensor * tensor) {
1372
0
    for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
1373
0
        if (tensor->ne[i] > 1) {
1374
0
            return i + 1;
1375
0
        }
1376
0
    }
1377
0
    return 1;
1378
0
}
1379
1380
0
enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
1381
0
    enum ggml_type wtype = GGML_TYPE_COUNT;
1382
1383
0
    switch (ftype) {
1384
0
        case GGML_FTYPE_ALL_F32:              wtype = GGML_TYPE_F32;   break;
1385
0
        case GGML_FTYPE_MOSTLY_F16:           wtype = GGML_TYPE_F16;   break;
1386
0
        case GGML_FTYPE_MOSTLY_BF16:          wtype = GGML_TYPE_BF16;  break;
1387
0
        case GGML_FTYPE_MOSTLY_Q4_0:          wtype = GGML_TYPE_Q4_0;  break;
1388
0
        case GGML_FTYPE_MOSTLY_Q4_1:          wtype = GGML_TYPE_Q4_1;  break;
1389
0
        case GGML_FTYPE_MOSTLY_Q5_0:          wtype = GGML_TYPE_Q5_0;  break;
1390
0
        case GGML_FTYPE_MOSTLY_Q5_1:          wtype = GGML_TYPE_Q5_1;  break;
1391
0
        case GGML_FTYPE_MOSTLY_Q8_0:          wtype = GGML_TYPE_Q8_0;  break;
1392
0
        case GGML_FTYPE_MOSTLY_MXFP4:         wtype = GGML_TYPE_MXFP4; break;
1393
0
        case GGML_FTYPE_MOSTLY_NVFP4:         wtype = GGML_TYPE_NVFP4; break;
1394
0
        case GGML_FTYPE_MOSTLY_Q2_K:          wtype = GGML_TYPE_Q2_K;  break;
1395
0
        case GGML_FTYPE_MOSTLY_Q3_K:          wtype = GGML_TYPE_Q3_K;  break;
1396
0
        case GGML_FTYPE_MOSTLY_Q4_K:          wtype = GGML_TYPE_Q4_K;  break;
1397
0
        case GGML_FTYPE_MOSTLY_Q5_K:          wtype = GGML_TYPE_Q5_K;  break;
1398
0
        case GGML_FTYPE_MOSTLY_Q6_K:          wtype = GGML_TYPE_Q6_K;  break;
1399
0
        case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS;  break;
1400
0
        case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;   break;
1401
0
        case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS;  break;
1402
0
        case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;    break;
1403
0
        case GGML_FTYPE_MOSTLY_IQ1_M:         wtype = GGML_TYPE_IQ1_M;    break;
1404
0
        case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;   break;
1405
0
        case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;   break;
1406
0
        case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;    break;
1407
0
        case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;    break;
1408
0
        case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
1409
0
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
1410
0
    }
1411
1412
0
    GGML_ASSERT(wtype != GGML_TYPE_COUNT);
1413
1414
0
    return wtype;
1415
0
}
1416
1417
1.54k
size_t ggml_tensor_overhead(void) {
1418
1.54k
    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
1419
1.54k
}
1420
1421
0
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
1422
0
    return tensor->nb[0] > tensor->nb[1];
1423
0
}
1424
1425
0
static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
1426
0
    size_t next_nb = ggml_type_size(tensor->type);
1427
0
    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
1428
0
        return false;
1429
0
    }
1430
0
    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
1431
0
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
1432
0
        if (i > n) {
1433
0
            if (tensor->ne[i] != 1 && tensor->nb[i] != next_nb) {
1434
0
                return false;
1435
0
            }
1436
0
            next_nb *= tensor->ne[i];
1437
0
        } else {
1438
            // this dimension does not need to be contiguous
1439
0
            next_nb = tensor->ne[i]*tensor->nb[i];
1440
0
        }
1441
0
    }
1442
0
    return true;
1443
0
}
1444
1445
0
bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
1446
0
    return ggml_is_contiguous_0(tensor);
1447
0
}
1448
1449
0
bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
1450
0
    return ggml_is_contiguous_n(tensor, 0);
1451
0
}
1452
1453
0
bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
1454
0
    return ggml_is_contiguous_n(tensor, 1);
1455
0
}
1456
1457
0
bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
1458
0
    return ggml_is_contiguous_n(tensor, 2);
1459
0
}
1460
1461
0
bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
1462
0
    return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
1463
0
}
1464
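The contiguity predicates all reason about the stride array nb. For a fully packed tensor the strides follow from the shape alone: nb[0] is the type size (per block for quantized types) and each later stride is the previous stride times the previous dimension. Sketch for plain f32:

    // f32, ne = {4, 3, 2, 1}  =>  nb = {4, 16, 48, 96}:
    //   nb[0] = sizeof(float)  = 4
    //   nb[1] = nb[0] * ne[0]  = 16
    //   nb[2] = nb[1] * ne[1]  = 48
    //   nb[3] = nb[2] * ne[2]  = 96
    // a tensor with these strides satisfies ggml_is_contiguous()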
1465
0
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
1466
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1467
1468
0
    return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
1469
0
}
1470
1471
0
bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
1472
0
    return
1473
0
        tensor->nb[0] > tensor->nb[2] &&
1474
0
        tensor->nb[1] > tensor->nb[0] &&
1475
0
        tensor->nb[2] == ggml_type_size(tensor->type);
1476
0
}
1477
1478
0
bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) {
1479
0
    return
1480
0
        tensor->ne[0] == ggml_blck_size(tensor->type) ||
1481
0
        tensor->nb[0] == ggml_type_size(tensor->type);
1482
0
}
1483
1484
0
static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
1485
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1486
1487
0
    return
1488
0
        tensor->nb[0] == ggml_type_size(tensor->type) &&
1489
0
        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
1490
0
        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
1491
0
}
1492
1493
0
bool ggml_is_empty(const struct ggml_tensor * tensor) {
1494
0
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1495
0
        if (tensor->ne[i] == 0) {
1496
            // empty if any dimension has no elements
1497
0
            return true;
1498
0
        }
1499
0
    }
1500
0
    return false;
1501
0
}
1502
1503
0
bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1504
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1505
1506
0
    return
1507
0
        (t0->ne[0] == t1->ne[0]) &&
1508
0
        (t0->ne[1] == t1->ne[1]) &&
1509
0
        (t0->ne[2] == t1->ne[2]) &&
1510
0
        (t0->ne[3] == t1->ne[3]);
1511
0
}
1512
1513
0
bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1514
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1515
1516
0
    return
1517
0
        (t0->nb[0] == t1->nb[0]) &&
1518
0
        (t0->nb[1] == t1->nb[1]) &&
1519
0
        (t0->nb[2] == t1->nb[2]) &&
1520
0
        (t0->nb[3] == t1->nb[3]);
1521
0
}
1522
1523
0
bool ggml_is_view(const struct ggml_tensor * t) {
1524
0
    return ggml_impl_is_view(t);
1525
0
}
1526
1527
// check if t1 can be represented as a repetition of t0
1528
0
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1529
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1530
1531
0
    return ggml_is_empty(t0) ? ggml_is_empty(t1) :
1532
0
        (t1->ne[0]%t0->ne[0] == 0) &&
1533
0
        (t1->ne[1]%t0->ne[1] == 0) &&
1534
0
        (t1->ne[2]%t0->ne[2] == 0) &&
1535
0
        (t1->ne[3]%t0->ne[3] == 0);
1536
0
}
1537
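Put differently, the repeat is legal whenever every dimension of t1 is a whole multiple of the corresponding dimension of t0; this is what lets elementwise ops broadcast a bias row across a batch:

    // t0 ne = {4096,  1, 1, 1}   (e.g. a bias row)
    // t1 ne = {4096, 32, 1, 1}   (a batch of 32 rows)
    // ggml_can_repeat(t0, t1) == true: 4096%4096 == 0, 32%1 == 0, 1%1 == 0, 1%1 == 0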
1538
0
static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1539
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1540
1541
0
    return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
1542
0
}
1543
1544
// assert that pointer is aligned to GGML_MEM_ALIGN
1545
#define GGML_ASSERT_ALIGNED(ptr) \
1546
8.06k
    GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
1547
1548
////////////////////////////////////////////////////////////////////////////////
1549
1550
5.98k
struct ggml_context * ggml_init(struct ggml_init_params params) {
1551
5.98k
    static bool is_first_call = true;
1552
1553
5.98k
    ggml_critical_section_start();
1554
1555
5.98k
    if (is_first_call) {
1556
        // initialize time system (required on Windows)
1557
5.98k
        ggml_time_init();
1558
1559
5.98k
        is_first_call = false;
1560
5.98k
    }
1561
1562
5.98k
    ggml_critical_section_end();
1563
1564
5.98k
    struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
1565
1566
    // allow calling ggml_init with 0 size
1567
5.98k
    if (params.mem_size == 0) {
1568
5.51k
        params.mem_size = GGML_MEM_ALIGN;
1569
5.51k
    }
1570
1571
5.98k
    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
1572
1573
5.98k
    *ctx = (struct ggml_context) {
1574
5.98k
        /*.mem_size           =*/ mem_size,
1575
5.98k
        /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
1576
5.98k
        /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
1577
5.98k
        /*.no_alloc           =*/ params.no_alloc,
1578
5.98k
        /*.n_objects          =*/ 0,
1579
5.98k
        /*.objects_begin      =*/ NULL,
1580
5.98k
        /*.objects_end        =*/ NULL,
1581
5.98k
    };
1582
1583
5.98k
    GGML_ASSERT(ctx->mem_buffer != NULL);
1584
1585
5.98k
    GGML_ASSERT_ALIGNED(ctx->mem_buffer);
1586
1587
5.98k
    GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
1588
1589
5.98k
    return ctx;
1590
5.98k
}
1591
1592
0
void ggml_reset(struct ggml_context * ctx) {
1593
0
    if (ctx == NULL) {
1594
0
        return;
1595
0
    }
1596
1597
0
    ctx->n_objects     = 0;
1598
0
    ctx->objects_begin = NULL;
1599
0
    ctx->objects_end   = NULL;
1600
0
}
1601
1602
5.98k
void ggml_free(struct ggml_context * ctx) {
1603
5.98k
    if (ctx == NULL) {
1604
0
        return;
1605
0
    }
1606
1607
5.98k
    if (ctx->mem_buffer_owned) {
1608
5.98k
        ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
1609
5.98k
    }
1610
1611
5.98k
    GGML_FREE(ctx);
1612
5.98k
}
1613
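Every object lives inside the context's single memory pool, so mem_size must be chosen up front (or no_alloc set, with tensor data placed in a backend buffer instead). A minimal lifecycle sketch:

    #include "ggml.h"

    static void context_lifecycle(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,  // 16 MiB pool for objects (+ data unless no_alloc)
            /*.mem_buffer =*/ NULL,          // let ggml allocate and own the buffer
            /*.no_alloc   =*/ false,
        };

        struct ggml_context * ctx = ggml_init(params);
        // ... create tensors and build graphs in ctx ...
        ggml_free(ctx);                      // releases the owned pool
    }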
1614
0
size_t ggml_used_mem(const struct ggml_context * ctx) {
1615
0
    return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
1616
0
}
1617
1618
0
bool ggml_get_no_alloc(struct ggml_context * ctx) {
1619
0
    return ctx->no_alloc;
1620
0
}
1621
1622
2.14k
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
1623
2.14k
    ctx->no_alloc = no_alloc;
1624
2.14k
}
1625
1626
0
void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
1627
0
    return ctx->mem_buffer;
1628
0
}
1629
1630
0
size_t ggml_get_mem_size(const struct ggml_context * ctx) {
1631
0
    return ctx->mem_size;
1632
0
}
1633
1634
0
size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
1635
0
    size_t max_size = 0;
1636
1637
0
    for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
1638
0
        size_t bytes = ggml_nbytes(tensor);
1639
0
        max_size = MAX(max_size, bytes);
1640
0
    }
1641
1642
0
    return max_size;
1643
0
}
1644
1645
////////////////////////////////////////////////////////////////////////////////
1646
1647
2.08k
static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
1648
    // always insert objects at the end of the context's memory pool
1649
2.08k
    struct ggml_object * obj_cur = ctx->objects_end;
1650
1651
2.08k
    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
1652
2.08k
    const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
1653
2.08k
    const size_t cur_end  = cur_offs + cur_size;
1654
1655
    // align to GGML_MEM_ALIGN
1656
2.08k
    GGML_ASSERT(size <= SIZE_MAX - (GGML_MEM_ALIGN - 1));
1657
2.08k
    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
1658
1659
2.08k
    char * const mem_buffer = ctx->mem_buffer;
1660
2.08k
    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
1661
1662
    // integer overflow checks
1663
2.08k
    if (cur_end > SIZE_MAX - size_needed) {
1664
0
        GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu)\n", __func__, cur_end, size_needed);
1665
0
        return NULL;
1666
0
    }
1667
2.08k
    if (cur_end + size_needed > SIZE_MAX - GGML_OBJECT_SIZE) {
1668
0
        GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu) + GGML_OBJECT_SIZE (%zu)\n", __func__,
1669
0
                cur_end, size_needed, (size_t) GGML_OBJECT_SIZE);
1670
0
        return NULL;
1671
0
    }
1672
1673
2.08k
    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
1674
0
        GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
1675
0
                __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
1676
#ifndef NDEBUG
1677
        GGML_ABORT("not enough space in the context's memory pool");
1678
#endif
1679
0
        return NULL;
1680
0
    }
1681
1682
2.08k
    *obj_new = (struct ggml_object) {
1683
2.08k
        .offs = cur_end + GGML_OBJECT_SIZE,
1684
2.08k
        .size = size_needed,
1685
2.08k
        .next = NULL,
1686
2.08k
        .type = type,
1687
2.08k
    };
1688
1689
2.08k
    GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
1690
1691
2.08k
    if (obj_cur != NULL) {
1692
1.60k
        obj_cur->next = obj_new;
1693
1.60k
    } else {
1694
        // this is the first object in this context
1695
473
        ctx->objects_begin = obj_new;
1696
473
    }
1697
1698
2.08k
    ctx->objects_end = obj_new;
1699
1700
    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
1701
1702
2.08k
    return obj_new;
1703
2.08k
}
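// ---- editor's sketch (not part of ggml.c) ---------------------------------
// Pool arithmetic for the allocator above: each allocation is a ggml_object
// header followed by the payload rounded up to GGML_MEM_ALIGN, so consecutive
// objects stay aligned. Hypothetical helper; GGML_PAD rounds x up to a
// multiple of n, e.g. GGML_PAD(13, 16) == 16.
static size_t example_object_footprint(size_t size) {
    // bytes that ggml_new_object(ctx, type, size) consumes from the pool
    return GGML_OBJECT_SIZE + GGML_PAD(size, GGML_MEM_ALIGN);
}
// ----------------------------------------------------------------------------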
1704
1705
static struct ggml_tensor * ggml_new_tensor_impl(
1706
        struct ggml_context * ctx,
1707
        enum   ggml_type      type,
1708
        int                   n_dims,
1709
        const int64_t       * ne,
1710
        struct ggml_tensor  * view_src,
1711
2.08k
        size_t                view_offs) {
1712
1713
2.08k
    GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
1714
2.08k
    GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
1715
1716
    // find the base tensor and absolute offset
1717
2.08k
    if (view_src != NULL && view_src->view_src != NULL) {
1718
0
        view_offs += view_src->view_offs;
1719
0
        view_src   = view_src->view_src;
1720
0
    }
1721
1722
2.08k
    size_t data_size = ggml_row_size(type, ne[0]);
1723
8.32k
    for (int i = 1; i < n_dims; i++) {
1724
6.24k
        data_size *= ne[i];
1725
6.24k
    }
1726
1727
2.08k
    GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
1728
1729
2.08k
    void * data = view_src != NULL ? view_src->data : NULL;
1730
2.08k
    if (data != NULL) {
1731
0
        data = (char *) data + view_offs;
1732
0
    }
1733
1734
2.08k
    size_t obj_alloc_size = 0;
1735
1736
2.08k
    if (view_src == NULL && !ctx->no_alloc) {
1737
        // allocate tensor data in the context's memory pool
1738
0
        obj_alloc_size = data_size;
1739
0
    }
1740
1741
2.08k
    GGML_ASSERT(GGML_TENSOR_SIZE <= SIZE_MAX - obj_alloc_size);
1742
1743
2.08k
    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
1744
2.08k
    GGML_ASSERT(obj_new);
1745
1746
2.08k
    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
1747
1748
2.08k
    *result = (struct ggml_tensor) {
1749
2.08k
        /*.type         =*/ type,
1750
2.08k
        /*.buffer       =*/ NULL,
1751
2.08k
        /*.ne           =*/ { 1, 1, 1, 1 },
1752
2.08k
        /*.nb           =*/ { 0, 0, 0, 0 },
1753
2.08k
        /*.op           =*/ GGML_OP_NONE,
1754
2.08k
        /*.op_params    =*/ { 0 },
1755
2.08k
        /*.flags        =*/ 0,
1756
2.08k
        /*.src          =*/ { NULL },
1757
2.08k
        /*.view_src     =*/ view_src,
1758
2.08k
        /*.view_offs    =*/ view_offs,
1759
2.08k
        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
1760
2.08k
        /*.name         =*/ { 0 },
1761
2.08k
        /*.extra        =*/ NULL,
1762
2.08k
        /*.padding      =*/ { 0 },
1763
2.08k
    };
1764
1765
    // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
1766
    //GGML_ASSERT_ALIGNED(result->data);
1767
1768
10.4k
    for (int i = 0; i < n_dims; i++) {
1769
8.32k
        result->ne[i] = ne[i];
1770
8.32k
    }
1771
1772
2.08k
    result->nb[0] = ggml_type_size(type);
1773
2.08k
    result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
1774
6.24k
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
1775
4.16k
        result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
1776
4.16k
    }
1777
1778
2.08k
    ctx->n_objects++;
1779
1780
2.08k
    return result;
1781
2.08k
}
1782
1783
struct ggml_tensor * ggml_new_tensor(
1784
        struct ggml_context * ctx,
1785
        enum   ggml_type      type,
1786
        int                   n_dims,
1787
2.08k
        const int64_t       * ne) {
1788
2.08k
    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
1789
2.08k
}
1790
1791
struct ggml_tensor * ggml_new_tensor_1d(
1792
        struct ggml_context * ctx,
1793
        enum   ggml_type      type,
1794
0
        int64_t ne0) {
1795
0
    return ggml_new_tensor(ctx, type, 1, &ne0);
1796
0
}
1797
1798
struct ggml_tensor * ggml_new_tensor_2d(
1799
        struct ggml_context * ctx,
1800
        enum   ggml_type      type,
1801
        int64_t ne0,
1802
0
        int64_t ne1) {
1803
0
    const int64_t ne[2] = { ne0, ne1 };
1804
0
    return ggml_new_tensor(ctx, type, 2, ne);
1805
0
}
1806
1807
struct ggml_tensor * ggml_new_tensor_3d(
1808
        struct ggml_context * ctx,
1809
        enum   ggml_type      type,
1810
        int64_t ne0,
1811
        int64_t ne1,
1812
0
        int64_t ne2) {
1813
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
1814
0
    return ggml_new_tensor(ctx, type, 3, ne);
1815
0
}
1816
1817
struct ggml_tensor * ggml_new_tensor_4d(
1818
        struct ggml_context * ctx,
1819
        enum   ggml_type type,
1820
        int64_t ne0,
1821
        int64_t ne1,
1822
        int64_t ne2,
1823
0
        int64_t ne3) {
1824
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
1825
0
    return ggml_new_tensor(ctx, type, 4, ne);
1826
0
}
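// ---- editor's sketch (not part of ggml.c) ---------------------------------
// The _1d.._4d constructors are thin wrappers over ggml_new_tensor, and
// trailing dimensions default to 1, so these two tensors have the same shape.
// Hypothetical helper assuming a valid ctx.
static void example_new_tensor(struct ggml_context * ctx) {
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
    const int64_t ne[4] = { 8, 4, 1, 1 };
    struct ggml_tensor * b = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
    GGML_ASSERT(ggml_are_same_shape(a, b));
}
// ----------------------------------------------------------------------------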
1827
1828
0
void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
1829
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
1830
1831
0
    return (uint8_t *)ctx->mem_buffer + obj->offs;
1832
0
}
1833
1834
0
struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
1835
0
    return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
1836
0
}
1837
1838
0
void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
1839
0
    const int64_t ne2 = tensor->ne[2];
1840
0
    const int64_t ne1 = tensor->ne[1];
1841
0
    const int64_t ne0 = tensor->ne[0];
1842
1843
0
    const int64_t i3_ = (i/(ne2*ne1*ne0));
1844
0
    const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
1845
0
    const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
1846
0
    const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
1847
1848
0
    if (i0) {
1849
0
        * i0 = i0_;
1850
0
    }
1851
0
    if (i1) {
1852
0
        * i1 = i1_;
1853
0
    }
1854
0
    if (i2) {
1855
0
        * i2 = i2_;
1856
0
    }
1857
0
    if (i3) {
1858
0
        * i3 = i3_;
1859
0
    }
1860
0
}
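// ---- editor's sketch (not part of ggml.c) ---------------------------------
// The flat index decomposes as i = ((i3*ne2 + i2)*ne1 + i1)*ne0 + i0, and
// ggml_unravel_index inverts that; NULL may be passed for coordinates you do
// not need. Hypothetical helper.
static void example_unravel(const struct ggml_tensor * t) {
    // for a [4,2,1,1] tensor and i == 7: i0 == 3, i1 == 1 (7 == 1*4 + 3)
    int64_t i0 = 0, i1 = 0;
    ggml_unravel_index(t, /*i=*/7, &i0, &i1, NULL, NULL);
    (void) i0; (void) i1;
}
// ----------------------------------------------------------------------------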
1861
1862
0
void * ggml_get_data(const struct ggml_tensor * tensor) {
1863
0
    return tensor->data;
1864
0
}
1865
1866
0
float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
1867
0
    assert(tensor->type == GGML_TYPE_F32);
1868
0
    return (float *)(tensor->data);
1869
0
}
1870
1871
0
enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
1872
0
    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
1873
0
    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
1874
0
}
1875
1876
0
enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) {
1877
0
    GGML_ASSERT(tensor->op == GGML_OP_GLU);
1878
0
    return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0);
1879
0
}
1880
1881
1.83k
const char * ggml_get_name(const struct ggml_tensor * tensor) {
1882
1.83k
    return tensor->name;
1883
1.83k
}
1884
1885
6.18k
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
1886
6.18k
    size_t i;
1887
47.7k
    for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
1888
41.5k
        tensor->name[i] = name[i];
1889
41.5k
    }
1890
6.18k
    tensor->name[i] = '\0';
1891
6.18k
    return tensor;
1892
6.18k
}
1893
1894
0
struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
1895
0
    va_list args;
1896
0
    va_start(args, fmt);
1897
0
    vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
1898
0
    va_end(args);
1899
0
    return tensor;
1900
0
}
1901
1902
struct ggml_tensor * ggml_view_tensor(
1903
        struct ggml_context * ctx,
1904
0
        struct ggml_tensor  * src) {
1905
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
1906
0
    ggml_format_name(result, "%s (view)", src->name);
1907
1908
0
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
1909
0
        result->nb[i] = src->nb[i];
1910
0
    }
1911
1912
0
    return result;
1913
0
}
1914
1915
1.06k
struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
1916
1.06k
    struct ggml_object * obj = ctx->objects_begin;
1917
1918
1.06k
    char * const mem_buffer = ctx->mem_buffer;
1919
1920
1.06k
    while (obj != NULL) {
1921
473
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1922
473
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1923
473
        }
1924
1925
0
        obj = obj->next;
1926
0
    }
1927
1928
591
    return NULL;
1929
1.06k
}
1930
1931
1.17k
struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
1932
1.17k
    struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
1933
1.17k
    obj = obj->next;
1934
1935
1.17k
    char * const mem_buffer = ctx->mem_buffer;
1936
1937
1.17k
    while (obj != NULL) {
1938
1.02k
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1939
1.02k
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1940
1.02k
        }
1941
1942
0
        obj = obj->next;
1943
0
    }
1944
1945
142
    return NULL;
1946
1.17k
}
1947
1948
0
struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
1949
0
    struct ggml_object * obj = ctx->objects_begin;
1950
1951
0
    char * const mem_buffer = ctx->mem_buffer;
1952
1953
0
    while (obj != NULL) {
1954
0
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1955
0
            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
1956
0
            if (strcmp(cur->name, name) == 0) {
1957
0
                return cur;
1958
0
            }
1959
0
        }
1960
1961
0
        obj = obj->next;
1962
0
    }
1963
1964
0
    return NULL;
1965
0
}
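// ---- editor's sketch (not part of ggml.c) ---------------------------------
// Naming and retrieval: ggml_set_name truncates to sizeof(tensor->name)-1
// bytes, and ggml_get_tensor finds a tensor again by linear search over the
// context's object list. Hypothetical helper.
static void example_names(struct ggml_context * ctx) {
    struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 16, 16);
    ggml_set_name(w, "weight");
    GGML_ASSERT(ggml_get_tensor(ctx, "weight") == w);

    // walk every tensor in allocation order
    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL;
         t = ggml_get_next_tensor(ctx, t)) {
        GGML_PRINT_DEBUG("%s\n", ggml_get_name(t));
    }
}
// ----------------------------------------------------------------------------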
1966
1967
////////////////////////////////////////////////////////////////////////////////
1968
1969
// ggml_dup
1970
1971
static struct ggml_tensor * ggml_dup_impl(
1972
        struct ggml_context * ctx,
1973
        struct ggml_tensor  * a,
1974
0
        bool                  inplace) {
1975
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1976
1977
0
    result->op     = GGML_OP_DUP;
1978
0
    result->src[0] = a;
1979
1980
0
    return result;
1981
0
}
1982
1983
struct ggml_tensor * ggml_dup(
1984
        struct ggml_context * ctx,
1985
0
        struct ggml_tensor  * a) {
1986
0
    return ggml_dup_impl(ctx, a, false);
1987
0
}
1988
1989
struct ggml_tensor * ggml_dup_inplace(
1990
        struct ggml_context * ctx,
1991
0
        struct ggml_tensor  * a) {
1992
0
    return ggml_dup_impl(ctx, a, true);
1993
0
}
1994
1995
// ggml_add
1996
1997
static struct ggml_tensor * ggml_add_impl(
1998
        struct ggml_context * ctx,
1999
        struct ggml_tensor  * a,
2000
        struct ggml_tensor  * b,
2001
0
        bool                  inplace) {
2002
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2003
2004
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2005
2006
0
    result->op     = GGML_OP_ADD;
2007
0
    result->src[0] = a;
2008
0
    result->src[1] = b;
2009
2010
0
    return result;
2011
0
}
2012
2013
struct ggml_tensor * ggml_add(
2014
        struct ggml_context * ctx,
2015
        struct ggml_tensor  * a,
2016
0
        struct ggml_tensor  * b) {
2017
0
    return ggml_add_impl(ctx, a, b, false);
2018
0
}
2019
2020
struct ggml_tensor * ggml_add_inplace(
2021
        struct ggml_context * ctx,
2022
        struct ggml_tensor  * a,
2023
0
        struct ggml_tensor  * b) {
2024
0
    return ggml_add_impl(ctx, a, b, true);
2025
0
}
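// ---- editor's sketch (not part of ggml.c) ---------------------------------
// The GGML_ASSERT(ggml_can_repeat(b, a)) in ggml_add_impl means the second
// operand is broadcast across the first, as when adding a per-column bias to
// a matrix; the same holds for sub/mul/div below. Hypothetical helper.
static void example_add_broadcast(struct ggml_context * ctx) {
    struct ggml_tensor * a    = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    struct ggml_tensor * bias = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * y    = ggml_add(ctx, a, bias); // bias repeats per row
    GGML_ASSERT(ggml_are_same_shape(y, a));             // result shaped like a
}
// ----------------------------------------------------------------------------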
2026
2027
// ggml_add_cast
2028
2029
static struct ggml_tensor * ggml_add_cast_impl(
2030
        struct ggml_context * ctx,
2031
        struct ggml_tensor  * a,
2032
        struct ggml_tensor  * b,
2033
0
        enum   ggml_type      type) {
2034
    // TODO: support less-strict constraint
2035
    //       GGML_ASSERT(ggml_can_repeat(b, a));
2036
0
    GGML_ASSERT(ggml_can_repeat_rows(b, a));
2037
2038
    // currently only supported for quantized input, f16, and bf16
2039
0
    GGML_ASSERT(ggml_is_quantized(a->type) ||
2040
0
                a->type == GGML_TYPE_F16 ||
2041
0
                a->type == GGML_TYPE_BF16);
2042
2043
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
2044
2045
0
    result->op     = GGML_OP_ADD;
2046
0
    result->src[0] = a;
2047
0
    result->src[1] = b;
2048
2049
0
    return result;
2050
0
}
2051
2052
struct ggml_tensor * ggml_add_cast(
2053
        struct ggml_context * ctx,
2054
        struct ggml_tensor  * a,
2055
        struct ggml_tensor  * b,
2056
0
        enum   ggml_type      type) {
2057
0
    return ggml_add_cast_impl(ctx, a, b, type);
2058
0
}
2059
2060
struct ggml_tensor * ggml_add_id(
2061
            struct ggml_context * ctx,
2062
            struct ggml_tensor  * a,
2063
            struct ggml_tensor  * b,
2064
0
            struct ggml_tensor  * ids) {
2065
2066
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
2067
0
    GGML_ASSERT(a->ne[1] == ids->ne[0]);
2068
0
    GGML_ASSERT(a->ne[2] == ids->ne[1]);
2069
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
2070
2071
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2072
2073
0
    result->op     = GGML_OP_ADD_ID;
2074
0
    result->src[0] = a;
2075
0
    result->src[1] = b;
2076
0
    result->src[2] = ids;
2077
2078
0
    return result;
2079
0
}
2080
2081
// ggml_add1
2082
2083
static struct ggml_tensor * ggml_add1_impl(
2084
        struct ggml_context * ctx,
2085
        struct ggml_tensor  * a,
2086
        struct ggml_tensor  * b,
2087
0
        bool                  inplace) {
2088
0
    GGML_ASSERT(ggml_is_scalar(b));
2089
0
    GGML_ASSERT(ggml_is_padded_1d(a));
2090
2091
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2092
2093
0
    result->op     = GGML_OP_ADD1;
2094
0
    result->src[0] = a;
2095
0
    result->src[1] = b;
2096
2097
0
    return result;
2098
0
}
2099
2100
struct ggml_tensor * ggml_add1(
2101
        struct ggml_context * ctx,
2102
        struct ggml_tensor  * a,
2103
0
        struct ggml_tensor  * b) {
2104
0
    return ggml_add1_impl(ctx, a, b, false);
2105
0
}
2106
2107
struct ggml_tensor * ggml_add1_inplace(
2108
        struct ggml_context * ctx,
2109
        struct ggml_tensor  * a,
2110
0
        struct ggml_tensor  * b) {
2111
0
    return ggml_add1_impl(ctx, a, b, true);
2112
0
}
2113
2114
// ggml_acc
2115
2116
static struct ggml_tensor * ggml_acc_impl(
2117
        struct ggml_context * ctx,
2118
        struct ggml_tensor  * a,
2119
        struct ggml_tensor  * b,
2120
        size_t                nb1,
2121
        size_t                nb2,
2122
        size_t                nb3,
2123
        size_t                offset,
2124
0
        bool                  inplace) {
2125
0
    GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
2126
0
    GGML_ASSERT(ggml_is_contiguous(a));
2127
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2128
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
2129
2130
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2131
2132
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
2133
0
    ggml_set_op_params(result, params, sizeof(params));
2134
2135
0
    result->op     = GGML_OP_ACC;
2136
0
    result->src[0] = a;
2137
0
    result->src[1] = b;
2138
2139
0
    return result;
2140
0
}
2141
2142
struct ggml_tensor * ggml_acc(
2143
        struct ggml_context * ctx,
2144
        struct ggml_tensor  * a,
2145
        struct ggml_tensor  * b,
2146
        size_t                nb1,
2147
        size_t                nb2,
2148
        size_t                nb3,
2149
0
        size_t                offset) {
2150
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
2151
0
}
2152
2153
struct ggml_tensor * ggml_acc_inplace(
2154
        struct ggml_context * ctx,
2155
        struct ggml_tensor  * a,
2156
        struct ggml_tensor  * b,
2157
        size_t                nb1,
2158
        size_t                nb2,
2159
        size_t                nb3,
2160
0
        size_t                offset) {
2161
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
2162
0
}
2163
2164
// ggml_sub
2165
2166
static struct ggml_tensor * ggml_sub_impl(
2167
        struct ggml_context * ctx,
2168
        struct ggml_tensor  * a,
2169
        struct ggml_tensor  * b,
2170
0
        bool                  inplace) {
2171
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2172
2173
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2174
2175
0
    result->op     = GGML_OP_SUB;
2176
0
    result->src[0] = a;
2177
0
    result->src[1] = b;
2178
2179
0
    return result;
2180
0
}
2181
2182
struct ggml_tensor * ggml_sub(
2183
        struct ggml_context * ctx,
2184
        struct ggml_tensor  * a,
2185
0
        struct ggml_tensor  * b) {
2186
0
    return ggml_sub_impl(ctx, a, b, false);
2187
0
}
2188
2189
struct ggml_tensor * ggml_sub_inplace(
2190
        struct ggml_context * ctx,
2191
        struct ggml_tensor  * a,
2192
0
        struct ggml_tensor  * b) {
2193
0
    return ggml_sub_impl(ctx, a, b, true);
2194
0
}
2195
2196
// ggml_mul
2197
2198
static struct ggml_tensor * ggml_mul_impl(
2199
        struct ggml_context * ctx,
2200
        struct ggml_tensor  * a,
2201
        struct ggml_tensor  * b,
2202
0
        bool                  inplace) {
2203
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2204
2205
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2206
2207
0
    result->op     = GGML_OP_MUL;
2208
0
    result->src[0] = a;
2209
0
    result->src[1] = b;
2210
2211
0
    return result;
2212
0
}
2213
2214
struct ggml_tensor * ggml_mul(
2215
        struct ggml_context * ctx,
2216
        struct ggml_tensor  * a,
2217
0
        struct ggml_tensor  * b) {
2218
0
    return ggml_mul_impl(ctx, a, b, false);
2219
0
}
2220
2221
struct ggml_tensor * ggml_mul_inplace(
2222
        struct ggml_context * ctx,
2223
        struct ggml_tensor  * a,
2224
0
        struct ggml_tensor  * b) {
2225
0
    return ggml_mul_impl(ctx, a, b, true);
2226
0
}
2227
2228
// ggml_div
2229
2230
static struct ggml_tensor * ggml_div_impl(
2231
        struct ggml_context * ctx,
2232
        struct ggml_tensor  * a,
2233
        struct ggml_tensor  * b,
2234
0
        bool                  inplace) {
2235
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2236
2237
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2238
2239
0
    result->op     = GGML_OP_DIV;
2240
0
    result->src[0] = a;
2241
0
    result->src[1] = b;
2242
2243
0
    return result;
2244
0
}
2245
2246
struct ggml_tensor * ggml_div(
2247
        struct ggml_context * ctx,
2248
        struct ggml_tensor  * a,
2249
0
        struct ggml_tensor  * b) {
2250
0
    return ggml_div_impl(ctx, a, b, false);
2251
0
}
2252
2253
struct ggml_tensor * ggml_div_inplace(
2254
        struct ggml_context * ctx,
2255
        struct ggml_tensor  * a,
2256
0
        struct ggml_tensor  * b) {
2257
0
    return ggml_div_impl(ctx, a, b, true);
2258
0
}
2259
2260
// ggml_sqr
2261
2262
static struct ggml_tensor * ggml_sqr_impl(
2263
        struct ggml_context * ctx,
2264
        struct ggml_tensor  * a,
2265
0
        bool                  inplace) {
2266
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2267
2268
0
    result->op     = GGML_OP_SQR;
2269
0
    result->src[0] = a;
2270
2271
0
    return result;
2272
0
}
2273
2274
struct ggml_tensor * ggml_sqr(
2275
        struct ggml_context * ctx,
2276
0
        struct ggml_tensor  * a) {
2277
0
    return ggml_sqr_impl(ctx, a, false);
2278
0
}
2279
2280
struct ggml_tensor * ggml_sqr_inplace(
2281
        struct ggml_context * ctx,
2282
0
        struct ggml_tensor  * a) {
2283
0
    return ggml_sqr_impl(ctx, a, true);
2284
0
}
2285
2286
// ggml_sqrt
2287
2288
static struct ggml_tensor * ggml_sqrt_impl(
2289
        struct ggml_context * ctx,
2290
        struct ggml_tensor  * a,
2291
0
        bool                  inplace) {
2292
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2293
2294
0
    result->op     = GGML_OP_SQRT;
2295
0
    result->src[0] = a;
2296
2297
0
    return result;
2298
0
}
2299
2300
struct ggml_tensor * ggml_sqrt(
2301
        struct ggml_context * ctx,
2302
0
        struct ggml_tensor  * a) {
2303
0
    return ggml_sqrt_impl(ctx, a, false);
2304
0
}
2305
2306
struct ggml_tensor * ggml_sqrt_inplace(
2307
        struct ggml_context * ctx,
2308
0
        struct ggml_tensor  * a) {
2309
0
    return ggml_sqrt_impl(ctx, a, true);
2310
0
}
2311
2312
// ggml_log
2313
2314
static struct ggml_tensor * ggml_log_impl(
2315
        struct ggml_context * ctx,
2316
        struct ggml_tensor  * a,
2317
0
        bool                  inplace) {
2318
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2319
2320
0
    result->op     = GGML_OP_LOG;
2321
0
    result->src[0] = a;
2322
2323
0
    return result;
2324
0
}
2325
2326
struct ggml_tensor * ggml_log(
2327
        struct ggml_context * ctx,
2328
0
        struct ggml_tensor  * a) {
2329
0
    return ggml_log_impl(ctx, a, false);
2330
0
}
2331
2332
struct ggml_tensor * ggml_log_inplace(
2333
        struct ggml_context * ctx,
2334
0
        struct ggml_tensor  * a) {
2335
0
    return ggml_log_impl(ctx, a, true);
2336
0
}
2337
2338
struct ggml_tensor * ggml_expm1(
2339
        struct ggml_context * ctx,
2340
0
        struct ggml_tensor  * a) {
2341
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
2342
0
}
2343
2344
struct ggml_tensor * ggml_expm1_inplace(
2345
        struct ggml_context * ctx,
2346
0
        struct ggml_tensor  * a) {
2347
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
2348
0
}
2349
2350
struct ggml_tensor * ggml_softplus(
2351
        struct ggml_context * ctx,
2352
0
        struct ggml_tensor  * a) {
2353
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2354
0
}
2355
2356
struct ggml_tensor * ggml_softplus_inplace(
2357
        struct ggml_context * ctx,
2358
0
        struct ggml_tensor  * a) {
2359
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2360
0
}
2361
2362
// ggml_sin
2363
2364
static struct ggml_tensor * ggml_sin_impl(
2365
        struct ggml_context * ctx,
2366
        struct ggml_tensor  * a,
2367
0
        bool                  inplace) {
2368
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2369
2370
0
    result->op     = GGML_OP_SIN;
2371
0
    result->src[0] = a;
2372
2373
0
    return result;
2374
0
}
2375
2376
struct ggml_tensor * ggml_sin(
2377
        struct ggml_context * ctx,
2378
0
        struct ggml_tensor  * a) {
2379
0
    return ggml_sin_impl(ctx, a, false);
2380
0
}
2381
2382
struct ggml_tensor * ggml_sin_inplace(
2383
        struct ggml_context * ctx,
2384
0
        struct ggml_tensor  * a) {
2385
0
    return ggml_sin_impl(ctx, a, true);
2386
0
}
2387
2388
// ggml_cos
2389
2390
static struct ggml_tensor * ggml_cos_impl(
2391
        struct ggml_context * ctx,
2392
        struct ggml_tensor  * a,
2393
0
        bool                  inplace) {
2394
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2395
2396
0
    result->op     = GGML_OP_COS;
2397
0
    result->src[0] = a;
2398
2399
0
    return result;
2400
0
}
2401
2402
struct ggml_tensor * ggml_cos(
2403
        struct ggml_context * ctx,
2404
0
        struct ggml_tensor  * a) {
2405
0
    return ggml_cos_impl(ctx, a, false);
2406
0
}
2407
2408
struct ggml_tensor * ggml_cos_inplace(
2409
        struct ggml_context * ctx,
2410
0
        struct ggml_tensor  * a) {
2411
0
    return ggml_cos_impl(ctx, a, true);
2412
0
}
2413
2414
// ggml_sum
2415
2416
struct ggml_tensor * ggml_sum(
2417
        struct ggml_context * ctx,
2418
0
        struct ggml_tensor  * a) {
2419
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
2420
2421
0
    result->op     = GGML_OP_SUM;
2422
0
    result->src[0] = a;
2423
2424
0
    return result;
2425
0
}
2426
2427
// ggml_sum_rows
2428
2429
struct ggml_tensor * ggml_sum_rows(
2430
        struct ggml_context * ctx,
2431
0
        struct ggml_tensor  * a) {
2432
0
    int64_t ne[GGML_MAX_DIMS] = { 1 };
2433
0
    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
2434
0
        ne[i] = a->ne[i];
2435
0
    }
2436
2437
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2438
2439
0
    result->op     = GGML_OP_SUM_ROWS;
2440
0
    result->src[0] = a;
2441
2442
0
    return result;
2443
0
}
2444
2445
// ggml_cumsum
2446
2447
struct ggml_tensor * ggml_cumsum(
2448
        struct ggml_context * ctx,
2449
0
        struct ggml_tensor  * a) {
2450
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2451
2452
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2453
2454
0
    result->op     = GGML_OP_CUMSUM;
2455
0
    result->src[0] = a;
2456
2457
0
    return result;
2458
0
}
2459
2460
// ggml_mean
2461
2462
struct ggml_tensor * ggml_mean(
2463
        struct ggml_context * ctx,
2464
0
        struct ggml_tensor  * a) {
2465
0
    int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
2466
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
2467
2468
0
    result->op     = GGML_OP_MEAN;
2469
0
    result->src[0] = a;
2470
2471
0
    return result;
2472
0
}
2473
2474
// ggml_argmax
2475
2476
struct ggml_tensor * ggml_argmax(
2477
        struct ggml_context * ctx,
2478
0
        struct ggml_tensor  * a) {
2479
0
    GGML_ASSERT(ggml_is_matrix(a));
2480
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
2481
2482
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
2483
2484
0
    result->op     = GGML_OP_ARGMAX;
2485
0
    result->src[0] = a;
2486
2487
0
    return result;
2488
0
}
2489
2490
// ggml_count_equal
2491
2492
struct ggml_tensor * ggml_count_equal(
2493
        struct ggml_context * ctx,
2494
        struct ggml_tensor  * a,
2495
0
        struct ggml_tensor  * b) {
2496
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
2497
2498
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
2499
2500
0
    result->op     = GGML_OP_COUNT_EQUAL;
2501
0
    result->src[0] = a;
2502
0
    result->src[1] = b;
2503
2504
0
    return result;
2505
0
}
2506
2507
// ggml_repeat
2508
2509
struct ggml_tensor * ggml_repeat(
2510
        struct ggml_context * ctx,
2511
        struct ggml_tensor  * a,
2512
0
        struct ggml_tensor  * b) {
2513
0
    GGML_ASSERT(ggml_can_repeat(a, b));
2514
2515
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2516
2517
0
    result->op     = GGML_OP_REPEAT;
2518
0
    result->src[0] = a;
2519
2520
0
    return result;
2521
0
}
2522
2523
struct ggml_tensor * ggml_repeat_4d(
2524
        struct ggml_context * ctx,
2525
        struct ggml_tensor * a,
2526
0
        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
2527
0
    const bool can_repeat = ggml_is_empty(a) || (
2528
0
        (ne0 % a->ne[0] == 0) &&
2529
0
        (ne1 % a->ne[1] == 0) &&
2530
0
        (ne2 % a->ne[2] == 0) &&
2531
0
        (ne3 % a->ne[3] == 0)
2532
0
    );
2533
0
    GGML_ASSERT(can_repeat);
2534
2535
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
2536
2537
0
    result->op     = GGML_OP_REPEAT;
2538
0
    result->src[0] = a;
2539
2540
0
    return result;
2541
0
}
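// ---- editor's sketch (not part of ggml.c) ---------------------------------
// ggml_repeat tiles a to the shape of an existing tensor b, while
// ggml_repeat_4d takes the target extents directly, so no template tensor is
// needed. Hypothetical helper.
static void example_repeat(struct ggml_context * ctx) {
    struct ggml_tensor * a  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
    struct ggml_tensor * r1 = ggml_repeat_4d(ctx, a, 4, 6, 1, 1); // 2x2 tiling
    struct ggml_tensor * b  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 6);
    struct ggml_tensor * r2 = ggml_repeat(ctx, a, b);             // same shape
    GGML_ASSERT(ggml_are_same_shape(r1, r2));
}
// ----------------------------------------------------------------------------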
2542
2543
// ggml_repeat_back
2544
2545
struct ggml_tensor * ggml_repeat_back(
2546
        struct ggml_context * ctx,
2547
        struct ggml_tensor  * a,
2548
0
        struct ggml_tensor  * b) {
2549
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2550
2551
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2552
2553
0
    result->op     = GGML_OP_REPEAT_BACK;
2554
0
    result->src[0] = a;
2555
2556
0
    return result;
2557
0
}
2558
2559
// ggml_concat
2560
2561
struct ggml_tensor * ggml_concat(
2562
    struct ggml_context * ctx,
2563
    struct ggml_tensor  * a,
2564
    struct ggml_tensor  * b,
2565
0
    int                   dim) {
2566
0
    GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
2567
0
    GGML_ASSERT(a->type == b->type);
2568
2569
0
    int64_t ne[GGML_MAX_DIMS];
2570
0
    for (int d = 0; d < GGML_MAX_DIMS; ++d) {
2571
0
        if (d == dim) {
2572
0
            ne[d] = a->ne[d] + b->ne[d];
2573
0
            continue;
2574
0
        }
2575
0
        GGML_ASSERT(a->ne[d] == b->ne[d]);
2576
0
        ne[d] = a->ne[d];
2577
0
    }
2578
2579
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2580
2581
0
    ggml_set_op_params_i32(result, 0, dim);
2582
2583
0
    result->op     = GGML_OP_CONCAT;
2584
0
    result->src[0] = a;
2585
0
    result->src[1] = b;
2586
2587
0
    return result;
2588
0
}
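// ---- editor's sketch (not part of ggml.c) ---------------------------------
// Concatenation: all extents except `dim` must match, and the chosen dim's
// extents are summed. Hypothetical helper.
static void example_concat(struct ggml_context * ctx) {
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 3);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 5);
    struct ggml_tensor * c = ggml_concat(ctx, a, b, /*dim=*/1); // -> [8, 8]
    GGML_ASSERT(c->ne[0] == 8 && c->ne[1] == 8);
}
// ----------------------------------------------------------------------------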
2589
2590
// ggml_abs
2591
2592
struct ggml_tensor * ggml_abs(
2593
        struct ggml_context * ctx,
2594
0
        struct ggml_tensor  * a) {
2595
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
2596
0
}
2597
2598
struct ggml_tensor * ggml_abs_inplace(
2599
        struct ggml_context * ctx,
2600
0
        struct ggml_tensor  * a) {
2601
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
2602
0
}
2603
2604
// ggml_sgn
2605
2606
struct ggml_tensor * ggml_sgn(
2607
        struct ggml_context * ctx,
2608
0
        struct ggml_tensor  * a) {
2609
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
2610
0
}
2611
2612
struct ggml_tensor * ggml_sgn_inplace(
2613
        struct ggml_context * ctx,
2614
0
        struct ggml_tensor  * a) {
2615
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
2616
0
}
2617
2618
// ggml_neg
2619
2620
struct ggml_tensor * ggml_neg(
2621
        struct ggml_context * ctx,
2622
0
        struct ggml_tensor  * a) {
2623
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
2624
0
}
2625
2626
struct ggml_tensor * ggml_neg_inplace(
2627
        struct ggml_context * ctx,
2628
0
        struct ggml_tensor  * a) {
2629
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
2630
0
}
2631
2632
// ggml_step
2633
2634
struct ggml_tensor * ggml_step(
2635
        struct ggml_context * ctx,
2636
0
        struct ggml_tensor  * a) {
2637
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
2638
0
}
2639
2640
struct ggml_tensor * ggml_step_inplace(
2641
        struct ggml_context * ctx,
2642
0
        struct ggml_tensor  * a) {
2643
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
2644
0
}
2645
2646
// ggml_tanh
2647
2648
struct ggml_tensor * ggml_tanh(
2649
        struct ggml_context * ctx,
2650
0
        struct ggml_tensor  * a) {
2651
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
2652
0
}
2653
2654
struct ggml_tensor * ggml_tanh_inplace(
2655
        struct ggml_context * ctx,
2656
0
        struct ggml_tensor  * a) {
2657
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
2658
0
}
2659
2660
// ggml_elu
2661
2662
struct ggml_tensor * ggml_elu(
2663
    struct ggml_context * ctx,
2664
0
    struct ggml_tensor  * a) {
2665
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
2666
0
}
2667
2668
struct ggml_tensor * ggml_elu_inplace(
2669
    struct ggml_context * ctx,
2670
0
    struct ggml_tensor  * a) {
2671
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
2672
0
}
2673
2674
// ggml_relu
2675
2676
struct ggml_tensor * ggml_relu(
2677
        struct ggml_context * ctx,
2678
0
        struct ggml_tensor  * a) {
2679
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
2680
0
}
2681
2682
struct ggml_tensor * ggml_relu_inplace(
2683
        struct ggml_context * ctx,
2684
0
        struct ggml_tensor  * a) {
2685
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
2686
0
}
2687
2688
// ggml_leaky_relu
2689
2690
struct ggml_tensor * ggml_leaky_relu(
2691
        struct ggml_context * ctx,
2692
        struct ggml_tensor  * a,
2693
        float                 negative_slope,
2694
0
        bool                  inplace) {
2695
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2696
2697
0
    ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
2698
2699
0
    result->op     = GGML_OP_LEAKY_RELU;
2700
0
    result->src[0] = a;
2701
2702
0
    return result;
2703
0
}
2704
2705
// ggml_sigmoid
2706
2707
struct ggml_tensor * ggml_sigmoid(
2708
        struct ggml_context * ctx,
2709
0
        struct ggml_tensor  * a) {
2710
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
2711
0
}
2712
2713
struct ggml_tensor * ggml_sigmoid_inplace(
2714
        struct ggml_context * ctx,
2715
0
        struct ggml_tensor  * a) {
2716
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
2717
0
}
2718
2719
// ggml_gelu
2720
2721
struct ggml_tensor * ggml_gelu(
2722
        struct ggml_context * ctx,
2723
0
        struct ggml_tensor  * a) {
2724
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
2725
0
}
2726
2727
struct ggml_tensor * ggml_gelu_inplace(
2728
        struct ggml_context * ctx,
2729
0
        struct ggml_tensor  * a) {
2730
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
2731
0
}
2732
2733
// ggml_gelu_erf
2734
2735
struct ggml_tensor * ggml_gelu_erf(
2736
        struct ggml_context * ctx,
2737
0
        struct ggml_tensor  * a) {
2738
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
2739
0
}
2740
2741
struct ggml_tensor * ggml_gelu_erf_inplace(
2742
        struct ggml_context * ctx,
2743
0
        struct ggml_tensor  * a) {
2744
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
2745
0
}
2746
2747
// ggml_gelu_quick
2748
2749
struct ggml_tensor * ggml_gelu_quick(
2750
        struct ggml_context * ctx,
2751
0
        struct ggml_tensor  * a) {
2752
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2753
0
}
2754
2755
struct ggml_tensor * ggml_gelu_quick_inplace(
2756
        struct ggml_context * ctx,
2757
0
        struct ggml_tensor  * a) {
2758
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2759
0
}
2760
2761
// ggml_silu
2762
2763
struct ggml_tensor * ggml_silu(
2764
        struct ggml_context * ctx,
2765
0
        struct ggml_tensor  * a) {
2766
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
2767
0
}
2768
2769
struct ggml_tensor * ggml_silu_inplace(
2770
        struct ggml_context * ctx,
2771
0
        struct ggml_tensor  * a) {
2772
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
2773
0
}
2774
2775
// ggml_xielu
2776
2777
struct ggml_tensor * ggml_xielu(
2778
        struct ggml_context * ctx,
2779
        struct ggml_tensor  * a,
2780
        float alpha_n,
2781
        float alpha_p,
2782
        float beta,
2783
0
        float eps) {
2784
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2785
2786
0
    ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
2787
0
    ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
2788
0
    ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
2789
0
    ggml_set_op_params_f32(result, 3, beta);
2790
0
    ggml_set_op_params_f32(result, 4, eps);
2791
2792
0
    result->op     = GGML_OP_UNARY;
2793
0
    result->src[0] = a;
2794
2795
0
    return result;
2796
0
}
2797
2798
// ggml_silu_back
2799
2800
struct ggml_tensor * ggml_silu_back(
2801
        struct ggml_context * ctx,
2802
        struct ggml_tensor  * a,
2803
0
        struct ggml_tensor  * b) {
2804
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2805
2806
0
    result->op     = GGML_OP_SILU_BACK;
2807
0
    result->src[0] = a;
2808
0
    result->src[1] = b;
2809
2810
0
    return result;
2811
0
}
2812
2813
// ggml_hardswish
2814
2815
struct ggml_tensor * ggml_hardswish(
2816
        struct ggml_context * ctx,
2817
0
        struct ggml_tensor  * a) {
2818
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
2819
0
}
2820
2821
// ggml_hardsigmoid
2822
2823
struct ggml_tensor * ggml_hardsigmoid(
2824
        struct ggml_context * ctx,
2825
0
        struct ggml_tensor  * a) {
2826
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
2827
0
}
2828
2829
// ggml_exp
2830
2831
struct ggml_tensor * ggml_exp(
2832
        struct ggml_context * ctx,
2833
0
        struct ggml_tensor  * a) {
2834
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
2835
0
}
2836
2837
struct ggml_tensor * ggml_exp_inplace(
2838
        struct ggml_context * ctx,
2839
0
        struct ggml_tensor  * a) {
2840
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
2841
0
}
2842
2843
// ggml_glu
2844
2845
static struct ggml_tensor * ggml_glu_impl(
2846
        struct ggml_context * ctx,
2847
        struct ggml_tensor  * a,
2848
        struct ggml_tensor  * b,
2849
        enum ggml_glu_op      op,
2850
0
        bool                  swapped) {
2851
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
2852
2853
0
    if (b) {
2854
0
        GGML_ASSERT(ggml_is_contiguous_1(b));
2855
0
        GGML_ASSERT(ggml_are_same_shape(a, b));
2856
0
        GGML_ASSERT(a->type == b->type);
2857
0
    }
2858
2859
0
    int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) { ne[i] = a->ne[i]; }
2860
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0);
2861
2862
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
2863
0
    ggml_set_op_params_i32(result, 1, (int32_t) swapped);
2864
2865
0
    result->op     = GGML_OP_GLU;
2866
0
    result->src[0] = a;
2867
0
    result->src[1] = b;
2868
2869
0
    return result;
2870
0
}
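// ---- editor's sketch (not part of ggml.c) ---------------------------------
// GLU output shapes: with a single input, ggml_glu_impl halves ne[0] (gate
// and value packed into one tensor); the two-input "split" form keeps a's
// full shape because b supplies the gate. Hypothetical helper using the
// ggml_swiglu wrappers defined further below.
static void example_glu_shapes(struct ggml_context * ctx) {
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 2);
    struct ggml_tensor * g = ggml_swiglu(ctx, a);           // fused: ne[0] == 4
    GGML_ASSERT(g->ne[0] == 4);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 2);
    struct ggml_tensor * s = ggml_swiglu_split(ctx, a, b);  // split: ne[0] == 8
    GGML_ASSERT(s->ne[0] == 8);
}
// ----------------------------------------------------------------------------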
2871
2872
// ggml_floor
2873
2874
struct ggml_tensor * ggml_floor(
2875
        struct ggml_context * ctx,
2876
0
        struct ggml_tensor  * a) {
2877
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2878
0
}
2879
2880
struct ggml_tensor * ggml_floor_inplace(
2881
        struct ggml_context * ctx,
2882
0
        struct ggml_tensor  * a) {
2883
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2884
0
}
2885
2886
// ggml_ceil
2887
2888
struct ggml_tensor * ggml_ceil(
2889
        struct ggml_context * ctx,
2890
0
        struct ggml_tensor  * a) {
2891
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2892
0
}
2893
2894
struct ggml_tensor * ggml_ceil_inplace(
2895
        struct ggml_context * ctx,
2896
0
        struct ggml_tensor  * a) {
2897
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2898
0
}
2899
2900
// ggml_round
2901
2902
struct ggml_tensor * ggml_round(
2903
        struct ggml_context * ctx,
2904
0
        struct ggml_tensor  * a) {
2905
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2906
0
}
2907
2908
struct ggml_tensor * ggml_round_inplace(
2909
        struct ggml_context * ctx,
2910
0
        struct ggml_tensor  * a) {
2911
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2912
0
}
2913
2914
// ggml_trunc
2915
2916
struct ggml_tensor * ggml_trunc(
2917
        struct ggml_context * ctx,
2918
0
        struct ggml_tensor  * a) {
2919
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2920
0
}
2921
2922
struct ggml_tensor * ggml_trunc_inplace(
2923
        struct ggml_context * ctx,
2924
0
        struct ggml_tensor  * a) {
2925
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2926
0
}
2927
2928
struct ggml_tensor * ggml_glu(
2929
        struct ggml_context * ctx,
2930
        struct ggml_tensor  * a,
2931
        enum ggml_glu_op      op,
2932
0
        bool                  swapped) {
2933
0
    return ggml_glu_impl(ctx, a, NULL, op, swapped);
2934
0
}
2935
2936
struct ggml_tensor * ggml_glu_split(
2937
        struct ggml_context * ctx,
2938
        struct ggml_tensor  * a,
2939
        struct ggml_tensor  * b,
2940
0
        enum ggml_glu_op      op) {
2941
0
    return ggml_glu_impl(ctx, a, b, op, false);
2942
0
}
2943
2944
// ggml_reglu
2945
2946
struct ggml_tensor * ggml_reglu(
2947
        struct ggml_context * ctx,
2948
0
        struct ggml_tensor  * a) {
2949
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false);
2950
0
}
2951
2952
struct ggml_tensor * ggml_reglu_swapped(
2953
        struct ggml_context * ctx,
2954
0
        struct ggml_tensor  * a) {
2955
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true);
2956
0
}
2957
2958
struct ggml_tensor * ggml_reglu_split(
2959
        struct ggml_context * ctx,
2960
        struct ggml_tensor  * a,
2961
0
        struct ggml_tensor  * b) {
2962
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false);
2963
0
}
2964
2965
// ggml_geglu
2966
2967
struct ggml_tensor * ggml_geglu(
2968
        struct ggml_context * ctx,
2969
0
        struct ggml_tensor  * a) {
2970
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false);
2971
0
}
2972
2973
struct ggml_tensor * ggml_geglu_swapped(
2974
        struct ggml_context * ctx,
2975
0
        struct ggml_tensor  * a) {
2976
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true);
2977
0
}
2978
2979
struct ggml_tensor * ggml_geglu_split(
2980
        struct ggml_context * ctx,
2981
        struct ggml_tensor  * a,
2982
0
        struct ggml_tensor  * b) {
2983
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false);
2984
0
}
2985
2986
// ggml_swiglu
2987
2988
struct ggml_tensor * ggml_swiglu(
2989
        struct ggml_context * ctx,
2990
0
        struct ggml_tensor  * a) {
2991
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false);
2992
0
}
2993
2994
struct ggml_tensor * ggml_swiglu_swapped(
2995
        struct ggml_context * ctx,
2996
0
        struct ggml_tensor  * a) {
2997
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true);
2998
0
}
2999
3000
struct ggml_tensor * ggml_swiglu_split(
3001
        struct ggml_context * ctx,
3002
        struct ggml_tensor  * a,
3003
0
        struct ggml_tensor  * b) {
3004
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
3005
0
}
3006
3007
// ggml_geglu_erf
3008
3009
struct ggml_tensor * ggml_geglu_erf(
3010
        struct ggml_context * ctx,
3011
0
        struct ggml_tensor  * a) {
3012
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
3013
0
}
3014
3015
struct ggml_tensor * ggml_geglu_erf_swapped(
3016
        struct ggml_context * ctx,
3017
0
        struct ggml_tensor  * a) {
3018
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
3019
0
}
3020
3021
struct ggml_tensor * ggml_geglu_erf_split(
3022
        struct ggml_context * ctx,
3023
        struct ggml_tensor  * a,
3024
0
        struct ggml_tensor  * b) {
3025
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
3026
0
}
3027
3028
// ggml_geglu_quick
3029
3030
struct ggml_tensor * ggml_geglu_quick(
3031
        struct ggml_context * ctx,
3032
0
        struct ggml_tensor  * a) {
3033
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
3034
0
}
3035
3036
struct ggml_tensor * ggml_geglu_quick_swapped(
3037
        struct ggml_context * ctx,
3038
0
        struct ggml_tensor  * a) {
3039
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
3040
0
}
3041
3042
struct ggml_tensor * ggml_geglu_quick_split(
3043
        struct ggml_context * ctx,
3044
        struct ggml_tensor  * a,
3045
0
        struct ggml_tensor  * b) {
3046
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
3047
0
}
3048
3049
struct ggml_tensor * ggml_swiglu_oai(
3050
        struct ggml_context * ctx,
3051
        struct ggml_tensor  * a,
3052
        struct ggml_tensor  * b,
3053
        float                 alpha,
3054
0
        float                 limit) {
3055
0
    struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false);
3056
0
    ggml_set_op_params_f32(result, 2, alpha);
3057
0
    ggml_set_op_params_f32(result, 3, limit);
3058
3059
0
    return result;
3060
0
}
3061
3062
// ggml_norm
3063
3064
static struct ggml_tensor * ggml_norm_impl(
3065
        struct ggml_context * ctx,
3066
        struct ggml_tensor  * a,
3067
        float                 eps,
3068
0
        bool                  inplace) {
3069
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3070
3071
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3072
3073
0
    result->op     = GGML_OP_NORM;
3074
0
    result->src[0] = a;
3075
3076
0
    return result;
3077
0
}
3078
3079
struct ggml_tensor * ggml_norm(
3080
        struct ggml_context * ctx,
3081
        struct ggml_tensor  * a,
3082
0
        float                 eps) {
3083
0
    return ggml_norm_impl(ctx, a, eps, false);
3084
0
}
3085
3086
struct ggml_tensor * ggml_norm_inplace(
3087
        struct ggml_context * ctx,
3088
        struct ggml_tensor  * a,
3089
0
        float                 eps) {
3090
0
    return ggml_norm_impl(ctx, a, eps, true);
3091
0
}
3092
3093
// ggml_rms_norm
3094
3095
static struct ggml_tensor * ggml_rms_norm_impl(
3096
        struct ggml_context * ctx,
3097
        struct ggml_tensor  * a,
3098
        float                 eps,
3099
0
        bool                  inplace) {
3100
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3101
3102
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3103
3104
0
    result->op     = GGML_OP_RMS_NORM;
3105
0
    result->src[0] = a;
3106
3107
0
    return result;
3108
0
}
3109
3110
struct ggml_tensor * ggml_rms_norm(
3111
        struct ggml_context * ctx,
3112
        struct ggml_tensor  * a,
3113
0
        float                 eps) {
3114
0
    return ggml_rms_norm_impl(ctx, a, eps, false);
3115
0
}
3116
3117
struct ggml_tensor * ggml_rms_norm_inplace(
3118
        struct ggml_context * ctx,
3119
        struct ggml_tensor  * a,
3120
0
        float                 eps) {
3121
0
    return ggml_rms_norm_impl(ctx, a, eps, true);
3122
0
}
3123
3124
// ggml_rms_norm_back
3125
3126
struct ggml_tensor * ggml_rms_norm_back(
3127
        struct ggml_context * ctx,
3128
        struct ggml_tensor  * a,
3129
        struct ggml_tensor  * b,
3130
0
        float                 eps) {
3131
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3132
3133
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3134
3135
0
    result->op     = GGML_OP_RMS_NORM_BACK;
3136
0
    result->src[0] = a;
3137
0
    result->src[1] = b;
3138
3139
0
    return result;
3140
0
}
3141
3142
// ggml_group_norm
3143
3144
static struct ggml_tensor * ggml_group_norm_impl(
3145
        struct ggml_context * ctx,
3146
        struct ggml_tensor  * a,
3147
        int                   n_groups,
3148
        float                 eps,
3149
0
        bool                  inplace) {
3150
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3151
3152
0
    ggml_set_op_params_i32(result, 0, n_groups);
3153
0
    ggml_set_op_params_f32(result, 1, eps);
3154
3155
0
    result->op     = GGML_OP_GROUP_NORM;
3156
0
    result->src[0] = a;
3157
3158
0
    return result;
3159
0
}
3160
3161
struct ggml_tensor * ggml_group_norm(
3162
        struct ggml_context * ctx,
3163
        struct ggml_tensor  * a,
3164
        int                   n_groups,
3165
0
        float                 eps) {
3166
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
3167
0
}
3168
3169
struct ggml_tensor * ggml_group_norm_inplace(
3170
        struct ggml_context * ctx,
3171
        struct ggml_tensor  * a,
3172
        int                   n_groups,
3173
0
        float                 eps) {
3174
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
3175
0
}
3176
3177
// ggml_l2_norm
3178
3179
static struct ggml_tensor * ggml_l2_norm_impl(
3180
        struct ggml_context * ctx,
3181
        struct ggml_tensor  * a,
3182
        float                 eps,
3183
0
        bool                  inplace) {
3184
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3185
3186
0
    ggml_set_op_params_f32(result, 0, eps);
3187
3188
0
    result->op     = GGML_OP_L2_NORM;
3189
0
    result->src[0] = a;
3190
3191
0
    return result;
3192
0
}
3193
3194
struct ggml_tensor * ggml_l2_norm(
3195
        struct ggml_context * ctx,
3196
        struct ggml_tensor  * a,
3197
0
        float                 eps) {
3198
0
    return ggml_l2_norm_impl(ctx, a, eps, false);
3199
0
}
3200
3201
struct ggml_tensor * ggml_l2_norm_inplace(
3202
        struct ggml_context * ctx,
3203
        struct ggml_tensor  * a,
3204
0
        float                 eps) {
3205
0
    return ggml_l2_norm_impl(ctx, a, eps, true);
3206
0
}
3207
3208
// ggml_mul_mat
3209
3210
0
static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3211
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3212
3213
0
    return (t0->ne[0]           == t1->ne[0])  &&
3214
0
           (t1->ne[2]%t0->ne[2] == 0)          && // verify t0 is broadcastable
3215
0
           (t1->ne[3]%t0->ne[3] == 0);
3216
0
}
3217
3218
struct ggml_tensor * ggml_mul_mat(
3219
        struct ggml_context * ctx,
3220
        struct ggml_tensor  * a,
3221
0
        struct ggml_tensor  * b) {
3222
0
    GGML_ASSERT(ggml_can_mul_mat(a, b));
3223
0
    GGML_ASSERT(!ggml_is_transposed(a));
3224
3225
0
    const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
3226
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3227
3228
0
    result->op     = GGML_OP_MUL_MAT;
3229
0
    result->src[0] = a;
3230
0
    result->src[1] = b;
3231
3232
0
    return result;
3233
0
}
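// ---- editor's sketch (not part of ggml.c) ---------------------------------
// Shape convention for ggml_mul_mat: ne[0] is the contiguous (column) dim and
// both inputs must share it; result element (i, j) is the dot product of row
// i of a with row j of b, so the result is [a->ne[1], b->ne[1], ...] in F32,
// with broadcasting over dims 2 and 3. Hypothetical helper.
static void example_mul_mat(struct ggml_context * ctx) {
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 10); // 10 rows of 64
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64,  4); //  4 rows of 64
    struct ggml_tensor * c = ggml_mul_mat(ctx, a, b);                        // -> [10, 4]
    GGML_ASSERT(c->ne[0] == 10 && c->ne[1] == 4);
}
// ----------------------------------------------------------------------------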
3234
3235
void ggml_mul_mat_set_prec(
3236
        struct ggml_tensor * a,
3237
0
        enum ggml_prec       prec) {
3238
0
    GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
3239
3240
0
    const int32_t prec_i32 = (int32_t) prec;
3241
3242
0
    ggml_set_op_params_i32(a, 0, prec_i32);
3243
0
}
3244
3245
// ggml_mul_mat_id
3246
3247
/*
3248
    c = ggml_mul_mat_id(ctx, as, b, ids);
3249
3250
    as  -> [cols, rows, n_expert]
3251
    b   -> [cols, n_expert_used, n_tokens]
3252
    ids -> [n_expert_used, n_tokens] (i32)
3253
    c   -> [rows, n_expert_used, n_tokens]
3254
3255
    in b, n_expert_used can be broadcasted to match the n_expert_used of ids
3256
3257
    c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
3258
*/
3259
struct ggml_tensor * ggml_mul_mat_id(
3260
        struct ggml_context * ctx,
3261
        struct ggml_tensor  * as,
3262
        struct ggml_tensor  * b,
3263
0
        struct ggml_tensor  * ids) {
3264
0
    GGML_ASSERT(!ggml_is_transposed(as));
3265
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
3266
3267
0
    GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
3268
0
    GGML_ASSERT(b->ne[3] == 1); // b is 3d
3269
0
    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
3270
0
    GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
3271
0
    GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
3272
0
    GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
3273
3274
0
    const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
3275
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3276
3277
0
    result->op     = GGML_OP_MUL_MAT_ID;
3278
0
    result->src[0] = as;
3279
0
    result->src[1] = b;
3280
0
    result->src[2] = ids;
3281
3282
0
    return result;
3283
0
}
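// ---- editor's sketch (not part of ggml.c) ---------------------------------
// Concrete shapes for the mixture-of-experts routing described in the comment
// above ggml_mul_mat_id: as holds one [cols x rows] matrix per expert and ids
// selects n_expert_used experts for each token row of b. Hypothetical helper.
static void example_mul_mat_id(struct ggml_context * ctx) {
    const int64_t n_expert = 8, n_used = 2, n_tokens = 5, cols = 64, rows = 32;
    struct ggml_tensor * as  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, cols, rows, n_expert);
    struct ggml_tensor * b   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, cols, n_used, n_tokens);
    struct ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_used, n_tokens);
    struct ggml_tensor * c   = ggml_mul_mat_id(ctx, as, b, ids); // -> [rows, n_used, n_tokens]
    GGML_ASSERT(c->ne[0] == rows && c->ne[1] == n_used && c->ne[2] == n_tokens);
}
// ----------------------------------------------------------------------------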
3284
3285
// ggml_out_prod
3286
3287
0
static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3288
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3289
3290
0
    return (t0->ne[1] == t1->ne[1])   &&
3291
0
           (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
3292
0
           (t1->ne[3]%t0->ne[3] == 0);
3293
0
}
3294
3295
struct ggml_tensor * ggml_out_prod(
3296
        struct ggml_context * ctx,
3297
        struct ggml_tensor  * a,
3298
0
        struct ggml_tensor  * b) {
3299
0
    GGML_ASSERT(ggml_can_out_prod(a, b));
3300
0
    GGML_ASSERT(!ggml_is_transposed(a));
3301
3302
    // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
3303
0
    const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
3304
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3305
3306
0
    result->op     = GGML_OP_OUT_PROD;
3307
0
    result->src[0] = a;
3308
0
    result->src[1] = b;
3309
3310
0
    return result;
3311
0
}

// ggml_scale

static struct ggml_tensor * ggml_scale_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s,
        float                 b,
        bool                  inplace) {
    GGML_ASSERT(ggml_is_padded_1d(a));

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    float params[2] = { s, b };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op     = GGML_OP_SCALE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_scale(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s) {
    return ggml_scale_impl(ctx, a, s, 0.0, false);
}

struct ggml_tensor * ggml_scale_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s) {
    return ggml_scale_impl(ctx, a, s, 0.0, true);
}

struct ggml_tensor * ggml_scale_bias(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s,
        float                 b) {
    return ggml_scale_impl(ctx, a, s, b, false);
}

struct ggml_tensor * ggml_scale_bias_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s,
        float                 b) {
    return ggml_scale_impl(ctx, a, s, b, true);
}
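
// Illustrative usage sketch (not part of ggml.c): ggml_scale_impl builds a node
// computing y = s*x + b elementwise, so for a hypothetical tensor x,
//     struct ggml_tensor * y = ggml_scale_bias(ctx, x, 0.5f, 0.5f); // y = 0.5*x + 0.5
// and ggml_scale(ctx, x, s) is the b == 0.0f special case of the same op.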

// ggml_set

static struct ggml_tensor * ggml_set_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                nb2,
        size_t                nb3,
        size_t                offset,
        bool                  inplace) {
    GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));

    // make a view of the destination
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    GGML_ASSERT(offset < (size_t)(1 << 30));
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_SET;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_set(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                nb2,
        size_t                nb3,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
}

struct ggml_tensor * ggml_set_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                nb2,
        size_t                nb3,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
}

struct ggml_tensor * ggml_set_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
}

struct ggml_tensor * ggml_set_1d_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
}

struct ggml_tensor * ggml_set_2d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
}

struct ggml_tensor * ggml_set_2d_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        size_t                nb1,
        size_t                offset) {
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
}

// ggml_cpy

static struct ggml_tensor * ggml_cpy_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));

    // make a view of the destination
    struct ggml_tensor * result = ggml_view_tensor(ctx, b);
    if (strlen(b->name) > 0) {
        ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
    } else {
        ggml_format_name(result, "%s (copy)", a->name);
    }

    result->op     = GGML_OP_CPY;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_cpy(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    return ggml_cpy_impl(ctx, a, b);
}

struct ggml_tensor * ggml_cast(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum   ggml_type      type) {
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
    ggml_format_name(result, "%s (copy)", a->name);

    result->op     = GGML_OP_CPY;
    result->src[0] = a;
    result->src[1] = result; // note: this self-reference might seem redundant, but it's actually needed by some
                             //       backends for consistency with ggml_cpy_impl() above

    return result;
}
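
// Illustrative usage sketch (not part of ggml.c): ggml_cpy converts into an existing
// destination (type and layout taken from b), while ggml_cast allocates a fresh
// destination of the requested type. A hypothetical F32 -> F16 down-conversion:
static struct ggml_tensor * example_to_f16(
        struct ggml_context * ctx,
        struct ggml_tensor  * x_f32) {
    return ggml_cast(ctx, x_f32, GGML_TYPE_F16); // same shape, F16 storage
}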

// ggml_cont

static struct ggml_tensor * ggml_cont_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
    ggml_format_name(result, "%s (cont)", a->name);

    result->op     = GGML_OP_CONT;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_cont(
        struct ggml_context * ctx,
        struct ggml_tensor * a) {
    return ggml_cont_impl(ctx, a);
}

// make contiguous, with new shape
GGML_API struct ggml_tensor * ggml_cont_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0) {
    return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
}

GGML_API struct ggml_tensor * ggml_cont_2d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1) {
    return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
}

GGML_API struct ggml_tensor * ggml_cont_3d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2) {
    return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
}

struct ggml_tensor * ggml_cont_4d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3) {
    GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
    ggml_format_name(result, "%s (cont)", a->name);

    result->op     = GGML_OP_CONT;
    result->src[0] = a;

    return result;
}

// ggml_reshape

struct ggml_tensor * ggml_reshape(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    GGML_ASSERT(ggml_is_contiguous(a));
    // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));

    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op     = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0);

    const int64_t ne[1] = { ne0 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op     = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_2d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1);

    const int64_t ne[2] = { ne0, ne1 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op     = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_3d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);

    const int64_t ne[3] = { ne0, ne1, ne2 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op     = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_reshape_4d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3) {
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);

    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
    ggml_format_name(result, "%s (reshaped)", a->name);

    result->op     = GGML_OP_RESHAPE;
    result->src[0] = a;

    return result;
}
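
// Illustrative usage sketch (not part of ggml.c): a reshape only relabels the extents
// of a contiguous tensor, so splitting a hypothetical [head_dim*n_head, n_tokens]
// activation into attention heads costs no copy:
//     cur = ggml_reshape_3d(ctx, cur, head_dim, n_head, n_tokens);
// The asserts above require ggml_is_contiguous(cur) and an exact element-count match.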

static struct ggml_tensor * ggml_view_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_dims,
        const int64_t       * ne,
        size_t                offset) {
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
    ggml_format_name(result, "%s (view)", a->name);

    ggml_set_op_params(result, &offset, sizeof(offset));

    result->op     = GGML_OP_VIEW;
    result->src[0] = a;

    return result;
}

// ggml_view_1d

struct ggml_tensor * ggml_view_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        size_t                offset) {
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);

    return result;
}

// ggml_view_2d

struct ggml_tensor * ggml_view_2d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        size_t                nb1,
        size_t                offset) {
    const int64_t ne[2] = { ne0, ne1 };

    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);

    result->nb[1] = nb1;
    result->nb[2] = result->nb[1]*ne1;
    result->nb[3] = result->nb[2];

    return result;
}

// ggml_view_3d

struct ggml_tensor * ggml_view_3d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        size_t                nb1,
        size_t                nb2,
        size_t                offset) {
    const int64_t ne[3] = { ne0, ne1, ne2 };

    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);

    result->nb[1] = nb1;
    result->nb[2] = nb2;
    result->nb[3] = result->nb[2]*ne2;

    return result;
}

// ggml_view_4d

struct ggml_tensor * ggml_view_4d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3,
        size_t                nb1,
        size_t                nb2,
        size_t                nb3,
        size_t                offset) {
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };

    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);

    result->nb[1] = nb1;
    result->nb[2] = nb2;
    result->nb[3] = nb3;

    return result;
}
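
// Illustrative usage sketch (not part of ggml.c): since the caller supplies the
// strides and a byte offset, a view can address any sub-block in place. A
// hypothetical helper selecting rows [r0, r0 + nrows) of a 2-D tensor, copy-free:
static struct ggml_tensor * example_row_slice(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,      // 2-D source
        int64_t               r0,     // first row of the slice
        int64_t               nrows) {
    return ggml_view_2d(ctx, a, a->ne[0], nrows, a->nb[1], r0*a->nb[1]);
}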

// ggml_permute

struct ggml_tensor * ggml_permute(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   axis0,
        int                   axis1,
        int                   axis2,
        int                   axis3) {
    GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
    GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
    GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
    GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);

    GGML_ASSERT(axis0 != axis1);
    GGML_ASSERT(axis0 != axis2);
    GGML_ASSERT(axis0 != axis3);
    GGML_ASSERT(axis1 != axis2);
    GGML_ASSERT(axis1 != axis3);
    GGML_ASSERT(axis2 != axis3);

    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
    ggml_format_name(result, "%s (permuted)", a->name);

    int ne[GGML_MAX_DIMS];
    int nb[GGML_MAX_DIMS];

    ne[axis0] = a->ne[0];
    ne[axis1] = a->ne[1];
    ne[axis2] = a->ne[2];
    ne[axis3] = a->ne[3];

    nb[axis0] = a->nb[0];
    nb[axis1] = a->nb[1];
    nb[axis2] = a->nb[2];
    nb[axis3] = a->nb[3];

    result->ne[0] = ne[0];
    result->ne[1] = ne[1];
    result->ne[2] = ne[2];
    result->ne[3] = ne[3];

    result->nb[0] = nb[0];
    result->nb[1] = nb[1];
    result->nb[2] = nb[2];
    result->nb[3] = nb[3];

    result->op     = GGML_OP_PERMUTE;
    result->src[0] = a;

    int32_t params[] = { axis0, axis1, axis2, axis3 };
    ggml_set_op_params(result, params, sizeof(params));

    return result;
}
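
// Worked example (not part of ggml.c): ggml_permute only shuffles ne/nb, so for a
// hypothetical query tensor q of shape [head_dim, n_head, n_tokens, 1],
//     ggml_permute(ctx, q, 0, 2, 1, 3)
// sends axis 1 to position 2 and axis 2 to position 1, giving a view of shape
// [head_dim, n_tokens, n_head, 1] without moving any data.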

// ggml_transpose

struct ggml_tensor * ggml_transpose(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
    ggml_format_name(result, "%s (transposed)", a->name);

    result->ne[0] = a->ne[1];
    result->ne[1] = a->ne[0];

    result->nb[0] = a->nb[1];
    result->nb[1] = a->nb[0];

    result->op     = GGML_OP_TRANSPOSE;
    result->src[0] = a;

    return result;
}

// ggml_get_rows

struct ggml_tensor * ggml_get_rows(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    GGML_ASSERT(a->ne[2] == b->ne[1]);
    GGML_ASSERT(a->ne[3] == b->ne[2]);
    GGML_ASSERT(b->ne[3] == 1);
    GGML_ASSERT(b->type == GGML_TYPE_I32);

    // TODO: implement non F32 return
    enum ggml_type type = GGML_TYPE_F32;
    if (a->type == GGML_TYPE_I32) {
        type = a->type;
    }
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);

    result->op     = GGML_OP_GET_ROWS;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}
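
// Illustrative usage sketch (not part of ggml.c): ggml_get_rows is the standard
// embedding lookup - an i32 index tensor selects rows of a table (names hypothetical):
static struct ggml_tensor * example_embed(
        struct ggml_context * ctx,
        struct ggml_tensor  * tok_embd,   // [n_embd, n_vocab]
        struct ggml_tensor  * tokens) {   // [n_tokens] (i32)
    return ggml_get_rows(ctx, tok_embd, tokens); // [n_embd, n_tokens], F32
}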

// ggml_get_rows_back

struct ggml_tensor * ggml_get_rows_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c) {
    GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
    GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));

    // TODO: implement non F32 return
    //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);

    result->op     = GGML_OP_GET_ROWS_BACK;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_set_rows

struct ggml_tensor * ggml_set_rows(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c) {
    GGML_ASSERT(a->ne[0] == b->ne[0]);
    GGML_ASSERT(a->ne[2] == b->ne[2]);
    GGML_ASSERT(a->ne[3] == b->ne[3]);
    GGML_ASSERT(b->ne[1] == c->ne[0]);
    GGML_ASSERT(b->ne[2] % c->ne[1] == 0);
    GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
    GGML_ASSERT(c->ne[3] == 1);
    GGML_ASSERT(b->type == GGML_TYPE_F32);
    GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32);

    GGML_ASSERT(ggml_is_contiguous_rows(a));
    GGML_ASSERT(ggml_is_contiguous_rows(b));

    struct ggml_tensor * result = ggml_view_tensor(ctx, a);

    result->op     = GGML_OP_SET_ROWS;
    result->src[0] = b;
    result->src[1] = c;
    result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931)

    return result;
}

// ggml_diag

struct ggml_tensor * ggml_diag(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    GGML_ASSERT(a->ne[1] == 1);

    const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);

    result->op     = GGML_OP_DIAG;
    result->src[0] = a;

    return result;
}

// ggml_diag_mask_inf

static struct ggml_tensor * ggml_diag_mask_inf_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    int32_t params[] = { n_past };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_DIAG_MASK_INF;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_diag_mask_inf(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past) {
    return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
}

struct ggml_tensor * ggml_diag_mask_inf_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past) {
    return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
}

// ggml_diag_mask_zero

static struct ggml_tensor * ggml_diag_mask_zero_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    int32_t params[] = { n_past };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_DIAG_MASK_ZERO;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_diag_mask_zero(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past) {
    return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
}

struct ggml_tensor * ggml_diag_mask_zero_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   n_past) {
    return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
}

// ggml_soft_max

static struct ggml_tensor * ggml_soft_max_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * mask,
        float                 scale,
        float                 max_bias,
        bool                  inplace) {
    GGML_ASSERT(ggml_is_contiguous(a));

    if (mask) {
        GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
        GGML_ASSERT(ggml_is_contiguous(mask));
        GGML_ASSERT(mask->ne[0] == a->ne[0]);
        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
        GGML_ASSERT(a->ne[2]%mask->ne[2] == 0);
        GGML_ASSERT(a->ne[3]%mask->ne[3] == 0);
    }

    if (max_bias > 0.0f) {
        GGML_ASSERT(mask);
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    float params[] = { scale, max_bias };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_SOFT_MAX;
    result->src[0] = a;
    result->src[1] = mask;

    return result;
}

struct ggml_tensor * ggml_soft_max(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
}

struct ggml_tensor * ggml_soft_max_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a) {
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
}

struct ggml_tensor * ggml_soft_max_ext(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * mask,
        float                 scale,
        float                 max_bias) {
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
}

struct ggml_tensor * ggml_soft_max_ext_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * mask,
        float                 scale,
        float                 max_bias) {
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
}
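
// Illustrative usage sketch (not part of ggml.c): ggml_soft_max_ext fuses the usual
// attention epilogue softmax(a*scale + mask), with max_bias > 0.0f enabling the
// ALiBi-style slopes. A hypothetical scaled-dot-product step could read:
//     kq = ggml_soft_max_ext(ctx, kq, kq_mask, 1.0f/sqrtf((float)head_dim), 0.0f);
// where kq_mask is F16 or F32 and broadcasts along dims 2 and 3 per the asserts above.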

void ggml_soft_max_add_sinks(
        struct ggml_tensor * a,
        struct ggml_tensor * sinks) {
    if (!sinks) {
        a->src[2] = NULL;
        return;
    }

    GGML_ASSERT(a->op == GGML_OP_SOFT_MAX);
    GGML_ASSERT(a->src[2] == NULL);
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);

    a->src[2] = sinks;
}

// ggml_soft_max_ext_back

static struct ggml_tensor * ggml_soft_max_ext_back_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        float                 scale,
        float                 max_bias,
        bool                  inplace) {
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_SOFT_MAX_BACK;
    result->src[0] = a;
    result->src[1] = b;

    memcpy((float *) result->op_params + 0, &scale,    sizeof(float));
    memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));

    return result;
}

struct ggml_tensor * ggml_soft_max_ext_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        float                 scale,
        float                 max_bias) {
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
}

struct ggml_tensor * ggml_soft_max_ext_back_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        float                 scale,
        float                 max_bias) {
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
}

// ggml_rope

static struct ggml_tensor * ggml_rope_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   sections[GGML_MROPE_SECTIONS],
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow,
        bool                  inplace) {
    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");

    GGML_ASSERT(ggml_is_vector(b));
    GGML_ASSERT(b->type == GGML_TYPE_I32);

    bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;
    if (mrope_used) {
        GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
    } else {
        GGML_ASSERT(a->ne[2] == b->ne[0]);
    }

    if (c) {
        GGML_ASSERT(c->type == GGML_TYPE_F32);
        GGML_ASSERT(c->ne[0] >= n_dims / 2);
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
    memcpy(params +  5, &freq_base,    sizeof(float));
    memcpy(params +  6, &freq_scale,   sizeof(float));
    memcpy(params +  7, &ext_factor,   sizeof(float));
    memcpy(params +  8, &attn_factor,  sizeof(float));
    memcpy(params +  9, &beta_fast,    sizeof(float));
    memcpy(params + 10, &beta_slow,    sizeof(float));
    if (mrope_used && sections) {
        memcpy(params + 11, sections,  sizeof(int32_t) * GGML_MROPE_SECTIONS);
    } else {
        memset(params + 11, 0,         sizeof(int32_t) * GGML_MROPE_SECTIONS);
    }
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_ROPE;
    result->src[0] = a;
    result->src[1] = b;
    result->src[2] = c;

    return result;
}

struct ggml_tensor * ggml_rope(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   n_dims,
        int                   mode) {
    return ggml_rope_impl(
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
    );
}

struct ggml_tensor * ggml_rope_multi(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   sections[GGML_MROPE_SECTIONS],
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, false
    );
}

struct ggml_tensor * ggml_rope_multi_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   sections[GGML_MROPE_SECTIONS],
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, true
    );
}

struct ggml_tensor * ggml_rope_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   n_dims,
        int                   mode) {
    return ggml_rope_impl(
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
    );
}

struct ggml_tensor * ggml_rope_ext(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, false
    );
}

struct ggml_tensor * ggml_rope_ext_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, true
    );
}

struct ggml_tensor * ggml_rope_custom(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   n_dims,
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, false
    );
}

struct ggml_tensor * ggml_rope_custom_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   n_dims,
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    return ggml_rope_impl(
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
        ext_factor, attn_factor, beta_fast, beta_slow, true
    );
}

// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get
// `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
    return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
}

void ggml_rope_yarn_corr_dims(
    int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
) {
    // start and end correction dims
    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
    float end   =  ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
    dims[0] = MAX(0, start);
    dims[1] = MIN(n_dims - 1, end);
}
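
// Worked example (not part of ggml.c), with hypothetical YaRN settings
// n_dims = 128, n_ctx_orig = 4096, freq_base = 10000, beta_fast = 32, beta_slow = 1:
//   corr_dim(32) = 128 * ln(4096 / (32 * 2*pi)) / (2 * ln(10000)) ~ 20.9 -> floor -> 20
//   corr_dim(1)  = 128 * ln(4096 / ( 1 * 2*pi)) / (2 * ln(10000)) ~ 45.0 -> ceil  -> 46
// so dims = { 20, 46 }: rotation dims below 20 extrapolate, dims above 46 interpolate,
// and the band in between is ramped smoothly between the two regimes.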

// ggml_rope_back

struct ggml_tensor * ggml_rope_ext_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    struct ggml_tensor * result = ggml_rope_ext(
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
    result->op = GGML_OP_ROPE_BACK;
    return result;
}

struct ggml_tensor * ggml_rope_multi_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c,
        int                   n_dims,
        int                   sections[4],
        int                   mode,
        int                   n_ctx_orig,
        float                 freq_base,
        float                 freq_scale,
        float                 ext_factor,
        float                 attn_factor,
        float                 beta_fast,
        float                 beta_slow) {
    struct ggml_tensor * result = ggml_rope_multi(
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
    result->op = GGML_OP_ROPE_BACK;
    return result;
}

// ggml_clamp

struct ggml_tensor * ggml_clamp(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 min,
        float                 max) {
    // TODO: when implementing backward, fix this:
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);

    float params[] = { min, max };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_CLAMP;
    result->src[0] = a;

    return result;
}

static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
}
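
// Worked example (not part of ggml.c): this is the standard convolution output-size
// formula, floor((ins + 2*p - d*(ks - 1) - 1) / s) + 1. For a hypothetical 224-wide
// input with ks = 3, s = 2, p = 1, d = 1: (224 + 2 - 2 - 1)/2 + 1 = 112.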

// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
// a: [OC,IC, KH, KW]
// b: [N, IC, IH, IW]
// result: [N, OH, OW, IC*KH*KW]
struct ggml_tensor * ggml_im2col(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s0,
        int                   s1,
        int                   p0,
        int                   p1,
        int                   d0,
        int                   d1,
        bool                  is_2D,
        enum ggml_type        dst_type) {
    if (is_2D) {
        GGML_ASSERT(a->ne[2] == b->ne[2]);
    } else {
        //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
        GGML_ASSERT(b->ne[1] == a->ne[1]);
        GGML_ASSERT(b->ne[3] == 1);
    }

    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);

    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
    GGML_ASSERT((OW > 0)           && "b too small compared to a");

    const int64_t ne[4] = {
        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
        OW,
        is_2D ? OH : b->ne[2],
        is_2D ?      b->ne[3] : 1,
    };

    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_IM2COL;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_im2col_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int64_t             * ne,
        int                   s0,
        int                   s1,
        int                   p0,
        int                   p1,
        int                   d0,
        int                   d1,
        bool                  is_2D) {
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_IM2COL_BACK;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_conv_1d

struct ggml_tensor * ggml_conv_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s0,
        int                   p0,
        int                   d0) {
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]

    struct ggml_tensor * result =
        ggml_mul_mat(ctx,
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2]));                    // [OC,IC, K] => [OC, IC * K]

    result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]

    return result;
}
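
// Illustrative usage sketch (not part of ggml.c): ggml_conv_1d lowers the convolution
// to im2col + mat mul as above. For a hypothetical audio frontend with kernels
// conv_w: [K, n_mel, OC] over a spectrogram mel: [n_frames, n_mel, N],
//     cur = ggml_conv_1d(ctx, conv_w, mel, 1 /*s0*/, 1 /*p0*/, 1 /*d0*/);
// produces [N, OC, OL] in the notation of the comments above.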
4469
4470
// ggml_conv_1d_ph
4471
4472
struct ggml_tensor* ggml_conv_1d_ph(
4473
        struct ggml_context * ctx,
4474
        struct ggml_tensor  * a,
4475
        struct ggml_tensor  * b,
4476
        int                   s,
4477
0
        int                   d) {
4478
0
    return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
4479
0
}
4480
4481
// ggml_conv_1d_dw
4482
4483
struct ggml_tensor * ggml_conv_1d_dw(
4484
        struct ggml_context * ctx,
4485
        struct ggml_tensor  * a,
4486
        struct ggml_tensor  * b,
4487
        int                   s0,
4488
        int                   p0,
4489
0
        int                   d0) {
4490
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
4491
4492
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
4493
4494
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
4495
4496
0
    result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);
4497
4498
0
    return result;
4499
0
}
4500
4501
// ggml_conv_1d_dw_ph
4502
4503
struct ggml_tensor * ggml_conv_1d_dw_ph(
4504
        struct ggml_context * ctx,
4505
        struct ggml_tensor  * a,
4506
        struct ggml_tensor  * b,
4507
        int                   s0,
4508
0
        int                   d0) {
4509
0
    return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
4510
0
}
4511
4512
// ggml_conv_transpose_1d
4513
4514
0
static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4515
0
    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
4516
0
}
4517
4518
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
4519
        struct ggml_context * ctx,
4520
        struct ggml_tensor  * a,
4521
        struct ggml_tensor  * b,
4522
        int                   s0,
4523
        int                   p0,
4524
0
        int                   d0) {
4525
0
    GGML_ASSERT(ggml_is_matrix(b));
4526
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
4527
0
    GGML_ASSERT(a->ne[3] == 1);
4528
4529
0
    GGML_ASSERT(p0 == 0);
4530
0
    GGML_ASSERT(d0 == 1);
4531
4532
0
    const int64_t ne[4] = {
4533
0
        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
4534
0
        a->ne[1], b->ne[2], 1,
4535
0
    };
4536
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4537
4538
0
    int32_t params[] = { s0, p0, d0 };
4539
0
    ggml_set_op_params(result, params, sizeof(params));
4540
4541
0
    result->op     = GGML_OP_CONV_TRANSPOSE_1D;
4542
0
    result->src[0] = a;
4543
0
    result->src[1] = b;
4544
4545
0
    return result;
4546
0
}
4547
4548
// ggml_conv_2d
4549
4550
// a: [OC,IC, KH, KW]
4551
// b: [N, IC, IH, IW]
4552
// result: [N, OC, OH, OW]
4553
struct ggml_tensor * ggml_conv_2d(
4554
        struct ggml_context * ctx,
4555
        struct ggml_tensor  * a,
4556
        struct ggml_tensor  * b,
4557
        int                   s0,
4558
        int                   s1,
4559
        int                   p0,
4560
        int                   p1,
4561
        int                   d0,
4562
0
        int                   d1) {
4563
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
4564
4565
0
    struct ggml_tensor * result =
4566
0
        ggml_mul_mat(ctx,
4567
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0],  im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
4568
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]),  a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]
4569
4570
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
4571
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
4572
4573
4574
0
    return result;
4575
0
}
4576
4577
// a: [OC*IC, KD, KH, KW]
4578
// b: [N*IC, ID, IH, IW]
4579
// result: [N*OD, OH, OW, IC * KD * KH * KW]
4580
struct ggml_tensor * ggml_im2col_3d(
4581
        struct ggml_context * ctx,
4582
        struct ggml_tensor  * a,
4583
        struct ggml_tensor  * b,
4584
        int64_t               IC,
4585
        int                   s0, // stride width
4586
        int                   s1, // stride height
4587
        int                   s2, // stride depth
4588
        int                   p0, // padding width
4589
        int                   p1, // padding height
4590
        int                   p2, // padding depth
4591
        int                   d0, // dilation width
4592
        int                   d1, // dilation height
4593
        int                   d2, // dilation depth
4594
0
        enum ggml_type        dst_type) {
4595
0
    const int64_t N = b->ne[3] / IC;
4596
0
    const int64_t ID = b->ne[2];
4597
0
    const int64_t IH = b->ne[1];
4598
0
    const int64_t IW = b->ne[0];
4599
4600
0
    const int64_t OC = a->ne[3] / IC;
4601
0
    UNUSED(OC);
4602
0
    const int64_t KD = a->ne[2];
4603
0
    const int64_t KH = a->ne[1];
4604
0
    const int64_t KW = a->ne[0];
4605
0
    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
4606
0
    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
4607
0
    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);
4608
4609
0
    GGML_ASSERT((OD > 0)  && "b too small compared to a");
4610
0
    GGML_ASSERT((OH > 0)  && "b too small compared to a");
4611
0
    GGML_ASSERT((OW > 0)  && "b too small compared to a");
4612
4613
4614
0
    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};
4615
4616
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4617
0
    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
4618
0
    ggml_set_op_params(result, params, sizeof(params));
4619
4620
0
    result->op     = GGML_OP_IM2COL_3D;
4621
0
    result->src[0] = a;
4622
0
    result->src[1] = b;
4623
4624
0
    return result;
4625
0
}
4626
4627
// a: [OC*IC, KD, KH, KW]
4628
// b: [N*IC, ID, IH, IW]
4629
// result: [N*OC, OD, OH, OW]
4630
struct ggml_tensor * ggml_conv_3d(
4631
        struct ggml_context * ctx,
4632
        struct ggml_tensor  * a,
4633
        struct ggml_tensor  * b,
4634
        int64_t               IC,
4635
        int                   s0, // stride width
4636
        int                   s1, // stride height
4637
        int                   s2, // stride depth
4638
        int                   p0, // padding width
4639
        int                   p1, // padding height
4640
        int                   p2, // padding depth
4641
        int                   d0, // dilation width
4642
        int                   d1, // dilation height
4643
        int                   d2  // dilation depth
4644
0
        ) {
4645
0
    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]
4646
4647
0
    int64_t OC = a->ne[3] / IC;
4648
0
    int64_t N = b->ne[3] / IC;
4649
0
    struct ggml_tensor * result =
4650
0
        ggml_mul_mat(ctx,
4651
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
4652
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                          // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]
4653
4654
0
    int64_t OD = im2col->ne[3] / N;
4655
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
4656
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
4657
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
4658
4659
0
    return result;
4660
0
}
4661
4662
// ggml_conv_2d_sk_p0
4663
4664
struct ggml_tensor * ggml_conv_2d_sk_p0(
4665
        struct ggml_context * ctx,
4666
        struct ggml_tensor  * a,
4667
0
        struct ggml_tensor  * b) {
4668
0
    return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
4669
0
}
4670
4671
// ggml_conv_2d_s1_ph
4672
4673
struct ggml_tensor * ggml_conv_2d_s1_ph(
4674
        struct ggml_context * ctx,
4675
        struct ggml_tensor  * a,
4676
0
        struct ggml_tensor  * b) {
4677
0
    return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
4678
0
}
4679
4680
// ggml_conv_2d_dw
4681
4682
struct ggml_tensor * ggml_conv_2d_dw(
4683
        struct ggml_context * ctx,
4684
        struct ggml_tensor  * a,
4685
        struct ggml_tensor  * b,
4686
        int                   s0,
4687
        int                   s1,
4688
        int                   p0,
4689
        int                   p1,
4690
        int                   d0,
4691
0
        int                   d1) {
4692
0
    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
4693
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
4694
0
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
4695
0
                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
4696
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
4697
4698
0
    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2],  new_a->ne[3], 1);                       // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
4699
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
4700
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
4701
4702
0
    return result;
4703
0
}
4704
4705
// ggml_conv_2d_dw_direct
4706
4707
struct ggml_tensor * ggml_conv_2d_dw_direct(
4708
        struct ggml_context * ctx,
4709
        struct ggml_tensor  * a,
4710
        struct ggml_tensor  * b,
4711
        int                   stride0,
4712
        int                   stride1,
4713
        int                   pad0,
4714
        int                   pad1,
4715
        int                   dilation0,
4716
0
        int                   dilation1) {
4717
0
    GGML_ASSERT(a->ne[2] == 1);
4718
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4719
0
    int64_t ne[4];
4720
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
4721
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
4722
0
    ne[2] = b->ne[2];
4723
0
    ne[3] = b->ne[3];
4724
4725
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4726
4727
0
    if (ggml_is_contiguous_channels(b)) {
4728
        // Result will be permuted the same way as input (CWHN order)
4729
0
        const int64_t type_size = ggml_type_size(result->type);
4730
0
        GGML_ASSERT(ggml_blck_size(result->type) == 1);
4731
0
        result->nb[0] = result->ne[2] * type_size;
4732
0
        result->nb[1] = result->ne[0] * result->nb[0];
4733
0
        result->nb[2] = type_size;
4734
0
    }
4735
4736
0
    int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
4737
0
    ggml_set_op_params(result, params, sizeof(params));
4738
4739
0
    result->op     = GGML_OP_CONV_2D_DW;
4740
0
    result->src[0] = a;
4741
0
    result->src[1] = b;
4742
0
    return result;
4743
0
}
4744
4745
// ggml_conv_2d_direct
4746
4747
struct ggml_tensor * ggml_conv_2d_direct(
4748
        struct ggml_context * ctx,
4749
        struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
4750
        struct ggml_tensor  * b,   // input data [W, H, C, N]
4751
        int                   s0,  // stride dimension 0
4752
        int                   s1,  // stride dimension 1
4753
        int                   p0,  // padding dimension 0
4754
        int                   p1,  // padding dimension 1
4755
        int                   d0,  // dilation dimension 0
4756
0
        int                   d1) {// dilation dimension 1
4757
4758
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
4759
    //GGML_ASSERT(a->type == b->type);
4760
4761
0
    int64_t ne[4];
4762
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4763
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4764
0
    ne[2] = a->ne[3];
4765
0
    ne[3] = b->ne[3];
4766
4767
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4768
4769
0
    ggml_set_op_params_i32(result, 0, s0);
4770
0
    ggml_set_op_params_i32(result, 1, s1);
4771
0
    ggml_set_op_params_i32(result, 2, p0);
4772
0
    ggml_set_op_params_i32(result, 3, p1);
4773
0
    ggml_set_op_params_i32(result, 4, d0);
4774
0
    ggml_set_op_params_i32(result, 5, d1);
4775
4776
0
    result->op = GGML_OP_CONV_2D;
4777
0
    result->src[0] = a;
4778
0
    result->src[1] = b;
4779
4780
0
    return result;
4781
0
}
// ggml_conv_3d_direct

struct ggml_tensor * ggml_conv_3d_direct(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   s0,
        int                   s1,
        int                   s2,
        int                   p0,
        int                   p1,
        int                   p2,
        int                   d0,
        int                   d1,
        int                   d2,
        int                   c,
        int                   n,
        int                   oc) {

    GGML_ASSERT(a->ne[3] == (int64_t) c * oc);
    GGML_ASSERT(b->ne[3] == (int64_t) c * n);

    int64_t ne[4];
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
    ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2);
    ne[3] = (int64_t) oc * n;

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    ggml_set_op_params_i32(result, 0,  s0);
    ggml_set_op_params_i32(result, 1,  s1);
    ggml_set_op_params_i32(result, 2,  s2);
    ggml_set_op_params_i32(result, 3,  p0);
    ggml_set_op_params_i32(result, 4,  p1);
    ggml_set_op_params_i32(result, 5,  p2);
    ggml_set_op_params_i32(result, 6,  d0);
    ggml_set_op_params_i32(result, 7,  d1);
    ggml_set_op_params_i32(result, 8,  d2);
    ggml_set_op_params_i32(result, 9,  c);
    ggml_set_op_params_i32(result, 10, n);
    ggml_set_op_params_i32(result, 11, oc);

    result->op = GGML_OP_CONV_3D;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_conv_transpose_2d_p0

static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
    return (ins - 1) * s - 2 * p + ks;
}

struct ggml_tensor * ggml_conv_transpose_2d_p0(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        int                   stride) {
    GGML_ASSERT(a->ne[3] == b->ne[2]);

    const int64_t ne[4] = {
        ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
        ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
        a->ne[2], b->ne[3],
    };

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    ggml_set_op_params_i32(result, 0, stride);

    result->op     = GGML_OP_CONV_TRANSPOSE_2D;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_pool_*

static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
    return (ins + 2 * p - ks) / s + 1;
}

// ggml_pool_1d

struct ggml_tensor * ggml_pool_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum ggml_op_pool     op,
        int                   k0,
        int                   s0,
        int                   p0) {
    const int64_t ne[4] = {
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
        a->ne[1],
        a->ne[2],
        a->ne[3],
    };
    GGML_ASSERT(ne[0] > 0);

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { op, k0, s0, p0 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_POOL_1D;
    result->src[0] = a;

    return result;
}

// ggml_pool_2d

struct ggml_tensor * ggml_pool_2d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum ggml_op_pool     op,
        int                   k0,
        int                   k1,
        int                   s0,
        int                   s1,
        float                 p0,
        float                 p1) {
    struct ggml_tensor * result;
    const int64_t ne[4] = {
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
        a->ne[2],
        a->ne[3],
    };
    GGML_ASSERT(ne[0] > 0);
    GGML_ASSERT(ne[1] > 0);

    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_POOL_2D;
    result->src[0] = a;

    return result;
}

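// [editor's note] Usage sketch, not part of the original file; the helper
// name example_pool_2d is hypothetical. Following the pooling arithmetic of
// ggml_calc_pool_output_size() above, a 2x2 max pool with stride 2 and no
// padding halves each spatial dimension: (224 + 0 - 2)/2 + 1 = 112.
static struct ggml_tensor * example_pool_2d(struct ggml_context * ctx) {
    struct ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 224, 224, 16, 1);
    return ggml_pool_2d(ctx, a, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0.0f, 0.0f); // -> [112, 112, 16, 1]
}
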
struct ggml_tensor * ggml_pool_2d_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * af,
        enum ggml_op_pool     op,
        int                   k0,
        int                   k1,
        int                   s0,
        int                   s1,
        float                 p0,
        float                 p1) {
    struct ggml_tensor * result;
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);

    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_POOL_2D_BACK;
    result->src[0] = a;
    result->src[1] = af;

    return result;
}

// ggml_upscale / ggml_interpolate

static struct ggml_tensor * ggml_interpolate_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3,
        uint32_t              mode) {
    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
    // TODO: implement antialias for modes other than bilinear
    GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);

    ggml_set_op_params_i32(result, 0, (int32_t)mode);

    result->op     = GGML_OP_UPSCALE;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_upscale(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   scale_factor,
        enum ggml_scale_mode  mode) {
    GGML_ASSERT(scale_factor > 1);
    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
}

struct ggml_tensor * ggml_upscale_ext(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   ne0,
        int                   ne1,
        int                   ne2,
        int                   ne3,
        enum ggml_scale_mode  mode) {
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
}

struct ggml_tensor * ggml_interpolate(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3,
        uint32_t              mode) {
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
}

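// [editor's note] Sketch of the wrapper relationship above, not part of the
// original file; example_upscale is a hypothetical name, and
// GGML_SCALE_MODE_BILINEAR is assumed to be one of the valid scale modes.
// ggml_upscale() only scales the first two dims, while ggml_interpolate()
// takes explicit target sizes for all four.
static struct ggml_tensor * example_upscale(struct ggml_context * ctx) {
    struct ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 64, 3, 1);
    // equivalent to ggml_interpolate(ctx, a, 128, 128, 3, 1, GGML_SCALE_MODE_BILINEAR)
    return ggml_upscale(ctx, a, 2, GGML_SCALE_MODE_BILINEAR);
}
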
// ggml_pad

struct ggml_tensor * ggml_pad(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   p0,
        int                   p1,
        int                   p2,
        int                   p3) {
    return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
}

// ggml_pad_circular

struct ggml_tensor * ggml_pad_circular(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   p0,
        int                   p1,
        int                   p2,
        int                   p3) {
    return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
}

struct ggml_tensor * ggml_pad_ext(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,
            int                  lp0,
            int                  rp0,
            int                  lp1,
            int                  rp1,
            int                  lp2,
            int                  rp2,
            int                  lp3,
            int                  rp3
            ) {
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
            a->ne[0] + lp0 + rp0,
            a->ne[1] + lp1 + rp1,
            a->ne[2] + lp2 + rp2,
            a->ne[3] + lp3 + rp3);

    ggml_set_op_params_i32(result, 0, lp0);
    ggml_set_op_params_i32(result, 1, rp0);
    ggml_set_op_params_i32(result, 2, lp1);
    ggml_set_op_params_i32(result, 3, rp1);
    ggml_set_op_params_i32(result, 4, lp2);
    ggml_set_op_params_i32(result, 5, rp2);
    ggml_set_op_params_i32(result, 6, lp3);
    ggml_set_op_params_i32(result, 7, rp3);
    ggml_set_op_params_i32(result, 8, 0); // not circular by default

    result->op     = GGML_OP_PAD;
    result->src[0] = a;

    return result;
}

// ggml_pad_ext_circular

struct ggml_tensor * ggml_pad_ext_circular(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                  lp0,
        int                  rp0,
        int                  lp1,
        int                  rp1,
        int                  lp2,
        int                  rp2,
        int                  lp3,
        int                  rp3
        ) {
    struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
    ggml_set_op_params_i32(result, 8, 1); // circular
    return result;
}

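// [editor's note] Sketch, not part of the original file; example_pad is a
// hypothetical name. As implemented above, ggml_pad() is right-padding only
// (all left pads are zero), and the circular variant is the same node with
// op_params[8] flipped from 0 to 1.
static struct ggml_tensor * example_pad(struct ggml_context * ctx) {
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 6, 4);
    // pad dim 0 by 2 on the right: [6, 4] -> [8, 4]; same as
    // ggml_pad_ext(ctx, a, 0, 2, 0, 0, 0, 0, 0, 0)
    return ggml_pad(ctx, a, 2, 0, 0, 0);
}
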
// ggml_pad_reflect_1d

struct ggml_tensor * ggml_pad_reflect_1d(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   p0,
        int                   p1) {
    GGML_ASSERT(p0 >= 0);
    GGML_ASSERT(p1 >= 0);

    GGML_ASSERT(p0 < a->ne[0]); // padding length on each side must be less than the
    GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded

    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(a->type == GGML_TYPE_F32);

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
            a->ne[0] + p0 + p1,
            a->ne[1],
            a->ne[2],
            a->ne[3]);

    int32_t params[] = { p0, p1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_PAD_REFLECT_1D;
    result->src[0] = a;

    return result;
}

// ggml_roll

struct ggml_tensor * ggml_roll(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   shift0,
        int                   shift1,
        int                   shift2,
        int                   shift3) {
    GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
    GGML_ASSERT(abs(shift0) < a->ne[0]);
    GGML_ASSERT(abs(shift1) < a->ne[1]);
    GGML_ASSERT(abs(shift2) < a->ne[2]);
    GGML_ASSERT(abs(shift3) < a->ne[3]);

    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

    ggml_set_op_params_i32(result, 0, shift0);
    ggml_set_op_params_i32(result, 1, shift1);
    ggml_set_op_params_i32(result, 2, shift2);
    ggml_set_op_params_i32(result, 3, shift3);

    result->op     = GGML_OP_ROLL;
    result->src[0] = a;

    return result;
}

// ggml_timestep_embedding

struct ggml_tensor * ggml_timestep_embedding(
        struct ggml_context * ctx,
        struct ggml_tensor  * timesteps,
        int                   dim,
        int                   max_period) {

    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);

    ggml_set_op_params_i32(result, 0, dim);
    ggml_set_op_params_i32(result, 1, max_period);

    result->op     = GGML_OP_TIMESTEP_EMBEDDING;
    result->src[0] = timesteps;

    return result;
}

// ggml_tri

struct ggml_tensor * ggml_tri(
    struct ggml_context * ctx,
    struct ggml_tensor  * a,
    enum ggml_tri_type    type) {
    GGML_ASSERT(a->type == GGML_TYPE_F32);

    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(a->ne[0] == a->ne[1]);

    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

    ggml_set_op_params_i32(result, 0, type);

    result->op = GGML_OP_TRI;
    result->src[0] = a;

    return result;
}

// ggml_fill

static struct ggml_tensor * ggml_fill_impl(
    struct ggml_context * ctx,
    struct ggml_tensor  * a,
    float                 c,
    bool                  inplace) {
    GGML_ASSERT(a->type == GGML_TYPE_F32);
    GGML_ASSERT(ggml_is_contiguous(a));

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params_f32(result, 0, c);

    result->op = GGML_OP_FILL;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_fill(
    struct ggml_context * ctx,
    struct ggml_tensor  * a,
    float                 c) {
    return ggml_fill_impl(ctx, a, c, false);
}

struct ggml_tensor * ggml_fill_inplace(
    struct ggml_context * ctx,
    struct ggml_tensor  * a,
    float                 c) {
    return ggml_fill_impl(ctx, a, c, true);
}

// ggml_argsort

struct ggml_tensor * ggml_argsort(
        struct ggml_context  * ctx,
        struct ggml_tensor   * a,
        enum ggml_sort_order   order) {
    GGML_ASSERT(a->ne[0] <= INT32_MAX);

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);

    ggml_set_op_params_i32(result, 0, (int32_t) order);

    result->op     = GGML_OP_ARGSORT;
    result->src[0] = a;

    return result;
}

// ggml_argsort_top_k

struct ggml_tensor * ggml_argsort_top_k(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   k) {
    GGML_ASSERT(a->ne[0] >= k);

    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);

    result = ggml_view_4d(ctx, result,
                k, result->ne[1], result->ne[2], result->ne[3],
                   result->nb[1], result->nb[2], result->nb[3],
                0);

    return result;
}

// ggml_top_k

struct ggml_tensor * ggml_top_k(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   k) {
    GGML_ASSERT(a->ne[0] >= k);

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);

    result->op     = GGML_OP_TOP_K;
    result->src[0] = a;

    return result;
}

// ggml_arange

struct ggml_tensor * ggml_arange(
        struct ggml_context * ctx,
        float                 start,
        float                 stop,
        float                 step) {
    GGML_ASSERT(stop > start);

    const int64_t steps = (int64_t) ceilf((stop - start) / step);

    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);

    ggml_set_op_params_f32(result, 0, start);
    ggml_set_op_params_f32(result, 1, stop);
    ggml_set_op_params_f32(result, 2, step);

    result->op = GGML_OP_ARANGE;

    return result;
}

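// [editor's note] Worked example of the size computation above, not part of
// the original file; example_arange is a hypothetical name.
static struct ggml_tensor * example_arange(struct ggml_context * ctx) {
    // steps = (int64_t) ceilf((5.0f - 0.0f)/2.0f) = 3; the op later fills { 0, 2, 4 }
    return ggml_arange(ctx, 0.0f, 5.0f, 2.0f);
}
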
// ggml_flash_attn_ext

struct ggml_tensor * ggml_flash_attn_ext(
        struct ggml_context * ctx,
        struct ggml_tensor  * q,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * mask,
        float                 scale,
        float                 max_bias,
        float                 logit_softcap) {
    GGML_ASSERT(ggml_can_mul_mat(k, q));
    // TODO: check if vT can be multiplied by (k*qT)

    GGML_ASSERT(q->ne[3] == k->ne[3]);
    GGML_ASSERT(q->ne[3] == v->ne[3]);

    if (mask) {
        GGML_ASSERT(ggml_is_contiguous(mask));
        //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));

        GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
        GGML_ASSERT(q->ne[3] % mask->ne[3] == 0);
    }

    if (max_bias > 0.0f) {
        GGML_ASSERT(mask);
    }

    // permute(0, 2, 1, 3)
    int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    float params[] = { scale, max_bias, logit_softcap };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_FLASH_ATTN_EXT;
    result->src[0] = q;
    result->src[1] = k;
    result->src[2] = v;
    result->src[3] = mask;

    return result;
}

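// [editor's note] Shape sketch, not part of the original file;
// example_flash_attn_ext is a hypothetical name, and scale = 1/sqrt(head_dim)
// is the conventional attention scaling (an assumption, not something this op
// enforces). The result uses the permuted layout
// ne = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] } built above.
static struct ggml_tensor * example_flash_attn_ext(struct ggml_context * ctx) {
    const int64_t head_dim = 64, n_head = 8, n_tokens = 16, n_kv = 32;
    struct ggml_tensor * q = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, head_dim, n_tokens, n_head, 1);
    struct ggml_tensor * k = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, head_dim, n_kv,     n_head, 1);
    struct ggml_tensor * v = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, head_dim, n_kv,     n_head, 1);
    const float scale = 1.0f/sqrtf((float) head_dim);
    // result->ne = [64, 8, 16, 1] = [head_dim, n_head, n_tokens, 1]
    return ggml_flash_attn_ext(ctx, q, k, v, /*mask=*/NULL, scale, /*max_bias=*/0.0f, /*logit_softcap=*/0.0f);
}
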
void ggml_flash_attn_ext_set_prec(
        struct ggml_tensor * a,
        enum ggml_prec       prec) {
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);

    const int32_t prec_i32 = (int32_t) prec;

    ggml_set_op_params_i32(a, 3, prec_i32); // scale is on first pos, max_bias on second
}

enum ggml_prec ggml_flash_attn_ext_get_prec(
        const struct ggml_tensor * a) {
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);

    const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);

    return (enum ggml_prec) prec_i32;
}

void ggml_flash_attn_ext_add_sinks(
        struct ggml_tensor * a,
        struct ggml_tensor * sinks) {
    if (!sinks) {
        a->src[4] = NULL;
        return;
    }

    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
    GGML_ASSERT(a->src[4] == NULL);
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);

    a->src[4] = sinks;
}

// ggml_flash_attn_back

struct ggml_tensor * ggml_flash_attn_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * q,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * d,
        bool                  masked) {
    GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");

    GGML_ASSERT(ggml_can_mul_mat(k, q));
    // TODO: check if vT can be multiplied by (k*qT)

    // d shape [D,N,ne2,ne3]
    // q shape [D,N,ne2,ne3]
    // k shape [D,M,kvne2,ne3]
    // v shape [M,D,kvne2,ne3]

    const int64_t     D = q->ne[0];
    const int64_t     N = q->ne[1];
    const int64_t     M = k->ne[1];
    const int64_t   ne2 = q->ne[2];
    const int64_t   ne3 = q->ne[3];
    const int64_t kvne2 = k->ne[2];

    GGML_ASSERT(k->ne[0] == D);
    GGML_ASSERT(v->ne[0] == M);
    GGML_ASSERT(v->ne[1] == D);
    GGML_ASSERT(d->ne[0] == D);
    GGML_ASSERT(d->ne[1] == N);
    GGML_ASSERT(k->ne[2] == kvne2);
    GGML_ASSERT(k->ne[3] == ne3);
    GGML_ASSERT(v->ne[2] == kvne2);
    GGML_ASSERT(v->ne[3] == ne3);
    GGML_ASSERT(d->ne[2] == ne2);
    GGML_ASSERT(d->ne[3] == ne3);

    GGML_ASSERT(ne2 % kvne2 == 0);

    // store gradients of q, k and v as contiguous tensors concatenated in result.
    // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
    const int64_t elem_q = ggml_nelements(q);
    const int64_t elem_k = ggml_nelements(k);
    const int64_t elem_v = ggml_nelements(v);

    enum ggml_type result_type = GGML_TYPE_F32;
    GGML_ASSERT(ggml_blck_size(result_type) == 1);
    const size_t tsize = ggml_type_size(result_type);

    const size_t offs_q = 0;
    const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
    const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
    const size_t end    = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);

    const size_t nelements = (end + tsize - 1)/tsize;

    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);

    int32_t masked_i = masked ? 1 : 0;
    ggml_set_op_params(result, &masked_i, sizeof(masked_i));

    result->op     = GGML_OP_FLASH_ATTN_BACK;
    result->src[0] = q;
    result->src[1] = k;
    result->src[2] = v;
    result->src[3] = d;

    return result;
}

// ggml_ssm_conv

struct ggml_tensor * ggml_ssm_conv(
        struct ggml_context * ctx,
        struct ggml_tensor  * sx,
        struct ggml_tensor  * c) {
    GGML_ASSERT(ggml_is_3d(sx));
    GGML_ASSERT(ggml_is_matrix(c));

    const int64_t d_conv  = c->ne[0];
    const int64_t d_inner = c->ne[1];
    const int64_t n_t     = sx->ne[0] - d_conv + 1; // tokens per sequence
    const int64_t n_s     = sx->ne[2];

    // TODO: maybe support other strides than 1?
    GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
    GGML_ASSERT(sx->ne[1] == d_inner);
    GGML_ASSERT(n_t >= 0);

    struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);

    result->op     = GGML_OP_SSM_CONV;
    result->src[0] = sx;
    result->src[1] = c;

    return result;
}

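// [editor's note] Shape sketch, not part of the original file;
// example_ssm_conv and its concrete sizes are hypothetical. With d_conv = 4
// and a window sx of 19 columns, n_t = 19 - 4 + 1 = 16 tokens per sequence,
// and the result is [d_inner, n_t, n_seqs].
static struct ggml_tensor * example_ssm_conv(struct ggml_context * ctx) {
    struct ggml_tensor * c  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 1536);     // [d_conv, d_inner]
    struct ggml_tensor * sx = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 19, 1536, 2); // [d_conv - 1 + n_t, d_inner, n_seqs]
    return ggml_ssm_conv(ctx, sx, c); // -> [1536, 16, 2]
}
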
// ggml_ssm_scan

struct ggml_tensor * ggml_ssm_scan(
        struct ggml_context * ctx,
        struct ggml_tensor  * s,
        struct ggml_tensor  * x,
        struct ggml_tensor  * dt,
        struct ggml_tensor  * A,
        struct ggml_tensor  * B,
        struct ggml_tensor  * C,
        struct ggml_tensor  * ids) {
    GGML_ASSERT(ggml_is_contiguous(s));
    GGML_ASSERT(ggml_is_contiguous(dt));
    GGML_ASSERT(ggml_is_contiguous(A));
    GGML_ASSERT(x->nb[0] == ggml_type_size(x->type));
    GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
    GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
    GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]);
    GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]);
    GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]);
    GGML_ASSERT(ggml_are_same_shape(B, C));
    GGML_ASSERT(ids->type == GGML_TYPE_I32);

    {
        const int64_t d_state      = s->ne[0];
        const int64_t head_dim     = x->ne[0];
        const int64_t n_head       = x->ne[1];
        const int64_t n_seq_tokens = x->ne[2];
        const int64_t n_seqs       = x->ne[3];

        GGML_ASSERT(dt->ne[0] == n_head);
        GGML_ASSERT(dt->ne[1] == n_seq_tokens);
        GGML_ASSERT(dt->ne[2] == n_seqs);
        GGML_ASSERT(ggml_is_3d(dt));
        GGML_ASSERT(s->ne[1] == head_dim);
        GGML_ASSERT(s->ne[2] == n_head);
        GGML_ASSERT(B->ne[0] == d_state);
        GGML_ASSERT(B->ne[2] == n_seq_tokens);
        GGML_ASSERT(B->ne[3] == n_seqs);
        GGML_ASSERT(ids->ne[0] == n_seqs);
        GGML_ASSERT(ggml_is_vector(ids));
        GGML_ASSERT(A->ne[1] == n_head);
        GGML_ASSERT(ggml_is_matrix(A));

        if (A->ne[0] != 1) {
            // Mamba-1 has more granular decay factors
            GGML_ASSERT(A->ne[0] == d_state);
        }
    }

    // concatenated y + ssm_states
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]);

    result->op     = GGML_OP_SSM_SCAN;
    result->src[0] = s;
    result->src[1] = x;
    result->src[2] = dt;
    result->src[3] = A;
    result->src[4] = B;
    result->src[5] = C;
    result->src[6] = ids;

    return result;
}

// ggml_win_part

struct ggml_tensor * ggml_win_part(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   w) {
    GGML_ASSERT(a->ne[3] == 1);
    GGML_ASSERT(a->type  == GGML_TYPE_F32);

    // padding
    const int px = (w - a->ne[1]%w)%w;
    const int py = (w - a->ne[2]%w)%w;

    const int npx = (px + a->ne[1])/w;
    const int npy = (py + a->ne[2])/w;
    const int np  = npx*npy;

    const int64_t ne[4] = { a->ne[0], w, w, np, };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { npx, npy, w };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_WIN_PART;
    result->src[0] = a;

    return result;
}

// ggml_win_unpart

struct ggml_tensor * ggml_win_unpart(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   w0,
        int                   h0,
        int                   w) {
    GGML_ASSERT(a->type == GGML_TYPE_F32);

    const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

    int32_t params[] = { w };
    ggml_set_op_params(result, params, sizeof(params));

    result->op     = GGML_OP_WIN_UNPART;
    result->src[0] = a;

    return result;
}

// ggml_get_rel_pos

struct ggml_tensor * ggml_get_rel_pos(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int                   qh,
        int                   kh) {
    GGML_ASSERT(qh == kh);
    GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);

    const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);

    result->op     = GGML_OP_GET_REL_POS;
    result->src[0] = a;

    return result;
}

// ggml_add_rel_pos

static struct ggml_tensor * ggml_add_rel_pos_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * pw,
        struct ggml_tensor  * ph,
        bool                  inplace) {
    GGML_ASSERT(ggml_are_same_shape(pw, ph));
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_is_contiguous(pw));
    GGML_ASSERT(ggml_is_contiguous(ph));
    GGML_ASSERT(ph->type == GGML_TYPE_F32);
    GGML_ASSERT(pw->type == GGML_TYPE_F32);
    GGML_ASSERT(pw->ne[3] == a->ne[2]);
    GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
    GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
    ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);

    result->op     = GGML_OP_ADD_REL_POS;
    result->src[0] = a;
    result->src[1] = pw;
    result->src[2] = ph;

    return result;
}

struct ggml_tensor * ggml_add_rel_pos(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * pw,
        struct ggml_tensor  * ph) {
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
}

struct ggml_tensor * ggml_add_rel_pos_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * pw,
        struct ggml_tensor  * ph) {
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
}

// ggml_rwkv_wkv6

struct ggml_tensor * ggml_rwkv_wkv6(
        struct ggml_context * ctx,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * r,
        struct ggml_tensor  * tf,
        struct ggml_tensor  * td,
        struct ggml_tensor  * state) {
    GGML_ASSERT(ggml_is_contiguous(k));
    GGML_ASSERT(ggml_is_contiguous(v));
    GGML_ASSERT(ggml_is_contiguous(r));
    GGML_ASSERT(ggml_is_contiguous(tf));
    GGML_ASSERT(ggml_is_contiguous(td));
    GGML_ASSERT(ggml_is_contiguous(state));

    const int64_t S = k->ne[0];
    const int64_t H = k->ne[1];
    const int64_t n_tokens = k->ne[2];
    const int64_t n_seqs = state->ne[1];
    {
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
        GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
        GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
    }

    // concat output and new_state
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op     = GGML_OP_RWKV_WKV6;
    result->src[0] = k;
    result->src[1] = v;
    result->src[2] = r;
    result->src[3] = tf;
    result->src[4] = td;
    result->src[5] = state;

    return result;
}

// ggml_gated_linear_attn

struct ggml_tensor * ggml_gated_linear_attn(
        struct ggml_context * ctx,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * q,
        struct ggml_tensor  * g,
        struct ggml_tensor  * state,
        float scale) {
    GGML_ASSERT(ggml_is_contiguous(k));
    GGML_ASSERT(ggml_is_contiguous(v));
    GGML_ASSERT(ggml_is_contiguous(q));
    GGML_ASSERT(ggml_is_contiguous(g));
    GGML_ASSERT(ggml_is_contiguous(state));

    const int64_t S = k->ne[0];
    const int64_t H = k->ne[1];
    const int64_t n_tokens = k->ne[2];
    const int64_t n_seqs = state->ne[1];
    {
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
        GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
        GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
    }

    // concat output and new_state
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    ggml_set_op_params_f32(result, 0, scale);

    result->op     = GGML_OP_GATED_LINEAR_ATTN;
    result->src[0] = k;
    result->src[1] = v;
    result->src[2] = q;
    result->src[3] = g;
    result->src[4] = state;

    return result;
}

// ggml_rwkv_wkv7

struct ggml_tensor * ggml_rwkv_wkv7(
        struct ggml_context * ctx,
        struct ggml_tensor  * r,
        struct ggml_tensor  * w,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * state) {
    GGML_ASSERT(ggml_is_contiguous(r));
    GGML_ASSERT(ggml_is_contiguous(w));
    GGML_ASSERT(ggml_is_contiguous(k));
    GGML_ASSERT(ggml_is_contiguous(v));
    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_is_contiguous(b));
    GGML_ASSERT(ggml_is_contiguous(state));

    const int64_t S = k->ne[0];
    const int64_t H = k->ne[1];
    const int64_t n_tokens = k->ne[2];
    const int64_t n_seqs = state->ne[1];
    {
        GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
        GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
        GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
        GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
    }

    // concat output and new_state
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op     = GGML_OP_RWKV_WKV7;
    result->src[0] = r;
    result->src[1] = w;
    result->src[2] = k;
    result->src[3] = v;
    result->src[4] = a;
    result->src[5] = b;
    result->src[6] = state;

    return result;
}

// ggml_unary

static struct ggml_tensor * ggml_unary_impl(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum ggml_unary_op    op,
        bool                  inplace) {
    GGML_ASSERT(ggml_is_contiguous_rows(a));

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params_i32(result, 0, (int32_t) op);

    result->op     = GGML_OP_UNARY;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_unary(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum ggml_unary_op    op) {
    return ggml_unary_impl(ctx, a, op, false);
}

struct ggml_tensor * ggml_unary_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        enum ggml_unary_op    op) {
    return ggml_unary_impl(ctx, a, op, true);
}

// ggml_map_custom1

static struct ggml_tensor * ggml_map_custom1_impl(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        const  ggml_custom1_op_t   fun,
        int                        n_tasks,
        void                     * userdata,
        bool                       inplace) {
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    struct ggml_map_custom1_op_params params = {
        /*.fun      =*/ fun,
        /*.n_tasks  =*/ n_tasks,
        /*.userdata =*/ userdata
    };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op     = GGML_OP_MAP_CUSTOM1;
    result->src[0] = a;

    return result;
}

struct ggml_tensor * ggml_map_custom1(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        const  ggml_custom1_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
}

struct ggml_tensor * ggml_map_custom1_inplace(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        const  ggml_custom1_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
}

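// [editor's note] Callback sketch, not part of the original file;
// example_scale_by_two is a hypothetical name, and it assumes the
// ggml_custom1_op_t signature (dst, a, ith, nth, userdata) with contiguous
// F32 data. Threads split the element range by striding.
static void example_scale_by_two(struct ggml_tensor * dst, const struct ggml_tensor * a, int ith, int nth, void * userdata) {
    (void) userdata;
    const int64_t n  = ggml_nelements(dst);
    const float * xa = (const float *) a->data;
    float       * xd = (float *) dst->data;
    // thread ith handles every nth element
    for (int64_t i = ith; i < n; i += nth) {
        xd[i] = 2.0f*xa[i];
    }
}
// usage: ggml_map_custom1(ctx, a, example_scale_by_two, GGML_N_TASKS_MAX, NULL);
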
// ggml_map_custom2

static struct ggml_tensor * ggml_map_custom2_impl(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        const  ggml_custom2_op_t   fun,
        int                        n_tasks,
        void                     * userdata,
        bool                       inplace) {
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    struct ggml_map_custom2_op_params params = {
        /*.fun      =*/ fun,
        /*.n_tasks  =*/ n_tasks,
        /*.userdata =*/ userdata
    };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op     = GGML_OP_MAP_CUSTOM2;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

struct ggml_tensor * ggml_map_custom2(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        const  ggml_custom2_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
}

struct ggml_tensor * ggml_map_custom2_inplace(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        const  ggml_custom2_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
}

// ggml_map_custom3

static struct ggml_tensor * ggml_map_custom3_impl(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        struct ggml_tensor       * c,
        const  ggml_custom3_op_t   fun,
        int                        n_tasks,
        void                     * userdata,
        bool                       inplace) {
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    struct ggml_map_custom3_op_params params = {
        /*.fun      =*/ fun,
        /*.n_tasks  =*/ n_tasks,
        /*.userdata =*/ userdata
    };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op     = GGML_OP_MAP_CUSTOM3;
    result->src[0] = a;
    result->src[1] = b;
    result->src[2] = c;

    return result;
}

struct ggml_tensor * ggml_map_custom3(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        struct ggml_tensor       * c,
        const  ggml_custom3_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
}

struct ggml_tensor * ggml_map_custom3_inplace(
        struct ggml_context      * ctx,
        struct ggml_tensor       * a,
        struct ggml_tensor       * b,
        struct ggml_tensor       * c,
        const  ggml_custom3_op_t   fun,
        int                        n_tasks,
        void                     * userdata) {
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
}

struct ggml_tensor * ggml_custom_4d(
        struct ggml_context * ctx,
        enum ggml_type        type,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3,
        struct ggml_tensor ** args,
        int                   n_args,
        ggml_custom_op_t      fun,
        int                   n_tasks,
        void                * userdata) {

    GGML_ASSERT(n_args < GGML_MAX_SRC);

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);

    struct ggml_custom_op_params params = {
        /*.fun      =*/ fun,
        /*.n_tasks  =*/ n_tasks,
        /*.userdata =*/ userdata
    };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op = GGML_OP_CUSTOM;
    for (int i = 0; i < n_args; i++) {
        result->src[i] = args[i];
    }

    return result;
}

struct ggml_tensor * ggml_custom_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor ** args,
        int                   n_args,
        ggml_custom_op_t      fun,
        int                   n_tasks,
        void                * userdata) {

    GGML_ASSERT(n_args < GGML_MAX_SRC - 1);

    struct ggml_tensor * result = ggml_view_tensor(ctx, a);

    struct ggml_custom_op_params params = {
        /*.fun      =*/ fun,
        /*.n_tasks  =*/ n_tasks,
        /*.userdata =*/ userdata
    };
    ggml_set_op_params(result, &params, sizeof(params));

    result->op = GGML_OP_CUSTOM;
    result->src[0] = a;
    for (int i = 0; i < n_args; i++) {
        result->src[i + 1] = args[i];
    }

    return result;
}

// ggml_cross_entropy_loss

struct ggml_tensor * ggml_cross_entropy_loss(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b) {
    GGML_ASSERT(ggml_are_same_shape(a, b));

    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);

    result->op     = GGML_OP_CROSS_ENTROPY_LOSS;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_cross_entropy_loss_back

struct ggml_tensor * ggml_cross_entropy_loss_back(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c) {
    GGML_ASSERT(ggml_is_scalar(a));
    GGML_ASSERT(ggml_are_same_shape(b, c));

    struct ggml_tensor * result = ggml_dup_tensor(ctx, b);

    result->op     = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
    result->src[0] = a;
    result->src[1] = b;
    result->src[2] = c;

    return result;
}

// opt_step_adamw

struct ggml_tensor * ggml_opt_step_adamw(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * grad,
        struct ggml_tensor  * m,
        struct ggml_tensor  * v,
        struct ggml_tensor  * adamw_params) {
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
    GGML_ASSERT(ggml_are_same_shape(a, grad));
    GGML_ASSERT(ggml_are_same_shape(a, m));
    GGML_ASSERT(ggml_are_same_shape(a, v));
    GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
    GGML_ASSERT(ggml_nelements(adamw_params) == 7);

    struct ggml_tensor * result = ggml_view_tensor(ctx, a);

    result->op     = GGML_OP_OPT_STEP_ADAMW;
    result->src[0] = a;
    result->src[1] = grad;
    result->src[2] = m;
    result->src[3] = v;
    result->src[4] = adamw_params;

    return result;
}

// opt_step_sgd

struct ggml_tensor * ggml_opt_step_sgd(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * grad,
        struct ggml_tensor  * params) {
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
    GGML_ASSERT(ggml_are_same_shape(a, grad));
    GGML_ASSERT(params->type == GGML_TYPE_F32);
    GGML_ASSERT(ggml_nelements(params) == 2);

    struct ggml_tensor * result = ggml_view_tensor(ctx, a);

    result->op     = GGML_OP_OPT_STEP_SGD;
    result->src[0] = a;
    result->src[1] = grad;
    result->src[2] = params;

    return result;
}

// solve_tri

struct ggml_tensor * ggml_solve_tri(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        bool                  left,
        bool                  lower,
        bool                  uni) {
    GGML_ASSERT(a->type == GGML_TYPE_F32);
    GGML_ASSERT(b->type == GGML_TYPE_F32);

    // A must be square and lower triangular
    GGML_ASSERT(a->ne[0] == a->ne[1]);
    // B must have the same outer dimension as A
    GGML_ASSERT(a->ne[1] == b->ne[1]);

    // batch dimensions must be equal
    GGML_ASSERT(a->ne[2] == b->ne[2]);
    GGML_ASSERT(a->ne[3] == b->ne[3]);

    GGML_ASSERT(ggml_is_contiguous(a));
    GGML_ASSERT(ggml_is_contiguous(b));

    GGML_ASSERT(lower && left && !uni); // TODO: support other variants

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);

    result->op     = GGML_OP_SOLVE_TRI;
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_gated_delta_net

struct ggml_tensor * ggml_gated_delta_net(
        struct ggml_context * ctx,
        struct ggml_tensor  * q,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * g,
        struct ggml_tensor  * beta,
        struct ggml_tensor  * state) {
    GGML_ASSERT(ggml_is_contiguous_rows(q));
    GGML_ASSERT(ggml_is_contiguous_rows(k));
    GGML_ASSERT(ggml_is_contiguous_rows(v));
    GGML_ASSERT(ggml_is_contiguous(g));
    GGML_ASSERT(ggml_is_contiguous(beta));
    GGML_ASSERT(ggml_is_contiguous(state));

    GGML_ASSERT(q->type == GGML_TYPE_F32);
    GGML_ASSERT(k->type == GGML_TYPE_F32);
    GGML_ASSERT(v->type == GGML_TYPE_F32);
    GGML_ASSERT(g->type == GGML_TYPE_F32);
    GGML_ASSERT(beta->type == GGML_TYPE_F32);
    GGML_ASSERT(state->type == GGML_TYPE_F32);

    const int64_t S_v      = v->ne[0];
    const int64_t H        = v->ne[1];
    const int64_t n_tokens = v->ne[2];
    const int64_t n_seqs   = v->ne[3];

    // gate: scalar [1, H, T, B] or vector [S_v, H, T, B] (KDA)
    GGML_ASSERT(g->ne[0] == 1 || g->ne[0] == S_v);
    GGML_ASSERT(beta->ne[0] == 1);

    GGML_ASSERT(ggml_nelements(state) == S_v * S_v * H * n_seqs);

    // concat output and new_state into a single tensor
    // output: S_v * H * n_tokens * n_seqs, state: S_v * S_v * H * n_seqs
    const int64_t ne[4] = { S_v * H, n_tokens * n_seqs + S_v * n_seqs, 1, 1 };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    result->op     = GGML_OP_GATED_DELTA_NET;
    result->src[0] = q;
    result->src[1] = k;
    result->src[2] = v;
    result->src[3] = g;
    result->src[4] = beta;
    result->src[5] = state;

    return result;
}

////////////////////////////////////////////////////////////////////////////////

struct ggml_hash_set ggml_hash_set_new(size_t size) {
    size = ggml_hash_size(size);
    struct ggml_hash_set result;
    result.size = size;
    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
    result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
    return result;
}

void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
    memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
}

void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
    GGML_FREE(hash_set->used);
    GGML_FREE(hash_set->keys);
}

size_t ggml_hash_size(size_t min_sz) {
    // next primes after powers of two
    static const size_t primes[] = {
        2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
        2053, 4099, 8209, 16411, 32771, 65537, 131101,
        262147, 524309, 1048583, 2097169, 4194319, 8388617,
        16777259, 33554467, 67108879, 134217757, 268435459,
        536870923, 1073741827, 2147483659
    };
    static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);

    // find the smallest prime that is larger than or equal to min_sz
    size_t l = 0;
    size_t r = n_primes;
    while (l < r) {
        size_t m = (l + r)/2;
        if (primes[m] < min_sz) {
            l = m + 1;
        } else {
            r = m;
        }
    }
    size_t sz = l < n_primes ? primes[l] : min_sz | 1;
    return sz;
}

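// [editor's note] Worked example, not part of the original file;
// example_hash_size is a hypothetical name. The binary search picks the
// smallest listed prime >= min_sz and falls back to the odd number
// min_sz | 1 once the table is exhausted.
static void example_hash_size(void) {
    GGML_ASSERT(ggml_hash_size(1000) == 1031); // 521 < 1000 <= 1031
    GGML_ASSERT(ggml_hash_size(2053) == 2053); // exact match in the table
}
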
struct hash_map {
    struct ggml_hash_set set;
    struct ggml_tensor ** vals;
};

static struct hash_map * ggml_new_hash_map(size_t size) {
    struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
    result->set = ggml_hash_set_new(size);
    result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
    return result;
}

static void ggml_hash_map_free(struct hash_map * map) {
    ggml_hash_set_free(&map->set);
    GGML_FREE(map->vals);
    GGML_FREE(map);
}

// utility functions to change gradients
// isrc is the index of tensor in cgraph->visited_hash_set.keys
// the corresponding gradients (accumulators) are also at position isrc
// if tensor has a gradient accumulator, modify that accumulator in-place
// else if there is no gradient for tensor, set the corresponding value
// else, just add/subtract/etc. the gradients

static void ggml_add_or_set(
        struct ggml_context * ctx,
        struct ggml_cgraph  * cgraph,
        size_t                isrc,
        struct ggml_tensor  * tensor) {
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
    GGML_ASSERT(src);
    if (cgraph->grads[isrc]) {
        cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
    } else {
        cgraph->grads[isrc] = tensor;
    }
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
}

static void ggml_acc_or_set(
        struct ggml_context * ctx,
        struct ggml_cgraph  * cgraph,
        size_t                isrc,
        struct ggml_tensor  * tensor,
        const  size_t         nb1,
        const  size_t         nb2,
        const  size_t         nb3,
        const  size_t         offset) {
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
    GGML_ASSERT(src);
    if (cgraph->grads[isrc]) {
        cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
    } else {
        struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
        cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
    }
    ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
}

static void ggml_add1_or_set(
        struct ggml_context * ctx,
        struct ggml_cgraph  * cgraph,
        size_t                isrc,
        struct ggml_tensor  * tensor) {
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
    GGML_ASSERT(src);
    if (cgraph->grads[isrc]) {
        cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
    } else {
        cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
    }
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
}

static void ggml_sub_or_set(
        struct ggml_context * ctx,
        struct ggml_cgraph  * cgraph,
        size_t                isrc,
        struct ggml_tensor  * tensor) {
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
    GGML_ASSERT(src);
    if (cgraph->grads[isrc]) {
        cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
    } else {
        cgraph->grads[isrc] = ggml_neg(ctx, tensor);
    }
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
}

6336
6337
static void ggml_compute_backward(
6338
0
        struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
6339
0
    struct ggml_tensor * tensor = cgraph->nodes[i];
6340
0
    struct ggml_tensor * grad   = ggml_graph_get_grad(cgraph, tensor);
6341
6342
0
    if (!grad) {
6343
0
        return;
6344
0
    }
6345
6346
0
    struct ggml_tensor * src0 = tensor->src[0];
6347
0
    struct ggml_tensor * src1 = tensor->src[1];
6348
0
    struct ggml_tensor * src2 = tensor->src[2];
6349
0
    struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
6350
0
    const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
6351
0
    const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
6352
0
    const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
6353
0
    const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
6354
0
    const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
6355
0
    const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
6356
6357
0
    switch (tensor->op) {
6358
0
        case GGML_OP_DUP: {
6359
0
            if (src0_needs_grads) {
6360
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6361
0
            }
6362
0
        } break;
6363
0
        case GGML_OP_ADD: {
6364
0
            if (src0_needs_grads) {
6365
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6366
0
            }
6367
0
            if (src1_needs_grads) {
6368
0
                struct ggml_tensor * tmp = grad;
6369
0
                if (!ggml_are_same_shape(src0, src1)) {
6370
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6371
0
                }
6372
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6373
0
            }
6374
0
        } break;
6375
0
        case GGML_OP_ADD1: {
6376
0
            if (src0_needs_grads) {
6377
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6378
0
            }
6379
0
            if (src1_needs_grads) {
6380
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
6381
0
            }
6382
0
        } break;
6383
0
        case GGML_OP_ACC: {
6384
0
            if (src0_needs_grads) {
6385
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6386
0
            }
6387
0
            if (src1_needs_grads) {
6388
0
                const size_t nb1    = ((int32_t *) tensor->op_params)[0];
6389
0
                const size_t nb2    = ((int32_t *) tensor->op_params)[1];
6390
0
                const size_t nb3    = ((int32_t *) tensor->op_params)[2];
6391
0
                const size_t offset = ((int32_t *) tensor->op_params)[3];
6392
6393
0
                struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
6394
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6395
0
                    nb1, nb2, nb3, offset);
6396
6397
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6398
0
            }
6399
0
        } break;
6400
0
        case GGML_OP_SUB: {
6401
0
            if (src0_needs_grads) {
6402
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6403
0
            }
6404
0
            if (src1_needs_grads) {
6405
0
                ggml_sub_or_set(ctx, cgraph, isrc1, grad);
6406
0
            }
6407
0
        } break;
6408
0
        case GGML_OP_MUL: {
6409
0
            if (src0_needs_grads) {
6410
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
6411
0
            }
6412
0
            if (src1_needs_grads) {
6413
0
                struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
6414
0
                if (!ggml_are_same_shape(src0, src1)) {
6415
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6416
0
                }
6417
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6418
0
            }
6419
0
        } break;
6420
0
        case GGML_OP_DIV: {
6421
0
            if (src0_needs_grads) {
6422
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
6423
0
            }
6424
0
            if (src1_needs_grads) {
6425
0
                ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
6426
0
            }
6427
0
        } break;
6428
0
        case GGML_OP_SQR: {
6429
0
            if (src0_needs_grads) {
6430
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
6431
0
            }
6432
0
        } break;
6433
0
        case GGML_OP_SQRT: {
6434
0
            if (src0_needs_grads) {
6435
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
6436
0
            }
6437
0
        } break;
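A short derivation of why the SQRT rule above can reuse the forward output (tensor) instead of recomputing the square root: with t = \sqrt{x} and incoming gradient g,

    \frac{\partial t}{\partial x} = \frac{1}{2\sqrt{x}} = \frac{1}{2t}
    \qquad\Longrightarrow\qquad
    \frac{\partial L}{\partial x} \mathrel{+}= \frac{1}{2} \cdot \frac{g}{t}

which is exactly ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f). The EXP rule further below reuses its output the same way, since d\,e^x/dx = e^x = tensor.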
6438
0
        case GGML_OP_LOG: {
6439
0
            if (src0_needs_grads) {
6440
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
6441
0
            }
6442
0
        } break;
6443
0
        case GGML_OP_SIN: {
6444
0
            if (src0_needs_grads) {
6445
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
6446
0
            }
6447
0
        } break;
6448
0
        case GGML_OP_COS: {
6449
0
            if (src0_needs_grads) {
6450
0
                ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
6451
0
            }
6452
0
        } break;
6453
0
        case GGML_OP_SUM: {
6454
0
            if (src0_needs_grads) {
6455
0
                ggml_add1_or_set(ctx, cgraph, isrc0, grad);
6456
0
            }
6457
0
        } break;
6458
0
        case GGML_OP_SUM_ROWS: {
6459
0
            if (src0_needs_grads) {
6460
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6461
0
            }
6462
0
        } break;
6463
0
        case GGML_OP_MEAN: {
6464
0
            if (src0_needs_grads) {
6465
0
                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false));
6466
0
            }
6467
0
        } break;
6468
0
        case GGML_OP_REPEAT: {
6469
0
            if (src0_needs_grads) {
6470
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
6471
0
            }
6472
0
        } break;
6473
0
        case GGML_OP_REPEAT_BACK: {
6474
0
            if (src0_needs_grads) {
6475
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6476
0
            }
6477
0
        } break;
6478
0
        case GGML_OP_RMS_NORM: {
6479
0
            if (src0_needs_grads) {
6480
0
                float eps;
6481
0
                memcpy(&eps, tensor->op_params, sizeof(float));
6482
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
6483
0
            }
6484
0
        } break;
6485
0
        case GGML_OP_MUL_MAT: {
6486
            // https://cs231n.github.io/optimization-2/#staged
6487
            // # forward pass
6488
            // s0 = np.random.randn(5, 10)
6489
            // s1 = np.random.randn(10, 3)
6490
            // t = s0.dot(s1)
6491
6492
            // # now suppose we had the gradient on t from above in the circuit
6493
            // dt = np.random.randn(*t.shape) # same shape as t
6494
            // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
6495
            // ds1 = s0.T.dot(dt)
6496
6497
            // tensor.shape [m,p,qq,rr]
6498
            // src0.shape   [n,m,q1,r1]
6499
            // src1.shape   [n,p,qq,rr]
6500
6501
0
            if (src0_needs_grads) {
6502
0
                GGML_ASSERT(grad->ne[2] == src1->ne[2]);
6503
0
                GGML_ASSERT(grad->ne[3] == src1->ne[3]);
6504
0
                struct ggml_tensor * tmp =
6505
0
                    ggml_out_prod(ctx, // [n,m,qq,rr]
6506
0
                        src1,          // [n,p,qq,rr]
6507
0
                        grad);         // [m,p,qq,rr]
6508
0
                if (!ggml_are_same_shape(tmp, src0)) {
6509
0
                    GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
6510
0
                    GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
6511
0
                    GGML_ASSERT(tmp->ne[3] == 1);
6512
6513
0
                    const int64_t nr2 = tmp->ne[2] / src0->ne[2];
6514
0
                    const size_t nb2 = tmp->nb[2] * nr2;
6515
0
                    const size_t nb3 = tmp->nb[2];
6516
6517
0
                    tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
6518
0
                    tmp = ggml_repeat_back(ctx, tmp, src0);
6519
0
                }
6520
0
                ggml_add_or_set(ctx, cgraph, isrc0, tmp);
6521
0
            }
6522
0
            if (src1_needs_grads) {
6523
0
                ggml_add_or_set(ctx, cgraph, isrc1,
6524
                        // ggml_mul_mat(ctx,                   // [n,p,qq,rr]
6525
                        //     ggml_cont(ctx,                  // [m,n,q1,r1]
6526
                        //         ggml_transpose(ctx, src0)), // [m,n,q1,r1]
6527
                        //     grad),                          // [m,p,qq,rr]
6528
6529
                        // when src0 is bigger than tensor->grad (which is mostly the case in llama),
6530
                        // avoid transposing src0; instead transpose the smaller tensor->grad
6531
                        // and then use ggml_out_prod
6532
0
                        ggml_out_prod(ctx,      // [n,p,qq,rr]
6533
0
                            src0,               // [n,m,q1,r1]
6534
0
                            ggml_transpose(ctx, // [p,m,qq,rr]
6535
0
                                grad)));        // [m,p,qq,rr]
6536
0
            }
6537
0
        } break;
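Spelled out as equations (a restatement of the shape comments above, with the index convention those shapes imply: k runs over the contracted dimension n), writing g for the incoming gradient on tensor:

    t_{ij} = \sum_{k=1}^{n} s0_{ki}\, s1_{kj}, \qquad g_{ij} = \frac{\partial L}{\partial t_{ij}}

    \frac{\partial L}{\partial s0_{ki}} = \sum_{j} s1_{kj}\, g_{ij} \quad \text{(the ggml\_out\_prod(src1, grad) above)}

    \frac{\partial L}{\partial s1_{kj}} = \sum_{i} s0_{ki}\, g_{ij} \quad \text{(the ggml\_out\_prod(src0, grad}^{\top}\text{) above)}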
6538
0
        case GGML_OP_SCALE: {
6539
0
            if (src0_needs_grads) {
6540
0
                float s;
6541
0
                memcpy(&s, tensor->op_params, sizeof(float));
6542
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false));
6543
0
            }
6544
0
        } break;
6545
0
        case GGML_OP_SET: {
6546
0
            const size_t nb1    = ((const int32_t *) tensor->op_params)[0];
6547
0
            const size_t nb2    = ((const int32_t *) tensor->op_params)[1];
6548
0
            const size_t nb3    = ((const int32_t *) tensor->op_params)[2];
6549
0
            const size_t offset = ((const int32_t *) tensor->op_params)[3];
6550
6551
0
            struct ggml_tensor * tensor_grad_view = NULL;
6552
6553
0
            if (src0_needs_grads || src1_needs_grads) {
6554
0
                GGML_ASSERT(src0->type == tensor->type);
6555
0
                GGML_ASSERT(!cgraph->grads[isrc0] ||                      cgraph->grads[isrc0]->type == grad->type);
6556
0
                GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);
6557
6558
0
                tensor_grad_view = ggml_view_4d(ctx,
6559
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6560
0
                    nb1, nb2, nb3, offset);
6561
0
            }
6562
6563
0
            if (src0_needs_grads) {
6564
0
                struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
6565
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
6566
0
            }
6567
6568
0
            if (src1_needs_grads) {
6569
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6570
0
            }
6571
0
        } break;
6572
0
        case GGML_OP_CPY: {
6573
            // cpy overwrites value of src1 by src0 and returns view(src1)
6574
            // the overwriting is mathematically equivalent to:
6575
            // tensor = src0 * 1 + src1 * 0
6576
0
            if (src0_needs_grads) {
6577
                // dsrc0 = dtensor * 1
6578
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
6579
0
            }
6580
0
            if (src1_needs_grads) {
6581
                // dsrc1 = dtensor * 0 -> noop
6582
0
            }
6583
0
        } break;
6584
0
        case GGML_OP_CONT: {
6585
            // same as cpy
6586
0
            if (src0_needs_grads) {
6587
0
                GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
6588
0
                GGML_ASSERT(ggml_is_contiguous(grad));
6589
0
                GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
6590
0
                ggml_add_or_set(ctx, cgraph, isrc0,
6591
0
                    ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
6592
0
            }
6593
0
        } break;
6594
0
        case GGML_OP_RESHAPE: {
6595
0
            if (src0_needs_grads) {
6596
0
                struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
6597
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
6598
0
            }
6599
0
        } break;
6600
0
        case GGML_OP_VIEW: {
6601
0
            if (src0_needs_grads) {
6602
0
                size_t offset;
6603
6604
0
                memcpy(&offset, tensor->op_params, sizeof(offset));
6605
6606
0
                size_t nb1 = tensor->nb[1];
6607
0
                size_t nb2 = tensor->nb[2];
6608
0
                size_t nb3 = tensor->nb[3];
6609
6610
0
                if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
6611
                    // the gradient is typically F32, but src0 could be of a different type
6612
0
                    size_t ng = ggml_element_size(cgraph->grads[isrc0]);
6613
0
                    size_t n0 = ggml_element_size(src0);
6614
0
                    GGML_ASSERT(offset % n0 == 0);
6615
0
                    GGML_ASSERT(nb1 % n0 == 0);
6616
0
                    GGML_ASSERT(nb2 % n0 == 0);
6617
0
                    GGML_ASSERT(nb3 % n0 == 0);
6618
0
                    offset = (offset / n0) * ng;
6619
0
                    nb1 = (nb1 / n0) * ng;
6620
0
                    nb2 = (nb2 / n0) * ng;
6621
0
                    nb3 = (nb3 / n0) * ng;
6622
0
                }
6623
6624
0
                ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
6625
0
            }
6626
0
        } break;
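A worked instance of the stride rescaling above, with illustrative sizes only (F16 source tensor, F32 gradient):

    #include <stdio.h>

    int main(void) {
        const size_t n0 = 2, ng = 4;           // element sizes: F16 src0, F32 grad
        const size_t offset = 128, nb1 = 256;  // byte offset and row stride into src0
        // 128 bytes into an F16 buffer is element 64; the same element in the
        // F32 gradient buffer sits at byte 64*4 = 256, and strides scale alike
        printf("grad offset = %zu\n", (offset / n0) * ng);  // prints 256
        printf("grad nb1    = %zu\n", (nb1    / n0) * ng);  // prints 512
        return 0;
    }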
6627
0
        case GGML_OP_PERMUTE: {
6628
0
            if (src0_needs_grads) {
6629
0
                const int32_t * axes = (const int32_t *) tensor->op_params;
6630
0
                const int axis0 = axes[0] & 0x3;
6631
0
                const int axis1 = axes[1] & 0x3;
6632
0
                const int axis2 = axes[2] & 0x3;
6633
0
                const int axis3 = axes[3] & 0x3;
6634
0
                int axb[4] = {0,0,0,0}; // axes backward
6635
0
                axb[axis0] = 0;
6636
0
                axb[axis1] = 1;
6637
0
                axb[axis2] = 2;
6638
0
                axb[axis3] = 3;
6639
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
6640
0
            }
6641
0
        } break;
6642
0
        case GGML_OP_TRANSPOSE: {
6643
0
            if (src0_needs_grads) {
6644
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
6645
0
            }
6646
0
        } break;
6647
0
        case GGML_OP_GET_ROWS: {
6648
0
            if (src0_needs_grads) {
6649
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
6650
0
            }
6651
0
            if (src1_needs_grads) {
6652
                // noop
6653
0
            }
6654
0
        } break;
6655
0
        case GGML_OP_DIAG_MASK_INF: {
6656
0
            if (src0_needs_grads) {
6657
                /* ggml_diag_mask_inf_impl() shouldn't be here */
6658
                /* ref: https://github.com/ggml-org/llama.cpp/pull/4203#discussion_r1412377992 */
6659
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6660
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6661
0
            }
6662
0
        } break;
6663
0
        case GGML_OP_DIAG_MASK_ZERO: {
6664
0
            if (src0_needs_grads) {
6665
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6666
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6667
0
            }
6668
0
        } break;
6669
0
        case GGML_OP_SOFT_MAX: {
6670
0
            if (src0_needs_grads) {
6671
0
                float scale    = 1.0f;
6672
0
                float max_bias = 0.0f;
6673
6674
0
                memcpy(&scale,    (const float *) tensor->op_params + 0, sizeof(float));
6675
0
                memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
6676
6677
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
6678
0
            }
6679
0
            GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
6680
0
        } break;
6681
0
        case GGML_OP_ROPE: {
6682
0
            if (src0_needs_grads) {
6683
                //const int n_past = ((int32_t *) tensor->op_params)[0];
6684
0
                const int n_dims     = ((const int32_t *) tensor->op_params)[1];
6685
0
                const int mode       = ((const int32_t *) tensor->op_params)[2];
6686
                //const int n_ctx      = ((int32_t *) tensor->op_params)[3];
6687
0
                const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
6688
0
                float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
6689
0
                int sections[4] = {0, 0, 0, 0};
6690
6691
0
                memcpy(&freq_base,   (const float *) tensor->op_params +  5, sizeof(float));
6692
0
                memcpy(&freq_scale,  (const float *) tensor->op_params +  6, sizeof(float));
6693
0
                memcpy(&ext_factor,  (const float *) tensor->op_params +  7, sizeof(float));
6694
0
                memcpy(&attn_factor, (const float *) tensor->op_params +  8, sizeof(float));
6695
0
                memcpy(&beta_fast,   (const float *) tensor->op_params +  9, sizeof(float));
6696
0
                memcpy(&beta_slow,   (const float *) tensor->op_params + 10, sizeof(float));
6697
0
                memcpy(&sections,                    tensor->op_params + 11, sizeof(sections));
6698
6699
0
                struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
6700
0
                    ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
6701
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
6702
0
                    ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
6703
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
6704
0
                ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
6705
0
            }
6706
0
            GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
6707
0
        } break;
6708
0
        case GGML_OP_IM2COL: {
6709
0
            if (src1_needs_grads) {
6710
0
                const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
6711
0
                const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
6712
0
                const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
6713
0
                const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
6714
0
                const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
6715
0
                const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
6716
0
                const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
6717
6718
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
6719
0
            }
6720
0
        } break;
6721
0
        case GGML_OP_POOL_2D: {
6722
0
            if (src0_needs_grads) {
6723
0
                const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
6724
0
                const      int32_t      k0 = ggml_get_op_params_i32(tensor, 1);
6725
0
                const      int32_t      k1 = ggml_get_op_params_i32(tensor, 2);
6726
0
                const      int32_t      s0 = ggml_get_op_params_i32(tensor, 3);
6727
0
                const      int32_t      s1 = ggml_get_op_params_i32(tensor, 4);
6728
0
                const      int32_t      p0 = ggml_get_op_params_i32(tensor, 5);
6729
0
                const      int32_t      p1 = ggml_get_op_params_i32(tensor, 6);
6730
6731
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
6732
0
            }
6733
0
        } break;
6734
0
        case GGML_OP_WIN_PART:
6735
0
        case GGML_OP_WIN_UNPART:
6736
0
        case GGML_OP_UNARY: {
6737
0
            switch (ggml_get_unary_op(tensor)) {
6738
0
                case GGML_UNARY_OP_ABS: {
6739
0
                    if (src0_needs_grads) {
6740
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
6741
0
                    }
6742
0
                } break;
6743
0
                case GGML_UNARY_OP_SGN: {
6744
                    // noop
6745
0
                } break;
6746
0
                case GGML_UNARY_OP_NEG: {
6747
0
                    if (src0_needs_grads) {
6748
0
                        ggml_sub_or_set(ctx, cgraph, isrc0, grad);
6749
0
                    }
6750
0
                } break;
6751
0
                case GGML_UNARY_OP_STEP: {
6752
                    // noop
6753
0
                } break;
6754
0
                case GGML_UNARY_OP_RELU: {
6755
0
                    if (src0_needs_grads) {
6756
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
6757
0
                    }
6758
0
                } break;
6759
0
                case GGML_UNARY_OP_SILU: {
6760
0
                    if (src0_needs_grads) {
6761
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
6762
0
                    }
6763
0
                } break;
6764
0
                case GGML_UNARY_OP_EXP: {
6765
0
                    if (src0_needs_grads) {
6766
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
6767
0
                    }
6768
0
                } break;
6769
0
                case GGML_UNARY_OP_EXPM1: {
6770
0
                    if (src0_needs_grads) {
6771
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
6772
0
                    }
6773
0
                } break;
6774
0
                case GGML_UNARY_OP_SOFTPLUS: {
6775
0
                    if (src0_needs_grads) {
6776
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
6777
0
                    }
6778
0
                } break;
6779
0
                default: {
6780
0
                    fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
6781
0
                        __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
6782
0
                    GGML_ABORT("fatal error");
6783
0
                } //break;
6784
0
            }
6785
0
        } break;
6786
0
        case GGML_OP_CROSS_ENTROPY_LOSS: {
6787
0
            if (src0_needs_grads) {
6788
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
6789
0
            }
6790
0
            GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
6791
0
        } break;
6792
0
        case GGML_OP_GLU: {
6793
0
            switch (ggml_get_glu_op(tensor)) {
6794
0
                case GGML_GLU_OP_SWIGLU: {
6795
0
                    if (src0_needs_grads) {
6796
0
                        GGML_ASSERT(src1 && "backward pass only implemented for split swiglu");
6797
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0));
6798
0
                    }
6799
0
                    if (src1_needs_grads) {
6800
0
                        ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad));
6801
0
                    }
6802
0
                } break;
6803
0
                default: {
6804
0
                    GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor)));
6805
0
                } //break;
6806
0
            }
6807
0
        } break;
6808
0
        case GGML_OP_NONE: {
6809
            // noop
6810
0
        } break;
6811
0
        case GGML_OP_COUNT:
6812
0
        default: {
6813
0
            GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
6814
0
        } //break;
6815
0
    }
6816
6817
0
    GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
6818
0
    GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
6819
0
    GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
6820
0
}
6821
6822
0
static size_t ggml_visit_parents_graph(struct ggml_cgraph * cgraph, struct ggml_tensor * node, bool compute) {
6823
0
    if (node->op != GGML_OP_NONE && compute) {
6824
0
        node->flags |= GGML_TENSOR_FLAG_COMPUTE;
6825
0
    }
6826
6827
0
    const size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
6828
0
    GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
6829
6830
0
    if (ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
6831
        // already visited
6832
6833
0
        if (compute) {
6834
            // update the compute flag regardless
6835
0
            for (int i = 0; i < GGML_MAX_SRC; ++i) {
6836
0
                struct ggml_tensor * src = node->src[i];
6837
0
                if (src && ((src->flags & GGML_TENSOR_FLAG_COMPUTE) == 0)) {
6838
0
                    ggml_visit_parents_graph(cgraph, src, true);
6839
0
                }
6840
0
            }
6841
0
        }
6842
6843
0
        return node_hash_pos;
6844
0
    }
6845
6846
    // This is the first time we see this node in the current graph.
6847
0
    cgraph->visited_hash_set.keys[node_hash_pos] = node;
6848
0
    ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
6849
0
    cgraph->use_counts[node_hash_pos] = 0;
6850
6851
0
    for (int i = 0; i < GGML_MAX_SRC; ++i) {
6852
0
        const int k =
6853
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
6854
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
6855
0
            /* unknown order, just fall back to using i */ i;
6856
6857
0
        struct ggml_tensor * src = node->src[k];
6858
0
        if (src) {
6859
0
            const size_t src_hash_pos = ggml_visit_parents_graph(cgraph, src, compute);
6860
6861
            // Update the use count for this operand.
6862
0
            cgraph->use_counts[src_hash_pos]++;
6863
0
        }
6864
0
    }
6865
6866
0
    if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
6867
        // reached a leaf node, not part of the gradient graph (e.g. a constant)
6868
0
        GGML_ASSERT(cgraph->n_leafs < cgraph->size);
6869
6870
0
        if (strlen(node->name) == 0) {
6871
0
            ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
6872
0
        }
6873
6874
0
        cgraph->leafs[cgraph->n_leafs] = node;
6875
0
        cgraph->n_leafs++;
6876
0
    } else {
6877
0
        GGML_ASSERT(cgraph->n_nodes < cgraph->size);
6878
6879
0
        if (strlen(node->name) == 0) {
6880
0
            ggml_format_name(node, "node_%d", cgraph->n_nodes);
6881
0
        }
6882
6883
0
        cgraph->nodes[cgraph->n_nodes] = node;
6884
0
        cgraph->n_nodes++;
6885
0
    }
6886
6887
0
    return node_hash_pos;
6888
0
}
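A minimal sketch of the bookkeeping this function produces, using only the public ggml.h API (the buffer size is illustrative). A tensor consumed twice is pushed into nodes[] once, but its use count reaches 2, which downstream allocators can use to decide when its buffer becomes reusable:

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16*1024*1024,  // illustrative
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * m = ggml_mul(ctx, a, b);  // one node
        struct ggml_tensor * d = ggml_add(ctx, m, m);  // consumes m twice

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, d);
        // m appears exactly once in gf->nodes; its use_counts entry is 2

        ggml_free(ctx);
        return 0;
    }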
6889
6890
0
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand, bool compute) {
6891
0
    if (!expand) {
6892
        // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
6893
0
        ggml_graph_clear(cgraph);
6894
0
    }
6895
6896
0
    const int n_old = cgraph->n_nodes;
6897
6898
0
    ggml_visit_parents_graph(cgraph, tensor, compute);
6899
6900
0
    const int n_new = cgraph->n_nodes - n_old;
6901
0
    GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
6902
6903
0
    if (n_new > 0) {
6904
        // the last added node should always be the starting point
6905
0
        GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
6906
0
    }
6907
0
}
6908
6909
struct ggml_tensor * ggml_build_forward_select(
6910
        struct ggml_cgraph  * cgraph,
6911
        struct ggml_tensor ** tensors,
6912
        int                   n_tensors,
6913
0
        int                   idx) {
6914
0
    GGML_ASSERT(idx >= 0 && idx < n_tensors);
6915
6916
0
    for (int i = 0; i < n_tensors; i++) {
6917
0
        ggml_build_forward_impl(cgraph, tensors[i], true, i == idx);
6918
0
    }
6919
6920
0
    return tensors[idx];
6921
0
}
6922
6923
0
void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
6924
0
    ggml_build_forward_impl(cgraph, tensor, true, true);
6925
0
}
6926
6927
void ggml_build_backward_expand(
6928
        struct ggml_context *  ctx,
6929
        struct ggml_cgraph  *  cgraph,
6930
0
        struct ggml_tensor  ** grad_accs) {
6931
0
    GGML_ASSERT(cgraph->n_nodes > 0);
6932
0
    GGML_ASSERT(cgraph->grads);
6933
0
    GGML_ASSERT(cgraph->grad_accs);
6934
6935
0
    const int n_nodes_f = cgraph->n_nodes;
6936
6937
0
    memset(cgraph->grads,     0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6938
0
    memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6939
0
    bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
6940
6941
0
    {
6942
0
        bool any_params = false;
6943
0
        bool any_loss   = false;
6944
0
        for (int i = 0; i < n_nodes_f; ++i) {
6945
0
            struct ggml_tensor * node = cgraph->nodes[i];
6946
0
            any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
6947
0
            any_loss   = any_loss   || (node->flags & GGML_TENSOR_FLAG_LOSS);
6948
0
        }
6949
0
        GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
6950
0
        GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
6951
0
    }
6952
6953
0
    for (int i = 0; i < n_nodes_f; ++i) {
6954
0
        struct ggml_tensor * node = cgraph->nodes[i];
6955
6956
0
        if (node->type == GGML_TYPE_I32) {
6957
0
            continue;
6958
0
        }
6959
6960
0
        bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
6961
0
        bool ignore_src[GGML_MAX_SRC] = {false};
6962
0
        switch (node->op) {
6963
            // gradients in node->src[0] for one reason or another have no effect on output gradients
6964
0
            case GGML_OP_IM2COL:      // only used for its shape
6965
0
            case GGML_OP_IM2COL_BACK: // same as IM2COL
6966
0
                ignore_src[0] = true;
6967
0
                break;
6968
0
            case GGML_OP_UNARY: {
6969
0
                const enum ggml_unary_op uop = ggml_get_unary_op(node);
6970
                // SGN and STEP unary ops are piecewise constant
6971
0
                if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
6972
0
                    ignore_src[0] = true;
6973
0
                }
6974
0
            } break;
6975
6976
            // gradients in node->src[1] for one reason or another have no effect on output gradients
6977
0
            case GGML_OP_CPY:           // gradients in CPY target are irrelevant
6978
0
            case GGML_OP_GET_ROWS:      // row indices not differentiable
6979
0
            case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
6980
0
            case GGML_OP_ROPE:          // positions not differentiable
6981
0
                ignore_src[1] = true;
6982
0
                break;
6983
6984
0
            default:
6985
0
                break;
6986
0
        }
6987
0
        for (int j = 0; j < GGML_MAX_SRC; ++j) {
6988
0
            if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
6989
0
                continue;
6990
0
            }
6991
0
            GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
6992
0
            node_needs_grad = true;
6993
0
            break;
6994
0
        }
6995
0
        if (!node_needs_grad) {
6996
0
            continue;
6997
0
        }
6998
6999
        // inplace operations are currently not supported
7000
0
        GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
7001
0
            node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
7002
7003
0
        const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
7004
0
        GGML_ASSERT(ihash != GGML_HASHSET_FULL);
7005
0
        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
7006
0
        if (grad_accs && grad_accs[i]) {
7007
0
            cgraph->grad_accs[ihash] = grad_accs[i];
7008
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
7009
0
        } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
7010
            // loss tensors always need a gradient accumulator
7011
0
            cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
7012
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
7013
0
        }
7014
0
        grads_needed[ihash] = true;
7015
0
    }
7016
7017
0
    for (int i = n_nodes_f - 1; i >= 0; --i) {
7018
        // inplace operations to add gradients are not created by ggml_compute_backward, except for gradient accumulation;
7019
        // instead, rely on the allocator to make these operations inplace automatically
7020
0
        ggml_compute_backward(ctx, cgraph, i, grads_needed);
7021
0
    }
7022
7023
0
    free(grads_needed);
7024
0
}
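A minimal end-to-end sketch of the calling convention (public ggml.h API; the shapes and memory size are illustrative, and actually evaluating the graph requires a backend, which is omitted here):

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 64*1024*1024,  // illustrative
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 4);
        struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 1);
        ggml_set_param(w);                             // trainable weight

        struct ggml_tensor * y    = ggml_mul_mat(ctx, w, x);
        struct ggml_tensor * loss = ggml_sum(ctx, y);  // scalar stand-in for a real loss
        ggml_set_loss(loss);

        // the graph must be allocated with gradient storage (grads = true)
        struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
        ggml_build_forward_expand(gb, loss);
        ggml_build_backward_expand(ctx, gb, /*grad_accs =*/ NULL);

        // after evaluating gb, ggml_graph_get_grad(gb, w) holds d(loss)/dw
        ggml_free(ctx);
        return 0;
    }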
7025
7026
0
static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
7027
0
    void * ptr = *p;
7028
0
    ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
7029
0
    *p = (void *) ((char *) ptr + size);
7030
0
    return ptr;
7031
0
}
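incr_ptr_aligned() is a plain bump-allocator step: round the cursor up to the requested alignment, hand out the aligned pointer, advance the cursor by size. A standalone replica with illustrative numbers (the alignment must be a power of two):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))  // round up, n a power of two

    static void * bump(void ** p, size_t size, size_t align) {
        uintptr_t ptr = PAD((uintptr_t) *p, (uintptr_t) align);
        *p = (void *) (ptr + size);
        return (void *) ptr;
    }

    int main(void) {
        uint64_t buf[8];             // 8-byte aligned backing storage
        void * p = buf;
        void * a = bump(&p, 13, 8);  // 13 bytes, 8-byte aligned
        void * b = bump(&p,  4, 8);  // starts at the next 8-byte boundary
        printf("%td\n", (char *) b - (char *) a);  // prints 16
        return 0;
    }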
7032
7033
0
static size_t ggml_graph_nbytes(size_t size, bool grads) {
7034
0
    size_t hash_size = ggml_hash_size(size * 2);
7035
0
    void * p = 0;
7036
0
    incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
7037
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
7038
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
7039
0
    incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts
7040
0
    incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
7041
0
    if (grads) {
7042
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
7043
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
7044
0
    }
7045
0
    incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
7046
7047
0
    size_t nbytes = (size_t) p;
7048
0
    return nbytes;
7049
0
}
7050
7051
0
size_t ggml_graph_overhead_custom(size_t size, bool grads) {
7052
0
    return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
7053
0
}
7054
7055
0
size_t ggml_graph_overhead(void) {
7056
0
    return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
7057
0
}
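A common sizing pattern when a graph and its tensors live in one ggml context (a sketch; the tensor count and slack are illustrative):

    #include "ggml.h"

    int main(void) {
        const size_t n_tensors = 128;  // illustrative upper bound
        struct ggml_init_params params = {
            /*.mem_size   =*/ ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true)
                                  + n_tensors*ggml_tensor_overhead() + 1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);
        struct ggml_cgraph  * gf  = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, true);
        (void) gf;
        ggml_free(ctx);
        return 0;
    }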
7058
7059
0
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
7060
0
    const size_t obj_size = ggml_graph_nbytes(size, grads);
7061
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
7062
0
    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
7063
7064
    // the size of the hash table is doubled since it needs to hold both nodes and leafs
7065
0
    size_t hash_size = ggml_hash_size(size * 2);
7066
7067
0
    void * p = cgraph + 1;
7068
7069
0
    struct ggml_tensor ** nodes_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7070
0
    struct ggml_tensor ** leafs_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7071
0
    int32_t             * use_counts_ptr =         incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t));
7072
0
    struct ggml_tensor ** hash_keys_ptr  =         incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7073
0
    struct ggml_tensor ** grads_ptr      = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
7074
0
    struct ggml_tensor ** grad_accs_ptr  = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
7075
7076
0
    ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
7077
7078
    // check that we allocated the correct amount of memory
7079
0
    assert(obj_size == (size_t)((char *)p - (char *)cgraph));
7080
7081
0
    *cgraph = (struct ggml_cgraph) {
7082
0
        /*.size         =*/ size,
7083
0
        /*.n_nodes      =*/ 0,
7084
0
        /*.n_leafs      =*/ 0,
7085
0
        /*.nodes        =*/ nodes_ptr,
7086
0
        /*.grads        =*/ grads_ptr,
7087
0
        /*.grad_accs    =*/ grad_accs_ptr,
7088
0
        /*.leafs        =*/ leafs_ptr,
7089
0
        /*.use_counts   =*/ use_counts_ptr,
7090
0
        /*.hash_table   =*/ { hash_size, hash_used, hash_keys_ptr },
7091
0
        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
7092
0
    };
7093
7094
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7095
0
    if (grads) {
7096
0
        memset(cgraph->grads,     0, hash_size*sizeof(struct ggml_tensor *));
7097
0
        memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
7098
0
    }
7099
7100
0
    return cgraph;
7101
0
}
7102
7103
0
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
7104
0
    return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
7105
0
}
7106
7107
0
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
7108
0
    struct ggml_cgraph cgraph = {
7109
0
        /*.size             =*/ 0,
7110
0
        /*.n_nodes          =*/ i1 - i0,
7111
0
        /*.n_leafs          =*/ 0,
7112
0
        /*.nodes            =*/ cgraph0->nodes + i0,
7113
0
        /*.grads            =*/ NULL, // gradients would need visited_hash_set
7114
0
        /*.grad_accs        =*/ NULL,
7115
0
        /*.leafs            =*/ NULL,
7116
0
        /*.use_counts       =*/ cgraph0->use_counts,
7117
0
        /*.visited_hash_set =*/ cgraph0->visited_hash_set,
7118
0
        /*.order            =*/ cgraph0->order,
7119
0
    };
7120
7121
0
    return cgraph;
7122
0
}
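A small sketch of what a graph view is for: running contiguous slices of an already-built graph as separate stages, without copying nodes (assumes gf was fully built elsewhere):

    #include "ggml.h"

    static void split_in_two(struct ggml_cgraph * gf) {
        const int n = ggml_graph_n_nodes(gf);
        struct ggml_cgraph stage1 = ggml_graph_view(gf, 0,     n / 2);
        struct ggml_cgraph stage2 = ggml_graph_view(gf, n / 2, n);
        // both views alias gf's nodes, use_counts and visited_hash_set;
        // per the initializer above, they carry no leafs and no gradients
        (void) stage1; (void) stage2;
    }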
7123
7124
0
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
7125
0
    GGML_ASSERT(dst->size >= src->n_leafs);
7126
0
    GGML_ASSERT(dst->size >= src->n_nodes);
7127
0
    GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
7128
7129
0
    dst->n_leafs = src->n_leafs;
7130
0
    dst->n_nodes = src->n_nodes;
7131
0
    dst->order   = src->order;
7132
7133
0
    for (int i = 0; i < src->n_leafs; ++i) {
7134
0
        dst->leafs[i] = src->leafs[i];
7135
0
    }
7136
7137
0
    for (int i = 0; i < src->n_nodes; ++i) {
7138
0
        dst->nodes[i] = src->nodes[i];
7139
0
    }
7140
7141
0
    for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
7142
        // copy all hashset keys (tensors) that are in use
7143
0
        if (ggml_bitset_get(src->visited_hash_set.used, i)) {
7144
0
            size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
7145
0
            dst->use_counts[new_hash_pos] = src->use_counts[i];
7146
0
        }
7147
0
    }
7148
7149
0
    if (dst->grads) {
7150
0
        memset(dst->grads,     0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7151
0
        memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7152
0
    }
7153
0
    if (src->grads) {
7154
0
        GGML_ASSERT(dst->grads     != NULL);
7155
0
        GGML_ASSERT(dst->grad_accs != NULL);
7156
0
        for (int i = 0; i < src->n_nodes; ++i) {
7157
0
            const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
7158
0
            const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
7159
7160
0
            GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
7161
0
            GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
7162
0
            GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
7163
0
            GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
7164
7165
0
            dst->grads[igrad_dst]     = src->grads[igrad_src];
7166
0
            dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
7167
0
        }
7168
0
    }
7169
0
}
7170
7171
0
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
7172
0
    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
7173
0
    ggml_graph_cpy(cgraph, result);
7174
0
    return result;
7175
0
}
7176
7177
0
struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
7178
0
    if (ggml_is_empty(tensor)) {
7179
0
        return tensor;
7180
0
    }
7181
0
    if (tensor->buffer) {
7182
0
        ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
7183
0
    } else {
7184
0
        GGML_ASSERT(tensor->data);
7185
0
        memset(tensor->data, 0, ggml_nbytes(tensor));
7186
0
    }
7187
0
    return tensor;
7188
0
}
7189
7190
0
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
7191
0
    if (!cgraph) {
7192
0
        return;
7193
0
    }
7194
0
    GGML_ASSERT(cgraph->grads != NULL);
7195
7196
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7197
0
        struct ggml_tensor * node     = cgraph->nodes[i];
7198
0
        struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);
7199
7200
0
        if (node->op == GGML_OP_OPT_STEP_ADAMW) {
7201
            // clear momenta
7202
0
            ggml_set_zero(node->src[2]);
7203
0
            ggml_set_zero(node->src[3]);
7204
0
        }
7205
7206
        // initial gradients of loss should be 1, 0 otherwise
7207
0
        if (grad_acc) {
7208
0
            if (node->flags & GGML_TENSOR_FLAG_LOSS) {
7209
0
                GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
7210
0
                GGML_ASSERT(ggml_is_scalar(grad_acc));
7211
7212
0
                const float onef = 1.0f;
7213
0
                if (grad_acc->buffer) {
7214
0
                    ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
7215
0
                } else {
7216
0
                    GGML_ASSERT(grad_acc->data);
7217
0
                    *((float *) grad_acc->data) = onef;
7218
0
                }
7219
0
            } else {
7220
0
                ggml_set_zero(grad_acc);
7221
0
            }
7222
0
        }
7223
0
    }
7224
0
}
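Where this sits in a training loop (a sketch; gb must have been built with gradient accumulators, and the compute and optimizer-step calls are backend-specific, so they appear only as comments):

    #include "ggml.h"

    static void train_step(struct ggml_cgraph * gb) {
        ggml_graph_reset(gb);  // loss grad_acc <- 1.0f, all other grad_accs <- 0
        // ... evaluate gb (forward + backward) with a backend ...
        // ... apply the optimizer update to the params ...
    }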
7225
7226
0
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
7227
0
    cgraph->n_leafs = 0;
7228
0
    cgraph->n_nodes = 0;
7229
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7230
0
}
7231
7232
0
int ggml_graph_size(struct ggml_cgraph * cgraph) {
7233
0
    return cgraph->size;
7234
0
}
7235
7236
0
struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
7237
0
    if (i < 0) {
7238
0
        GGML_ASSERT(cgraph->n_nodes + i >= 0);
7239
0
        return cgraph->nodes[cgraph->n_nodes + i];
7240
0
    }
7241
7242
0
    GGML_ASSERT(i < cgraph->n_nodes);
7243
0
    return cgraph->nodes[i];
7244
0
}
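Negative indices address nodes from the end, Python-style; a tiny sketch assuming a built graph gf:

    #include "ggml.h"

    static void peek(struct ggml_cgraph * gf) {
        struct ggml_tensor * first = ggml_graph_node(gf,  0);  // first node
        struct ggml_tensor * last  = ggml_graph_node(gf, -1);  // last node pushed
        (void) first; (void) last;
    }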
7245
7246
0
struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
7247
0
    return cgraph->nodes;
7248
0
}
7249
7250
0
int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
7251
0
    return cgraph->n_nodes;
7252
0
}
7253
7254
0
void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
7255
0
    GGML_ASSERT(cgraph->size > cgraph->n_nodes);
7256
0
    cgraph->nodes[cgraph->n_nodes] = tensor;
7257
0
    cgraph->n_nodes++;
7258
0
}
7259
7260
0
struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
7261
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7262
0
        struct ggml_tensor * leaf = cgraph->leafs[i];
7263
7264
0
        if (strcmp(leaf->name, name) == 0) {
7265
0
            return leaf;
7266
0
        }
7267
0
    }
7268
7269
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7270
0
        struct ggml_tensor * node = cgraph->nodes[i];
7271
7272
0
        if (strcmp(node->name, name) == 0) {
7273
0
            return node;
7274
0
        }
7275
0
    }
7276
7277
0
    return NULL;
7278
0
}
7279
7280
0
struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7281
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7282
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
7283
0
}
7284
7285
0
struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7286
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7287
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
7288
0
}
7289
7290
0
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
7291
0
    GGML_LOG_INFO("=== GRAPH ===\n");
7292
7293
0
    GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
7294
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7295
0
        struct ggml_tensor * node = cgraph->nodes[i];
7296
7297
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
7298
0
                i,
7299
0
                node->ne[0], node->ne[1], node->ne[2],
7300
0
                ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
7301
0
                      ggml_graph_get_grad(cgraph, node) ? "g" : " ");
7302
0
    }
7303
7304
0
    GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
7305
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7306
0
        struct ggml_tensor * node = cgraph->leafs[i];
7307
7308
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
7309
0
                i,
7310
0
                node->ne[0], node->ne[1],
7311
0
                ggml_op_name(node->op),
7312
0
                ggml_get_name(node));
7313
0
    }
7314
7315
0
    GGML_LOG_INFO("========================================\n");
7316
0
}
7317
7318
static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
7319
                                      const int *                idxs,
7320
                                      int                        count,
7321
0
                                      const struct ggml_tensor * tensor) {
7322
0
    GGML_ASSERT(cgraph && idxs);
7323
0
    for (int i = 0; i < count; ++i) {
7324
0
        const int node_idx = idxs[i];
7325
7326
0
        if (node_idx >= cgraph->n_nodes) {
7327
0
            return -1;
7328
0
        }
7329
0
        if (cgraph->nodes[node_idx] == tensor) {
7330
0
            return i;
7331
0
        }
7332
0
    }
7333
0
    return -1;
7334
0
}
7335
7336
bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
7337
                                const int *                node_idxs,
7338
                                int                        count,
7339
                                const enum ggml_op *       ops,
7340
                                const int *                outputs,
7341
0
                                int                        num_outputs) {
7342
0
    GGML_ASSERT(outputs && num_outputs > 0);
7343
7344
0
    for (int i = 0; i < count; ++i) {
7345
0
        if (node_idxs[i] >= cgraph->n_nodes) {
7346
0
            return false;
7347
0
        }
7348
7349
0
        const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
7350
7351
0
        if (node->op != ops[i]) {
7352
0
            return false;
7353
0
        }
7354
7355
0
        if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
7356
0
            return false;
7357
0
        }
7358
7359
0
        if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
7360
0
            continue;
7361
0
        }
7362
7363
0
        if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
7364
0
            return false;
7365
0
        }
7366
7367
0
        int subgraph_uses = 0;
7368
0
        for (int j = i + 1; j < count; ++j) {
7369
0
            const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
7370
0
            for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
7371
0
                if (other_node->src[src_idx] == node) {
7372
0
                    subgraph_uses++;
7373
0
                }
7374
0
            }
7375
0
        }
7376
7377
0
        if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
7378
0
            return false;
7379
0
        }
7380
7381
        // if the node is a view, check that its view_src and all of its parent view_srcs are within the subgraph
7382
0
        struct ggml_tensor * view_src = node->view_src;
7383
0
        while (view_src) {
7384
0
            if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
7385
0
                return false;
7386
0
            }
7387
0
            view_src = view_src->view_src;
7388
0
        }
7389
0
    }
7390
7391
0
    return true;
7392
0
}
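A sketch of how a backend might call this check (the op pair is illustrative): ask whether nodes i and i+1, expected to be RMS_NORM followed by MUL, can be fused so that only the MUL result is visible outside the fused region:

    #include "ggml.h"

    static bool can_fuse_rms_norm_mul(const struct ggml_cgraph * g, int i) {
        const int          idxs[2] = { i, i + 1 };
        const enum ggml_op ops [2] = { GGML_OP_RMS_NORM, GGML_OP_MUL };
        const int          outs[1] = { i + 1 };  // only the MUL result escapes
        return ggml_can_fuse_subgraph_ext(g, idxs, 2, ops, outs, 1);
    }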
7393
7394
// check if node is part of the graph
7395
0
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7396
0
    if (cgraph == NULL) {
7397
0
        return true;
7398
0
    }
7399
7400
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7401
0
        if (cgraph->nodes[i] == node) {
7402
0
            return true;
7403
0
        }
7404
0
    }
7405
7406
0
    return false;
7407
0
}
7408
7409
0
static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7410
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7411
0
        struct ggml_tensor * parent = cgraph->nodes[i];
7412
0
        struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);
7413
7414
0
        if (grad == node) {
7415
0
            return parent;
7416
0
        }
7417
0
    }
7418
7419
0
    return NULL;
7420
0
}
7421
7422
0
static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7423
0
    struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
7424
0
    struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
7425
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
7426
0
            gparent0 ? (void *) gparent0 : (void *) parent,
7427
0
            gparent ? (void *) gparent : (void *) node,
7428
0
            gparent ? "empty" : "vee",
7429
0
            gparent ? "dashed" : "solid",
7430
0
            label);
7431
0
}
7432
7433
0
static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7434
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ label = \"%s\"; ]\n",
7435
0
            (void *) parent,
7436
0
            (void *) node,
7437
0
            label);
7438
0
}
7439
7440
0
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * cgraph, const char * filename) {
7441
0
    char color[16];
7442
7443
0
    FILE * fp = ggml_fopen(filename, "w");
7444
0
    GGML_ASSERT(fp);
7445
7446
0
    fprintf(fp, "digraph G {\n");
7447
0
    fprintf(fp, "  newrank = true;\n");
7448
0
    fprintf(fp, "  rankdir = TB;\n");
7449
7450
0
    for (int i = 0; i < gb->n_nodes; i++) {
7451
0
        struct ggml_tensor * node = gb->nodes[i];
7452
0
        struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);
7453
7454
0
        if (ggml_graph_get_parent(gb, node) != NULL) {
7455
0
            continue;
7456
0
        }
7457
7458
0
        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
7459
0
            snprintf(color, sizeof(color), "yellow");
7460
0
        } else if (grad) {
7461
0
            if (ggml_graph_find(cgraph, node)) {
7462
0
                snprintf(color, sizeof(color), "green");
7463
0
            } else {
7464
0
                snprintf(color, sizeof(color), "lightblue");
7465
0
            }
7466
0
        } else {
7467
0
            snprintf(color, sizeof(color), "white");
7468
0
        }
7469
7470
0
        fprintf(fp, "  \"%p\" [ "
7471
0
                    "style = filled; fillcolor = %s; shape = record; "
7472
0
                    "label=\"",
7473
0
                (void *) node, color);
7474
7475
0
        if (strlen(node->name) > 0) {
7476
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7477
0
        } else {
7478
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7479
0
        }
7480
7481
0
        if (ggml_is_matrix(node)) {
7482
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
7483
0
        } else {
7484
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
7485
0
        }
7486
7487
0
        if (grad) {
7488
0
            fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
7489
0
        } else {
7490
0
            fprintf(fp, "\"; ]\n");
7491
0
        }
7492
0
    }
7493
7494
0
    for (int i = 0; i < gb->n_leafs; i++) {
7495
0
        struct ggml_tensor * node = gb->leafs[i];
7496
7497
0
        snprintf(color, sizeof(color), "pink");
7498
7499
0
        fprintf(fp, "  \"%p\" [ "
7500
0
                    "style = filled; fillcolor = %s; shape = record; "
7501
0
                    "label=\"<x>",
7502
0
                (void *) node, color);
7503
7504
0
        if (strlen(node->name) > 0) {
7505
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7506
0
        } else {
7507
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7508
0
        }
7509
7510
0
        fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
7511
0
        if (ggml_nelements(node) < 5 && node->data != NULL) {
7512
0
            fprintf(fp, " | (");
7513
0
            for (int j = 0; j < ggml_nelements(node); j++) {
7514
                // FIXME: use ggml-backend to obtain the tensor data
7515
                //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
7516
                //    fprintf(fp, "%d", ggml_get_i32_1d(node, j));
7517
                //}
7518
                //else if (node->type == GGML_TYPE_F32 ||
7519
                //         node->type == GGML_TYPE_F16 ||
7520
                //         node->type == GGML_TYPE_BF16) {
7521
                //    fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
7522
                //}
7523
                //else
7524
0
                {
7525
0
                    fprintf(fp, "#");
7526
0
                }
7527
0
                if (j < ggml_nelements(node) - 1) {
7528
0
                    fprintf(fp, ", ");
7529
0
                }
7530
0
            }
7531
0
            fprintf(fp, ")");
7532
0
        }
7533
0
        fprintf(fp, "\"; ]\n");
7534
0
    }
7535
7536
0
    for (int i = 0; i < gb->n_nodes; i++) {
7537
0
        struct ggml_tensor * node = gb->nodes[i];
7538
7539
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7540
0
            if (node->src[j]) {
7541
0
                char label[16];
7542
0
                snprintf(label, sizeof(label), "src %d", j);
7543
0
                ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
7544
0
            }
7545
0
        }
7546
0
    }
7547
7548
0
    for (int i = 0; i < gb->n_leafs; i++) {
7549
0
        struct ggml_tensor * node = gb->leafs[i];
7550
7551
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7552
0
            if (node->src[j]) {
7553
0
                char label[16];
7554
0
                snprintf(label, sizeof(label), "src %d", j);
7555
0
                ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
7556
0
            }
7557
0
        }
7558
0
    }
7559
7560
0
    fprintf(fp, "}\n");
7561
7562
0
    fclose(fp);
7563
7564
0
    GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
7565
0
}
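Typical usage (a sketch): dump a built graph and render it with Graphviz as the log line suggests, e.g. dot -Tpng graph.dot -o graph.png. Passing NULL for the second graph is fine, since ggml_graph_find() treats a NULL graph as containing every node:

    #include "ggml.h"

    static void dump(struct ggml_cgraph * gb) {
        ggml_graph_dump_dot(gb, NULL, "graph.dot");
    }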
7566
7567
////////////////////////////////////////////////////////////////////////////////
7568
7569
0
void ggml_set_input(struct ggml_tensor * tensor) {
7570
0
    tensor->flags |= GGML_TENSOR_FLAG_INPUT;
7571
0
}
7572
7573
0
void ggml_set_output(struct ggml_tensor * tensor) {
7574
0
    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
7575
0
}
7576
7577
0
void ggml_set_param(struct ggml_tensor * tensor) {
7578
0
    GGML_ASSERT(tensor->op == GGML_OP_NONE);
7579
0
    tensor->flags |= GGML_TENSOR_FLAG_PARAM;
7580
0
}
7581
7582
0
void ggml_set_loss(struct ggml_tensor * tensor) {
7583
0
    GGML_ASSERT(ggml_is_scalar(tensor));
7584
0
    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
7585
0
    tensor->flags |= GGML_TENSOR_FLAG_LOSS;
7586
0
}
7587
7588
////////////////////////////////////////////////////////////////////////////////
7589
7590
0
void ggml_quantize_init(enum ggml_type type) {
7591
0
    ggml_critical_section_start();
7592
7593
0
    switch (type) {
7594
0
        case GGML_TYPE_IQ2_XXS:
7595
0
        case GGML_TYPE_IQ2_XS:
7596
0
        case GGML_TYPE_IQ2_S:
7597
0
        case GGML_TYPE_IQ1_S:
7598
0
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
7599
0
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
7600
0
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
7601
0
        default: // nothing
7602
0
            break;
7603
0
    }
7604
7605
0
    ggml_critical_section_end();
7606
0
}
7607
7608
4.91k
void ggml_quantize_free(void) {
7609
4.91k
    ggml_critical_section_start();
7610
7611
4.91k
    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
7612
4.91k
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
7613
4.91k
    iq2xs_free_impl(GGML_TYPE_IQ2_S);
7614
4.91k
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
7615
4.91k
    iq2xs_free_impl(GGML_TYPE_IQ1_M);
7616
4.91k
    iq3xs_free_impl(256);
7617
4.91k
    iq3xs_free_impl(512);
7618
7619
4.91k
    ggml_critical_section_end();
7620
4.91k
}
7621
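Usage note (a hedged sketch): ggml_quantize_init() above builds the lookup tables that the IQ families require and is a no-op for every other type; ggml_quantize_free() releases them all and is usually called once at shutdown.

// Sketch: explicit table lifecycle around IQ quantization (illustrative).
ggml_quantize_init(GGML_TYPE_IQ2_XS); // idempotent; ggml_quantize_chunk() also calls it lazily
/* ... quantize tensors ... */
ggml_quantize_free();                 // tears down the tables for all IQ types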
7622
0
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
7623
0
    return
7624
0
        type == GGML_TYPE_IQ2_XXS ||
7625
0
        type == GGML_TYPE_IQ2_XS  ||
7626
0
        type == GGML_TYPE_IQ1_S;//   ||
7627
        //type == GGML_TYPE_IQ1_M;
7628
0
}
7629
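Usage note (an illustrative fragment; type and imatrix stand in for caller state): the predicate above lets callers fail early instead of tripping the GGML_ASSERT inside ggml_quantize_chunk().

// Sketch: reject imatrix-requiring types up front (illustrative guard in a caller).
if (ggml_quantize_requires_imatrix(type) && imatrix == NULL) {
    fprintf(stderr, "%s requires an importance matrix\n", ggml_type_name(type));
    return false;
}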
7630
size_t ggml_quantize_chunk(
7631
        enum ggml_type   type,
7632
           const float * src,
7633
                  void * dst,
7634
               int64_t   start,
7635
               int64_t   nrows,
7636
               int64_t   n_per_row,
7637
0
           const float * imatrix) {
7638
0
    const int64_t n = (int64_t) nrows * n_per_row;
7639
7640
0
    if (ggml_quantize_requires_imatrix(type)) {
7641
0
        GGML_ASSERT(imatrix != NULL);
7642
0
    }
7643
7644
0
    GGML_ASSERT(start % type_traits[type].blck_size == 0);
7645
0
    GGML_ASSERT(start % n_per_row == 0);
7646
7647
0
    ggml_quantize_init(type); // this is a no-op if already initialized
7648
7649
0
    const size_t start_row = start / n_per_row;
7650
0
    const size_t row_size  = ggml_row_size(type, n_per_row);
7651
7652
0
    size_t result = 0;
7653
7654
0
    switch (type) {
7655
0
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7656
0
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7657
0
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7658
0
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7659
0
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7660
0
        case GGML_TYPE_MXFP4:   result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7661
0
        case GGML_TYPE_NVFP4:   result = quantize_nvfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7662
0
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7663
0
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7664
0
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7665
0
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7666
0
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7667
0
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7668
0
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7669
0
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7670
0
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7671
0
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7672
0
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7673
0
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7674
0
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7675
0
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7676
0
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7677
0
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7678
0
        case GGML_TYPE_F16:
7679
0
            {
7680
0
                size_t elemsize = sizeof(ggml_fp16_t);
7681
0
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
7682
0
                result = n * elemsize;
7683
0
            } break;
7684
0
        case GGML_TYPE_BF16:
7685
0
            {
7686
0
                size_t elemsize = sizeof(ggml_bf16_t);
7687
0
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
7688
0
                result = n * elemsize;
7689
0
            } break;
7690
0
        case GGML_TYPE_F32:
7691
0
            {
7692
0
                size_t elemsize = sizeof(float);
7693
0
                result = n * elemsize;
7694
0
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
7695
0
            } break;
7696
0
        default:
7697
0
            assert(false);
7698
0
    }
7699
7700
0
    GGML_ASSERT(result == nrows * row_size);
7701
7702
0
    return result;
7703
0
}
7704
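Usage note (a minimal sketch; sizes are illustrative and Q8_0 takes no imatrix): the destination must hold nrows * ggml_row_size(type, n_per_row) bytes, and the return value equals exactly that, mirroring the GGML_ASSERT above. The helper name quantize_example is hypothetical.

#include <assert.h>
#include <stdlib.h>
#include "ggml.h"

// Sketch: quantize 4 rows of 32 floats to Q8_0 in a single chunk.
static void quantize_example(const float * src /* 4*32 values */) {
    const int64_t nrows = 4, n_per_row = 32; // n_per_row: a multiple of the block size
    const size_t  row_size = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
    void * dst = malloc(nrows * row_size);

    const size_t written = ggml_quantize_chunk(GGML_TYPE_Q8_0, src, dst,
                                               /*start=*/0, nrows, n_per_row,
                                               /*imatrix=*/NULL);
    assert(written == nrows * row_size); // same invariant the function asserts
    free(dst);
}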
7705
////////////////////////////////////////////////////////////////////////////////
7706
7707
0
void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) {
7708
0
    *log_callback = g_logger_state.log_callback;
7709
0
    *user_data    = g_logger_state.log_callback_user_data;
7710
0
}
7711
7712
0
void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
7713
0
    g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
7714
0
    g_logger_state.log_callback_user_data = user_data;
7715
0
}
7716
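Usage note (an illustrative sketch): ggml_log_set() installs a process-wide callback, and as the code above shows, passing NULL restores ggml_log_callback_default. The callback name my_log and the stream choice are hypothetical.

#include <stdio.h>
#include "ggml.h"

// Sketch: route ggml log messages to a caller-chosen stream.
static void my_log(enum ggml_log_level level, const char * text, void * user_data) {
    // ggml messages usually carry their own trailing newline
    fprintf((FILE *) user_data, "[ggml/%d] %s", (int) level, text);
}

// ggml_log_set(my_log, stderr);  // install
// ggml_log_set(NULL, NULL);      // restore the default callback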
7717
0
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
7718
0
    p->n_threads  = n_threads;
7719
0
    p->prio       = 0;     // default priority (usually means normal or inherited)
7720
0
    p->poll       = 50;    // hybrid-polling enabled
7721
0
    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
7722
0
    p->paused     = false; // threads are ready to go
7723
0
    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
7724
0
}
7725
7726
0
struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
7727
0
    struct ggml_threadpool_params p;
7728
0
    ggml_threadpool_params_init(&p, n_threads);
7729
0
    return p;
7730
0
}
7731
7732
0
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
7733
0
    if (p0->n_threads      != p1->n_threads  )    return false;
7734
0
    if (p0->prio           != p1->prio       )    return false;
7735
0
    if (p0->poll           != p1->poll       )    return false;
7736
0
    if (p0->strict_cpu     != p1->strict_cpu )    return false;
7737
0
    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
7738
0
}
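Usage note (an illustrative sketch): a params struct normally comes from ggml_threadpool_params_default() and is then tweaked; ggml_threadpool_params_match() is what lets callers reuse an existing threadpool when nothing changed. The cpumask values below are hypothetical, and the helper name threadpool_params_example is ours.

#include <stdbool.h>
#include "ggml.h"

// Sketch: customize threadpool parameters and compare two configurations.
static bool threadpool_params_example(void) {
    struct ggml_threadpool_params p = ggml_threadpool_params_default(8);
    p.poll       = 0;    // fully blocking waits instead of hybrid polling
    p.strict_cpu = true; // honor the cpumask strictly
    p.cpumask[0] = true; // allow CPUs 0 and 1 (illustrative)
    p.cpumask[1] = true;

    struct ggml_threadpool_params q = ggml_threadpool_params_default(8);
    return ggml_threadpool_params_match(&p, &q); // false here: poll/strict/cpumask differ
}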