Coverage Report

Created: 2026-01-17 06:04

/src/llama.cpp/ggml/src/ggml.c
Line
Count
Source
1
#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
2
#define _USE_MATH_DEFINES // For M_PI on MSVC
3
4
#include "ggml-backend.h"
5
#include "ggml-impl.h"
6
#include "ggml-threading.h"
7
#include "ggml-cpu.h"
8
#include "ggml.h"
9
10
// FIXME: required here for quantization functions
11
#include "ggml-quants.h"
12
13
#ifdef GGML_USE_CPU_HBM
14
#include <hbwmalloc.h>
15
#endif
16
17
#if defined(_MSC_VER) || defined(__MINGW32__)
18
#include <malloc.h> // using malloc.h with MSC/MINGW
19
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
20
#include <alloca.h>
21
#endif
22
23
#include <assert.h>
24
#include <errno.h>
25
#include <time.h>
26
#include <math.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include <stdint.h>
30
#include <inttypes.h>
31
#include <stdio.h>
32
#include <float.h>
33
#include <limits.h>
34
#include <stdarg.h>
35
#include <signal.h>
36
#if defined(__gnu_linux__)
37
#include <syscall.h>
38
#endif
39
40
#if defined(__APPLE__)
41
#include <unistd.h>
42
#include <mach/mach.h>
43
#include <TargetConditionals.h>
44
#endif
45
46
#if defined(_WIN32)
47
#define WIN32_LEAN_AND_MEAN
48
#ifndef NOMINMAX
49
    #define NOMINMAX
50
#endif
51
#include <windows.h>
52
#endif
53
54
0
#define UNUSED GGML_UNUSED
55
56
// Needed for ggml_fp32_to_bf16_row()
57
#if defined(__AVX512BF16__)
58
#if defined(_MSC_VER)
59
#define m512i(p) p
60
#else
61
#include <immintrin.h>
62
#define m512i(p) (__m512i)(p)
63
#endif // defined(_MSC_VER)
64
#endif // defined(__AVX512BF16__)
65
66
#if defined(__linux__) || \
67
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
68
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
69
70
#include <unistd.h>
71
#include <sys/types.h>
72
#include <sys/stat.h>
73
#include <sys/wait.h>
74
#if defined(__linux__)
75
#include <sys/prctl.h>
76
#endif
77
78
#if defined(__ANDROID__)
79
#include <unwind.h>
80
#include <dlfcn.h>
81
#include <stdio.h>
82
83
struct backtrace_state {
84
    void ** current;
85
    void ** end;
86
};
87
88
static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
89
    struct backtrace_state * state = (struct backtrace_state *)arg;
90
    uintptr_t pc = _Unwind_GetIP(context);
91
    if (pc) {
92
        if (state->current == state->end) {
93
            return _URC_END_OF_STACK;
94
        } else {
95
            *state->current++ = (void*)pc;
96
        }
97
    }
98
    return _URC_NO_REASON;
99
}
100
101
static void ggml_print_backtrace_symbols(void) {
102
    const int max = 100;
103
    void* buffer[max];
104
105
    struct backtrace_state state = {buffer, buffer + max};
106
    _Unwind_Backtrace(unwind_callback, &state);
107
108
    int count = state.current - buffer;
109
110
    for (int idx = 0; idx < count; ++idx) {
111
        const void * addr = buffer[idx];
112
        const char * symbol = "";
113
114
        Dl_info info;
115
        if (dladdr(addr, &info) && info.dli_sname) {
116
            symbol = info.dli_sname;
117
        }
118
119
        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
120
    }
121
}
122
#elif defined(__linux__) && defined(__GLIBC__)
123
#include <execinfo.h>
124
0
static void ggml_print_backtrace_symbols(void) {
125
0
    void * trace[100];
126
0
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
127
0
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
128
0
}
129
#elif defined(__APPLE__)
130
#include <execinfo.h>
131
static void ggml_print_backtrace_symbols(void) {
132
    void * trace[100];
133
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
134
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
135
}
136
#else
137
static void ggml_print_backtrace_symbols(void) {
138
    // platform not supported
139
}
140
#endif
141
142
0
void ggml_print_backtrace(void) {
143
0
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
144
0
    if (GGML_NO_BACKTRACE) {
145
0
        return;
146
0
    }
147
#if defined(__APPLE__)
148
    // On macOS, fork+debugger attachment is problematic due to:
149
    // 1. libdispatch "poisons" forked child processes
150
    // 2. lldb has issues attaching to parent from forked child
151
    // Use simple backtrace() instead to avoid Terminal.app crashes
152
    const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
153
    if (!GGML_BACKTRACE_LLDB) {
154
        fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
155
        fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native macOS Terminal.app to crash.\n");
156
        fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
157
        ggml_print_backtrace_symbols();
158
        return;
159
    }
160
#endif
161
0
#if defined(__linux__)
162
0
    FILE * f = fopen("/proc/self/status", "r");
163
0
    size_t size = 0;
164
0
    char * line = NULL;
165
0
    ssize_t length = 0;
166
0
    while ((length = getline(&line, &size, f)) > 0) {
167
0
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
168
0
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
169
            // Already being debugged, and the breakpoint is the later abort()
170
0
            free(line);
171
0
            fclose(f);
172
0
            return;
173
0
        }
174
0
    }
175
0
    free(line);
176
0
    fclose(f);
177
0
    int lock[2] = { -1, -1 };
178
0
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
179
0
#endif
180
0
    const int parent_pid = getpid();
181
0
    const int child_pid = fork();
182
0
    if (child_pid < 0) { // error
183
0
#if defined(__linux__)
184
0
        close(lock[1]);
185
0
        close(lock[0]);
186
0
#endif
187
0
        return;
188
0
    } else if (child_pid == 0) { // child
189
0
        char attach[32];
190
0
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
191
0
#if defined(__linux__)
192
0
        close(lock[1]);
193
0
        (void) !read(lock[0], lock, 1);
194
0
        close(lock[0]);
195
0
#endif
196
        // try gdb
197
0
        execlp("gdb", "gdb", "--batch",
198
0
            "-ex", "set style enabled on",
199
0
            "-ex", attach,
200
0
            "-ex", "bt -frame-info source-and-location",
201
0
            "-ex", "detach",
202
0
            "-ex", "quit",
203
0
            (char *) NULL);
204
        // try lldb
205
0
        execlp("lldb", "lldb", "--batch",
206
0
            "-o", "bt",
207
0
            "-o", "quit",
208
0
            "-p", &attach[sizeof("attach ") - 1],
209
0
            (char *) NULL);
210
        // gdb failed, fallback to backtrace_symbols
211
0
        ggml_print_backtrace_symbols();
212
0
        _Exit(0);
213
0
    } else { // parent
214
0
#if defined(__linux__)
215
0
        prctl(PR_SET_PTRACER, child_pid);
216
0
        close(lock[1]);
217
0
        close(lock[0]);
218
0
#endif
219
0
        waitpid(child_pid, NULL, 0);
220
0
    }
221
0
}
222
#else
223
void ggml_print_backtrace(void) {
224
    // platform not supported
225
}
226
#endif
227
228
static ggml_abort_callback_t g_abort_callback = NULL;
229
230
// Set the abort callback (passing null will restore the default behavior: printing the message and a backtrace to stderr)
231
0
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) {
232
0
    ggml_abort_callback_t ret_val = g_abort_callback;
233
0
    g_abort_callback = callback;
234
0
    return ret_val;
235
0
}
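Because the callback receives the fully formatted "file:line: message" string before abort() runs, a host application can reroute fatal errors into its own logging. A minimal sketch; the handler and installer names are ours:

// Hypothetical handler: forward fatal ggml errors to the host's logger.
static void host_abort_handler(const char * message) {
    fprintf(stderr, "[host] ggml fatal: %s\n", message);
}

// Install once at startup; the previous callback is returned so it can be
// restored later by passing it back to ggml_set_abort_callback().
static void install_abort_handler(void) {
    ggml_abort_callback_t prev = ggml_set_abort_callback(host_abort_handler);
    (void) prev;
}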
236
237
0
void ggml_abort(const char * file, int line, const char * fmt, ...) {
238
0
    fflush(stdout);
239
240
0
    char message[2048];
241
0
    int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line);
242
243
0
    va_list args;
244
0
    va_start(args, fmt);
245
0
    vsnprintf(message + offset, sizeof(message) - offset, fmt, args);
246
0
    va_end(args);
247
248
0
    if (g_abort_callback) {
249
0
        g_abort_callback(message);
250
0
    } else {
251
        // default: print error and backtrace to stderr
252
0
        fprintf(stderr, "%s\n", message);
253
        ggml_print_backtrace();
254
0
    }
255
256
0
    abort();
257
0
}
258
259
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
260
261
//
262
// logging
263
//
264
265
struct ggml_logger_state {
266
    ggml_log_callback log_callback;
267
    void * log_callback_user_data;
268
};
269
static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};
270
271
0
static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
272
0
    if (format == NULL) {
273
0
        return;
274
0
    }
275
0
    va_list args_copy;
276
0
    va_copy(args_copy, args);
277
0
    char buffer[128];
278
0
    int len = vsnprintf(buffer, 128, format, args);
279
0
    if (len < 128) {
280
0
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
281
0
    } else {
282
0
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
283
0
        vsnprintf(buffer2, len + 1, format, args_copy);
284
0
        buffer2[len] = 0;
285
0
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
286
0
        free(buffer2);
287
0
    }
288
0
    va_end(args_copy);
289
0
}
290
291
0
void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
292
0
    va_list args;
293
0
    va_start(args, format);
294
0
    ggml_log_internal_v(level, format, args);
295
0
    va_end(args);
296
0
}
297
298
0
void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
299
0
    (void) level;
300
0
    (void) user_data;
301
0
    fputs(text, stderr);
302
0
    fflush(stderr);
303
0
}
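A callback with the same signature as ggml_log_callback_default() can filter or redirect ggml's log output. A minimal sketch, assuming the ggml_log_set() setter declared in ggml.h; the handler name is ours:

// Drop DEBUG/INFO messages, keep warnings and errors. Note that
// GGML_LOG_LEVEL_CONT continuation lines also pass this filter.
static void quiet_log_callback(enum ggml_log_level level, const char * text, void * user_data) {
    (void) user_data;
    if (level >= GGML_LOG_LEVEL_WARN) {
        fputs(text, stderr);
    }
}

// ggml_log_set(quiet_log_callback, NULL);  // NULL callback restores the default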
304
305
//
306
// end of logging block
307
//
308
309
#ifdef GGML_USE_ACCELERATE
310
// uncomment to use vDSP for soft max computation
311
// note: not sure if it is actually faster
312
//#define GGML_SOFT_MAX_ACCELERATE
313
#endif
314
315
316
0
void * ggml_aligned_malloc(size_t size) {
317
#if defined(__s390x__)
318
    const int alignment = 256;
319
#else
320
0
    const int alignment = 64;
321
0
#endif
322
323
#if defined(_MSC_VER) || defined(__MINGW32__)
324
    return _aligned_malloc(size, alignment);
325
#else
326
0
    if (size == 0) {
327
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
328
0
        return NULL;
329
0
    }
330
0
    void * aligned_memory = NULL;
331
  #ifdef GGML_USE_CPU_HBM
332
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
333
  #elif TARGET_OS_OSX
334
    GGML_UNUSED(alignment);
335
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
336
    int result = EFAULT;
337
    switch (alloc_status) {
338
        case KERN_SUCCESS:
339
            result = 0;
340
            break;
341
        case KERN_INVALID_ADDRESS:
342
            result = EINVAL;
343
            break;
344
        case KERN_NO_SPACE:
345
            result = ENOMEM;
346
            break;
347
        default:
348
            result = EFAULT;
349
            break;
350
    }
351
  #else
352
0
    int result = posix_memalign(&aligned_memory, alignment, size);
353
0
  #endif
354
0
    if (result != 0) {
355
        // Handle allocation failure
356
0
        const char *error_desc = "unknown allocation error";
357
0
        switch (result) {
358
0
            case EINVAL:
359
0
                error_desc = "invalid alignment value";
360
0
                break;
361
0
            case ENOMEM:
362
0
                error_desc = "insufficient memory";
363
0
                break;
364
0
        }
365
0
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
366
0
        return NULL;
367
0
    }
368
0
    return aligned_memory;
369
0
#endif
370
0
}
371
372
0
void ggml_aligned_free(void * ptr, size_t size) {
373
0
    GGML_UNUSED(size);
374
#if defined(_MSC_VER) || defined(__MINGW32__)
375
    _aligned_free(ptr);
376
#elif GGML_USE_CPU_HBM
377
    if (ptr != NULL) {
378
        hbw_free(ptr);
379
    }
380
#elif TARGET_OS_OSX
381
    if (ptr != NULL) {
382
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
383
    }
384
#else
385
0
    free(ptr);
386
0
#endif
387
0
}
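Allocation and release must stay paired through these two helpers, and the caller must pass the original size to ggml_aligned_free(): the macOS vm_deallocate() path needs it, even though the plain free() path ignores it. A minimal sketch:

const size_t n = 1024 * 1024;
void * buf = ggml_aligned_malloc(n);
if (buf != NULL) {
    // ... use buf; the pointer is aligned to at least 64 bytes (256 on s390x) ...
    ggml_aligned_free(buf, n);
}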
388
389
390
0
inline static void * ggml_malloc(size_t size) {
391
0
    if (size == 0) {
392
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
393
0
        return NULL;
394
0
    }
395
0
    void * result = malloc(size);
396
0
    if (result == NULL) {
397
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
398
0
        GGML_ABORT("fatal error");
399
0
    }
400
0
    return result;
401
0
}
402
403
// calloc
404
0
inline static void * ggml_calloc(size_t num, size_t size) {
405
0
406
407
0
    if (num == 0 || size == 0) {
408
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
409
0
        return NULL;
410
0
    }
411
0
    void * result = calloc(num, size);
412
0
    if (result == NULL) {
413
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, (num*size)/(1024.0*1024.0));
414
0
        GGML_ABORT("fatal error");
415
0
    }
416
0
    return result;
417
0
}
418
419
0
#define GGML_MALLOC(size)      ggml_malloc(size)
420
0
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
421
422
0
#define GGML_FREE(ptr) free(ptr)
423
424
0
const char * ggml_status_to_string(enum ggml_status status) {
425
0
    switch (status) {
426
0
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
427
0
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
428
0
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
429
0
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
430
0
    }
431
432
0
    return "GGML status: unknown";
433
0
}
434
435
0
float ggml_fp16_to_fp32(ggml_fp16_t x) {
436
0
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
437
0
    return GGML_FP16_TO_FP32(x);
438
0
}
439
440
0
ggml_fp16_t ggml_fp32_to_fp16(float x) {
441
0
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
442
0
    return GGML_FP32_TO_FP16(x);
443
0
}
444
445
0
float ggml_bf16_to_fp32(ggml_bf16_t x) {
446
0
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
447
0
    return GGML_BF16_TO_FP32(x);  // it just left shifts
448
0
}
449
450
0
ggml_bf16_t ggml_fp32_to_bf16(float x) {
451
0
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
452
0
    return GGML_FP32_TO_BF16(x);
453
0
}
454
455
0
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
456
0
    for (int64_t i = 0; i < n; i++) {
457
0
        y[i] = GGML_FP16_TO_FP32(x[i]);
458
0
    }
459
0
}
460
461
0
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
462
0
    int i = 0;
463
0
    for (; i < n; ++i) {
464
0
        y[i] = GGML_FP32_TO_FP16(x[i]);
465
0
    }
466
0
}
467
468
0
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
469
0
    int i = 0;
470
0
    for (; i < n; ++i) {
471
0
        y[i] = GGML_BF16_TO_FP32(x[i]);
472
0
    }
473
0
}
474
475
0
void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
476
0
    for (int i = 0; i < n; i++) {
477
0
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
478
0
    }
479
0
}
480
481
0
void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
482
0
  int i = 0;
483
#if defined(__AVX512BF16__)
484
  // subnormals are flushed to zero on this platform
485
  for (; i + 32 <= n; i += 32) {
486
        _mm512_storeu_si512(
487
            (__m512i *)(y + i),
488
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
489
                                _mm512_loadu_ps(x + i))));
490
  }
491
#endif
492
0
    for (; i < n; i++) {
493
0
        y[i] = GGML_FP32_TO_BF16(x[i]);
494
0
    }
495
0
}
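bf16 keeps an fp32 value's sign bit, 8-bit exponent, and top 7 mantissa bits, which is why GGML_BF16_TO_FP32 is "just" a 16-bit left shift. A round-trip sketch using the two row converters above:

float src[4] = { 1.0f, 3.14159f, -2.5e-3f, 65504.0f };
ggml_bf16_t tmp[4];
float dst[4];

ggml_fp32_to_bf16_row(src, tmp, 4);
ggml_bf16_to_fp32_row(tmp, dst, 4);
// Exponents and dst[0] survive exactly; dst[1] becomes roughly 3.140625,
// the nearest value representable with bf16's 8-bit significand.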
496
497
0
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
498
0
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
499
0
}
500
501
0
const char * ggml_version(void) {
502
0
    return GGML_VERSION;
503
0
}
504
505
0
const char * ggml_commit(void) {
506
0
    return GGML_COMMIT;
507
0
}
508
509
//
510
// timing
511
//
512
513
#if defined(_MSC_VER) || defined(__MINGW32__)
514
static int64_t timer_freq, timer_start;
515
void ggml_time_init(void) {
516
    LARGE_INTEGER t;
517
    QueryPerformanceFrequency(&t);
518
    timer_freq = t.QuadPart;
519
520
    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
521
    // and the uptime is high enough.
522
    // We subtract the program start time to reduce the likelihood of that happening.
523
    QueryPerformanceCounter(&t);
524
    timer_start = t.QuadPart;
525
}
526
int64_t ggml_time_ms(void) {
527
    LARGE_INTEGER t;
528
    QueryPerformanceCounter(&t);
529
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
530
}
531
int64_t ggml_time_us(void) {
532
    LARGE_INTEGER t;
533
    QueryPerformanceCounter(&t);
534
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
535
}
536
#else
537
0
void ggml_time_init(void) {}
538
0
int64_t ggml_time_ms(void) {
539
0
    struct timespec ts;
540
0
    clock_gettime(CLOCK_MONOTONIC, &ts);
541
0
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
542
0
}
543
544
0
int64_t ggml_time_us(void) {
545
0
    struct timespec ts;
546
0
    clock_gettime(CLOCK_MONOTONIC, &ts);
547
0
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
548
0
}
549
#endif
550
551
0
int64_t ggml_cycles(void) {
552
0
    return clock();
553
0
}
554
555
0
int64_t ggml_cycles_per_ms(void) {
556
0
    return CLOCKS_PER_SEC/1000;
557
0
}
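Typical wall-clock measurement with the helpers above; ggml_time_init() only does real work on Windows (it captures the QueryPerformanceCounter baseline) but is harmless to call elsewhere:

ggml_time_init();
const int64_t t0 = ggml_time_us();
// ... code being measured ...
const int64_t t1 = ggml_time_us();
fprintf(stderr, "elapsed: %.3f ms\n", (t1 - t0) / 1000.0);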
558
559
//
560
// cross-platform UTF-8 file paths
561
//
562
563
#ifdef _WIN32
564
static wchar_t * ggml_mbstowcs(const char * mbs) {
565
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
566
    if (!wlen) {
567
        errno = EINVAL;
568
        return NULL;
569
    }
570
571
    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
572
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
573
    if (!wlen) {
574
        GGML_FREE(wbuf);
575
        errno = EINVAL;
576
        return NULL;
577
    }
578
579
    return wbuf;
580
}
581
#endif
582
583
0
FILE * ggml_fopen(const char * fname, const char * mode) {
584
#ifdef _WIN32
585
    FILE * file = NULL;
586
587
    // convert fname (UTF-8)
588
    wchar_t * wfname = ggml_mbstowcs(fname);
589
    if (wfname) {
590
        // convert mode (ANSI)
591
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
592
        wchar_t * wmode_p = wmode;
593
        do {
594
            *wmode_p++ = (wchar_t)*mode;
595
        } while (*mode++);
596
597
        // open file
598
        file = _wfopen(wfname, wmode);
599
600
        GGML_FREE(wfname);
601
        GGML_FREE(wmode);
602
    }
603
604
    return file;
605
#else
606
0
    return fopen(fname, mode);
607
0
#endif
608
609
0
}
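ggml_fopen() lets callers pass UTF-8 paths unconditionally: on Windows the name is converted to UTF-16 and opened with _wfopen(), elsewhere it falls through to fopen(). A short sketch with a hypothetical model path:

FILE * f = ggml_fopen("models/ggml-model-q4_0.gguf", "rb"); // hypothetical path
if (f != NULL) {
    // ... read the file ...
    fclose(f);
}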
610
611
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
612
    [GGML_TYPE_I8] = {
613
        .type_name                = "i8",
614
        .blck_size                = 1,
615
        .type_size                = sizeof(int8_t),
616
        .is_quantized             = false,
617
    },
618
    [GGML_TYPE_I16] = {
619
        .type_name                = "i16",
620
        .blck_size                = 1,
621
        .type_size                = sizeof(int16_t),
622
        .is_quantized             = false,
623
    },
624
    [GGML_TYPE_I32] = {
625
        .type_name                = "i32",
626
        .blck_size                = 1,
627
        .type_size                = sizeof(int32_t),
628
        .is_quantized             = false,
629
    },
630
    [GGML_TYPE_I64] = {
631
        .type_name                = "i64",
632
        .blck_size                = 1,
633
        .type_size                = sizeof(int64_t),
634
        .is_quantized             = false,
635
    },
636
    [GGML_TYPE_F64] = {
637
        .type_name                = "f64",
638
        .blck_size                = 1,
639
        .type_size                = sizeof(double),
640
        .is_quantized             = false,
641
    },
642
    [GGML_TYPE_F32] = {
643
        .type_name                = "f32",
644
        .blck_size                = 1,
645
        .type_size                = sizeof(float),
646
        .is_quantized             = false,
647
    },
648
    [GGML_TYPE_F16] = {
649
        .type_name                = "f16",
650
        .blck_size                = 1,
651
        .type_size                = sizeof(ggml_fp16_t),
652
        .is_quantized             = false,
653
        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
654
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_fp16_row,
655
    },
656
    [GGML_TYPE_Q4_0] = {
657
        .type_name                = "q4_0",
658
        .blck_size                = QK4_0,
659
        .type_size                = sizeof(block_q4_0),
660
        .is_quantized             = true,
661
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_0,
662
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_0_ref,
663
    },
664
    [GGML_TYPE_Q4_1] = {
665
        .type_name                = "q4_1",
666
        .blck_size                = QK4_1,
667
        .type_size                = sizeof(block_q4_1),
668
        .is_quantized             = true,
669
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_1,
670
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_1_ref,
671
    },
672
    [4] = { // GGML_TYPE_Q4_2
673
        .type_name                = "DEPRECATED",
674
        .blck_size                = 0,
675
        .type_size                = 0,
676
        .is_quantized             = false,
677
    },
678
    [5] = { // GGML_TYPE_Q4_3
679
        .type_name                = "DEPRECATED",
680
        .blck_size                = 0,
681
        .type_size                = 0,
682
        .is_quantized             = false,
683
    },
684
    [GGML_TYPE_Q5_0] = {
685
        .type_name                = "q5_0",
686
        .blck_size                = QK5_0,
687
        .type_size                = sizeof(block_q5_0),
688
        .is_quantized             = true,
689
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_0,
690
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_0_ref,
691
    },
692
    [GGML_TYPE_Q5_1] = {
693
        .type_name                = "q5_1",
694
        .blck_size                = QK5_1,
695
        .type_size                = sizeof(block_q5_1),
696
        .is_quantized             = true,
697
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_1,
698
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_1_ref,
699
    },
700
    [GGML_TYPE_Q8_0] = {
701
        .type_name                = "q8_0",
702
        .blck_size                = QK8_0,
703
        .type_size                = sizeof(block_q8_0),
704
        .is_quantized             = true,
705
        .to_float                 = (ggml_to_float_t) dequantize_row_q8_0,
706
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_0_ref,
707
    },
708
    [GGML_TYPE_Q8_1] = {
709
        .type_name                = "q8_1",
710
        .blck_size                = QK8_1,
711
        .type_size                = sizeof(block_q8_1),
712
        .is_quantized             = true,
713
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_1_ref,
714
    },
715
    [GGML_TYPE_MXFP4] = {
716
        .type_name                = "mxfp4",
717
        .blck_size                = QK_MXFP4,
718
        .type_size                = sizeof(block_mxfp4),
719
        .is_quantized             = true,
720
        .to_float                 = (ggml_to_float_t) dequantize_row_mxfp4,
721
        .from_float_ref           = (ggml_from_float_t)quantize_row_mxfp4_ref,
722
    },
723
    [GGML_TYPE_Q2_K] = {
724
        .type_name                = "q2_K",
725
        .blck_size                = QK_K,
726
        .type_size                = sizeof(block_q2_K),
727
        .is_quantized             = true,
728
        .to_float                 = (ggml_to_float_t) dequantize_row_q2_K,
729
        .from_float_ref           = (ggml_from_float_t) quantize_row_q2_K_ref,
730
    },
731
    [GGML_TYPE_Q3_K] = {
732
        .type_name                = "q3_K",
733
        .blck_size                = QK_K,
734
        .type_size                = sizeof(block_q3_K),
735
        .is_quantized             = true,
736
        .to_float                 = (ggml_to_float_t) dequantize_row_q3_K,
737
        .from_float_ref           = (ggml_from_float_t) quantize_row_q3_K_ref,
738
    },
739
    [GGML_TYPE_Q4_K] = {
740
        .type_name                = "q4_K",
741
        .blck_size                = QK_K,
742
        .type_size                = sizeof(block_q4_K),
743
        .is_quantized             = true,
744
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_K,
745
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_K_ref,
746
    },
747
    [GGML_TYPE_Q5_K] = {
748
        .type_name                = "q5_K",
749
        .blck_size                = QK_K,
750
        .type_size                = sizeof(block_q5_K),
751
        .is_quantized             = true,
752
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_K,
753
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_K_ref,
754
    },
755
    [GGML_TYPE_Q6_K] = {
756
        .type_name                = "q6_K",
757
        .blck_size                = QK_K,
758
        .type_size                = sizeof(block_q6_K),
759
        .is_quantized             = true,
760
        .to_float                 = (ggml_to_float_t) dequantize_row_q6_K,
761
        .from_float_ref           = (ggml_from_float_t) quantize_row_q6_K_ref,
762
    },
763
    [GGML_TYPE_IQ2_XXS] = {
764
        .type_name                = "iq2_xxs",
765
        .blck_size                = QK_K,
766
        .type_size                = sizeof(block_iq2_xxs),
767
        .is_quantized             = true,
768
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xxs,
769
        .from_float_ref           = NULL,
770
    },
771
    [GGML_TYPE_IQ2_XS] = {
772
        .type_name                = "iq2_xs",
773
        .blck_size                = QK_K,
774
        .type_size                = sizeof(block_iq2_xs),
775
        .is_quantized             = true,
776
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xs,
777
        .from_float_ref           = NULL,
778
    },
779
    [GGML_TYPE_IQ3_XXS] = {
780
        .type_name                = "iq3_xxs",
781
        .blck_size                = QK_K,
782
        .type_size                = sizeof(block_iq3_xxs),
783
        .is_quantized             = true,
784
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_xxs,
785
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
786
    },
787
    [GGML_TYPE_IQ3_S] = {
788
        .type_name                = "iq3_s",
789
        .blck_size                = QK_K,
790
        .type_size                = sizeof(block_iq3_s),
791
        .is_quantized             = true,
792
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_s,
793
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_s_ref,
794
    },
795
    [GGML_TYPE_IQ2_S] = {
796
        .type_name                = "iq2_s",
797
        .blck_size                = QK_K,
798
        .type_size                = sizeof(block_iq2_s),
799
        .is_quantized             = true,
800
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_s,
801
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq2_s_ref,
802
    },
803
    [GGML_TYPE_IQ1_S] = {
804
        .type_name                = "iq1_s",
805
        .blck_size                = QK_K,
806
        .type_size                = sizeof(block_iq1_s),
807
        .is_quantized             = true,
808
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_s,
809
        .from_float_ref           = NULL,
810
    },
811
    [GGML_TYPE_IQ1_M] = {
812
        .type_name                = "iq1_m",
813
        .blck_size                = QK_K,
814
        .type_size                = sizeof(block_iq1_m),
815
        .is_quantized             = true,
816
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_m,
817
        .from_float_ref           = NULL,
818
    },
819
    [GGML_TYPE_IQ4_NL] = {
820
        .type_name                = "iq4_nl",
821
        .blck_size                = QK4_NL,
822
        .type_size                = sizeof(block_iq4_nl),
823
        .is_quantized             = true,
824
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_nl,
825
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_nl_ref,
826
    },
827
    [GGML_TYPE_IQ4_XS] = {
828
        .type_name                = "iq4_xs",
829
        .blck_size                = QK_K,
830
        .type_size                = sizeof(block_iq4_xs),
831
        .is_quantized             = true,
832
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_xs,
833
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_xs_ref,
834
    },
835
    [GGML_TYPE_Q8_K] = {
836
        .type_name                = "q8_K",
837
        .blck_size                = QK_K,
838
        .type_size                = sizeof(block_q8_K),
839
        .is_quantized             = true,
840
    },
841
    [GGML_TYPE_BF16] = {
842
        .type_name                = "bf16",
843
        .blck_size                = 1,
844
        .type_size                = sizeof(ggml_bf16_t),
845
        .is_quantized             = false,
846
        .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
847
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
848
    },
849
    [31] = { // GGML_TYPE_Q4_0_4_4
850
        .type_name                = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
851
        .blck_size                = 0,
852
        .type_size                = 0,
853
        .is_quantized             = false,
854
    },
855
    [32] = { // GGML_TYPE_Q4_0_4_8
856
        .type_name                = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
857
        .blck_size                = 0,
858
        .type_size                = 0,
859
        .is_quantized             = false,
860
    },
861
    [33] = { // GGML_TYPE_Q4_0_8_8
862
        .type_name                = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
863
        .blck_size                = 0,
864
        .type_size                = 0,
865
        .is_quantized             = false,
866
    },
867
    [GGML_TYPE_TQ1_0] = {
868
        .type_name                = "tq1_0",
869
        .blck_size                = QK_K,
870
        .type_size                = sizeof(block_tq1_0),
871
        .is_quantized             = true,
872
        .to_float                 = (ggml_to_float_t) dequantize_row_tq1_0,
873
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq1_0_ref,
874
    },
875
    [GGML_TYPE_TQ2_0] = {
876
        .type_name                = "tq2_0",
877
        .blck_size                = QK_K,
878
        .type_size                = sizeof(block_tq2_0),
879
        .is_quantized             = true,
880
        .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
881
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
882
    },
883
    [36] = { // GGML_TYPE_IQ4_NL_4_4
884
        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
885
        .blck_size                = 0,
886
        .type_size                = 0,
887
        .is_quantized             = false,
888
    },
889
    [37] = { // GGML_TYPE_IQ4_NL_4_8
890
        .type_name                = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
891
        .blck_size                = 0,
892
        .type_size                = 0,
893
        .is_quantized             = false,
894
    },
895
    [38] = { // GGML_TYPE_IQ4_NL_8_8
896
        .type_name                = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
897
        .blck_size                = 0,
898
        .type_size                = 0,
899
        .is_quantized             = false,
900
    },
901
};
902
903
0
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
904
0
    GGML_ASSERT(type < GGML_TYPE_COUNT);
905
0
    return &type_traits[type];
906
0
}
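The traits table gives everything needed to size quantized data by hand. For q4_0, each block of QK4_0 = 32 values occupies sizeof(block_q4_0) = 18 bytes (a 2-byte fp16 scale plus 16 packed nibbles), so a 4096-wide row costs (4096/32)*18 = 2304 bytes. The same arithmetic via the traits:

const struct ggml_type_traits * tt = ggml_get_type_traits(GGML_TYPE_Q4_0);
const size_t row_bytes = (4096 / tt->blck_size) * tt->type_size;  // 2304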
907
908
//
909
// ggml object
910
//
911
912
struct ggml_object {
913
    size_t offs;
914
    size_t size;
915
916
    struct ggml_object * next;
917
918
    enum ggml_object_type type;
919
920
    char padding[4];
921
};
922
923
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
924
925
//
926
// ggml context
927
//
928
929
struct ggml_context {
930
    size_t mem_size;
931
    void * mem_buffer;
932
    bool   mem_buffer_owned;
933
    bool   no_alloc;
934
935
    int    n_objects;
936
937
    struct ggml_object * objects_begin;
938
    struct ggml_object * objects_end;
939
};
940
941
//
942
// data types
943
//
944
945
static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
946
    "NONE",
947
948
    "DUP",
949
    "ADD",
950
    "ADD_ID",
951
    "ADD1",
952
    "ACC",
953
    "SUB",
954
    "MUL",
955
    "DIV",
956
    "SQR",
957
    "SQRT",
958
    "LOG",
959
    "SIN",
960
    "COS",
961
    "SUM",
962
    "SUM_ROWS",
963
    "CUMSUM",
964
    "MEAN",
965
    "ARGMAX",
966
    "COUNT_EQUAL",
967
    "REPEAT",
968
    "REPEAT_BACK",
969
    "CONCAT",
970
    "SILU_BACK",
971
    "NORM",
972
    "RMS_NORM",
973
    "RMS_NORM_BACK",
974
    "GROUP_NORM",
975
    "L2_NORM",
976
977
    "MUL_MAT",
978
    "MUL_MAT_ID",
979
    "OUT_PROD",
980
981
    "SCALE",
982
    "SET",
983
    "CPY",
984
    "CONT",
985
    "RESHAPE",
986
    "VIEW",
987
    "PERMUTE",
988
    "TRANSPOSE",
989
    "GET_ROWS",
990
    "GET_ROWS_BACK",
991
    "SET_ROWS",
992
    "DIAG",
993
    "DIAG_MASK_INF",
994
    "DIAG_MASK_ZERO",
995
    "SOFT_MAX",
996
    "SOFT_MAX_BACK",
997
    "ROPE",
998
    "ROPE_BACK",
999
    "CLAMP",
1000
    "CONV_TRANSPOSE_1D",
1001
    "IM2COL",
1002
    "IM2COL_BACK",
1003
    "IM2COL_3D",
1004
    "CONV_2D",
1005
    "CONV_3D",
1006
    "CONV_2D_DW",
1007
    "CONV_TRANSPOSE_2D",
1008
    "POOL_1D",
1009
    "POOL_2D",
1010
    "POOL_2D_BACK",
1011
    "UPSCALE",
1012
    "PAD",
1013
    "PAD_REFLECT_1D",
1014
    "ROLL",
1015
    "ARANGE",
1016
    "TIMESTEP_EMBEDDING",
1017
    "ARGSORT",
1018
    "TOP_K",
1019
    "LEAKY_RELU",
1020
    "TRI",
1021
    "FILL",
1022
1023
    "FLASH_ATTN_EXT",
1024
    "FLASH_ATTN_BACK",
1025
    "SSM_CONV",
1026
    "SSM_SCAN",
1027
    "WIN_PART",
1028
    "WIN_UNPART",
1029
    "GET_REL_POS",
1030
    "ADD_REL_POS",
1031
    "RWKV_WKV6",
1032
    "GATED_LINEAR_ATTN",
1033
    "RWKV_WKV7",
1034
    "SOLVE_TRI",
1035
1036
    "UNARY",
1037
1038
    "MAP_CUSTOM1",
1039
    "MAP_CUSTOM2",
1040
    "MAP_CUSTOM3",
1041
1042
    "CUSTOM",
1043
1044
    "CROSS_ENTROPY_LOSS",
1045
    "CROSS_ENTROPY_LOSS_BACK",
1046
    "OPT_STEP_ADAMW",
1047
    "OPT_STEP_SGD",
1048
1049
    "GLU",
1050
};
1051
1052
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1053
1054
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1055
    "none",
1056
1057
    "x",
1058
    "x+y",
1059
    "x[i]+y",
1060
    "x+y",
1061
    "view(x,nb,offset)+=y->x",
1062
    "x-y",
1063
    "x*y",
1064
    "x/y",
1065
    "x^2",
1066
    "√x",
1067
    "log(x)",
1068
    "sin(x)",
1069
    "cos(x)",
1070
    "Σx",
1071
    "Σx_k",
1072
    "cumsum(x)",
1073
    "Σx/n",
1074
    "argmax(x)",
1075
    "count_equal(x)",
1076
    "repeat(x)",
1077
    "repeat_back(x)",
1078
    "concat(x, y)",
1079
    "silu_back(x)",
1080
    "norm(x)",
1081
    "rms_norm(x)",
1082
    "rms_norm_back(x)",
1083
    "group_norm(x)",
1084
    "l2_norm(x)",
1085
1086
    "X*Y",
1087
    "X[i]*Y",
1088
    "X*Y",
1089
1090
    "x*v",
1091
    "y-\\>view(x)",
1092
    "x-\\>y",
1093
    "cont(x)",
1094
    "reshape(x)",
1095
    "view(x)",
1096
    "permute(x)",
1097
    "transpose(x)",
1098
    "get_rows(x)",
1099
    "get_rows_back(x)",
1100
    "set_rows(x)",
1101
    "diag(x)",
1102
    "diag_mask_inf(x)",
1103
    "diag_mask_zero(x)",
1104
    "soft_max(x)",
1105
    "soft_max_back(x)",
1106
    "rope(x)",
1107
    "rope_back(x)",
1108
    "clamp(x)",
1109
    "conv_transpose_1d(x)",
1110
    "im2col(x)",
1111
    "im2col_back(x)",
1112
    "im2col_3d(x)",
1113
    "conv_2d(x)",
1114
    "conv_3d(x)",
1115
    "conv_2d_dw(x)",
1116
    "conv_transpose_2d(x)",
1117
    "pool_1d(x)",
1118
    "pool_2d(x)",
1119
    "pool_2d_back(x)",
1120
    "upscale(x)",
1121
    "pad(x)",
1122
    "pad_reflect_1d(x)",
1123
    "roll(x)",
1124
    "arange(start, stop, step)",
1125
    "timestep_embedding(timesteps, dim, max_period)",
1126
    "argsort(x)",
1127
    "top_k(x)",
1128
    "leaky_relu(x)",
1129
    "tri(x)",
1130
    "fill(x, c)",
1131
1132
    "flash_attn_ext(x)",
1133
    "flash_attn_back(x)",
1134
    "ssm_conv(x)",
1135
    "ssm_scan(x)",
1136
    "win_part(x)",
1137
    "win_unpart(x)",
1138
    "get_rel_pos(x)",
1139
    "add_rel_pos(x)",
1140
    "rwkv_wkv6(k, v, r, tf, td, s)",
1141
    "gated_linear_attn(k, v, q, gate, s)",
1142
    "rwkv_wkv7(r, w, k, v, a, b, s)",
1143
    "A X = B, A triangular, solve X",
1144
1145
    "unary(x)",
1146
1147
    "map_custom(x)",
1148
    "map_custom(x,y)",
1149
    "map_custom(x,y,z)",
1150
1151
    "custom(x)",
1152
1153
    "cross_entropy_loss(x,y)",
1154
    "cross_entropy_loss_back(x,y)",
1155
    "adamw(x)",
1156
    "sgd(x)",
1157
1158
    "glu(x)",
1159
};
1160
1161
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1162
1163
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
1164
1165
static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1166
    "ABS",
1167
    "SGN",
1168
    "NEG",
1169
    "STEP",
1170
    "TANH",
1171
    "ELU",
1172
    "RELU",
1173
    "SIGMOID",
1174
    "GELU",
1175
    "GELU_QUICK",
1176
    "SILU",
1177
    "HARDSWISH",
1178
    "HARDSIGMOID",
1179
    "EXP",
1180
    "EXPM1",
1181
    "SOFTPLUS",
1182
    "GELU_ERF",
1183
    "XIELU",
1184
    "FLOOR",
1185
    "CEIL",
1186
    "ROUND",
1187
    "TRUNC",
1188
};
1189
1190
static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
1191
1192
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
1193
    "REGLU",
1194
    "GEGLU",
1195
    "SWIGLU",
1196
    "SWIGLU_OAI",
1197
    "GEGLU_ERF",
1198
    "GEGLU_QUICK",
1199
};
1200
1201
static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6");
1202
1203
1204
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
1205
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
1206
1207
1208
////////////////////////////////////////////////////////////////////////////////
1209
1210
0
void ggml_print_object(const struct ggml_object * obj) {
1211
0
    GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
1212
0
            obj->type, obj->offs, obj->size, (const void *) obj->next);
1213
0
}
1214
1215
0
void ggml_print_objects(const struct ggml_context * ctx) {
1216
0
    struct ggml_object * obj = ctx->objects_begin;
1217
1218
0
    GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
1219
1220
0
    while (obj != NULL) {
1221
0
        ggml_print_object(obj);
1222
0
        obj = obj->next;
1223
0
    }
1224
1225
0
    GGML_LOG_INFO("%s: --- end ---\n", __func__);
1226
0
}
1227
1228
0
int64_t ggml_nelements(const struct ggml_tensor * tensor) {
1229
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1230
1231
0
    return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1232
0
}
1233
1234
0
int64_t ggml_nrows(const struct ggml_tensor * tensor) {
1235
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1236
1237
0
    return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1238
0
}
1239
1240
0
size_t ggml_nbytes(const struct ggml_tensor * tensor) {
1241
0
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1242
0
        if (tensor->ne[i] <= 0) {
1243
0
            return 0;
1244
0
        }
1245
0
    }
1246
1247
0
    size_t nbytes;
1248
0
    const size_t blck_size = ggml_blck_size(tensor->type);
1249
0
    if (blck_size == 1) {
1250
0
        nbytes = ggml_type_size(tensor->type);
1251
0
        for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1252
0
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1253
0
        }
1254
0
    }
1255
0
    else {
1256
0
        nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
1257
0
        for (int i = 1; i < GGML_MAX_DIMS; ++i) {
1258
0
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1259
0
        }
1260
0
    }
1261
1262
0
    return nbytes;
1263
0
}
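For a contiguous tensor the stride terms collapse to the packed size. A worked example for a 2D f32 tensor of shape ne = {8, 3}:

// nb[0] = 4 (sizeof(float)), nb[1] = 32 (8 floats per row), blck_size = 1:
//   nbytes = type_size + (ne[0]-1)*nb[0] + (ne[1]-1)*nb[1]
//          = 4 + 7*4 + 2*32 = 96 = 8*3*sizeof(float)
// For a strided view the same formula counts the span the view actually
// addresses (including row padding up to its last element) rather than
// the packed size ne[0]*ne[1]*type_size.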
1264
1265
0
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
1266
0
    return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
1267
0
}
1268
1269
0
int64_t ggml_blck_size(enum ggml_type type) {
1270
0
    return type_traits[type].blck_size;
1271
0
}
1272
1273
0
size_t ggml_type_size(enum ggml_type type) {
1274
0
    return type_traits[type].type_size;
1275
0
}
1276
1277
0
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
1278
0
    assert(ne % ggml_blck_size(type) == 0);
1279
0
    return ggml_type_size(type)*ne/ggml_blck_size(type);
1280
0
}
1281
1282
0
double ggml_type_sizef(enum ggml_type type) {
1283
0
    return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
1284
0
}
1285
1286
0
const char * ggml_type_name(enum ggml_type type) {
1287
0
    return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
1288
0
}
1289
1290
0
bool ggml_is_quantized(enum ggml_type type) {
1291
0
    return type_traits[type].is_quantized;
1292
0
}
1293
1294
0
const char * ggml_op_name(enum ggml_op op) {
1295
0
    return GGML_OP_NAME[op];
1296
0
}
1297
1298
0
const char * ggml_op_symbol(enum ggml_op op) {
1299
0
    return GGML_OP_SYMBOL[op];
1300
0
}
1301
1302
0
const char * ggml_unary_op_name(enum ggml_unary_op op) {
1303
0
    return GGML_UNARY_OP_NAME[op];
1304
0
}
1305
1306
0
const char * ggml_glu_op_name(enum ggml_glu_op op) {
1307
0
    return GGML_GLU_OP_NAME[op];
1308
0
}
1309
1310
0
const char * ggml_op_desc(const struct ggml_tensor * t) {
1311
0
    if (t->op == GGML_OP_UNARY) {
1312
0
        enum ggml_unary_op uop = ggml_get_unary_op(t);
1313
0
        return ggml_unary_op_name(uop);
1314
0
    }
1315
0
    if (t->op == GGML_OP_GLU) {
1316
0
        enum ggml_glu_op gop = ggml_get_glu_op(t);
1317
0
        return ggml_glu_op_name(gop);
1318
0
    }
1319
0
    return ggml_op_name(t->op);
1320
0
}
1321
1322
0
size_t ggml_element_size(const struct ggml_tensor * tensor) {
1323
0
    return ggml_type_size(tensor->type);
1324
0
}
1325
1326
0
bool ggml_is_scalar(const struct ggml_tensor * tensor) {
1327
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1328
1329
0
    return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1330
0
}
1331
1332
0
bool ggml_is_vector(const struct ggml_tensor * tensor) {
1333
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1334
1335
0
    return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1336
0
}
1337
1338
0
bool ggml_is_matrix(const struct ggml_tensor * tensor) {
1339
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1340
1341
0
    return tensor->ne[2] == 1 && tensor->ne[3] == 1;
1342
0
}
1343
1344
0
bool ggml_is_3d(const struct ggml_tensor * tensor) {
1345
0
    return tensor->ne[3] == 1;
1346
0
}
1347
1348
0
int ggml_n_dims(const struct ggml_tensor * tensor) {
1349
0
    for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
1350
0
        if (tensor->ne[i] > 1) {
1351
0
            return i + 1;
1352
0
        }
1353
0
    }
1354
0
    return 1;
1355
0
}
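How the shape predicates above relate, for a tensor t with ne = {4, 5, 1, 1}:

// ggml_is_scalar(t) == false   (ne[0] != 1)
// ggml_is_vector(t) == false   (ne[1] != 1)
// ggml_is_matrix(t) == true    (ne[2] == ne[3] == 1)
// ggml_is_3d(t)     == true    (only ne[3] must be 1)
// ggml_n_dims(t)    == 2       (largest i with ne[i] > 1, plus one)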
1356
1357
0
enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
1358
0
    enum ggml_type wtype = GGML_TYPE_COUNT;
1359
1360
0
    switch (ftype) {
1361
0
        case GGML_FTYPE_ALL_F32:              wtype = GGML_TYPE_F32;   break;
1362
0
        case GGML_FTYPE_MOSTLY_F16:           wtype = GGML_TYPE_F16;   break;
1363
0
        case GGML_FTYPE_MOSTLY_BF16:          wtype = GGML_TYPE_BF16;  break;
1364
0
        case GGML_FTYPE_MOSTLY_Q4_0:          wtype = GGML_TYPE_Q4_0;  break;
1365
0
        case GGML_FTYPE_MOSTLY_Q4_1:          wtype = GGML_TYPE_Q4_1;  break;
1366
0
        case GGML_FTYPE_MOSTLY_Q5_0:          wtype = GGML_TYPE_Q5_0;  break;
1367
0
        case GGML_FTYPE_MOSTLY_Q5_1:          wtype = GGML_TYPE_Q5_1;  break;
1368
0
        case GGML_FTYPE_MOSTLY_Q8_0:          wtype = GGML_TYPE_Q8_0;  break;
1369
0
        case GGML_FTYPE_MOSTLY_MXFP4:         wtype = GGML_TYPE_MXFP4; break;
1370
0
        case GGML_FTYPE_MOSTLY_Q2_K:          wtype = GGML_TYPE_Q2_K;  break;
1371
0
        case GGML_FTYPE_MOSTLY_Q3_K:          wtype = GGML_TYPE_Q3_K;  break;
1372
0
        case GGML_FTYPE_MOSTLY_Q4_K:          wtype = GGML_TYPE_Q4_K;  break;
1373
0
        case GGML_FTYPE_MOSTLY_Q5_K:          wtype = GGML_TYPE_Q5_K;  break;
1374
0
        case GGML_FTYPE_MOSTLY_Q6_K:          wtype = GGML_TYPE_Q6_K;  break;
1375
0
        case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS;  break;
1376
0
        case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;   break;
1377
0
        case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS;  break;
1378
0
        case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;    break;
1379
0
        case GGML_FTYPE_MOSTLY_IQ1_M:         wtype = GGML_TYPE_IQ1_M;    break;
1380
0
        case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;   break;
1381
0
        case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;   break;
1382
0
        case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;    break;
1383
0
        case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;    break;
1384
0
        case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
1385
0
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
1386
0
    }
1387
1388
0
    GGML_ASSERT(wtype != GGML_TYPE_COUNT);
1389
1390
0
    return wtype;
1391
0
}
1392
1393
0
size_t ggml_tensor_overhead(void) {
1394
0
    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
1395
0
}
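This overhead figure is the usual basis for sizing a context whose tensor data is allocated from the pool (no_alloc == false): each tensor costs one object header plus its data padded to GGML_MEM_ALIGN. A sizing sketch for n_tensors f32 tensors of ne elements each; the helper name is ours:

static size_t ctx_size_for(size_t n_tensors, int64_t ne) {
    return n_tensors * (ggml_tensor_overhead() + GGML_PAD((size_t) ne * sizeof(float), GGML_MEM_ALIGN));
}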
1396
1397
0
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
1398
0
    return tensor->nb[0] > tensor->nb[1];
1399
0
}
1400
1401
0
static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
1402
0
    size_t next_nb = ggml_type_size(tensor->type);
1403
0
    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
1404
0
        return false;
1405
0
    }
1406
0
    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
1407
0
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
1408
0
        if (tensor->ne[i] != 1) {
1409
0
            if (i > n) {
1410
0
                if (tensor->nb[i] != next_nb) {
1411
0
                    return false;
1412
0
                }
1413
0
                next_nb *= tensor->ne[i];
1414
0
            } else {
1415
                // this dimension does not need to be contiguous
1416
0
                next_nb = tensor->ne[i]*tensor->nb[i];
1417
0
            }
1418
0
        }
1419
0
    }
1420
0
    return true;
1421
0
}
1422
1423
0
bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
1424
0
    return ggml_is_contiguous_0(tensor);
1425
0
}
1426
1427
0
bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
1428
0
    return ggml_is_contiguous_n(tensor, 0);
1429
0
}
1430
1431
0
bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
1432
0
    return ggml_is_contiguous_n(tensor, 1);
1433
0
}
1434
1435
0
bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
1436
0
    return ggml_is_contiguous_n(tensor, 2);
1437
0
}
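The _n suffix marks how many leading dimensions may have arbitrary strides. A worked example of the distinction:

// Rows packed internally but padded between rows:
//   ne = {4, 3, 1, 1}, type f32, nb = {4, 32, 96, 96}  (row stride 32 > 16)
//   ggml_is_contiguous_0() == false  (nb[1] != 16, the packed row size)
//   ggml_is_contiguous_1() == true   (dim 1's stride is exempt)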
1438
1439
0
bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
1440
0
    return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
1441
0
}
1442
1443
0
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
1444
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1445
1446
0
    return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
1447
0
}
1448
1449
0
bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
1450
0
    return
1451
0
        tensor->nb[0] > tensor->nb[2] &&
1452
0
        tensor->nb[1] > tensor->nb[0] &&
1453
0
        tensor->nb[2] == ggml_type_size(tensor->type);
1454
0
}
1455
1456
0
bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) {
1457
0
    return
1458
0
        tensor->ne[0] == ggml_blck_size(tensor->type) ||
1459
0
        tensor->nb[0] == ggml_type_size(tensor->type);
1460
0
}
1461
1462
0
static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
1463
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1464
1465
0
    return
1466
0
        tensor->nb[0] == ggml_type_size(tensor->type) &&
1467
0
        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
1468
0
        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
1469
0
}
1470
1471
0
bool ggml_is_empty(const struct ggml_tensor * tensor) {
1472
0
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1473
0
        if (tensor->ne[i] == 0) {
1474
            // empty if any dimension has no elements
1475
0
            return true;
1476
0
        }
1477
0
    }
1478
0
    return false;
1479
0
}
1480
1481
0
bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1482
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1483
1484
0
    return
1485
0
        (t0->ne[0] == t1->ne[0]) &&
1486
0
        (t0->ne[1] == t1->ne[1]) &&
1487
0
        (t0->ne[2] == t1->ne[2]) &&
1488
0
        (t0->ne[3] == t1->ne[3]);
1489
0
}
1490
1491
0
bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1492
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1493
1494
0
    return
1495
0
        (t0->nb[0] == t1->nb[0]) &&
1496
0
        (t0->nb[1] == t1->nb[1]) &&
1497
0
        (t0->nb[2] == t1->nb[2]) &&
1498
0
        (t0->nb[3] == t1->nb[3]);
1499
0
}
1500
1501
// check if t1 can be represented as a repetition of t0
1502
0
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1503
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1504
1505
0
    return ggml_is_empty(t0) ? ggml_is_empty(t1) :
1506
0
        (t1->ne[0]%t0->ne[0] == 0) &&
1507
0
        (t1->ne[1]%t0->ne[1] == 0) &&
1508
0
        (t1->ne[2]%t0->ne[2] == 0) &&
1509
0
        (t1->ne[3]%t0->ne[3] == 0);
1510
0
}
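ggml_can_repeat() is the broadcast rule used by binary ops: t1 must be a whole-number tiling of t0 along every dimension. For instance:

// t0 ne = {4096,    1, 1, 1}   (e.g. a bias row)
// t1 ne = {4096, 1024, 1, 1}   (a full activation matrix)
// 4096 % 4096 == 0 and 1024 % 1 == 0, so t0 repeats cleanly across t1's rows.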
1511
1512
0
static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1513
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1514
1515
0
    return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
1516
0
}
1517
1518
// assert that pointer is aligned to GGML_MEM_ALIGN
1519
#define GGML_ASSERT_ALIGNED(ptr) \
1520
0
    GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
1521
1522
////////////////////////////////////////////////////////////////////////////////
1523
1524
0
struct ggml_context * ggml_init(struct ggml_init_params params) {
1525
0
    static bool is_first_call = true;
1526
1527
0
    ggml_critical_section_start();
1528
1529
0
    if (is_first_call) {
1530
        // initialize time system (required on Windows)
1531
0
        ggml_time_init();
1532
1533
0
        is_first_call = false;
1534
0
    }
1535
1536
0
    ggml_critical_section_end();
1537
1538
0
    struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
1539
1540
    // allow to call ggml_init with 0 size
1541
0
    if (params.mem_size == 0) {
1542
0
        params.mem_size = GGML_MEM_ALIGN;
1543
0
    }
1544
1545
0
    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
1546
1547
0
    *ctx = (struct ggml_context) {
1548
0
        /*.mem_size           =*/ mem_size,
1549
0
        /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
1550
0
        /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
1551
0
        /*.no_alloc           =*/ params.no_alloc,
1552
0
        /*.n_objects          =*/ 0,
1553
0
        /*.objects_begin      =*/ NULL,
1554
0
        /*.objects_end        =*/ NULL,
1555
0
    };
1556
1557
0
    GGML_ASSERT(ctx->mem_buffer != NULL);
1558
1559
0
    GGML_ASSERT_ALIGNED(ctx->mem_buffer);
1560
1561
0
    GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
1562
1563
0
    return ctx;
1564
0
}
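A minimal context lifecycle sketch under stated assumptions (a 16 MiB library-owned pool, tensor data allocated inside it):

struct ggml_init_params params = {
    /*.mem_size   =*/ 16 * 1024 * 1024,
    /*.mem_buffer =*/ NULL,   // NULL: ggml allocates and owns the pool
    /*.no_alloc   =*/ false,  // tensor data lives inside the pool
};
struct ggml_context * ctx = ggml_init(params);
// ... create and use tensors ...
ggml_free(ctx);  // frees the pool because mem_buffer_owned is true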
1565
1566
0
void ggml_reset(struct ggml_context * ctx) {
1567
0
    if (ctx == NULL) {
1568
0
        return;
1569
0
    }
1570
1571
0
    ctx->n_objects     = 0;
1572
0
    ctx->objects_begin = NULL;
1573
0
    ctx->objects_end   = NULL;
1574
0
}
1575
1576
0
void ggml_free(struct ggml_context * ctx) {
1577
0
    if (ctx == NULL) {
1578
0
        return;
1579
0
    }
1580
1581
0
    if (ctx->mem_buffer_owned) {
1582
0
        ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
1583
0
    }
1584
1585
0
    GGML_FREE(ctx);
1586
0
}
1587
1588
0
size_t ggml_used_mem(const struct ggml_context * ctx) {
1589
0
    return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
1590
0
}
1591
1592
0
bool ggml_get_no_alloc(struct ggml_context * ctx) {
1593
0
    return ctx->no_alloc;
1594
0
}
1595
1596
0
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
1597
0
    ctx->no_alloc = no_alloc;
1598
0
}
1599
1600
0
void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
1601
0
    return ctx->mem_buffer;
1602
0
}
1603
1604
0
size_t ggml_get_mem_size(const struct ggml_context * ctx) {
1605
0
    return ctx->mem_size;
1606
0
}
1607
1608
0
size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
1609
0
    size_t max_size = 0;
1610
1611
0
    for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
1612
0
        size_t bytes = ggml_nbytes(tensor);
1613
0
        max_size = MAX(max_size, bytes);
1614
0
    }
1615
1616
0
    return max_size;
1617
0
}
1618
1619
////////////////////////////////////////////////////////////////////////////////
1620
1621
0
static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
1622
    // always insert objects at the end of the context's memory pool
1623
0
    struct ggml_object * obj_cur = ctx->objects_end;
1624
1625
0
    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
1626
0
    const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
1627
0
    const size_t cur_end  = cur_offs + cur_size;
1628
1629
    // align to GGML_MEM_ALIGN
1630
0
    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
1631
1632
0
    char * const mem_buffer = ctx->mem_buffer;
1633
0
    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
1634
1635
0
    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
1636
0
        GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
1637
0
                __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
1638
#ifndef NDEBUG
1639
        GGML_ABORT("not enough space in the context's memory pool");
1640
#endif
1641
0
        return NULL;
1642
0
    }
1643
1644
0
    *obj_new = (struct ggml_object) {
1645
0
        .offs = cur_end + GGML_OBJECT_SIZE,
1646
0
        .size = size_needed,
1647
0
        .next = NULL,
1648
0
        .type = type,
1649
0
    };
1650
1651
0
    GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
1652
1653
0
    if (obj_cur != NULL) {
1654
0
        obj_cur->next = obj_new;
1655
0
    } else {
1656
        // this is the first object in this context
1657
0
        ctx->objects_begin = obj_new;
1658
0
    }
1659
1660
0
    ctx->objects_end = obj_new;
1661
1662
    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
1663
1664
0
    return obj_new;
1665
0
}
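Every allocation in the pool is a GGML_OBJECT_SIZE header followed by a payload padded to GGML_MEM_ALIGN, chained through obj->next. A common consequence is pool sizing for metadata-only contexts; a sketch, assuming ggml_tensor_overhead() from ggml.h (one object header plus one tensor struct per tensor):

    // pool that can hold up to 128 tensor headers, with data allocated elsewhere
    const size_t mem_size = 128*ggml_tensor_overhead();

    struct ggml_init_params params = {
        /*.mem_size   =*/ mem_size,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true, // metadata only; data buffers come from a backend
    };
    struct ggml_context * ctx = ggml_init(params);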
1666
1667
static struct ggml_tensor * ggml_new_tensor_impl(
1668
        struct ggml_context * ctx,
1669
        enum   ggml_type      type,
1670
        int                   n_dims,
1671
        const int64_t       * ne,
1672
        struct ggml_tensor  * view_src,
1673
0
        size_t                view_offs) {
1674
1675
0
    GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
1676
0
    GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
1677
1678
    // find the base tensor and absolute offset
1679
0
    if (view_src != NULL && view_src->view_src != NULL) {
1680
0
        view_offs += view_src->view_offs;
1681
0
        view_src   = view_src->view_src;
1682
0
    }
1683
1684
0
    size_t data_size = ggml_row_size(type, ne[0]);
1685
0
    for (int i = 1; i < n_dims; i++) {
1686
0
        data_size *= ne[i];
1687
0
    }
1688
1689
0
    GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
1690
1691
0
    void * data = view_src != NULL ? view_src->data : NULL;
1692
0
    if (data != NULL) {
1693
0
        data = (char *) data + view_offs;
1694
0
    }
1695
1696
0
    size_t obj_alloc_size = 0;
1697
1698
0
    if (view_src == NULL && !ctx->no_alloc) {
1699
        // allocate tensor data in the context's memory pool
1700
0
        obj_alloc_size = data_size;
1701
0
    }
1702
1703
0
    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
1704
0
    GGML_ASSERT(obj_new);
1705
1706
0
    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
1707
1708
0
    *result = (struct ggml_tensor) {
1709
0
        /*.type         =*/ type,
1710
0
        /*.buffer       =*/ NULL,
1711
0
        /*.ne           =*/ { 1, 1, 1, 1 },
1712
0
        /*.nb           =*/ { 0, 0, 0, 0 },
1713
0
        /*.op           =*/ GGML_OP_NONE,
1714
0
        /*.op_params    =*/ { 0 },
1715
0
        /*.flags        =*/ 0,
1716
0
        /*.src          =*/ { NULL },
1717
0
        /*.view_src     =*/ view_src,
1718
0
        /*.view_offs    =*/ view_offs,
1719
0
        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
1720
0
        /*.name         =*/ { 0 },
1721
0
        /*.extra        =*/ NULL,
1722
0
        /*.padding      =*/ { 0 },
1723
0
    };
1724
1725
    // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
1726
    //GGML_ASSERT_ALIGNED(result->data);
1727
1728
0
    for (int i = 0; i < n_dims; i++) {
1729
0
        result->ne[i] = ne[i];
1730
0
    }
1731
1732
0
    result->nb[0] = ggml_type_size(type);
1733
0
    result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
1734
0
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
1735
0
        result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
1736
0
    }
1737
1738
0
    ctx->n_objects++;
1739
1740
0
    return result;
1741
0
}
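The nb strides computed above are in bytes, while ne counts elements (of possibly-blocked types). A worked sketch for a plain F32 matrix, where blck_size is 1:

    struct ggml_tensor * m = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3); // ne = {4, 3, 1, 1}
    // nb[0] = sizeof(float)   =  4  bytes between elements within a row
    // nb[1] = nb[0]*(ne[0]/1) = 16  bytes between rows
    // nb[2] = nb[1]*ne[1]     = 48  bytes between 2D planes
    // nb[3] = nb[2]*ne[2]     = 48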
1742
1743
struct ggml_tensor * ggml_new_tensor(
1744
        struct ggml_context * ctx,
1745
        enum   ggml_type      type,
1746
        int                   n_dims,
1747
0
        const int64_t       * ne) {
1748
0
    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
1749
0
}
1750
1751
struct ggml_tensor * ggml_new_tensor_1d(
1752
        struct ggml_context * ctx,
1753
        enum   ggml_type      type,
1754
0
        int64_t ne0) {
1755
0
    return ggml_new_tensor(ctx, type, 1, &ne0);
1756
0
}
1757
1758
struct ggml_tensor * ggml_new_tensor_2d(
1759
        struct ggml_context * ctx,
1760
        enum   ggml_type      type,
1761
        int64_t ne0,
1762
0
        int64_t ne1) {
1763
0
    const int64_t ne[2] = { ne0, ne1 };
1764
0
    return ggml_new_tensor(ctx, type, 2, ne);
1765
0
}
1766
1767
struct ggml_tensor * ggml_new_tensor_3d(
1768
        struct ggml_context * ctx,
1769
        enum   ggml_type      type,
1770
        int64_t ne0,
1771
        int64_t ne1,
1772
0
        int64_t ne2) {
1773
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
1774
0
    return ggml_new_tensor(ctx, type, 3, ne);
1775
0
}
1776
1777
struct ggml_tensor * ggml_new_tensor_4d(
1778
        struct ggml_context * ctx,
1779
        enum   ggml_type type,
1780
        int64_t ne0,
1781
        int64_t ne1,
1782
        int64_t ne2,
1783
0
        int64_t ne3) {
1784
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
1785
0
    return ggml_new_tensor(ctx, type, 4, ne);
1786
0
}
1787
1788
0
void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
1789
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
1790
1791
0
    return (uint8_t *)ctx->mem_buffer + obj->offs;
1792
0
}
1793
1794
0
struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
1795
0
    return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
1796
0
}
1797
1798
0
void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
1799
0
    const int64_t ne2 = tensor->ne[2];
1800
0
    const int64_t ne1 = tensor->ne[1];
1801
0
    const int64_t ne0 = tensor->ne[0];
1802
1803
0
    const int64_t i3_ = (i/(ne2*ne1*ne0));
1804
0
    const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
1805
0
    const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
1806
0
    const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
1807
1808
0
    if (i0) {
1809
0
        * i0 = i0_;
1810
0
    }
1811
0
    if (i1) {
1812
0
        * i1 = i1_;
1813
0
    }
1814
0
    if (i2) {
1815
0
        * i2 = i2_;
1816
0
    }
1817
0
    if (i3) {
1818
0
        * i3 = i3_;
1819
0
    }
1820
0
}
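This inverts the flat index i = i0 + ne0*(i1 + ne1*(i2 + ne2*i3)). A small worked example, assuming t is a tensor with ne = {4, 3, 2, 1}:

    int64_t i0, i1, i2, i3;
    ggml_unravel_index(t, 17, &i0, &i1, &i2, &i3); // 17 = 1 + 4*(1 + 3*1) -> (1, 1, 1, 0)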
1821
1822
0
void * ggml_get_data(const struct ggml_tensor * tensor) {
1823
0
    return tensor->data;
1824
0
}
1825
1826
0
float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
1827
0
    assert(tensor->type == GGML_TYPE_F32);
1828
0
    return (float *)(tensor->data);
1829
0
}
1830
1831
0
enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
1832
0
    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
1833
0
    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
1834
0
}
1835
1836
0
enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) {
1837
0
    GGML_ASSERT(tensor->op == GGML_OP_GLU);
1838
0
    return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0);
1839
0
}
1840
1841
0
const char * ggml_get_name(const struct ggml_tensor * tensor) {
1842
0
    return tensor->name;
1843
0
}
1844
1845
0
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
1846
0
    size_t i;
1847
0
    for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
1848
0
        tensor->name[i] = name[i];
1849
0
    }
1850
0
    tensor->name[i] = '\0';
1851
0
    return tensor;
1852
0
}
1853
1854
0
struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
1855
0
    va_list args;
1856
0
    va_start(args, fmt);
1857
0
    vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
1858
0
    va_end(args);
1859
0
    return tensor;
1860
0
}
1861
1862
struct ggml_tensor * ggml_view_tensor(
1863
        struct ggml_context * ctx,
1864
0
        struct ggml_tensor  * src) {
1865
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
1866
0
    ggml_format_name(result, "%s (view)", src->name);
1867
1868
0
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
1869
0
        result->nb[i] = src->nb[i];
1870
0
    }
1871
1872
0
    return result;
1873
0
}
1874
1875
0
struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
1876
0
    struct ggml_object * obj = ctx->objects_begin;
1877
1878
0
    char * const mem_buffer = ctx->mem_buffer;
1879
1880
0
    while (obj != NULL) {
1881
0
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1882
0
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1883
0
        }
1884
1885
0
        obj = obj->next;
1886
0
    }
1887
1888
0
    return NULL;
1889
0
}
1890
1891
0
struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
1892
0
    struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
1893
0
    obj = obj->next;
1894
1895
0
    char * const mem_buffer = ctx->mem_buffer;
1896
1897
0
    while (obj != NULL) {
1898
0
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1899
0
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1900
0
        }
1901
1902
0
        obj = obj->next;
1903
0
    }
1904
1905
0
    return NULL;
1906
0
}
1907
1908
0
struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
1909
0
    struct ggml_object * obj = ctx->objects_begin;
1910
1911
0
    char * const mem_buffer = ctx->mem_buffer;
1912
1913
0
    while (obj != NULL) {
1914
0
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1915
0
            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
1916
0
            if (strcmp(cur->name, name) == 0) {
1917
0
                return cur;
1918
0
            }
1919
0
        }
1920
1921
0
        obj = obj->next;
1922
0
    }
1923
1924
0
    return NULL;
1925
0
}
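These iterators walk the object list and skip non-tensor objects, which makes it easy to inspect a context's contents. A sketch (ggml_nbytes is from ggml.h):

    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL;
         t = ggml_get_next_tensor(ctx, t)) {
        fprintf(stderr, "%-32s %8zu bytes\n", ggml_get_name(t), ggml_nbytes(t));
    }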
1926
1927
////////////////////////////////////////////////////////////////////////////////
1928
1929
// ggml_dup
1930
1931
static struct ggml_tensor * ggml_dup_impl(
1932
        struct ggml_context * ctx,
1933
        struct ggml_tensor  * a,
1934
0
        bool                  inplace) {
1935
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1936
1937
0
    result->op     = GGML_OP_DUP;
1938
0
    result->src[0] = a;
1939
1940
0
    return result;
1941
0
}
1942
1943
struct ggml_tensor * ggml_dup(
1944
        struct ggml_context * ctx,
1945
0
        struct ggml_tensor  * a) {
1946
0
    return ggml_dup_impl(ctx, a, false);
1947
0
}
1948
1949
struct ggml_tensor * ggml_dup_inplace(
1950
        struct ggml_context * ctx,
1951
0
        struct ggml_tensor  * a) {
1952
0
    return ggml_dup_impl(ctx, a, true);
1953
0
}
1954
1955
// ggml_add
1956
1957
static struct ggml_tensor * ggml_add_impl(
1958
        struct ggml_context * ctx,
1959
        struct ggml_tensor  * a,
1960
        struct ggml_tensor  * b,
1961
0
        bool                  inplace) {
1962
0
    GGML_ASSERT(ggml_can_repeat(b, a));
1963
1964
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1965
1966
0
    result->op     = GGML_OP_ADD;
1967
0
    result->src[0] = a;
1968
0
    result->src[1] = b;
1969
1970
0
    return result;
1971
0
}
1972
1973
struct ggml_tensor * ggml_add(
1974
        struct ggml_context * ctx,
1975
        struct ggml_tensor  * a,
1976
0
        struct ggml_tensor  * b) {
1977
0
    return ggml_add_impl(ctx, a, b, false);
1978
0
}
1979
1980
struct ggml_tensor * ggml_add_inplace(
1981
        struct ggml_context * ctx,
1982
        struct ggml_tensor  * a,
1983
0
        struct ggml_tensor  * b) {
1984
0
    return ggml_add_impl(ctx, a, b, true);
1985
0
}
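ggml_add only requires that b be repeatable over a, so a smaller b is broadcast across the extra dimensions. A sketch of building and running the op, assuming the graph API from ggml.h and the CPU helper ggml_graph_compute_with_ctx() from ggml-cpu.h:

    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3); // 3 rows of 4
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);    // one row, repeated
    struct ggml_tensor * c = ggml_add(ctx, a, b); // ggml_can_repeat(b, a) holds

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/ 4);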
1986
1987
// ggml_add_cast
1988
1989
static struct ggml_tensor * ggml_add_cast_impl(
1990
        struct ggml_context * ctx,
1991
        struct ggml_tensor  * a,
1992
        struct ggml_tensor  * b,
1993
0
        enum   ggml_type      type) {
1994
    // TODO: support less-strict constraint
1995
    //       GGML_ASSERT(ggml_can_repeat(b, a));
1996
0
    GGML_ASSERT(ggml_can_repeat_rows(b, a));
1997
1998
    // currently only supported for quantized input and f16/bf16
1999
0
    GGML_ASSERT(ggml_is_quantized(a->type) ||
2000
0
                a->type == GGML_TYPE_F16 ||
2001
0
                a->type == GGML_TYPE_BF16);
2002
2003
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
2004
2005
0
    result->op     = GGML_OP_ADD;
2006
0
    result->src[0] = a;
2007
0
    result->src[1] = b;
2008
2009
0
    return result;
2010
0
}
2011
2012
struct ggml_tensor * ggml_add_cast(
2013
        struct ggml_context * ctx,
2014
        struct ggml_tensor  * a,
2015
        struct ggml_tensor  * b,
2016
0
        enum   ggml_type      type) {
2017
0
    return ggml_add_cast_impl(ctx, a, b, type);
2018
0
}
2019
2020
struct ggml_tensor * ggml_add_id(
2021
            struct ggml_context * ctx,
2022
            struct ggml_tensor  * a,
2023
            struct ggml_tensor  * b,
2024
0
            struct ggml_tensor  * ids) {
2025
2026
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
2027
0
    GGML_ASSERT(a->ne[1] == ids->ne[0]);
2028
0
    GGML_ASSERT(a->ne[2] == ids->ne[1]);
2029
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
2030
2031
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2032
2033
0
    result->op     = GGML_OP_ADD_ID;
2034
0
    result->src[0] = a;
2035
0
    result->src[1] = b;
2036
0
    result->src[2] = ids;
2037
2038
0
    return result;
2039
0
}
2040
2041
// ggml_add1
2042
2043
static struct ggml_tensor * ggml_add1_impl(
2044
        struct ggml_context * ctx,
2045
        struct ggml_tensor  * a,
2046
        struct ggml_tensor  * b,
2047
0
        bool                  inplace) {
2048
0
    GGML_ASSERT(ggml_is_scalar(b));
2049
0
    GGML_ASSERT(ggml_is_padded_1d(a));
2050
2051
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2052
2053
0
    result->op     = GGML_OP_ADD1;
2054
0
    result->src[0] = a;
2055
0
    result->src[1] = b;
2056
2057
0
    return result;
2058
0
}
2059
2060
struct ggml_tensor * ggml_add1(
2061
        struct ggml_context * ctx,
2062
        struct ggml_tensor  * a,
2063
0
        struct ggml_tensor  * b) {
2064
0
    return ggml_add1_impl(ctx, a, b, false);
2065
0
}
2066
2067
struct ggml_tensor * ggml_add1_inplace(
2068
        struct ggml_context * ctx,
2069
        struct ggml_tensor  * a,
2070
0
        struct ggml_tensor  * b) {
2071
0
    return ggml_add1_impl(ctx, a, b, true);
2072
0
}
2073
2074
// ggml_acc
2075
2076
static struct ggml_tensor * ggml_acc_impl(
2077
        struct ggml_context * ctx,
2078
        struct ggml_tensor  * a,
2079
        struct ggml_tensor  * b,
2080
        size_t                nb1,
2081
        size_t                nb2,
2082
        size_t                nb3,
2083
        size_t                offset,
2084
0
        bool                  inplace) {
2085
0
    GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
2086
0
    GGML_ASSERT(ggml_is_contiguous(a));
2087
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2088
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
2089
2090
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2091
2092
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
2093
0
    ggml_set_op_params(result, params, sizeof(params));
2094
2095
0
    result->op     = GGML_OP_ACC;
2096
0
    result->src[0] = a;
2097
0
    result->src[1] = b;
2098
2099
0
    return result;
2100
0
}
2101
2102
struct ggml_tensor * ggml_acc(
2103
        struct ggml_context * ctx,
2104
        struct ggml_tensor  * a,
2105
        struct ggml_tensor  * b,
2106
        size_t                nb1,
2107
        size_t                nb2,
2108
        size_t                nb3,
2109
0
        size_t                offset) {
2110
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
2111
0
}
2112
2113
struct ggml_tensor * ggml_acc_inplace(
2114
        struct ggml_context * ctx,
2115
        struct ggml_tensor  * a,
2116
        struct ggml_tensor  * b,
2117
        size_t                nb1,
2118
        size_t                nb2,
2119
        size_t                nb3,
2120
0
        size_t                offset) {
2121
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
2122
0
}
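The nb1/nb2/nb3/offset parameters describe a strided window of a into which b is accumulated, all in bytes. A sketch that adds a 4-element vector into row 2 of a 4x3 matrix:

    struct ggml_tensor * a   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    struct ggml_tensor * row = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * r   = ggml_acc(ctx, a, row,
            a->nb[1], a->nb[2], a->nb[3], /*offset=*/ 2*a->nb[1]);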
2123
2124
// ggml_sub
2125
2126
static struct ggml_tensor * ggml_sub_impl(
2127
        struct ggml_context * ctx,
2128
        struct ggml_tensor  * a,
2129
        struct ggml_tensor  * b,
2130
0
        bool                  inplace) {
2131
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2132
2133
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2134
2135
0
    result->op     = GGML_OP_SUB;
2136
0
    result->src[0] = a;
2137
0
    result->src[1] = b;
2138
2139
0
    return result;
2140
0
}
2141
2142
struct ggml_tensor * ggml_sub(
2143
        struct ggml_context * ctx,
2144
        struct ggml_tensor  * a,
2145
0
        struct ggml_tensor  * b) {
2146
0
    return ggml_sub_impl(ctx, a, b, false);
2147
0
}
2148
2149
struct ggml_tensor * ggml_sub_inplace(
2150
        struct ggml_context * ctx,
2151
        struct ggml_tensor  * a,
2152
0
        struct ggml_tensor  * b) {
2153
0
    return ggml_sub_impl(ctx, a, b, true);
2154
0
}
2155
2156
// ggml_mul
2157
2158
static struct ggml_tensor * ggml_mul_impl(
2159
        struct ggml_context * ctx,
2160
        struct ggml_tensor  * a,
2161
        struct ggml_tensor  * b,
2162
0
        bool                  inplace) {
2163
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2164
2165
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2166
2167
0
    result->op     = GGML_OP_MUL;
2168
0
    result->src[0] = a;
2169
0
    result->src[1] = b;
2170
2171
0
    return result;
2172
0
}
2173
2174
struct ggml_tensor * ggml_mul(
2175
        struct ggml_context * ctx,
2176
        struct ggml_tensor  * a,
2177
0
        struct ggml_tensor  * b) {
2178
0
    return ggml_mul_impl(ctx, a, b, false);
2179
0
}
2180
2181
struct ggml_tensor * ggml_mul_inplace(
2182
        struct ggml_context * ctx,
2183
        struct ggml_tensor  * a,
2184
0
        struct ggml_tensor  * b) {
2185
0
    return ggml_mul_impl(ctx, a, b, true);
2186
0
}
2187
2188
// ggml_div
2189
2190
static struct ggml_tensor * ggml_div_impl(
2191
        struct ggml_context * ctx,
2192
        struct ggml_tensor  * a,
2193
        struct ggml_tensor  * b,
2194
0
        bool                  inplace) {
2195
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2196
2197
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2198
2199
0
    result->op     = GGML_OP_DIV;
2200
0
    result->src[0] = a;
2201
0
    result->src[1] = b;
2202
2203
0
    return result;
2204
0
}
2205
2206
struct ggml_tensor * ggml_div(
2207
        struct ggml_context * ctx,
2208
        struct ggml_tensor  * a,
2209
0
        struct ggml_tensor  * b) {
2210
0
    return ggml_div_impl(ctx, a, b, false);
2211
0
}
2212
2213
struct ggml_tensor * ggml_div_inplace(
2214
        struct ggml_context * ctx,
2215
        struct ggml_tensor  * a,
2216
0
        struct ggml_tensor  * b) {
2217
0
    return ggml_div_impl(ctx, a, b, true);
2218
0
}
2219
2220
// ggml_sqr
2221
2222
static struct ggml_tensor * ggml_sqr_impl(
2223
        struct ggml_context * ctx,
2224
        struct ggml_tensor  * a,
2225
0
        bool                  inplace) {
2226
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2227
2228
0
    result->op     = GGML_OP_SQR;
2229
0
    result->src[0] = a;
2230
2231
0
    return result;
2232
0
}
2233
2234
struct ggml_tensor * ggml_sqr(
2235
        struct ggml_context * ctx,
2236
0
        struct ggml_tensor  * a) {
2237
0
    return ggml_sqr_impl(ctx, a, false);
2238
0
}
2239
2240
struct ggml_tensor * ggml_sqr_inplace(
2241
        struct ggml_context * ctx,
2242
0
        struct ggml_tensor  * a) {
2243
0
    return ggml_sqr_impl(ctx, a, true);
2244
0
}
2245
2246
// ggml_sqrt
2247
2248
static struct ggml_tensor * ggml_sqrt_impl(
2249
        struct ggml_context * ctx,
2250
        struct ggml_tensor  * a,
2251
0
        bool                  inplace) {
2252
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2253
2254
0
    result->op     = GGML_OP_SQRT;
2255
0
    result->src[0] = a;
2256
2257
0
    return result;
2258
0
}
2259
2260
struct ggml_tensor * ggml_sqrt(
2261
        struct ggml_context * ctx,
2262
0
        struct ggml_tensor  * a) {
2263
0
    return ggml_sqrt_impl(ctx, a, false);
2264
0
}
2265
2266
struct ggml_tensor * ggml_sqrt_inplace(
2267
        struct ggml_context * ctx,
2268
0
        struct ggml_tensor  * a) {
2269
0
    return ggml_sqrt_impl(ctx, a, true);
2270
0
}
2271
2272
// ggml_log
2273
2274
static struct ggml_tensor * ggml_log_impl(
2275
        struct ggml_context * ctx,
2276
        struct ggml_tensor  * a,
2277
0
        bool                  inplace) {
2278
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2279
2280
0
    result->op     = GGML_OP_LOG;
2281
0
    result->src[0] = a;
2282
2283
0
    return result;
2284
0
}
2285
2286
struct ggml_tensor * ggml_log(
2287
        struct ggml_context * ctx,
2288
0
        struct ggml_tensor  * a) {
2289
0
    return ggml_log_impl(ctx, a, false);
2290
0
}
2291
2292
struct ggml_tensor * ggml_log_inplace(
2293
        struct ggml_context * ctx,
2294
0
        struct ggml_tensor  * a) {
2295
0
    return ggml_log_impl(ctx, a, true);
2296
0
}
2297
2298
struct ggml_tensor * ggml_expm1(
2299
        struct ggml_context * ctx,
2300
0
        struct ggml_tensor  * a) {
2301
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
2302
0
}
2303
2304
struct ggml_tensor * ggml_expm1_inplace(
2305
        struct ggml_context * ctx,
2306
0
        struct ggml_tensor  * a) {
2307
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
2308
0
}
2309
2310
struct ggml_tensor * ggml_softplus(
2311
        struct ggml_context * ctx,
2312
0
        struct ggml_tensor  * a) {
2313
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2314
0
}
2315
2316
struct ggml_tensor * ggml_softplus_inplace(
2317
        struct ggml_context * ctx,
2318
0
        struct ggml_tensor  * a) {
2319
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2320
0
}
2321
2322
// ggml_sin
2323
2324
static struct ggml_tensor * ggml_sin_impl(
2325
        struct ggml_context * ctx,
2326
        struct ggml_tensor  * a,
2327
0
        bool                  inplace) {
2328
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2329
2330
0
    result->op     = GGML_OP_SIN;
2331
0
    result->src[0] = a;
2332
2333
0
    return result;
2334
0
}
2335
2336
struct ggml_tensor * ggml_sin(
2337
        struct ggml_context * ctx,
2338
0
        struct ggml_tensor  * a) {
2339
0
    return ggml_sin_impl(ctx, a, false);
2340
0
}
2341
2342
struct ggml_tensor * ggml_sin_inplace(
2343
        struct ggml_context * ctx,
2344
0
        struct ggml_tensor  * a) {
2345
0
    return ggml_sin_impl(ctx, a, true);
2346
0
}
2347
2348
// ggml_cos
2349
2350
static struct ggml_tensor * ggml_cos_impl(
2351
        struct ggml_context * ctx,
2352
        struct ggml_tensor  * a,
2353
0
        bool                  inplace) {
2354
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2355
2356
0
    result->op     = GGML_OP_COS;
2357
0
    result->src[0] = a;
2358
2359
0
    return result;
2360
0
}
2361
2362
struct ggml_tensor * ggml_cos(
2363
        struct ggml_context * ctx,
2364
0
        struct ggml_tensor  * a) {
2365
0
    return ggml_cos_impl(ctx, a, false);
2366
0
}
2367
2368
struct ggml_tensor * ggml_cos_inplace(
2369
        struct ggml_context * ctx,
2370
0
        struct ggml_tensor  * a) {
2371
0
    return ggml_cos_impl(ctx, a, true);
2372
0
}
2373
2374
// ggml_sum
2375
2376
struct ggml_tensor * ggml_sum(
2377
        struct ggml_context * ctx,
2378
0
        struct ggml_tensor  * a) {
2379
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
2380
2381
0
    result->op     = GGML_OP_SUM;
2382
0
    result->src[0] = a;
2383
2384
0
    return result;
2385
0
}
2386
2387
// ggml_sum_rows
2388
2389
struct ggml_tensor * ggml_sum_rows(
2390
        struct ggml_context * ctx,
2391
0
        struct ggml_tensor  * a) {
2392
0
    int64_t ne[GGML_MAX_DIMS] = { 1 };
2393
0
    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
2394
0
        ne[i] = a->ne[i];
2395
0
    }
2396
2397
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2398
2399
0
    result->op     = GGML_OP_SUM_ROWS;
2400
0
    result->src[0] = a;
2401
2402
0
    return result;
2403
0
}
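Reduction shapes in one sketch: ggml_sum collapses everything to a single element, ggml_sum_rows only dimension 0:

    struct ggml_tensor * x  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    struct ggml_tensor * s  = ggml_sum(ctx, x);      // ne = {1, 1, 1, 1}
    struct ggml_tensor * sr = ggml_sum_rows(ctx, x); // ne = {1, 3, 1, 1}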
2404
2405
// ggml_cumsum
2406
2407
struct ggml_tensor * ggml_cumsum(
2408
        struct ggml_context * ctx,
2409
0
        struct ggml_tensor  * a) {
2410
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2411
2412
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2413
2414
0
    result->op     = GGML_OP_CUMSUM;
2415
0
    result->src[0] = a;
2416
2417
0
    return result;
2418
0
}
2419
2420
// ggml_mean
2421
2422
struct ggml_tensor * ggml_mean(
2423
        struct ggml_context * ctx,
2424
0
        struct ggml_tensor  * a) {
2425
0
    int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
2426
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
2427
2428
0
    result->op     = GGML_OP_MEAN;
2429
0
    result->src[0] = a;
2430
2431
0
    return result;
2432
0
}
2433
2434
// ggml_argmax
2435
2436
struct ggml_tensor * ggml_argmax(
2437
        struct ggml_context * ctx,
2438
0
        struct ggml_tensor  * a) {
2439
0
    GGML_ASSERT(ggml_is_matrix(a));
2440
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
2441
2442
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
2443
2444
0
    result->op     = GGML_OP_ARGMAX;
2445
0
    result->src[0] = a;
2446
2447
0
    return result;
2448
0
}
2449
2450
// ggml_count_equal
2451
2452
struct ggml_tensor * ggml_count_equal(
2453
        struct ggml_context * ctx,
2454
        struct ggml_tensor  * a,
2455
0
        struct ggml_tensor  * b) {
2456
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
2457
2458
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
2459
2460
0
    result->op     = GGML_OP_COUNT_EQUAL;
2461
0
    result->src[0] = a;
2462
0
    result->src[1] = b;
2463
2464
0
    return result;
2465
0
}
2466
2467
// ggml_repeat
2468
2469
struct ggml_tensor * ggml_repeat(
2470
        struct ggml_context * ctx,
2471
        struct ggml_tensor  * a,
2472
0
        struct ggml_tensor  * b) {
2473
0
    GGML_ASSERT(ggml_can_repeat(a, b));
2474
2475
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2476
2477
0
    result->op     = GGML_OP_REPEAT;
2478
0
    result->src[0] = a;
2479
2480
0
    return result;
2481
0
}
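ggml_repeat tiles a until it matches the shape of the template tensor b (each of b's dimensions must be a multiple of a's). A sketch:

    struct ggml_tensor * small = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 1);
    struct ggml_tensor * big   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    struct ggml_tensor * tiled = ggml_repeat(ctx, small, big); // small's row repeated 3 times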
2482
2483
struct ggml_tensor * ggml_repeat_4d(
2484
        struct ggml_context * ctx,
2485
        struct ggml_tensor * a,
2486
0
        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
2487
0
    const bool can_repeat = ggml_is_empty(a) || (
2488
0
        (ne0 % a->ne[0] == 0) &&
2489
0
        (ne1 % a->ne[1] == 0) &&
2490
0
        (ne2 % a->ne[2] == 0) &&
2491
0
        (ne3 % a->ne[3] == 0)
2492
0
    );
2493
0
    GGML_ASSERT(can_repeat);
2494
2495
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
2496
2497
0
    result->op     = GGML_OP_REPEAT;
2498
0
    result->src[0] = a;
2499
2500
0
    return result;
2501
0
}
2502
2503
// ggml_repeat_back
2504
2505
struct ggml_tensor * ggml_repeat_back(
2506
        struct ggml_context * ctx,
2507
        struct ggml_tensor  * a,
2508
0
        struct ggml_tensor  * b) {
2509
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2510
2511
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2512
2513
0
    result->op     = GGML_OP_REPEAT_BACK;
2514
0
    result->src[0] = a;
2515
2516
0
    return result;
2517
0
}
2518
2519
// ggml_concat
2520
2521
struct ggml_tensor * ggml_concat(
2522
    struct ggml_context * ctx,
2523
    struct ggml_tensor  * a,
2524
    struct ggml_tensor  * b,
2525
0
    int                   dim) {
2526
0
    GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
2527
0
    GGML_ASSERT(a->type == b->type);
2528
2529
0
    int64_t ne[GGML_MAX_DIMS];
2530
0
    for (int d = 0; d < GGML_MAX_DIMS; ++d) {
2531
0
        if (d == dim) {
2532
0
            ne[d] = a->ne[d] + b->ne[d];
2533
0
            continue;
2534
0
        }
2535
0
        GGML_ASSERT(a->ne[d] == b->ne[d]);
2536
0
        ne[d] = a->ne[d];
2537
0
    }
2538
2539
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2540
2541
0
    ggml_set_op_params_i32(result, 0, dim);
2542
2543
0
    result->op     = GGML_OP_CONCAT;
2544
0
    result->src[0] = a;
2545
0
    result->src[1] = b;
2546
2547
0
    return result;
2548
0
}
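Concatenation grows exactly one dimension; all others must match. A sketch along dim 1 (rows):

    struct ggml_tensor * top = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3);
    struct ggml_tensor * bot = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 2);
    struct ggml_tensor * cat = ggml_concat(ctx, top, bot, /*dim=*/ 1); // ne = {4, 5, 1, 1}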
2549
2550
// ggml_abs
2551
2552
struct ggml_tensor * ggml_abs(
2553
        struct ggml_context * ctx,
2554
0
        struct ggml_tensor  * a) {
2555
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
2556
0
}
2557
2558
struct ggml_tensor * ggml_abs_inplace(
2559
        struct ggml_context * ctx,
2560
0
        struct ggml_tensor  * a) {
2561
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
2562
0
}
2563
2564
// ggml_sgn
2565
2566
struct ggml_tensor * ggml_sgn(
2567
        struct ggml_context * ctx,
2568
0
        struct ggml_tensor  * a) {
2569
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
2570
0
}
2571
2572
struct ggml_tensor * ggml_sgn_inplace(
2573
        struct ggml_context * ctx,
2574
0
        struct ggml_tensor  * a) {
2575
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
2576
0
}
2577
2578
// ggml_neg
2579
2580
struct ggml_tensor * ggml_neg(
2581
        struct ggml_context * ctx,
2582
0
        struct ggml_tensor  * a) {
2583
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
2584
0
}
2585
2586
struct ggml_tensor * ggml_neg_inplace(
2587
        struct ggml_context * ctx,
2588
0
        struct ggml_tensor  * a) {
2589
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
2590
0
}
2591
2592
// ggml_step
2593
2594
struct ggml_tensor * ggml_step(
2595
        struct ggml_context * ctx,
2596
0
        struct ggml_tensor  * a) {
2597
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
2598
0
}
2599
2600
struct ggml_tensor * ggml_step_inplace(
2601
        struct ggml_context * ctx,
2602
0
        struct ggml_tensor  * a) {
2603
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
2604
0
}
2605
2606
// ggml_tanh
2607
2608
struct ggml_tensor * ggml_tanh(
2609
        struct ggml_context * ctx,
2610
0
        struct ggml_tensor  * a) {
2611
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
2612
0
}
2613
2614
struct ggml_tensor * ggml_tanh_inplace(
2615
        struct ggml_context * ctx,
2616
0
        struct ggml_tensor  * a) {
2617
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
2618
0
}
2619
2620
// ggml_elu
2621
2622
struct ggml_tensor * ggml_elu(
2623
    struct ggml_context * ctx,
2624
0
    struct ggml_tensor  * a) {
2625
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
2626
0
}
2627
2628
struct ggml_tensor * ggml_elu_inplace(
2629
    struct ggml_context * ctx,
2630
0
    struct ggml_tensor  * a) {
2631
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
2632
0
}
2633
2634
// ggml_relu
2635
2636
struct ggml_tensor * ggml_relu(
2637
        struct ggml_context * ctx,
2638
0
        struct ggml_tensor  * a) {
2639
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
2640
0
}
2641
2642
struct ggml_tensor * ggml_relu_inplace(
2643
        struct ggml_context * ctx,
2644
0
        struct ggml_tensor  * a) {
2645
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
2646
0
}
2647
2648
// ggml_leaky_relu
2649
2650
struct ggml_tensor * ggml_leaky_relu(
2651
        struct ggml_context * ctx,
2652
        struct ggml_tensor  * a,
2653
        float                 negative_slope,
2654
0
        bool                  inplace) {
2655
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2656
2657
0
    ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
2658
2659
0
    result->op     = GGML_OP_LEAKY_RELU;
2660
0
    result->src[0] = a;
2661
2662
0
    return result;
2663
0
}
2664
2665
// ggml_sigmoid
2666
2667
struct ggml_tensor * ggml_sigmoid(
2668
        struct ggml_context * ctx,
2669
0
        struct ggml_tensor  * a) {
2670
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
2671
0
}
2672
2673
struct ggml_tensor * ggml_sigmoid_inplace(
2674
        struct ggml_context * ctx,
2675
0
        struct ggml_tensor  * a) {
2676
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
2677
0
}
2678
2679
// ggml_gelu
2680
2681
struct ggml_tensor * ggml_gelu(
2682
        struct ggml_context * ctx,
2683
0
        struct ggml_tensor  * a) {
2684
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
2685
0
}
2686
2687
struct ggml_tensor * ggml_gelu_inplace(
2688
        struct ggml_context * ctx,
2689
0
        struct ggml_tensor  * a) {
2690
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
2691
0
}
2692
2693
// ggml_gelu_erf
2694
2695
struct ggml_tensor * ggml_gelu_erf(
2696
        struct ggml_context * ctx,
2697
0
        struct ggml_tensor  * a) {
2698
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
2699
0
}
2700
2701
struct ggml_tensor * ggml_gelu_erf_inplace(
2702
        struct ggml_context * ctx,
2703
0
        struct ggml_tensor  * a) {
2704
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
2705
0
}
2706
2707
// ggml_gelu_quick
2708
2709
struct ggml_tensor * ggml_gelu_quick(
2710
        struct ggml_context * ctx,
2711
0
        struct ggml_tensor  * a) {
2712
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2713
0
}
2714
2715
struct ggml_tensor * ggml_gelu_quick_inplace(
2716
        struct ggml_context * ctx,
2717
0
        struct ggml_tensor  * a) {
2718
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2719
0
}
2720
2721
// ggml_silu
2722
2723
struct ggml_tensor * ggml_silu(
2724
        struct ggml_context * ctx,
2725
0
        struct ggml_tensor  * a) {
2726
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
2727
0
}
2728
2729
struct ggml_tensor * ggml_silu_inplace(
2730
        struct ggml_context * ctx,
2731
0
        struct ggml_tensor  * a) {
2732
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
2733
0
}
2734
2735
// ggml_xielu
2736
2737
struct ggml_tensor * ggml_xielu(
2738
        struct ggml_context * ctx,
2739
        struct ggml_tensor  * a,
2740
        float alpha_n,
2741
        float alpha_p,
2742
        float beta,
2743
0
        float eps) {
2744
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2745
2746
0
    ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
2747
0
    ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
2748
0
    ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
2749
0
    ggml_set_op_params_f32(result, 3, beta);
2750
0
    ggml_set_op_params_f32(result, 4, eps);
2751
2752
0
    result->op     = GGML_OP_UNARY;
2753
0
    result->src[0] = a;
2754
2755
0
    return result;
2756
0
}
2757
2758
// ggml_silu_back
2759
2760
struct ggml_tensor * ggml_silu_back(
2761
        struct ggml_context * ctx,
2762
        struct ggml_tensor  * a,
2763
0
        struct ggml_tensor  * b) {
2764
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2765
2766
0
    result->op     = GGML_OP_SILU_BACK;
2767
0
    result->src[0] = a;
2768
0
    result->src[1] = b;
2769
2770
0
    return result;
2771
0
}
2772
2773
// ggml_hardswish
2774
2775
struct ggml_tensor * ggml_hardswish(
2776
        struct ggml_context * ctx,
2777
0
        struct ggml_tensor  * a) {
2778
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
2779
0
}
2780
2781
// ggml_hardsigmoid
2782
2783
struct ggml_tensor * ggml_hardsigmoid(
2784
        struct ggml_context * ctx,
2785
0
        struct ggml_tensor  * a) {
2786
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
2787
0
}
2788
2789
// ggml_exp
2790
2791
struct ggml_tensor * ggml_exp(
2792
        struct ggml_context * ctx,
2793
0
        struct ggml_tensor  * a) {
2794
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
2795
0
}
2796
2797
struct ggml_tensor * ggml_exp_inplace(
2798
        struct ggml_context * ctx,
2799
0
        struct ggml_tensor  * a) {
2800
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
2801
0
}
2802
2803
// ggml_glu
2804
2805
static struct ggml_tensor * ggml_glu_impl(
2806
        struct ggml_context * ctx,
2807
        struct ggml_tensor  * a,
2808
        struct ggml_tensor  * b,
2809
        enum ggml_glu_op      op,
2810
0
        bool                  swapped) {
2811
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
2812
2813
0
    if (b) {
2814
0
        GGML_ASSERT(ggml_is_contiguous_1(b));
2815
0
        GGML_ASSERT(ggml_are_same_shape(a, b));
2816
0
        GGML_ASSERT(a->type == b->type);
2817
0
    }
2818
2819
0
    int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i];
2820
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0);
2821
2822
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
2823
0
    ggml_set_op_params_i32(result, 1, (int32_t) swapped);
2824
2825
0
    result->op     = GGML_OP_GLU;
2826
0
    result->src[0] = a;
2827
0
    result->src[1] = b;
2828
2829
0
    return result;
2830
0
}
2831
2832
// ggml_floor
2833
2834
struct ggml_tensor * ggml_floor(
2835
        struct ggml_context * ctx,
2836
0
        struct ggml_tensor  * a) {
2837
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2838
0
}
2839
2840
struct ggml_tensor * ggml_floor_inplace(
2841
        struct ggml_context * ctx,
2842
0
        struct ggml_tensor  * a) {
2843
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2844
0
}
2845
2846
// ggml_ceil
2847
2848
struct ggml_tensor * ggml_ceil(
2849
        struct ggml_context * ctx,
2850
0
        struct ggml_tensor  * a) {
2851
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2852
0
}
2853
2854
struct ggml_tensor * ggml_ceil_inplace(
2855
        struct ggml_context * ctx,
2856
0
        struct ggml_tensor  * a) {
2857
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2858
0
}
2859
2860
// ggml_round
2861
2862
struct ggml_tensor * ggml_round(
2863
        struct ggml_context * ctx,
2864
0
        struct ggml_tensor  * a) {
2865
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2866
0
}
2867
2868
struct ggml_tensor * ggml_round_inplace(
2869
        struct ggml_context * ctx,
2870
0
        struct ggml_tensor  * a) {
2871
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2872
0
}
2873
2874
// ggml_trunc
2875
2876
struct ggml_tensor * ggml_trunc(
2877
        struct ggml_context * ctx,
2878
0
        struct ggml_tensor  * a) {
2879
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2880
0
}
2881
2882
struct ggml_tensor * ggml_trunc_inplace(
2883
        struct ggml_context * ctx,
2884
0
        struct ggml_tensor  * a) {
2885
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2886
0
}
2887
2888
struct ggml_tensor * ggml_glu(
2889
        struct ggml_context * ctx,
2890
        struct ggml_tensor  * a,
2891
        enum ggml_glu_op      op,
2892
0
        bool                  swapped) {
2893
0
    return ggml_glu_impl(ctx, a, NULL, op, swapped);
2894
0
}
2895
2896
struct ggml_tensor * ggml_glu_split(
2897
        struct ggml_context * ctx,
2898
        struct ggml_tensor  * a,
2899
        struct ggml_tensor  * b,
2900
0
        enum ggml_glu_op      op) {
2901
0
    return ggml_glu_impl(ctx, a, b, op, false);
2902
0
}
2903
2904
// ggml_reglu
2905
2906
struct ggml_tensor * ggml_reglu(
2907
        struct ggml_context * ctx,
2908
0
        struct ggml_tensor  * a) {
2909
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false);
2910
0
}
2911
2912
struct ggml_tensor * ggml_reglu_swapped(
2913
        struct ggml_context * ctx,
2914
0
        struct ggml_tensor  * a) {
2915
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true);
2916
0
}
2917
2918
struct ggml_tensor * ggml_reglu_split(
2919
        struct ggml_context * ctx,
2920
        struct ggml_tensor  * a,
2921
0
        struct ggml_tensor  * b) {
2922
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false);
2923
0
}
2924
2925
// ggml_geglu
2926
2927
struct ggml_tensor * ggml_geglu(
2928
        struct ggml_context * ctx,
2929
0
        struct ggml_tensor  * a) {
2930
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false);
2931
0
}
2932
2933
struct ggml_tensor * ggml_geglu_swapped(
2934
        struct ggml_context * ctx,
2935
0
        struct ggml_tensor  * a) {
2936
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true);
2937
0
}
2938
2939
struct ggml_tensor * ggml_geglu_split(
2940
        struct ggml_context * ctx,
2941
        struct ggml_tensor  * a,
2942
0
        struct ggml_tensor  * b) {
2943
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false);
2944
0
}
2945
2946
// ggml_swiglu
2947
2948
struct ggml_tensor * ggml_swiglu(
2949
        struct ggml_context * ctx,
2950
0
        struct ggml_tensor  * a) {
2951
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false);
2952
0
}
2953
2954
struct ggml_tensor * ggml_swiglu_swapped(
2955
        struct ggml_context * ctx,
2956
0
        struct ggml_tensor  * a) {
2957
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true);
2958
0
}
2959
2960
struct ggml_tensor * ggml_swiglu_split(
2961
        struct ggml_context * ctx,
2962
        struct ggml_tensor  * a,
2963
0
        struct ggml_tensor  * b) {
2964
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
2965
0
}
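Shape behavior of the fused GLU variants in a sketch: with no second tensor, gate and value are the two halves of dimension 0, so the result's ne[0] is halved; the *_split variants take the halves as two same-shape tensors instead:

    struct ggml_tensor * x  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 32);
    struct ggml_tensor * y  = ggml_swiglu(ctx, x);          // y->ne[0]  == 4
    struct ggml_tensor * g  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 32);
    struct ggml_tensor * y2 = ggml_swiglu_split(ctx, x, g); // y2->ne[0] == 8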
2966
2967
// ggml_geglu_erf
2968
2969
struct ggml_tensor * ggml_geglu_erf(
2970
        struct ggml_context * ctx,
2971
0
        struct ggml_tensor  * a) {
2972
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
2973
0
}
2974
2975
struct ggml_tensor * ggml_geglu_erf_swapped(
2976
        struct ggml_context * ctx,
2977
0
        struct ggml_tensor  * a) {
2978
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
2979
0
}
2980
2981
struct ggml_tensor * ggml_geglu_erf_split(
2982
        struct ggml_context * ctx,
2983
        struct ggml_tensor  * a,
2984
0
        struct ggml_tensor  * b) {
2985
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
2986
0
}
2987
2988
// ggml_geglu_quick
2989
2990
struct ggml_tensor * ggml_geglu_quick(
2991
        struct ggml_context * ctx,
2992
0
        struct ggml_tensor  * a) {
2993
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
2994
0
}
2995
2996
struct ggml_tensor * ggml_geglu_quick_swapped(
2997
        struct ggml_context * ctx,
2998
0
        struct ggml_tensor  * a) {
2999
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
3000
0
}
3001
3002
struct ggml_tensor * ggml_geglu_quick_split(
3003
        struct ggml_context * ctx,
3004
        struct ggml_tensor  * a,
3005
0
        struct ggml_tensor  * b) {
3006
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
3007
0
}
3008
3009
struct ggml_tensor * ggml_swiglu_oai(
3010
        struct ggml_context * ctx,
3011
        struct ggml_tensor  * a,
3012
        struct ggml_tensor  * b,
3013
        float                 alpha,
3014
0
        float                 limit) {
3015
0
    struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false);
3016
0
    ggml_set_op_params_f32(result, 2, alpha);
3017
0
    ggml_set_op_params_f32(result, 3, limit);
3018
3019
0
    return result;
3020
0
}
3021
3022
// ggml_norm
3023
3024
static struct ggml_tensor * ggml_norm_impl(
3025
        struct ggml_context * ctx,
3026
        struct ggml_tensor  * a,
3027
        float                 eps,
3028
0
        bool                  inplace) {
3029
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3030
3031
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3032
3033
0
    result->op     = GGML_OP_NORM;
3034
0
    result->src[0] = a;
3035
3036
0
    return result;
3037
0
}
3038
3039
struct ggml_tensor * ggml_norm(
3040
        struct ggml_context * ctx,
3041
        struct ggml_tensor  * a,
3042
0
        float                 eps) {
3043
0
    return ggml_norm_impl(ctx, a, eps, false);
3044
0
}
3045
3046
struct ggml_tensor * ggml_norm_inplace(
3047
        struct ggml_context * ctx,
3048
        struct ggml_tensor  * a,
3049
0
        float                 eps) {
3050
0
    return ggml_norm_impl(ctx, a, eps, true);
3051
0
}
3052
3053
// ggml_rms_norm
3054
3055
static struct ggml_tensor * ggml_rms_norm_impl(
3056
        struct ggml_context * ctx,
3057
        struct ggml_tensor  * a,
3058
        float                 eps,
3059
0
        bool                  inplace) {
3060
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3061
3062
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3063
3064
0
    result->op     = GGML_OP_RMS_NORM;
3065
0
    result->src[0] = a;
3066
3067
0
    return result;
3068
0
}
3069
3070
struct ggml_tensor * ggml_rms_norm(
3071
        struct ggml_context * ctx,
3072
        struct ggml_tensor  * a,
3073
0
        float                 eps) {
3074
0
    return ggml_rms_norm_impl(ctx, a, eps, false);
3075
0
}
3076
3077
struct ggml_tensor * ggml_rms_norm_inplace(
3078
        struct ggml_context * ctx,
3079
        struct ggml_tensor  * a,
3080
0
        float                 eps) {
3081
0
    return ggml_rms_norm_impl(ctx, a, eps, true);
3082
0
}
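This only records the op and eps; the kernels live in the backends. For reference, per row the op computes y[i] = x[i] / sqrt(mean(x[j]^2) + eps) with no learned scale, so callers apply the weight separately; a sketch, assuming tensors x and w:

    struct ggml_tensor * h = ggml_mul(ctx, ggml_rms_norm(ctx, x, 1e-5f), w);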
3083
3084
// ggml_rms_norm_back
3085
3086
struct ggml_tensor * ggml_rms_norm_back(
3087
        struct ggml_context * ctx,
3088
        struct ggml_tensor  * a,
3089
        struct ggml_tensor  * b,
3090
0
        float                 eps) {
3091
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3092
3093
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3094
3095
0
    result->op     = GGML_OP_RMS_NORM_BACK;
3096
0
    result->src[0] = a;
3097
0
    result->src[1] = b;
3098
3099
0
    return result;
3100
0
}
3101
3102
// ggml_group_norm
3103
3104
static struct ggml_tensor * ggml_group_norm_impl(
3105
        struct ggml_context * ctx,
3106
        struct ggml_tensor  * a,
3107
        int                   n_groups,
3108
        float                 eps,
3109
0
        bool                  inplace) {
3110
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3111
3112
0
    ggml_set_op_params_i32(result, 0, n_groups);
3113
0
    ggml_set_op_params_f32(result, 1, eps);
3114
3115
0
    result->op     = GGML_OP_GROUP_NORM;
3116
0
    result->src[0] = a;
3117
3118
0
    return result;
3119
0
}
3120
3121
struct ggml_tensor * ggml_group_norm(
3122
        struct ggml_context * ctx,
3123
        struct ggml_tensor  * a,
3124
        int                   n_groups,
3125
0
        float                 eps) {
3126
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
3127
0
}
3128
3129
struct ggml_tensor * ggml_group_norm_inplace(
3130
        struct ggml_context * ctx,
3131
        struct ggml_tensor  * a,
3132
        int                   n_groups,
3133
0
        float                 eps) {
3134
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
3135
0
}
3136
3137
// ggml_l2_norm
3138
3139
static struct ggml_tensor * ggml_l2_norm_impl(
3140
        struct ggml_context * ctx,
3141
        struct ggml_tensor  * a,
3142
        float                 eps,
3143
0
        bool                  inplace) {
3144
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3145
3146
0
    ggml_set_op_params_f32(result, 0, eps);
3147
3148
0
    result->op     = GGML_OP_L2_NORM;
3149
0
    result->src[0] = a;
3150
3151
0
    return result;
3152
0
}
3153
3154
struct ggml_tensor * ggml_l2_norm(
3155
        struct ggml_context * ctx,
3156
        struct ggml_tensor  * a,
3157
0
        float                 eps) {
3158
0
    return ggml_l2_norm_impl(ctx, a, eps, false);
3159
0
}
3160
3161
struct ggml_tensor * ggml_l2_norm_inplace(
3162
        struct ggml_context * ctx,
3163
        struct ggml_tensor  * a,
3164
0
        float                 eps) {
3165
0
    return ggml_l2_norm_impl(ctx, a, eps, true);
3166
0
}
3167
3168
// ggml_mul_mat
3169
3170
0
static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3171
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3172
3173
0
    return (t0->ne[0]           == t1->ne[0])  &&
3174
0
           (t1->ne[2]%t0->ne[2] == 0)          && // verify t0 is broadcastable
3175
0
           (t1->ne[3]%t0->ne[3] == 0);
3176
0
}
3177
3178
struct ggml_tensor * ggml_mul_mat(
3179
        struct ggml_context * ctx,
3180
        struct ggml_tensor  * a,
3181
0
        struct ggml_tensor  * b) {
3182
0
    GGML_ASSERT(ggml_can_mul_mat(a, b));
3183
0
    GGML_ASSERT(!ggml_is_transposed(a));
3184
3185
0
    const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
3186
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3187
3188
0
    result->op     = GGML_OP_MUL_MAT;
3189
0
    result->src[0] = a;
3190
0
    result->src[1] = b;
3191
3192
0
    return result;
3193
0
}
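In ggml, ne[0] is the contiguous (inner/K) dimension, so both operands carry K in ne[0] and the result shape is {a->ne[1], b->ne[1], ...}. A worked sketch:

    struct ggml_tensor * A = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 10); // K=64, M=10
    struct ggml_tensor * B = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 4);  // K=64, N=4
    struct ggml_tensor * C = ggml_mul_mat(ctx, A, B); // C->ne = {10, 4, 1, 1}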
3194
3195
void ggml_mul_mat_set_prec(
3196
        struct ggml_tensor * a,
3197
0
        enum ggml_prec       prec) {
3198
0
    GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
3199
3200
0
    const int32_t prec_i32 = (int32_t) prec;
3201
3202
0
    ggml_set_op_params_i32(a, 0, prec_i32);
3203
0
}
3204
3205
// ggml_mul_mat_id
3206
3207
/*
3208
    c = ggml_mul_mat_id(ctx, as, b, ids);
3209
3210
    as  -> [cols, rows, n_expert]
3211
    b   -> [cols, n_expert_used, n_tokens]
3212
    ids -> [n_expert_used, n_tokens] (i32)
3213
    c   -> [rows, n_expert_used, n_tokens]
3214
3215
    in b, n_expert_used can be broadcast to match the n_expert_used of ids
3216
3217
    c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
3218
*/
3219
struct ggml_tensor * ggml_mul_mat_id(
3220
        struct ggml_context * ctx,
3221
        struct ggml_tensor  * as,
3222
        struct ggml_tensor  * b,
3223
0
        struct ggml_tensor  * ids) {
3224
0
    GGML_ASSERT(!ggml_is_transposed(as));
3225
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
3226
3227
0
    GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
3228
0
    GGML_ASSERT(b->ne[3] == 1); // b is 3d
3229
0
    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
3230
0
    GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
3231
0
    GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
3232
0
    GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
3233
3234
0
    const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
3235
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3236
3237
0
    result->op     = GGML_OP_MUL_MAT_ID;
3238
0
    result->src[0] = as;
3239
0
    result->src[1] = b;
3240
0
    result->src[2] = ids;
3241
3242
0
    return result;
3243
0
}
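A mixture-of-experts routing sketch matching the shape comment above (8 experts, 2 used per token, 5 tokens):

    struct ggml_tensor * as  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 32, 8); // [cols, rows, n_expert]
    struct ggml_tensor * b   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 64, 2, 5);  // [cols, n_expert_used, n_tokens]
    struct ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, 2, 5);      // [n_expert_used, n_tokens]
    struct ggml_tensor * c   = ggml_mul_mat_id(ctx, as, b, ids);                  // ne = {32, 2, 5, 1}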
3244
3245
// ggml_out_prod
3246
3247
0
static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3248
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3249
3250
0
    return (t0->ne[1] == t1->ne[1])   &&
3251
0
           (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
3252
0
           (t1->ne[3]%t0->ne[3] == 0);
3253
0
}
3254
3255
struct ggml_tensor * ggml_out_prod(
3256
        struct ggml_context * ctx,
3257
        struct ggml_tensor  * a,
3258
0
        struct ggml_tensor  * b) {
3259
0
    GGML_ASSERT(ggml_can_out_prod(a, b));
3260
0
    GGML_ASSERT(!ggml_is_transposed(a));
3261
3262
    // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
3263
0
    const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
3264
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3265
3266
0
    result->op     = GGML_OP_OUT_PROD;
3267
0
    result->src[0] = a;
3268
0
    result->src[1] = b;
3269
3270
0
    return result;
3271
0
}
3272
3273
// ggml_scale
3274
3275
static struct ggml_tensor * ggml_scale_impl(
3276
        struct ggml_context * ctx,
3277
        struct ggml_tensor  * a,
3278
        float                 s,
3279
        float                 b,
3280
0
        bool                  inplace) {
3281
0
    GGML_ASSERT(ggml_is_padded_1d(a));
3282
3283
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3284
3285
0
    float params[2] = { s, b };
3286
0
    ggml_set_op_params(result, &params, sizeof(params));
3287
3288
0
    result->op     = GGML_OP_SCALE;
3289
0
    result->src[0] = a;
3290
3291
0
    return result;
3292
0
}
3293
3294
struct ggml_tensor * ggml_scale(
3295
        struct ggml_context * ctx,
3296
        struct ggml_tensor  * a,
3297
0
        float                 s) {
3298
0
    return ggml_scale_impl(ctx, a, s, 0.0, false);
3299
0
}
3300
3301
struct ggml_tensor * ggml_scale_inplace(
3302
        struct ggml_context * ctx,
3303
        struct ggml_tensor  * a,
3304
0
        float                 s) {
3305
0
    return ggml_scale_impl(ctx, a, s, 0.0, true);
3306
0
}
3307
3308
struct ggml_tensor * ggml_scale_bias(
3309
        struct ggml_context * ctx,
3310
        struct ggml_tensor  * a,
3311
        float                 s,
3312
0
        float                 b) {
3313
0
    return ggml_scale_impl(ctx, a, s, b, false);
3314
0
}
3315
3316
struct ggml_tensor * ggml_scale_bias_inplace(
3317
        struct ggml_context * ctx,
3318
        struct ggml_tensor  * a,
3319
        float                 s,
3320
0
        float                 b) {
3321
0
    return ggml_scale_impl(ctx, a, s, b, true);
3322
0
}
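Semantically (the kernels live in the backends): ggml_scale_bias(ctx, a, s, b) computes s*a[i] + b elementwise from the {s, b} op params stored above, and ggml_scale(ctx, a, s) is the b == 0.0f special case.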
3323
3324
// ggml_set
3325
3326
static struct ggml_tensor * ggml_set_impl(
3327
        struct ggml_context * ctx,
3328
        struct ggml_tensor  * a,
3329
        struct ggml_tensor  * b,
3330
        size_t                nb1,
3331
        size_t                nb2,
3332
        size_t                nb3,
3333
        size_t                offset,
3334
0
        bool                  inplace) {
3335
0
    GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
3336
3337
    // make a view of the destination
3338
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3339
3340
0
    GGML_ASSERT(offset < (size_t)(1 << 30));
3341
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
3342
0
    ggml_set_op_params(result, params, sizeof(params));
3343
3344
0
    result->op     = GGML_OP_SET;
3345
0
    result->src[0] = a;
3346
0
    result->src[1] = b;
3347
3348
0
    return result;
3349
0
}
3350
3351
struct ggml_tensor * ggml_set(
3352
        struct ggml_context * ctx,
3353
        struct ggml_tensor  * a,
3354
        struct ggml_tensor  * b,
3355
        size_t                nb1,
3356
        size_t                nb2,
3357
        size_t                nb3,
3358
0
        size_t                offset) {
3359
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
3360
0
}
3361
3362
struct ggml_tensor * ggml_set_inplace(
3363
        struct ggml_context * ctx,
3364
        struct ggml_tensor  * a,
3365
        struct ggml_tensor  * b,
3366
        size_t                nb1,
3367
        size_t                nb2,
3368
        size_t                nb3,
3369
0
        size_t                offset) {
3370
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
3371
0
}
3372
3373
struct ggml_tensor * ggml_set_1d(
3374
        struct ggml_context * ctx,
3375
        struct ggml_tensor  * a,
3376
        struct ggml_tensor  * b,
3377
0
        size_t                offset) {
3378
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
3379
0
}
3380
3381
struct ggml_tensor * ggml_set_1d_inplace(
3382
        struct ggml_context * ctx,
3383
        struct ggml_tensor  * a,
3384
        struct ggml_tensor  * b,
3385
0
        size_t                offset) {
3386
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
3387
0
}
3388
3389
struct ggml_tensor * ggml_set_2d(
3390
        struct ggml_context * ctx,
3391
        struct ggml_tensor  * a,
3392
        struct ggml_tensor  * b,
3393
        size_t                nb1,
3394
0
        size_t                offset) {
3395
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
3396
0
}
3397
3398
struct ggml_tensor * ggml_set_2d_inplace(
3399
        struct ggml_context * ctx,
3400
        struct ggml_tensor  * a,
3401
        struct ggml_tensor  * b,
3402
        size_t                nb1,
3403
0
        size_t                offset) {
3404
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
3405
0
}
3406
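Note: ggml_set writes b into a copy of a (or into a view of a for the _inplace variants) at the given byte offset, using nb1..nb3 as the destination strides; both the offset and the strides are byte counts, not element counts. A sketch, assuming ctx plus F32 tensors dst and v already exist:

    // paste the vector v into dst starting at element 5 (F32 => 4 bytes per element)
    struct ggml_tensor * out = ggml_set_1d(ctx, dst, v, 5 * sizeof(float));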
3407
// ggml_cpy
3408
3409
static struct ggml_tensor * ggml_cpy_impl(
3410
        struct ggml_context * ctx,
3411
        struct ggml_tensor  * a,
3412
0
        struct ggml_tensor  * b) {
3413
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3414
3415
    // make a view of the destination
3416
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, b);
3417
0
    if (strlen(b->name) > 0) {
3418
0
        ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
3419
0
    } else {
3420
0
        ggml_format_name(result, "%s (copy)", a->name);
3421
0
    }
3422
3423
0
    result->op     = GGML_OP_CPY;
3424
0
    result->src[0] = a;
3425
0
    result->src[1] = b;
3426
3427
0
    return result;
3428
0
}
3429
3430
struct ggml_tensor * ggml_cpy(
3431
        struct ggml_context * ctx,
3432
        struct ggml_tensor * a,
3433
0
        struct ggml_tensor * b) {
3434
0
    return ggml_cpy_impl(ctx, a, b);
3435
0
}
3436
3437
struct ggml_tensor * ggml_cast(
3438
        struct ggml_context * ctx,
3439
        struct ggml_tensor  * a,
3440
0
        enum   ggml_type      type) {
3441
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
3442
0
    ggml_format_name(result, "%s (copy)", a->name);
3443
3444
0
    result->op     = GGML_OP_CPY;
3445
0
    result->src[0] = a;
3446
0
    result->src[1] = result;
3447
3448
0
    return result;
3449
0
}
3450
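Note: ggml_cpy records a GGML_OP_CPY node that converts a into the type and layout of an existing tensor b, while ggml_cast allocates a fresh destination of the requested type (pointing src[1] back at the result itself). A hedged sketch, assuming an F32 tensor x:

    struct ggml_tensor * x16 = ggml_cast(ctx, x, GGML_TYPE_F16); // F32 -> F16 conversion on graph compute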
3451
// ggml_cont
3452
3453
static struct ggml_tensor * ggml_cont_impl(
3454
        struct ggml_context * ctx,
3455
0
        struct ggml_tensor  * a) {
3456
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3457
0
    ggml_format_name(result, "%s (cont)", a->name);
3458
3459
0
    result->op     = GGML_OP_CONT;
3460
0
    result->src[0] = a;
3461
3462
0
    return result;
3463
0
}
3464
3465
struct ggml_tensor * ggml_cont(
3466
        struct ggml_context * ctx,
3467
0
        struct ggml_tensor * a) {
3468
0
    return ggml_cont_impl(ctx, a);
3469
0
}
3470
3471
// make contiguous, with new shape
3472
GGML_API struct ggml_tensor * ggml_cont_1d(
3473
        struct ggml_context * ctx,
3474
        struct ggml_tensor  * a,
3475
0
        int64_t               ne0) {
3476
0
    return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
3477
0
}
3478
3479
GGML_API struct ggml_tensor * ggml_cont_2d(
3480
        struct ggml_context * ctx,
3481
        struct ggml_tensor  * a,
3482
        int64_t               ne0,
3483
0
        int64_t               ne1) {
3484
0
    return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
3485
0
}
3486
3487
GGML_API struct ggml_tensor * ggml_cont_3d(
3488
        struct ggml_context * ctx,
3489
        struct ggml_tensor  * a,
3490
        int64_t               ne0,
3491
        int64_t               ne1,
3492
0
        int64_t               ne2) {
3493
0
    return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
3494
0
}
3495
3496
struct ggml_tensor * ggml_cont_4d(
3497
        struct ggml_context * ctx,
3498
        struct ggml_tensor  * a,
3499
        int64_t               ne0,
3500
        int64_t               ne1,
3501
        int64_t               ne2,
3502
0
        int64_t               ne3) {
3503
0
    GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
3504
3505
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
3506
0
    ggml_format_name(result, "%s (cont)", a->name);
3507
3508
0
    result->op     = GGML_OP_CONT;
3509
0
    result->src[0] = a;
3510
3511
0
    return result;
3512
0
}
3513
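Note: ggml_cont materializes a possibly strided tensor into a freshly allocated contiguous buffer, and the _1d/_2d/_3d helpers below are thin wrappers over ggml_cont_4d that also reshape. The usual pattern is re-packing a transposed or permuted view before an op that asserts contiguity, e.g.:

    struct ggml_tensor * xt = ggml_cont(ctx, ggml_transpose(ctx, x)); // strided view -> contiguous copy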
3514
// ggml_reshape
3515
3516
struct ggml_tensor * ggml_reshape(
3517
        struct ggml_context * ctx,
3518
        struct ggml_tensor * a,
3519
0
        struct ggml_tensor * b) {
3520
0
    GGML_ASSERT(ggml_is_contiguous(a));
3521
    // as only the shape of b is relevant, and not its memory layout, b is allowed to be non-contiguous.
3522
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3523
3524
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
3525
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3526
3527
0
    result->op     = GGML_OP_RESHAPE;
3528
0
    result->src[0] = a;
3529
3530
0
    return result;
3531
0
}
3532
3533
struct ggml_tensor * ggml_reshape_1d(
3534
        struct ggml_context * ctx,
3535
        struct ggml_tensor  * a,
3536
0
        int64_t               ne0) {
3537
0
    GGML_ASSERT(ggml_is_contiguous(a));
3538
0
    GGML_ASSERT(ggml_nelements(a) == ne0);
3539
3540
0
    const int64_t ne[1] = { ne0 };
3541
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
3542
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3543
3544
0
    result->op     = GGML_OP_RESHAPE;
3545
0
    result->src[0] = a;
3546
3547
0
    return result;
3548
0
}
3549
3550
struct ggml_tensor * ggml_reshape_2d(
3551
        struct ggml_context * ctx,
3552
        struct ggml_tensor  * a,
3553
        int64_t               ne0,
3554
0
        int64_t               ne1) {
3555
0
    GGML_ASSERT(ggml_is_contiguous(a));
3556
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
3557
3558
0
    const int64_t ne[2] = { ne0, ne1 };
3559
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
3560
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3561
3562
0
    result->op     = GGML_OP_RESHAPE;
3563
0
    result->src[0] = a;
3564
3565
0
    return result;
3566
0
}
3567
3568
struct ggml_tensor * ggml_reshape_3d(
3569
        struct ggml_context * ctx,
3570
        struct ggml_tensor  * a,
3571
        int64_t               ne0,
3572
        int64_t               ne1,
3573
0
        int64_t               ne2) {
3574
0
    GGML_ASSERT(ggml_is_contiguous(a));
3575
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
3576
3577
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3578
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
3579
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3580
3581
0
    result->op     = GGML_OP_RESHAPE;
3582
0
    result->src[0] = a;
3583
3584
0
    return result;
3585
0
}
3586
3587
struct ggml_tensor * ggml_reshape_4d(
3588
        struct ggml_context * ctx,
3589
        struct ggml_tensor  * a,
3590
        int64_t               ne0,
3591
        int64_t               ne1,
3592
        int64_t               ne2,
3593
0
        int64_t               ne3) {
3594
0
    GGML_ASSERT(ggml_is_contiguous(a));
3595
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
3596
3597
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3598
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
3599
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3600
3601
0
    result->op     = GGML_OP_RESHAPE;
3602
0
    result->src[0] = a;
3603
3604
0
    return result;
3605
0
}
3606
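Note: every reshape variant asserts a contiguous source and an unchanged element count; the result aliases a's data (a is passed as the view source to ggml_new_tensor_impl) under new ne dimensions. Sketch, assuming x is a contiguous F32 tensor holding 6 elements:

    struct ggml_tensor * m = ggml_reshape_2d(ctx, x, 3, 2); // same storage, viewed as ne0=3, ne1=2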
3607
static struct ggml_tensor * ggml_view_impl(
3608
        struct ggml_context * ctx,
3609
        struct ggml_tensor  * a,
3610
        int                   n_dims,
3611
        const int64_t       * ne,
3612
0
        size_t                offset) {
3613
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
3614
0
    ggml_format_name(result, "%s (view)", a->name);
3615
3616
0
    ggml_set_op_params(result, &offset, sizeof(offset));
3617
3618
0
    result->op     = GGML_OP_VIEW;
3619
0
    result->src[0] = a;
3620
3621
0
    return result;
3622
0
}
3623
3624
// ggml_view_1d
3625
3626
struct ggml_tensor * ggml_view_1d(
3627
        struct ggml_context * ctx,
3628
        struct ggml_tensor  * a,
3629
        int64_t               ne0,
3630
0
        size_t                offset) {
3631
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
3632
3633
0
    return result;
3634
0
}
3635
3636
// ggml_view_2d
3637
3638
struct ggml_tensor * ggml_view_2d(
3639
        struct ggml_context * ctx,
3640
        struct ggml_tensor  * a,
3641
        int64_t               ne0,
3642
        int64_t               ne1,
3643
        size_t                nb1,
3644
0
        size_t                offset) {
3645
0
    const int64_t ne[2] = { ne0, ne1 };
3646
3647
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
3648
3649
0
    result->nb[1] = nb1;
3650
0
    result->nb[2] = result->nb[1]*ne1;
3651
0
    result->nb[3] = result->nb[2];
3652
3653
0
    return result;
3654
0
}
3655
3656
// ggml_view_3d
3657
3658
struct ggml_tensor * ggml_view_3d(
3659
        struct ggml_context * ctx,
3660
        struct ggml_tensor  * a,
3661
        int64_t               ne0,
3662
        int64_t               ne1,
3663
        int64_t               ne2,
3664
        size_t                nb1,
3665
        size_t                nb2,
3666
0
        size_t                offset) {
3667
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3668
3669
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
3670
3671
0
    result->nb[1] = nb1;
3672
0
    result->nb[2] = nb2;
3673
0
    result->nb[3] = result->nb[2]*ne2;
3674
3675
0
    return result;
3676
0
}
3677
3678
// ggml_view_4d
3679
3680
struct ggml_tensor * ggml_view_4d(
3681
        struct ggml_context * ctx,
3682
        struct ggml_tensor  * a,
3683
        int64_t               ne0,
3684
        int64_t               ne1,
3685
        int64_t               ne2,
3686
        int64_t               ne3,
3687
        size_t                nb1,
3688
        size_t                nb2,
3689
        size_t                nb3,
3690
0
        size_t                offset) {
3691
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3692
3693
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
3694
3695
0
    result->nb[1] = nb1;
3696
0
    result->nb[2] = nb2;
3697
0
    result->nb[3] = nb3;
3698
3699
0
    return result;
3700
0
}
3701
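Note: views alias a's data starting at a byte offset; ggml_view_1d keeps the contiguous strides, while the 2d/3d/4d variants let the caller supply nb1..nb3 to describe an arbitrary strided window. For example, slicing out one row of a 2-D tensor m without copying (i is an assumed row index; m->nb[1] is the row pitch in bytes):

    struct ggml_tensor * row = ggml_view_1d(ctx, m, m->ne[0], i * m->nb[1]);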
3702
// ggml_permute
3703
3704
struct ggml_tensor * ggml_permute(
3705
        struct ggml_context * ctx,
3706
        struct ggml_tensor  * a,
3707
        int                   axis0,
3708
        int                   axis1,
3709
        int                   axis2,
3710
0
        int                   axis3) {
3711
0
    GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
3712
0
    GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
3713
0
    GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
3714
0
    GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);
3715
3716
0
    GGML_ASSERT(axis0 != axis1);
3717
0
    GGML_ASSERT(axis0 != axis2);
3718
0
    GGML_ASSERT(axis0 != axis3);
3719
0
    GGML_ASSERT(axis1 != axis2);
3720
0
    GGML_ASSERT(axis1 != axis3);
3721
0
    GGML_ASSERT(axis2 != axis3);
3722
3723
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3724
0
    ggml_format_name(result, "%s (permuted)", a->name);
3725
3726
0
    int ne[GGML_MAX_DIMS];
3727
0
    int nb[GGML_MAX_DIMS];
3728
3729
0
    ne[axis0] = a->ne[0];
3730
0
    ne[axis1] = a->ne[1];
3731
0
    ne[axis2] = a->ne[2];
3732
0
    ne[axis3] = a->ne[3];
3733
3734
0
    nb[axis0] = a->nb[0];
3735
0
    nb[axis1] = a->nb[1];
3736
0
    nb[axis2] = a->nb[2];
3737
0
    nb[axis3] = a->nb[3];
3738
3739
0
    result->ne[0] = ne[0];
3740
0
    result->ne[1] = ne[1];
3741
0
    result->ne[2] = ne[2];
3742
0
    result->ne[3] = ne[3];
3743
3744
0
    result->nb[0] = nb[0];
3745
0
    result->nb[1] = nb[1];
3746
0
    result->nb[2] = nb[2];
3747
0
    result->nb[3] = nb[3];
3748
3749
0
    result->op     = GGML_OP_PERMUTE;
3750
0
    result->src[0] = a;
3751
3752
0
    int32_t params[] = { axis0, axis1, axis2, axis3 };
3753
0
    ggml_set_op_params(result, params, sizeof(params));
3754
3755
0
    return result;
3756
0
}
3757
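Note: ggml_permute only relabels axes: axisN says where source dimension N lands in the result, and ne/nb are shuffled to match, so no data moves. The result is generally not contiguous, so ops that assert contiguity need a ggml_cont afterwards. Example:

    // swap dims 0 and 1; equivalent to ggml_transpose for the first two axes
    struct ggml_tensor * p = ggml_permute(ctx, x, 1, 0, 2, 3);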
3758
// ggml_transpose
3759
3760
struct ggml_tensor * ggml_transpose(
3761
        struct ggml_context * ctx,
3762
0
        struct ggml_tensor  * a) {
3763
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3764
0
    ggml_format_name(result, "%s (transposed)", a->name);
3765
3766
0
    result->ne[0] = a->ne[1];
3767
0
    result->ne[1] = a->ne[0];
3768
3769
0
    result->nb[0] = a->nb[1];
3770
0
    result->nb[1] = a->nb[0];
3771
3772
0
    result->op     = GGML_OP_TRANSPOSE;
3773
0
    result->src[0] = a;
3774
3775
0
    return result;
3776
0
}
3777
3778
// ggml_get_rows
3779
3780
struct ggml_tensor * ggml_get_rows(
3781
        struct ggml_context * ctx,
3782
        struct ggml_tensor  * a,
3783
0
        struct ggml_tensor  * b) {
3784
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
3785
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
3786
0
    GGML_ASSERT(b->ne[3] == 1);
3787
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
3788
3789
    // TODO: implement non F32 return
3790
0
    enum ggml_type type = GGML_TYPE_F32;
3791
0
    if (a->type == GGML_TYPE_I32) {
3792
0
        type = a->type;
3793
0
    }
3794
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
3795
3796
0
    result->op     = GGML_OP_GET_ROWS;
3797
0
    result->src[0] = a;
3798
0
    result->src[1] = b;
3799
3800
0
    return result;
3801
0
}
3802
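Note: ggml_get_rows gathers rows of a selected by the I32 indices in b (the classic embedding lookup) and, per the TODO above, the result is F32 unless a is already I32. Sketch, where tok_embd and ids are assumed tensors (an [n_embd, n_vocab] weight matrix and a vector of I32 token ids):

    struct ggml_tensor * embd = ggml_get_rows(ctx, tok_embd, ids); // [n_embd, n_tokens]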
3803
// ggml_get_rows_back
3804
3805
struct ggml_tensor * ggml_get_rows_back(
3806
        struct ggml_context * ctx,
3807
        struct ggml_tensor  * a,
3808
        struct ggml_tensor  * b,
3809
0
        struct ggml_tensor  * c) {
3810
0
    GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
3811
0
    GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
3812
3813
    // TODO: implement non F32 return
3814
    //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
3815
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
3816
3817
0
    result->op     = GGML_OP_GET_ROWS_BACK;
3818
0
    result->src[0] = a;
3819
0
    result->src[1] = b;
3820
3821
0
    return result;
3822
0
}
3823
3824
// ggml_set_rows
3825
3826
struct ggml_tensor * ggml_set_rows(
3827
        struct ggml_context * ctx,
3828
        struct ggml_tensor  * a,
3829
        struct ggml_tensor  * b,
3830
0
        struct ggml_tensor  * c) {
3831
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
3832
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
3833
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
3834
0
    GGML_ASSERT(b->ne[1] == c->ne[0]);
3835
0
    GGML_ASSERT(b->ne[2] % c->ne[1] == 0);
3836
0
    GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
3837
0
    GGML_ASSERT(c->ne[3] == 1);
3838
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
3839
0
    GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32);
3840
3841
0
    GGML_ASSERT(ggml_is_contiguous_rows(a));
3842
0
    GGML_ASSERT(ggml_is_contiguous_rows(b));
3843
3844
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3845
3846
0
    result->op     = GGML_OP_SET_ROWS;
3847
0
    result->src[0] = b;
3848
0
    result->src[1] = c;
3849
0
    result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931)
3850
3851
0
    return result;
3852
0
}
3853
3854
// ggml_diag
3855
3856
struct ggml_tensor * ggml_diag(
3857
        struct ggml_context * ctx,
3858
0
        struct ggml_tensor  * a) {
3859
0
    GGML_ASSERT(a->ne[1] == 1);
3860
3861
0
    const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
3862
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
3863
3864
0
    result->op     = GGML_OP_DIAG;
3865
0
    result->src[0] = a;
3866
3867
0
    return result;
3868
0
}
3869
3870
// ggml_diag_mask_inf
3871
3872
static struct ggml_tensor * ggml_diag_mask_inf_impl(
3873
        struct ggml_context * ctx,
3874
        struct ggml_tensor  * a,
3875
        int                   n_past,
3876
0
        bool                  inplace) {
3877
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3878
3879
0
    int32_t params[] = { n_past };
3880
0
    ggml_set_op_params(result, params, sizeof(params));
3881
3882
0
    result->op     = GGML_OP_DIAG_MASK_INF;
3883
0
    result->src[0] = a;
3884
3885
0
    return result;
3886
0
}
3887
3888
struct ggml_tensor * ggml_diag_mask_inf(
3889
        struct ggml_context * ctx,
3890
        struct ggml_tensor  * a,
3891
0
        int                   n_past) {
3892
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
3893
0
}
3894
3895
struct ggml_tensor * ggml_diag_mask_inf_inplace(
3896
        struct ggml_context * ctx,
3897
        struct ggml_tensor  * a,
3898
0
        int                   n_past) {
3899
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
3900
0
}
3901
3902
// ggml_diag_mask_zero
3903
3904
static struct ggml_tensor * ggml_diag_mask_zero_impl(
3905
        struct ggml_context * ctx,
3906
        struct ggml_tensor  * a,
3907
        int                   n_past,
3908
0
        bool                  inplace) {
3909
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3910
3911
0
    int32_t params[] = { n_past };
3912
0
    ggml_set_op_params(result, params, sizeof(params));
3913
3914
0
    result->op     = GGML_OP_DIAG_MASK_ZERO;
3915
0
    result->src[0] = a;
3916
3917
0
    return result;
3918
0
}
3919
3920
struct ggml_tensor * ggml_diag_mask_zero(
3921
        struct ggml_context * ctx,
3922
        struct ggml_tensor  * a,
3923
0
        int                   n_past) {
3924
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
3925
0
}
3926
3927
struct ggml_tensor * ggml_diag_mask_zero_inplace(
3928
        struct ggml_context * ctx,
3929
        struct ggml_tensor  * a,
3930
0
        int                   n_past) {
3931
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
3932
0
}
3933
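Note: both diag-mask ops share one parameterization: entries above the diagonal shifted by n_past are overwritten, either with -INF (so a following softmax zeroes them) or with 0. The typical causal-attention use, with scores and n_past assumed to exist:

    struct ggml_tensor * masked = ggml_diag_mask_inf_inplace(ctx, scores, n_past); // future positions -> -INF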
3934
// ggml_soft_max
3935
3936
static struct ggml_tensor * ggml_soft_max_impl(
3937
        struct ggml_context * ctx,
3938
        struct ggml_tensor  * a,
3939
        struct ggml_tensor  * mask,
3940
        float                 scale,
3941
        float                 max_bias,
3942
0
        bool                  inplace) {
3943
0
    GGML_ASSERT(ggml_is_contiguous(a));
3944
3945
0
    if (mask) {
3946
0
        GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
3947
0
        GGML_ASSERT(ggml_is_contiguous(mask));
3948
0
        GGML_ASSERT(mask->ne[0] == a->ne[0]);
3949
0
        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
3950
0
        GGML_ASSERT(a->ne[2]%mask->ne[2] == 0);
3951
0
        GGML_ASSERT(a->ne[3]%mask->ne[3] == 0);
3952
0
    }
3953
3954
0
    if (max_bias > 0.0f) {
3955
0
        GGML_ASSERT(mask);
3956
0
    }
3957
3958
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3959
3960
0
    float params[] = { scale, max_bias };
3961
0
    ggml_set_op_params(result, params, sizeof(params));
3962
3963
0
    result->op     = GGML_OP_SOFT_MAX;
3964
0
    result->src[0] = a;
3965
0
    result->src[1] = mask;
3966
3967
0
    return result;
3968
0
}
3969
3970
struct ggml_tensor * ggml_soft_max(
3971
        struct ggml_context * ctx,
3972
0
        struct ggml_tensor  * a) {
3973
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
3974
0
}
3975
3976
struct ggml_tensor * ggml_soft_max_inplace(
3977
        struct ggml_context * ctx,
3978
0
        struct ggml_tensor  * a) {
3979
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
3980
0
}
3981
3982
struct ggml_tensor * ggml_soft_max_ext(
3983
        struct ggml_context * ctx,
3984
        struct ggml_tensor  * a,
3985
        struct ggml_tensor  * mask,
3986
        float                 scale,
3987
0
        float                 max_bias) {
3988
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
3989
0
}
3990
3991
struct ggml_tensor * ggml_soft_max_ext_inplace(
3992
        struct ggml_context * ctx,
3993
        struct ggml_tensor  * a,
3994
        struct ggml_tensor  * mask,
3995
        float                 scale,
3996
0
        float                 max_bias) {
3997
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
3998
0
}
3999
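Note: ggml_soft_max_ext fuses the attention epilogue, softmax(a*scale + mask), and a max_bias > 0.0f additionally enables the ALiBi slope term (hence the assert above that a mask must then be present). A hedged sketch, where kq, kq_mask and n_embd_head are assumed to exist:

    struct ggml_tensor * probs = ggml_soft_max_ext(ctx, kq, kq_mask, 1.0f/sqrtf((float)n_embd_head), 0.0f);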
4000
void ggml_soft_max_add_sinks(
4001
        struct ggml_tensor * a,
4002
0
        struct ggml_tensor * sinks) {
4003
0
    if (!sinks) {
4004
0
        a->src[2] = NULL;
4005
0
        return;
4006
0
    }
4007
4008
0
    GGML_ASSERT(a->op == GGML_OP_SOFT_MAX);
4009
0
    GGML_ASSERT(a->src[2] == NULL);
4010
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
4011
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
4012
4013
0
    a->src[2] = sinks;
4014
0
}
4015
4016
// ggml_soft_max_ext_back
4017
4018
static struct ggml_tensor * ggml_soft_max_ext_back_impl(
4019
        struct ggml_context * ctx,
4020
        struct ggml_tensor  * a,
4021
        struct ggml_tensor  * b,
4022
        float                 scale,
4023
        float                 max_bias,
4024
0
        bool                  inplace) {
4025
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4026
4027
0
    result->op     = GGML_OP_SOFT_MAX_BACK;
4028
0
    result->src[0] = a;
4029
0
    result->src[1] = b;
4030
4031
0
    memcpy((float *) result->op_params + 0, &scale,    sizeof(float));
4032
0
    memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));
4033
4034
0
    return result;
4035
0
}
4036
4037
struct ggml_tensor * ggml_soft_max_ext_back(
4038
        struct ggml_context * ctx,
4039
        struct ggml_tensor  * a,
4040
        struct ggml_tensor  * b,
4041
        float                 scale,
4042
0
        float                 max_bias) {
4043
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
4044
0
}
4045
4046
struct ggml_tensor * ggml_soft_max_ext_back_inplace(
4047
        struct ggml_context * ctx,
4048
        struct ggml_tensor  * a,
4049
        struct ggml_tensor  * b,
4050
        float                 scale,
4051
0
        float                 max_bias) {
4052
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
4053
0
}
4054
4055
// ggml_rope
4056
4057
static struct ggml_tensor * ggml_rope_impl(
4058
        struct ggml_context * ctx,
4059
        struct ggml_tensor  * a,
4060
        struct ggml_tensor  * b,
4061
        struct ggml_tensor  * c,
4062
        int                   n_dims,
4063
        int                   sections[GGML_MROPE_SECTIONS],
4064
        int                   mode,
4065
        int                   n_ctx_orig,
4066
        float                 freq_base,
4067
        float                 freq_scale,
4068
        float                 ext_factor,
4069
        float                 attn_factor,
4070
        float                 beta_fast,
4071
        float                 beta_slow,
4072
0
        bool                  inplace) {
4073
0
    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
4074
4075
0
    GGML_ASSERT(ggml_is_vector(b));
4076
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
4077
4078
0
    bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;
4079
0
    if (mrope_used) {
4080
0
        GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expects 4 position ids per token
4081
0
    } else {
4082
0
        GGML_ASSERT(a->ne[2] == b->ne[0]);
4083
0
    }
4084
4085
0
    if (c) {
4086
0
        GGML_ASSERT(c->type == GGML_TYPE_F32);
4087
0
        GGML_ASSERT(c->ne[0] >= n_dims / 2);
4088
0
    }
4089
4090
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4091
4092
0
    int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
4093
0
    memcpy(params +  5, &freq_base,    sizeof(float));
4094
0
    memcpy(params +  6, &freq_scale,   sizeof(float));
4095
0
    memcpy(params +  7, &ext_factor,   sizeof(float));
4096
0
    memcpy(params +  8, &attn_factor,  sizeof(float));
4097
0
    memcpy(params +  9, &beta_fast,    sizeof(float));
4098
0
    memcpy(params + 10, &beta_slow,    sizeof(float));
4099
0
    if (mrope_used && sections) {
4100
0
        memcpy(params + 11, sections,  sizeof(int32_t) * GGML_MROPE_SECTIONS);
4101
0
    } else {
4102
0
        memset(params + 11, 0,         sizeof(int32_t) * GGML_MROPE_SECTIONS);
4103
0
    }
4104
0
    ggml_set_op_params(result, params, sizeof(params));
4105
4106
0
    result->op     = GGML_OP_ROPE;
4107
0
    result->src[0] = a;
4108
0
    result->src[1] = b;
4109
0
    result->src[2] = c;
4110
4111
0
    return result;
4112
0
}
4113
4114
struct ggml_tensor * ggml_rope(
4115
        struct ggml_context * ctx,
4116
        struct ggml_tensor  * a,
4117
        struct ggml_tensor  * b,
4118
        int                   n_dims,
4119
0
        int                   mode) {
4120
0
    return ggml_rope_impl(
4121
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
4122
0
    );
4123
0
}
4124
4125
struct ggml_tensor * ggml_rope_multi(
4126
        struct ggml_context * ctx,
4127
        struct ggml_tensor  * a,
4128
        struct ggml_tensor  * b,
4129
        struct ggml_tensor  * c,
4130
        int                   n_dims,
4131
        int                   sections[GGML_MROPE_SECTIONS],
4132
        int                   mode,
4133
        int                   n_ctx_orig,
4134
        float                 freq_base,
4135
        float                 freq_scale,
4136
        float                 ext_factor,
4137
        float                 attn_factor,
4138
        float                 beta_fast,
4139
0
        float                 beta_slow) {
4140
0
    return ggml_rope_impl(
4141
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4142
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4143
0
    );
4144
0
}
4145
4146
struct ggml_tensor * ggml_rope_multi_inplace(
4147
        struct ggml_context * ctx,
4148
        struct ggml_tensor  * a,
4149
        struct ggml_tensor  * b,
4150
        struct ggml_tensor  * c,
4151
        int                   n_dims,
4152
        int                   sections[GGML_MROPE_SECTIONS],
4153
        int                   mode,
4154
        int                   n_ctx_orig,
4155
        float                 freq_base,
4156
        float                 freq_scale,
4157
        float                 ext_factor,
4158
        float                 attn_factor,
4159
        float                 beta_fast,
4160
0
        float                 beta_slow) {
4161
0
    return ggml_rope_impl(
4162
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4163
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4164
0
    );
4165
0
}
4166
4167
struct ggml_tensor * ggml_rope_inplace(
4168
        struct ggml_context * ctx,
4169
        struct ggml_tensor  * a,
4170
        struct ggml_tensor  * b,
4171
        int                   n_dims,
4172
0
        int                   mode) {
4173
0
    return ggml_rope_impl(
4174
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
4175
0
    );
4176
0
}
4177
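Note: b carries one I32 position id per token (or four per token in mrope mode, per the assert in ggml_rope_impl), and c optionally supplies per-dimension frequency factors. A hedged sketch of the common extended call, with cur, pos and the hparams-style scalars assumed to exist:

    struct ggml_tensor * q = ggml_rope_ext(ctx, cur, pos, NULL, n_rot, GGML_ROPE_TYPE_NEOX, n_ctx_orig,
                                           freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);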
4178
struct ggml_tensor * ggml_rope_ext(
4179
        struct ggml_context * ctx,
4180
        struct ggml_tensor  * a,
4181
        struct ggml_tensor  * b,
4182
        struct ggml_tensor  * c,
4183
        int                   n_dims,
4184
        int                   mode,
4185
        int                   n_ctx_orig,
4186
        float                 freq_base,
4187
        float                 freq_scale,
4188
        float                 ext_factor,
4189
        float                 attn_factor,
4190
        float                 beta_fast,
4191
0
        float                 beta_slow) {
4192
0
    return ggml_rope_impl(
4193
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4194
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4195
0
    );
4196
0
}
4197
4198
struct ggml_tensor * ggml_rope_ext_inplace(
4199
        struct ggml_context * ctx,
4200
        struct ggml_tensor  * a,
4201
        struct ggml_tensor  * b,
4202
        struct ggml_tensor  * c,
4203
        int                   n_dims,
4204
        int                   mode,
4205
        int                   n_ctx_orig,
4206
        float                 freq_base,
4207
        float                 freq_scale,
4208
        float                 ext_factor,
4209
        float                 attn_factor,
4210
        float                 beta_fast,
4211
0
        float                 beta_slow) {
4212
0
    return ggml_rope_impl(
4213
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4214
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4215
0
    );
4216
0
}
4217
4218
struct ggml_tensor * ggml_rope_custom(
4219
        struct ggml_context * ctx,
4220
        struct ggml_tensor  * a,
4221
        struct ggml_tensor  * b,
4222
        int                   n_dims,
4223
        int                   mode,
4224
        int                   n_ctx_orig,
4225
        float                 freq_base,
4226
        float                 freq_scale,
4227
        float                 ext_factor,
4228
        float                 attn_factor,
4229
        float                 beta_fast,
4230
0
        float                 beta_slow) {
4231
0
    return ggml_rope_impl(
4232
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4233
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4234
0
    );
4235
0
}
4236
4237
struct ggml_tensor * ggml_rope_custom_inplace(
4238
        struct ggml_context * ctx,
4239
        struct ggml_tensor  * a,
4240
        struct ggml_tensor  * b,
4241
        int                   n_dims,
4242
        int                   mode,
4243
        int                   n_ctx_orig,
4244
        float                 freq_base,
4245
        float                 freq_scale,
4246
        float                 ext_factor,
4247
        float                 attn_factor,
4248
        float                 beta_fast,
4249
0
        float                 beta_slow) {
4250
0
    return ggml_rope_impl(
4251
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4252
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4253
0
    );
4254
0
}
4255
4256
// Solving `n_rot = max_pos_emb / (2pi * base^((2 * x) / n_dims))` for x (the dimension whose frequency completes n_rot rotations over the original context), we get
4257
// `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
4258
0
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
4259
0
    return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
4260
0
}
4261
4262
void ggml_rope_yarn_corr_dims(
4263
    int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
4264
0
) {
4265
    // start and end correction dims
4266
0
    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
4267
0
    float end   =  ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
4268
0
    dims[0] = MAX(0, start);
4269
0
    dims[1] = MIN(n_dims - 1, end);
4270
0
}
4271
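Worked example of the formulas above: with n_dims = 128, n_ctx_orig = 4096, freq_base = 10000 and the common YaRN settings beta_fast = 32, beta_slow = 1, we get corr_dim(32) = 128*log(4096/(32*2pi))/(2*log(10000)) ~= 20.9 and corr_dim(1) ~= 45.03, so dims becomes [20, 46] (floor on the fast end, ceil on the slow end): dimensions below ~20 still rotate fast enough to be left untouched, and those above ~46 are fully interpolated.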
4272
// ggml_rope_back
4273
4274
struct ggml_tensor * ggml_rope_ext_back(
4275
        struct ggml_context * ctx,
4276
        struct ggml_tensor  * a,
4277
        struct ggml_tensor  * b,
4278
        struct ggml_tensor  * c,
4279
        int                   n_dims,
4280
        int                   mode,
4281
        int                   n_ctx_orig,
4282
        float                 freq_base,
4283
        float                 freq_scale,
4284
        float                 ext_factor,
4285
        float                 attn_factor,
4286
        float                 beta_fast,
4287
0
        float                 beta_slow) {
4288
0
    struct ggml_tensor * result = ggml_rope_ext(
4289
0
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4290
0
    result->op = GGML_OP_ROPE_BACK;
4291
0
    return result;
4292
0
}
4293
4294
struct ggml_tensor * ggml_rope_multi_back(
4295
        struct ggml_context * ctx,
4296
        struct ggml_tensor  * a,
4297
        struct ggml_tensor  * b,
4298
        struct ggml_tensor  * c,
4299
        int                   n_dims,
4300
        int                   sections[4],
4301
        int                   mode,
4302
        int                   n_ctx_orig,
4303
        float                 freq_base,
4304
        float                 freq_scale,
4305
        float                 ext_factor,
4306
        float                 attn_factor,
4307
        float                 beta_fast,
4308
0
        float                 beta_slow) {
4309
0
    struct ggml_tensor * result = ggml_rope_multi(
4310
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4311
0
    result->op = GGML_OP_ROPE_BACK;
4312
0
    return result;
4313
0
}
4314
// ggml_clamp
4315
4316
struct ggml_tensor * ggml_clamp(
4317
        struct ggml_context * ctx,
4318
        struct ggml_tensor  * a,
4319
        float                 min,
4320
0
        float                 max) {
4321
    // TODO: when implementing backward, fix this:
4322
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
4323
4324
0
    float params[] = { min, max };
4325
0
    ggml_set_op_params(result, params, sizeof(params));
4326
4327
0
    result->op     = GGML_OP_CLAMP;
4328
0
    result->src[0] = a;
4329
4330
0
    return result;
4331
0
}
4332
4333
0
static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4334
0
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
4335
0
}
4336
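This is the standard dilated-convolution output-size formula, OUT = (IN + 2*p - d*(k - 1) - 1)/s + 1 with integer division. For example IN = 224, k = 3, s = 2, p = 1, d = 1 gives (224 + 2 - 2 - 1)/2 + 1 = 112.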
4337
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
4338
// a: [OC, IC, KH, KW]
4339
// b: [N, IC, IH, IW]
4340
// result: [N, OH, OW, IC*KH*KW]
4341
struct ggml_tensor * ggml_im2col(
4342
        struct ggml_context * ctx,
4343
        struct ggml_tensor  * a,
4344
        struct ggml_tensor  * b,
4345
        int                   s0,
4346
        int                   s1,
4347
        int                   p0,
4348
        int                   p1,
4349
        int                   d0,
4350
        int                   d1,
4351
        bool                  is_2D,
4352
0
        enum ggml_type        dst_type) {
4353
0
    if (is_2D) {
4354
0
        GGML_ASSERT(a->ne[2] == b->ne[2]);
4355
0
    } else {
4356
        //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
4357
0
        GGML_ASSERT(b->ne[1] == a->ne[1]);
4358
0
        GGML_ASSERT(b->ne[3] == 1);
4359
0
    }
4360
4361
0
    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
4362
0
    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4363
4364
0
    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
4365
0
    GGML_ASSERT((OW > 0)           && "b too small compared to a");
4366
4367
0
    const int64_t ne[4] = {
4368
0
        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
4369
0
        OW,
4370
0
        is_2D ? OH : b->ne[2],
4371
0
        is_2D ?      b->ne[3] : 1,
4372
0
    };
4373
4374
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4375
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4376
0
    ggml_set_op_params(result, params, sizeof(params));
4377
4378
0
    result->op     = GGML_OP_IM2COL;
4379
0
    result->src[0] = a;
4380
0
    result->src[1] = b;
4381
4382
0
    return result;
4383
0
}
4384
4385
struct ggml_tensor * ggml_im2col_back(
4386
        struct ggml_context * ctx,
4387
        struct ggml_tensor  * a,
4388
        struct ggml_tensor  * b,
4389
        int64_t             * ne,
4390
        int                   s0,
4391
        int                   s1,
4392
        int                   p0,
4393
        int                   p1,
4394
        int                   d0,
4395
        int                   d1,
4396
0
        bool                  is_2D) {
4397
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4398
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4399
0
    ggml_set_op_params(result, params, sizeof(params));
4400
4401
0
    result->op     = GGML_OP_IM2COL_BACK;
4402
0
    result->src[0] = a;
4403
0
    result->src[1] = b;
4404
4405
0
    return result;
4406
0
}
4407
4408
// ggml_conv_1d
4409
4410
struct ggml_tensor * ggml_conv_1d(
4411
        struct ggml_context * ctx,
4412
        struct ggml_tensor  * a,
4413
        struct ggml_tensor  * b,
4414
        int                   s0,
4415
        int                   p0,
4416
0
        int                   d0) {
4417
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
4418
4419
0
    struct ggml_tensor * result =
4420
0
        ggml_mul_mat(ctx,
4421
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
4422
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2]));                    // [OC,IC, K] => [OC, IC * K]
4423
4424
0
    result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
4425
4426
0
    return result;
4427
0
}
4428
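So ggml_conv_1d is im2col followed by one matmul against the flattened kernel, plus a reshape back to [N, OC, OL]. A toy shape walk-through under assumed sizes, using the usual ggml_new_tensor_2d/3d constructors (remember ne[0] is the innermost dimension):

    // kernel a: K=3, IC=4, OC=8; signal b: IL=16, IC=4, N=1
    struct ggml_tensor * a = ggml_new_tensor_3d(ctx, GGML_TYPE_F16, 3, 4, 8);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 16, 4);
    struct ggml_tensor * y = ggml_conv_1d(ctx, a, b, 1 /*s0*/, 1 /*p0*/, 1 /*d0*/); // OL = 16, so y is [16, 8, 1]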
4429
// ggml_conv_1d_ph
4430
4431
struct ggml_tensor * ggml_conv_1d_ph(
4432
        struct ggml_context * ctx,
4433
        struct ggml_tensor  * a,
4434
        struct ggml_tensor  * b,
4435
        int                   s,
4436
0
        int                   d) {
4437
0
    return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
4438
0
}
4439
4440
// ggml_conv_1d_dw
4441
4442
struct ggml_tensor * ggml_conv_1d_dw(
4443
        struct ggml_context * ctx,
4444
        struct ggml_tensor  * a,
4445
        struct ggml_tensor  * b,
4446
        int                   s0,
4447
        int                   p0,
4448
0
        int                   d0) {
4449
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
4450
4451
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
4452
4453
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
4454
4455
0
    result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);
4456
4457
0
    return result;
4458
0
}
4459
4460
// ggml_conv_1d_dw_ph
4461
4462
struct ggml_tensor * ggml_conv_1d_dw_ph(
4463
        struct ggml_context * ctx,
4464
        struct ggml_tensor  * a,
4465
        struct ggml_tensor  * b,
4466
        int                   s0,
4467
0
        int                   d0) {
4468
0
    return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
4469
0
}
4470
4471
// ggml_conv_transpose_1d
4472
4473
0
static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4474
0
    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
4475
0
}
4476
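This inverts the convolution size formula: OUT = (IN - 1)*s - 2*p + d*(k - 1) + 1. For example IN = 16, k = 4, s = 2, p = 0, d = 1 gives 15*2 + 3 + 1 = 34.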
4477
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
4478
        struct ggml_context * ctx,
4479
        struct ggml_tensor  * a,
4480
        struct ggml_tensor  * b,
4481
        int                   s0,
4482
        int                   p0,
4483
0
        int                   d0) {
4484
0
    GGML_ASSERT(ggml_is_matrix(b));
4485
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
4486
0
    GGML_ASSERT(a->ne[3] == 1);
4487
4488
0
    GGML_ASSERT(p0 == 0);
4489
0
    GGML_ASSERT(d0 == 1);
4490
4491
0
    const int64_t ne[4] = {
4492
0
        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
4493
0
        a->ne[1], b->ne[2], 1,
4494
0
    };
4495
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4496
4497
0
    int32_t params[] = { s0, p0, d0 };
4498
0
    ggml_set_op_params(result, params, sizeof(params));
4499
4500
0
    result->op     = GGML_OP_CONV_TRANSPOSE_1D;
4501
0
    result->src[0] = a;
4502
0
    result->src[1] = b;
4503
4504
0
    return result;
4505
0
}
4506
4507
// ggml_conv_2d
4508
4509
// a: [OC, IC, KH, KW]
4510
// b: [N, IC, IH, IW]
4511
// result: [N, OC, OH, OW]
4512
struct ggml_tensor * ggml_conv_2d(
4513
        struct ggml_context * ctx,
4514
        struct ggml_tensor  * a,
4515
        struct ggml_tensor  * b,
4516
        int                   s0,
4517
        int                   s1,
4518
        int                   p0,
4519
        int                   p1,
4520
        int                   d0,
4521
0
        int                   d1) {
4522
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
4523
4524
0
    struct ggml_tensor * result =
4525
0
        ggml_mul_mat(ctx,
4526
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0],  im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
4527
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]),  a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]
4528
4529
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
4530
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
4531
4532
4533
0
    return result;
4534
0
}
4535
4536
// a: [OC*IC, KD, KH, KW]
4537
// b: [N*IC, ID, IH, IW]
4538
// result: [N*OD, OH, OW, IC * KD * KH * KW]
4539
struct ggml_tensor * ggml_im2col_3d(
4540
        struct ggml_context * ctx,
4541
        struct ggml_tensor  * a,
4542
        struct ggml_tensor  * b,
4543
        int64_t               IC,
4544
        int                   s0, // stride width
4545
        int                   s1, // stride height
4546
        int                   s2, // stride depth
4547
        int                   p0, // padding width
4548
        int                   p1, // padding height
4549
        int                   p2, // padding depth
4550
        int                   d0, // dilation width
4551
        int                   d1, // dilation height
4552
        int                   d2, // dilation depth
4553
0
        enum ggml_type        dst_type) {
4554
0
    const int64_t N = b->ne[3] / IC;
4555
0
    const int64_t ID = b->ne[2];
4556
0
    const int64_t IH = b->ne[1];
4557
0
    const int64_t IW = b->ne[0];
4558
4559
0
    const int64_t OC = a->ne[3] / IC;
4560
0
    UNUSED(OC);
4561
0
    const int64_t KD = a->ne[2];
4562
0
    const int64_t KH = a->ne[1];
4563
0
    const int64_t KW = a->ne[0];
4564
0
    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
4565
0
    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
4566
0
    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);
4567
4568
0
    GGML_ASSERT((OD > 0)  && "b too small compared to a");
4569
0
    GGML_ASSERT((OH > 0)  && "b too small compared to a");
4570
0
    GGML_ASSERT((OW > 0)  && "b too small compared to a");
4571
4572
4573
0
    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};
4574
4575
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4576
0
    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
4577
0
    ggml_set_op_params(result, params, sizeof(params));
4578
4579
0
    result->op     = GGML_OP_IM2COL_3D;
4580
0
    result->src[0] = a;
4581
0
    result->src[1] = b;
4582
4583
0
    return result;
4584
0
}
4585
4586
// a: [OC*IC, KD, KH, KW]
4587
// b: [N*IC, ID, IH, IW]
4588
// result: [N*OC, OD, OH, OW]
4589
struct ggml_tensor * ggml_conv_3d(
4590
        struct ggml_context * ctx,
4591
        struct ggml_tensor  * a,
4592
        struct ggml_tensor  * b,
4593
        int64_t               IC,
4594
        int                   s0, // stride width
4595
        int                   s1, // stride height
4596
        int                   s2, // stride depth
4597
        int                   p0, // padding width
4598
        int                   p1, // padding height
4599
        int                   p2, // padding depth
4600
        int                   d0, // dilation width
4601
        int                   d1, // dilation height
4602
        int                   d2  // dilation depth
4603
0
        ) {
4604
0
    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]
4605
4606
0
    int64_t OC = a->ne[3] / IC;
4607
0
    int64_t N = b->ne[3] / IC;
4608
0
    struct ggml_tensor * result =
4609
0
        ggml_mul_mat(ctx,
4610
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
4611
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                          // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]
4612
4613
0
    int64_t OD = im2col->ne[3] / N;
4614
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
4615
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
4616
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
4617
4618
0
    return result;
4619
0
}
4620
4621
// ggml_conv_2d_sk_p0
4622
4623
struct ggml_tensor * ggml_conv_2d_sk_p0(
4624
        struct ggml_context * ctx,
4625
        struct ggml_tensor  * a,
4626
0
        struct ggml_tensor  * b) {
4627
0
    return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
4628
0
}
4629
4630
// ggml_conv_2d_s1_ph
4631
4632
struct ggml_tensor * ggml_conv_2d_s1_ph(
4633
        struct ggml_context * ctx,
4634
        struct ggml_tensor  * a,
4635
0
        struct ggml_tensor  * b) {
4636
0
    return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
4637
0
}
4638
4639
// ggml_conv_2d_dw
4640
4641
struct ggml_tensor * ggml_conv_2d_dw(
4642
        struct ggml_context * ctx,
4643
        struct ggml_tensor  * a,
4644
        struct ggml_tensor  * b,
4645
        int                   s0,
4646
        int                   s1,
4647
        int                   p0,
4648
        int                   p1,
4649
        int                   d0,
4650
0
        int                   d1) {
4651
0
    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
4652
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
4653
0
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
4654
0
                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
4655
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
4656
4657
0
    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2],  new_a->ne[3], 1);                       // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
4658
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
4659
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
4660
4661
0
    return result;
4662
0
}
4663
4664
// ggml_conv_2d_dw_direct
4665
4666
struct ggml_tensor * ggml_conv_2d_dw_direct(
4667
        struct ggml_context * ctx,
4668
        struct ggml_tensor  * a,
4669
        struct ggml_tensor  * b,
4670
        int                   stride0,
4671
        int                   stride1,
4672
        int                   pad0,
4673
        int                   pad1,
4674
        int                   dilation0,
4675
0
        int                   dilation1) {
4676
0
    GGML_ASSERT(a->ne[2] == 1);
4677
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4678
0
    int64_t ne[4];
4679
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
4680
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
4681
0
    ne[2] = b->ne[2];
4682
0
    ne[3] = b->ne[3];
4683
4684
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4685
4686
0
    if (ggml_is_contiguous_channels(b)) {
4687
        // Result will be permuted the same way as input (CWHN order)
4688
0
        const int64_t type_size = ggml_type_size(result->type);
4689
0
        GGML_ASSERT(ggml_blck_size(result->type) == 1);
4690
0
        result->nb[0] = result->ne[2] * type_size;
4691
0
        result->nb[1] = result->ne[0] * result->nb[0];
4692
0
        result->nb[2] = type_size;
4693
0
    }
4694
4695
0
    int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
4696
0
    ggml_set_op_params(result, params, sizeof(params));
4697
4698
0
    result->op     = GGML_OP_CONV_2D_DW;
4699
0
    result->src[0] = a;
4700
0
    result->src[1] = b;
4701
0
    return result;
4702
0
}
4703
4704
// ggml_conv_2d_direct
4705
4706
struct ggml_tensor * ggml_conv_2d_direct(
4707
        struct ggml_context * ctx,
4708
        struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
4709
        struct ggml_tensor  * b,   // input data [W, H, C, N]
4710
        int                   s0,  // stride dimension 0
4711
        int                   s1,  // stride dimension 1
4712
        int                   p0,  // padding dimension 0
4713
        int                   p1,  // padding dimension 1
4714
        int                   d0,  // dilation dimension 0
4715
0
        int                   d1) {// dilation dimension 1
4716
4717
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
4718
    //GGML_ASSERT(a->type == b->type);
4719
4720
0
    int64_t ne[4];
4721
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4722
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4723
0
    ne[2] = a->ne[3];
4724
0
    ne[3] = b->ne[3];
4725
4726
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4727
4728
0
    ggml_set_op_params_i32(result, 0, s0);
4729
0
    ggml_set_op_params_i32(result, 1, s1);
4730
0
    ggml_set_op_params_i32(result, 2, p0);
4731
0
    ggml_set_op_params_i32(result, 3, p1);
4732
0
    ggml_set_op_params_i32(result, 4, d0);
4733
0
    ggml_set_op_params_i32(result, 5, d1);
4734
4735
0
    result->op = GGML_OP_CONV_2D;
4736
0
    result->src[0] = a;
4737
0
    result->src[1] = b;
4738
4739
0
    return result;
4740
0
}
4741
4742
// ggml_conv_3d_direct
4743
4744
struct ggml_tensor * ggml_conv_3d_direct(
4745
        struct ggml_context * ctx,
4746
        struct ggml_tensor  * a,
4747
        struct ggml_tensor  * b,
4748
        int                   s0,
4749
        int                   s1,
4750
        int                   s2,
4751
        int                   p0,
4752
        int                   p1,
4753
        int                   p2,
4754
        int                   d0,
4755
        int                   d1,
4756
        int                   d2,
4757
        int                   c,
4758
        int                   n,
4759
0
        int                   oc) {
4760
4761
0
    GGML_ASSERT(a->ne[3] == (int64_t) c * oc);
4762
0
    GGML_ASSERT(b->ne[3] == (int64_t) c * n);
4763
4764
0
    int64_t ne[4];
4765
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4766
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4767
0
    ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2);
4768
0
    ne[3] = (int64_t) oc * n;
4769
4770
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4771
4772
0
    ggml_set_op_params_i32(result, 0,  s0);
4773
0
    ggml_set_op_params_i32(result, 1,  s1);
4774
0
    ggml_set_op_params_i32(result, 2,  s2);
4775
0
    ggml_set_op_params_i32(result, 3,  p0);
4776
0
    ggml_set_op_params_i32(result, 4,  p1);
4777
0
    ggml_set_op_params_i32(result, 5,  p2);
4778
0
    ggml_set_op_params_i32(result, 6,  d0);
4779
0
    ggml_set_op_params_i32(result, 7,  d1);
4780
0
    ggml_set_op_params_i32(result, 8,  d2);
4781
0
    ggml_set_op_params_i32(result, 9,  c);
4782
0
    ggml_set_op_params_i32(result, 10, n);
4783
0
    ggml_set_op_params_i32(result, 11, oc);
4784
4785
0
    result->op = GGML_OP_CONV_3D;
4786
0
    result->src[0] = a;
4787
0
    result->src[1] = b;
4788
4789
0
    return result;
4790
0
}
4791
4792
// ggml_conv_transpose_2d_p0
4793
4794
0
static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
4795
0
    return (ins - 1) * s - 2 * p + ks;
4796
0
}
4797
4798
struct ggml_tensor * ggml_conv_transpose_2d_p0(
4799
        struct ggml_context * ctx,
4800
        struct ggml_tensor  * a,
4801
        struct ggml_tensor  * b,
4802
0
        int                   stride) {
4803
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4804
4805
0
    const int64_t ne[4] = {
4806
0
        ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
4807
0
        ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
4808
0
        a->ne[2], b->ne[3],
4809
0
    };
4810
4811
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4812
4813
0
    ggml_set_op_params_i32(result, 0, stride);
4814
4815
0
    result->op     = GGML_OP_CONV_TRANSPOSE_2D;
4816
0
    result->src[0] = a;
4817
0
    result->src[1] = b;
4818
4819
0
    return result;
4820
0
}
4821
4822
// ggml_pool_*
4823
4824
0
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
4825
0
    return (ins + 2 * p - ks) / s + 1;
4826
0
}
4827
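Same sliding-window arithmetic as the convolutions, minus dilation: OUT = (IN + 2*p - k)/s + 1. Padding is a float here because the pooling API accepts fractional padding; e.g. IN = 7, k = 3, s = 2, p = 0 gives (7 - 3)/2 + 1 = 3.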
4828
// ggml_pool_1d
4829
4830
struct ggml_tensor * ggml_pool_1d(
4831
        struct ggml_context * ctx,
4832
        struct ggml_tensor  * a,
4833
        enum ggml_op_pool     op,
4834
        int                   k0,
4835
        int                   s0,
4836
0
        int                   p0) {
4837
0
    const int64_t ne[4] = {
4838
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4839
0
        a->ne[1],
4840
0
        a->ne[2],
4841
0
        a->ne[3],
4842
0
    };
4843
0
    GGML_ASSERT(ne[0] > 0);
4844
4845
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4846
4847
0
    int32_t params[] = { op, k0, s0, p0 };
4848
0
    ggml_set_op_params(result, params, sizeof(params));
4849
4850
0
    result->op     = GGML_OP_POOL_1D;
4851
0
    result->src[0] = a;
4852
4853
0
    return result;
4854
0
}
4855
4856
// ggml_pool_2d
4857
4858
struct ggml_tensor * ggml_pool_2d(
4859
        struct ggml_context * ctx,
4860
        struct ggml_tensor  * a,
4861
        enum ggml_op_pool     op,
4862
        int                   k0,
4863
        int                   k1,
4864
        int                   s0,
4865
        int                   s1,
4866
        float                 p0,
4867
0
        float                 p1) {
4868
0
    struct ggml_tensor * result;
4869
0
    const int64_t ne[4] = {
4870
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4871
0
        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
4872
0
        a->ne[2],
4873
0
        a->ne[3],
4874
0
    };
4875
0
    GGML_ASSERT(ne[0] > 0);
4876
0
    GGML_ASSERT(ne[1] > 0);
4877
4878
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4879
4880
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4881
0
    ggml_set_op_params(result, params, sizeof(params));
4882
4883
0
    result->op     = GGML_OP_POOL_2D;
4884
0
    result->src[0] = a;
4885
4886
0
    return result;
4887
0
}
4888
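A minimal usage sketch for the pooling constructors above (the shapes are assumed for illustration): a 2x2 max pool with stride 2 halves the first two extents.

    struct ggml_tensor * x = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 8, 8, 3, 1);
    struct ggml_tensor * y = ggml_pool_2d(ctx, x, GGML_OP_POOL_MAX, 2, 2, 2, 2, 0.0f, 0.0f);
    // y->ne = { (8 - 2)/2 + 1, (8 - 2)/2 + 1, 3, 1 } = { 4, 4, 3, 1 }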
4889
struct ggml_tensor * ggml_pool_2d_back(
4890
        struct ggml_context * ctx,
4891
        struct ggml_tensor  * a,
4892
        struct ggml_tensor  * af,
4893
        enum ggml_op_pool     op,
4894
        int                   k0,
4895
        int                   k1,
4896
        int                   s0,
4897
        int                   s1,
4898
        float                 p0,
4899
0
        float                 p1) {
4900
0
    struct ggml_tensor * result;
4901
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
4902
4903
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4904
0
    ggml_set_op_params(result, params, sizeof(params));
4905
4906
0
    result->op     = GGML_OP_POOL_2D_BACK;
4907
0
    result->src[0] = a;
4908
0
    result->src[1] = af;
4909
4910
0
    return result;
4911
0
}
4912
4913
// ggml_upscale / ggml_interpolate
4914
4915
static struct ggml_tensor * ggml_interpolate_impl(
4916
        struct ggml_context * ctx,
4917
        struct ggml_tensor  * a,
4918
        int64_t               ne0,
4919
        int64_t               ne1,
4920
        int64_t               ne2,
4921
        int64_t               ne3,
4922
0
        uint32_t              mode) {
4923
0
    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
4924
    // TODO: implement antialias for modes other than bilinear
4925
0
    GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
4926
4927
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
4928
4929
0
    ggml_set_op_params_i32(result, 0, (int32_t)mode);
4930
4931
0
    result->op     = GGML_OP_UPSCALE;
4932
0
    result->src[0] = a;
4933
4934
0
    return result;
4935
0
}
4936
4937
struct ggml_tensor * ggml_upscale(
4938
        struct ggml_context * ctx,
4939
        struct ggml_tensor  * a,
4940
        int                   scale_factor,
4941
0
        enum ggml_scale_mode  mode) {
4942
0
    GGML_ASSERT(scale_factor > 1);
4943
0
    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
4944
0
}
4945
4946
struct ggml_tensor * ggml_upscale_ext(
4947
        struct ggml_context * ctx,
4948
        struct ggml_tensor  * a,
4949
        int                   ne0,
4950
        int                   ne1,
4951
        int                   ne2,
4952
        int                   ne3,
4953
0
        enum ggml_scale_mode  mode) {
4954
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4955
0
}
4956
4957
struct ggml_tensor * ggml_interpolate(
4958
        struct ggml_context * ctx,
4959
        struct ggml_tensor  * a,
4960
        int64_t               ne0,
4961
        int64_t               ne1,
4962
        int64_t               ne2,
4963
        int64_t               ne3,
4964
0
        uint32_t              mode) {
4965
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4966
0
}
4967
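All three wrappers funnel into ggml_interpolate_impl, where the low byte of mode selects the scale mode and the high bits carry flags, per the asserts above. A hedged sketch of doubling the spatial extents of some F32 tensor a with antialiased bilinear filtering:

    uint32_t mode = GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS;
    struct ggml_tensor * up = ggml_interpolate(ctx, a, a->ne[0]*2, a->ne[1]*2, a->ne[2], a->ne[3], mode);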
4968
// ggml_pad
4969
4970
struct ggml_tensor * ggml_pad(
4971
        struct ggml_context * ctx,
4972
        struct ggml_tensor  * a,
4973
        int                   p0,
4974
        int                   p1,
4975
        int                   p2,
4976
0
        int                   p3) {
4977
0
    return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
4978
0
}
4979
4980
// ggml_pad_circular
4981
4982
struct ggml_tensor * ggml_pad_circular(
4983
        struct ggml_context * ctx,
4984
        struct ggml_tensor  * a,
4985
        int                   p0,
4986
        int                   p1,
4987
        int                   p2,
4988
0
        int                   p3) {
4989
0
    return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
4990
0
}
4991
4992
struct ggml_tensor * ggml_pad_ext(
4993
            struct ggml_context * ctx,
4994
            struct ggml_tensor  * a,
4995
            int                  lp0,
4996
            int                  rp0,
4997
            int                  lp1,
4998
            int                  rp1,
4999
            int                  lp2,
5000
            int                  rp2,
5001
            int                  lp3,
5002
            int                  rp3
5003
0
            ) {
5004
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
5005
0
            a->ne[0] + lp0 + rp0,
5006
0
            a->ne[1] + lp1 + rp1,
5007
0
            a->ne[2] + lp2 + rp2,
5008
0
            a->ne[3] + lp3 + rp3);
5009
5010
0
    ggml_set_op_params_i32(result, 0, lp0);
5011
0
    ggml_set_op_params_i32(result, 1, rp0);
5012
0
    ggml_set_op_params_i32(result, 2, lp1);
5013
0
    ggml_set_op_params_i32(result, 3, rp1);
5014
0
    ggml_set_op_params_i32(result, 4, lp2);
5015
0
    ggml_set_op_params_i32(result, 5, rp2);
5016
0
    ggml_set_op_params_i32(result, 6, lp3);
5017
0
    ggml_set_op_params_i32(result, 7, rp3);
5018
0
    ggml_set_op_params_i32(result, 8, 0); // not circular by default
5019
5020
5021
0
    result->op     = GGML_OP_PAD;
5022
0
    result->src[0] = a;
5023
5024
0
    return result;
5025
0
}
5026
5027
// ggml_pad_ext_circular
5028
5029
struct ggml_tensor * ggml_pad_ext_circular(
5030
        struct ggml_context * ctx,
5031
        struct ggml_tensor  * a,
5032
        int                  lp0,
5033
        int                  rp0,
5034
        int                  lp1,
5035
        int                  rp1,
5036
        int                  lp2,
5037
        int                  rp2,
5038
        int                  lp3,
5039
        int                  rp3
5040
0
        ) {
5041
0
    struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
5042
0
    ggml_set_op_params_i32(result, 8, 1); // circular
5043
0
    return result;
5044
0
}
5045
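For reference, the op_params layout shared by the two padding variants above: slots 0-7 hold lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3, and slot 8 is the circular flag. A hedged example of the simple wrapper:

    // ggml_pad only pads on the right of each dimension (all lp* are 0)
    struct ggml_tensor * padded = ggml_pad(ctx, a, 1, 0, 0, 0);
    // padded->ne[0] == a->ne[0] + 1, other extents unchanged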
5046
// ggml_pad_reflect_1d
5047
5048
struct ggml_tensor * ggml_pad_reflect_1d(
5049
        struct ggml_context * ctx,
5050
        struct ggml_tensor  * a,
5051
        int                   p0,
5052
0
        int                   p1) {
5053
0
    GGML_ASSERT(p0 >= 0);
5054
0
    GGML_ASSERT(p1 >= 0);
5055
5056
0
    GGML_ASSERT(p0 < a->ne[0]); // padding length on each side must be less than the
5057
0
    GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
5058
5059
0
    GGML_ASSERT(ggml_is_contiguous(a));
5060
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5061
5062
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
5063
0
            a->ne[0] + p0 + p1,
5064
0
            a->ne[1],
5065
0
            a->ne[2],
5066
0
            a->ne[3]);
5067
5068
0
    int32_t params[] = { p0, p1 };
5069
0
    ggml_set_op_params(result, params, sizeof(params));
5070
5071
0
    result->op     = GGML_OP_PAD_REFLECT_1D;
5072
0
    result->src[0] = a;
5073
5074
0
    return result;
5075
0
}
5076
5077
// ggml_roll
5078
5079
struct ggml_tensor * ggml_roll(
5080
        struct ggml_context * ctx,
5081
        struct ggml_tensor  * a,
5082
        int                   shift0,
5083
        int                   shift1,
5084
        int                   shift2,
5085
0
        int                   shift3) {
5086
0
    GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
5087
0
    GGML_ASSERT(abs(shift0) < a->ne[0]);
5088
0
    GGML_ASSERT(abs(shift1) < a->ne[1]);
5089
0
    GGML_ASSERT(abs(shift2) < a->ne[2]);
5090
0
    GGML_ASSERT(abs(shift3) < a->ne[3]);
5091
5092
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5093
5094
0
    ggml_set_op_params_i32(result, 0, shift0);
5095
0
    ggml_set_op_params_i32(result, 1, shift1);
5096
0
    ggml_set_op_params_i32(result, 2, shift2);
5097
0
    ggml_set_op_params_i32(result, 3, shift3);
5098
5099
0
    result->op     = GGML_OP_ROLL;
5100
0
    result->src[0] = a;
5101
5102
0
    return result;
5103
0
}
5104
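A hedged usage sketch for ggml_roll; the circular-shift semantics are implied by the op name and by the |shift| < extent asserts above, and the shift direction shown here assumes torch-style roll behavior:

    // rotate dim 0 by one position; conceptually [a b c d] -> [d a b c]
    struct ggml_tensor * r = ggml_roll(ctx, x, 1, 0, 0, 0);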
5105
// ggml_timestep_embedding
5106
5107
struct ggml_tensor * ggml_timestep_embedding(
5108
        struct ggml_context * ctx,
5109
        struct ggml_tensor  * timesteps,
5110
        int                   dim,
5111
0
        int                   max_period) {
5112
5113
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);
5114
5115
0
    ggml_set_op_params_i32(result, 0, dim);
5116
0
    ggml_set_op_params_i32(result, 1, max_period);
5117
5118
0
    result->op     = GGML_OP_TIMESTEP_EMBEDDING;
5119
0
    result->src[0] = timesteps;
5120
5121
0
    return result;
5122
0
}
5123
5124
// ggml_tri
5125
5126
struct ggml_tensor * ggml_tri(
5127
    struct ggml_context * ctx,
5128
    struct ggml_tensor  * a,
5129
0
    enum ggml_tri_type    type) {
5130
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5131
5132
0
    GGML_ASSERT(ggml_is_contiguous(a));
5133
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
5134
5135
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5136
5137
0
    ggml_set_op_params_i32(result, 0, type);
5138
5139
0
    result->op = GGML_OP_TRI;
5140
0
    result->src[0] = a;
5141
5142
0
    return result;
5143
0
}
5144
5145
// ggml_fill
5146
5147
static struct ggml_tensor * ggml_fill_impl(
5148
    struct ggml_context * ctx,
5149
    struct ggml_tensor  * a,
5150
    float                 c,
5151
0
    bool                  inplace) {
5152
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5153
0
    GGML_ASSERT(ggml_is_contiguous(a));
5154
5155
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5156
5157
0
    ggml_set_op_params_f32(result, 0, c);
5158
5159
0
    result->op = GGML_OP_FILL;
5160
0
    result->src[0] = a;
5161
5162
0
    return result;
5163
0
}
5164
5165
struct ggml_tensor * ggml_fill(
5166
    struct ggml_context * ctx,
5167
    struct ggml_tensor  * a,
5168
0
    float                 c) {
5169
0
    return ggml_fill_impl(ctx, a, c, false);
5170
0
}
5171
5172
struct ggml_tensor * ggml_fill_inplace(
5173
    struct ggml_context * ctx,
5174
    struct ggml_tensor  * a,
5175
0
    float                 c) {
5176
0
    return ggml_fill_impl(ctx, a, c, true);
5177
0
}
5178
5179
// ggml_argsort
5180
5181
struct ggml_tensor * ggml_argsort(
5182
        struct ggml_context  * ctx,
5183
        struct ggml_tensor   * a,
5184
0
        enum ggml_sort_order   order) {
5185
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
5186
5187
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
5188
5189
0
    ggml_set_op_params_i32(result, 0, (int32_t) order);
5190
5191
0
    result->op     = GGML_OP_ARGSORT;
5192
0
    result->src[0] = a;
5193
5194
0
    return result;
5195
0
}
5196
5197
// ggml_argsort_top_k
5198
5199
struct ggml_tensor * ggml_argsort_top_k(
5200
        struct ggml_context * ctx,
5201
        struct ggml_tensor  * a,
5202
0
        int                   k) {
5203
0
    GGML_ASSERT(a->ne[0] >= k);
5204
5205
0
    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
5206
5207
0
    result = ggml_view_4d(ctx, result,
5208
0
                k, result->ne[1], result->ne[2], result->ne[3],
5209
0
                   result->nb[1], result->nb[2], result->nb[3],
5210
0
                0);
5211
5212
0
    return result;
5213
0
}
5214
5215
// ggml_top_k
5216
5217
struct ggml_tensor * ggml_top_k(
5218
        struct ggml_context * ctx,
5219
        struct ggml_tensor  * a,
5220
0
        int                   k) {
5221
0
    GGML_ASSERT(a->ne[0] >= k);
5222
5223
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
5224
5225
0
    result->op     = GGML_OP_TOP_K;
5226
0
    result->src[0] = a;
5227
5228
0
    return result;
5229
0
}
5230
5231
// ggml_arange
5232
5233
struct ggml_tensor * ggml_arange(
5234
        struct ggml_context * ctx,
5235
        float                 start,
5236
        float                 stop,
5237
0
        float                 step) {
5238
0
    GGML_ASSERT(stop > start);
5239
5240
0
    const int64_t steps = (int64_t) ceilf((stop - start) / step);
5241
5242
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
5243
5244
0
    ggml_set_op_params_f32(result, 0, start);
5245
0
    ggml_set_op_params_f32(result, 1, stop);
5246
0
    ggml_set_op_params_f32(result, 2, step);
5247
5248
0
    result->op = GGML_OP_ARANGE;
5249
5250
0
    return result;
5251
0
}
5252
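An illustrative step count for ggml_arange (the values are examples, not from the source):

    // steps = ceilf((stop - start)/step) = ceilf((5 - 0)/2) = 3
    struct ggml_tensor * t = ggml_arange(ctx, 0.0f, 5.0f, 2.0f);
    // ggml_nelements(t) == 3; computing the graph would yield 0, 2, 4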
5253
// ggml_flash_attn_ext
5254
5255
struct ggml_tensor * ggml_flash_attn_ext(
5256
        struct ggml_context * ctx,
5257
        struct ggml_tensor  * q,
5258
        struct ggml_tensor  * k,
5259
        struct ggml_tensor  * v,
5260
        struct ggml_tensor  * mask,
5261
        float                 scale,
5262
        float                 max_bias,
5263
0
        float                 logit_softcap) {
5264
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5265
    // TODO: check if vT can be multiplied by (k*qT)
5266
5267
0
    GGML_ASSERT(q->ne[3] == k->ne[3]);
5268
0
    GGML_ASSERT(q->ne[3] == v->ne[3]);
5269
5270
0
    if (mask) {
5271
0
        GGML_ASSERT(ggml_is_contiguous(mask));
5272
        //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
5273
5274
0
        GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
5275
0
        GGML_ASSERT(q->ne[3] % mask->ne[3] == 0);
5276
0
    }
5277
5278
0
    if (max_bias > 0.0f) {
5279
0
        GGML_ASSERT(mask);
5280
0
    }
5281
5282
    // permute(0, 2, 1, 3)
5283
0
    int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
5284
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5285
5286
0
    float params[] = { scale, max_bias, logit_softcap };
5287
0
    ggml_set_op_params(result, params, sizeof(params));
5288
5289
0
    result->op     = GGML_OP_FLASH_ATTN_EXT;
5290
0
    result->src[0] = q;
5291
0
    result->src[1] = k;
5292
0
    result->src[2] = v;
5293
0
    result->src[3] = mask;
5294
5295
0
    return result;
5296
0
}
5297
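Shape bookkeeping implied by the permute(0, 2, 1, 3) comment above, with illustrative dimension names (a reading of the asserts, not an authoritative spec):

    // q: [d_qk, n_q,  n_head,    n_batch]
    // k: [d_qk, n_kv, n_head_kv, n_batch]
    // v: [d_v,  n_kv, n_head_kv, n_batch]
    // result: [d_v, n_head, n_q, n_batch]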
5298
void ggml_flash_attn_ext_set_prec(
5299
        struct ggml_tensor * a,
5300
0
        enum ggml_prec       prec) {
5301
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5302
5303
0
    const int32_t prec_i32 = (int32_t) prec;
5304
5305
0
    ggml_set_op_params_i32(a, 3, prec_i32); // slots 0-2 hold scale, max_bias and logit_softcap
5306
0
}
5307
5308
enum ggml_prec ggml_flash_attn_ext_get_prec(
5309
0
        const struct ggml_tensor * a) {
5310
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5311
5312
0
    const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);
5313
5314
0
    return (enum ggml_prec) prec_i32;
5315
0
}
5316
5317
void ggml_flash_attn_ext_add_sinks(
5318
        struct ggml_tensor * a,
5319
0
        struct ggml_tensor * sinks) {
5320
0
    if (!sinks) {
5321
0
        a->src[4] = NULL;
5322
0
        return;
5323
0
    }
5324
5325
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5326
0
    GGML_ASSERT(a->src[4] == NULL);
5327
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
5328
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
5329
5330
0
    a->src[4] = sinks;
5331
0
}
5332
5333
// ggml_flash_attn_back
5334
5335
struct ggml_tensor * ggml_flash_attn_back(
5336
        struct ggml_context * ctx,
5337
        struct ggml_tensor  * q,
5338
        struct ggml_tensor  * k,
5339
        struct ggml_tensor  * v,
5340
        struct ggml_tensor  * d,
5341
0
        bool                  masked) {
5342
0
    GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
5343
5344
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5345
    // TODO: check if vT can be multiplied by (k*qT)
5346
5347
    // d shape [D,N,ne2,ne3]
5348
    // q shape [D,N,ne2,ne3]
5349
    // k shape [D,M,kvne2,ne3]
5350
    // v shape [M,D,kvne2,ne3]
5351
5352
0
    const int64_t     D = q->ne[0];
5353
0
    const int64_t     N = q->ne[1];
5354
0
    const int64_t     M = k->ne[1];
5355
0
    const int64_t   ne2 = q->ne[2];
5356
0
    const int64_t   ne3 = q->ne[3];
5357
0
    const int64_t kvne2 = k->ne[2];
5358
5359
0
    GGML_ASSERT(k->ne[0] == D);
5360
0
    GGML_ASSERT(v->ne[0] == M);
5361
0
    GGML_ASSERT(v->ne[1] == D);
5362
0
    GGML_ASSERT(d->ne[0] == D);
5363
0
    GGML_ASSERT(d->ne[1] == N);
5364
0
    GGML_ASSERT(k->ne[2] == kvne2);
5365
0
    GGML_ASSERT(k->ne[3] == ne3);
5366
0
    GGML_ASSERT(v->ne[2] == kvne2);
5367
0
    GGML_ASSERT(v->ne[3] == ne3);
5368
0
    GGML_ASSERT(d->ne[2] == ne2);
5369
0
    GGML_ASSERT(d->ne[3] == ne3);
5370
5371
0
    GGML_ASSERT(ne2 % kvne2 == 0);
5372
5373
    // store gradients of q, k and v as contiguous tensors concatenated in result.
5374
    // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
5375
0
    const int64_t elem_q = ggml_nelements(q);
5376
0
    const int64_t elem_k = ggml_nelements(k);
5377
0
    const int64_t elem_v = ggml_nelements(v);
5378
5379
0
    enum ggml_type result_type = GGML_TYPE_F32;
5380
0
    GGML_ASSERT(ggml_blck_size(result_type) == 1);
5381
0
    const size_t tsize = ggml_type_size(result_type);
5382
5383
0
    const size_t offs_q = 0;
5384
0
    const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
5385
0
    const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
5386
0
    const size_t end    = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);
5387
5388
0
    const size_t nelements = (end + tsize - 1)/tsize;
5389
5390
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);
5391
5392
0
    int32_t masked_i = masked ? 1 : 0;
5393
0
    ggml_set_op_params(result, &masked_i, sizeof(masked_i));
5394
5395
0
    result->op     = GGML_OP_FLASH_ATTN_BACK;
5396
0
    result->src[0] = q;
5397
0
    result->src[1] = k;
5398
0
    result->src[2] = v;
5399
0
    result->src[3] = d;
5400
5401
0
    return result;
5402
0
}
5403
5404
// ggml_ssm_conv
5405
5406
struct ggml_tensor * ggml_ssm_conv(
5407
        struct ggml_context * ctx,
5408
        struct ggml_tensor  * sx,
5409
0
        struct ggml_tensor  * c) {
5410
0
    GGML_ASSERT(ggml_is_3d(sx));
5411
0
    GGML_ASSERT(ggml_is_matrix(c));
5412
5413
0
    const int64_t d_conv  = c->ne[0];
5414
0
    const int64_t d_inner = c->ne[1];
5415
0
    const int64_t n_t     = sx->ne[0] - d_conv + 1; // tokens per sequence
5416
0
    const int64_t n_s     = sx->ne[2];
5417
5418
    // TODO: maybe support other strides than 1?
5419
0
    GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
5420
0
    GGML_ASSERT(sx->ne[1] == d_inner);
5421
0
    GGML_ASSERT(n_t >= 0);
5422
5423
0
    struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
5424
5425
0
    result->op     = GGML_OP_SSM_CONV;
5426
0
    result->src[0] = sx;
5427
0
    result->src[1] = c;
5428
5429
0
    return result;
5430
0
}
5431
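A worked example of the shape relation asserted above, with illustrative numbers:

    // n_t = sx->ne[0] - d_conv + 1 = 7 - 4 + 1 = 4
    // i.e. the first d_conv - 1 = 3 columns of sx carry state from previous
    // tokens and each of the 4 stride-1 shifts yields one output token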
5432
// ggml_ssm_scan
5433
5434
struct ggml_tensor * ggml_ssm_scan(
5435
        struct ggml_context * ctx,
5436
        struct ggml_tensor  * s,
5437
        struct ggml_tensor  * x,
5438
        struct ggml_tensor  * dt,
5439
        struct ggml_tensor  * A,
5440
        struct ggml_tensor  * B,
5441
        struct ggml_tensor  * C,
5442
0
        struct ggml_tensor  * ids) {
5443
0
    GGML_ASSERT(ggml_is_contiguous(s));
5444
0
    GGML_ASSERT(ggml_is_contiguous(dt));
5445
0
    GGML_ASSERT(ggml_is_contiguous(A));
5446
0
    GGML_ASSERT(x->nb[0] == ggml_type_size(x->type));
5447
0
    GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
5448
0
    GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
5449
0
    GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]);
5450
0
    GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]);
5451
0
    GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]);
5452
0
    GGML_ASSERT(ggml_are_same_shape(B, C));
5453
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
5454
5455
0
    {
5456
0
        const int64_t d_state      = s->ne[0];
5457
0
        const int64_t head_dim     = x->ne[0];
5458
0
        const int64_t n_head       = x->ne[1];
5459
0
        const int64_t n_seq_tokens = x->ne[2];
5460
0
        const int64_t n_seqs       = x->ne[3];
5461
5462
0
        GGML_ASSERT(dt->ne[0] == n_head);
5463
0
        GGML_ASSERT(dt->ne[1] == n_seq_tokens);
5464
0
        GGML_ASSERT(dt->ne[2] == n_seqs);
5465
0
        GGML_ASSERT(ggml_is_3d(dt));
5466
0
        GGML_ASSERT(s->ne[1] == head_dim);
5467
0
        GGML_ASSERT(s->ne[2] == n_head);
5468
0
        GGML_ASSERT(B->ne[0] == d_state);
5469
0
        GGML_ASSERT(B->ne[2] == n_seq_tokens);
5470
0
        GGML_ASSERT(B->ne[3] == n_seqs);
5471
0
        GGML_ASSERT(ids->ne[0] == n_seqs);
5472
0
        GGML_ASSERT(ggml_is_vector(ids));
5473
0
        GGML_ASSERT(A->ne[1] == n_head);
5474
0
        GGML_ASSERT(ggml_is_matrix(A));
5475
5476
0
        if (A->ne[0] != 1) {
5477
            // Mamba-1 has more granular decay factors
5478
0
            GGML_ASSERT(A->ne[0] == d_state);
5479
0
        }
5480
0
    }
5481
5482
    // concatenated y + ssm_states
5483
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]);
5484
5485
0
    result->op   = GGML_OP_SSM_SCAN;
5486
0
    result->src[0] = s;
5487
0
    result->src[1] = x;
5488
0
    result->src[2] = dt;
5489
0
    result->src[3] = A;
5490
0
    result->src[4] = B;
5491
0
    result->src[5] = C;
5492
0
    result->src[6] = ids;
5493
5494
0
    return result;
5495
0
}
5496
5497
// ggml_win_part
5498
5499
struct ggml_tensor * ggml_win_part(
5500
        struct ggml_context * ctx,
5501
        struct ggml_tensor  * a,
5502
0
        int                   w) {
5503
0
    GGML_ASSERT(a->ne[3] == 1);
5504
0
    GGML_ASSERT(a->type  == GGML_TYPE_F32);
5505
5506
    // padding
5507
0
    const int px = (w - a->ne[1]%w)%w;
5508
0
    const int py = (w - a->ne[2]%w)%w;
5509
5510
0
    const int npx = (px + a->ne[1])/w;
5511
0
    const int npy = (py + a->ne[2])/w;
5512
0
    const int np  = npx*npy;
5513
5514
0
    const int64_t ne[4] = { a->ne[0], w, w, np, };
5515
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5516
5517
0
    int32_t params[] = { npx, npy, w };
5518
0
    ggml_set_op_params(result, params, sizeof(params));
5519
5520
0
    result->op     = GGML_OP_WIN_PART;
5521
0
    result->src[0] = a;
5522
5523
0
    return result;
5524
0
}
5525
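A worked example of the window-partition padding math above (illustrative numbers):

    // a->ne[1] = 10, w = 4
    // px  = (4 - 10 % 4) % 4 = 2   // pad up to the next multiple of w
    // npx = (2 + 10) / 4    = 3    // windows along that axis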
5526
// ggml_win_unpart
5527
5528
struct ggml_tensor * ggml_win_unpart(
5529
        struct ggml_context * ctx,
5530
        struct ggml_tensor  * a,
5531
        int                   w0,
5532
        int                   h0,
5533
0
        int                   w) {
5534
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5535
5536
0
    const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
5537
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
5538
5539
0
    int32_t params[] = { w };
5540
0
    ggml_set_op_params(result, params, sizeof(params));
5541
5542
0
    result->op     = GGML_OP_WIN_UNPART;
5543
0
    result->src[0] = a;
5544
5545
0
    return result;
5546
0
}
5547
5548
// ggml_get_rel_pos
5549
5550
struct ggml_tensor * ggml_get_rel_pos(
5551
        struct ggml_context * ctx,
5552
        struct ggml_tensor  * a,
5553
        int                   qh,
5554
0
        int                   kh) {
5555
0
    GGML_ASSERT(qh == kh);
5556
0
    GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
5557
5558
0
    const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
5559
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
5560
5561
0
    result->op     = GGML_OP_GET_REL_POS;
5562
0
    result->src[0] = a;
5563
5564
0
    return result;
5565
0
}
5566
5567
// ggml_add_rel_pos
5568
5569
static struct ggml_tensor * ggml_add_rel_pos_impl(
5570
        struct ggml_context * ctx,
5571
        struct ggml_tensor  * a,
5572
        struct ggml_tensor  * pw,
5573
        struct ggml_tensor  * ph,
5574
0
        bool                  inplace) {
5575
0
    GGML_ASSERT(ggml_are_same_shape(pw, ph));
5576
0
    GGML_ASSERT(ggml_is_contiguous(a));
5577
0
    GGML_ASSERT(ggml_is_contiguous(pw));
5578
0
    GGML_ASSERT(ggml_is_contiguous(ph));
5579
0
    GGML_ASSERT(ph->type == GGML_TYPE_F32);
5580
0
    GGML_ASSERT(pw->type == GGML_TYPE_F32);
5581
0
    GGML_ASSERT(pw->ne[3] == a->ne[2]);
5582
0
    GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
5583
0
    GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
5584
5585
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5586
0
    ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
5587
5588
0
    result->op     = GGML_OP_ADD_REL_POS;
5589
0
    result->src[0] = a;
5590
0
    result->src[1] = pw;
5591
0
    result->src[2] = ph;
5592
5593
0
    return result;
5594
0
}
5595
5596
struct ggml_tensor * ggml_add_rel_pos(
5597
        struct ggml_context * ctx,
5598
        struct ggml_tensor  * a,
5599
        struct ggml_tensor  * pw,
5600
0
        struct ggml_tensor  * ph) {
5601
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
5602
0
}
5603
5604
struct ggml_tensor * ggml_add_rel_pos_inplace(
5605
        struct ggml_context * ctx,
5606
        struct ggml_tensor  * a,
5607
        struct ggml_tensor  * pw,
5608
0
        struct ggml_tensor  * ph) {
5609
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
5610
0
}
5611
5612
// ggml_rwkv_wkv6
5613
5614
struct ggml_tensor * ggml_rwkv_wkv6(
5615
        struct ggml_context * ctx,
5616
        struct ggml_tensor  * k,
5617
        struct ggml_tensor  * v,
5618
        struct ggml_tensor  * r,
5619
        struct ggml_tensor  * tf,
5620
        struct ggml_tensor  * td,
5621
0
        struct ggml_tensor  * state) {
5622
0
    GGML_ASSERT(ggml_is_contiguous(k));
5623
0
    GGML_ASSERT(ggml_is_contiguous(v));
5624
0
    GGML_ASSERT(ggml_is_contiguous(r));
5625
0
    GGML_ASSERT(ggml_is_contiguous(tf));
5626
0
    GGML_ASSERT(ggml_is_contiguous(td));
5627
0
    GGML_ASSERT(ggml_is_contiguous(state));
5628
5629
0
    const int64_t S = k->ne[0];
5630
0
    const int64_t H = k->ne[1];
5631
0
    const int64_t n_tokens = k->ne[2];
5632
0
    const int64_t n_seqs = state->ne[1];
5633
0
    {
5634
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5635
0
        GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
5636
0
        GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
5637
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5638
0
    }
5639
5640
    // concat output and new_state
5641
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5642
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5643
5644
0
    result->op     = GGML_OP_RWKV_WKV6;
5645
0
    result->src[0] = k;
5646
0
    result->src[1] = v;
5647
0
    result->src[2] = r;
5648
0
    result->src[3] = tf;
5649
0
    result->src[4] = td;
5650
0
    result->src[5] = state;
5651
5652
0
    return result;
5653
0
}
5654
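A hedged reading of the result layout built above (an interpretation of the code, not an authoritative API note): the single F32 tensor of shape { S*H, n_tokens + S*n_seqs } row-concatenates the per-token output (the first n_tokens rows) with the updated recurrent state (the remaining S*n_seqs rows), so callers are expected to split it with views. The gated linear attention and wkv7 ops below reuse the same layout.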
5655
// ggml_gated_linear_attn
5656
5657
struct ggml_tensor * ggml_gated_linear_attn(
5658
        struct ggml_context * ctx,
5659
        struct ggml_tensor  * k,
5660
        struct ggml_tensor  * v,
5661
        struct ggml_tensor  * q,
5662
        struct ggml_tensor  * g,
5663
        struct ggml_tensor  * state,
5664
0
        float scale) {
5665
0
    GGML_ASSERT(ggml_is_contiguous(k));
5666
0
    GGML_ASSERT(ggml_is_contiguous(v));
5667
0
    GGML_ASSERT(ggml_is_contiguous(q));
5668
0
    GGML_ASSERT(ggml_is_contiguous(g));
5669
0
    GGML_ASSERT(ggml_is_contiguous(state));
5670
5671
0
    const int64_t S = k->ne[0];
5672
0
    const int64_t H = k->ne[1];
5673
0
    const int64_t n_tokens = k->ne[2];
5674
0
    const int64_t n_seqs = state->ne[1];
5675
0
    {
5676
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5677
0
        GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
5678
0
        GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
5679
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5680
0
    }
5681
5682
    // concat output and new_state
5683
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5684
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5685
5686
0
    ggml_set_op_params_f32(result, 0, scale);
5687
5688
0
    result->op     = GGML_OP_GATED_LINEAR_ATTN;
5689
0
    result->src[0] = k;
5690
0
    result->src[1] = v;
5691
0
    result->src[2] = q;
5692
0
    result->src[3] = g;
5693
0
    result->src[4] = state;
5694
5695
0
    return result;
5696
0
}
5697
5698
// ggml_rwkv_wkv7
5699
5700
struct ggml_tensor * ggml_rwkv_wkv7(
5701
        struct ggml_context * ctx,
5702
        struct ggml_tensor  * r,
5703
        struct ggml_tensor  * w,
5704
        struct ggml_tensor  * k,
5705
        struct ggml_tensor  * v,
5706
        struct ggml_tensor  * a,
5707
        struct ggml_tensor  * b,
5708
0
        struct ggml_tensor  * state) {
5709
0
    GGML_ASSERT(ggml_is_contiguous(r));
5710
0
    GGML_ASSERT(ggml_is_contiguous(w));
5711
0
    GGML_ASSERT(ggml_is_contiguous(k));
5712
0
    GGML_ASSERT(ggml_is_contiguous(v));
5713
0
    GGML_ASSERT(ggml_is_contiguous(a));
5714
0
    GGML_ASSERT(ggml_is_contiguous(b));
5715
0
    GGML_ASSERT(ggml_is_contiguous(state));
5716
5717
0
    const int64_t S = k->ne[0];
5718
0
    const int64_t H = k->ne[1];
5719
0
    const int64_t n_tokens = k->ne[2];
5720
0
    const int64_t n_seqs = state->ne[1];
5721
0
    {
5722
0
        GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
5723
0
        GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
5724
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5725
0
        GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
5726
0
        GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
5727
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5728
0
    }
5729
5730
    // concat output and new_state
5731
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5732
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5733
5734
0
    result->op     = GGML_OP_RWKV_WKV7;
5735
0
    result->src[0] = r;
5736
0
    result->src[1] = w;
5737
0
    result->src[2] = k;
5738
0
    result->src[3] = v;
5739
0
    result->src[4] = a;
5740
0
    result->src[5] = b;
5741
0
    result->src[6] = state;
5742
5743
0
    return result;
5744
0
}
5745
5746
// ggml_unary
5747
5748
static struct ggml_tensor * ggml_unary_impl(
5749
        struct ggml_context * ctx,
5750
        struct ggml_tensor  * a,
5751
        enum ggml_unary_op    op,
5752
0
        bool                  inplace) {
5753
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
5754
5755
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5756
5757
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
5758
5759
0
    result->op     = GGML_OP_UNARY;
5760
0
    result->src[0] = a;
5761
5762
0
    return result;
5763
0
}
5764
5765
struct ggml_tensor * ggml_unary(
5766
        struct ggml_context * ctx,
5767
        struct ggml_tensor  * a,
5768
0
        enum ggml_unary_op    op) {
5769
0
    return ggml_unary_impl(ctx, a, op, false);
5770
0
}
5771
5772
struct ggml_tensor * ggml_unary_inplace(
5773
        struct ggml_context * ctx,
5774
        struct ggml_tensor  * a,
5775
0
        enum ggml_unary_op    op) {
5776
0
    return ggml_unary_impl(ctx, a, op, true);
5777
0
}
5778
5779
// ggml_map_custom1
5780
5781
static struct ggml_tensor * ggml_map_custom1_impl(
5782
        struct ggml_context      * ctx,
5783
        struct ggml_tensor       * a,
5784
        const  ggml_custom1_op_t   fun,
5785
        int                        n_tasks,
5786
        void                     * userdata,
5787
0
        bool                       inplace) {
5788
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5789
5790
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5791
5792
0
    struct ggml_map_custom1_op_params params = {
5793
0
        /*.fun      =*/ fun,
5794
0
        /*.n_tasks  =*/ n_tasks,
5795
0
        /*.userdata =*/ userdata
5796
0
    };
5797
0
    ggml_set_op_params(result, &params, sizeof(params));
5798
5799
0
    result->op     = GGML_OP_MAP_CUSTOM1;
5800
0
    result->src[0] = a;
5801
5802
0
    return result;
5803
0
}
5804
5805
struct ggml_tensor * ggml_map_custom1(
5806
        struct ggml_context      * ctx,
5807
        struct ggml_tensor       * a,
5808
        const  ggml_custom1_op_t   fun,
5809
        int                        n_tasks,
5810
0
        void                     * userdata) {
5811
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
5812
0
}
5813
5814
struct ggml_tensor * ggml_map_custom1_inplace(
5815
        struct ggml_context      * ctx,
5816
        struct ggml_tensor       * a,
5817
        const  ggml_custom1_op_t   fun,
5818
        int                        n_tasks,
5819
0
        void                     * userdata) {
5820
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
5821
0
}
5822
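A minimal callback sketch for the map_custom1 family, assuming the upstream ggml_custom1_op_t signature (dst, a, ith, nth, userdata) and contiguous F32 rows; each of the nth tasks strides over the rows it owns:

    static void my_scale_rows(struct ggml_tensor * dst, const struct ggml_tensor * a,
                              int ith, int nth, void * userdata) {
        const float s = *(const float *) userdata; // hypothetical user parameter
        for (int64_t i = ith; i < ggml_nrows(dst); i += nth) {
            const float * x = (const float *) ((const char *) a->data   + i*a->nb[1]);
            float       * y = (float       *) ((      char *) dst->data + i*dst->nb[1]);
            for (int64_t j = 0; j < dst->ne[0]; j++) {
                y[j] = s*x[j];
            }
        }
    }
    // struct ggml_tensor * out = ggml_map_custom1(ctx, a, my_scale_rows, GGML_N_TASKS_MAX, &scale);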
5823
// ggml_map_custom2
5824
5825
static struct ggml_tensor * ggml_map_custom2_impl(
5826
        struct ggml_context      * ctx,
5827
        struct ggml_tensor       * a,
5828
        struct ggml_tensor       * b,
5829
        const  ggml_custom2_op_t   fun,
5830
        int                        n_tasks,
5831
        void                     * userdata,
5832
0
        bool                       inplace) {
5833
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5834
5835
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5836
5837
0
    struct ggml_map_custom2_op_params params = {
5838
0
        /*.fun      =*/ fun,
5839
0
        /*.n_tasks  =*/ n_tasks,
5840
0
        /*.userdata =*/ userdata
5841
0
    };
5842
0
    ggml_set_op_params(result, &params, sizeof(params));
5843
5844
0
    result->op     = GGML_OP_MAP_CUSTOM2;
5845
0
    result->src[0] = a;
5846
0
    result->src[1] = b;
5847
5848
0
    return result;
5849
0
}
5850
5851
struct ggml_tensor * ggml_map_custom2(
5852
        struct ggml_context      * ctx,
5853
        struct ggml_tensor       * a,
5854
        struct ggml_tensor       * b,
5855
        const  ggml_custom2_op_t   fun,
5856
        int                        n_tasks,
5857
0
        void                     * userdata) {
5858
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
5859
0
}
5860
5861
struct ggml_tensor * ggml_map_custom2_inplace(
5862
        struct ggml_context      * ctx,
5863
        struct ggml_tensor       * a,
5864
        struct ggml_tensor       * b,
5865
        const  ggml_custom2_op_t   fun,
5866
        int                        n_tasks,
5867
0
        void                     * userdata) {
5868
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
5869
0
}
5870
5871
// ggml_map_custom3
5872
5873
static struct ggml_tensor * ggml_map_custom3_impl(
5874
        struct ggml_context      * ctx,
5875
        struct ggml_tensor       * a,
5876
        struct ggml_tensor       * b,
5877
        struct ggml_tensor       * c,
5878
        const  ggml_custom3_op_t   fun,
5879
        int                        n_tasks,
5880
        void                     * userdata,
5881
0
        bool                       inplace) {
5882
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5883
5884
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5885
5886
0
    struct ggml_map_custom3_op_params params = {
5887
0
        /*.fun      =*/ fun,
5888
0
        /*.n_tasks  =*/ n_tasks,
5889
0
        /*.userdata =*/ userdata
5890
0
    };
5891
0
    ggml_set_op_params(result, &params, sizeof(params));
5892
5893
0
    result->op     = GGML_OP_MAP_CUSTOM3;
5894
0
    result->src[0] = a;
5895
0
    result->src[1] = b;
5896
0
    result->src[2] = c;
5897
5898
0
    return result;
5899
0
}
5900
5901
struct ggml_tensor * ggml_map_custom3(
5902
        struct ggml_context      * ctx,
5903
        struct ggml_tensor       * a,
5904
        struct ggml_tensor       * b,
5905
        struct ggml_tensor       * c,
5906
        const  ggml_custom3_op_t   fun,
5907
        int                        n_tasks,
5908
0
        void                     * userdata) {
5909
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
5910
0
}
5911
5912
struct ggml_tensor * ggml_map_custom3_inplace(
5913
        struct ggml_context      * ctx,
5914
        struct ggml_tensor       * a,
5915
        struct ggml_tensor       * b,
5916
        struct ggml_tensor       * c,
5917
        const  ggml_custom3_op_t   fun,
5918
        int                        n_tasks,
5919
0
        void                     * userdata) {
5920
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
5921
0
}
5922
5923
struct ggml_tensor * ggml_custom_4d(
5924
        struct ggml_context * ctx,
5925
        enum ggml_type        type,
5926
        int64_t               ne0,
5927
        int64_t               ne1,
5928
        int64_t               ne2,
5929
        int64_t               ne3,
5930
        struct ggml_tensor ** args,
5931
        int                   n_args,
5932
        ggml_custom_op_t      fun,
5933
        int                   n_tasks,
5934
0
        void                * userdata) {
5935
5936
0
    GGML_ASSERT(n_args < GGML_MAX_SRC);
5937
5938
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
5939
5940
0
    struct ggml_custom_op_params params = {
5941
0
        /*.fun      =*/ fun,
5942
0
        /*.n_tasks  =*/ n_tasks,
5943
0
        /*.userdata =*/ userdata
5944
0
    };
5945
0
    ggml_set_op_params(result, &params, sizeof(params));
5946
5947
0
    result->op = GGML_OP_CUSTOM;
5948
0
    for (int i = 0; i < n_args; i++) {
5949
0
        result->src[i] = args[i];
5950
0
    }
5951
5952
0
    return result;
5953
0
}
5954
5955
struct ggml_tensor * ggml_custom_inplace(
5956
        struct ggml_context * ctx,
5957
        struct ggml_tensor  * a,
5958
        struct ggml_tensor ** args,
5959
        int                   n_args,
5960
        ggml_custom_op_t      fun,
5961
        int                   n_tasks,
5962
0
        void                * userdata) {
5963
5964
0
    GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
5965
5966
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
5967
5968
0
    struct ggml_custom_op_params params = {
5969
0
        /*.fun      =*/ fun,
5970
0
        /*.n_tasks  =*/ n_tasks,
5971
0
        /*.userdata =*/ userdata
5972
0
    };
5973
0
    ggml_set_op_params(result, &params, sizeof(params));
5974
5975
0
    result->op = GGML_OP_CUSTOM;
5976
0
    result->src[0] = a;
5977
0
    for (int i = 0; i < n_args; i++) {
5978
0
        result->src[i + 1] = args[i];
5979
0
    }
5980
5981
0
    return result;
5982
0
}
5983
// ggml_cross_entropy_loss
5984
5985
struct ggml_tensor * ggml_cross_entropy_loss(
5986
        struct ggml_context * ctx,
5987
        struct ggml_tensor  * a,
5988
0
        struct ggml_tensor  * b) {
5989
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
5990
5991
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
5992
5993
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS;
5994
0
    result->src[0] = a;
5995
0
    result->src[1] = b;
5996
5997
0
    return result;
5998
0
}
5999
6000
// ggml_cross_entropy_loss_back
6001
6002
struct ggml_tensor * ggml_cross_entropy_loss_back(
6003
        struct ggml_context * ctx,
6004
        struct ggml_tensor  * a,
6005
        struct ggml_tensor  * b,
6006
0
        struct ggml_tensor  * c) {
6007
0
    GGML_ASSERT(ggml_is_scalar(a));
6008
0
    GGML_ASSERT(ggml_are_same_shape(b, c));
6009
6010
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, b);
6011
6012
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
6013
0
    result->src[0] = a;
6014
0
    result->src[1] = b;
6015
0
    result->src[2] = c;
6016
6017
0
    return result;
6018
0
}
6019
6020
// opt_step_adamw
6021
6022
struct ggml_tensor * ggml_opt_step_adamw(
6023
        struct ggml_context * ctx,
6024
        struct ggml_tensor  * a,
6025
        struct ggml_tensor  * grad,
6026
        struct ggml_tensor  * m,
6027
        struct ggml_tensor  * v,
6028
0
        struct ggml_tensor  * adamw_params) {
6029
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6030
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6031
0
    GGML_ASSERT(ggml_are_same_shape(a, m));
6032
0
    GGML_ASSERT(ggml_are_same_shape(a, v));
6033
0
    GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
6034
0
    GGML_ASSERT(ggml_nelements(adamw_params) == 7);
6035
6036
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6037
6038
0
    result->op     = GGML_OP_OPT_STEP_ADAMW;
6039
0
    result->src[0] = a;
6040
0
    result->src[1] = grad;
6041
0
    result->src[2] = m;
6042
0
    result->src[3] = v;
6043
0
    result->src[4] = adamw_params;
6044
6045
0
    return result;
6046
0
}
6047
6048
// opt_step_sgd
6049
6050
struct ggml_tensor * ggml_opt_step_sgd(
6051
        struct ggml_context * ctx,
6052
        struct ggml_tensor  * a,
6053
        struct ggml_tensor  * grad,
6054
0
        struct ggml_tensor  * params) {
6055
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6056
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6057
0
    GGML_ASSERT(params->type == GGML_TYPE_F32);
6058
0
    GGML_ASSERT(ggml_nelements(params) == 2);
6059
6060
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6061
6062
0
    result->op     = GGML_OP_OPT_STEP_SGD;
6063
0
    result->src[0] = a;
6064
0
    result->src[1] = grad;
6065
0
    result->src[2] = params;
6066
6067
0
    return result;
6068
0
}
6069
6070
// solve_tri
6071
6072
struct ggml_tensor * ggml_solve_tri(
6073
        struct ggml_context * ctx,
6074
        struct ggml_tensor  * a,
6075
        struct ggml_tensor  * b,
6076
        bool                  left,
6077
        bool                  lower,
6078
0
        bool                  uni) {
6079
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
6080
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
6081
6082
    // A must be square and lower triangular
6083
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
6084
    // B must have the same outer dimension as A
6085
0
    GGML_ASSERT(a->ne[1] == b->ne[1]);
6086
6087
    // batch dimensions must be equal
6088
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
6089
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
6090
6091
0
    GGML_ASSERT(ggml_is_contiguous(a));
6092
0
    GGML_ASSERT(ggml_is_contiguous(b));
6093
6094
0
    GGML_ASSERT(lower && left && !uni); // TODO: support other variants
6095
6096
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
6097
6098
0
    result->op     = GGML_OP_SOLVE_TRI;
6099
0
    result->src[0] = a;
6100
0
    result->src[1] = b;
6101
6102
0
    return result;
6103
0
}
6104
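A worked 2x2 instance of the variant enforced above (left, lower, non-unit diagonal), which reduces to forward substitution:

    // | l00  0  |   | x0 |   | b0 |      x0 = b0 / l00
    // | l10 l11 | * | x1 | = | b1 |  ->  x1 = (b1 - l10*x0) / l11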
6105
////////////////////////////////////////////////////////////////////////////////
6106
6107
0
struct ggml_hash_set ggml_hash_set_new(size_t size) {
6108
0
    size = ggml_hash_size(size);
6109
0
    struct ggml_hash_set result;
6110
0
    result.size = size;
6111
0
    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
6112
0
    result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
6113
0
    return result;
6114
0
}
6115
6116
0
void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
6117
0
    memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
6118
0
}
6119
6120
0
void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
6121
0
    GGML_FREE(hash_set->used);
6122
0
    GGML_FREE(hash_set->keys);
6123
0
}
6124
6125
0
size_t ggml_hash_size(size_t min_sz) {
6126
    // next primes after powers of two
6127
0
    static const size_t primes[] = {
6128
0
        2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
6129
0
        2053, 4099, 8209, 16411, 32771, 65537, 131101,
6130
0
        262147, 524309, 1048583, 2097169, 4194319, 8388617,
6131
0
        16777259, 33554467, 67108879, 134217757, 268435459,
6132
0
        536870923, 1073741827, 2147483659
6133
0
    };
6134
0
    static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
6135
6136
    // find the smallest prime that is larger than or equal to min_sz
6137
0
    size_t l = 0;
6138
0
    size_t r = n_primes;
6139
0
    while (l < r) {
6140
0
        size_t m = (l + r)/2;
6141
0
        if (primes[m] < min_sz) {
6142
0
            l = m + 1;
6143
0
        } else {
6144
0
            r = m;
6145
0
        }
6146
0
    }
6147
0
    size_t sz = l < n_primes ? primes[l] : min_sz | 1;
6148
0
    return sz;
6149
0
}
6150
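An illustrative lookup, following directly from the prime table above: ggml_hash_size(1000) binary-searches the table and returns 1031, the smallest listed prime >= 1000; only when min_sz exceeds the largest entry does it fall back to min_sz | 1, forcing an odd size.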
6151
struct hash_map {
6152
    struct ggml_hash_set set;
6153
    struct ggml_tensor ** vals;
6154
};
6155
6156
0
static struct hash_map * ggml_new_hash_map(size_t size) {
6157
0
    struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
6158
0
    result->set = ggml_hash_set_new(size);
6159
0
    result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
6160
0
    return result;
6161
0
}
6162
6163
0
static void ggml_hash_map_free(struct hash_map * map) {
6164
0
    ggml_hash_set_free(&map->set);
6165
0
    GGML_FREE(map->vals);
6166
0
    GGML_FREE(map);
6167
0
}
6168
6169
// utility functions to change gradients
6170
// isrc is the index of tensor in cgraph->visited_hash_set.keys
6171
// the corresponding gradient (accumulator) is also at position isrc
6172
// if tensor has a gradient accumulator, modify that accumulator in-place
6173
// else if there is no gradient for tensor, set the corresponding value
6174
// else, just add/subtract/etc. the gradients
6175
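In pseudocode, the four helpers below all follow the same pattern for a source tensor at hash slot isrc (a summary of the code that follows, not additional behavior):

    // grads[isrc] == NULL  ->  grads[isrc] = f(tensor)              (set)
    // grads[isrc] != NULL  ->  grads[isrc] = grads[isrc] op tensor  (accumulate)
    // where the accumulation happens in place iff grad_accs[isrc] is set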
6176
static void ggml_add_or_set(
6177
        struct ggml_context * ctx,
6178
        struct ggml_cgraph  * cgraph,
6179
        size_t                isrc,
6180
0
        struct ggml_tensor  * tensor) {
6181
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6182
0
    GGML_ASSERT(src);
6183
0
    if (cgraph->grads[isrc]) {
6184
0
        cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
6185
0
    } else {
6186
0
        cgraph->grads[isrc] = tensor;
6187
0
    }
6188
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6189
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6190
0
}
6191
6192
static void ggml_acc_or_set(
6193
        struct ggml_context * ctx,
6194
        struct ggml_cgraph  * cgraph,
6195
        size_t                isrc,
6196
        struct ggml_tensor  * tensor,
6197
        const  size_t         nb1,
6198
        const  size_t         nb2,
6199
        const  size_t         nb3,
6200
0
        const  size_t         offset) {
6201
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6202
0
    GGML_ASSERT(src);
6203
0
    if (cgraph->grads[isrc]) {
6204
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
6205
0
    } else {
6206
0
        struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
6207
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
6208
0
    }
6209
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
6210
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6211
0
}
6212
6213
static void ggml_add1_or_set(
6214
        struct ggml_context * ctx,
6215
        struct ggml_cgraph  * cgraph,
6216
        size_t                isrc,
6217
0
        struct ggml_tensor  * tensor) {
6218
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6219
0
    GGML_ASSERT(src);
6220
0
    if (cgraph->grads[isrc]) {
6221
0
        cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6222
0
    } else {
6223
0
        cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
6224
0
    }
6225
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6226
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6227
0
}
6228
6229
static void ggml_sub_or_set(
6230
        struct ggml_context * ctx,
6231
        struct ggml_cgraph  * cgraph,
6232
        size_t                isrc,
6233
0
        struct ggml_tensor  * tensor) {
6234
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6235
0
    GGML_ASSERT(src);
6236
0
    if (cgraph->grads[isrc]) {
6237
0
        cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6238
0
    } else {
6239
0
        cgraph->grads[isrc] = ggml_neg(ctx, tensor);
6240
0
    }
6241
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6242
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6243
0
}
6244
6245
static void ggml_compute_backward(
6246
0
        struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
6247
0
    struct ggml_tensor * tensor = cgraph->nodes[i];
6248
0
    struct ggml_tensor * grad   = ggml_graph_get_grad(cgraph, tensor);
6249
6250
0
    if (!grad) {
6251
0
        return;
6252
0
    }
6253
6254
0
    struct ggml_tensor * src0 = tensor->src[0];
6255
0
    struct ggml_tensor * src1 = tensor->src[1];
6256
0
    struct ggml_tensor * src2 = tensor->src[2];
6257
0
    struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
6258
0
    const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
6259
0
    const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
6260
0
    const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
6261
0
    const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
6262
0
    const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
6263
0
    const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
6264
6265
0
    switch (tensor->op) {
6266
0
        case GGML_OP_DUP: {
6267
0
            if (src0_needs_grads) {
6268
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6269
0
            }
6270
0
        } break;
6271
0
        case GGML_OP_ADD: {
6272
0
            if (src0_needs_grads) {
6273
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6274
0
            }
6275
0
            if (src1_needs_grads) {
6276
0
                struct ggml_tensor * tmp = grad;
6277
0
                if (!ggml_are_same_shape(src0, src1)) {
6278
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6279
0
                }
6280
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6281
0
            }
6282
0
        } break;
6283
0
        case GGML_OP_ADD1: {
6284
0
            if (src0_needs_grads) {
6285
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6286
0
            }
6287
0
            if (src1_needs_grads) {
6288
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
6289
0
            }
6290
0
        } break;
6291
0
        case GGML_OP_ACC: {
6292
0
            if (src0_needs_grads) {
6293
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6294
0
            }
6295
0
            if (src1_needs_grads) {
6296
0
                const size_t nb1    = ((int32_t *) tensor->op_params)[0];
6297
0
                const size_t nb2    = ((int32_t *) tensor->op_params)[1];
6298
0
                const size_t nb3    = ((int32_t *) tensor->op_params)[2];
6299
0
                const size_t offset = ((int32_t *) tensor->op_params)[3];
6300
6301
0
                struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
6302
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6303
0
                    nb1, nb2, nb3, offset);
6304
6305
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6306
0
            }
6307
0
        } break;
6308
0
        case GGML_OP_SUB: {
6309
0
            if (src0_needs_grads) {
6310
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6311
0
            }
6312
0
            if (src1_needs_grads) {
6313
0
                ggml_sub_or_set(ctx, cgraph, isrc1, grad);
6314
0
            }
6315
0
        } break;
6316
0
        case GGML_OP_MUL: {
6317
0
            if (src0_needs_grads) {
6318
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
6319
0
            }
6320
0
            if (src1_needs_grads) {
6321
0
                struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
6322
0
                if (!ggml_are_same_shape(src0, src1)) {
6323
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6324
0
                }
6325
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6326
0
            }
6327
0
        } break;
6328
0
        case GGML_OP_DIV: {
6329
0
            if (src0_needs_grads) {
6330
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
6331
0
            }
6332
0
            if (src1_needs_grads) {
6333
0
                ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
6334
0
            }
6335
0
        } break;
6336
0
        case GGML_OP_SQR: {
6337
0
            if (src0_needs_grads) {
6338
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
6339
0
            }
6340
0
        } break;
6341
0
        case GGML_OP_SQRT: {
6342
0
            if (src0_needs_grads) {
6343
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
6344
0
            }
6345
0
        } break;
6346
0
        case GGML_OP_LOG: {
6347
0
            if (src0_needs_grads) {
6348
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
6349
0
            }
6350
0
        } break;
6351
0
        case GGML_OP_SIN: {
6352
0
            if (src0_needs_grads) {
6353
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
6354
0
            }
6355
0
        } break;
6356
0
        case GGML_OP_COS: {
6357
0
            if (src0_needs_grads) {
6358
0
                ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
6359
0
            }
6360
0
        } break;
6361
0
        case GGML_OP_SUM: {
6362
0
            if (src0_needs_grads) {
6363
0
                ggml_add1_or_set(ctx, cgraph, isrc0, grad);
6364
0
            }
6365
0
        } break;
6366
0
        case GGML_OP_SUM_ROWS: {
6367
0
            if (src0_needs_grads) {
6368
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6369
0
            }
6370
0
        } break;
6371
0
        case GGML_OP_MEAN: {
6372
0
            if (src0_needs_grads) {
6373
0
                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false));
6374
0
            }
6375
0
        } break;
6376
0
        case GGML_OP_REPEAT: {
6377
0
            if (src0_needs_grads) {
6378
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
6379
0
            }
6380
0
        } break;
6381
0
        case GGML_OP_REPEAT_BACK: {
6382
0
            if (src0_needs_grads) {
6383
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6384
0
            }
6385
0
        } break;
6386
0
        case GGML_OP_RMS_NORM: {
6387
0
            if (src0_needs_grads) {
6388
0
                float eps;
6389
0
                memcpy(&eps, tensor->op_params, sizeof(float));
6390
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
6391
0
            }
6392
0
        } break;
6393
0
        case GGML_OP_MUL_MAT: {
6394
            // https://cs231n.github.io/optimization-2/#staged
6395
            // # forward pass
6396
            // s0 = np.random.randn(5, 10)
6397
            // s1 = np.random.randn(10, 3)
6398
            // t = s0.dot(s1)
6399
6400
            // # now suppose we had the gradient on t from above in the circuit
6401
            // dt = np.random.randn(*t.shape) # same shape as t
6402
            // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
6403
            // ds1 = s0.T.dot(dt)
6404
6405
            // tensor.shape [m,p,qq,rr]
6406
            // src0.shape   [n,m,q1,r1]
6407
            // src1.shape   [n,p,qq,rr]
6408
6409
0
            if (src0_needs_grads) {
6410
0
                GGML_ASSERT(grad->ne[2] == src1->ne[2]);
6411
0
                GGML_ASSERT(grad->ne[3] == src1->ne[3]);
6412
0
                struct ggml_tensor * tmp =
6413
0
                    ggml_out_prod(ctx, // [n,m,qq,rr]
6414
0
                        src1,          // [n,p,qq,rr]
6415
0
                        grad);         // [m,p,qq,rr]
6416
0
                if (!ggml_are_same_shape(tmp, src0)) {
6417
0
                    GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
6418
0
                    GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
6419
0
                    GGML_ASSERT(tmp->ne[3] == 1);
6420
6421
0
                    const int64_t nr2 = tmp->ne[2] / src0->ne[2];
6422
0
                    const size_t nb2 = tmp->nb[2] * nr2;
6423
0
                    const size_t nb3 = tmp->nb[2];
6424
6425
0
                    tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
6426
0
                    tmp = ggml_repeat_back(ctx, tmp, src0);
6427
0
                }
6428
0
                ggml_add_or_set(ctx, cgraph, isrc0, tmp);
6429
0
            }
6430
0
            if (src1_needs_grads) {
6431
0
                ggml_add_or_set(ctx, cgraph, isrc1,
6432
                        // ggml_mul_mat(ctx,                   // [n,p,qq,rr]
6433
                        //     ggml_cont(ctx,                  // [m,n,q1,r1]
6434
                        //         ggml_transpose(ctx, src0)), // [m,n,q1,r1]
6435
                        //     grad),                          // [m,p,qq,rr]
6436
6437
                        // when src0 is bigger than tensor->grad (this is mostly the case in llama),
6438
                        // avoid transposing src0; instead transpose the smaller tensor->grad
6439
                        // and then use ggml_out_prod
6440
0
                        ggml_out_prod(ctx,      // [n,p,qq,rr]
6441
0
                            src0,               // [n,m,q1,r1]
6442
0
                            ggml_transpose(ctx, // [p,m,qq,rr]
6443
0
                                grad)));        // [m,p,qq,rr]
6444
0
            }
6445
0
        } break;
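        // Shape check for the out_prod formulation above (a worked example in
        // ggml's ne[0]-innermost convention): with src0 = [n,m] and
        // src1 = [n,p], the forward result is tensor[i,j] = sum_k src0[k,i] * src1[k,j].
        // Differentiating gives:
        //   dsrc0[k,i] = sum_j grad[i,j] * src1[k,j] = out_prod(src1, grad)[k,i]   // [n,m]
        //   dsrc1[k,j] = sum_i grad[i,j] * src0[k,i] = out_prod(src0, grad^T)[k,j] // [n,p]
        // which is the numpy example (ds0 = dt.dot(s1.T), ds1 = s0.T.dot(dt))
        // transposed into ggml's storage layout.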
6446
0
        case GGML_OP_SCALE: {
6447
0
            if (src0_needs_grads) {
6448
0
                float s;
6449
0
                memcpy(&s, tensor->op_params, sizeof(float));
6450
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false));
6451
0
            }
6452
0
        } break;
6453
0
        case GGML_OP_SET: {
6454
0
            const size_t nb1    = ((const int32_t *) tensor->op_params)[0];
6455
0
            const size_t nb2    = ((const int32_t *) tensor->op_params)[1];
6456
0
            const size_t nb3    = ((const int32_t *) tensor->op_params)[2];
6457
0
            const size_t offset = ((const int32_t *) tensor->op_params)[3];
6458
6459
0
            struct ggml_tensor * tensor_grad_view = NULL;
6460
6461
0
            if (src0_needs_grads || src1_needs_grads) {
6462
0
                GGML_ASSERT(src0->type == tensor->type);
6463
0
                GGML_ASSERT(!cgraph->grads[isrc0] ||                      cgraph->grads[isrc0]->type == grad->type);
6464
0
                GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);
6465
6466
0
                tensor_grad_view = ggml_view_4d(ctx,
6467
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6468
0
                    nb1, nb2, nb3, offset);
6469
0
            }
6470
6471
0
            if (src0_needs_grads) {
6472
0
                struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
6473
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
6474
0
            }
6475
6476
0
            if (src1_needs_grads) {
6477
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6478
0
            }
6479
0
        } break;
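        // Intuition for the SET gradients above: tensor equals src0 everywhere
        // except the window overwritten by src1. dsrc0 is therefore the
        // incoming gradient with that window zeroed out (implemented as
        // accumulating -view into grad), and dsrc1 is exactly the gradient
        // inside the window, made contiguous and reshaped to src1's shape.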
6480
0
        case GGML_OP_CPY: {
6481
            // cpy overwrites value of src1 by src0 and returns view(src1)
6482
            // the overwriting is mathematically equivalent to:
6483
            // tensor = src0 * 1 + src1 * 0
6484
0
            if (src0_needs_grads) {
6485
                // dsrc0 = dtensor * 1
6486
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
6487
0
            }
6488
0
            if (src1_needs_grads) {
6489
                // dsrc1 = dtensor * 0 -> noop
6490
0
            }
6491
0
        } break;
6492
0
        case GGML_OP_CONT: {
6493
            // same as cpy
6494
0
            if (src0_needs_grads) {
6495
0
                GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
6496
0
                GGML_ASSERT(ggml_is_contiguous(grad));
6497
0
                GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
6498
0
                ggml_add_or_set(ctx, cgraph, isrc0,
6499
0
                    ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
6500
0
            }
6501
0
        } break;
6502
0
        case GGML_OP_RESHAPE: {
6503
0
            if (src0_needs_grads) {
6504
0
                struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
6505
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
6506
0
            }
6507
0
        } break;
6508
0
        case GGML_OP_VIEW: {
6509
0
            if (src0_needs_grads) {
6510
0
                size_t offset;
6511
6512
0
                memcpy(&offset, tensor->op_params, sizeof(offset));
6513
6514
0
                size_t nb1 = tensor->nb[1];
6515
0
                size_t nb2 = tensor->nb[2];
6516
0
                size_t nb3 = tensor->nb[3];
6517
6518
0
                if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
6519
                    // gradient is typically F32, but src0 could be a different type
6520
0
                    size_t ng = ggml_element_size(cgraph->grads[isrc0]);
6521
0
                    size_t n0 = ggml_element_size(src0);
6522
0
                    GGML_ASSERT(offset % n0 == 0);
6523
0
                    GGML_ASSERT(nb1 % n0 == 0);
6524
0
                    GGML_ASSERT(nb2 % n0 == 0);
6525
0
                    GGML_ASSERT(nb3 % n0 == 0);
6526
0
                    offset = (offset / n0) * ng;
6527
0
                    nb1 = (nb1 / n0) * ng;
6528
0
                    nb2 = (nb2 / n0) * ng;
6529
0
                    nb3 = (nb3 / n0) * ng;
6530
0
                }
6531
6532
0
                ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
6533
0
            }
6534
0
        } break;
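        // Worked example for the stride rescaling above: if src0 is F16
        // (n0 = 2 bytes) while its gradient is F32 (ng = 4 bytes), a view
        // starting at byte offset 10 (element 5) maps to byte offset
        // (10 / 2) * 4 = 20 in the gradient, i.e. the element indices are
        // preserved while the byte offsets/strides are converted from src0
        // elements to grad elements.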
6535
0
        case GGML_OP_PERMUTE: {
6536
0
            if (src0_needs_grads) {
6537
0
                const int32_t * axes = (const int32_t *) tensor->op_params;
6538
0
                const int axis0 = axes[0] & 0x3;
6539
0
                const int axis1 = axes[1] & 0x3;
6540
0
                const int axis2 = axes[2] & 0x3;
6541
0
                const int axis3 = axes[3] & 0x3;
6542
0
                int axb[4] = {0,0,0,0}; // axes backward
6543
0
                axb[axis0] = 0;
6544
0
                axb[axis1] = 1;
6545
0
                axb[axis2] = 2;
6546
0
                axb[axis3] = 3;
6547
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
6548
0
            }
6549
0
        } break;
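        // The axb[] table above is the inverse permutation. For example, if
        // the forward op was ggml_permute(ctx, x, 1, 2, 0, 3) (dim 0 -> 1,
        // dim 1 -> 2, dim 2 -> 0), then axb = {2, 0, 1, 3}, and applying it
        // to grad routes every gradient dimension back to its source position.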
6550
0
        case GGML_OP_TRANSPOSE: {
6551
0
            if (src0_needs_grads) {
6552
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
6553
0
            }
6554
0
        } break;
6555
0
        case GGML_OP_GET_ROWS: {
6556
0
            if (src0_needs_grads) {
6557
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
6558
0
            }
6559
0
            if (src1_needs_grads) {
6560
                // noop
6561
0
            }
6562
0
        } break;
6563
0
        case GGML_OP_DIAG_MASK_INF: {
6564
0
            if (src0_needs_grads) {
6565
                /* ggml_diag_mask_inf_impl() shouldn't be here */
6566
                /* ref:  https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */
6567
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6568
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6569
0
            }
6570
0
        } break;
6571
0
        case GGML_OP_DIAG_MASK_ZERO: {
6572
0
            if (src0_needs_grads) {
6573
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6574
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6575
0
            }
6576
0
        } break;
6577
0
        case GGML_OP_SOFT_MAX: {
6578
0
            if (src0_needs_grads) {
6579
0
                float scale    = 1.0f;
6580
0
                float max_bias = 0.0f;
6581
6582
0
                memcpy(&scale,    (const float *) tensor->op_params + 0, sizeof(float));
6583
0
                memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
6584
6585
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
6586
0
            }
6587
0
            GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
6588
0
        } break;
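        // For reference, with s = softmax(scale * x) per row, the backward op
        // ggml_soft_max_ext_back computes the standard Jacobian-vector
        // product, scaled by the forward scale factor:
        //   dx = scale * (s .* dy - s * dot(s, dy))
        // The mask (src1) receives no gradient, as the assert above enforces.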
6589
0
        case GGML_OP_ROPE: {
6590
0
            if (src0_needs_grads) {
6591
                //const int n_past = ((int32_t *) tensor->op_params)[0];
6592
0
                const int n_dims     = ((const int32_t *) tensor->op_params)[1];
6593
0
                const int mode       = ((const int32_t *) tensor->op_params)[2];
6594
                //const int n_ctx      = ((int32_t *) tensor->op_params)[3];
6595
0
                const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
6596
0
                float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
6597
0
                int sections[4] = {0, 0, 0, 0};
6598
6599
0
                memcpy(&freq_base,   (const float *) tensor->op_params +  5, sizeof(float));
6600
0
                memcpy(&freq_scale,  (const float *) tensor->op_params +  6, sizeof(float));
6601
0
                memcpy(&ext_factor,  (const float *) tensor->op_params +  7, sizeof(float));
6602
0
                memcpy(&attn_factor, (const float *) tensor->op_params +  8, sizeof(float));
6603
0
                memcpy(&beta_fast,   (const float *) tensor->op_params +  9, sizeof(float));
6604
0
                memcpy(&beta_slow,   (const float *) tensor->op_params + 10, sizeof(float));
6605
0
                memcpy(&sections,                    tensor->op_params + 11, sizeof(sections));
6606
6607
0
                struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
6608
0
                    ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
6609
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
6610
0
                    ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
6611
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
6612
0
                ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
6613
0
            }
6614
0
            GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
6615
0
        } break;
6616
0
        case GGML_OP_IM2COL: {
6617
0
            if (src1_needs_grads) {
6618
0
                const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
6619
0
                const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
6620
0
                const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
6621
0
                const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
6622
0
                const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
6623
0
                const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
6624
0
                const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
6625
6626
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
6627
0
            }
6628
0
        } break;
6629
0
        case GGML_OP_POOL_2D: {
6630
0
            if (src0_needs_grads) {
6631
0
                const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
6632
0
                const      int32_t      k0 = ggml_get_op_params_i32(tensor, 1);
6633
0
                const      int32_t      k1 = ggml_get_op_params_i32(tensor, 2);
6634
0
                const      int32_t      s0 = ggml_get_op_params_i32(tensor, 3);
6635
0
                const      int32_t      s1 = ggml_get_op_params_i32(tensor, 4);
6636
0
                const      int32_t      p0 = ggml_get_op_params_i32(tensor, 5);
6637
0
                const      int32_t      p1 = ggml_get_op_params_i32(tensor, 6);
6638
6639
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
6640
0
            }
6641
0
        } break;
6642
0
        case GGML_OP_WIN_PART:
6643
0
        case GGML_OP_WIN_UNPART:
6644
0
        case GGML_OP_UNARY: {
6645
0
            switch (ggml_get_unary_op(tensor)) {
6646
0
                case GGML_UNARY_OP_ABS: {
6647
0
                    if (src0_needs_grads) {
6648
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
6649
0
                    }
6650
0
                } break;
6651
0
                case GGML_UNARY_OP_SGN: {
6652
                    // noop
6653
0
                } break;
6654
0
                case GGML_UNARY_OP_NEG: {
6655
0
                    if (src0_needs_grads) {
6656
0
                        ggml_sub_or_set(ctx, cgraph, isrc0, grad);
6657
0
                    }
6658
0
                } break;
6659
0
                case GGML_UNARY_OP_STEP: {
6660
                    // noop
6661
0
                } break;
6662
0
                case GGML_UNARY_OP_RELU: {
6663
0
                    if (src0_needs_grads) {
6664
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
6665
0
                    }
6666
0
                } break;
6667
0
                case GGML_UNARY_OP_SILU: {
6668
0
                    if (src0_needs_grads) {
6669
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
6670
0
                    }
6671
0
                } break;
6672
0
                case GGML_UNARY_OP_EXP: {
6673
0
                    if (src0_needs_grads) {
6674
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
6675
0
                    }
6676
0
                } break;
6677
0
                case GGML_UNARY_OP_EXPM1: {
6678
0
                    if (src0_needs_grads) {
6679
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
6680
0
                    }
6681
0
                } break;
6682
0
                case GGML_UNARY_OP_SOFTPLUS: {
6683
0
                    if (src0_needs_grads) {
6684
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
6685
0
                    }
6686
0
                } break;
6687
0
                default: {
6688
0
                    fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
6689
0
                        __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
6690
0
                    GGML_ABORT("fatal error");
6691
0
                } //break;
6692
0
            }
6693
0
        } break;
6694
0
        case GGML_OP_CROSS_ENTROPY_LOSS: {
6695
0
            if (src0_needs_grads) {
6696
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
6697
0
            }
6698
0
            GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
6699
0
        } break;
6700
0
        case GGML_OP_GLU: {
6701
0
            switch (ggml_get_glu_op(tensor)) {
6702
0
                case GGML_GLU_OP_SWIGLU: {
6703
0
                    if (src0_needs_grads) {
6704
0
                        GGML_ASSERT(src1 && "backward pass only implemented for split swiglu");
6705
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0));
6706
0
                    }
6707
0
                    if (src1_needs_grads) {
6708
0
                        ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad));
6709
0
                    }
6710
0
                } break;
6711
0
                default: {
6712
0
                    GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor)));
6713
0
                } //break;
6714
0
            }
6715
0
        } break;
6716
0
        case GGML_OP_NONE: {
6717
            // noop
6718
0
        } break;
6719
0
        case GGML_OP_COUNT:
6720
0
        default: {
6721
0
            GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
6722
0
        } //break;
6723
0
    }
6724
6725
0
    GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
6726
0
    GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
6727
0
    GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
6728
0
}
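// Taken together, each case above implements one reverse-mode chain-rule
// step: for every differentiable source src of `tensor`,
//   grads[isrc] (+)= (d tensor / d src) applied to grad,
// where the *_or_set helpers (defined earlier in this file) either initialize
// a gradient slot on first use or accumulate into it when a tensor feeds
// several consumers.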
6729
6730
0
static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
6731
    // check if already visited
6732
0
    size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
6733
0
    GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
6734
0
    if (!ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
6735
        // This is the first time we see this node in the current graph.
6736
0
        cgraph->visited_hash_set.keys[node_hash_pos] = node;
6737
0
        ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
6738
0
        cgraph->use_counts[node_hash_pos] = 0;
6739
0
    } else {
6740
        // already visited
6741
0
        return node_hash_pos;
6742
0
    }
6743
6744
0
    for (int i = 0; i < GGML_MAX_SRC; ++i) {
6745
0
        const int k =
6746
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
6747
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
6748
0
            /* unknown order, just fall back to using i */ i;
6749
6750
0
        struct ggml_tensor * src = node->src[k];
6751
0
        if (src) {
6752
0
            size_t src_hash_pos = ggml_visit_parents(cgraph, src);
6753
6754
            // Update the use count for this operand.
6755
0
            cgraph->use_counts[src_hash_pos]++;
6756
0
        }
6757
0
    }
6758
6759
0
    if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
6760
        // reached a leaf node, not part of the gradient graph (e.g. a constant)
6761
0
        GGML_ASSERT(cgraph->n_leafs < cgraph->size);
6762
6763
0
        if (strlen(node->name) == 0) {
6764
0
            ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
6765
0
        }
6766
6767
0
        cgraph->leafs[cgraph->n_leafs] = node;
6768
0
        cgraph->n_leafs++;
6769
0
    } else {
6770
0
        GGML_ASSERT(cgraph->n_nodes < cgraph->size);
6771
6772
0
        if (strlen(node->name) == 0) {
6773
0
            ggml_format_name(node, "node_%d", cgraph->n_nodes);
6774
0
        }
6775
6776
0
        cgraph->nodes[cgraph->n_nodes] = node;
6777
0
        cgraph->n_nodes++;
6778
0
    }
6779
6780
0
    return node_hash_pos;
6781
0
}
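// ggml_visit_parents is a post-order DFS: a node is appended to cgraph->nodes
// only after all of its sources have been recorded, so nodes[] is a valid
// topological order and nodes[n_nodes - 1] is the tensor passed to
// ggml_build_forward_expand. For example, for c = ggml_add(ctx, a, b) the
// visit records a and b first (as leafs if they are plain data tensors),
// then c.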
6782
6783
0
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
6784
0
    if (!expand) {
6785
        // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
6786
0
        ggml_graph_clear(cgraph);
6787
0
    }
6788
6789
0
    const int n0 = cgraph->n_nodes;
6790
6791
0
    ggml_visit_parents(cgraph, tensor);
6792
6793
0
    const int n_new = cgraph->n_nodes - n0;
6794
0
    GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
6795
6796
0
    if (n_new > 0) {
6797
        // the last added node should always be the starting point
6798
0
        GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
6799
0
    }
6800
0
}
6801
6802
0
void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
6803
0
    ggml_build_forward_impl(cgraph, tensor, true);
6804
0
}
6805
6806
void ggml_build_backward_expand(
6807
        struct ggml_context *  ctx,
6808
        struct ggml_cgraph  *  cgraph,
6809
0
        struct ggml_tensor  ** grad_accs) {
6810
0
    GGML_ASSERT(cgraph->n_nodes > 0);
6811
0
    GGML_ASSERT(cgraph->grads);
6812
0
    GGML_ASSERT(cgraph->grad_accs);
6813
6814
0
    const int n_nodes_f = cgraph->n_nodes;
6815
6816
0
    memset(cgraph->grads,     0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6817
0
    memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6818
0
    bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
6819
6820
0
    {
6821
0
        bool any_params = false;
6822
0
        bool any_loss   = false;
6823
0
        for (int i = 0; i < n_nodes_f; ++i) {
6824
0
            struct ggml_tensor * node = cgraph->nodes[i];
6825
0
            any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
6826
0
            any_loss   = any_loss   || (node->flags & GGML_TENSOR_FLAG_LOSS);
6827
0
        }
6828
0
        GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
6829
0
        GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
6830
0
    }
6831
6832
0
    for (int i = 0; i < n_nodes_f; ++i) {
6833
0
        struct ggml_tensor * node = cgraph->nodes[i];
6834
6835
0
        if (node->type == GGML_TYPE_I32) {
6836
0
            continue;
6837
0
        }
6838
6839
0
        bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
6840
0
        bool ignore_src[GGML_MAX_SRC] = {false};
6841
0
        switch (node->op) {
6842
            // gradients in node->src[0] for one reason or another have no effect on output gradients
6843
0
            case GGML_OP_IM2COL:      // only used for its shape
6844
0
            case GGML_OP_IM2COL_BACK: // same as IM2COL
6845
0
                ignore_src[0] = true;
6846
0
                break;
6847
0
            case GGML_OP_UNARY: {
6848
0
                const enum ggml_unary_op uop = ggml_get_unary_op(node);
6849
                // SGN and STEP unary ops are piecewise constant
6850
0
                if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
6851
0
                    ignore_src[0] = true;
6852
0
                }
6853
0
            } break;
6854
6855
            // gradients in node->src[1] for one reason or another have no effect on output gradients
6856
0
            case GGML_OP_CPY:           // gradients in CPY target are irrelevant
6857
0
            case GGML_OP_GET_ROWS:      // row indices not differentiable
6858
0
            case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
6859
0
            case GGML_OP_ROPE:          // positions not differentiable
6860
0
                ignore_src[1] = true;
6861
0
                break;
6862
6863
0
            default:
6864
0
                break;
6865
0
        }
6866
0
        for (int j = 0; j < GGML_MAX_SRC; ++j) {
6867
0
            if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
6868
0
                continue;
6869
0
            }
6870
0
            GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
6871
0
            node_needs_grad = true;
6872
0
            break;
6873
0
        }
6874
0
        if (!node_needs_grad) {
6875
0
            continue;
6876
0
        }
6877
6878
        // inplace operations are currently not supported
6879
0
        GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
6880
0
            node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
6881
6882
0
        const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
6883
0
        GGML_ASSERT(ihash != GGML_HASHSET_FULL);
6884
0
        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
6885
0
        if (grad_accs && grad_accs[i]) {
6886
0
            cgraph->grad_accs[ihash] = grad_accs[i];
6887
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6888
0
        } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
6889
            // loss tensors always need a gradient accumulator
6890
0
            cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
6891
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6892
0
        }
6893
0
        grads_needed[ihash] = true;
6894
0
    }
6895
6896
0
    for (int i = n_nodes_f - 1; i >= 0; --i) {
6897
        // inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation
6898
        // use the allocator to automatically make operations inplace
6899
0
        ggml_compute_backward(ctx, cgraph, i, grads_needed);
6900
0
    }
6901
6902
0
    free(grads_needed);
6903
0
}
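// Minimal usage sketch (hypothetical tensors, error handling omitted):
// build the forward graph, then extend the same graph with backward ops.
//
//   struct ggml_tensor * w    = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
//   ggml_set_param(w);                     // mark as trainable
//   struct ggml_tensor * loss = ggml_sum(ctx, ggml_sqr(ctx, w));
//   ggml_set_loss(loss);                   // mark the scalar loss
//
//   struct ggml_cgraph * gb = ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, /*grads =*/ true);
//   ggml_build_forward_expand(gb, loss);
//   ggml_build_backward_expand(ctx, gb, /*grad_accs =*/ NULL);
//   ggml_graph_reset(gb);                  // seed d(loss)/d(loss) = 1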
6904
6905
0
static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
6906
0
    void * ptr = *p;
6907
0
    ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
6908
0
    *p = (void *) ((char *) ptr + size);
6909
0
    return ptr;
6910
0
}
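// incr_ptr_aligned rounds *p up to `align` via GGML_PAD and then advances it
// past `size` bytes, returning the aligned start. For example, with
// *p == (void *) 13, incr_ptr_aligned(&p, 12, 8) returns (void *) 16 and
// leaves *p == (void *) 28.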
6911
6912
0
static size_t ggml_graph_nbytes(size_t size, bool grads) {
6913
0
    size_t hash_size = ggml_hash_size(size * 2);
6914
0
    void * p = 0;
6915
0
    incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
6916
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
6917
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
6918
0
    incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts
6919
0
    incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
6920
0
    if (grads) {
6921
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
6922
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
6923
0
    }
6924
0
    incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
6925
6926
0
    size_t nbytes = (size_t) p;
6927
0
    return nbytes;
6928
0
}
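// Note that ggml_graph_nbytes is a "dry run" of the allocation layout over a
// NULL base pointer: the final pointer value is the total byte count. The
// same incr_ptr_aligned sequence is replayed for real in ggml_new_graph_custom
// below, and the assert there verifies that the two stay in sync.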
6929
6930
0
size_t ggml_graph_overhead_custom(size_t size, bool grads) {
6931
0
    return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
6932
0
}
6933
6934
0
size_t ggml_graph_overhead(void) {
6935
0
    return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
6936
0
}
6937
6938
0
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
6939
0
    const size_t obj_size = ggml_graph_nbytes(size, grads);
6940
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
6941
0
    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
6942
6943
    // the size of the hash table is doubled since it needs to hold both nodes and leafs
6944
0
    size_t hash_size = ggml_hash_size(size * 2);
6945
6946
0
    void * p = cgraph + 1;
6947
6948
0
    struct ggml_tensor ** nodes_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6949
0
    struct ggml_tensor ** leafs_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6950
0
    int32_t             * use_counts_ptr =         incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t));
6951
0
    struct ggml_tensor ** hash_keys_ptr  =         incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6952
0
    struct ggml_tensor ** grads_ptr      = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
6953
0
    struct ggml_tensor ** grad_accs_ptr  = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
6954
6955
0
    ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
6956
6957
    // check that we allocated the correct amount of memory
6958
0
    assert(obj_size == (size_t)((char *)p - (char *)cgraph));
6959
6960
0
    *cgraph = (struct ggml_cgraph) {
6961
0
        /*.size         =*/ size,
6962
0
        /*.n_nodes      =*/ 0,
6963
0
        /*.n_leafs      =*/ 0,
6964
0
        /*.nodes        =*/ nodes_ptr,
6965
0
        /*.grads        =*/ grads_ptr,
6966
0
        /*.grad_accs    =*/ grad_accs_ptr,
6967
0
        /*.leafs        =*/ leafs_ptr,
6968
0
        /*.use_counts   =*/ use_counts_ptr,
6969
0
        /*.hash_table   =*/ { hash_size, hash_used, hash_keys_ptr },
6970
0
        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
6971
0
    };
6972
6973
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
6974
0
    if (grads) {
6975
0
        memset(cgraph->grads,     0, hash_size*sizeof(struct ggml_tensor *));
6976
0
        memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
6977
0
    }
6978
6979
0
    return cgraph;
6980
0
}
6981
6982
0
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
6983
0
    return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
6984
0
}
6985
6986
0
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
6987
0
    struct ggml_cgraph cgraph = {
6988
0
        /*.size             =*/ 0,
6989
0
        /*.n_nodes          =*/ i1 - i0,
6990
0
        /*.n_leafs          =*/ 0,
6991
0
        /*.nodes            =*/ cgraph0->nodes + i0,
6992
0
        /*.grads            =*/ NULL, // gradients would need visited_hash_set
6993
0
        /*.grad_accs        =*/ NULL,
6994
0
        /*.leafs            =*/ NULL,
6995
0
        /*.use_counts       =*/ cgraph0->use_counts,
6996
0
        /*.visited_hash_set =*/ cgraph0->visited_hash_set,
6997
0
        /*.order            =*/ cgraph0->order,
6998
0
    };
6999
7000
0
    return cgraph;
7001
0
}
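// The returned view is shallow: it shares nodes, use_counts and the visited
// hash set with cgraph0, so it must not outlive or be used after modifying
// cgraph0. A sketch of evaluating only the first half of a graph:
//
//   struct ggml_cgraph gv = ggml_graph_view(gf, 0, gf->n_nodes / 2);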
7002
7003
0
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
7004
0
    GGML_ASSERT(dst->size >= src->n_leafs);
7005
0
    GGML_ASSERT(dst->size >= src->n_nodes);
7006
0
    GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
7007
7008
0
    dst->n_leafs = src->n_leafs;
7009
0
    dst->n_nodes = src->n_nodes;
7010
0
    dst->order   = src->order;
7011
7012
0
    for (int i = 0; i < src->n_leafs; ++i) {
7013
0
        dst->leafs[i] = src->leafs[i];
7014
0
    }
7015
7016
0
    for (int i = 0; i < src->n_nodes; ++i) {
7017
0
        dst->nodes[i] = src->nodes[i];
7018
0
    }
7019
7020
0
    for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
7021
        // copy all hashset keys (tensors) that are in use
7022
0
        if (ggml_bitset_get(src->visited_hash_set.used, i)) {
7023
0
            size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
7024
0
            dst->use_counts[new_hash_pos] = src->use_counts[i];
7025
0
        }
7026
0
    }
7027
7028
0
    if (dst->grads) {
7029
0
        memset(dst->grads,     0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7030
0
        memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7031
0
    }
7032
0
    if (src->grads) {
7033
0
        GGML_ASSERT(dst->grads     != NULL);
7034
0
        GGML_ASSERT(dst->grad_accs != NULL);
7035
0
        for (int i = 0; i < src->n_nodes; ++i) {
7036
0
            const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
7037
0
            const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
7038
7039
0
            GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
7040
0
            GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
7041
0
            GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
7042
0
            GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
7043
7044
0
            dst->grads[igrad_dst]     = src->grads[igrad_src];
7045
0
            dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
7046
0
        }
7047
0
    }
7048
0
}
7049
7050
0
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
7051
0
    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
7052
0
    ggml_graph_cpy(cgraph, result);
7053
0
    return result;
7054
0
}
7055
7056
0
struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
7057
0
    if (ggml_is_empty(tensor)) {
7058
0
        return tensor;
7059
0
    }
7060
0
    if (tensor->buffer) {
7061
0
        ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
7062
0
    } else {
7063
0
        GGML_ASSERT(tensor->data);
7064
0
        memset(tensor->data, 0, ggml_nbytes(tensor));
7065
0
    }
7066
0
    return tensor;
7067
0
}
7068
7069
0
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
7070
0
    if (!cgraph) {
7071
0
        return;
7072
0
    }
7073
0
    GGML_ASSERT(cgraph->grads != NULL);
7074
7075
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7076
0
        struct ggml_tensor * node     = cgraph->nodes[i];
7077
0
        struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);
7078
7079
0
        if (node->op == GGML_OP_OPT_STEP_ADAMW) {
7080
            // clear momenta
7081
0
            ggml_set_zero(node->src[2]);
7082
0
            ggml_set_zero(node->src[3]);
7083
0
        }
7084
7085
        // initial gradient of the loss should be 1, all other gradients 0
7086
0
        if (grad_acc) {
7087
0
            if (node->flags & GGML_TENSOR_FLAG_LOSS) {
7088
0
                GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
7089
0
                GGML_ASSERT(ggml_is_scalar(grad_acc));
7090
7091
0
                const float onef = 1.0f;
7092
0
                if (grad_acc->buffer) {
7093
0
                    ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
7094
0
                } else {
7095
0
                    GGML_ASSERT(grad_acc->data);
7096
0
                    *((float *) grad_acc->data) = onef;
7097
0
                }
7098
0
            } else {
7099
0
                ggml_set_zero(grad_acc);
7100
0
            }
7101
0
        }
7102
0
    }
7103
0
}
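// The seed value of 1 above is the base case of the chain rule:
// d(loss)/d(loss) = 1. All other gradient accumulators start at 0 so the
// backward graph can accumulate contributions from each consumer.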
7104
7105
0
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
7106
0
    cgraph->n_leafs = 0;
7107
0
    cgraph->n_nodes = 0;
7108
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7109
0
}
7110
7111
0
int ggml_graph_size(struct ggml_cgraph * cgraph) {
7112
0
    return cgraph->size;
7113
0
}
7114
7115
0
struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
7116
0
    if (i < 0) {
7117
0
        GGML_ASSERT(cgraph->n_nodes + i >= 0);
7118
0
        return cgraph->nodes[cgraph->n_nodes + i];
7119
0
    }
7120
7121
0
    GGML_ASSERT(i < cgraph->n_nodes);
7122
0
    return cgraph->nodes[i];
7123
0
}
7124
7125
0
struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
7126
0
    return cgraph->nodes;
7127
0
}
7128
7129
0
int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
7130
0
    return cgraph->n_nodes;
7131
0
}
7132
7133
0
void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
7134
0
    GGML_ASSERT(cgraph->size > cgraph->n_nodes);
7135
0
    cgraph->nodes[cgraph->n_nodes] = tensor;
7136
0
    cgraph->n_nodes++;
7137
0
}
7138
7139
0
struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
7140
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7141
0
        struct ggml_tensor * leaf = cgraph->leafs[i];
7142
7143
0
        if (strcmp(leaf->name, name) == 0) {
7144
0
            return leaf;
7145
0
        }
7146
0
    }
7147
7148
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7149
0
        struct ggml_tensor * node = cgraph->nodes[i];
7150
7151
0
        if (strcmp(node->name, name) == 0) {
7152
0
            return node;
7153
0
        }
7154
0
    }
7155
7156
0
    return NULL;
7157
0
}
7158
7159
0
struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7160
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7161
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
7162
0
}
7163
7164
0
struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7165
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7166
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
7167
0
}
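// Sketch: reading back a parameter's gradient once the backward graph gb has
// been evaluated (`w` is a tensor previously marked with ggml_set_param):
//
//   struct ggml_tensor * gw = ggml_graph_get_grad(gb, w); // NULL if absent
//   if (gw) {
//       float g0;
//       ggml_backend_tensor_get(gw, &g0, 0, sizeof(g0));  // first element
//   }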
7168
7169
0
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
7170
0
    GGML_LOG_INFO("=== GRAPH ===\n");
7171
7172
0
    GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
7173
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7174
0
        struct ggml_tensor * node = cgraph->nodes[i];
7175
7176
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
7177
0
                i,
7178
0
                node->ne[0], node->ne[1], node->ne[2],
7179
0
                ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
7180
0
                      ggml_graph_get_grad(cgraph, node) ? "g" : " ");
7181
0
    }
7182
7183
0
    GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
7184
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7185
0
        struct ggml_tensor * node = cgraph->leafs[i];
7186
7187
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
7188
0
                i,
7189
0
                node->ne[0], node->ne[1],
7190
0
                ggml_op_name(node->op),
7191
0
                ggml_get_name(node));
7192
0
    }
7193
7194
0
    GGML_LOG_INFO("========================================\n");
7195
0
}
7196
7197
static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
7198
                                      const int *                idxs,
7199
                                      int                        count,
7200
0
                                      const struct ggml_tensor * tensor) {
7201
0
    GGML_ASSERT(cgraph && idxs);
7202
0
    for (int i = 0; i < count; ++i) {
7203
0
        const int node_idx = idxs[i];
7204
7205
0
        if (node_idx >= cgraph->n_nodes) {
7206
0
            return -1;
7207
0
        }
7208
0
        if (cgraph->nodes[node_idx] == tensor) {
7209
0
            return i;
7210
0
        }
7211
0
    }
7212
0
    return -1;
7213
0
}
7214
7215
bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
7216
                                const int *                node_idxs,
7217
                                int                        count,
7218
                                const enum ggml_op *       ops,
7219
                                const int *                outputs,
7220
0
                                int                        num_outputs) {
7221
0
    GGML_ASSERT(outputs && num_outputs > 0);
7222
7223
0
    for (int i = 0; i < count; ++i) {
7224
0
        if (node_idxs[i] >= cgraph->n_nodes) {
7225
0
            return false;
7226
0
        }
7227
7228
0
        const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
7229
7230
0
        if (node->op != ops[i]) {
7231
0
            return false;
7232
0
        }
7233
7234
0
        if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
7235
0
            continue;
7236
0
        }
7237
7238
0
        if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
7239
0
            return false;
7240
0
        }
7241
7242
0
        int subgraph_uses = 0;
7243
0
        for (int j = i + 1; j < count; ++j) {
7244
0
            const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
7245
0
            for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
7246
0
                if (other_node->src[src_idx] == node) {
7247
0
                    subgraph_uses++;
7248
0
                }
7249
0
            }
7250
0
        }
7251
7252
0
        if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
7253
0
            return false;
7254
0
        }
7255
7256
        // if node is a view, check that the view_src and all of its parent view_srcs are within the subgraph
7257
0
        struct ggml_tensor * view_src = node->view_src;
7258
0
        while (view_src) {
7259
0
            if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
7260
0
                return false;
7261
0
            }
7262
0
            view_src = view_src->view_src;
7263
0
        }
7264
0
    }
7265
7266
0
    return true;
7267
0
}
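// In short, a candidate subgraph is fusable only if: (1) every node matches
// the expected op, (2) each intermediate (non-output) node is consumed
// exclusively inside the subgraph and is not flagged as a graph output, and
// (3) any chain of view sources resolves to tensors that are themselves part
// of the subgraph.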
7268
7269
// check if node is part of the graph
7270
0
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7271
0
    if (cgraph == NULL) {
7272
0
        return true;
7273
0
    }
7274
7275
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7276
0
        if (cgraph->nodes[i] == node) {
7277
0
            return true;
7278
0
        }
7279
0
    }
7280
7281
0
    return false;
7282
0
}
7283
7284
0
static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7285
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7286
0
        struct ggml_tensor * parent = cgraph->nodes[i];
7287
0
        struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);
7288
7289
0
        if (grad == node) {
7290
0
            return parent;
7291
0
        }
7292
0
    }
7293
7294
0
    return NULL;
7295
0
}
7296
7297
0
static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7298
0
    struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
7299
0
    struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
7300
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
7301
0
            gparent0 ? (void *) gparent0 : (void *) parent,
7302
0
            gparent ? (void *) gparent : (void *) node,
7303
0
            gparent ? "empty" : "vee",
7304
0
            gparent ? "dashed" : "solid",
7305
0
            label);
7306
0
}
7307
7308
0
static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7309
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ label = \"%s\"; ]\n",
7310
0
            (void *) parent,
7311
0
            (void *) node,
7312
0
            label);
7313
0
}
7314
7315
0
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
7316
0
    char color[16];
7317
7318
0
    FILE * fp = ggml_fopen(filename, "w");
7319
0
    GGML_ASSERT(fp);
7320
7321
0
    fprintf(fp, "digraph G {\n");
7322
0
    fprintf(fp, "  newrank = true;\n");
7323
0
    fprintf(fp, "  rankdir = TB;\n");
7324
7325
0
    for (int i = 0; i < gb->n_nodes; i++) {
7326
0
        struct ggml_tensor * node = gb->nodes[i];
7327
0
        struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);
7328
7329
0
        if (ggml_graph_get_parent(gb, node) != NULL) {
7330
0
            continue;
7331
0
        }
7332
7333
0
        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
7334
0
            snprintf(color, sizeof(color), "yellow");
7335
0
        } else if (grad) {
7336
0
            if (ggml_graph_find(gf, node)) {
7337
0
                snprintf(color, sizeof(color), "green");
7338
0
            } else {
7339
0
                snprintf(color, sizeof(color), "lightblue");
7340
0
            }
7341
0
        } else {
7342
0
            snprintf(color, sizeof(color), "white");
7343
0
        }
7344
7345
0
        fprintf(fp, "  \"%p\" [ "
7346
0
                    "style = filled; fillcolor = %s; shape = record; "
7347
0
                    "label=\"",
7348
0
                (void *) node, color);
7349
7350
0
        if (strlen(node->name) > 0) {
7351
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7352
0
        } else {
7353
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7354
0
        }
7355
7356
0
        if (ggml_is_matrix(node)) {
7357
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
7358
0
        } else {
7359
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
7360
0
        }
7361
7362
0
        if (grad) {
7363
0
            fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
7364
0
        } else {
7365
0
            fprintf(fp, "\"; ]\n");
7366
0
        }
7367
0
    }
7368
7369
0
    for (int i = 0; i < gb->n_leafs; i++) {
7370
0
        struct ggml_tensor * node = gb->leafs[i];
7371
7372
0
        snprintf(color, sizeof(color), "pink");
7373
7374
0
        fprintf(fp, "  \"%p\" [ "
7375
0
                    "style = filled; fillcolor = %s; shape = record; "
7376
0
                    "label=\"<x>",
7377
0
                (void *) node, color);
7378
7379
0
        if (strlen(node->name) > 0) {
7380
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7381
0
        } else {
7382
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7383
0
        }
7384
7385
0
        fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
7386
0
        if (ggml_nelements(node) < 5 && node->data != NULL) {
7387
0
            fprintf(fp, " | (");
7388
0
            for (int j = 0; j < ggml_nelements(node); j++) {
7389
                // FIXME: use ggml-backend to obtain the tensor data
7390
                //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
7391
                //    fprintf(fp, "%d", ggml_get_i32_1d(node, j));
7392
                //}
7393
                //else if (node->type == GGML_TYPE_F32 ||
7394
                //         node->type == GGML_TYPE_F16 ||
7395
                //         node->type == GGML_TYPE_BF16) {
7396
                //    fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
7397
                //}
7398
                //else
7399
0
                {
7400
0
                    fprintf(fp, "#");
7401
0
                }
7402
0
                if (j < ggml_nelements(node) - 1) {
7403
0
                    fprintf(fp, ", ");
7404
0
                }
7405
0
            }
7406
0
            fprintf(fp, ")");
7407
0
        }
7408
0
        fprintf(fp, "\"; ]\n");
7409
0
    }
7410
7411
0
    for (int i = 0; i < gb->n_nodes; i++) {
7412
0
        struct ggml_tensor * node = gb->nodes[i];
7413
7414
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7415
0
            if (node->src[j]) {
7416
0
                char label[16];
7417
0
                snprintf(label, sizeof(label), "src %d", j);
7418
0
                ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
7419
0
            }
7420
0
        }
7421
0
    }
7422
7423
0
    for (int i = 0; i < gb->n_leafs; i++) {
7424
0
        struct ggml_tensor * node = gb->leafs[i];
7425
7426
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7427
0
            if (node->src[j]) {
7428
0
                char label[16];
7429
0
                snprintf(label, sizeof(label), "src %d", j);
7430
0
                ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
7431
0
            }
7432
0
        }
7433
0
    }
7434
7435
0
    fprintf(fp, "}\n");
7436
7437
0
    fclose(fp);
7438
7439
0
    GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
7440
0
}
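// Usage sketch (gf may be NULL when no separate forward graph is tracked):
//
//   ggml_graph_dump_dot(gb, NULL, "/tmp/cgraph.dot");
//   // then render it: dot -Tpng /tmp/cgraph.dot -o cgraph.png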
7441
7442
////////////////////////////////////////////////////////////////////////////////
7443
7444
0
void ggml_set_input(struct ggml_tensor * tensor) {
7445
0
    tensor->flags |= GGML_TENSOR_FLAG_INPUT;
7446
0
}
7447
7448
0
void ggml_set_output(struct ggml_tensor * tensor) {
7449
0
    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
7450
0
}
7451
7452
0
void ggml_set_param(struct ggml_tensor * tensor) {
7453
0
    GGML_ASSERT(tensor->op == GGML_OP_NONE);
7454
0
    tensor->flags |= GGML_TENSOR_FLAG_PARAM;
7455
0
}
7456
7457
0
void ggml_set_loss(struct ggml_tensor * tensor) {
7458
0
    GGML_ASSERT(ggml_is_scalar(tensor));
7459
0
    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
7460
0
    tensor->flags |= GGML_TENSOR_FLAG_LOSS;
7461
0
}
7462
7463
////////////////////////////////////////////////////////////////////////////////
7464
7465
0
void ggml_quantize_init(enum ggml_type type) {
7466
0
    ggml_critical_section_start();
7467
7468
0
    switch (type) {
7469
0
        case GGML_TYPE_IQ2_XXS:
7470
0
        case GGML_TYPE_IQ2_XS:
7471
0
        case GGML_TYPE_IQ2_S:
7472
0
        case GGML_TYPE_IQ1_S:
7473
0
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
7474
0
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
7475
0
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
7476
0
        default: // nothing
7477
0
            break;
7478
0
    }
7479
7480
0
    ggml_critical_section_end();
7481
0
}
7482
7483
0
void ggml_quantize_free(void) {
7484
0
    ggml_critical_section_start();
7485
7486
0
    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
7487
0
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
7488
0
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
7489
0
    iq3xs_free_impl(256);
7490
7491
0
    ggml_critical_section_end();
7492
0
}
7493
7494
0
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
7495
0
    return
7496
0
        type == GGML_TYPE_IQ2_XXS ||
7497
0
        type == GGML_TYPE_IQ2_XS  ||
7498
0
        type == GGML_TYPE_IQ1_S;//   ||
7499
        //type == GGML_TYPE_IQ1_M;
7500
0
}
7501
7502
size_t ggml_quantize_chunk(
7503
        enum ggml_type   type,
7504
           const float * src,
7505
                  void * dst,
7506
               int64_t   start,
7507
               int64_t   nrows,
7508
               int64_t   n_per_row,
7509
0
           const float * imatrix) {
7510
0
    const int64_t n = (int64_t) nrows * n_per_row;
7511
7512
0
    if (ggml_quantize_requires_imatrix(type)) {
7513
0
        GGML_ASSERT(imatrix != NULL);
7514
0
    }
7515
7516
0
    GGML_ASSERT(start % type_traits[type].blck_size == 0);
7517
0
    GGML_ASSERT(start % n_per_row == 0);
7518
7519
0
    ggml_quantize_init(type); // this is noop if already initialized
7520
7521
0
    const size_t start_row = start / n_per_row;
7522
0
    const size_t row_size  = ggml_row_size(type, n_per_row);
7523
7524
0
    size_t result = 0;
7525
7526
0
    switch (type) {
7527
0
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7528
0
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7529
0
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7530
0
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7531
0
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7532
0
        case GGML_TYPE_MXFP4:   result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7533
0
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7534
0
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7535
0
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7536
0
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7537
0
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7538
0
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7539
0
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7540
0
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7541
0
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7542
0
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7543
0
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7544
0
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7545
0
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7546
0
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7547
0
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7548
0
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7549
0
        case GGML_TYPE_F16:
7550
0
            {
7551
0
                size_t elemsize = sizeof(ggml_fp16_t);
7552
0
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
7553
0
                result = n * elemsize;
7554
0
            } break;
7555
0
        case GGML_TYPE_BF16:
7556
0
            {
7557
0
                size_t elemsize = sizeof(ggml_bf16_t);
7558
0
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
7559
0
                result = n * elemsize;
7560
0
            } break;
7561
0
        case GGML_TYPE_F32:
7562
0
            {
7563
0
                size_t elemsize = sizeof(float);
7564
0
                result = n * elemsize;
7565
0
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
7566
0
            } break;
7567
0
        default:
7568
0
            assert(false);
7569
0
    }
7570
7571
0
    GGML_ASSERT(result == nrows * row_size);
7572
7573
0
    return result;
7574
0
}
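// Sketch: quantizing an F32 matrix to Q4_0, which needs no imatrix
// (`src` is assumed to point at nrows * n_per_row floats):
//
//   const int64_t nrows = 32, n_per_row = 256; // multiple of the block size
//   const size_t  row_size = ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
//   void * dst = malloc(nrows * row_size);
//   size_t written = ggml_quantize_chunk(GGML_TYPE_Q4_0, src, dst,
//                                        /*start =*/ 0, nrows, n_per_row, NULL);
//   // written == nrows * row_size (enforced by the assert above)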
7575
7576
////////////////////////////////////////////////////////////////////////////////
7577
7578
0
void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) {
7579
0
    *log_callback = g_logger_state.log_callback;
7580
0
    *user_data    = g_logger_state.log_callback_user_data;
7581
0
}
7582
7583
0
void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
7584
0
    g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
7585
0
    g_logger_state.log_callback_user_data = user_data;
7586
0
}
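// Sketch: installing a custom log sink (the ggml_log_callback type is
// declared in ggml.h):
//
//   static void my_log(enum ggml_log_level level, const char * text, void * ud) {
//       (void) level; (void) ud;
//       fputs(text, stderr);
//   }
//   ...
//   ggml_log_set(my_log, NULL);  // install
//   ggml_log_set(NULL,   NULL);  // restore the default callback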
7587
7588
0
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
7589
0
    p->n_threads  = n_threads;
7590
0
    p->prio       = 0;     // default priority (usually means normal or inherited)
7591
0
    p->poll       = 50;    // hybrid-polling enabled
7592
0
    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
7593
0
    p->paused     = false; // threads are ready to go
7594
0
    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
7595
0
}
7596
7597
0
struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
7598
0
    struct ggml_threadpool_params p;
7599
0
    ggml_threadpool_params_init(&p, n_threads);
7600
0
    return p;
7601
0
}
7602
7603
0
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
7604
0
    if (p0->n_threads      != p1->n_threads  )    return false;
7605
0
    if (p0->prio           != p1->prio       )    return false;
7606
0
    if (p0->poll           != p1->poll       )    return false;
7607
0
    if (p0->strict_cpu     != p1->strict_cpu )    return false;
7608
0
    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
7609
0
}
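// Sketch: customizing a thread-pool configuration starting from the defaults:
//
//   struct ggml_threadpool_params tpp = ggml_threadpool_params_default(8);
//   tpp.prio       = 1;    // raise scheduling priority
//   tpp.strict_cpu = true; // enforce strict per-thread CPU placement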