Coverage Report

Created: 2025-11-24 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/ggml/src/ggml.c
Line
Count
Source
1
#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
2
#define _USE_MATH_DEFINES // For M_PI on MSVC
3
4
#include "ggml-backend.h"
5
#include "ggml-impl.h"
6
#include "ggml-threading.h"
7
#include "ggml-cpu.h"
8
#include "ggml.h"
9
10
// FIXME: required here for quantization functions
11
#include "ggml-quants.h"
12
13
#ifdef GGML_USE_CPU_HBM
14
#include <hbwmalloc.h>
15
#endif
16
17
#if defined(_MSC_VER) || defined(__MINGW32__)
18
#include <malloc.h> // using malloc.h with MSC/MINGW
19
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
20
#include <alloca.h>
21
#endif
22
23
#include <assert.h>
24
#include <errno.h>
25
#include <time.h>
26
#include <math.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include <stdint.h>
30
#include <inttypes.h>
31
#include <stdio.h>
32
#include <float.h>
33
#include <limits.h>
34
#include <stdarg.h>
35
#include <signal.h>
36
#if defined(__gnu_linux__)
37
#include <syscall.h>
38
#endif
39
40
#if defined(__APPLE__)
41
#include <unistd.h>
42
#include <mach/mach.h>
43
#include <TargetConditionals.h>
44
#endif
45
46
#if defined(_WIN32)
47
#define WIN32_LEAN_AND_MEAN
48
#ifndef NOMINMAX
49
    #define NOMINMAX
50
#endif
51
#include <windows.h>
52
#endif
53
54
0
#define UNUSED GGML_UNUSED
55
56
#if defined(_MSC_VER)
57
#define m512bh(p) p
58
#define m512i(p) p
59
#else
60
#define m512bh(p) (__m512bh)(p)
61
#define m512i(p) (__m512i)(p)
62
#endif
63
64
#if defined(__linux__) || \
65
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
66
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
67
68
#include <unistd.h>
69
#include <sys/types.h>
70
#include <sys/stat.h>
71
#include <sys/wait.h>
72
#if defined(__linux__)
73
#include <sys/prctl.h>
74
#endif
75
76
#if defined(__ANDROID__)
77
#include <unwind.h>
78
#include <dlfcn.h>
79
#include <stdio.h>
80
81
struct backtrace_state {
82
    void ** current;
83
    void ** end;
84
};
85
86
static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
87
    struct backtrace_state * state = (struct backtrace_state *)arg;
88
    uintptr_t pc = _Unwind_GetIP(context);
89
    if (pc) {
90
        if (state->current == state->end) {
91
            return _URC_END_OF_STACK;
92
        } else {
93
            *state->current++ = (void*)pc;
94
        }
95
    }
96
    return _URC_NO_REASON;
97
}
98
99
static void ggml_print_backtrace_symbols(void) {
100
    const int max = 100;
101
    void* buffer[max];
102
103
    struct backtrace_state state = {buffer, buffer + max};
104
    _Unwind_Backtrace(unwind_callback, &state);
105
106
    int count = state.current - buffer;
107
108
    for (int idx = 0; idx < count; ++idx) {
109
        const void * addr = buffer[idx];
110
        const char * symbol = "";
111
112
        Dl_info info;
113
        if (dladdr(addr, &info) && info.dli_sname) {
114
            symbol = info.dli_sname;
115
        }
116
117
        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
118
    }
119
}
120
#elif defined(__linux__) && defined(__GLIBC__)
121
#include <execinfo.h>
122
0
static void ggml_print_backtrace_symbols(void) {
123
0
    void * trace[100];
124
0
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
125
0
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
126
0
}
127
#else
128
static void ggml_print_backtrace_symbols(void) {
129
    // platform not supported
130
}
131
#endif
132
133
0
void ggml_print_backtrace(void) {
134
0
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
135
0
    if (GGML_NO_BACKTRACE) {
136
0
        return;
137
0
    }
138
0
#if defined(__linux__)
139
0
    FILE * f = fopen("/proc/self/status", "r");
140
0
    size_t size = 0;
141
0
    char * line = NULL;
142
0
    ssize_t length = 0;
143
0
    while ((length = getline(&line, &size, f)) > 0) {
144
0
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
145
0
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
146
            // Already being debugged, and the breakpoint is the later abort()
147
0
            free(line);
148
0
            fclose(f);
149
0
            return;
150
0
        }
151
0
    }
152
0
    free(line);
153
0
    fclose(f);
154
0
    int lock[2] = { -1, -1 };
155
0
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
156
0
#endif
157
0
    const int parent_pid = getpid();
158
0
    const int child_pid = fork();
159
0
    if (child_pid < 0) { // error
160
0
#if defined(__linux__)
161
0
        close(lock[1]);
162
0
        close(lock[0]);
163
0
#endif
164
0
        return;
165
0
    } else if (child_pid == 0) { // child
166
0
        char attach[32];
167
0
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
168
0
#if defined(__linux__)
169
0
        close(lock[1]);
170
0
        (void) !read(lock[0], lock, 1);
171
0
        close(lock[0]);
172
0
#endif
173
        // try gdb
174
0
        execlp("gdb", "gdb", "--batch",
175
0
            "-ex", "set style enabled on",
176
0
            "-ex", attach,
177
0
            "-ex", "bt -frame-info source-and-location",
178
0
            "-ex", "detach",
179
0
            "-ex", "quit",
180
0
            (char *) NULL);
181
        // try lldb
182
0
        execlp("lldb", "lldb", "--batch",
183
0
            "-o", "bt",
184
0
            "-o", "quit",
185
0
            "-p", &attach[sizeof("attach ") - 1],
186
0
            (char *) NULL);
187
        // gdb failed, fallback to backtrace_symbols
188
0
        ggml_print_backtrace_symbols();
189
0
        _Exit(0);
190
0
    } else { // parent
191
0
#if defined(__linux__)
192
0
        prctl(PR_SET_PTRACER, child_pid);
193
0
        close(lock[1]);
194
0
        close(lock[0]);
195
0
#endif
196
0
        waitpid(child_pid, NULL, 0);
197
0
    }
198
0
}
199
#else
200
void ggml_print_backtrace(void) {
201
    // platform not supported
202
}
203
#endif
204
205
static ggml_abort_callback_t g_abort_callback = NULL;
206
207
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
208
0
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) {
209
0
    ggml_abort_callback_t ret_val = g_abort_callback;
210
0
    g_abort_callback = callback;
211
0
    return ret_val;
212
0
}
213
214
55
void ggml_abort(const char * file, int line, const char * fmt, ...) {
215
55
    fflush(stdout);
216
217
55
    char message[2048];
218
55
    int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line);
219
220
55
    va_list args;
221
55
    va_start(args, fmt);
222
55
    vsnprintf(message + offset, sizeof(message) - offset, fmt, args);
223
55
    va_end(args);
224
225
55
    if (g_abort_callback) {
226
0
        g_abort_callback(message);
227
55
    } else {
228
        // default: print error and backtrace to stderr
229
55
        fprintf(stderr, "%s\n", message);
230
        
231
55
    }
232
233
55
    abort();
234
55
}
235
236
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
237
238
//
239
// logging
240
//
241
242
struct ggml_logger_state {
243
    ggml_log_callback log_callback;
244
    void * log_callback_user_data;
245
};
246
static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};
247
248
918
static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
249
918
    if (format == NULL) {
250
0
        return;
251
0
    }
252
918
    va_list args_copy;
253
918
    va_copy(args_copy, args);
254
918
    char buffer[128];
255
918
    int len = vsnprintf(buffer, 128, format, args);
256
918
    if (len < 128) {
257
902
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
258
902
    } else {
259
16
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
260
16
        vsnprintf(buffer2, len + 1, format, args_copy);
261
16
        buffer2[len] = 0;
262
16
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
263
16
        free(buffer2);
264
16
    }
265
918
    va_end(args_copy);
266
918
}
267
268
918
void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
269
918
    va_list args;
270
918
    va_start(args, format);
271
918
    ggml_log_internal_v(level, format, args);
272
918
    va_end(args);
273
918
}
274
275
918
void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
276
918
    (void) level;
277
918
    (void) user_data;
278
918
    fputs(text, stderr);
279
918
    fflush(stderr);
280
918
}
281
282
//
283
// end of logging block
284
//
285
286
#ifdef GGML_USE_ACCELERATE
287
// uncomment to use vDSP for soft max computation
288
// note: not sure if it is actually faster
289
//#define GGML_SOFT_MAX_ACCELERATE
290
#endif
291
292
293
1.10k
void * ggml_aligned_malloc(size_t size) {
294
#if defined(__s390x__)
295
    const int alignment = 256;
296
#else
297
1.10k
    const int alignment = 64;
298
1.10k
#endif
299
300
#if defined(_MSC_VER) || defined(__MINGW32__)
301
    return _aligned_malloc(size, alignment);
302
#else
303
1.10k
    if (size == 0) {
304
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
305
0
        return NULL;
306
0
    }
307
1.10k
    void * aligned_memory = NULL;
308
  #ifdef GGML_USE_CPU_HBM
309
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
310
  #elif TARGET_OS_OSX
311
    GGML_UNUSED(alignment);
312
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
313
    int result = EFAULT;
314
    switch (alloc_status) {
315
        case KERN_SUCCESS:
316
            result = 0;
317
            break;
318
        case KERN_INVALID_ADDRESS:
319
            result = EINVAL;
320
            break;
321
        case KERN_NO_SPACE:
322
            result = ENOMEM;
323
            break;
324
        default:
325
            result = EFAULT;
326
            break;
327
    }
328
  #else
329
1.10k
    int result = posix_memalign(&aligned_memory, alignment, size);
330
1.10k
  #endif
331
1.10k
    if (result != 0) {
332
        // Handle allocation failure
333
0
        const char *error_desc = "unknown allocation error";
334
0
        switch (result) {
335
0
            case EINVAL:
336
0
                error_desc = "invalid alignment value";
337
0
                break;
338
0
            case ENOMEM:
339
0
                error_desc = "insufficient memory";
340
0
                break;
341
0
        }
342
0
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
343
0
        return NULL;
344
0
    }
345
1.10k
    return aligned_memory;
346
1.10k
#endif
347
1.10k
}
348
349
1.10k
void ggml_aligned_free(void * ptr, size_t size) {
350
1.10k
    GGML_UNUSED(size);
351
#if defined(_MSC_VER) || defined(__MINGW32__)
352
    _aligned_free(ptr);
353
#elif GGML_USE_CPU_HBM
354
    if (ptr != NULL) {
355
        hbw_free(ptr);
356
    }
357
#elif TARGET_OS_OSX
358
    if (ptr != NULL) {
359
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
360
    }
361
#else
362
1.10k
    free(ptr);
363
1.10k
#endif
364
1.10k
}
365
366
367
1.10k
inline static void * ggml_malloc(size_t size) {
368
1.10k
    if (size == 0) {
369
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
370
0
        return NULL;
371
0
    }
372
1.10k
    void * result = malloc(size);
373
1.10k
    if (result == NULL) {
374
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
375
0
        GGML_ABORT("fatal error");
376
0
    }
377
1.10k
    return result;
378
1.10k
}
379
380
// calloc
381
0
inline static void * ggml_calloc(size_t num, size_t size) {
382
0
if ((num * size) > 9000000) {GGML_ABORT("calloc err");}
383
384
0
    if (num == 0 || size == 0) {
385
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
386
0
        return NULL;
387
0
    }
388
0
    void * result = calloc(num, size);
389
0
    if (result == NULL) {
390
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
391
0
        GGML_ABORT("fatal error");
392
0
    }
393
0
    return result;
394
0
}
395
396
1.10k
#define GGML_MALLOC(size)      ggml_malloc(size)
397
0
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
398
399
1.10k
#define GGML_FREE(ptr) free(ptr)
400
401
0
const char * ggml_status_to_string(enum ggml_status status) {
402
0
    switch (status) {
403
0
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
404
0
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
405
0
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
406
0
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
407
0
    }
408
409
0
    return "GGML status: unknown";
410
0
}
411
412
0
float ggml_fp16_to_fp32(ggml_fp16_t x) {
413
0
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
414
0
    return GGML_FP16_TO_FP32(x);
415
0
}
416
417
0
ggml_fp16_t ggml_fp32_to_fp16(float x) {
418
0
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
419
0
    return GGML_FP32_TO_FP16(x);
420
0
}
421
422
0
float ggml_bf16_to_fp32(ggml_bf16_t x) {
423
0
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
424
0
    return GGML_BF16_TO_FP32(x);  // it just left shifts
425
0
}
426
427
0
ggml_bf16_t ggml_fp32_to_bf16(float x) {
428
0
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
429
0
    return GGML_FP32_TO_BF16(x);
430
0
}
431
432
0
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
433
0
    for (int64_t i = 0; i < n; i++) {
434
0
        y[i] = GGML_FP16_TO_FP32(x[i]);
435
0
    }
436
0
}
437
438
0
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
439
0
    int i = 0;
440
0
    for (; i < n; ++i) {
441
0
        y[i] = GGML_FP32_TO_FP16(x[i]);
442
0
    }
443
0
}
444
445
0
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
446
0
    int i = 0;
447
0
    for (; i < n; ++i) {
448
0
        y[i] = GGML_BF16_TO_FP32(x[i]);
449
0
    }
450
0
}
451
452
0
void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
453
0
    for (int i = 0; i < n; i++) {
454
0
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
455
0
    }
456
0
}
457
458
0
void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
459
0
  int i = 0;
460
#if defined(__AVX512BF16__)
461
  // subnormals are flushed to zero on this platform
462
  for (; i + 32 <= n; i += 32) {
463
        _mm512_storeu_si512(
464
            (__m512i *)(y + i),
465
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
466
                                _mm512_loadu_ps(x + i))));
467
  }
468
#endif
469
0
    for (; i < n; i++) {
470
0
        y[i] = GGML_FP32_TO_BF16(x[i]);
471
0
    }
472
0
}
473
474
0
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
475
0
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
476
0
}
477
478
0
const char * ggml_version(void) {
479
0
    return GGML_VERSION;
480
0
}
481
482
0
const char * ggml_commit(void) {
483
0
    return GGML_COMMIT;
484
0
}
485
486
//
487
// timing
488
//
489
490
#if defined(_MSC_VER) || defined(__MINGW32__)
491
static int64_t timer_freq, timer_start;
492
void ggml_time_init(void) {
493
    LARGE_INTEGER t;
494
    QueryPerformanceFrequency(&t);
495
    timer_freq = t.QuadPart;
496
497
    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
498
    // and the uptime is high enough.
499
    // We subtract the program start time to reduce the likelihood of that happening.
500
    QueryPerformanceCounter(&t);
501
    timer_start = t.QuadPart;
502
}
503
int64_t ggml_time_ms(void) {
504
    LARGE_INTEGER t;
505
    QueryPerformanceCounter(&t);
506
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
507
}
508
int64_t ggml_time_us(void) {
509
    LARGE_INTEGER t;
510
    QueryPerformanceCounter(&t);
511
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
512
}
513
#else
514
2.82k
void ggml_time_init(void) {}
515
0
int64_t ggml_time_ms(void) {
516
0
    struct timespec ts;
517
0
    clock_gettime(CLOCK_MONOTONIC, &ts);
518
0
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
519
0
}
520
521
1.67k
int64_t ggml_time_us(void) {
522
1.67k
    struct timespec ts;
523
1.67k
    clock_gettime(CLOCK_MONOTONIC, &ts);
524
1.67k
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
525
1.67k
}
526
#endif
527
528
0
int64_t ggml_cycles(void) {
529
0
    return clock();
530
0
}
531
532
0
int64_t ggml_cycles_per_ms(void) {
533
0
    return CLOCKS_PER_SEC/1000;
534
0
}
535
536
//
537
// cross-platform UTF-8 file paths
538
//
539
540
#ifdef _WIN32
541
static wchar_t * ggml_mbstowcs(const char * mbs) {
542
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
543
    if (!wlen) {
544
        errno = EINVAL;
545
        return NULL;
546
    }
547
548
    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
549
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
550
    if (!wlen) {
551
        GGML_FREE(wbuf);
552
        errno = EINVAL;
553
        return NULL;
554
    }
555
556
    return wbuf;
557
}
558
#endif
559
560
1.09k
FILE * ggml_fopen(const char * fname, const char * mode) {
561
#ifdef _WIN32
562
    FILE * file = NULL;
563
564
    // convert fname (UTF-8)
565
    wchar_t * wfname = ggml_mbstowcs(fname);
566
    if (wfname) {
567
        // convert mode (ANSI)
568
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
569
        wchar_t * wmode_p = wmode;
570
        do {
571
            *wmode_p++ = (wchar_t)*mode;
572
        } while (*mode++);
573
574
        // open file
575
        file = _wfopen(wfname, wmode);
576
577
        GGML_FREE(wfname);
578
        GGML_FREE(wmode);
579
    }
580
581
    return file;
582
#else
583
1.09k
    return fopen(fname, mode);
584
1.09k
#endif
585
586
1.09k
}
587
588
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
589
    [GGML_TYPE_I8] = {
590
        .type_name                = "i8",
591
        .blck_size                = 1,
592
        .type_size                = sizeof(int8_t),
593
        .is_quantized             = false,
594
    },
595
    [GGML_TYPE_I16] = {
596
        .type_name                = "i16",
597
        .blck_size                = 1,
598
        .type_size                = sizeof(int16_t),
599
        .is_quantized             = false,
600
    },
601
    [GGML_TYPE_I32] = {
602
        .type_name                = "i32",
603
        .blck_size                = 1,
604
        .type_size                = sizeof(int32_t),
605
        .is_quantized             = false,
606
    },
607
    [GGML_TYPE_I64] = {
608
        .type_name                = "i64",
609
        .blck_size                = 1,
610
        .type_size                = sizeof(int64_t),
611
        .is_quantized             = false,
612
    },
613
    [GGML_TYPE_F64] = {
614
        .type_name                = "f64",
615
        .blck_size                = 1,
616
        .type_size                = sizeof(double),
617
        .is_quantized             = false,
618
    },
619
    [GGML_TYPE_F32] = {
620
        .type_name                = "f32",
621
        .blck_size                = 1,
622
        .type_size                = sizeof(float),
623
        .is_quantized             = false,
624
    },
625
    [GGML_TYPE_F16] = {
626
        .type_name                = "f16",
627
        .blck_size                = 1,
628
        .type_size                = sizeof(ggml_fp16_t),
629
        .is_quantized             = false,
630
        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
631
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_fp16_row,
632
    },
633
    [GGML_TYPE_Q4_0] = {
634
        .type_name                = "q4_0",
635
        .blck_size                = QK4_0,
636
        .type_size                = sizeof(block_q4_0),
637
        .is_quantized             = true,
638
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_0,
639
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_0_ref,
640
    },
641
    [GGML_TYPE_Q4_1] = {
642
        .type_name                = "q4_1",
643
        .blck_size                = QK4_1,
644
        .type_size                = sizeof(block_q4_1),
645
        .is_quantized             = true,
646
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_1,
647
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_1_ref,
648
    },
649
    [4] = { // GGML_TYPE_Q4_2
650
        .type_name                = "DEPRECATED",
651
        .blck_size                = 0,
652
        .type_size                = 0,
653
        .is_quantized             = false,
654
    },
655
    [5] = { // GGML_TYPE_Q4_3
656
        .type_name                = "DEPRECATED",
657
        .blck_size                = 0,
658
        .type_size                = 0,
659
        .is_quantized             = false,
660
    },
661
    [GGML_TYPE_Q5_0] = {
662
        .type_name                = "q5_0",
663
        .blck_size                = QK5_0,
664
        .type_size                = sizeof(block_q5_0),
665
        .is_quantized             = true,
666
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_0,
667
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_0_ref,
668
    },
669
    [GGML_TYPE_Q5_1] = {
670
        .type_name                = "q5_1",
671
        .blck_size                = QK5_1,
672
        .type_size                = sizeof(block_q5_1),
673
        .is_quantized             = true,
674
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_1,
675
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_1_ref,
676
    },
677
    [GGML_TYPE_Q8_0] = {
678
        .type_name                = "q8_0",
679
        .blck_size                = QK8_0,
680
        .type_size                = sizeof(block_q8_0),
681
        .is_quantized             = true,
682
        .to_float                 = (ggml_to_float_t) dequantize_row_q8_0,
683
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_0_ref,
684
    },
685
    [GGML_TYPE_Q8_1] = {
686
        .type_name                = "q8_1",
687
        .blck_size                = QK8_1,
688
        .type_size                = sizeof(block_q8_1),
689
        .is_quantized             = true,
690
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_1_ref,
691
    },
692
    [GGML_TYPE_MXFP4] = {
693
        .type_name                = "mxfp4",
694
        .blck_size                = QK_MXFP4,
695
        .type_size                = sizeof(block_mxfp4),
696
        .is_quantized             = true,
697
        .to_float                 = (ggml_to_float_t) dequantize_row_mxfp4,
698
        .from_float_ref           = (ggml_from_float_t)quantize_row_mxfp4_ref,
699
    },
700
    [GGML_TYPE_Q2_K] = {
701
        .type_name                = "q2_K",
702
        .blck_size                = QK_K,
703
        .type_size                = sizeof(block_q2_K),
704
        .is_quantized             = true,
705
        .to_float                 = (ggml_to_float_t) dequantize_row_q2_K,
706
        .from_float_ref           = (ggml_from_float_t) quantize_row_q2_K_ref,
707
    },
708
    [GGML_TYPE_Q3_K] = {
709
        .type_name                = "q3_K",
710
        .blck_size                = QK_K,
711
        .type_size                = sizeof(block_q3_K),
712
        .is_quantized             = true,
713
        .to_float                 = (ggml_to_float_t) dequantize_row_q3_K,
714
        .from_float_ref           = (ggml_from_float_t) quantize_row_q3_K_ref,
715
    },
716
    [GGML_TYPE_Q4_K] = {
717
        .type_name                = "q4_K",
718
        .blck_size                = QK_K,
719
        .type_size                = sizeof(block_q4_K),
720
        .is_quantized             = true,
721
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_K,
722
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_K_ref,
723
    },
724
    [GGML_TYPE_Q5_K] = {
725
        .type_name                = "q5_K",
726
        .blck_size                = QK_K,
727
        .type_size                = sizeof(block_q5_K),
728
        .is_quantized             = true,
729
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_K,
730
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_K_ref,
731
    },
732
    [GGML_TYPE_Q6_K] = {
733
        .type_name                = "q6_K",
734
        .blck_size                = QK_K,
735
        .type_size                = sizeof(block_q6_K),
736
        .is_quantized             = true,
737
        .to_float                 = (ggml_to_float_t) dequantize_row_q6_K,
738
        .from_float_ref           = (ggml_from_float_t) quantize_row_q6_K_ref,
739
    },
740
    [GGML_TYPE_IQ2_XXS] = {
741
        .type_name                = "iq2_xxs",
742
        .blck_size                = QK_K,
743
        .type_size                = sizeof(block_iq2_xxs),
744
        .is_quantized             = true,
745
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xxs,
746
        .from_float_ref           = NULL,
747
    },
748
    [GGML_TYPE_IQ2_XS] = {
749
        .type_name                = "iq2_xs",
750
        .blck_size                = QK_K,
751
        .type_size                = sizeof(block_iq2_xs),
752
        .is_quantized             = true,
753
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xs,
754
        .from_float_ref           = NULL,
755
    },
756
    [GGML_TYPE_IQ3_XXS] = {
757
        .type_name                = "iq3_xxs",
758
        .blck_size                = QK_K,
759
        .type_size                = sizeof(block_iq3_xxs),
760
        .is_quantized             = true,
761
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_xxs,
762
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
763
    },
764
    [GGML_TYPE_IQ3_S] = {
765
        .type_name                = "iq3_s",
766
        .blck_size                = QK_K,
767
        .type_size                = sizeof(block_iq3_s),
768
        .is_quantized             = true,
769
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_s,
770
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_s_ref,
771
    },
772
    [GGML_TYPE_IQ2_S] = {
773
        .type_name                = "iq2_s",
774
        .blck_size                = QK_K,
775
        .type_size                = sizeof(block_iq2_s),
776
        .is_quantized             = true,
777
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_s,
778
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq2_s_ref,
779
    },
780
    [GGML_TYPE_IQ1_S] = {
781
        .type_name                = "iq1_s",
782
        .blck_size                = QK_K,
783
        .type_size                = sizeof(block_iq1_s),
784
        .is_quantized             = true,
785
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_s,
786
        .from_float_ref           = NULL,
787
    },
788
    [GGML_TYPE_IQ1_M] = {
789
        .type_name                = "iq1_m",
790
        .blck_size                = QK_K,
791
        .type_size                = sizeof(block_iq1_m),
792
        .is_quantized             = true,
793
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_m,
794
        .from_float_ref           = NULL,
795
    },
796
    [GGML_TYPE_IQ4_NL] = {
797
        .type_name                = "iq4_nl",
798
        .blck_size                = QK4_NL,
799
        .type_size                = sizeof(block_iq4_nl),
800
        .is_quantized             = true,
801
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_nl,
802
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_nl_ref,
803
    },
804
    [GGML_TYPE_IQ4_XS] = {
805
        .type_name                = "iq4_xs",
806
        .blck_size                = QK_K,
807
        .type_size                = sizeof(block_iq4_xs),
808
        .is_quantized             = true,
809
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_xs,
810
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_xs_ref,
811
    },
812
    [GGML_TYPE_Q8_K] = {
813
        .type_name                = "q8_K",
814
        .blck_size                = QK_K,
815
        .type_size                = sizeof(block_q8_K),
816
        .is_quantized             = true,
817
    },
818
    [GGML_TYPE_BF16] = {
819
        .type_name                = "bf16",
820
        .blck_size                = 1,
821
        .type_size                = sizeof(ggml_bf16_t),
822
        .is_quantized             = false,
823
        .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
824
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
825
    },
826
    [31] = { // GGML_TYPE_Q4_0_4_4
827
        .type_name                = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
828
        .blck_size                = 0,
829
        .type_size                = 0,
830
        .is_quantized             = false,
831
    },
832
    [32] = { // GGML_TYPE_Q4_0_4_8
833
        .type_name                = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
834
        .blck_size                = 0,
835
        .type_size                = 0,
836
        .is_quantized             = false,
837
    },
838
    [33] = { // GGML_TYPE_Q4_0_8_8
839
        .type_name                = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
840
        .blck_size                = 0,
841
        .type_size                = 0,
842
        .is_quantized             = false,
843
    },
844
    [GGML_TYPE_TQ1_0] = {
845
        .type_name                = "tq1_0",
846
        .blck_size                = QK_K,
847
        .type_size                = sizeof(block_tq1_0),
848
        .is_quantized             = true,
849
        .to_float                 = (ggml_to_float_t) dequantize_row_tq1_0,
850
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq1_0_ref,
851
    },
852
    [GGML_TYPE_TQ2_0] = {
853
        .type_name                = "tq2_0",
854
        .blck_size                = QK_K,
855
        .type_size                = sizeof(block_tq2_0),
856
        .is_quantized             = true,
857
        .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
858
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
859
    },
860
    [36] = { // GGML_TYPE_IQ4_NL_4_4
861
        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
862
        .blck_size                = 0,
863
        .type_size                = 0,
864
        .is_quantized             = false,
865
    },
866
    [37] = { // GGML_TYPE_IQ4_NL_4_8
867
        .type_name                = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
868
        .blck_size                = 0,
869
        .type_size                = 0,
870
        .is_quantized             = false,
871
    },
872
    [38] = { // GGML_TYPE_IQ4_NL_8_8
873
        .type_name                = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
874
        .blck_size                = 0,
875
        .type_size                = 0,
876
        .is_quantized             = false,
877
    },
878
};
879
880
0
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
881
0
    GGML_ASSERT(type < GGML_TYPE_COUNT);
882
0
    return &type_traits[type];
883
0
}
884
885
//
886
// ggml object
887
//
888
889
struct ggml_object {
890
    size_t offs;
891
    size_t size;
892
893
    struct ggml_object * next;
894
895
    enum ggml_object_type type;
896
897
    char padding[4];
898
};
899
900
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
901
902
//
903
// ggml context
904
//
905
906
struct ggml_context {
907
    size_t mem_size;
908
    void * mem_buffer;
909
    bool   mem_buffer_owned;
910
    bool   no_alloc;
911
912
    int    n_objects;
913
914
    struct ggml_object * objects_begin;
915
    struct ggml_object * objects_end;
916
};
917
918
//
919
// data types
920
//
921
922
static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
923
    "NONE",
924
925
    "DUP",
926
    "ADD",
927
    "ADD_ID",
928
    "ADD1",
929
    "ACC",
930
    "SUB",
931
    "MUL",
932
    "DIV",
933
    "SQR",
934
    "SQRT",
935
    "LOG",
936
    "SIN",
937
    "COS",
938
    "SUM",
939
    "SUM_ROWS",
940
    "CUMSUM",
941
    "MEAN",
942
    "ARGMAX",
943
    "COUNT_EQUAL",
944
    "REPEAT",
945
    "REPEAT_BACK",
946
    "CONCAT",
947
    "SILU_BACK",
948
    "NORM",
949
    "RMS_NORM",
950
    "RMS_NORM_BACK",
951
    "GROUP_NORM",
952
    "L2_NORM",
953
954
    "MUL_MAT",
955
    "MUL_MAT_ID",
956
    "OUT_PROD",
957
958
    "SCALE",
959
    "SET",
960
    "CPY",
961
    "CONT",
962
    "RESHAPE",
963
    "VIEW",
964
    "PERMUTE",
965
    "TRANSPOSE",
966
    "GET_ROWS",
967
    "GET_ROWS_BACK",
968
    "SET_ROWS",
969
    "DIAG",
970
    "DIAG_MASK_INF",
971
    "DIAG_MASK_ZERO",
972
    "SOFT_MAX",
973
    "SOFT_MAX_BACK",
974
    "ROPE",
975
    "ROPE_BACK",
976
    "CLAMP",
977
    "CONV_TRANSPOSE_1D",
978
    "IM2COL",
979
    "IM2COL_BACK",
980
    "IM2COL_3D",
981
    "CONV_2D",
982
    "CONV_3D",
983
    "CONV_2D_DW",
984
    "CONV_TRANSPOSE_2D",
985
    "POOL_1D",
986
    "POOL_2D",
987
    "POOL_2D_BACK",
988
    "UPSCALE",
989
    "PAD",
990
    "PAD_REFLECT_1D",
991
    "ROLL",
992
    "ARANGE",
993
    "TIMESTEP_EMBEDDING",
994
    "ARGSORT",
995
    "LEAKY_RELU",
996
    "TRI",
997
    "FILL",
998
999
    "FLASH_ATTN_EXT",
1000
    "FLASH_ATTN_BACK",
1001
    "SSM_CONV",
1002
    "SSM_SCAN",
1003
    "WIN_PART",
1004
    "WIN_UNPART",
1005
    "GET_REL_POS",
1006
    "ADD_REL_POS",
1007
    "RWKV_WKV6",
1008
    "GATED_LINEAR_ATTN",
1009
    "RWKV_WKV7",
1010
    "SOLVE_TRI",
1011
1012
    "UNARY",
1013
1014
    "MAP_CUSTOM1",
1015
    "MAP_CUSTOM2",
1016
    "MAP_CUSTOM3",
1017
1018
    "CUSTOM",
1019
1020
    "CROSS_ENTROPY_LOSS",
1021
    "CROSS_ENTROPY_LOSS_BACK",
1022
    "OPT_STEP_ADAMW",
1023
    "OPT_STEP_SGD",
1024
1025
    "GLU",
1026
};
1027
1028
static_assert(GGML_OP_COUNT == 94, "GGML_OP_COUNT != 94");
1029
1030
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1031
    "none",
1032
1033
    "x",
1034
    "x+y",
1035
    "x[i]+y",
1036
    "x+y",
1037
    "view(x,nb,offset)+=y->x",
1038
    "x-y",
1039
    "x*y",
1040
    "x/y",
1041
    "x^2",
1042
    "√x",
1043
    "log(x)",
1044
    "sin(x)",
1045
    "cos(x)",
1046
    "Σx",
1047
    "Σx_k",
1048
    "cumsum(x)",
1049
    "Σx/n",
1050
    "argmax(x)",
1051
    "count_equal(x)",
1052
    "repeat(x)",
1053
    "repeat_back(x)",
1054
    "concat(x, y)",
1055
    "silu_back(x)",
1056
    "norm(x)",
1057
    "rms_norm(x)",
1058
    "rms_norm_back(x)",
1059
    "group_norm(x)",
1060
    "l2_norm(x)",
1061
1062
    "X*Y",
1063
    "X[i]*Y",
1064
    "X*Y",
1065
1066
    "x*v",
1067
    "y-\\>view(x)",
1068
    "x-\\>y",
1069
    "cont(x)",
1070
    "reshape(x)",
1071
    "view(x)",
1072
    "permute(x)",
1073
    "transpose(x)",
1074
    "get_rows(x)",
1075
    "get_rows_back(x)",
1076
    "set_rows(x)",
1077
    "diag(x)",
1078
    "diag_mask_inf(x)",
1079
    "diag_mask_zero(x)",
1080
    "soft_max(x)",
1081
    "soft_max_back(x)",
1082
    "rope(x)",
1083
    "rope_back(x)",
1084
    "clamp(x)",
1085
    "conv_transpose_1d(x)",
1086
    "im2col(x)",
1087
    "im2col_back(x)",
1088
    "im2col_3d(x)",
1089
    "conv_2d(x)",
1090
    "conv_3d(x)",
1091
    "conv_2d_dw(x)",
1092
    "conv_transpose_2d(x)",
1093
    "pool_1d(x)",
1094
    "pool_2d(x)",
1095
    "pool_2d_back(x)",
1096
    "upscale(x)",
1097
    "pad(x)",
1098
    "pad_reflect_1d(x)",
1099
    "roll(x)",
1100
    "arange(start, stop, step)",
1101
    "timestep_embedding(timesteps, dim, max_period)",
1102
    "argsort(x)",
1103
    "leaky_relu(x)",
1104
    "tri(x)",
1105
    "fill(x, c)",
1106
1107
    "flash_attn_ext(x)",
1108
    "flash_attn_back(x)",
1109
    "ssm_conv(x)",
1110
    "ssm_scan(x)",
1111
    "win_part(x)",
1112
    "win_unpart(x)",
1113
    "get_rel_pos(x)",
1114
    "add_rel_pos(x)",
1115
    "rwkv_wkv6(k, v, r, tf, td, s)",
1116
    "gated_linear_attn(k, v, q, gate, s)",
1117
    "rwkv_wkv7(r, w, k, v, a, b, s)",
1118
    "A X = B, A triangular, solve X",
1119
1120
    "unary(x)",
1121
1122
    "map_custom(x)",
1123
    "map_custom(x,y)",
1124
    "map_custom(x,y,z)",
1125
1126
    "custom(x)",
1127
1128
    "cross_entropy_loss(x,y)",
1129
    "cross_entropy_loss_back(x,y)",
1130
    "adamw(x)",
1131
    "sgd(x)",
1132
1133
    "glu(x)",
1134
};
1135
1136
static_assert(GGML_OP_COUNT == 94, "GGML_OP_COUNT != 94");
1137
1138
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
1139
1140
static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1141
    "ABS",
1142
    "SGN",
1143
    "NEG",
1144
    "STEP",
1145
    "TANH",
1146
    "ELU",
1147
    "RELU",
1148
    "SIGMOID",
1149
    "GELU",
1150
    "GELU_QUICK",
1151
    "SILU",
1152
    "HARDSWISH",
1153
    "HARDSIGMOID",
1154
    "EXP",
1155
    "EXPM1",
1156
    "SOFTPLUS",
1157
    "GELU_ERF",
1158
    "XIELU",
1159
    "FLOOR",
1160
    "CEIL",
1161
    "ROUND",
1162
    "TRUNC",
1163
};
1164
1165
static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
1166
1167
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
1168
    "REGLU",
1169
    "GEGLU",
1170
    "SWIGLU",
1171
    "SWIGLU_OAI",
1172
    "GEGLU_ERF",
1173
    "GEGLU_QUICK",
1174
};
1175
1176
static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6");
1177
1178
1179
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
1180
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
1181
1182
1183
////////////////////////////////////////////////////////////////////////////////
1184
1185
0
void ggml_print_object(const struct ggml_object * obj) {
1186
0
    GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
1187
0
            obj->type, obj->offs, obj->size, (const void *) obj->next);
1188
0
}
1189
1190
0
void ggml_print_objects(const struct ggml_context * ctx) {
1191
0
    struct ggml_object * obj = ctx->objects_begin;
1192
1193
0
    GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
1194
1195
0
    while (obj != NULL) {
1196
0
        ggml_print_object(obj);
1197
0
        obj = obj->next;
1198
0
    }
1199
1200
0
    GGML_LOG_INFO("%s: --- end ---\n", __func__);
1201
0
}
1202
1203
638
int64_t ggml_nelements(const struct ggml_tensor * tensor) {
1204
638
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1205
1206
638
    return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1207
638
}
1208
1209
0
int64_t ggml_nrows(const struct ggml_tensor * tensor) {
1210
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1211
1212
0
    return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1213
0
}
1214
1215
2.76k
size_t ggml_nbytes(const struct ggml_tensor * tensor) {
1216
13.8k
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1217
11.0k
        if (tensor->ne[i] <= 0) {
1218
2
            return 0;
1219
2
        }
1220
11.0k
    }
1221
1222
2.76k
    size_t nbytes;
1223
2.76k
    const size_t blck_size = ggml_blck_size(tensor->type);
1224
2.76k
    if (blck_size == 1) {
1225
2.76k
        nbytes = ggml_type_size(tensor->type);
1226
13.8k
        for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1227
11.0k
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1228
11.0k
        }
1229
2.76k
    }
1230
0
    else {
1231
0
        nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
1232
0
        for (int i = 1; i < GGML_MAX_DIMS; ++i) {
1233
0
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1234
0
        }
1235
0
    }
1236
1237
2.76k
    return nbytes;
1238
2.76k
}
1239
1240
0
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
1241
0
    return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
1242
0
}
1243
1244
5.33k
int64_t ggml_blck_size(enum ggml_type type) {
1245
5.33k
    return type_traits[type].blck_size;
1246
5.33k
}
1247
1248
5.33k
size_t ggml_type_size(enum ggml_type type) {
1249
5.33k
    return type_traits[type].type_size;
1250
5.33k
}
1251
1252
769
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
1253
769
    assert(ne % ggml_blck_size(type) == 0);
1254
769
    return ggml_type_size(type)*ne/ggml_blck_size(type);
1255
769
}
1256
1257
0
double ggml_type_sizef(enum ggml_type type) {
1258
0
    return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
1259
0
}
1260
1261
90
const char * ggml_type_name(enum ggml_type type) {
1262
90
    return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
1263
90
}
1264
1265
0
bool ggml_is_quantized(enum ggml_type type) {
1266
0
    return type_traits[type].is_quantized;
1267
0
}
1268
1269
0
const char * ggml_op_name(enum ggml_op op) {
1270
0
    return GGML_OP_NAME[op];
1271
0
}
1272
1273
0
const char * ggml_op_symbol(enum ggml_op op) {
1274
0
    return GGML_OP_SYMBOL[op];
1275
0
}
1276
1277
0
const char * ggml_unary_op_name(enum ggml_unary_op op) {
1278
0
    return GGML_UNARY_OP_NAME[op];
1279
0
}
1280
1281
0
const char * ggml_glu_op_name(enum ggml_glu_op op) {
1282
0
    return GGML_GLU_OP_NAME[op];
1283
0
}
1284
1285
0
const char * ggml_op_desc(const struct ggml_tensor * t) {
1286
0
    if (t->op == GGML_OP_UNARY) {
1287
0
        enum ggml_unary_op uop = ggml_get_unary_op(t);
1288
0
        return ggml_unary_op_name(uop);
1289
0
    }
1290
0
    if (t->op == GGML_OP_GLU) {
1291
0
        enum ggml_glu_op gop = ggml_get_glu_op(t);
1292
0
        return ggml_glu_op_name(gop);
1293
0
    }
1294
0
    return ggml_op_name(t->op);
1295
0
}
1296
1297
0
size_t ggml_element_size(const struct ggml_tensor * tensor) {
1298
0
    return ggml_type_size(tensor->type);
1299
0
}
1300
1301
0
bool ggml_is_scalar(const struct ggml_tensor * tensor) {
1302
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1303
1304
0
    return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1305
0
}
1306
1307
0
bool ggml_is_vector(const struct ggml_tensor * tensor) {
1308
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1309
1310
0
    return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1311
0
}
1312
1313
0
bool ggml_is_matrix(const struct ggml_tensor * tensor) {
1314
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1315
1316
0
    return tensor->ne[2] == 1 && tensor->ne[3] == 1;
1317
0
}
1318
1319
0
bool ggml_is_3d(const struct ggml_tensor * tensor) {
1320
0
    return tensor->ne[3] == 1;
1321
0
}
1322
1323
0
int ggml_n_dims(const struct ggml_tensor * tensor) {
1324
0
    for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
1325
0
        if (tensor->ne[i] > 1) {
1326
0
            return i + 1;
1327
0
        }
1328
0
    }
1329
0
    return 1;
1330
0
}
1331
1332
0
enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
1333
0
    enum ggml_type wtype = GGML_TYPE_COUNT;
1334
1335
0
    switch (ftype) {
1336
0
        case GGML_FTYPE_ALL_F32:              wtype = GGML_TYPE_F32;   break;
1337
0
        case GGML_FTYPE_MOSTLY_F16:           wtype = GGML_TYPE_F16;   break;
1338
0
        case GGML_FTYPE_MOSTLY_BF16:          wtype = GGML_TYPE_BF16;  break;
1339
0
        case GGML_FTYPE_MOSTLY_Q4_0:          wtype = GGML_TYPE_Q4_0;  break;
1340
0
        case GGML_FTYPE_MOSTLY_Q4_1:          wtype = GGML_TYPE_Q4_1;  break;
1341
0
        case GGML_FTYPE_MOSTLY_Q5_0:          wtype = GGML_TYPE_Q5_0;  break;
1342
0
        case GGML_FTYPE_MOSTLY_Q5_1:          wtype = GGML_TYPE_Q5_1;  break;
1343
0
        case GGML_FTYPE_MOSTLY_Q8_0:          wtype = GGML_TYPE_Q8_0;  break;
1344
0
        case GGML_FTYPE_MOSTLY_MXFP4:         wtype = GGML_TYPE_MXFP4; break;
1345
0
        case GGML_FTYPE_MOSTLY_Q2_K:          wtype = GGML_TYPE_Q2_K;  break;
1346
0
        case GGML_FTYPE_MOSTLY_Q3_K:          wtype = GGML_TYPE_Q3_K;  break;
1347
0
        case GGML_FTYPE_MOSTLY_Q4_K:          wtype = GGML_TYPE_Q4_K;  break;
1348
0
        case GGML_FTYPE_MOSTLY_Q5_K:          wtype = GGML_TYPE_Q5_K;  break;
1349
0
        case GGML_FTYPE_MOSTLY_Q6_K:          wtype = GGML_TYPE_Q6_K;  break;
1350
0
        case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS;  break;
1351
0
        case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;   break;
1352
0
        case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS;  break;
1353
0
        case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;    break;
1354
0
        case GGML_FTYPE_MOSTLY_IQ1_M:         wtype = GGML_TYPE_IQ1_M;    break;
1355
0
        case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;   break;
1356
0
        case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;   break;
1357
0
        case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;    break;
1358
0
        case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;    break;
1359
0
        case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
1360
0
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
1361
0
    }
1362
1363
0
    GGML_ASSERT(wtype != GGML_TYPE_COUNT);
1364
1365
0
    return wtype;
1366
0
}
1367
1368
239
size_t ggml_tensor_overhead(void) {
1369
239
    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
1370
239
}
1371
1372
0
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
1373
0
    return tensor->nb[0] > tensor->nb[1];
1374
0
}
1375
1376
0
static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
1377
0
    size_t next_nb = ggml_type_size(tensor->type);
1378
0
    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
1379
0
        return false;
1380
0
    }
1381
0
    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
1382
0
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
1383
0
        if (tensor->ne[i] != 1) {
1384
0
            if (i > n) {
1385
0
                if (tensor->nb[i] != next_nb) {
1386
0
                    return false;
1387
0
                }
1388
0
                next_nb *= tensor->ne[i];
1389
0
            } else {
1390
                // this dimension does not need to be contiguous
1391
0
                next_nb = tensor->ne[i]*tensor->nb[i];
1392
0
            }
1393
0
        }
1394
0
    }
1395
0
    return true;
1396
0
}
1397
1398
0
bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
1399
0
    return ggml_is_contiguous_0(tensor);
1400
0
}
1401
1402
0
bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
1403
0
    return ggml_is_contiguous_n(tensor, 0);
1404
0
}
1405
1406
0
bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
1407
0
    return ggml_is_contiguous_n(tensor, 1);
1408
0
}
1409
1410
0
bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
1411
0
    return ggml_is_contiguous_n(tensor, 2);
1412
0
}
1413
1414
0
bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
1415
0
    return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
1416
0
}
1417
1418
0
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
1419
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1420
1421
0
    return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
1422
0
}
1423
1424
0
bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
1425
0
    return
1426
0
        tensor->nb[0] > tensor->nb[2] &&
1427
0
        tensor->nb[1] > tensor->nb[0] &&
1428
0
        tensor->nb[2] == ggml_type_size(tensor->type);
1429
0
}
1430
1431
0
bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) {
1432
0
    return
1433
0
        tensor->ne[0] == ggml_blck_size(tensor->type) ||
1434
0
        tensor->nb[0] == ggml_type_size(tensor->type);
1435
0
}
1436
1437
0
static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
1438
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1439
1440
0
    return
1441
0
        tensor->nb[0] == ggml_type_size(tensor->type) &&
1442
0
        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
1443
0
        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
1444
0
}
1445
1446
0
bool ggml_is_empty(const struct ggml_tensor * tensor) {
1447
0
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1448
0
        if (tensor->ne[i] == 0) {
1449
            // empty if any dimension has no elements
1450
0
            return true;
1451
0
        }
1452
0
    }
1453
0
    return false;
1454
0
}
1455
1456
0
bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1457
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1458
1459
0
    return
1460
0
        (t0->ne[0] == t1->ne[0]) &&
1461
0
        (t0->ne[1] == t1->ne[1]) &&
1462
0
        (t0->ne[2] == t1->ne[2]) &&
1463
0
        (t0->ne[3] == t1->ne[3]);
1464
0
}
1465
1466
0
bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1467
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1468
1469
0
    return
1470
0
        (t0->nb[0] == t1->nb[0]) &&
1471
0
        (t0->nb[1] == t1->nb[1]) &&
1472
0
        (t0->nb[2] == t1->nb[2]) &&
1473
0
        (t0->nb[3] == t1->nb[3]);
1474
0
}
1475
1476
// check if t1 can be represented as a repetition of t0
1477
0
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1478
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1479
1480
0
    return ggml_is_empty(t0) ? ggml_is_empty(t1) :
1481
0
        (t1->ne[0]%t0->ne[0] == 0) &&
1482
0
        (t1->ne[1]%t0->ne[1] == 0) &&
1483
0
        (t1->ne[2]%t0->ne[2] == 0) &&
1484
0
        (t1->ne[3]%t0->ne[3] == 0);
1485
0
}
1486
1487
0
static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1488
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1489
1490
0
    return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
1491
0
}
1492
1493
// assert that pointer is aligned to GGML_MEM_ALIGN
1494
#define GGML_ASSERT_ALIGNED(ptr) \
1495
1.87k
    GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
1496
1497
////////////////////////////////////////////////////////////////////////////////
1498
1499
1.10k
struct ggml_context * ggml_init(struct ggml_init_params params) {
1500
1.10k
    bool is_first_call = true;
1501
1502
1.10k
    ggml_critical_section_start();
1503
1504
1.10k
    if (is_first_call) {
1505
        // initialize time system (required on Windows)
1506
1.10k
        ggml_time_init();
1507
1508
1.10k
        is_first_call = false;
1509
1.10k
    }
1510
1511
1.10k
    ggml_critical_section_end();
1512
1513
1.10k
    struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
1514
1515
    // allow to call ggml_init with 0 size
1516
1.10k
    if (params.mem_size == 0) {
1517
972
        params.mem_size = GGML_MEM_ALIGN;
1518
972
    }
1519
1520
1.10k
    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
1521
1522
1.10k
    *ctx = (struct ggml_context) {
1523
1.10k
        /*.mem_size           =*/ mem_size,
1524
1.10k
        /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
1525
1.10k
        /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
1526
1.10k
        /*.no_alloc           =*/ params.no_alloc,
1527
1.10k
        /*.n_objects          =*/ 0,
1528
1.10k
        /*.objects_begin      =*/ NULL,
1529
1.10k
        /*.objects_end        =*/ NULL,
1530
1.10k
    };
1531
1532
1.10k
    GGML_ASSERT(ctx->mem_buffer != NULL);
1533
1534
1.10k
    GGML_ASSERT_ALIGNED(ctx->mem_buffer);
1535
1536
1.10k
    GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
1537
1538
1.10k
    return ctx;
1539
1.10k
}
1540
1541
0
void ggml_reset(struct ggml_context * ctx) {
1542
0
    if (ctx == NULL) {
1543
0
        return;
1544
0
    }
1545
1546
0
    ctx->n_objects     = 0;
1547
0
    ctx->objects_begin = NULL;
1548
0
    ctx->objects_end   = NULL;
1549
0
}
1550
1551
1.10k
void ggml_free(struct ggml_context * ctx) {
1552
1.10k
    if (ctx == NULL) {
1553
0
        return;
1554
0
    }
1555
1556
1.10k
    if (ctx->mem_buffer_owned) {
1557
1.10k
        ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
1558
1.10k
    }
1559
1560
1.10k
    GGML_FREE(ctx);
1561
1.10k
}
1562
1563
0
size_t ggml_used_mem(const struct ggml_context * ctx) {
1564
0
    return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
1565
0
}
1566
1567
0
bool ggml_get_no_alloc(struct ggml_context * ctx) {
1568
0
    return ctx->no_alloc;
1569
0
}
1570
1571
478
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
1572
478
    ctx->no_alloc = no_alloc;
1573
478
}
1574
1575
0
void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
1576
0
    return ctx->mem_buffer;
1577
0
}
1578
1579
0
size_t ggml_get_mem_size(const struct ggml_context * ctx) {
1580
0
    return ctx->mem_size;
1581
0
}
1582
1583
0
size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
1584
0
    size_t max_size = 0;
1585
1586
0
    for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
1587
0
        size_t bytes = ggml_nbytes(tensor);
1588
0
        max_size = MAX(max_size, bytes);
1589
0
    }
1590
1591
0
    return max_size;
1592
0
}
1593
1594
////////////////////////////////////////////////////////////////////////////////
1595
1596
769
static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
1597
    // always insert objects at the end of the context's memory pool
1598
769
    struct ggml_object * obj_cur = ctx->objects_end;
1599
1600
769
    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
1601
769
    const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
1602
769
    const size_t cur_end  = cur_offs + cur_size;
1603
1604
    // align to GGML_MEM_ALIGN
1605
769
    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
1606
1607
769
    char * const mem_buffer = ctx->mem_buffer;
1608
769
    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
1609
1610
769
    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
1611
0
        GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
1612
0
                __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
1613
#ifndef NDEBUG
1614
        GGML_ABORT("not enough space in the context's memory pool");
1615
#endif
1616
0
        return NULL;
1617
0
    }
1618
1619
769
    *obj_new = (struct ggml_object) {
1620
769
        .offs = cur_end + GGML_OBJECT_SIZE,
1621
769
        .size = size_needed,
1622
769
        .next = NULL,
1623
769
        .type = type,
1624
769
    };
1625
1626
769
    GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
1627
1628
769
    if (obj_cur != NULL) {
1629
639
        obj_cur->next = obj_new;
1630
639
    } else {
1631
        // this is the first object in this context
1632
130
        ctx->objects_begin = obj_new;
1633
130
    }
1634
1635
769
    ctx->objects_end = obj_new;
1636
1637
    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
1638
1639
769
    return obj_new;
1640
769
}
1641
1642
static struct ggml_tensor * ggml_new_tensor_impl(
1643
        struct ggml_context * ctx,
1644
        enum   ggml_type      type,
1645
        int                   n_dims,
1646
        const int64_t       * ne,
1647
        struct ggml_tensor  * view_src,
1648
769
        size_t                view_offs) {
1649
1650
769
    GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
1651
769
    GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
1652
1653
    // find the base tensor and absolute offset
1654
769
    if (view_src != NULL && view_src->view_src != NULL) {
1655
0
        view_offs += view_src->view_offs;
1656
0
        view_src   = view_src->view_src;
1657
0
    }
1658
1659
769
    size_t data_size = ggml_row_size(type, ne[0]);
1660
3.07k
    for (int i = 1; i < n_dims; i++) {
1661
2.30k
        data_size *= ne[i];
1662
2.30k
    }
1663
1664
769
    GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
1665
1666
769
    void * data = view_src != NULL ? view_src->data : NULL;
1667
769
    if (data != NULL) {
1668
0
        data = (char *) data + view_offs;
1669
0
    }
1670
1671
769
    size_t obj_alloc_size = 0;
1672
1673
769
    if (view_src == NULL && !ctx->no_alloc) {
1674
        // allocate tensor data in the context's memory pool
1675
0
        obj_alloc_size = data_size;
1676
0
    }
1677
1678
769
    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
1679
769
    GGML_ASSERT(obj_new);
1680
1681
769
    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
1682
1683
769
    *result = (struct ggml_tensor) {
1684
769
        /*.type         =*/ type,
1685
769
        /*.buffer       =*/ NULL,
1686
769
        /*.ne           =*/ { 1, 1, 1, 1 },
1687
769
        /*.nb           =*/ { 0, 0, 0, 0 },
1688
769
        /*.op           =*/ GGML_OP_NONE,
1689
769
        /*.op_params    =*/ { 0 },
1690
769
        /*.flags        =*/ 0,
1691
769
        /*.src          =*/ { NULL },
1692
769
        /*.view_src     =*/ view_src,
1693
769
        /*.view_offs    =*/ view_offs,
1694
769
        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
1695
769
        /*.name         =*/ { 0 },
1696
769
        /*.extra        =*/ NULL,
1697
769
        /*.padding      =*/ { 0 },
1698
769
    };
1699
1700
    // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
1701
    //GGML_ASSERT_ALIGNED(result->data);
1702
1703
3.84k
    for (int i = 0; i < n_dims; i++) {
1704
3.07k
        result->ne[i] = ne[i];
1705
3.07k
    }
1706
1707
769
    result->nb[0] = ggml_type_size(type);
1708
769
    result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
1709
2.30k
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
1710
1.53k
        result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
1711
1.53k
    }
1712
1713
769
    ctx->n_objects++;
1714
1715
769
    return result;
1716
769
}
1717
1718
struct ggml_tensor * ggml_new_tensor(
1719
        struct ggml_context * ctx,
1720
        enum   ggml_type      type,
1721
        int                   n_dims,
1722
769
        const int64_t       * ne) {
1723
769
    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
1724
769
}
1725
1726
struct ggml_tensor * ggml_new_tensor_1d(
1727
        struct ggml_context * ctx,
1728
        enum   ggml_type      type,
1729
0
        int64_t ne0) {
1730
0
    return ggml_new_tensor(ctx, type, 1, &ne0);
1731
0
}
1732
1733
struct ggml_tensor * ggml_new_tensor_2d(
1734
        struct ggml_context * ctx,
1735
        enum   ggml_type      type,
1736
        int64_t ne0,
1737
0
        int64_t ne1) {
1738
0
    const int64_t ne[2] = { ne0, ne1 };
1739
0
    return ggml_new_tensor(ctx, type, 2, ne);
1740
0
}
1741
1742
struct ggml_tensor * ggml_new_tensor_3d(
1743
        struct ggml_context * ctx,
1744
        enum   ggml_type      type,
1745
        int64_t ne0,
1746
        int64_t ne1,
1747
0
        int64_t ne2) {
1748
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
1749
0
    return ggml_new_tensor(ctx, type, 3, ne);
1750
0
}
1751
1752
struct ggml_tensor * ggml_new_tensor_4d(
1753
        struct ggml_context * ctx,
1754
        enum   ggml_type type,
1755
        int64_t ne0,
1756
        int64_t ne1,
1757
        int64_t ne2,
1758
0
        int64_t ne3) {
1759
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
1760
0
    return ggml_new_tensor(ctx, type, 4, ne);
1761
0
}
1762
1763
0
void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
1764
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
1765
1766
0
    return (uint8_t *)ctx->mem_buffer + obj->offs;
1767
0
}
1768
1769
0
struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
1770
0
    return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
1771
0
}
1772
1773
0
void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
1774
0
    const int64_t ne2 = tensor->ne[2];
1775
0
    const int64_t ne1 = tensor->ne[1];
1776
0
    const int64_t ne0 = tensor->ne[0];
1777
1778
0
    const int64_t i3_ = (i/(ne2*ne1*ne0));
1779
0
    const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
1780
0
    const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
1781
0
    const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
1782
1783
0
    if (i0) {
1784
0
        * i0 = i0_;
1785
0
    }
1786
0
    if (i1) {
1787
0
        * i1 = i1_;
1788
0
    }
1789
0
    if (i2) {
1790
0
        * i2 = i2_;
1791
0
    }
1792
0
    if (i3) {
1793
0
        * i3 = i3_;
1794
0
    }
1795
0
}
1796
1797
0
void * ggml_get_data(const struct ggml_tensor * tensor) {
1798
0
    return tensor->data;
1799
0
}
1800
1801
0
float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
1802
0
    assert(tensor->type == GGML_TYPE_F32);
1803
0
    return (float *)(tensor->data);
1804
0
}
1805
1806
0
enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
1807
0
    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
1808
0
    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
1809
0
}
1810
1811
0
enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) {
1812
0
    GGML_ASSERT(tensor->op == GGML_OP_GLU);
1813
0
    return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0);
1814
0
}
1815
1816
717
const char * ggml_get_name(const struct ggml_tensor * tensor) {
1817
717
    return tensor->name;
1818
717
}
1819
1820
1.84k
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
1821
1.84k
    size_t i;
1822
14.8k
    for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
1823
12.9k
        tensor->name[i] = name[i];
1824
12.9k
    }
1825
1.84k
    tensor->name[i] = '\0';
1826
1.84k
    return tensor;
1827
1.84k
}
1828
1829
0
struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
1830
0
    va_list args;
1831
0
    va_start(args, fmt);
1832
0
    vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
1833
0
    va_end(args);
1834
0
    return tensor;
1835
0
}
1836
1837
struct ggml_tensor * ggml_view_tensor(
1838
        struct ggml_context * ctx,
1839
0
        struct ggml_tensor  * src) {
1840
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
1841
0
    ggml_format_name(result, "%s (view)", src->name);
1842
1843
0
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
1844
0
        result->nb[i] = src->nb[i];
1845
0
    }
1846
1847
0
    return result;
1848
0
}
1849
1850
237
struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
1851
237
    struct ggml_object * obj = ctx->objects_begin;
1852
1853
237
    char * const mem_buffer = ctx->mem_buffer;
1854
1855
237
    while (obj != NULL) {
1856
130
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1857
130
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1858
130
        }
1859
1860
0
        obj = obj->next;
1861
0
    }
1862
1863
107
    return NULL;
1864
237
}
1865
1866
559
struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
1867
559
    struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
1868
559
    obj = obj->next;
1869
1870
559
    char * const mem_buffer = ctx->mem_buffer;
1871
1872
559
    while (obj != NULL) {
1873
508
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1874
508
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1875
508
        }
1876
1877
0
        obj = obj->next;
1878
0
    }
1879
1880
51
    return NULL;
1881
559
}
1882
1883
0
struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
1884
0
    struct ggml_object * obj = ctx->objects_begin;
1885
1886
0
    char * const mem_buffer = ctx->mem_buffer;
1887
1888
0
    while (obj != NULL) {
1889
0
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1890
0
            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
1891
0
            if (strcmp(cur->name, name) == 0) {
1892
0
                return cur;
1893
0
            }
1894
0
        }
1895
1896
0
        obj = obj->next;
1897
0
    }
1898
1899
0
    return NULL;
1900
0
}
1901
1902
////////////////////////////////////////////////////////////////////////////////
1903
1904
// ggml_dup
1905
1906
static struct ggml_tensor * ggml_dup_impl(
1907
        struct ggml_context * ctx,
1908
        struct ggml_tensor  * a,
1909
0
        bool                  inplace) {
1910
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1911
1912
0
    result->op     = GGML_OP_DUP;
1913
0
    result->src[0] = a;
1914
1915
0
    return result;
1916
0
}
1917
1918
struct ggml_tensor * ggml_dup(
1919
        struct ggml_context * ctx,
1920
0
        struct ggml_tensor  * a) {
1921
0
    return ggml_dup_impl(ctx, a, false);
1922
0
}
1923
1924
struct ggml_tensor * ggml_dup_inplace(
1925
        struct ggml_context * ctx,
1926
0
        struct ggml_tensor  * a) {
1927
0
    return ggml_dup_impl(ctx, a, true);
1928
0
}
1929
1930
// ggml_add
1931
1932
static struct ggml_tensor * ggml_add_impl(
1933
        struct ggml_context * ctx,
1934
        struct ggml_tensor  * a,
1935
        struct ggml_tensor  * b,
1936
0
        bool                  inplace) {
1937
0
    GGML_ASSERT(ggml_can_repeat(b, a));
1938
1939
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1940
1941
0
    result->op     = GGML_OP_ADD;
1942
0
    result->src[0] = a;
1943
0
    result->src[1] = b;
1944
1945
0
    return result;
1946
0
}
1947
1948
struct ggml_tensor * ggml_add(
1949
        struct ggml_context * ctx,
1950
        struct ggml_tensor  * a,
1951
0
        struct ggml_tensor  * b) {
1952
0
    return ggml_add_impl(ctx, a, b, false);
1953
0
}
1954
1955
struct ggml_tensor * ggml_add_inplace(
1956
        struct ggml_context * ctx,
1957
        struct ggml_tensor  * a,
1958
0
        struct ggml_tensor  * b) {
1959
0
    return ggml_add_impl(ctx, a, b, true);
1960
0
}
1961
1962
// ggml_add_cast
1963
1964
static struct ggml_tensor * ggml_add_cast_impl(
1965
        struct ggml_context * ctx,
1966
        struct ggml_tensor  * a,
1967
        struct ggml_tensor  * b,
1968
0
        enum   ggml_type      type) {
1969
    // TODO: support less-strict constraint
1970
    //       GGML_ASSERT(ggml_can_repeat(b, a));
1971
0
    GGML_ASSERT(ggml_can_repeat_rows(b, a));
1972
1973
    // currently only supported for quantized input and f16
1974
0
    GGML_ASSERT(ggml_is_quantized(a->type) ||
1975
0
                a->type == GGML_TYPE_F16 ||
1976
0
                a->type == GGML_TYPE_BF16);
1977
1978
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
1979
1980
0
    result->op     = GGML_OP_ADD;
1981
0
    result->src[0] = a;
1982
0
    result->src[1] = b;
1983
1984
0
    return result;
1985
0
}
1986
1987
struct ggml_tensor * ggml_add_cast(
1988
        struct ggml_context * ctx,
1989
        struct ggml_tensor  * a,
1990
        struct ggml_tensor  * b,
1991
0
        enum   ggml_type      type) {
1992
0
    return ggml_add_cast_impl(ctx, a, b, type);
1993
0
}
1994
1995
struct ggml_tensor * ggml_add_id(
1996
            struct ggml_context * ctx,
1997
            struct ggml_tensor  * a,
1998
            struct ggml_tensor  * b,
1999
0
            struct ggml_tensor  * ids) {
2000
2001
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
2002
0
    GGML_ASSERT(a->ne[1] == ids->ne[0]);
2003
0
    GGML_ASSERT(a->ne[2] == ids->ne[1]);
2004
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
2005
2006
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2007
2008
0
    result->op     = GGML_OP_ADD_ID;
2009
0
    result->src[0] = a;
2010
0
    result->src[1] = b;
2011
0
    result->src[2] = ids;
2012
2013
0
    return result;
2014
0
}
2015
2016
// ggml_add1
2017
2018
static struct ggml_tensor * ggml_add1_impl(
2019
        struct ggml_context * ctx,
2020
        struct ggml_tensor  * a,
2021
        struct ggml_tensor  * b,
2022
0
        bool                  inplace) {
2023
0
    GGML_ASSERT(ggml_is_scalar(b));
2024
0
    GGML_ASSERT(ggml_is_padded_1d(a));
2025
2026
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2027
2028
0
    result->op     = GGML_OP_ADD1;
2029
0
    result->src[0] = a;
2030
0
    result->src[1] = b;
2031
2032
0
    return result;
2033
0
}
2034
2035
struct ggml_tensor * ggml_add1(
2036
        struct ggml_context * ctx,
2037
        struct ggml_tensor  * a,
2038
0
        struct ggml_tensor  * b) {
2039
0
    return ggml_add1_impl(ctx, a, b, false);
2040
0
}
2041
2042
struct ggml_tensor * ggml_add1_inplace(
2043
        struct ggml_context * ctx,
2044
        struct ggml_tensor  * a,
2045
0
        struct ggml_tensor  * b) {
2046
0
    return ggml_add1_impl(ctx, a, b, true);
2047
0
}
2048
2049
// ggml_acc
2050
2051
static struct ggml_tensor * ggml_acc_impl(
2052
        struct ggml_context * ctx,
2053
        struct ggml_tensor  * a,
2054
        struct ggml_tensor  * b,
2055
        size_t                nb1,
2056
        size_t                nb2,
2057
        size_t                nb3,
2058
        size_t                offset,
2059
0
        bool                  inplace) {
2060
0
    GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
2061
0
    GGML_ASSERT(ggml_is_contiguous(a));
2062
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2063
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
2064
2065
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2066
2067
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
2068
0
    ggml_set_op_params(result, params, sizeof(params));
2069
2070
0
    result->op     = GGML_OP_ACC;
2071
0
    result->src[0] = a;
2072
0
    result->src[1] = b;
2073
2074
0
    return result;
2075
0
}
2076
2077
struct ggml_tensor * ggml_acc(
2078
        struct ggml_context * ctx,
2079
        struct ggml_tensor  * a,
2080
        struct ggml_tensor  * b,
2081
        size_t                nb1,
2082
        size_t                nb2,
2083
        size_t                nb3,
2084
0
        size_t                offset) {
2085
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
2086
0
}
2087
2088
struct ggml_tensor * ggml_acc_inplace(
2089
        struct ggml_context * ctx,
2090
        struct ggml_tensor  * a,
2091
        struct ggml_tensor  * b,
2092
        size_t                nb1,
2093
        size_t                nb2,
2094
        size_t                nb3,
2095
0
        size_t                offset) {
2096
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
2097
0
}
2098
2099
// ggml_sub
2100
2101
static struct ggml_tensor * ggml_sub_impl(
2102
        struct ggml_context * ctx,
2103
        struct ggml_tensor  * a,
2104
        struct ggml_tensor  * b,
2105
0
        bool                  inplace) {
2106
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2107
2108
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2109
2110
0
    result->op     = GGML_OP_SUB;
2111
0
    result->src[0] = a;
2112
0
    result->src[1] = b;
2113
2114
0
    return result;
2115
0
}
2116
2117
struct ggml_tensor * ggml_sub(
2118
        struct ggml_context * ctx,
2119
        struct ggml_tensor  * a,
2120
0
        struct ggml_tensor  * b) {
2121
0
    return ggml_sub_impl(ctx, a, b, false);
2122
0
}
2123
2124
struct ggml_tensor * ggml_sub_inplace(
2125
        struct ggml_context * ctx,
2126
        struct ggml_tensor  * a,
2127
0
        struct ggml_tensor  * b) {
2128
0
    return ggml_sub_impl(ctx, a, b, true);
2129
0
}
2130
2131
// ggml_mul
2132
2133
static struct ggml_tensor * ggml_mul_impl(
2134
        struct ggml_context * ctx,
2135
        struct ggml_tensor  * a,
2136
        struct ggml_tensor  * b,
2137
0
        bool                  inplace) {
2138
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2139
2140
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2141
2142
0
    result->op     = GGML_OP_MUL;
2143
0
    result->src[0] = a;
2144
0
    result->src[1] = b;
2145
2146
0
    return result;
2147
0
}
2148
2149
struct ggml_tensor * ggml_mul(
2150
        struct ggml_context * ctx,
2151
        struct ggml_tensor  * a,
2152
0
        struct ggml_tensor  * b) {
2153
0
    return ggml_mul_impl(ctx, a, b, false);
2154
0
}
2155
2156
struct ggml_tensor * ggml_mul_inplace(
2157
        struct ggml_context * ctx,
2158
        struct ggml_tensor  * a,
2159
0
        struct ggml_tensor  * b) {
2160
0
    return ggml_mul_impl(ctx, a, b, true);
2161
0
}
2162
2163
// ggml_div
2164
2165
static struct ggml_tensor * ggml_div_impl(
2166
        struct ggml_context * ctx,
2167
        struct ggml_tensor  * a,
2168
        struct ggml_tensor  * b,
2169
0
        bool                  inplace) {
2170
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2171
2172
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2173
2174
0
    result->op     = GGML_OP_DIV;
2175
0
    result->src[0] = a;
2176
0
    result->src[1] = b;
2177
2178
0
    return result;
2179
0
}
2180
2181
struct ggml_tensor * ggml_div(
2182
        struct ggml_context * ctx,
2183
        struct ggml_tensor  * a,
2184
0
        struct ggml_tensor  * b) {
2185
0
    return ggml_div_impl(ctx, a, b, false);
2186
0
}
2187
2188
struct ggml_tensor * ggml_div_inplace(
2189
        struct ggml_context * ctx,
2190
        struct ggml_tensor  * a,
2191
0
        struct ggml_tensor  * b) {
2192
0
    return ggml_div_impl(ctx, a, b, true);
2193
0
}
2194
2195
// ggml_sqr
2196
2197
static struct ggml_tensor * ggml_sqr_impl(
2198
        struct ggml_context * ctx,
2199
        struct ggml_tensor  * a,
2200
0
        bool                  inplace) {
2201
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2202
2203
0
    result->op     = GGML_OP_SQR;
2204
0
    result->src[0] = a;
2205
2206
0
    return result;
2207
0
}
2208
2209
struct ggml_tensor * ggml_sqr(
2210
        struct ggml_context * ctx,
2211
0
        struct ggml_tensor  * a) {
2212
0
    return ggml_sqr_impl(ctx, a, false);
2213
0
}
2214
2215
struct ggml_tensor * ggml_sqr_inplace(
2216
        struct ggml_context * ctx,
2217
0
        struct ggml_tensor  * a) {
2218
0
    return ggml_sqr_impl(ctx, a, true);
2219
0
}
2220
2221
// ggml_sqrt
2222
2223
static struct ggml_tensor * ggml_sqrt_impl(
2224
        struct ggml_context * ctx,
2225
        struct ggml_tensor  * a,
2226
0
        bool                  inplace) {
2227
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2228
2229
0
    result->op     = GGML_OP_SQRT;
2230
0
    result->src[0] = a;
2231
2232
0
    return result;
2233
0
}
2234
2235
struct ggml_tensor * ggml_sqrt(
2236
        struct ggml_context * ctx,
2237
0
        struct ggml_tensor  * a) {
2238
0
    return ggml_sqrt_impl(ctx, a, false);
2239
0
}
2240
2241
struct ggml_tensor * ggml_sqrt_inplace(
2242
        struct ggml_context * ctx,
2243
0
        struct ggml_tensor  * a) {
2244
0
    return ggml_sqrt_impl(ctx, a, true);
2245
0
}
2246
2247
// ggml_log
2248
2249
static struct ggml_tensor * ggml_log_impl(
2250
        struct ggml_context * ctx,
2251
        struct ggml_tensor  * a,
2252
0
        bool                  inplace) {
2253
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2254
2255
0
    result->op     = GGML_OP_LOG;
2256
0
    result->src[0] = a;
2257
2258
0
    return result;
2259
0
}
2260
2261
struct ggml_tensor * ggml_log(
2262
        struct ggml_context * ctx,
2263
0
        struct ggml_tensor  * a) {
2264
0
    return ggml_log_impl(ctx, a, false);
2265
0
}
2266
2267
struct ggml_tensor * ggml_log_inplace(
2268
        struct ggml_context * ctx,
2269
0
        struct ggml_tensor  * a) {
2270
0
    return ggml_log_impl(ctx, a, true);
2271
0
}
2272
2273
struct ggml_tensor * ggml_expm1(
2274
        struct ggml_context * ctx,
2275
0
        struct ggml_tensor  * a) {
2276
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
2277
0
}
2278
2279
struct ggml_tensor * ggml_expm1_inplace(
2280
        struct ggml_context * ctx,
2281
0
        struct ggml_tensor  * a) {
2282
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
2283
0
}
2284
2285
struct ggml_tensor * ggml_softplus(
2286
        struct ggml_context * ctx,
2287
0
        struct ggml_tensor  * a) {
2288
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2289
0
}
2290
2291
struct ggml_tensor * ggml_softplus_inplace(
2292
        struct ggml_context * ctx,
2293
0
        struct ggml_tensor  * a) {
2294
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2295
0
}
2296
2297
// ggml_sin
2298
2299
static struct ggml_tensor * ggml_sin_impl(
2300
        struct ggml_context * ctx,
2301
        struct ggml_tensor  * a,
2302
0
        bool                  inplace) {
2303
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2304
2305
0
    result->op     = GGML_OP_SIN;
2306
0
    result->src[0] = a;
2307
2308
0
    return result;
2309
0
}
2310
2311
struct ggml_tensor * ggml_sin(
2312
        struct ggml_context * ctx,
2313
0
        struct ggml_tensor  * a) {
2314
0
    return ggml_sin_impl(ctx, a, false);
2315
0
}
2316
2317
struct ggml_tensor * ggml_sin_inplace(
2318
        struct ggml_context * ctx,
2319
0
        struct ggml_tensor  * a) {
2320
0
    return ggml_sin_impl(ctx, a, true);
2321
0
}
2322
2323
// ggml_cos
2324
2325
static struct ggml_tensor * ggml_cos_impl(
2326
        struct ggml_context * ctx,
2327
        struct ggml_tensor  * a,
2328
0
        bool                  inplace) {
2329
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2330
2331
0
    result->op     = GGML_OP_COS;
2332
0
    result->src[0] = a;
2333
2334
0
    return result;
2335
0
}
2336
2337
struct ggml_tensor * ggml_cos(
2338
        struct ggml_context * ctx,
2339
0
        struct ggml_tensor  * a) {
2340
0
    return ggml_cos_impl(ctx, a, false);
2341
0
}
2342
2343
struct ggml_tensor * ggml_cos_inplace(
2344
        struct ggml_context * ctx,
2345
0
        struct ggml_tensor  * a) {
2346
0
    return ggml_cos_impl(ctx, a, true);
2347
0
}
2348
2349
// ggml_sum
2350
2351
struct ggml_tensor * ggml_sum(
2352
        struct ggml_context * ctx,
2353
0
        struct ggml_tensor  * a) {
2354
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
2355
2356
0
    result->op     = GGML_OP_SUM;
2357
0
    result->src[0] = a;
2358
2359
0
    return result;
2360
0
}
2361
2362
// ggml_sum_rows
2363
2364
struct ggml_tensor * ggml_sum_rows(
2365
        struct ggml_context * ctx,
2366
0
        struct ggml_tensor  * a) {
2367
0
    int64_t ne[GGML_MAX_DIMS] = { 1 };
2368
0
    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
2369
0
        ne[i] = a->ne[i];
2370
0
    }
2371
2372
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2373
2374
0
    result->op     = GGML_OP_SUM_ROWS;
2375
0
    result->src[0] = a;
2376
2377
0
    return result;
2378
0
}
2379
2380
// ggml_cumsum
2381
2382
struct ggml_tensor * ggml_cumsum(
2383
        struct ggml_context * ctx,
2384
0
        struct ggml_tensor  * a) {
2385
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2386
2387
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2388
2389
0
    result->op     = GGML_OP_CUMSUM;
2390
0
    result->src[0] = a;
2391
2392
0
    return result;
2393
0
}
2394
2395
// ggml_mean
2396
2397
struct ggml_tensor * ggml_mean(
2398
        struct ggml_context * ctx,
2399
0
        struct ggml_tensor  * a) {
2400
0
    int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
2401
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
2402
2403
0
    result->op     = GGML_OP_MEAN;
2404
0
    result->src[0] = a;
2405
2406
0
    return result;
2407
0
}
2408
2409
// ggml_argmax
2410
2411
struct ggml_tensor * ggml_argmax(
2412
        struct ggml_context * ctx,
2413
0
        struct ggml_tensor  * a) {
2414
0
    GGML_ASSERT(ggml_is_matrix(a));
2415
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
2416
2417
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
2418
2419
0
    result->op     = GGML_OP_ARGMAX;
2420
0
    result->src[0] = a;
2421
2422
0
    return result;
2423
0
}
2424
2425
// ggml_count_equal
2426
2427
struct ggml_tensor * ggml_count_equal(
2428
        struct ggml_context * ctx,
2429
        struct ggml_tensor  * a,
2430
0
        struct ggml_tensor  * b) {
2431
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
2432
2433
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
2434
2435
0
    result->op     = GGML_OP_COUNT_EQUAL;
2436
0
    result->src[0] = a;
2437
0
    result->src[1] = b;
2438
2439
0
    return result;
2440
0
}
2441
2442
// ggml_repeat
2443
2444
struct ggml_tensor * ggml_repeat(
2445
        struct ggml_context * ctx,
2446
        struct ggml_tensor  * a,
2447
0
        struct ggml_tensor  * b) {
2448
0
    GGML_ASSERT(ggml_can_repeat(a, b));
2449
2450
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2451
2452
0
    result->op     = GGML_OP_REPEAT;
2453
0
    result->src[0] = a;
2454
2455
0
    return result;
2456
0
}
2457
2458
struct ggml_tensor * ggml_repeat_4d(
2459
        struct ggml_context * ctx,
2460
        struct ggml_tensor * a,
2461
0
        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
2462
0
    const bool can_repeat = ggml_is_empty(a) || (
2463
0
        (ne0 % a->ne[0] == 0) &&
2464
0
        (ne1 % a->ne[1] == 0) &&
2465
0
        (ne2 % a->ne[2] == 0) &&
2466
0
        (ne3 % a->ne[3] == 0)
2467
0
    );
2468
0
    GGML_ASSERT(can_repeat);
2469
2470
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
2471
2472
0
    result->op     = GGML_OP_REPEAT;
2473
0
    result->src[0] = a;
2474
2475
0
    return result;
2476
0
}
2477
2478
// ggml_repeat_back
2479
2480
struct ggml_tensor * ggml_repeat_back(
2481
        struct ggml_context * ctx,
2482
        struct ggml_tensor  * a,
2483
0
        struct ggml_tensor  * b) {
2484
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2485
2486
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2487
2488
0
    result->op     = GGML_OP_REPEAT_BACK;
2489
0
    result->src[0] = a;
2490
2491
0
    return result;
2492
0
}
2493
2494
// ggml_concat
2495
2496
struct ggml_tensor * ggml_concat(
2497
    struct ggml_context * ctx,
2498
    struct ggml_tensor  * a,
2499
    struct ggml_tensor  * b,
2500
0
    int                   dim) {
2501
0
    GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
2502
0
    GGML_ASSERT(a->type == b->type);
2503
2504
0
    int64_t ne[GGML_MAX_DIMS];
2505
0
    for (int d = 0; d < GGML_MAX_DIMS; ++d) {
2506
0
        if (d == dim) {
2507
0
            ne[d] = a->ne[d] + b->ne[d];
2508
0
            continue;
2509
0
        }
2510
0
        GGML_ASSERT(a->ne[d] == b->ne[d]);
2511
0
        ne[d] = a->ne[d];
2512
0
    }
2513
2514
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2515
2516
0
    ggml_set_op_params_i32(result, 0, dim);
2517
2518
0
    result->op     = GGML_OP_CONCAT;
2519
0
    result->src[0] = a;
2520
0
    result->src[1] = b;
2521
2522
0
    return result;
2523
0
}
2524
2525
// ggml_abs
2526
2527
struct ggml_tensor * ggml_abs(
2528
        struct ggml_context * ctx,
2529
0
        struct ggml_tensor  * a) {
2530
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
2531
0
}
2532
2533
struct ggml_tensor * ggml_abs_inplace(
2534
        struct ggml_context * ctx,
2535
0
        struct ggml_tensor  * a) {
2536
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
2537
0
}
2538
2539
// ggml_sgn
2540
2541
struct ggml_tensor * ggml_sgn(
2542
        struct ggml_context * ctx,
2543
0
        struct ggml_tensor  * a) {
2544
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
2545
0
}
2546
2547
struct ggml_tensor * ggml_sgn_inplace(
2548
        struct ggml_context * ctx,
2549
0
        struct ggml_tensor  * a) {
2550
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
2551
0
}
2552
2553
// ggml_neg
2554
2555
struct ggml_tensor * ggml_neg(
2556
        struct ggml_context * ctx,
2557
0
        struct ggml_tensor  * a) {
2558
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
2559
0
}
2560
2561
struct ggml_tensor * ggml_neg_inplace(
2562
        struct ggml_context * ctx,
2563
0
        struct ggml_tensor  * a) {
2564
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
2565
0
}
2566
2567
// ggml_step
2568
2569
struct ggml_tensor * ggml_step(
2570
        struct ggml_context * ctx,
2571
0
        struct ggml_tensor  * a) {
2572
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
2573
0
}
2574
2575
struct ggml_tensor * ggml_step_inplace(
2576
        struct ggml_context * ctx,
2577
0
        struct ggml_tensor  * a) {
2578
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
2579
0
}
2580
2581
// ggml_tanh
2582
2583
struct ggml_tensor * ggml_tanh(
2584
        struct ggml_context * ctx,
2585
0
        struct ggml_tensor  * a) {
2586
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
2587
0
}
2588
2589
struct ggml_tensor * ggml_tanh_inplace(
2590
        struct ggml_context * ctx,
2591
0
        struct ggml_tensor  * a) {
2592
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
2593
0
}
2594
2595
// ggml_elu
2596
2597
struct ggml_tensor * ggml_elu(
2598
    struct ggml_context * ctx,
2599
0
    struct ggml_tensor  * a) {
2600
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
2601
0
}
2602
2603
struct ggml_tensor * ggml_elu_inplace(
2604
    struct ggml_context * ctx,
2605
0
    struct ggml_tensor  * a) {
2606
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
2607
0
}
2608
2609
// ggml_relu
2610
2611
struct ggml_tensor * ggml_relu(
2612
        struct ggml_context * ctx,
2613
0
        struct ggml_tensor  * a) {
2614
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
2615
0
}
2616
2617
struct ggml_tensor * ggml_relu_inplace(
2618
        struct ggml_context * ctx,
2619
0
        struct ggml_tensor  * a) {
2620
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
2621
0
}
2622
2623
// ggml_leaky_relu
2624
2625
struct ggml_tensor * ggml_leaky_relu(
2626
        struct ggml_context * ctx,
2627
        struct ggml_tensor  * a,
2628
        float                 negative_slope,
2629
0
        bool                  inplace) {
2630
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2631
2632
0
    ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
2633
2634
0
    result->op     = GGML_OP_LEAKY_RELU;
2635
0
    result->src[0] = a;
2636
2637
0
    return result;
2638
0
}
2639
2640
// ggml_sigmoid
2641
2642
struct ggml_tensor * ggml_sigmoid(
2643
        struct ggml_context * ctx,
2644
0
        struct ggml_tensor  * a) {
2645
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
2646
0
}
2647
2648
struct ggml_tensor * ggml_sigmoid_inplace(
2649
        struct ggml_context * ctx,
2650
0
        struct ggml_tensor  * a) {
2651
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
2652
0
}
2653
2654
// ggml_gelu
2655
2656
struct ggml_tensor * ggml_gelu(
2657
        struct ggml_context * ctx,
2658
0
        struct ggml_tensor  * a) {
2659
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
2660
0
}
2661
2662
struct ggml_tensor * ggml_gelu_inplace(
2663
        struct ggml_context * ctx,
2664
0
        struct ggml_tensor  * a) {
2665
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
2666
0
}
2667
2668
// ggml_gelu_erf
2669
2670
struct ggml_tensor * ggml_gelu_erf(
2671
        struct ggml_context * ctx,
2672
0
        struct ggml_tensor  * a) {
2673
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
2674
0
}
2675
2676
struct ggml_tensor * ggml_gelu_erf_inplace(
2677
        struct ggml_context * ctx,
2678
0
        struct ggml_tensor  * a) {
2679
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
2680
0
}
2681
2682
// ggml_gelu_quick
2683
2684
struct ggml_tensor * ggml_gelu_quick(
2685
        struct ggml_context * ctx,
2686
0
        struct ggml_tensor  * a) {
2687
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2688
0
}
2689
2690
struct ggml_tensor * ggml_gelu_quick_inplace(
2691
        struct ggml_context * ctx,
2692
0
        struct ggml_tensor  * a) {
2693
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2694
0
}
2695
2696
// ggml_silu
2697
2698
struct ggml_tensor * ggml_silu(
2699
        struct ggml_context * ctx,
2700
0
        struct ggml_tensor  * a) {
2701
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
2702
0
}
2703
2704
struct ggml_tensor * ggml_silu_inplace(
2705
        struct ggml_context * ctx,
2706
0
        struct ggml_tensor  * a) {
2707
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
2708
0
}
2709
2710
// ggml_xielu
2711
2712
struct ggml_tensor * ggml_xielu(
2713
        struct ggml_context * ctx,
2714
        struct ggml_tensor  * a,
2715
        float alpha_n,
2716
        float alpha_p,
2717
        float beta,
2718
0
        float eps) {
2719
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2720
2721
0
    ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
2722
0
    ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
2723
0
    ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
2724
0
    ggml_set_op_params_f32(result, 3, beta);
2725
0
    ggml_set_op_params_f32(result, 4, eps);
2726
2727
0
    result->op     = GGML_OP_UNARY;
2728
0
    result->src[0] = a;
2729
2730
0
    return result;
2731
0
}
2732
2733
// ggml_silu_back
2734
2735
struct ggml_tensor * ggml_silu_back(
2736
        struct ggml_context * ctx,
2737
        struct ggml_tensor  * a,
2738
0
        struct ggml_tensor  * b) {
2739
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2740
2741
0
    result->op     = GGML_OP_SILU_BACK;
2742
0
    result->src[0] = a;
2743
0
    result->src[1] = b;
2744
2745
0
    return result;
2746
0
}
2747
2748
// ggml hardswish
2749
2750
struct ggml_tensor * ggml_hardswish(
2751
        struct ggml_context * ctx,
2752
0
        struct ggml_tensor  * a) {
2753
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
2754
0
}
2755
2756
// ggml hardsigmoid
2757
2758
struct ggml_tensor * ggml_hardsigmoid(
2759
        struct ggml_context * ctx,
2760
0
        struct ggml_tensor  * a) {
2761
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
2762
0
}
2763
2764
// ggml exp
2765
2766
struct ggml_tensor * ggml_exp(
2767
        struct ggml_context * ctx,
2768
0
        struct ggml_tensor  * a) {
2769
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
2770
0
}
2771
2772
struct ggml_tensor * ggml_exp_inplace(
2773
        struct ggml_context * ctx,
2774
0
        struct ggml_tensor  * a) {
2775
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
2776
0
}
2777
2778
// ggml_glu
2779
2780
static struct ggml_tensor * ggml_glu_impl(
2781
        struct ggml_context * ctx,
2782
        struct ggml_tensor  * a,
2783
        struct ggml_tensor  * b,
2784
        enum ggml_glu_op      op,
2785
0
        bool                  swapped) {
2786
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
2787
2788
0
    if (b) {
2789
0
        GGML_ASSERT(ggml_is_contiguous_1(b));
2790
0
        GGML_ASSERT(ggml_are_same_shape(a, b));
2791
0
        GGML_ASSERT(a->type == b->type);
2792
0
    }
2793
2794
0
    int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i];
2795
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0);
2796
2797
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
2798
0
    ggml_set_op_params_i32(result, 1, (int32_t) swapped);
2799
2800
0
    result->op     = GGML_OP_GLU;
2801
0
    result->src[0] = a;
2802
0
    result->src[1] = b;
2803
2804
0
    return result;
2805
0
}
2806
2807
// ggml_floor
2808
2809
struct ggml_tensor * ggml_floor(
2810
        struct ggml_context * ctx,
2811
0
        struct ggml_tensor  * a) {
2812
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2813
0
}
2814
2815
struct ggml_tensor * ggml_floor_inplace(
2816
        struct ggml_context * ctx,
2817
0
        struct ggml_tensor  * a) {
2818
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2819
0
}
2820
2821
// ggml_ceil
2822
2823
struct ggml_tensor * ggml_ceil(
2824
        struct ggml_context * ctx,
2825
0
        struct ggml_tensor  * a) {
2826
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2827
0
}
2828
2829
struct ggml_tensor * ggml_ceil_inplace(
2830
        struct ggml_context * ctx,
2831
0
        struct ggml_tensor  * a) {
2832
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2833
0
}
2834
2835
//ggml_round
2836
2837
struct ggml_tensor * ggml_round(
2838
        struct ggml_context * ctx,
2839
0
        struct ggml_tensor  * a) {
2840
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2841
0
}
2842
2843
struct ggml_tensor * ggml_round_inplace(
2844
        struct ggml_context * ctx,
2845
0
        struct ggml_tensor  * a) {
2846
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2847
0
}
2848
2849
//ggml_trunc
2850
2851
struct ggml_tensor * ggml_trunc(
2852
        struct ggml_context * ctx,
2853
0
        struct ggml_tensor  * a) {
2854
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2855
0
}
2856
2857
struct ggml_tensor * ggml_trunc_inplace(
2858
        struct ggml_context * ctx,
2859
0
        struct ggml_tensor  * a) {
2860
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2861
0
}
2862
2863
struct ggml_tensor * ggml_glu(
2864
        struct ggml_context * ctx,
2865
        struct ggml_tensor  * a,
2866
        enum ggml_glu_op      op,
2867
0
        bool                  swapped) {
2868
0
    return ggml_glu_impl(ctx, a, NULL, op, swapped);
2869
0
}
2870
2871
struct ggml_tensor * ggml_glu_split(
2872
        struct ggml_context * ctx,
2873
        struct ggml_tensor  * a,
2874
        struct ggml_tensor  * b,
2875
0
        enum ggml_glu_op      op) {
2876
0
    return ggml_glu_impl(ctx, a, b, op, false);
2877
0
}
2878
2879
// ggml_reglu
2880
2881
struct ggml_tensor * ggml_reglu(
2882
        struct ggml_context * ctx,
2883
0
        struct ggml_tensor  * a) {
2884
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false);
2885
0
}
2886
2887
struct ggml_tensor * ggml_reglu_swapped(
2888
        struct ggml_context * ctx,
2889
0
        struct ggml_tensor  * a) {
2890
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true);
2891
0
}
2892
2893
struct ggml_tensor * ggml_reglu_split(
2894
        struct ggml_context * ctx,
2895
        struct ggml_tensor  * a,
2896
0
        struct ggml_tensor  * b) {
2897
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false);
2898
0
}
2899
2900
// ggml_geglu
2901
2902
struct ggml_tensor * ggml_geglu(
2903
        struct ggml_context * ctx,
2904
0
        struct ggml_tensor  * a) {
2905
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false);
2906
0
}
2907
2908
struct ggml_tensor * ggml_geglu_swapped(
2909
        struct ggml_context * ctx,
2910
0
        struct ggml_tensor  * a) {
2911
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true);
2912
0
}
2913
2914
struct ggml_tensor * ggml_geglu_split(
2915
        struct ggml_context * ctx,
2916
        struct ggml_tensor  * a,
2917
0
        struct ggml_tensor  * b) {
2918
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false);
2919
0
}
2920
2921
// ggml_swiglu
2922
2923
struct ggml_tensor * ggml_swiglu(
2924
        struct ggml_context * ctx,
2925
0
        struct ggml_tensor  * a) {
2926
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false);
2927
0
}
2928
2929
struct ggml_tensor * ggml_swiglu_swapped(
2930
        struct ggml_context * ctx,
2931
0
        struct ggml_tensor  * a) {
2932
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true);
2933
0
}
2934
2935
struct ggml_tensor * ggml_swiglu_split(
2936
        struct ggml_context * ctx,
2937
        struct ggml_tensor  * a,
2938
0
        struct ggml_tensor  * b) {
2939
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
2940
0
}
2941
2942
// ggml_geglu_erf
2943
2944
struct ggml_tensor * ggml_geglu_erf(
2945
        struct ggml_context * ctx,
2946
0
        struct ggml_tensor  * a) {
2947
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
2948
0
}
2949
2950
struct ggml_tensor * ggml_geglu_erf_swapped(
2951
        struct ggml_context * ctx,
2952
0
        struct ggml_tensor  * a) {
2953
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
2954
0
}
2955
2956
struct ggml_tensor * ggml_geglu_erf_split(
2957
        struct ggml_context * ctx,
2958
        struct ggml_tensor  * a,
2959
0
        struct ggml_tensor  * b) {
2960
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
2961
0
}
2962
2963
// ggml_geglu_quick
2964
2965
struct ggml_tensor * ggml_geglu_quick(
2966
        struct ggml_context * ctx,
2967
0
        struct ggml_tensor  * a) {
2968
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
2969
0
}
2970
2971
struct ggml_tensor * ggml_geglu_quick_swapped(
2972
        struct ggml_context * ctx,
2973
0
        struct ggml_tensor  * a) {
2974
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
2975
0
}
2976
2977
struct ggml_tensor * ggml_geglu_quick_split(
2978
        struct ggml_context * ctx,
2979
        struct ggml_tensor  * a,
2980
0
        struct ggml_tensor  * b) {
2981
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
2982
0
}
2983
2984
struct ggml_tensor * ggml_swiglu_oai(
2985
        struct ggml_context * ctx,
2986
        struct ggml_tensor  * a,
2987
        struct ggml_tensor  * b,
2988
        float                 alpha,
2989
0
        float                 limit) {
2990
0
    struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false);
2991
0
    ggml_set_op_params_f32(result, 2, alpha);
2992
0
    ggml_set_op_params_f32(result, 3, limit);
2993
2994
0
    return result;
2995
0
}
2996
2997
// ggml_norm
2998
2999
static struct ggml_tensor * ggml_norm_impl(
3000
        struct ggml_context * ctx,
3001
        struct ggml_tensor  * a,
3002
        float                 eps,
3003
0
        bool                  inplace) {
3004
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3005
3006
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3007
3008
0
    result->op     = GGML_OP_NORM;
3009
0
    result->src[0] = a;
3010
3011
0
    return result;
3012
0
}
3013
3014
struct ggml_tensor * ggml_norm(
3015
        struct ggml_context * ctx,
3016
        struct ggml_tensor  * a,
3017
0
        float                 eps) {
3018
0
    return ggml_norm_impl(ctx, a, eps, false);
3019
0
}
3020
3021
struct ggml_tensor * ggml_norm_inplace(
3022
        struct ggml_context * ctx,
3023
        struct ggml_tensor  * a,
3024
0
        float                 eps) {
3025
0
    return ggml_norm_impl(ctx, a, eps, true);
3026
0
}
3027
3028
// ggml_rms_norm
3029
3030
static struct ggml_tensor * ggml_rms_norm_impl(
3031
        struct ggml_context * ctx,
3032
        struct ggml_tensor  * a,
3033
        float                 eps,
3034
0
        bool                  inplace) {
3035
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3036
3037
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3038
3039
0
    result->op     = GGML_OP_RMS_NORM;
3040
0
    result->src[0] = a;
3041
3042
0
    return result;
3043
0
}
3044
3045
struct ggml_tensor * ggml_rms_norm(
3046
        struct ggml_context * ctx,
3047
        struct ggml_tensor  * a,
3048
0
        float                 eps) {
3049
0
    return ggml_rms_norm_impl(ctx, a, eps, false);
3050
0
}
3051
3052
struct ggml_tensor * ggml_rms_norm_inplace(
3053
        struct ggml_context * ctx,
3054
        struct ggml_tensor  * a,
3055
0
        float                 eps) {
3056
0
    return ggml_rms_norm_impl(ctx, a, eps, true);
3057
0
}
3058
3059
// ggml_rms_norm_back
3060
3061
struct ggml_tensor * ggml_rms_norm_back(
3062
        struct ggml_context * ctx,
3063
        struct ggml_tensor  * a,
3064
        struct ggml_tensor  * b,
3065
0
        float                 eps) {
3066
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3067
3068
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3069
3070
0
    result->op     = GGML_OP_RMS_NORM_BACK;
3071
0
    result->src[0] = a;
3072
0
    result->src[1] = b;
3073
3074
0
    return result;
3075
0
}
3076
3077
// ggml_group_norm
3078
3079
static struct ggml_tensor * ggml_group_norm_impl(
3080
        struct ggml_context * ctx,
3081
        struct ggml_tensor  * a,
3082
        int                   n_groups,
3083
        float                 eps,
3084
0
        bool                  inplace) {
3085
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3086
3087
0
    ggml_set_op_params_i32(result, 0, n_groups);
3088
0
    ggml_set_op_params_f32(result, 1, eps);
3089
3090
0
    result->op     = GGML_OP_GROUP_NORM;
3091
0
    result->src[0] = a;
3092
3093
0
    return result;
3094
0
}
3095
3096
struct ggml_tensor * ggml_group_norm(
3097
        struct ggml_context * ctx,
3098
        struct ggml_tensor  * a,
3099
        int                   n_groups,
3100
0
        float                 eps) {
3101
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
3102
0
}
3103
3104
struct ggml_tensor * ggml_group_norm_inplace(
3105
        struct ggml_context * ctx,
3106
        struct ggml_tensor  * a,
3107
        int                   n_groups,
3108
0
        float                 eps) {
3109
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
3110
0
}
3111
3112
// ggml_l2_norm
3113
3114
static struct ggml_tensor * ggml_l2_norm_impl(
3115
        struct ggml_context * ctx,
3116
        struct ggml_tensor  * a,
3117
        float                 eps,
3118
0
        bool                  inplace) {
3119
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3120
3121
0
    ggml_set_op_params_f32(result, 0, eps);
3122
3123
0
    result->op     = GGML_OP_L2_NORM;
3124
0
    result->src[0] = a;
3125
3126
0
    return result;
3127
0
}
3128
3129
struct ggml_tensor * ggml_l2_norm(
3130
        struct ggml_context * ctx,
3131
        struct ggml_tensor  * a,
3132
0
        float                 eps) {
3133
0
    return ggml_l2_norm_impl(ctx, a, eps, false);
3134
0
}
3135
3136
struct ggml_tensor * ggml_l2_norm_inplace(
3137
        struct ggml_context * ctx,
3138
        struct ggml_tensor  * a,
3139
0
        float                 eps) {
3140
0
    return ggml_l2_norm_impl(ctx, a, eps, true);
3141
0
}
3142
3143
// ggml_mul_mat
3144
3145
0
static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3146
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3147
3148
0
    return (t0->ne[0]           == t1->ne[0])  &&
3149
0
           (t1->ne[2]%t0->ne[2] == 0)          && // verify t0 is broadcastable
3150
0
           (t1->ne[3]%t0->ne[3] == 0);
3151
0
}
3152
3153
struct ggml_tensor * ggml_mul_mat(
3154
        struct ggml_context * ctx,
3155
        struct ggml_tensor  * a,
3156
0
        struct ggml_tensor  * b) {
3157
0
    GGML_ASSERT(ggml_can_mul_mat(a, b));
3158
0
    GGML_ASSERT(!ggml_is_transposed(a));
3159
3160
0
    const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
3161
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3162
3163
0
    result->op     = GGML_OP_MUL_MAT;
3164
0
    result->src[0] = a;
3165
0
    result->src[1] = b;
3166
3167
0
    return result;
3168
0
}
3169
3170
void ggml_mul_mat_set_prec(
3171
        struct ggml_tensor * a,
3172
0
        enum ggml_prec       prec) {
3173
0
    GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
3174
3175
0
    const int32_t prec_i32 = (int32_t) prec;
3176
3177
0
    ggml_set_op_params_i32(a, 0, prec_i32);
3178
0
}
3179
3180
// ggml_mul_mat_id
3181
3182
/*
3183
    c = ggml_mul_mat_id(ctx, as, b, ids);
3184
3185
    as  -> [cols, rows, n_expert]
3186
    b   -> [cols, n_expert_used, n_tokens]
3187
    ids -> [n_expert_used, n_tokens] (i32)
3188
    c   -> [rows, n_expert_used, n_tokens]
3189
3190
    in b, n_expert_used can be broadcasted to match the n_expert_used of ids
3191
3192
    c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
3193
*/
3194
struct ggml_tensor * ggml_mul_mat_id(
3195
        struct ggml_context * ctx,
3196
        struct ggml_tensor  * as,
3197
        struct ggml_tensor  * b,
3198
0
        struct ggml_tensor  * ids) {
3199
0
    GGML_ASSERT(!ggml_is_transposed(as));
3200
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
3201
3202
0
    GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
3203
0
    GGML_ASSERT(b->ne[3] == 1); // b is 3d
3204
0
    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
3205
0
    GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
3206
0
    GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
3207
0
    GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
3208
3209
0
    const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
3210
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3211
3212
0
    result->op     = GGML_OP_MUL_MAT_ID;
3213
0
    result->src[0] = as;
3214
0
    result->src[1] = b;
3215
0
    result->src[2] = ids;
3216
3217
0
    return result;
3218
0
}
3219
3220
// ggml_out_prod
3221
3222
0
static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3223
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3224
3225
0
    return (t0->ne[1] == t1->ne[1])   &&
3226
0
           (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
3227
0
           (t1->ne[3]%t0->ne[3] == 0);
3228
0
}
3229
3230
struct ggml_tensor * ggml_out_prod(
3231
        struct ggml_context * ctx,
3232
        struct ggml_tensor  * a,
3233
0
        struct ggml_tensor  * b) {
3234
0
    GGML_ASSERT(ggml_can_out_prod(a, b));
3235
0
    GGML_ASSERT(!ggml_is_transposed(a));
3236
3237
    // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
3238
0
    const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
3239
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3240
3241
0
    result->op     = GGML_OP_OUT_PROD;
3242
0
    result->src[0] = a;
3243
0
    result->src[1] = b;
3244
3245
0
    return result;
3246
0
}
3247
3248
// ggml_scale
3249
3250
static struct ggml_tensor * ggml_scale_impl(
3251
        struct ggml_context * ctx,
3252
        struct ggml_tensor  * a,
3253
        float                 s,
3254
        float                 b,
3255
0
        bool                  inplace) {
3256
0
    GGML_ASSERT(ggml_is_padded_1d(a));
3257
3258
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3259
3260
0
    float params[2] = { s, b };
3261
0
    ggml_set_op_params(result, &params, sizeof(params));
3262
3263
0
    result->op     = GGML_OP_SCALE;
3264
0
    result->src[0] = a;
3265
3266
0
    return result;
3267
0
}
3268
3269
struct ggml_tensor * ggml_scale(
3270
        struct ggml_context * ctx,
3271
        struct ggml_tensor  * a,
3272
0
        float                 s) {
3273
0
    return ggml_scale_impl(ctx, a, s, 0.0, false);
3274
0
}
3275
3276
struct ggml_tensor * ggml_scale_inplace(
3277
        struct ggml_context * ctx,
3278
        struct ggml_tensor  * a,
3279
0
        float                 s) {
3280
0
    return ggml_scale_impl(ctx, a, s, 0.0, true);
3281
0
}
3282
3283
struct ggml_tensor * ggml_scale_bias(
3284
        struct ggml_context * ctx,
3285
        struct ggml_tensor  * a,
3286
        float                 s,
3287
0
        float                 b) {
3288
0
    return ggml_scale_impl(ctx, a, s, b, false);
3289
0
}
3290
3291
struct ggml_tensor * ggml_scale_bias_inplace(
3292
        struct ggml_context * ctx,
3293
        struct ggml_tensor  * a,
3294
        float                 s,
3295
0
        float                 b) {
3296
0
    return ggml_scale_impl(ctx, a, s, b, true);
3297
0
}
3298
3299
// ggml_set
3300
3301
static struct ggml_tensor * ggml_set_impl(
3302
        struct ggml_context * ctx,
3303
        struct ggml_tensor  * a,
3304
        struct ggml_tensor  * b,
3305
        size_t                nb1,
3306
        size_t                nb2,
3307
        size_t                nb3,
3308
        size_t                offset,
3309
0
        bool                  inplace) {
3310
0
    GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
3311
3312
    // make a view of the destination
3313
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3314
3315
0
    GGML_ASSERT(offset < (size_t)(1 << 30));
3316
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
3317
0
    ggml_set_op_params(result, params, sizeof(params));
3318
3319
0
    result->op     = GGML_OP_SET;
3320
0
    result->src[0] = a;
3321
0
    result->src[1] = b;
3322
3323
0
    return result;
3324
0
}
3325
3326
struct ggml_tensor * ggml_set(
3327
        struct ggml_context * ctx,
3328
        struct ggml_tensor  * a,
3329
        struct ggml_tensor  * b,
3330
        size_t                nb1,
3331
        size_t                nb2,
3332
        size_t                nb3,
3333
0
        size_t                offset) {
3334
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
3335
0
}
3336
3337
struct ggml_tensor * ggml_set_inplace(
3338
        struct ggml_context * ctx,
3339
        struct ggml_tensor  * a,
3340
        struct ggml_tensor  * b,
3341
        size_t                nb1,
3342
        size_t                nb2,
3343
        size_t                nb3,
3344
0
        size_t                offset) {
3345
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
3346
0
}
3347
3348
struct ggml_tensor * ggml_set_1d(
3349
        struct ggml_context * ctx,
3350
        struct ggml_tensor  * a,
3351
        struct ggml_tensor  * b,
3352
0
        size_t                offset) {
3353
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
3354
0
}
3355
3356
struct ggml_tensor * ggml_set_1d_inplace(
3357
        struct ggml_context * ctx,
3358
        struct ggml_tensor  * a,
3359
        struct ggml_tensor  * b,
3360
0
        size_t                offset) {
3361
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
3362
0
}
3363
3364
struct ggml_tensor * ggml_set_2d(
3365
        struct ggml_context * ctx,
3366
        struct ggml_tensor  * a,
3367
        struct ggml_tensor  * b,
3368
        size_t                nb1,
3369
0
        size_t                offset) {
3370
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
3371
0
}
3372
3373
struct ggml_tensor * ggml_set_2d_inplace(
3374
        struct ggml_context * ctx,
3375
        struct ggml_tensor  * a,
3376
        struct ggml_tensor  * b,
3377
        size_t                nb1,
3378
0
        size_t                offset) {
3379
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
3380
0
}
3381
3382
// ggml_cpy
3383
3384
static struct ggml_tensor * ggml_cpy_impl(
3385
        struct ggml_context * ctx,
3386
        struct ggml_tensor  * a,
3387
0
        struct ggml_tensor  * b) {
3388
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3389
3390
    // make a view of the destination
3391
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, b);
3392
0
    if (strlen(b->name) > 0) {
3393
0
        ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
3394
0
    } else {
3395
0
        ggml_format_name(result, "%s (copy)", a->name);
3396
0
    }
3397
3398
0
    result->op     = GGML_OP_CPY;
3399
0
    result->src[0] = a;
3400
0
    result->src[1] = b;
3401
3402
0
    return result;
3403
0
}
3404
3405
struct ggml_tensor * ggml_cpy(
3406
        struct ggml_context * ctx,
3407
        struct ggml_tensor * a,
3408
0
        struct ggml_tensor * b) {
3409
0
    return ggml_cpy_impl(ctx, a, b);
3410
0
}
3411
3412
struct ggml_tensor * ggml_cast(
3413
        struct ggml_context * ctx,
3414
        struct ggml_tensor  * a,
3415
0
        enum   ggml_type      type) {
3416
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
3417
0
    ggml_format_name(result, "%s (copy)", a->name);
3418
3419
0
    result->op     = GGML_OP_CPY;
3420
0
    result->src[0] = a;
3421
0
    result->src[1] = result;
3422
3423
0
    return result;
3424
0
}
3425
3426
// ggml_cont
3427
3428
static struct ggml_tensor * ggml_cont_impl(
3429
        struct ggml_context * ctx,
3430
0
        struct ggml_tensor  * a) {
3431
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3432
0
    ggml_format_name(result, "%s (cont)", a->name);
3433
3434
0
    result->op     = GGML_OP_CONT;
3435
0
    result->src[0] = a;
3436
3437
0
    return result;
3438
0
}
3439
3440
struct ggml_tensor * ggml_cont(
3441
        struct ggml_context * ctx,
3442
0
        struct ggml_tensor * a) {
3443
0
    return ggml_cont_impl(ctx, a);
3444
0
}
3445
3446
// make contiguous, with new shape
3447
GGML_API struct ggml_tensor * ggml_cont_1d(
3448
        struct ggml_context * ctx,
3449
        struct ggml_tensor  * a,
3450
0
        int64_t               ne0) {
3451
0
    return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
3452
0
}
3453
3454
GGML_API struct ggml_tensor * ggml_cont_2d(
3455
        struct ggml_context * ctx,
3456
        struct ggml_tensor  * a,
3457
        int64_t               ne0,
3458
0
        int64_t               ne1) {
3459
0
    return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
3460
0
}
3461
3462
GGML_API struct ggml_tensor * ggml_cont_3d(
3463
        struct ggml_context * ctx,
3464
        struct ggml_tensor  * a,
3465
        int64_t               ne0,
3466
        int64_t               ne1,
3467
0
        int64_t               ne2) {
3468
0
    return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
3469
0
}
3470
3471
struct ggml_tensor * ggml_cont_4d(
3472
        struct ggml_context * ctx,
3473
        struct ggml_tensor  * a,
3474
        int64_t               ne0,
3475
        int64_t               ne1,
3476
        int64_t               ne2,
3477
0
        int64_t               ne3) {
3478
0
    GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
3479
3480
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
3481
0
    ggml_format_name(result, "%s (cont)", a->name);
3482
3483
0
    result->op     = GGML_OP_CONT;
3484
0
    result->src[0] = a;
3485
3486
0
    return result;
3487
0
}
3488
3489
// ggml_reshape
3490
3491
struct ggml_tensor * ggml_reshape(
3492
        struct ggml_context * ctx,
3493
        struct ggml_tensor * a,
3494
0
        struct ggml_tensor * b) {
3495
0
    GGML_ASSERT(ggml_is_contiguous(a));
3496
    // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
3497
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3498
3499
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
3500
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3501
3502
0
    result->op     = GGML_OP_RESHAPE;
3503
0
    result->src[0] = a;
3504
3505
0
    return result;
3506
0
}
3507
3508
struct ggml_tensor * ggml_reshape_1d(
3509
        struct ggml_context * ctx,
3510
        struct ggml_tensor  * a,
3511
0
        int64_t               ne0) {
3512
0
    GGML_ASSERT(ggml_is_contiguous(a));
3513
0
    GGML_ASSERT(ggml_nelements(a) == ne0);
3514
3515
0
    const int64_t ne[1] = { ne0 };
3516
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
3517
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3518
3519
0
    result->op     = GGML_OP_RESHAPE;
3520
0
    result->src[0] = a;
3521
3522
0
    return result;
3523
0
}
3524
3525
struct ggml_tensor * ggml_reshape_2d(
3526
        struct ggml_context * ctx,
3527
        struct ggml_tensor  * a,
3528
        int64_t               ne0,
3529
0
        int64_t               ne1) {
3530
0
    GGML_ASSERT(ggml_is_contiguous(a));
3531
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
3532
3533
0
    const int64_t ne[2] = { ne0, ne1 };
3534
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
3535
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3536
3537
0
    result->op     = GGML_OP_RESHAPE;
3538
0
    result->src[0] = a;
3539
3540
0
    return result;
3541
0
}
3542
3543
struct ggml_tensor * ggml_reshape_3d(
3544
        struct ggml_context * ctx,
3545
        struct ggml_tensor  * a,
3546
        int64_t               ne0,
3547
        int64_t               ne1,
3548
0
        int64_t               ne2) {
3549
0
    GGML_ASSERT(ggml_is_contiguous(a));
3550
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
3551
3552
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3553
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
3554
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3555
3556
0
    result->op     = GGML_OP_RESHAPE;
3557
0
    result->src[0] = a;
3558
3559
0
    return result;
3560
0
}
3561
3562
struct ggml_tensor * ggml_reshape_4d(
3563
        struct ggml_context * ctx,
3564
        struct ggml_tensor  * a,
3565
        int64_t               ne0,
3566
        int64_t               ne1,
3567
        int64_t               ne2,
3568
0
        int64_t               ne3) {
3569
0
    GGML_ASSERT(ggml_is_contiguous(a));
3570
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
3571
3572
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3573
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
3574
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3575
3576
0
    result->op     = GGML_OP_RESHAPE;
3577
0
    result->src[0] = a;
3578
3579
0
    return result;
3580
0
}
3581
3582
static struct ggml_tensor * ggml_view_impl(
3583
        struct ggml_context * ctx,
3584
        struct ggml_tensor  * a,
3585
        int                   n_dims,
3586
        const int64_t       * ne,
3587
0
        size_t                offset) {
3588
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
3589
0
    ggml_format_name(result, "%s (view)", a->name);
3590
3591
0
    ggml_set_op_params(result, &offset, sizeof(offset));
3592
3593
0
    result->op     = GGML_OP_VIEW;
3594
0
    result->src[0] = a;
3595
3596
0
    return result;
3597
0
}
3598
3599
// ggml_view_1d
3600
3601
struct ggml_tensor * ggml_view_1d(
3602
        struct ggml_context * ctx,
3603
        struct ggml_tensor  * a,
3604
        int64_t               ne0,
3605
0
        size_t                offset) {
3606
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
3607
3608
0
    return result;
3609
0
}
3610
3611
// ggml_view_2d
3612
3613
struct ggml_tensor * ggml_view_2d(
3614
        struct ggml_context * ctx,
3615
        struct ggml_tensor  * a,
3616
        int64_t               ne0,
3617
        int64_t               ne1,
3618
        size_t                nb1,
3619
0
        size_t                offset) {
3620
0
    const int64_t ne[2] = { ne0, ne1 };
3621
3622
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
3623
3624
0
    result->nb[1] = nb1;
3625
0
    result->nb[2] = result->nb[1]*ne1;
3626
0
    result->nb[3] = result->nb[2];
3627
3628
0
    return result;
3629
0
}
3630
3631
// ggml_view_3d
3632
3633
struct ggml_tensor * ggml_view_3d(
3634
        struct ggml_context * ctx,
3635
        struct ggml_tensor  * a,
3636
        int64_t               ne0,
3637
        int64_t               ne1,
3638
        int64_t               ne2,
3639
        size_t                nb1,
3640
        size_t                nb2,
3641
0
        size_t                offset) {
3642
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3643
3644
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
3645
3646
0
    result->nb[1] = nb1;
3647
0
    result->nb[2] = nb2;
3648
0
    result->nb[3] = result->nb[2]*ne2;
3649
3650
0
    return result;
3651
0
}
3652
3653
// ggml_view_4d
3654
3655
struct ggml_tensor * ggml_view_4d(
3656
        struct ggml_context * ctx,
3657
        struct ggml_tensor  * a,
3658
        int64_t               ne0,
3659
        int64_t               ne1,
3660
        int64_t               ne2,
3661
        int64_t               ne3,
3662
        size_t                nb1,
3663
        size_t                nb2,
3664
        size_t                nb3,
3665
0
        size_t                offset) {
3666
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3667
3668
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
3669
3670
0
    result->nb[1] = nb1;
3671
0
    result->nb[2] = nb2;
3672
0
    result->nb[3] = nb3;
3673
3674
0
    return result;
3675
0
}
3676
3677
// ggml_permute
3678
3679
struct ggml_tensor * ggml_permute(
3680
        struct ggml_context * ctx,
3681
        struct ggml_tensor  * a,
3682
        int                   axis0,
3683
        int                   axis1,
3684
        int                   axis2,
3685
0
        int                   axis3) {
3686
0
    GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
3687
0
    GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
3688
0
    GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
3689
0
    GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);
3690
3691
0
    GGML_ASSERT(axis0 != axis1);
3692
0
    GGML_ASSERT(axis0 != axis2);
3693
0
    GGML_ASSERT(axis0 != axis3);
3694
0
    GGML_ASSERT(axis1 != axis2);
3695
0
    GGML_ASSERT(axis1 != axis3);
3696
0
    GGML_ASSERT(axis2 != axis3);
3697
3698
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3699
0
    ggml_format_name(result, "%s (permuted)", a->name);
3700
3701
0
    int ne[GGML_MAX_DIMS];
3702
0
    int nb[GGML_MAX_DIMS];
3703
3704
0
    ne[axis0] = a->ne[0];
3705
0
    ne[axis1] = a->ne[1];
3706
0
    ne[axis2] = a->ne[2];
3707
0
    ne[axis3] = a->ne[3];
3708
3709
0
    nb[axis0] = a->nb[0];
3710
0
    nb[axis1] = a->nb[1];
3711
0
    nb[axis2] = a->nb[2];
3712
0
    nb[axis3] = a->nb[3];
3713
3714
0
    result->ne[0] = ne[0];
3715
0
    result->ne[1] = ne[1];
3716
0
    result->ne[2] = ne[2];
3717
0
    result->ne[3] = ne[3];
3718
3719
0
    result->nb[0] = nb[0];
3720
0
    result->nb[1] = nb[1];
3721
0
    result->nb[2] = nb[2];
3722
0
    result->nb[3] = nb[3];
3723
3724
0
    result->op     = GGML_OP_PERMUTE;
3725
0
    result->src[0] = a;
3726
3727
0
    int32_t params[] = { axis0, axis1, axis2, axis3 };
3728
0
    ggml_set_op_params(result, params, sizeof(params));
3729
3730
0
    return result;
3731
0
}
3732
3733
// ggml_transpose
3734
3735
struct ggml_tensor * ggml_transpose(
3736
        struct ggml_context * ctx,
3737
0
        struct ggml_tensor  * a) {
3738
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3739
0
    ggml_format_name(result, "%s (transposed)", a->name);
3740
3741
0
    result->ne[0] = a->ne[1];
3742
0
    result->ne[1] = a->ne[0];
3743
3744
0
    result->nb[0] = a->nb[1];
3745
0
    result->nb[1] = a->nb[0];
3746
3747
0
    result->op     = GGML_OP_TRANSPOSE;
3748
0
    result->src[0] = a;
3749
3750
0
    return result;
3751
0
}
3752
3753
// ggml_get_rows
3754
3755
struct ggml_tensor * ggml_get_rows(
3756
        struct ggml_context * ctx,
3757
        struct ggml_tensor  * a,
3758
0
        struct ggml_tensor  * b) {
3759
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
3760
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
3761
0
    GGML_ASSERT(b->ne[3] == 1);
3762
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
3763
3764
    // TODO: implement non F32 return
3765
0
    enum ggml_type type = GGML_TYPE_F32;
3766
0
    if (a->type == GGML_TYPE_I32) {
3767
0
        type = a->type;
3768
0
    }
3769
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
3770
3771
0
    result->op     = GGML_OP_GET_ROWS;
3772
0
    result->src[0] = a;
3773
0
    result->src[1] = b;
3774
3775
0
    return result;
3776
0
}
3777
3778
// ggml_get_rows_back
3779
3780
struct ggml_tensor * ggml_get_rows_back(
3781
        struct ggml_context * ctx,
3782
        struct ggml_tensor  * a,
3783
        struct ggml_tensor  * b,
3784
0
        struct ggml_tensor  * c) {
3785
0
    GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
3786
0
    GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
3787
3788
    // TODO: implement non F32 return
3789
    //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
3790
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
3791
3792
0
    result->op     = GGML_OP_GET_ROWS_BACK;
3793
0
    result->src[0] = a;
3794
0
    result->src[1] = b;
3795
3796
0
    return result;
3797
0
}
3798
3799
// ggml_set_rows
3800
3801
struct ggml_tensor * ggml_set_rows(
3802
        struct ggml_context * ctx,
3803
        struct ggml_tensor  * a,
3804
        struct ggml_tensor  * b,
3805
0
        struct ggml_tensor  * c) {
3806
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
3807
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
3808
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
3809
0
    GGML_ASSERT(b->ne[1] == c->ne[0]);
3810
0
    GGML_ASSERT(b->ne[2] % c->ne[1] == 0);
3811
0
    GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
3812
0
    GGML_ASSERT(c->ne[3] == 1);
3813
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
3814
0
    GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32);
3815
3816
0
    GGML_ASSERT(ggml_is_contiguous_rows(a));
3817
0
    GGML_ASSERT(ggml_is_contiguous_rows(b));
3818
3819
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3820
3821
0
    result->op     = GGML_OP_SET_ROWS;
3822
0
    result->src[0] = b;
3823
0
    result->src[1] = c;
3824
0
    result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931)
3825
3826
0
    return result;
3827
0
}
3828
3829
// ggml_diag
3830
3831
struct ggml_tensor * ggml_diag(
3832
        struct ggml_context * ctx,
3833
0
        struct ggml_tensor  * a) {
3834
0
    GGML_ASSERT(a->ne[1] == 1);
3835
3836
0
    const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
3837
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
3838
3839
0
    result->op     = GGML_OP_DIAG;
3840
0
    result->src[0] = a;
3841
3842
0
    return result;
3843
0
}
3844
3845
// ggml_diag_mask_inf
3846
3847
static struct ggml_tensor * ggml_diag_mask_inf_impl(
3848
        struct ggml_context * ctx,
3849
        struct ggml_tensor  * a,
3850
        int                   n_past,
3851
0
        bool                  inplace) {
3852
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3853
3854
0
    int32_t params[] = { n_past };
3855
0
    ggml_set_op_params(result, params, sizeof(params));
3856
3857
0
    result->op     = GGML_OP_DIAG_MASK_INF;
3858
0
    result->src[0] = a;
3859
3860
0
    return result;
3861
0
}
3862
3863
struct ggml_tensor * ggml_diag_mask_inf(
3864
        struct ggml_context * ctx,
3865
        struct ggml_tensor  * a,
3866
0
        int                   n_past) {
3867
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
3868
0
}
3869
3870
struct ggml_tensor * ggml_diag_mask_inf_inplace(
3871
        struct ggml_context * ctx,
3872
        struct ggml_tensor  * a,
3873
0
        int                   n_past) {
3874
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
3875
0
}
3876
3877
// ggml_diag_mask_zero
3878
3879
static struct ggml_tensor * ggml_diag_mask_zero_impl(
3880
        struct ggml_context * ctx,
3881
        struct ggml_tensor  * a,
3882
        int                   n_past,
3883
0
        bool                  inplace) {
3884
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3885
3886
0
    int32_t params[] = { n_past };
3887
0
    ggml_set_op_params(result, params, sizeof(params));
3888
3889
0
    result->op     = GGML_OP_DIAG_MASK_ZERO;
3890
0
    result->src[0] = a;
3891
3892
0
    return result;
3893
0
}
3894
3895
struct ggml_tensor * ggml_diag_mask_zero(
3896
        struct ggml_context * ctx,
3897
        struct ggml_tensor  * a,
3898
0
        int                   n_past) {
3899
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
3900
0
}
3901
3902
struct ggml_tensor * ggml_diag_mask_zero_inplace(
3903
        struct ggml_context * ctx,
3904
        struct ggml_tensor  * a,
3905
0
        int                   n_past) {
3906
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
3907
0
}
3908
3909
// ggml_soft_max
3910
3911
static struct ggml_tensor * ggml_soft_max_impl(
3912
        struct ggml_context * ctx,
3913
        struct ggml_tensor  * a,
3914
        struct ggml_tensor  * mask,
3915
        float                 scale,
3916
        float                 max_bias,
3917
0
        bool                  inplace) {
3918
0
    GGML_ASSERT(ggml_is_contiguous(a));
3919
3920
0
    if (mask) {
3921
0
        GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
3922
0
        GGML_ASSERT(ggml_is_contiguous(mask));
3923
0
        GGML_ASSERT(mask->ne[0] == a->ne[0]);
3924
0
        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
3925
0
        GGML_ASSERT(a->ne[2]%mask->ne[2] == 0);
3926
0
        GGML_ASSERT(a->ne[3]%mask->ne[3] == 0);
3927
0
    }
3928
3929
0
    if (max_bias > 0.0f) {
3930
0
        GGML_ASSERT(mask);
3931
0
    }
3932
3933
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3934
3935
0
    float params[] = { scale, max_bias };
3936
0
    ggml_set_op_params(result, params, sizeof(params));
3937
3938
0
    result->op     = GGML_OP_SOFT_MAX;
3939
0
    result->src[0] = a;
3940
0
    result->src[1] = mask;
3941
3942
0
    return result;
3943
0
}
3944
3945
struct ggml_tensor * ggml_soft_max(
3946
        struct ggml_context * ctx,
3947
0
        struct ggml_tensor  * a) {
3948
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
3949
0
}
3950
3951
struct ggml_tensor * ggml_soft_max_inplace(
3952
        struct ggml_context * ctx,
3953
0
        struct ggml_tensor  * a) {
3954
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
3955
0
}
3956
3957
struct ggml_tensor * ggml_soft_max_ext(
3958
        struct ggml_context * ctx,
3959
        struct ggml_tensor  * a,
3960
        struct ggml_tensor  * mask,
3961
        float                 scale,
3962
0
        float                 max_bias) {
3963
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
3964
0
}
3965
3966
struct ggml_tensor * ggml_soft_max_ext_inplace(
3967
        struct ggml_context * ctx,
3968
        struct ggml_tensor  * a,
3969
        struct ggml_tensor  * mask,
3970
        float                 scale,
3971
0
        float                 max_bias) {
3972
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
3973
0
}
3974
3975
void ggml_soft_max_add_sinks(
3976
        struct ggml_tensor * a,
3977
0
        struct ggml_tensor * sinks) {
3978
0
    if (!sinks) {
3979
0
        a->src[2] = NULL;
3980
0
        return;
3981
0
    }
3982
3983
0
    GGML_ASSERT(a->op == GGML_OP_SOFT_MAX);
3984
0
    GGML_ASSERT(a->src[2] == NULL);
3985
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
3986
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
3987
3988
0
    a->src[2] = sinks;
3989
0
}
3990
3991
// ggml_soft_max_ext_back
3992
3993
static struct ggml_tensor * ggml_soft_max_ext_back_impl(
3994
        struct ggml_context * ctx,
3995
        struct ggml_tensor  * a,
3996
        struct ggml_tensor  * b,
3997
        float                 scale,
3998
        float                 max_bias,
3999
0
        bool                  inplace) {
4000
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4001
4002
0
    result->op     = GGML_OP_SOFT_MAX_BACK;
4003
0
    result->src[0] = a;
4004
0
    result->src[1] = b;
4005
4006
0
    memcpy((float *) result->op_params + 0, &scale,    sizeof(float));
4007
0
    memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));
4008
4009
0
    return result;
4010
0
}
4011
4012
struct ggml_tensor * ggml_soft_max_ext_back(
4013
        struct ggml_context * ctx,
4014
        struct ggml_tensor  * a,
4015
        struct ggml_tensor  * b,
4016
        float                 scale,
4017
0
        float                 max_bias) {
4018
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
4019
0
}
4020
4021
struct ggml_tensor * ggml_soft_max_ext_back_inplace(
4022
        struct ggml_context * ctx,
4023
        struct ggml_tensor  * a,
4024
        struct ggml_tensor  * b,
4025
        float                 scale,
4026
0
        float                 max_bias) {
4027
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
4028
0
}
4029
4030
// ggml_rope
4031
4032
static struct ggml_tensor * ggml_rope_impl(
4033
        struct ggml_context * ctx,
4034
        struct ggml_tensor  * a,
4035
        struct ggml_tensor  * b,
4036
        struct ggml_tensor  * c,
4037
        int                   n_dims,
4038
        int                   sections[GGML_MROPE_SECTIONS],
4039
        int                   mode,
4040
        int                   n_ctx_orig,
4041
        float                 freq_base,
4042
        float                 freq_scale,
4043
        float                 ext_factor,
4044
        float                 attn_factor,
4045
        float                 beta_fast,
4046
        float                 beta_slow,
4047
0
        bool                  inplace) {
4048
0
    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
4049
4050
0
    GGML_ASSERT(ggml_is_vector(b));
4051
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
4052
4053
0
    bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;
4054
0
    if (mrope_used) {
4055
0
        GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
4056
0
    } else {
4057
0
        GGML_ASSERT(a->ne[2] == b->ne[0]);
4058
0
    }
4059
4060
0
    if (c) {
4061
0
        GGML_ASSERT(c->type == GGML_TYPE_F32);
4062
0
        GGML_ASSERT(c->ne[0] >= n_dims / 2);
4063
0
    }
4064
4065
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4066
4067
0
    int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
4068
0
    memcpy(params +  5, &freq_base,    sizeof(float));
4069
0
    memcpy(params +  6, &freq_scale,   sizeof(float));
4070
0
    memcpy(params +  7, &ext_factor,   sizeof(float));
4071
0
    memcpy(params +  8, &attn_factor,  sizeof(float));
4072
0
    memcpy(params +  9, &beta_fast,    sizeof(float));
4073
0
    memcpy(params + 10, &beta_slow,    sizeof(float));
4074
0
    if (mrope_used && sections) {
4075
0
        memcpy(params + 11, sections,  sizeof(int32_t) * GGML_MROPE_SECTIONS);
4076
0
    } else {
4077
0
        memset(params + 11, 0,         sizeof(int32_t) * GGML_MROPE_SECTIONS);
4078
0
    }
4079
0
    ggml_set_op_params(result, params, sizeof(params));
4080
4081
0
    result->op     = GGML_OP_ROPE;
4082
0
    result->src[0] = a;
4083
0
    result->src[1] = b;
4084
0
    result->src[2] = c;
4085
4086
0
    return result;
4087
0
}
4088
4089
struct ggml_tensor * ggml_rope(
4090
        struct ggml_context * ctx,
4091
        struct ggml_tensor  * a,
4092
        struct ggml_tensor  * b,
4093
        int                   n_dims,
4094
0
        int                   mode) {
4095
0
    return ggml_rope_impl(
4096
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
4097
0
    );
4098
0
}
4099
4100
struct ggml_tensor * ggml_rope_multi(
4101
        struct ggml_context * ctx,
4102
        struct ggml_tensor  * a,
4103
        struct ggml_tensor  * b,
4104
        struct ggml_tensor  * c,
4105
        int                   n_dims,
4106
        int                   sections[GGML_MROPE_SECTIONS],
4107
        int                   mode,
4108
        int                   n_ctx_orig,
4109
        float                 freq_base,
4110
        float                 freq_scale,
4111
        float                 ext_factor,
4112
        float                 attn_factor,
4113
        float                 beta_fast,
4114
0
        float                 beta_slow) {
4115
0
    return ggml_rope_impl(
4116
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4117
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4118
0
    );
4119
0
}
4120
4121
struct ggml_tensor * ggml_rope_multi_inplace(
4122
        struct ggml_context * ctx,
4123
        struct ggml_tensor  * a,
4124
        struct ggml_tensor  * b,
4125
        struct ggml_tensor  * c,
4126
        int                   n_dims,
4127
        int                   sections[GGML_MROPE_SECTIONS],
4128
        int                   mode,
4129
        int                   n_ctx_orig,
4130
        float                 freq_base,
4131
        float                 freq_scale,
4132
        float                 ext_factor,
4133
        float                 attn_factor,
4134
        float                 beta_fast,
4135
0
        float                 beta_slow) {
4136
0
    return ggml_rope_impl(
4137
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4138
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4139
0
    );
4140
0
}
4141
4142
struct ggml_tensor * ggml_rope_inplace(
4143
        struct ggml_context * ctx,
4144
        struct ggml_tensor  * a,
4145
        struct ggml_tensor  * b,
4146
        int                   n_dims,
4147
0
        int                   mode) {
4148
0
    return ggml_rope_impl(
4149
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
4150
0
    );
4151
0
}
4152
4153
struct ggml_tensor * ggml_rope_ext(
4154
        struct ggml_context * ctx,
4155
        struct ggml_tensor  * a,
4156
        struct ggml_tensor  * b,
4157
        struct ggml_tensor  * c,
4158
        int                   n_dims,
4159
        int                   mode,
4160
        int                   n_ctx_orig,
4161
        float                 freq_base,
4162
        float                 freq_scale,
4163
        float                 ext_factor,
4164
        float                 attn_factor,
4165
        float                 beta_fast,
4166
0
        float                 beta_slow) {
4167
0
    return ggml_rope_impl(
4168
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4169
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4170
0
    );
4171
0
}
4172
4173
struct ggml_tensor * ggml_rope_ext_inplace(
4174
        struct ggml_context * ctx,
4175
        struct ggml_tensor  * a,
4176
        struct ggml_tensor  * b,
4177
        struct ggml_tensor  * c,
4178
        int                   n_dims,
4179
        int                   mode,
4180
        int                   n_ctx_orig,
4181
        float                 freq_base,
4182
        float                 freq_scale,
4183
        float                 ext_factor,
4184
        float                 attn_factor,
4185
        float                 beta_fast,
4186
0
        float                 beta_slow) {
4187
0
    return ggml_rope_impl(
4188
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4189
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4190
0
    );
4191
0
}
4192
4193
struct ggml_tensor * ggml_rope_custom(
4194
        struct ggml_context * ctx,
4195
        struct ggml_tensor  * a,
4196
        struct ggml_tensor  * b,
4197
        int                   n_dims,
4198
        int                   mode,
4199
        int                   n_ctx_orig,
4200
        float                 freq_base,
4201
        float                 freq_scale,
4202
        float                 ext_factor,
4203
        float                 attn_factor,
4204
        float                 beta_fast,
4205
0
        float                 beta_slow) {
4206
0
    return ggml_rope_impl(
4207
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4208
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4209
0
    );
4210
0
}
4211
4212
struct ggml_tensor * ggml_rope_custom_inplace(
4213
        struct ggml_context * ctx,
4214
        struct ggml_tensor  * a,
4215
        struct ggml_tensor  * b,
4216
        int                   n_dims,
4217
        int                   mode,
4218
        int                   n_ctx_orig,
4219
        float                 freq_base,
4220
        float                 freq_scale,
4221
        float                 ext_factor,
4222
        float                 attn_factor,
4223
        float                 beta_fast,
4224
0
        float                 beta_slow) {
4225
0
    return ggml_rope_impl(
4226
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4227
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4228
0
    );
4229
0
}
4230
4231
// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get
4232
// `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
4233
0
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
4234
0
    return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
4235
0
}
4236
4237
void ggml_rope_yarn_corr_dims(
4238
    int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
4239
0
) {
4240
    // start and end correction dims
4241
0
    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
4242
0
    float end   =  ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
4243
0
    dims[0] = MAX(0, start);
4244
0
    dims[1] = MIN(n_dims - 1, end);
4245
0
}
4246
4247
// ggml_rope_back
4248
4249
struct ggml_tensor * ggml_rope_ext_back(
4250
        struct ggml_context * ctx,
4251
        struct ggml_tensor  * a,
4252
        struct ggml_tensor  * b,
4253
        struct ggml_tensor  * c,
4254
        int                   n_dims,
4255
        int                   mode,
4256
        int                   n_ctx_orig,
4257
        float                 freq_base,
4258
        float                 freq_scale,
4259
        float                 ext_factor,
4260
        float                 attn_factor,
4261
        float                 beta_fast,
4262
0
        float                 beta_slow) {
4263
0
    struct ggml_tensor * result = ggml_rope_ext(
4264
0
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4265
0
    result->op = GGML_OP_ROPE_BACK;
4266
0
    return result;
4267
0
}
4268
4269
struct ggml_tensor * ggml_rope_multi_back(
4270
        struct ggml_context * ctx,
4271
        struct ggml_tensor  * a,
4272
        struct ggml_tensor  * b,
4273
        struct ggml_tensor  * c,
4274
        int                   n_dims,
4275
        int                   sections[4],
4276
        int                   mode,
4277
        int                   n_ctx_orig,
4278
        float                 freq_base,
4279
        float                 freq_scale,
4280
        float                 ext_factor,
4281
        float                 attn_factor,
4282
        float                 beta_fast,
4283
0
        float                 beta_slow) {
4284
0
    struct ggml_tensor * result = ggml_rope_multi(
4285
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4286
0
    result->op = GGML_OP_ROPE_BACK;
4287
0
    return result;
4288
0
}
4289
// ggml_clamp
4290
4291
struct ggml_tensor * ggml_clamp(
4292
        struct ggml_context * ctx,
4293
        struct ggml_tensor  * a,
4294
        float                 min,
4295
0
        float                 max) {
4296
    // TODO: when implement backward, fix this:
4297
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
4298
4299
0
    float params[] = { min, max };
4300
0
    ggml_set_op_params(result, params, sizeof(params));
4301
4302
0
    result->op     = GGML_OP_CLAMP;
4303
0
    result->src[0] = a;
4304
4305
0
    return result;
4306
0
}
4307
4308
0
static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4309
0
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
4310
0
}
4311
4312
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
4313
// a: [OC,IC, KH, KW]
4314
// b: [N, IC, IH, IW]
4315
// result: [N, OH, OW, IC*KH*KW]
4316
struct ggml_tensor * ggml_im2col(
4317
        struct ggml_context * ctx,
4318
        struct ggml_tensor  * a,
4319
        struct ggml_tensor  * b,
4320
        int                   s0,
4321
        int                   s1,
4322
        int                   p0,
4323
        int                   p1,
4324
        int                   d0,
4325
        int                   d1,
4326
        bool                  is_2D,
4327
0
        enum ggml_type        dst_type) {
4328
0
    if (is_2D) {
4329
0
        GGML_ASSERT(a->ne[2] == b->ne[2]);
4330
0
    } else {
4331
        //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
4332
0
        GGML_ASSERT(b->ne[1] == a->ne[1]);
4333
0
        GGML_ASSERT(b->ne[3] == 1);
4334
0
    }
4335
4336
0
    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
4337
0
    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4338
4339
0
    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
4340
0
    GGML_ASSERT((OW > 0)           && "b too small compared to a");
4341
4342
0
    const int64_t ne[4] = {
4343
0
        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
4344
0
        OW,
4345
0
        is_2D ? OH : b->ne[2],
4346
0
        is_2D ?      b->ne[3] : 1,
4347
0
    };
4348
4349
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4350
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4351
0
    ggml_set_op_params(result, params, sizeof(params));
4352
4353
0
    result->op     = GGML_OP_IM2COL;
4354
0
    result->src[0] = a;
4355
0
    result->src[1] = b;
4356
4357
0
    return result;
4358
0
}
4359
4360
struct ggml_tensor * ggml_im2col_back(
4361
        struct ggml_context * ctx,
4362
        struct ggml_tensor  * a,
4363
        struct ggml_tensor  * b,
4364
        int64_t             * ne,
4365
        int                   s0,
4366
        int                   s1,
4367
        int                   p0,
4368
        int                   p1,
4369
        int                   d0,
4370
        int                   d1,
4371
0
        bool                  is_2D) {
4372
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4373
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4374
0
    ggml_set_op_params(result, params, sizeof(params));
4375
4376
0
    result->op     = GGML_OP_IM2COL_BACK;
4377
0
    result->src[0] = a;
4378
0
    result->src[1] = b;
4379
4380
0
    return result;
4381
0
}
4382
4383
// ggml_conv_1d
4384
4385
struct ggml_tensor * ggml_conv_1d(
4386
        struct ggml_context * ctx,
4387
        struct ggml_tensor  * a,
4388
        struct ggml_tensor  * b,
4389
        int                   s0,
4390
        int                   p0,
4391
0
        int                   d0) {
4392
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
4393
4394
0
    struct ggml_tensor * result =
4395
0
        ggml_mul_mat(ctx,
4396
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
4397
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2]));                    // [OC,IC, K] => [OC, IC * K]
4398
4399
0
    result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
4400
4401
0
    return result;
4402
0
}
4403
4404
// ggml_conv_1d_ph
4405
4406
struct ggml_tensor* ggml_conv_1d_ph(
4407
        struct ggml_context * ctx,
4408
        struct ggml_tensor  * a,
4409
        struct ggml_tensor  * b,
4410
        int                   s,
4411
0
        int                   d) {
4412
0
    return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
4413
0
}
4414
4415
// ggml_conv_1d_dw
4416
4417
struct ggml_tensor * ggml_conv_1d_dw(
4418
        struct ggml_context * ctx,
4419
        struct ggml_tensor  * a,
4420
        struct ggml_tensor  * b,
4421
        int                   s0,
4422
        int                   p0,
4423
0
        int                   d0) {
4424
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
4425
4426
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
4427
4428
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
4429
4430
0
    result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);
4431
4432
0
    return result;
4433
0
}
4434
4435
// ggml_conv_1d_dw_ph
4436
4437
struct ggml_tensor * ggml_conv_1d_dw_ph(
4438
        struct ggml_context * ctx,
4439
        struct ggml_tensor  * a,
4440
        struct ggml_tensor  * b,
4441
        int                   s0,
4442
0
        int                   d0) {
4443
0
    return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
4444
0
}
4445
4446
// ggml_conv_transpose_1d
4447
4448
0
static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4449
0
    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
4450
0
}
4451
4452
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
4453
        struct ggml_context * ctx,
4454
        struct ggml_tensor  * a,
4455
        struct ggml_tensor  * b,
4456
        int                   s0,
4457
        int                   p0,
4458
0
        int                   d0) {
4459
0
    GGML_ASSERT(ggml_is_matrix(b));
4460
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
4461
0
    GGML_ASSERT(a->ne[3] == 1);
4462
4463
0
    GGML_ASSERT(p0 == 0);
4464
0
    GGML_ASSERT(d0 == 1);
4465
4466
0
    const int64_t ne[4] = {
4467
0
        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
4468
0
        a->ne[1], b->ne[2], 1,
4469
0
    };
4470
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4471
4472
0
    int32_t params[] = { s0, p0, d0 };
4473
0
    ggml_set_op_params(result, params, sizeof(params));
4474
4475
0
    result->op     = GGML_OP_CONV_TRANSPOSE_1D;
4476
0
    result->src[0] = a;
4477
0
    result->src[1] = b;
4478
4479
0
    return result;
4480
0
}
4481
4482
// ggml_conv_2d
4483
4484
// a: [OC,IC, KH, KW]
4485
// b: [N, IC, IH, IW]
4486
// result: [N, OC, OH, OW]
4487
struct ggml_tensor * ggml_conv_2d(
4488
        struct ggml_context * ctx,
4489
        struct ggml_tensor  * a,
4490
        struct ggml_tensor  * b,
4491
        int                   s0,
4492
        int                   s1,
4493
        int                   p0,
4494
        int                   p1,
4495
        int                   d0,
4496
0
        int                   d1) {
4497
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
4498
4499
0
    struct ggml_tensor * result =
4500
0
        ggml_mul_mat(ctx,
4501
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0],  im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
4502
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]),  a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]
4503
4504
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
4505
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
4506
4507
4508
0
    return result;
4509
0
}
4510
4511
// a: [OC*IC, KD, KH, KW]
4512
// b: [N*IC, ID, IH, IW]
4513
// result: [N*OD, OH, OW, IC * KD * KH * KW]
4514
struct ggml_tensor * ggml_im2col_3d(
4515
        struct ggml_context * ctx,
4516
        struct ggml_tensor  * a,
4517
        struct ggml_tensor  * b,
4518
        int64_t               IC,
4519
        int                   s0, // stride width
4520
        int                   s1, // stride height
4521
        int                   s2, // stride depth
4522
        int                   p0, // padding width
4523
        int                   p1, // padding height
4524
        int                   p2, // padding depth
4525
        int                   d0, // dilation width
4526
        int                   d1, // dilation height
4527
        int                   d2, // dilation depth
4528
0
        enum ggml_type        dst_type) {
4529
0
    const int64_t N = b->ne[3] / IC;
4530
0
    const int64_t ID = b->ne[2];
4531
0
    const int64_t IH = b->ne[1];
4532
0
    const int64_t IW = b->ne[0];
4533
4534
0
    const int64_t OC = a->ne[3] / IC;
4535
0
    UNUSED(OC);
4536
0
    const int64_t KD = a->ne[2];
4537
0
    const int64_t KH = a->ne[1];
4538
0
    const int64_t KW = a->ne[0];
4539
0
    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
4540
0
    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
4541
0
    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);
4542
4543
0
    GGML_ASSERT((OD > 0)  && "b too small compared to a");
4544
0
    GGML_ASSERT((OH > 0)  && "b too small compared to a");
4545
0
    GGML_ASSERT((OW > 0)  && "b too small compared to a");
4546
4547
4548
0
    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};
4549
4550
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4551
0
    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
4552
0
    ggml_set_op_params(result, params, sizeof(params));
4553
4554
0
    result->op     = GGML_OP_IM2COL_3D;
4555
0
    result->src[0] = a;
4556
0
    result->src[1] = b;
4557
4558
0
    return result;
4559
0
}
4560
4561
// a: [OC*IC, KD, KH, KW]
4562
// b: [N*IC, ID, IH, IW]
4563
// result: [N*OC, OD, OH, OW]
4564
struct ggml_tensor * ggml_conv_3d(
4565
        struct ggml_context * ctx,
4566
        struct ggml_tensor  * a,
4567
        struct ggml_tensor  * b,
4568
        int64_t               IC,
4569
        int                   s0, // stride width
4570
        int                   s1, // stride height
4571
        int                   s2, // stride depth
4572
        int                   p0, // padding width
4573
        int                   p1, // padding height
4574
        int                   p2, // padding depth
4575
        int                   d0, // dilation width
4576
        int                   d1, // dilation height
4577
        int                   d2  // dilation depth
4578
0
        ) {
4579
0
    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]
4580
4581
0
    int64_t OC = a->ne[3] / IC;
4582
0
    int64_t N = b->ne[3] / IC;
4583
0
    struct ggml_tensor * result =
4584
0
        ggml_mul_mat(ctx,
4585
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
4586
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                          // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]
4587
4588
0
    int64_t OD = im2col->ne[3] / N;
4589
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
4590
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
4591
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
4592
4593
0
    return result;
4594
0
}
4595
4596
// ggml_conv_2d_sk_p0
4597
4598
struct ggml_tensor * ggml_conv_2d_sk_p0(
4599
        struct ggml_context * ctx,
4600
        struct ggml_tensor  * a,
4601
0
        struct ggml_tensor  * b) {
4602
0
    return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
4603
0
}
4604
4605
// ggml_conv_2d_s1_ph
4606
4607
struct ggml_tensor * ggml_conv_2d_s1_ph(
4608
        struct ggml_context * ctx,
4609
        struct ggml_tensor  * a,
4610
0
        struct ggml_tensor  * b) {
4611
0
    return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
4612
0
}
4613
4614
// ggml_conv_2d_dw
4615
4616
struct ggml_tensor * ggml_conv_2d_dw(
4617
        struct ggml_context * ctx,
4618
        struct ggml_tensor  * a,
4619
        struct ggml_tensor  * b,
4620
        int                   s0,
4621
        int                   s1,
4622
        int                   p0,
4623
        int                   p1,
4624
        int                   d0,
4625
0
        int                   d1) {
4626
0
    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
4627
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
4628
0
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
4629
0
                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
4630
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
4631
4632
0
    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2],  new_a->ne[3], 1);                       // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
4633
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
4634
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
4635
4636
0
    return result;
4637
0
}
4638
4639
// ggml_conv_2d_dw_direct
4640
4641
struct ggml_tensor * ggml_conv_2d_dw_direct(
4642
        struct ggml_context * ctx,
4643
        struct ggml_tensor  * a,
4644
        struct ggml_tensor  * b,
4645
        int                   stride0,
4646
        int                   stride1,
4647
        int                   pad0,
4648
        int                   pad1,
4649
        int                   dilation0,
4650
0
        int                   dilation1) {
4651
0
    GGML_ASSERT(a->ne[2] == 1);
4652
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4653
0
    int64_t ne[4];
4654
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
4655
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
4656
0
    ne[2] = b->ne[2];
4657
0
    ne[3] = b->ne[3];
4658
4659
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4660
4661
0
    if (ggml_is_contiguous_channels(b)) {
4662
        // Result will be permuted the same way as input (CWHN order)
4663
0
        const int64_t type_size = ggml_type_size(result->type);
4664
0
        GGML_ASSERT(ggml_blck_size(result->type) == 1);
4665
0
        result->nb[0] = result->ne[2] * type_size;
4666
0
        result->nb[1] = result->ne[0] * result->nb[0];
4667
0
        result->nb[2] = type_size;
4668
0
    }
4669
4670
0
    int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
4671
0
    ggml_set_op_params(result, params, sizeof(params));
4672
4673
0
    result->op     = GGML_OP_CONV_2D_DW;
4674
0
    result->src[0] = a;
4675
0
    result->src[1] = b;
4676
0
    return result;
4677
0
}
4678
4679
// ggml_conv_2d_direct
4680
4681
struct ggml_tensor * ggml_conv_2d_direct(
4682
        struct ggml_context * ctx,
4683
        struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
4684
        struct ggml_tensor  * b,   // input data [W, H, C, N]
4685
        int                   s0,  // stride dimension 0
4686
        int                   s1,  // stride dimension 1
4687
        int                   p0,  // padding dimension 0
4688
        int                   p1,  // padding dimension 1
4689
        int                   d0,  // dilation dimension 0
4690
0
        int                   d1) {// dilation dimension 1
4691
4692
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
4693
    //GGML_ASSERT(a->type == b->type);
4694
4695
0
    int64_t ne[4];
4696
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4697
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4698
0
    ne[2] = a->ne[3];
4699
0
    ne[3] = b->ne[3];
4700
4701
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4702
4703
0
    ggml_set_op_params_i32(result, 0, s0);
4704
0
    ggml_set_op_params_i32(result, 1, s1);
4705
0
    ggml_set_op_params_i32(result, 2, p0);
4706
0
    ggml_set_op_params_i32(result, 3, p1);
4707
0
    ggml_set_op_params_i32(result, 4, d0);
4708
0
    ggml_set_op_params_i32(result, 5, d1);
4709
4710
0
    result->op = GGML_OP_CONV_2D;
4711
0
    result->src[0] = a;
4712
0
    result->src[1] = b;
4713
4714
0
    return result;
4715
0
}
4716
4717
// ggml_conv_3d_direct
4718
4719
struct ggml_tensor * ggml_conv_3d_direct(
4720
        struct ggml_context * ctx,
4721
        struct ggml_tensor  * a,
4722
        struct ggml_tensor  * b,
4723
        int                   s0,
4724
        int                   s1,
4725
        int                   s2,
4726
        int                   p0,
4727
        int                   p1,
4728
        int                   p2,
4729
        int                   d0,
4730
        int                   d1,
4731
        int                   d2,
4732
        int                   c,
4733
        int                   n,
4734
0
        int                   oc) {
4735
4736
0
    GGML_ASSERT(a->ne[3] == (int64_t) c * oc);
4737
0
    GGML_ASSERT(b->ne[3] == (int64_t) c * n);
4738
4739
0
    int64_t ne[4];
4740
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4741
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4742
0
    ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2);
4743
0
    ne[3] = (int64_t) oc * n;
4744
4745
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4746
4747
0
    ggml_set_op_params_i32(result, 0,  s0);
4748
0
    ggml_set_op_params_i32(result, 1,  s1);
4749
0
    ggml_set_op_params_i32(result, 2,  s2);
4750
0
    ggml_set_op_params_i32(result, 3,  p0);
4751
0
    ggml_set_op_params_i32(result, 4,  p1);
4752
0
    ggml_set_op_params_i32(result, 5,  p2);
4753
0
    ggml_set_op_params_i32(result, 6,  d0);
4754
0
    ggml_set_op_params_i32(result, 7,  d1);
4755
0
    ggml_set_op_params_i32(result, 8,  d2);
4756
0
    ggml_set_op_params_i32(result, 9,  c);
4757
0
    ggml_set_op_params_i32(result, 10, n);
4758
0
    ggml_set_op_params_i32(result, 11, oc);
4759
4760
0
    result->op = GGML_OP_CONV_3D;
4761
0
    result->src[0] = a;
4762
0
    result->src[1] = b;
4763
4764
0
    return result;
4765
0
}
4766
4767
// ggml_conv_transpose_2d_p0
4768
4769
0
static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
4770
0
    return (ins - 1) * s - 2 * p + ks;
4771
0
}
4772
4773
struct ggml_tensor * ggml_conv_transpose_2d_p0(
4774
        struct ggml_context * ctx,
4775
        struct ggml_tensor  * a,
4776
        struct ggml_tensor  * b,
4777
0
        int                   stride) {
4778
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4779
4780
0
    const int64_t ne[4] = {
4781
0
        ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
4782
0
        ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
4783
0
        a->ne[2], b->ne[3],
4784
0
    };
4785
4786
0
    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4787
4788
0
    ggml_set_op_params_i32(result, 0, stride);
4789
4790
0
    result->op     = GGML_OP_CONV_TRANSPOSE_2D;
4791
0
    result->src[0] = a;
4792
0
    result->src[1] = b;
4793
4794
0
    return result;
4795
0
}
4796
4797
// ggml_pool_*
4798
4799
0
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
4800
0
    return (ins + 2 * p - ks) / s + 1;
4801
0
}
4802
4803
// ggml_pool_1d
4804
4805
struct ggml_tensor * ggml_pool_1d(
4806
        struct ggml_context * ctx,
4807
        struct ggml_tensor  * a,
4808
        enum ggml_op_pool     op,
4809
        int                   k0,
4810
        int                   s0,
4811
0
        int                   p0) {
4812
0
    const int64_t ne[4] = {
4813
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4814
0
        a->ne[1],
4815
0
        a->ne[2],
4816
0
        a->ne[3],
4817
0
    };
4818
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4819
4820
0
    int32_t params[] = { op, k0, s0, p0 };
4821
0
    ggml_set_op_params(result, params, sizeof(params));
4822
4823
0
    result->op     = GGML_OP_POOL_1D;
4824
0
    result->src[0] = a;
4825
4826
0
    return result;
4827
0
}
4828
4829
// ggml_pool_2d
4830
4831
struct ggml_tensor * ggml_pool_2d(
4832
        struct ggml_context * ctx,
4833
        struct ggml_tensor  * a,
4834
        enum ggml_op_pool     op,
4835
        int                   k0,
4836
        int                   k1,
4837
        int                   s0,
4838
        int                   s1,
4839
        float                 p0,
4840
0
        float                 p1) {
4841
0
    struct ggml_tensor * result;
4842
0
    const int64_t ne[4] = {
4843
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4844
0
        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
4845
0
        a->ne[2],
4846
0
        a->ne[3],
4847
0
    };
4848
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4849
4850
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4851
0
    ggml_set_op_params(result, params, sizeof(params));
4852
4853
0
    result->op     = GGML_OP_POOL_2D;
4854
0
    result->src[0] = a;
4855
4856
0
    return result;
4857
0
}
4858
4859
struct ggml_tensor * ggml_pool_2d_back(
4860
        struct ggml_context * ctx,
4861
        struct ggml_tensor  * a,
4862
        struct ggml_tensor  * af,
4863
        enum ggml_op_pool     op,
4864
        int                   k0,
4865
        int                   k1,
4866
        int                   s0,
4867
        int                   s1,
4868
        float                 p0,
4869
0
        float                 p1) {
4870
0
    struct ggml_tensor * result;
4871
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
4872
4873
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4874
0
    ggml_set_op_params(result, params, sizeof(params));
4875
4876
0
    result->op     = GGML_OP_POOL_2D_BACK;
4877
0
    result->src[0] = a;
4878
0
    result->src[1] = af;
4879
4880
0
    return result;
4881
0
}
4882
4883
// ggml_upscale / ggml_interpolate
4884
4885
static struct ggml_tensor * ggml_interpolate_impl(
4886
        struct ggml_context * ctx,
4887
        struct ggml_tensor  * a,
4888
        int64_t               ne0,
4889
        int64_t               ne1,
4890
        int64_t               ne2,
4891
        int64_t               ne3,
4892
0
        uint32_t              mode) {
4893
0
    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
4894
4895
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
4896
4897
0
    ggml_set_op_params_i32(result, 0, (int32_t)mode);
4898
4899
0
    result->op     = GGML_OP_UPSCALE;
4900
0
    result->src[0] = a;
4901
4902
0
    return result;
4903
0
}
4904
4905
struct ggml_tensor * ggml_upscale(
4906
        struct ggml_context * ctx,
4907
        struct ggml_tensor  * a,
4908
        int                   scale_factor,
4909
0
        enum ggml_scale_mode  mode) {
4910
0
    GGML_ASSERT(scale_factor > 1);
4911
0
    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
4912
0
}
4913
4914
struct ggml_tensor * ggml_upscale_ext(
4915
        struct ggml_context * ctx,
4916
        struct ggml_tensor  * a,
4917
        int                   ne0,
4918
        int                   ne1,
4919
        int                   ne2,
4920
        int                   ne3,
4921
0
        enum ggml_scale_mode  mode) {
4922
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4923
0
}
4924
4925
struct ggml_tensor * ggml_interpolate(
4926
        struct ggml_context * ctx,
4927
        struct ggml_tensor  * a,
4928
        int64_t               ne0,
4929
        int64_t               ne1,
4930
        int64_t               ne2,
4931
        int64_t               ne3,
4932
0
        uint32_t              mode) {
4933
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4934
0
}
4935
4936
// ggml_pad
4937
4938
struct ggml_tensor * ggml_pad(
4939
        struct ggml_context * ctx,
4940
        struct ggml_tensor  * a,
4941
        int                   p0,
4942
        int                   p1,
4943
        int                   p2,
4944
0
        int                   p3) {
4945
0
    return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
4946
0
}
4947
4948
struct ggml_tensor * ggml_pad_ext(
4949
            struct ggml_context * ctx,
4950
            struct ggml_tensor  * a,
4951
            int                  lp0,
4952
            int                  rp0,
4953
            int                  lp1,
4954
            int                  rp1,
4955
            int                  lp2,
4956
            int                  rp2,
4957
            int                  lp3,
4958
            int                  rp3
4959
0
            ) {
4960
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
4961
0
            a->ne[0] + lp0 + rp0,
4962
0
            a->ne[1] + lp1 + rp1,
4963
0
            a->ne[2] + lp2 + rp2,
4964
0
            a->ne[3] + lp3 + rp3);
4965
4966
0
    ggml_set_op_params_i32(result, 0, lp0);
4967
0
    ggml_set_op_params_i32(result, 1, rp0);
4968
0
    ggml_set_op_params_i32(result, 2, lp1);
4969
0
    ggml_set_op_params_i32(result, 3, rp1);
4970
0
    ggml_set_op_params_i32(result, 4, lp2);
4971
0
    ggml_set_op_params_i32(result, 5, rp2);
4972
0
    ggml_set_op_params_i32(result, 6, lp3);
4973
0
    ggml_set_op_params_i32(result, 7, rp3);
4974
4975
4976
0
    result->op     = GGML_OP_PAD;
4977
0
    result->src[0] = a;
4978
4979
0
    return result;
4980
0
}
4981
4982
// ggml_pad_reflect_1d
4983
4984
struct ggml_tensor * ggml_pad_reflect_1d(
4985
        struct ggml_context * ctx,
4986
        struct ggml_tensor  * a,
4987
        int                   p0,
4988
0
        int                   p1) {
4989
0
    GGML_ASSERT(p0 >= 0);
4990
0
    GGML_ASSERT(p1 >= 0);
4991
4992
0
    GGML_ASSERT(p0 < a->ne[0]); // padding length on each size must be less than the
4993
0
    GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
4994
4995
0
    GGML_ASSERT(ggml_is_contiguous(a));
4996
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
4997
4998
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
4999
0
            a->ne[0] + p0 + p1,
5000
0
            a->ne[1],
5001
0
            a->ne[2],
5002
0
            a->ne[3]);
5003
5004
0
    int32_t params[] = { p0, p1 };
5005
0
    ggml_set_op_params(result, params, sizeof(params));
5006
5007
0
    result->op     = GGML_OP_PAD_REFLECT_1D;
5008
0
    result->src[0] = a;
5009
5010
0
    return result;
5011
0
}
5012
5013
// ggml_roll
5014
5015
struct ggml_tensor * ggml_roll(
5016
        struct ggml_context * ctx,
5017
        struct ggml_tensor  * a,
5018
        int                   shift0,
5019
        int                   shift1,
5020
        int                   shift2,
5021
0
        int                   shift3) {
5022
0
    GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
5023
0
    GGML_ASSERT(abs(shift0) < a->ne[0]);
5024
0
    GGML_ASSERT(abs(shift1) < a->ne[1]);
5025
0
    GGML_ASSERT(abs(shift2) < a->ne[2]);
5026
0
    GGML_ASSERT(abs(shift3) < a->ne[3]);
5027
5028
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5029
5030
0
    ggml_set_op_params_i32(result, 0, shift0);
5031
0
    ggml_set_op_params_i32(result, 1, shift1);
5032
0
    ggml_set_op_params_i32(result, 2, shift2);
5033
0
    ggml_set_op_params_i32(result, 3, shift3);
5034
5035
0
    result->op     = GGML_OP_ROLL;
5036
0
    result->src[0] = a;
5037
5038
0
    return result;
5039
0
}
5040
5041
// ggml_arange
5042
5043
struct ggml_tensor * ggml_arange(
5044
        struct ggml_context * ctx,
5045
        float                 start,
5046
        float                 stop,
5047
0
        float                 step) {
5048
0
    GGML_ASSERT(stop > start);
5049
5050
0
    const int64_t steps = (int64_t) ceilf((stop - start) / step);
5051
5052
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
5053
5054
0
    ggml_set_op_params_f32(result, 0, start);
5055
0
    ggml_set_op_params_f32(result, 1, stop);
5056
0
    ggml_set_op_params_f32(result, 2, step);
5057
5058
0
    result->op = GGML_OP_ARANGE;
5059
5060
0
    return result;
5061
0
}
5062
5063
// ggml_timestep_embedding
5064
5065
struct ggml_tensor * ggml_timestep_embedding(
5066
        struct ggml_context * ctx,
5067
        struct ggml_tensor  * timesteps,
5068
        int                   dim,
5069
0
        int                   max_period) {
5070
5071
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);
5072
5073
0
    ggml_set_op_params_i32(result, 0, dim);
5074
0
    ggml_set_op_params_i32(result, 1, max_period);
5075
5076
0
    result->op     = GGML_OP_TIMESTEP_EMBEDDING;
5077
0
    result->src[0] = timesteps;
5078
5079
0
    return result;
5080
0
}
5081
5082
// ggml_tri
5083
5084
struct ggml_tensor * ggml_tri(
5085
    struct ggml_context * ctx,
5086
    struct ggml_tensor  * a,
5087
0
    enum ggml_tri_type    type) {
5088
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5089
5090
0
    GGML_ASSERT(ggml_is_contiguous(a));
5091
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
5092
5093
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5094
5095
0
    ggml_set_op_params_i32(result, 0, type);
5096
5097
0
    result->op = GGML_OP_TRI;
5098
0
    result->src[0] = a;
5099
5100
0
    return result;
5101
0
}
5102
5103
// ggml_fill
5104
5105
static struct ggml_tensor * ggml_fill_impl(
5106
    struct ggml_context * ctx,
5107
    struct ggml_tensor  * a,
5108
    float                 c,
5109
0
    bool                  inplace) {
5110
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5111
0
    GGML_ASSERT(ggml_is_contiguous(a));
5112
5113
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5114
5115
0
    ggml_set_op_params_f32(result, 0, c);
5116
5117
0
    result->op = GGML_OP_FILL;
5118
0
    result->src[0] = a;
5119
5120
0
    return result;
5121
0
}
5122
5123
struct ggml_tensor * ggml_fill(
5124
    struct ggml_context * ctx,
5125
    struct ggml_tensor  * a,
5126
0
    float                 c) {
5127
0
    return ggml_fill_impl(ctx, a, c, false);
5128
0
}
5129
5130
struct ggml_tensor * ggml_fill_inplace(
5131
    struct ggml_context * ctx,
5132
    struct ggml_tensor  * a,
5133
0
    float                 c) {
5134
0
    return ggml_fill_impl(ctx, a, c, true);
5135
0
}
5136
5137
// ggml_argsort
5138
5139
struct ggml_tensor * ggml_argsort(
5140
        struct ggml_context  * ctx,
5141
        struct ggml_tensor   * a,
5142
0
        enum ggml_sort_order   order) {
5143
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
5144
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
5145
5146
0
    ggml_set_op_params_i32(result, 0, (int32_t) order);
5147
5148
0
    result->op     = GGML_OP_ARGSORT;
5149
0
    result->src[0] = a;
5150
5151
0
    return result;
5152
0
}
5153
5154
// ggml_top_k
5155
5156
struct ggml_tensor * ggml_top_k(
5157
        struct ggml_context * ctx,
5158
        struct ggml_tensor  * a,
5159
0
        int                   k) {
5160
0
    GGML_ASSERT(a->ne[0] >= k);
5161
5162
0
    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
5163
5164
0
    result = ggml_view_4d(ctx, result,
5165
0
                k, result->ne[1], result->ne[2], result->ne[3],
5166
0
                   result->nb[1], result->nb[2], result->nb[3],
5167
0
                0);
5168
5169
0
    return result;
5170
0
}
5171
5172
// ggml_flash_attn_ext
5173
5174
struct ggml_tensor * ggml_flash_attn_ext(
5175
        struct ggml_context * ctx,
5176
        struct ggml_tensor  * q,
5177
        struct ggml_tensor  * k,
5178
        struct ggml_tensor  * v,
5179
        struct ggml_tensor  * mask,
5180
        float                 scale,
5181
        float                 max_bias,
5182
0
        float                 logit_softcap) {
5183
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5184
    // TODO: check if vT can be multiplied by (k*qT)
5185
5186
0
    GGML_ASSERT(q->ne[3] == k->ne[3]);
5187
0
    GGML_ASSERT(q->ne[3] == v->ne[3]);
5188
5189
0
    if (mask) {
5190
0
        GGML_ASSERT(ggml_is_contiguous(mask));
5191
0
        GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
5192
0
                "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
5193
        //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
5194
5195
0
        GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
5196
0
        GGML_ASSERT(q->ne[3] % mask->ne[3] == 0);
5197
0
    }
5198
5199
0
    if (max_bias > 0.0f) {
5200
0
        GGML_ASSERT(mask);
5201
0
    }
5202
5203
    // permute(0, 2, 1, 3)
5204
0
    int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
5205
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5206
5207
0
    float params[] = { scale, max_bias, logit_softcap };
5208
0
    ggml_set_op_params(result, params, sizeof(params));
5209
5210
0
    result->op     = GGML_OP_FLASH_ATTN_EXT;
5211
0
    result->src[0] = q;
5212
0
    result->src[1] = k;
5213
0
    result->src[2] = v;
5214
0
    result->src[3] = mask;
5215
5216
0
    return result;
5217
0
}
5218
5219
void ggml_flash_attn_ext_set_prec(
5220
        struct ggml_tensor * a,
5221
0
        enum ggml_prec       prec) {
5222
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5223
5224
0
    const int32_t prec_i32 = (int32_t) prec;
5225
5226
0
    ggml_set_op_params_i32(a, 3, prec_i32); // scale is on first pos, max_bias on second
5227
0
}
5228
5229
enum ggml_prec ggml_flash_attn_ext_get_prec(
5230
0
        const struct ggml_tensor * a) {
5231
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5232
5233
0
    const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);
5234
5235
0
    return (enum ggml_prec) prec_i32;
5236
0
}
5237
5238
void ggml_flash_attn_ext_add_sinks(
5239
        struct ggml_tensor * a,
5240
0
        struct ggml_tensor * sinks) {
5241
0
    if (!sinks) {
5242
0
        a->src[4] = NULL;
5243
0
        return;
5244
0
    }
5245
5246
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5247
0
    GGML_ASSERT(a->src[4] == NULL);
5248
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
5249
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
5250
5251
0
    a->src[4] = sinks;
5252
0
}
5253
5254
// ggml_flash_attn_back
5255
5256
struct ggml_tensor * ggml_flash_attn_back(
5257
        struct ggml_context * ctx,
5258
        struct ggml_tensor  * q,
5259
        struct ggml_tensor  * k,
5260
        struct ggml_tensor  * v,
5261
        struct ggml_tensor  * d,
5262
0
        bool                  masked) {
5263
0
    GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
5264
5265
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5266
    // TODO: check if vT can be multiplied by (k*qT)
5267
5268
    // d shape [D,N,ne2,ne3]
5269
    // q shape [D,N,ne2,ne3]
5270
    // k shape [D,M,kvne2,ne3]
5271
    // v shape [M,D,kvne2,ne3]
5272
5273
0
    const int64_t     D = q->ne[0];
5274
0
    const int64_t     N = q->ne[1];
5275
0
    const int64_t     M = k->ne[1];
5276
0
    const int64_t   ne2 = q->ne[2];
5277
0
    const int64_t   ne3 = q->ne[3];
5278
0
    const int64_t kvne2 = k->ne[2];
5279
5280
0
    GGML_ASSERT(k->ne[0] == D);
5281
0
    GGML_ASSERT(v->ne[0] == M);
5282
0
    GGML_ASSERT(v->ne[1] == D);
5283
0
    GGML_ASSERT(d->ne[0] == D);
5284
0
    GGML_ASSERT(d->ne[1] == N);
5285
0
    GGML_ASSERT(k->ne[2] == kvne2);
5286
0
    GGML_ASSERT(k->ne[3] == ne3);
5287
0
    GGML_ASSERT(v->ne[2] == kvne2);
5288
0
    GGML_ASSERT(v->ne[3] == ne3);
5289
0
    GGML_ASSERT(d->ne[2] == ne2);
5290
0
    GGML_ASSERT(d->ne[3] == ne3);
5291
5292
0
    GGML_ASSERT(ne2 % kvne2 == 0);
5293
5294
    // store gradients of q, k and v as continuous tensors concatenated in result.
5295
    // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
5296
0
    const int64_t elem_q = ggml_nelements(q);
5297
0
    const int64_t elem_k = ggml_nelements(k);
5298
0
    const int64_t elem_v = ggml_nelements(v);
5299
5300
0
    enum ggml_type result_type = GGML_TYPE_F32;
5301
0
    GGML_ASSERT(ggml_blck_size(result_type) == 1);
5302
0
    const size_t tsize = ggml_type_size(result_type);
5303
5304
0
    const size_t offs_q = 0;
5305
0
    const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
5306
0
    const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
5307
0
    const size_t end    = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);
5308
5309
0
    const size_t nelements = (end + tsize - 1)/tsize;
5310
5311
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);
5312
5313
0
    int32_t masked_i = masked ? 1 : 0;
5314
0
    ggml_set_op_params(result, &masked_i, sizeof(masked_i));
5315
5316
0
    result->op     = GGML_OP_FLASH_ATTN_BACK;
5317
0
    result->src[0] = q;
5318
0
    result->src[1] = k;
5319
0
    result->src[2] = v;
5320
0
    result->src[3] = d;
5321
5322
0
    return result;
5323
0
}
5324
5325
// ggml_ssm_conv
5326
5327
struct ggml_tensor * ggml_ssm_conv(
5328
        struct ggml_context * ctx,
5329
        struct ggml_tensor  * sx,
5330
0
        struct ggml_tensor  * c) {
5331
0
    GGML_ASSERT(ggml_is_3d(sx));
5332
0
    GGML_ASSERT(ggml_is_matrix(c));
5333
5334
0
    const int64_t d_conv  = c->ne[0];
5335
0
    const int64_t d_inner = c->ne[1];
5336
0
    const int64_t n_t     = sx->ne[0] - d_conv + 1; // tokens per sequence
5337
0
    const int64_t n_s     = sx->ne[2];
5338
5339
    // TODO: maybe support other strides than 1?
5340
0
    GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
5341
0
    GGML_ASSERT(sx->ne[1] == d_inner);
5342
0
    GGML_ASSERT(n_t >= 0);
5343
5344
0
    struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
5345
5346
0
    result->op     = GGML_OP_SSM_CONV;
5347
0
    result->src[0] = sx;
5348
0
    result->src[1] = c;
5349
5350
0
    return result;
5351
0
}
5352
5353
// ggml_ssm_scan
5354
5355
struct ggml_tensor * ggml_ssm_scan(
5356
        struct ggml_context * ctx,
5357
        struct ggml_tensor  * s,
5358
        struct ggml_tensor  * x,
5359
        struct ggml_tensor  * dt,
5360
        struct ggml_tensor  * A,
5361
        struct ggml_tensor  * B,
5362
        struct ggml_tensor  * C,
5363
0
        struct ggml_tensor  * ids) {
5364
0
    GGML_ASSERT(ggml_is_contiguous(s));
5365
0
    GGML_ASSERT(ggml_is_contiguous(dt));
5366
0
    GGML_ASSERT(ggml_is_contiguous(A));
5367
0
    GGML_ASSERT(x->nb[0] == ggml_type_size(x->type));
5368
0
    GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
5369
0
    GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
5370
0
    GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]);
5371
0
    GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]);
5372
0
    GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]);
5373
0
    GGML_ASSERT(ggml_are_same_shape(B, C));
5374
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
5375
5376
0
    {
5377
0
        const int64_t d_state      = s->ne[0];
5378
0
        const int64_t head_dim     = x->ne[0];
5379
0
        const int64_t n_head       = x->ne[1];
5380
0
        const int64_t n_seq_tokens = x->ne[2];
5381
0
        const int64_t n_seqs       = x->ne[3];
5382
5383
0
        GGML_ASSERT(dt->ne[0] == n_head);
5384
0
        GGML_ASSERT(dt->ne[1] == n_seq_tokens);
5385
0
        GGML_ASSERT(dt->ne[2] == n_seqs);
5386
0
        GGML_ASSERT(ggml_is_3d(dt));
5387
0
        GGML_ASSERT(s->ne[1] == head_dim);
5388
0
        GGML_ASSERT(s->ne[2] == n_head);
5389
0
        GGML_ASSERT(B->ne[0] == d_state);
5390
0
        GGML_ASSERT(B->ne[2] == n_seq_tokens);
5391
0
        GGML_ASSERT(B->ne[3] == n_seqs);
5392
0
        GGML_ASSERT(ids->ne[0] == n_seqs);
5393
0
        GGML_ASSERT(ggml_is_vector(ids));
5394
0
        GGML_ASSERT(A->ne[1] == n_head);
5395
0
        GGML_ASSERT(ggml_is_matrix(A));
5396
5397
0
        if (A->ne[0] != 1) {
5398
            // Mamba-1 has more granular decay factors
5399
0
            GGML_ASSERT(A->ne[0] == d_state);
5400
0
        }
5401
0
    }
5402
5403
    // concatenated y + ssm_states
5404
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]);
5405
5406
0
    result->op   = GGML_OP_SSM_SCAN;
5407
0
    result->src[0] = s;
5408
0
    result->src[1] = x;
5409
0
    result->src[2] = dt;
5410
0
    result->src[3] = A;
5411
0
    result->src[4] = B;
5412
0
    result->src[5] = C;
5413
0
    result->src[6] = ids;
5414
5415
0
    return result;
5416
0
}
5417
5418
// ggml_win_part
5419
5420
struct ggml_tensor * ggml_win_part(
5421
        struct ggml_context * ctx,
5422
        struct ggml_tensor  * a,
5423
0
        int                   w) {
5424
0
    GGML_ASSERT(a->ne[3] == 1);
5425
0
    GGML_ASSERT(a->type  == GGML_TYPE_F32);
5426
5427
    // padding
5428
0
    const int px = (w - a->ne[1]%w)%w;
5429
0
    const int py = (w - a->ne[2]%w)%w;
5430
5431
0
    const int npx = (px + a->ne[1])/w;
5432
0
    const int npy = (py + a->ne[2])/w;
5433
0
    const int np  = npx*npy;
5434
5435
0
    const int64_t ne[4] = { a->ne[0], w, w, np, };
5436
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5437
5438
0
    int32_t params[] = { npx, npy, w };
5439
0
    ggml_set_op_params(result, params, sizeof(params));
5440
5441
0
    result->op     = GGML_OP_WIN_PART;
5442
0
    result->src[0] = a;
5443
5444
0
    return result;
5445
0
}
5446
5447
// ggml_win_unpart
5448
5449
struct ggml_tensor * ggml_win_unpart(
5450
        struct ggml_context * ctx,
5451
        struct ggml_tensor  * a,
5452
        int                   w0,
5453
        int                   h0,
5454
0
        int                   w) {
5455
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5456
5457
0
    const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
5458
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
5459
5460
0
    int32_t params[] = { w };
5461
0
    ggml_set_op_params(result, params, sizeof(params));
5462
5463
0
    result->op     = GGML_OP_WIN_UNPART;
5464
0
    result->src[0] = a;
5465
5466
0
    return result;
5467
0
}
5468
5469
// ggml_get_rel_pos
5470
5471
struct ggml_tensor * ggml_get_rel_pos(
5472
        struct ggml_context * ctx,
5473
        struct ggml_tensor  * a,
5474
        int                   qh,
5475
0
        int                   kh) {
5476
0
    GGML_ASSERT(qh == kh);
5477
0
    GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
5478
5479
0
    const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
5480
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
5481
5482
0
    result->op     = GGML_OP_GET_REL_POS;
5483
0
    result->src[0] = a;
5484
5485
0
    return result;
5486
0
}
5487
5488
// ggml_add_rel_pos
5489
5490
static struct ggml_tensor * ggml_add_rel_pos_impl(
5491
        struct ggml_context * ctx,
5492
        struct ggml_tensor  * a,
5493
        struct ggml_tensor  * pw,
5494
        struct ggml_tensor  * ph,
5495
0
        bool                  inplace) {
5496
0
    GGML_ASSERT(ggml_are_same_shape(pw, ph));
5497
0
    GGML_ASSERT(ggml_is_contiguous(a));
5498
0
    GGML_ASSERT(ggml_is_contiguous(pw));
5499
0
    GGML_ASSERT(ggml_is_contiguous(ph));
5500
0
    GGML_ASSERT(ph->type == GGML_TYPE_F32);
5501
0
    GGML_ASSERT(pw->type == GGML_TYPE_F32);
5502
0
    GGML_ASSERT(pw->ne[3] == a->ne[2]);
5503
0
    GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
5504
0
    GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
5505
5506
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5507
0
    ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
5508
5509
0
    result->op     = GGML_OP_ADD_REL_POS;
5510
0
    result->src[0] = a;
5511
0
    result->src[1] = pw;
5512
0
    result->src[2] = ph;
5513
5514
0
    return result;
5515
0
}
5516
5517
struct ggml_tensor * ggml_add_rel_pos(
5518
        struct ggml_context * ctx,
5519
        struct ggml_tensor  * a,
5520
        struct ggml_tensor  * pw,
5521
0
        struct ggml_tensor  * ph) {
5522
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
5523
0
}
5524
5525
struct ggml_tensor * ggml_add_rel_pos_inplace(
5526
        struct ggml_context * ctx,
5527
        struct ggml_tensor  * a,
5528
        struct ggml_tensor  * pw,
5529
0
        struct ggml_tensor  * ph) {
5530
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
5531
0
}
5532
5533
// ggml_rwkv_wkv6
5534
5535
struct ggml_tensor * ggml_rwkv_wkv6(
5536
        struct ggml_context * ctx,
5537
        struct ggml_tensor  * k,
5538
        struct ggml_tensor  * v,
5539
        struct ggml_tensor  * r,
5540
        struct ggml_tensor  * tf,
5541
        struct ggml_tensor  * td,
5542
0
        struct ggml_tensor  * state) {
5543
0
    GGML_ASSERT(ggml_is_contiguous(k));
5544
0
    GGML_ASSERT(ggml_is_contiguous(v));
5545
0
    GGML_ASSERT(ggml_is_contiguous(r));
5546
0
    GGML_ASSERT(ggml_is_contiguous(tf));
5547
0
    GGML_ASSERT(ggml_is_contiguous(td));
5548
0
    GGML_ASSERT(ggml_is_contiguous(state));
5549
5550
0
    const int64_t S = k->ne[0];
5551
0
    const int64_t H = k->ne[1];
5552
0
    const int64_t n_tokens = k->ne[2];
5553
0
    const int64_t n_seqs = state->ne[1];
5554
0
    {
5555
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5556
0
        GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
5557
0
        GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
5558
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5559
0
    }
5560
5561
    // concat output and new_state
5562
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5563
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5564
5565
0
    result->op     = GGML_OP_RWKV_WKV6;
5566
0
    result->src[0] = k;
5567
0
    result->src[1] = v;
5568
0
    result->src[2] = r;
5569
0
    result->src[3] = tf;
5570
0
    result->src[4] = td;
5571
0
    result->src[5] = state;
5572
5573
0
    return result;
5574
0
}
5575
5576
// ggml_gated_linear_attn
5577
5578
struct ggml_tensor * ggml_gated_linear_attn(
5579
        struct ggml_context * ctx,
5580
        struct ggml_tensor  * k,
5581
        struct ggml_tensor  * v,
5582
        struct ggml_tensor  * q,
5583
        struct ggml_tensor  * g,
5584
        struct ggml_tensor  * state,
5585
0
        float scale) {
5586
0
    GGML_ASSERT(ggml_is_contiguous(k));
5587
0
    GGML_ASSERT(ggml_is_contiguous(v));
5588
0
    GGML_ASSERT(ggml_is_contiguous(q));
5589
0
    GGML_ASSERT(ggml_is_contiguous(g));
5590
0
    GGML_ASSERT(ggml_is_contiguous(state));
5591
5592
0
    const int64_t S = k->ne[0];
5593
0
    const int64_t H = k->ne[1];
5594
0
    const int64_t n_tokens = k->ne[2];
5595
0
    const int64_t n_seqs = state->ne[1];
5596
0
    {
5597
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5598
0
        GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
5599
0
        GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
5600
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5601
0
    }
5602
5603
    // concat output and new_state
5604
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5605
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5606
5607
0
    ggml_set_op_params_f32(result, 0, scale);
5608
5609
0
    result->op     = GGML_OP_GATED_LINEAR_ATTN;
5610
0
    result->src[0] = k;
5611
0
    result->src[1] = v;
5612
0
    result->src[2] = q;
5613
0
    result->src[3] = g;
5614
0
    result->src[4] = state;
5615
5616
0
    return result;
5617
0
}
5618
5619
// ggml_rwkv_wkv7
5620
5621
struct ggml_tensor * ggml_rwkv_wkv7(
5622
        struct ggml_context * ctx,
5623
        struct ggml_tensor  * r,
5624
        struct ggml_tensor  * w,
5625
        struct ggml_tensor  * k,
5626
        struct ggml_tensor  * v,
5627
        struct ggml_tensor  * a,
5628
        struct ggml_tensor  * b,
5629
0
        struct ggml_tensor  * state) {
5630
0
    GGML_ASSERT(ggml_is_contiguous(r));
5631
0
    GGML_ASSERT(ggml_is_contiguous(w));
5632
0
    GGML_ASSERT(ggml_is_contiguous(k));
5633
0
    GGML_ASSERT(ggml_is_contiguous(v));
5634
0
    GGML_ASSERT(ggml_is_contiguous(a));
5635
0
    GGML_ASSERT(ggml_is_contiguous(b));
5636
0
    GGML_ASSERT(ggml_is_contiguous(state));
5637
5638
0
    const int64_t S = k->ne[0];
5639
0
    const int64_t H = k->ne[1];
5640
0
    const int64_t n_tokens = k->ne[2];
5641
0
    const int64_t n_seqs = state->ne[1];
5642
0
    {
5643
0
        GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
5644
0
        GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
5645
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5646
0
        GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
5647
0
        GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
5648
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5649
0
    }
5650
5651
    // concat output and new_state
5652
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5653
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5654
5655
0
    result->op     = GGML_OP_RWKV_WKV7;
5656
0
    result->src[0] = r;
5657
0
    result->src[1] = w;
5658
0
    result->src[2] = k;
5659
0
    result->src[3] = v;
5660
0
    result->src[4] = a;
5661
0
    result->src[5] = b;
5662
0
    result->src[6] = state;
5663
5664
0
    return result;
5665
0
}
5666
5667
// ggml_unary
5668
5669
static struct ggml_tensor * ggml_unary_impl(
5670
        struct ggml_context * ctx,
5671
        struct ggml_tensor  * a,
5672
        enum ggml_unary_op    op,
5673
0
        bool                  inplace) {
5674
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
5675
5676
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5677
5678
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
5679
5680
0
    result->op     = GGML_OP_UNARY;
5681
0
    result->src[0] = a;
5682
5683
0
    return result;
5684
0
}
5685
5686
struct ggml_tensor * ggml_unary(
5687
        struct ggml_context * ctx,
5688
        struct ggml_tensor  * a,
5689
0
        enum ggml_unary_op    op) {
5690
0
    return ggml_unary_impl(ctx, a, op, false);
5691
0
}
5692
5693
struct ggml_tensor * ggml_unary_inplace(
5694
        struct ggml_context * ctx,
5695
        struct ggml_tensor  * a,
5696
0
        enum ggml_unary_op    op) {
5697
0
    return ggml_unary_impl(ctx, a, op, true);
5698
0
}
5699
5700
// ggml_map_custom1
5701
5702
static struct ggml_tensor * ggml_map_custom1_impl(
5703
        struct ggml_context      * ctx,
5704
        struct ggml_tensor       * a,
5705
        const  ggml_custom1_op_t   fun,
5706
        int                        n_tasks,
5707
        void                     * userdata,
5708
0
        bool                       inplace) {
5709
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5710
5711
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5712
5713
0
    struct ggml_map_custom1_op_params params = {
5714
0
        /*.fun      =*/ fun,
5715
0
        /*.n_tasks  =*/ n_tasks,
5716
0
        /*.userdata =*/ userdata
5717
0
    };
5718
0
    ggml_set_op_params(result, &params, sizeof(params));
5719
5720
0
    result->op     = GGML_OP_MAP_CUSTOM1;
5721
0
    result->src[0] = a;
5722
5723
0
    return result;
5724
0
}
5725
5726
struct ggml_tensor * ggml_map_custom1(
5727
        struct ggml_context      * ctx,
5728
        struct ggml_tensor       * a,
5729
        const  ggml_custom1_op_t   fun,
5730
        int                        n_tasks,
5731
0
        void                     * userdata) {
5732
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
5733
0
}
5734
5735
struct ggml_tensor * ggml_map_custom1_inplace(
5736
        struct ggml_context      * ctx,
5737
        struct ggml_tensor       * a,
5738
        const  ggml_custom1_op_t   fun,
5739
        int                        n_tasks,
5740
0
        void                     * userdata) {
5741
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
5742
0
}
5743
5744
// ggml_map_custom2
5745
5746
static struct ggml_tensor * ggml_map_custom2_impl(
5747
        struct ggml_context      * ctx,
5748
        struct ggml_tensor       * a,
5749
        struct ggml_tensor       * b,
5750
        const  ggml_custom2_op_t   fun,
5751
        int                        n_tasks,
5752
        void                     * userdata,
5753
0
        bool                       inplace) {
5754
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5755
5756
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5757
5758
0
    struct ggml_map_custom2_op_params params = {
5759
0
        /*.fun      =*/ fun,
5760
0
        /*.n_tasks  =*/ n_tasks,
5761
0
        /*.userdata =*/ userdata
5762
0
    };
5763
0
    ggml_set_op_params(result, &params, sizeof(params));
5764
5765
0
    result->op     = GGML_OP_MAP_CUSTOM2;
5766
0
    result->src[0] = a;
5767
0
    result->src[1] = b;
5768
5769
0
    return result;
5770
0
}
5771
5772
struct ggml_tensor * ggml_map_custom2(
5773
        struct ggml_context      * ctx,
5774
        struct ggml_tensor       * a,
5775
        struct ggml_tensor       * b,
5776
        const  ggml_custom2_op_t   fun,
5777
        int                        n_tasks,
5778
0
        void                     * userdata) {
5779
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
5780
0
}
5781
5782
struct ggml_tensor * ggml_map_custom2_inplace(
5783
        struct ggml_context      * ctx,
5784
        struct ggml_tensor       * a,
5785
        struct ggml_tensor       * b,
5786
        const  ggml_custom2_op_t   fun,
5787
        int                        n_tasks,
5788
0
        void                     * userdata) {
5789
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
5790
0
}
5791
5792
// ggml_map_custom3
5793
5794
static struct ggml_tensor * ggml_map_custom3_impl(
5795
        struct ggml_context      * ctx,
5796
        struct ggml_tensor       * a,
5797
        struct ggml_tensor       * b,
5798
        struct ggml_tensor       * c,
5799
        const  ggml_custom3_op_t   fun,
5800
        int                        n_tasks,
5801
        void                     * userdata,
5802
0
        bool                       inplace) {
5803
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5804
5805
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5806
5807
0
    struct ggml_map_custom3_op_params params = {
5808
0
        /*.fun      =*/ fun,
5809
0
        /*.n_tasks  =*/ n_tasks,
5810
0
        /*.userdata =*/ userdata
5811
0
    };
5812
0
    ggml_set_op_params(result, &params, sizeof(params));
5813
5814
0
    result->op     = GGML_OP_MAP_CUSTOM3;
5815
0
    result->src[0] = a;
5816
0
    result->src[1] = b;
5817
0
    result->src[2] = c;
5818
5819
0
    return result;
5820
0
}
5821
5822
struct ggml_tensor * ggml_map_custom3(
5823
        struct ggml_context      * ctx,
5824
        struct ggml_tensor       * a,
5825
        struct ggml_tensor       * b,
5826
        struct ggml_tensor       * c,
5827
        const  ggml_custom3_op_t   fun,
5828
        int                        n_tasks,
5829
0
        void                     * userdata) {
5830
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
5831
0
}
5832
5833
struct ggml_tensor * ggml_map_custom3_inplace(
5834
        struct ggml_context      * ctx,
5835
        struct ggml_tensor       * a,
5836
        struct ggml_tensor       * b,
5837
        struct ggml_tensor       * c,
5838
        const  ggml_custom3_op_t   fun,
5839
        int                        n_tasks,
5840
0
        void                     * userdata) {
5841
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
5842
0
}
5843
5844
struct ggml_tensor * ggml_custom_4d(
5845
        struct ggml_context * ctx,
5846
        enum ggml_type        type,
5847
        int64_t               ne0,
5848
        int64_t               ne1,
5849
        int64_t               ne2,
5850
        int64_t               ne3,
5851
        struct ggml_tensor ** args,
5852
        int                   n_args,
5853
        ggml_custom_op_t      fun,
5854
        int                   n_tasks,
5855
0
        void                * userdata) {
5856
5857
0
    GGML_ASSERT(n_args < GGML_MAX_SRC);
5858
5859
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
5860
5861
0
    struct ggml_custom_op_params params = {
5862
0
        /*.fun      =*/ fun,
5863
0
        /*.n_tasks  =*/ n_tasks,
5864
0
        /*.userdata =*/ userdata
5865
0
    };
5866
0
    ggml_set_op_params(result, &params, sizeof(params));
5867
5868
0
    result->op = GGML_OP_CUSTOM;
5869
0
    for (int i = 0; i < n_args; i++) {
5870
0
        result->src[i] = args[i];
5871
0
    }
5872
5873
0
    return result;
5874
0
}
5875
5876
struct ggml_tensor * ggml_custom_inplace(
5877
        struct ggml_context * ctx,
5878
        struct ggml_tensor  * a,
5879
        struct ggml_tensor ** args,
5880
        int                   n_args,
5881
        ggml_custom_op_t      fun,
5882
        int                   n_tasks,
5883
0
        void                * userdata) {
5884
5885
0
    GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
5886
5887
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
5888
5889
0
    struct ggml_custom_op_params params = {
5890
0
        /*.fun      =*/ fun,
5891
0
        /*.n_tasks  =*/ n_tasks,
5892
0
        /*.userdata =*/ userdata
5893
0
    };
5894
0
    ggml_set_op_params(result, &params, sizeof(params));
5895
5896
0
    result->op = GGML_OP_CUSTOM;
5897
0
    result->src[0] = a;
5898
0
    for (int i = 0; i < n_args; i++) {
5899
0
        result->src[i + 1] = args[i];
5900
0
    }
5901
5902
0
    return result;
5903
0
}
5904
// ggml_cross_entropy_loss
5905
5906
struct ggml_tensor * ggml_cross_entropy_loss(
5907
        struct ggml_context * ctx,
5908
        struct ggml_tensor  * a,
5909
0
        struct ggml_tensor  * b) {
5910
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
5911
5912
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
5913
5914
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS;
5915
0
    result->src[0] = a;
5916
0
    result->src[1] = b;
5917
5918
0
    return result;
5919
0
}
5920
5921
// ggml_cross_entropy_loss_back
5922
5923
struct ggml_tensor * ggml_cross_entropy_loss_back(
5924
        struct ggml_context * ctx,
5925
        struct ggml_tensor  * a,
5926
        struct ggml_tensor  * b,
5927
0
        struct ggml_tensor  * c) {
5928
0
    GGML_ASSERT(ggml_is_scalar(a));
5929
0
    GGML_ASSERT(ggml_are_same_shape(b, c));
5930
5931
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, b);
5932
5933
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
5934
0
    result->src[0] = a;
5935
0
    result->src[1] = b;
5936
0
    result->src[2] = c;
5937
5938
0
    return result;
5939
0
}
5940
5941
// opt_step_adamw
5942
5943
struct ggml_tensor * ggml_opt_step_adamw(
5944
        struct ggml_context * ctx,
5945
        struct ggml_tensor  * a,
5946
        struct ggml_tensor  * grad,
5947
        struct ggml_tensor  * m,
5948
        struct ggml_tensor  * v,
5949
0
        struct ggml_tensor  * adamw_params) {
5950
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
5951
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
5952
0
    GGML_ASSERT(ggml_are_same_shape(a, m));
5953
0
    GGML_ASSERT(ggml_are_same_shape(a, v));
5954
0
    GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
5955
0
    GGML_ASSERT(ggml_nelements(adamw_params) == 7);
5956
5957
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
5958
5959
0
    result->op     = GGML_OP_OPT_STEP_ADAMW;
5960
0
    result->src[0] = a;
5961
0
    result->src[1] = grad;
5962
0
    result->src[2] = m;
5963
0
    result->src[3] = v;
5964
0
    result->src[4] = adamw_params;
5965
5966
0
    return result;
5967
0
}
5968
5969
// opt_step_sgd
5970
5971
struct ggml_tensor * ggml_opt_step_sgd(
5972
        struct ggml_context * ctx,
5973
        struct ggml_tensor  * a,
5974
        struct ggml_tensor  * grad,
5975
0
        struct ggml_tensor  * params) {
5976
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
5977
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
5978
0
    GGML_ASSERT(params->type == GGML_TYPE_F32);
5979
0
    GGML_ASSERT(ggml_nelements(params) == 2);
5980
5981
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
5982
5983
0
    result->op     = GGML_OP_OPT_STEP_SGD;
5984
0
    result->src[0] = a;
5985
0
    result->src[1] = grad;
5986
0
    result->src[2] = params;
5987
5988
0
    return result;
5989
0
}
5990
5991
// solve_tri
5992
5993
struct ggml_tensor * ggml_solve_tri(
5994
        struct ggml_context * ctx,
5995
        struct ggml_tensor  * a,
5996
        struct ggml_tensor  * b,
5997
        bool                  left,
5998
        bool                  lower,
5999
0
        bool                  uni) {
6000
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
6001
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
6002
6003
    // A must be square and lower diagonal
6004
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
6005
    // B must have same outer dimension as A
6006
0
    GGML_ASSERT(a->ne[1] == b->ne[1]);
6007
6008
    // batch dimensions must be equal
6009
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
6010
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
6011
6012
0
    GGML_ASSERT(ggml_is_contiguous(a));
6013
0
    GGML_ASSERT(ggml_is_contiguous(b));
6014
6015
0
    GGML_ASSERT(lower && left && !uni); // TODO: support other variants
6016
6017
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
6018
6019
0
    result->op     = GGML_OP_SOLVE_TRI;
6020
0
    result->src[0] = a;
6021
0
    result->src[1] = b;
6022
6023
0
    return result;
6024
0
}
6025
6026
////////////////////////////////////////////////////////////////////////////////
6027
6028
0
struct ggml_hash_set ggml_hash_set_new(size_t size) {
6029
0
    size = ggml_hash_size(size);
6030
0
    struct ggml_hash_set result;
6031
0
    result.size = size;
6032
0
    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
6033
0
    result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
6034
0
    return result;
6035
0
}
6036
6037
0
void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
6038
0
    memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
6039
0
}
6040
6041
0
void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
6042
0
    GGML_FREE(hash_set->used);
6043
0
    GGML_FREE(hash_set->keys);
6044
0
}
6045
6046
0
size_t ggml_hash_size(size_t min_sz) {
6047
    // next primes after powers of two
6048
0
    static const size_t primes[] = {
6049
0
        2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
6050
0
        2053, 4099, 8209, 16411, 32771, 65537, 131101,
6051
0
        262147, 524309, 1048583, 2097169, 4194319, 8388617,
6052
0
        16777259, 33554467, 67108879, 134217757, 268435459,
6053
0
        536870923, 1073741827, 2147483659
6054
0
    };
6055
0
    static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
6056
6057
    // find the smallest prime that is larger or equal than min_sz
6058
0
    size_t l = 0;
6059
0
    size_t r = n_primes;
6060
0
    while (l < r) {
6061
0
        size_t m = (l + r)/2;
6062
0
        if (primes[m] < min_sz) {
6063
0
            l = m + 1;
6064
0
        } else {
6065
0
            r = m;
6066
0
        }
6067
0
    }
6068
0
    size_t sz = l < n_primes ? primes[l] : min_sz | 1;
6069
0
    return sz;
6070
0
}
6071
6072
struct hash_map {
6073
    struct ggml_hash_set set;
6074
    struct ggml_tensor ** vals;
6075
};
6076
6077
0
static struct hash_map * ggml_new_hash_map(size_t size) {
6078
0
    struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
6079
0
    result->set = ggml_hash_set_new(size);
6080
0
    result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
6081
0
    return result;
6082
0
}
6083
6084
0
static void ggml_hash_map_free(struct hash_map * map) {
6085
0
    ggml_hash_set_free(&map->set);
6086
0
    GGML_FREE(map->vals);
6087
0
    GGML_FREE(map);
6088
0
}
6089
6090
// utility functions to change gradients
6091
// isrc is the index of tensor in cgraph->visited_has_set.keys
6092
// the corresponding gradient (accumulators) are also at position isrc
6093
// if tensor has a gradient accumulator, modify that accumulator in-place
6094
// else if there is no gradient for tensor, set the corresponding value
6095
// else, just add/subtract/etc. the gradients
6096
6097
static void ggml_add_or_set(
6098
        struct ggml_context * ctx,
6099
        struct ggml_cgraph  * cgraph,
6100
        size_t                isrc,
6101
0
        struct ggml_tensor  * tensor) {
6102
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6103
0
    GGML_ASSERT(src);
6104
0
    if (cgraph->grads[isrc]) {
6105
0
        cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
6106
0
    } else {
6107
0
        cgraph->grads[isrc] = tensor;
6108
0
    }
6109
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6110
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6111
0
}
6112
6113
static void ggml_acc_or_set(
6114
        struct ggml_context * ctx,
6115
        struct ggml_cgraph  * cgraph,
6116
        size_t                isrc,
6117
        struct ggml_tensor  * tensor,
6118
        const  size_t         nb1,
6119
        const  size_t         nb2,
6120
        const  size_t         nb3,
6121
0
        const  size_t         offset) {
6122
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6123
0
    GGML_ASSERT(src);
6124
0
    if (cgraph->grads[isrc]) {
6125
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
6126
0
    } else {
6127
0
        struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
6128
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
6129
0
    }
6130
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
6131
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6132
0
}
6133
6134
static void ggml_add1_or_set(
6135
        struct ggml_context * ctx,
6136
        struct ggml_cgraph  * cgraph,
6137
        size_t                isrc,
6138
0
        struct ggml_tensor  * tensor) {
6139
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6140
0
    GGML_ASSERT(src);
6141
0
    if (cgraph->grads[isrc]) {
6142
0
        cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6143
0
    } else {
6144
0
        cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
6145
0
    }
6146
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6147
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6148
0
}
6149
6150
static void ggml_sub_or_set(
6151
        struct ggml_context * ctx,
6152
        struct ggml_cgraph  * cgraph,
6153
        size_t                isrc,
6154
0
        struct ggml_tensor  * tensor) {
6155
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6156
0
    GGML_ASSERT(src);
6157
0
    if (cgraph->grads[isrc]) {
6158
0
        cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6159
0
    } else {
6160
0
        cgraph->grads[isrc] = ggml_neg(ctx, tensor);
6161
0
    }
6162
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6163
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6164
0
}
6165
6166
static void ggml_compute_backward(
6167
0
        struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
6168
0
    struct ggml_tensor * tensor = cgraph->nodes[i];
6169
0
    struct ggml_tensor * grad   = ggml_graph_get_grad(cgraph, tensor);
6170
6171
0
    if (!grad) {
6172
0
        return;
6173
0
    }
6174
6175
0
    struct ggml_tensor * src0 = tensor->src[0];
6176
0
    struct ggml_tensor * src1 = tensor->src[1];
6177
0
    struct ggml_tensor * src2 = tensor->src[2];
6178
0
    struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
6179
0
    const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
6180
0
    const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
6181
0
    const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
6182
0
    const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
6183
0
    const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
6184
0
    const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
6185
6186
0
    switch (tensor->op) {
6187
0
        case GGML_OP_DUP: {
6188
0
            if (src0_needs_grads) {
6189
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6190
0
            }
6191
0
        } break;
6192
0
        case GGML_OP_ADD: {
6193
0
            if (src0_needs_grads) {
6194
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6195
0
            }
6196
0
            if (src1_needs_grads) {
6197
0
                struct ggml_tensor * tmp = grad;
6198
0
                if (!ggml_are_same_shape(src0, src1)) {
6199
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6200
0
                }
6201
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6202
0
            }
6203
0
        } break;
6204
0
        case GGML_OP_ADD1: {
6205
0
            if (src0_needs_grads) {
6206
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6207
0
            }
6208
0
            if (src1_needs_grads) {
6209
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
6210
0
            }
6211
0
        } break;
6212
0
        case GGML_OP_ACC: {
6213
0
            if (src0_needs_grads) {
6214
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6215
0
            }
6216
0
            if (src1_needs_grads) {
6217
0
                const size_t nb1    = ((int32_t *) tensor->op_params)[0];
6218
0
                const size_t nb2    = ((int32_t *) tensor->op_params)[1];
6219
0
                const size_t nb3    = ((int32_t *) tensor->op_params)[2];
6220
0
                const size_t offset = ((int32_t *) tensor->op_params)[3];
6221
6222
0
                struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
6223
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6224
0
                    nb1, nb2, nb3, offset);
6225
6226
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6227
0
            }
6228
0
        } break;
6229
0
        case GGML_OP_SUB: {
6230
0
            if (src0_needs_grads) {
6231
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6232
0
            }
6233
0
            if (src1_needs_grads) {
6234
0
                ggml_sub_or_set(ctx, cgraph, isrc1, grad);
6235
0
            }
6236
0
        } break;
6237
0
        case GGML_OP_MUL: {
6238
0
            if (src0_needs_grads) {
6239
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
6240
0
            }
6241
0
            if (src1_needs_grads) {
6242
0
                struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
6243
0
                if (!ggml_are_same_shape(src0, src1)) {
6244
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6245
0
                }
6246
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6247
0
            }
6248
0
        } break;
6249
0
        case GGML_OP_DIV: {
6250
0
            if (src0_needs_grads) {
6251
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
6252
0
            }
6253
0
            if (src1_needs_grads) {
6254
0
                ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
6255
0
            }
6256
0
        } break;
6257
0
        case GGML_OP_SQR: {
6258
0
            if (src0_needs_grads) {
6259
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
6260
0
            }
6261
0
        } break;
6262
0
        case GGML_OP_SQRT: {
6263
0
            if (src0_needs_grads) {
6264
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
6265
0
            }
6266
0
        } break;
6267
0
        case GGML_OP_LOG: {
6268
0
            if (src0_needs_grads) {
6269
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
6270
0
            }
6271
0
        } break;
6272
0
        case GGML_OP_SIN: {
6273
0
            if (src0_needs_grads) {
6274
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
6275
0
            }
6276
0
        } break;
6277
0
        case GGML_OP_COS: {
6278
0
            if (src0_needs_grads) {
6279
0
                ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
6280
0
            }
6281
0
        } break;
6282
0
        case GGML_OP_SUM: {
6283
0
            if (src0_needs_grads) {
6284
0
                ggml_add1_or_set(ctx, cgraph, isrc0, grad);
6285
0
            }
6286
0
        } break;
6287
0
        case GGML_OP_SUM_ROWS: {
6288
0
            if (src0_needs_grads) {
6289
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6290
0
            }
6291
0
        } break;
6292
0
        case GGML_OP_MEAN: {
6293
0
            if (src0_needs_grads) {
6294
0
                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false));
6295
0
            }
6296
0
        } break;
6297
0
        case GGML_OP_REPEAT: {
6298
0
            if (src0_needs_grads) {
6299
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
6300
0
            }
6301
0
        } break;
6302
0
        case GGML_OP_REPEAT_BACK: {
6303
0
            if (src0_needs_grads) {
6304
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6305
0
            }
6306
0
        } break;
6307
0
        case GGML_OP_RMS_NORM: {
6308
0
            if (src0_needs_grads) {
6309
0
                float eps;
6310
0
                memcpy(&eps, tensor->op_params, sizeof(float));
6311
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
6312
0
            }
6313
0
        } break;
6314
0
        case GGML_OP_MUL_MAT: {
6315
            // https://cs231n.github.io/optimization-2/#staged
6316
            // # forward pass
6317
            // s0 = np.random.randn(5, 10)
6318
            // s1 = np.random.randn(10, 3)
6319
            // t = s0.dot(s1)
6320
6321
            // # now suppose we had the gradient on t from above in the circuit
6322
            // dt = np.random.randn(*t.shape) # same shape as t
6323
            // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
6324
            // ds1 = t.T.dot(dt)
6325
6326
            // tensor.shape [m,p,qq,rr]
6327
            // src0.shape   [n,m,q1,r1]
6328
            // src1.shape   [n,p,qq,rr]
6329
6330
0
            if (src0_needs_grads) {
6331
0
                GGML_ASSERT(grad->ne[2] == src1->ne[2]);
6332
0
                GGML_ASSERT(grad->ne[3] == src1->ne[3]);
6333
0
                struct ggml_tensor * tmp =
6334
0
                    ggml_out_prod(ctx, // [n,m,qq,rr]
6335
0
                        src1,          // [n,p,qq,rr]
6336
0
                        grad);         // [m,p,qq,rr]
6337
0
                if (!ggml_are_same_shape(tmp, src0)) {
6338
0
                    GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
6339
0
                    GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
6340
0
                    GGML_ASSERT(tmp->ne[3] == 1);
6341
6342
0
                    const int64_t nr2 = tmp->ne[2] / src0->ne[2];
6343
0
                    const size_t nb2 = tmp->nb[2] * nr2;
6344
0
                    const size_t nb3 = tmp->nb[2];
6345
6346
0
                    tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
6347
0
                    tmp = ggml_repeat_back(ctx, tmp, src0);
6348
0
                }
6349
0
                ggml_add_or_set(ctx, cgraph, isrc0, tmp);
6350
0
            }
6351
0
            if (src1_needs_grads) {
6352
0
                ggml_add_or_set(ctx, cgraph, isrc1,
6353
                        // ggml_mul_mat(ctx,                   // [n,p,qq,rr]
6354
                        //     ggml_cont(ctx,                  // [m,n,q1,r1]
6355
                        //         ggml_transpose(ctx, src0)), // [m,n,q1,r1]
6356
                        //     grad),                          // [m,p,qq,rr]
6357
6358
                        // when src0 is bigger than tensor->grad (this is mostly the case in llama),
6359
                        // avoid transpose of src0, rather transpose smaller tensor->grad
6360
                        // and then use ggml_out_prod
6361
0
                        ggml_out_prod(ctx,      // [n,p,qq,rr]
6362
0
                            src0,               // [n,m,q1,r1]
6363
0
                            ggml_transpose(ctx, // [p,m,qq,rr]
6364
0
                                grad)));        // [m,p,qq,rr]
6365
0
            }
6366
0
        } break;
6367
0
        case GGML_OP_SCALE: {
6368
0
            if (src0_needs_grads) {
6369
0
                float s;
6370
0
                memcpy(&s, tensor->op_params, sizeof(float));
6371
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false));
6372
0
            }
6373
0
        } break;
6374
0
        case GGML_OP_SET: {
6375
0
            const size_t nb1    = ((const int32_t *) tensor->op_params)[0];
6376
0
            const size_t nb2    = ((const int32_t *) tensor->op_params)[1];
6377
0
            const size_t nb3    = ((const int32_t *) tensor->op_params)[2];
6378
0
            const size_t offset = ((const int32_t *) tensor->op_params)[3];
6379
6380
0
            struct ggml_tensor * tensor_grad_view = NULL;
6381
6382
0
            if (src0_needs_grads || src1_needs_grads) {
6383
0
                GGML_ASSERT(src0->type == tensor->type);
6384
0
                GGML_ASSERT(!cgraph->grads[isrc0] ||                      cgraph->grads[isrc0]->type == grad->type);
6385
0
                GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);
6386
6387
0
                tensor_grad_view = ggml_view_4d(ctx,
6388
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6389
0
                    nb1, nb2, nb3, offset);
6390
0
            }
6391
6392
0
            if (src0_needs_grads) {
6393
0
                struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
6394
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
6395
0
            }
6396
6397
0
            if (src1_needs_grads) {
6398
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6399
0
            }
6400
0
        } break;
6401
0
        case GGML_OP_CPY: {
6402
            // cpy overwrites value of src1 by src0 and returns view(src1)
6403
            // the overwriting is mathematically equivalent to:
6404
            // tensor = src0 * 1 + src1 * 0
6405
0
            if (src0_needs_grads) {
6406
                // dsrc0 = dtensor * 1
6407
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
6408
0
            }
6409
0
            if (src1_needs_grads) {
6410
                // dsrc1 = dtensor * 0 -> noop
6411
0
            }
6412
0
        } break;
6413
0
        case GGML_OP_CONT: {
6414
            // same as cpy
6415
0
            if (src0_needs_grads) {
6416
0
                GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
6417
0
                GGML_ASSERT(ggml_is_contiguous(grad));
6418
0
                GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
6419
0
                ggml_add_or_set(ctx, cgraph, isrc0,
6420
0
                    ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
6421
0
            }
6422
0
        } break;
6423
0
        case GGML_OP_RESHAPE: {
6424
0
            if (src0_needs_grads) {
6425
0
                struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
6426
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
6427
0
            }
6428
0
        } break;
6429
0
        case GGML_OP_VIEW: {
6430
0
            if (src0_needs_grads) {
6431
0
                size_t offset;
6432
6433
0
                memcpy(&offset, tensor->op_params, sizeof(offset));
6434
6435
0
                size_t nb1 = tensor->nb[1];
6436
0
                size_t nb2 = tensor->nb[2];
6437
0
                size_t nb3 = tensor->nb[3];
6438
6439
0
                if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
6440
                    // gradient is typically F32, but src0 could be other type
6441
0
                    size_t ng = ggml_element_size(cgraph->grads[isrc0]);
6442
0
                    size_t n0 = ggml_element_size(src0);
6443
0
                    GGML_ASSERT(offset % n0 == 0);
6444
0
                    GGML_ASSERT(nb1 % n0 == 0);
6445
0
                    GGML_ASSERT(nb2 % n0 == 0);
6446
0
                    GGML_ASSERT(nb3 % n0 == 0);
6447
0
                    offset = (offset / n0) * ng;
6448
0
                    nb1 = (nb1 / n0) * ng;
6449
0
                    nb2 = (nb2 / n0) * ng;
6450
0
                    nb3 = (nb3 / n0) * ng;
6451
0
                }
6452
6453
0
                ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
6454
0
            }
6455
0
        } break;
6456
0
        case GGML_OP_PERMUTE: {
6457
0
            if (src0_needs_grads) {
6458
0
                const int32_t * axes = (const int32_t *) tensor->op_params;
6459
0
                const int axis0 = axes[0] & 0x3;
6460
0
                const int axis1 = axes[1] & 0x3;
6461
0
                const int axis2 = axes[2] & 0x3;
6462
0
                const int axis3 = axes[3] & 0x3;
6463
0
                int axb[4] = {0,0,0,0}; // axes backward
6464
0
                axb[axis0] = 0;
6465
0
                axb[axis1] = 1;
6466
0
                axb[axis2] = 2;
6467
0
                axb[axis3] = 3;
6468
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
6469
0
            }
6470
0
        } break;
6471
0
        case GGML_OP_TRANSPOSE: {
6472
0
            if (src0_needs_grads) {
6473
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
6474
0
            }
6475
0
        } break;
6476
0
        case GGML_OP_GET_ROWS: {
6477
0
            if (src0_needs_grads) {
6478
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
6479
0
            }
6480
0
            if (src1_needs_grads) {
6481
                // noop
6482
0
            }
6483
0
        } break;
6484
0
        case GGML_OP_DIAG_MASK_INF: {
6485
0
            if (src0_needs_grads) {
6486
                /* ggml_diag_mask_inf_impl() shouldn't be here */
6487
                /* ref:  https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */
6488
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6489
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6490
0
            }
6491
0
        } break;
6492
0
        case GGML_OP_DIAG_MASK_ZERO: {
6493
0
            if (src0_needs_grads) {
6494
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6495
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6496
0
            }
6497
0
        } break;
6498
0
        case GGML_OP_SOFT_MAX: {
6499
0
            if (src0_needs_grads) {
6500
0
                float scale    = 1.0f;
6501
0
                float max_bias = 0.0f;
6502
6503
0
                memcpy(&scale,    (const float *) tensor->op_params + 0, sizeof(float));
6504
0
                memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
6505
6506
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
6507
0
            }
6508
0
            GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
6509
0
        } break;
6510
0
        case GGML_OP_ROPE: {
6511
0
            if (src0_needs_grads) {
6512
                //const int n_past = ((int32_t *) tensor->op_params)[0];
6513
0
                const int n_dims     = ((const int32_t *) tensor->op_params)[1];
6514
0
                const int mode       = ((const int32_t *) tensor->op_params)[2];
6515
                //const int n_ctx      = ((int32_t *) tensor->op_params)[3];
6516
0
                const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
6517
0
                float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
6518
0
                int sections[4] = {0, 0, 0, 0};
6519
6520
0
                memcpy(&freq_base,   (const float *) tensor->op_params +  5, sizeof(float));
6521
0
                memcpy(&freq_scale,  (const float *) tensor->op_params +  6, sizeof(float));
6522
0
                memcpy(&ext_factor,  (const float *) tensor->op_params +  7, sizeof(float));
6523
0
                memcpy(&attn_factor, (const float *) tensor->op_params +  8, sizeof(float));
6524
0
                memcpy(&beta_fast,   (const float *) tensor->op_params +  9, sizeof(float));
6525
0
                memcpy(&beta_slow,   (const float *) tensor->op_params + 10, sizeof(float));
6526
0
                memcpy(&sections,                    tensor->op_params + 11, sizeof(sections));
6527
6528
0
                struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
6529
0
                    ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
6530
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
6531
0
                    ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
6532
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
6533
0
                ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
6534
0
            }
6535
0
            GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
6536
0
        } break;
6537
0
        case GGML_OP_IM2COL: {
6538
0
            if (src1_needs_grads) {
6539
0
                const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
6540
0
                const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
6541
0
                const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
6542
0
                const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
6543
0
                const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
6544
0
                const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
6545
0
                const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
6546
6547
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
6548
0
            }
6549
0
        } break;
6550
0
        case GGML_OP_POOL_2D: {
6551
0
            if (src0_needs_grads) {
6552
0
                const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
6553
0
                const      int32_t      k0 = ggml_get_op_params_i32(tensor, 1);
6554
0
                const      int32_t      k1 = ggml_get_op_params_i32(tensor, 2);
6555
0
                const      int32_t      s0 = ggml_get_op_params_i32(tensor, 3);
6556
0
                const      int32_t      s1 = ggml_get_op_params_i32(tensor, 4);
6557
0
                const      int32_t      p0 = ggml_get_op_params_i32(tensor, 5);
6558
0
                const      int32_t      p1 = ggml_get_op_params_i32(tensor, 6);
6559
6560
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
6561
0
            }
6562
0
        } break;
6563
0
        case GGML_OP_WIN_PART:
6564
0
        case GGML_OP_WIN_UNPART:
6565
0
        case GGML_OP_UNARY: {
6566
0
            switch (ggml_get_unary_op(tensor)) {
6567
0
                case GGML_UNARY_OP_ABS: {
6568
0
                    if (src0_needs_grads) {
6569
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
6570
0
                    }
6571
0
                } break;
6572
0
                case GGML_UNARY_OP_SGN: {
6573
                    // noop
6574
0
                } break;
6575
0
                case GGML_UNARY_OP_NEG: {
6576
0
                    if (src0_needs_grads) {
6577
0
                        ggml_sub_or_set(ctx, cgraph, isrc0, grad);
6578
0
                    }
6579
0
                } break;
6580
0
                case GGML_UNARY_OP_STEP: {
6581
                    // noop
6582
0
                } break;
6583
0
                case GGML_UNARY_OP_RELU: {
6584
0
                    if (src0_needs_grads) {
6585
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
6586
0
                    }
6587
0
                } break;
6588
0
                case GGML_UNARY_OP_SILU: {
6589
0
                    if (src0_needs_grads) {
6590
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
6591
0
                    }
6592
0
                } break;
6593
0
                case GGML_UNARY_OP_EXP: {
6594
0
                    if (src0_needs_grads) {
6595
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
6596
0
                    }
6597
0
                } break;
6598
0
                case GGML_UNARY_OP_EXPM1: {
6599
0
                    if (src0_needs_grads) {
6600
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
6601
0
                    }
6602
0
                } break;
6603
0
                case GGML_UNARY_OP_SOFTPLUS: {
6604
0
                    if (src0_needs_grads) {
6605
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
6606
0
                    }
6607
0
                } break;
6608
0
                default: {
6609
0
                    fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
6610
0
                        __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
6611
0
                    GGML_ABORT("fatal error");
6612
0
                } //break;
6613
0
            }
6614
0
        } break;
6615
0
        case GGML_OP_CROSS_ENTROPY_LOSS: {
6616
0
            if (src0_needs_grads) {
6617
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
6618
0
            }
6619
0
            GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
6620
0
        } break;
6621
0
        case GGML_OP_GLU: {
6622
0
            switch (ggml_get_glu_op(tensor)) {
6623
0
                case GGML_GLU_OP_SWIGLU: {
6624
0
                    if (src0_needs_grads) {
6625
0
                        GGML_ASSERT(src1 && "backward pass only implemented for split swiglu");
6626
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0));
6627
0
                    }
6628
0
                    if (src1_needs_grads) {
6629
0
                        ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad));
6630
0
                    }
6631
0
                } break;
6632
0
                default: {
6633
0
                    GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor)));
6634
0
                } //break;
6635
0
            }
6636
0
        } break;
6637
0
        case GGML_OP_NONE: {
6638
            // noop
6639
0
        } break;
6640
0
        case GGML_OP_COUNT:
6641
0
        default: {
6642
0
            GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
6643
0
        } //break;
6644
0
    }
6645
6646
0
    GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
6647
0
    GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
6648
0
    GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
6649
0
}
6650
6651
0
static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
6652
    // check if already visited
6653
0
    size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
6654
0
    GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
6655
0
    if (!ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
6656
        // This is the first time we see this node in the current graph.
6657
0
        cgraph->visited_hash_set.keys[node_hash_pos] = node;
6658
0
        ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
6659
0
        cgraph->use_counts[node_hash_pos] = 0;
6660
0
    } else {
6661
        // already visited
6662
0
        return node_hash_pos;
6663
0
    }
6664
6665
0
    for (int i = 0; i < GGML_MAX_SRC; ++i) {
6666
0
        const int k =
6667
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
6668
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
6669
0
            /* unknown order, just fall back to using i */ i;
6670
6671
0
        struct ggml_tensor * src = node->src[k];
6672
0
        if (src) {
6673
0
            size_t src_hash_pos = ggml_visit_parents(cgraph, src);
6674
6675
            // Update the use count for this operand.
6676
0
            cgraph->use_counts[src_hash_pos]++;
6677
0
        }
6678
0
    }
6679
6680
0
    if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
6681
        // reached a leaf node, not part of the gradient graph (e.g. a constant)
6682
0
        GGML_ASSERT(cgraph->n_leafs < cgraph->size);
6683
6684
0
        if (strlen(node->name) == 0) {
6685
0
            ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
6686
0
        }
6687
6688
0
        cgraph->leafs[cgraph->n_leafs] = node;
6689
0
        cgraph->n_leafs++;
6690
0
    } else {
6691
0
        GGML_ASSERT(cgraph->n_nodes < cgraph->size);
6692
6693
0
        if (strlen(node->name) == 0) {
6694
0
            ggml_format_name(node, "node_%d", cgraph->n_nodes);
6695
0
        }
6696
6697
0
        cgraph->nodes[cgraph->n_nodes] = node;
6698
0
        cgraph->n_nodes++;
6699
0
    }
6700
6701
0
    return node_hash_pos;
6702
0
}
6703
6704
0
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
6705
0
    if (!expand) {
6706
        // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
6707
0
        ggml_graph_clear(cgraph);
6708
0
    }
6709
6710
0
    const int n0 = cgraph->n_nodes;
6711
6712
0
    ggml_visit_parents(cgraph, tensor);
6713
6714
0
    const int n_new = cgraph->n_nodes - n0;
6715
0
    GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
6716
6717
0
    if (n_new > 0) {
6718
        // the last added node should always be starting point
6719
0
        GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
6720
0
    }
6721
0
}
6722
6723
0
void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
6724
0
    ggml_build_forward_impl(cgraph, tensor, true);
6725
0
}
6726
6727
void ggml_build_backward_expand(
6728
        struct ggml_context *  ctx,
6729
        struct ggml_cgraph  *  cgraph,
6730
0
        struct ggml_tensor  ** grad_accs) {
6731
0
    GGML_ASSERT(cgraph->n_nodes > 0);
6732
0
    GGML_ASSERT(cgraph->grads);
6733
0
    GGML_ASSERT(cgraph->grad_accs);
6734
6735
0
    const int n_nodes_f = cgraph->n_nodes;
6736
6737
0
    memset(cgraph->grads,     0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6738
0
    memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6739
0
    bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
6740
6741
0
    {
6742
0
        bool any_params = false;
6743
0
        bool any_loss   = false;
6744
0
        for (int i = 0; i < n_nodes_f; ++i) {
6745
0
            struct ggml_tensor * node = cgraph->nodes[i];
6746
0
            any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
6747
0
            any_loss   = any_loss   || (node->flags & GGML_TENSOR_FLAG_LOSS);
6748
0
        }
6749
0
        GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
6750
0
        GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
6751
0
    }
6752
6753
0
    for (int i = 0; i < n_nodes_f; ++i) {
6754
0
        struct ggml_tensor * node = cgraph->nodes[i];
6755
6756
0
        if (node->type == GGML_TYPE_I32) {
6757
0
            continue;
6758
0
        }
6759
6760
0
        bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
6761
0
        bool ignore_src[GGML_MAX_SRC] = {false};
6762
0
        switch (node->op) {
6763
            // gradients in node->src[0] for one reason or another have no effect on output gradients
6764
0
            case GGML_OP_IM2COL:      // only used for its shape
6765
0
            case GGML_OP_IM2COL_BACK: // same as IM2COL
6766
0
                ignore_src[0] = true;
6767
0
                break;
6768
0
            case GGML_OP_UNARY: {
6769
0
                const enum ggml_unary_op uop = ggml_get_unary_op(node);
6770
                // SGN and STEP unary ops are piecewise constant
6771
0
                if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
6772
0
                    ignore_src[0] = true;
6773
0
                }
6774
0
            } break;
6775
6776
            // gradients in node->src[1] for one reason or another have no effect on output gradients
6777
0
            case GGML_OP_CPY:           // gradients in CPY target are irrelevant
6778
0
            case GGML_OP_GET_ROWS:      // row indices not differentiable
6779
0
            case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
6780
0
            case GGML_OP_ROPE:          // positions not differentiable
6781
0
                ignore_src[1] = true;
6782
0
                break;
6783
6784
0
            default:
6785
0
                break;
6786
0
        }
6787
0
        for (int j = 0; j < GGML_MAX_SRC; ++j) {
6788
0
            if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
6789
0
                continue;
6790
0
            }
6791
0
            GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
6792
0
            node_needs_grad = true;
6793
0
            break;
6794
0
        }
6795
0
        if (!node_needs_grad) {
6796
0
            continue;
6797
0
        }
6798
6799
        // inplace operations are currently not supported
6800
0
        GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
6801
0
            node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
6802
6803
0
        const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
6804
0
        GGML_ASSERT(ihash != GGML_HASHSET_FULL);
6805
0
        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
6806
0
        if (grad_accs && grad_accs[i]) {
6807
0
            cgraph->grad_accs[ihash] = grad_accs[i];
6808
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6809
0
        } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
6810
            // loss tensors always need a gradient accumulator
6811
0
            cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
6812
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6813
0
        }
6814
0
        grads_needed[ihash] = true;
6815
0
    }
6816
6817
0
    for (int i = n_nodes_f - 1; i >= 0; --i) {
6818
        // inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation
6819
        // use allocator to automatically make inplace operations
6820
0
        ggml_compute_backward(ctx, cgraph, i, grads_needed);
6821
0
    }
6822
6823
0
    free(grads_needed);
6824
0
}
6825
6826
0
static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
6827
0
    void * ptr = *p;
6828
0
    ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
6829
0
    *p = (void *) ((char *) ptr + size);
6830
0
    return ptr;
6831
0
}
6832
6833
0
static size_t ggml_graph_nbytes(size_t size, bool grads) {
6834
0
    size_t hash_size = ggml_hash_size(size * 2);
6835
0
    void * p = 0;
6836
0
    incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
6837
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
6838
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
6839
0
    incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts
6840
0
    incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
6841
0
    if (grads) {
6842
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
6843
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
6844
0
    }
6845
0
    incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
6846
6847
0
    size_t nbytes = (size_t) p;
6848
0
    return nbytes;
6849
0
}
6850
6851
0
size_t ggml_graph_overhead_custom(size_t size, bool grads) {
6852
0
    return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
6853
0
}
6854
6855
0
size_t ggml_graph_overhead(void) {
6856
0
    return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
6857
0
}
6858
6859
0
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
6860
0
    const size_t obj_size = ggml_graph_nbytes(size, grads);
6861
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
6862
0
    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
6863
6864
    // the size of the hash table is doubled since it needs to hold both nodes and leafs
6865
0
    size_t hash_size = ggml_hash_size(size * 2);
6866
6867
0
    void * p = cgraph + 1;
6868
6869
0
    struct ggml_tensor ** nodes_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6870
0
    struct ggml_tensor ** leafs_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6871
0
    int32_t             * use_counts_ptr =         incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t));
6872
0
    struct ggml_tensor ** hash_keys_ptr  =         incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6873
0
    struct ggml_tensor ** grads_ptr      = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
6874
0
    struct ggml_tensor ** grad_accs_ptr  = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
6875
6876
0
    ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
6877
6878
    // check that we allocated the correct amount of memory
6879
0
    assert(obj_size == (size_t)((char *)p - (char *)cgraph));
6880
6881
0
    *cgraph = (struct ggml_cgraph) {
6882
0
        /*.size         =*/ size,
6883
0
        /*.n_nodes      =*/ 0,
6884
0
        /*.n_leafs      =*/ 0,
6885
0
        /*.nodes        =*/ nodes_ptr,
6886
0
        /*.grads        =*/ grads_ptr,
6887
0
        /*.grad_accs    =*/ grad_accs_ptr,
6888
0
        /*.leafs        =*/ leafs_ptr,
6889
0
        /*.use_counts   =*/ use_counts_ptr,
6890
0
        /*.hash_table   =*/ { hash_size, hash_used, hash_keys_ptr },
6891
0
        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
6892
0
    };
6893
6894
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
6895
0
    if (grads) {
6896
0
        memset(cgraph->grads,     0, hash_size*sizeof(struct ggml_tensor *));
6897
0
        memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
6898
0
    }
6899
6900
0
    return cgraph;
6901
0
}
6902
6903
0
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
6904
0
    return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
6905
0
}
6906
6907
0
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
6908
0
    struct ggml_cgraph cgraph = {
6909
0
        /*.size             =*/ 0,
6910
0
        /*.n_nodes          =*/ i1 - i0,
6911
0
        /*.n_leafs          =*/ 0,
6912
0
        /*.nodes            =*/ cgraph0->nodes + i0,
6913
0
        /*.grads            =*/ NULL, // gradients would need visited_hash_set
6914
0
        /*.grad_accs        =*/ NULL,
6915
0
        /*.leafs            =*/ NULL,
6916
0
        /*.use_counts       =*/ cgraph0->use_counts,
6917
0
        /*.visited_hash_set =*/ cgraph0->visited_hash_set,
6918
0
        /*.order            =*/ cgraph0->order,
6919
0
    };
6920
6921
0
    return cgraph;
6922
0
}
6923
6924
0
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
6925
0
    GGML_ASSERT(dst->size >= src->n_leafs);
6926
0
    GGML_ASSERT(dst->size >= src->n_nodes);
6927
0
    GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
6928
6929
0
    dst->n_leafs = src->n_leafs;
6930
0
    dst->n_nodes = src->n_nodes;
6931
0
    dst->order   = src->order;
6932
6933
0
    for (int i = 0; i < src->n_leafs; ++i) {
6934
0
        dst->leafs[i] = src->leafs[i];
6935
0
    }
6936
6937
0
    for (int i = 0; i < src->n_nodes; ++i) {
6938
0
        dst->nodes[i] = src->nodes[i];
6939
0
    }
6940
6941
0
    for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
6942
        // copy all hashset keys (tensors) that are in use
6943
0
        if (ggml_bitset_get(src->visited_hash_set.used, i)) {
6944
0
            size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
6945
0
            dst->use_counts[new_hash_pos] = src->use_counts[i];
6946
0
        }
6947
0
    }
6948
6949
0
    if (dst->grads) {
6950
0
        memset(dst->grads,     0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
6951
0
        memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
6952
0
    }
6953
0
    if (src->grads) {
6954
0
        GGML_ASSERT(dst->grads     != NULL);
6955
0
        GGML_ASSERT(dst->grad_accs != NULL);
6956
0
        for (int i = 0; i < src->n_nodes; ++i) {
6957
0
            const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
6958
0
            const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
6959
6960
0
            GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
6961
0
            GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
6962
0
            GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
6963
0
            GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
6964
6965
0
            dst->grads[igrad_dst]     = src->grads[igrad_src];
6966
0
            dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
6967
0
        }
6968
0
    }
6969
0
}
6970
6971
0
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
6972
0
    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
6973
0
    ggml_graph_cpy(cgraph, result);
6974
0
    return result;
6975
0
}
6976
6977
0
struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
6978
0
    if (ggml_is_empty(tensor)) {
6979
0
        return tensor;
6980
0
    }
6981
0
    if (tensor->buffer) {
6982
0
        ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
6983
0
    } else {
6984
0
        GGML_ASSERT(tensor->data);
6985
0
        memset(tensor->data, 0, ggml_nbytes(tensor));
6986
0
    }
6987
0
    return tensor;
6988
0
}
6989
6990
0
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
6991
0
    if (!cgraph) {
6992
0
        return;
6993
0
    }
6994
0
    GGML_ASSERT(cgraph->grads != NULL);
6995
6996
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
6997
0
        struct ggml_tensor * node     = cgraph->nodes[i];
6998
0
        struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);
6999
7000
0
        if (node->op == GGML_OP_OPT_STEP_ADAMW) {
7001
            // clear momenta
7002
0
            ggml_set_zero(node->src[2]);
7003
0
            ggml_set_zero(node->src[3]);
7004
0
        }
7005
7006
        // initial gradients of loss should be 1, 0 otherwise
7007
0
        if (grad_acc) {
7008
0
            if (node->flags & GGML_TENSOR_FLAG_LOSS) {
7009
0
                GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
7010
0
                GGML_ASSERT(ggml_is_scalar(grad_acc));
7011
7012
0
                const float onef = 1.0f;
7013
0
                if (grad_acc->buffer) {
7014
0
                    ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
7015
0
                } else {
7016
0
                    GGML_ASSERT(grad_acc->data);
7017
0
                    *((float *) grad_acc->data) = onef;
7018
0
                }
7019
0
            } else {
7020
0
                ggml_set_zero(grad_acc);
7021
0
            }
7022
0
        }
7023
0
    }
7024
0
}
7025
7026
0
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
7027
0
    cgraph->n_leafs = 0;
7028
0
    cgraph->n_nodes = 0;
7029
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7030
0
}
7031
7032
0
int ggml_graph_size(struct ggml_cgraph * cgraph) {
7033
0
    return cgraph->size;
7034
0
}
7035
7036
0
struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
7037
0
    if (i < 0) {
7038
0
        GGML_ASSERT(cgraph->n_nodes + i >= 0);
7039
0
        return cgraph->nodes[cgraph->n_nodes + i];
7040
0
    }
7041
7042
0
    GGML_ASSERT(i < cgraph->n_nodes);
7043
0
    return cgraph->nodes[i];
7044
0
}
7045
7046
0
struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
7047
0
    return cgraph->nodes;
7048
0
}
7049
7050
0
int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
7051
0
    return cgraph->n_nodes;
7052
0
}
7053
7054
0
void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
7055
0
    GGML_ASSERT(cgraph->size > cgraph->n_nodes);
7056
0
    cgraph->nodes[cgraph->n_nodes] = tensor;
7057
0
    cgraph->n_nodes++;
7058
0
}
7059
7060
0
struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
7061
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7062
0
        struct ggml_tensor * leaf = cgraph->leafs[i];
7063
7064
0
        if (strcmp(leaf->name, name) == 0) {
7065
0
            return leaf;
7066
0
        }
7067
0
    }
7068
7069
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7070
0
        struct ggml_tensor * node = cgraph->nodes[i];
7071
7072
0
        if (strcmp(node->name, name) == 0) {
7073
0
            return node;
7074
0
        }
7075
0
    }
7076
7077
0
    return NULL;
7078
0
}
7079
7080
0
struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7081
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7082
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
7083
0
}
7084
7085
0
struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7086
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7087
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
7088
0
}
7089
7090
0
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
7091
0
    GGML_LOG_INFO("=== GRAPH ===\n");
7092
7093
0
    GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
7094
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7095
0
        struct ggml_tensor * node = cgraph->nodes[i];
7096
7097
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
7098
0
                i,
7099
0
                node->ne[0], node->ne[1], node->ne[2],
7100
0
                ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
7101
0
                      ggml_graph_get_grad(cgraph, node) ? "g" : " ");
7102
0
    }
7103
7104
0
    GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
7105
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7106
0
        struct ggml_tensor * node = cgraph->leafs[i];
7107
7108
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
7109
0
                i,
7110
0
                node->ne[0], node->ne[1],
7111
0
                ggml_op_name(node->op),
7112
0
                ggml_get_name(node));
7113
0
    }
7114
7115
0
    GGML_LOG_INFO("========================================\n");
7116
0
}
7117
7118
static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
7119
                                      const int *                idxs,
7120
                                      int                        count,
7121
0
                                      const struct ggml_tensor * tensor) {
7122
0
    GGML_ASSERT(cgraph && idxs);
7123
0
    for (int i = 0; i < count; ++i) {
7124
0
        const int node_idx = idxs[i];
7125
7126
0
        if (node_idx >= cgraph->n_nodes) {
7127
0
            return -1;
7128
0
        }
7129
0
        if (cgraph->nodes[node_idx] == tensor) {
7130
0
            return i;
7131
0
        }
7132
0
    }
7133
0
    return -1;
7134
0
}
7135
7136
bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
7137
                                const int *                node_idxs,
7138
                                int                        count,
7139
                                const enum ggml_op *       ops,
7140
                                const int *                outputs,
7141
0
                                int                        num_outputs) {
7142
0
    GGML_ASSERT(outputs && num_outputs > 0);
7143
7144
0
    for (int i = 0; i < count; ++i) {
7145
0
        if (node_idxs[i] >= cgraph->n_nodes) {
7146
0
            return false;
7147
0
        }
7148
7149
0
        const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
7150
7151
0
        if (node->op != ops[i]) {
7152
0
            return false;
7153
0
        }
7154
7155
0
        if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
7156
0
            continue;
7157
0
        }
7158
7159
0
        if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
7160
0
            return false;
7161
0
        }
7162
7163
0
        int subgraph_uses = 0;
7164
0
        for (int j = i + 1; j < count; ++j) {
7165
0
            const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
7166
0
            for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
7167
0
                if (other_node->src[src_idx] == node) {
7168
0
                    subgraph_uses++;
7169
0
                }
7170
0
            }
7171
0
        }
7172
7173
0
        if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
7174
0
            return false;
7175
0
        }
7176
7177
        // if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph
7178
0
        struct ggml_tensor * view_src = node->view_src;
7179
0
        while (view_src) {
7180
0
            if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
7181
0
                return false;
7182
0
            }
7183
0
            view_src = view_src->view_src;
7184
0
        }
7185
0
    }
7186
7187
0
    return true;
7188
0
}
7189
7190
// check if node is part of the graph
7191
0
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7192
0
    if (cgraph == NULL) {
7193
0
        return true;
7194
0
    }
7195
7196
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7197
0
        if (cgraph->nodes[i] == node) {
7198
0
            return true;
7199
0
        }
7200
0
    }
7201
7202
0
    return false;
7203
0
}
7204
7205
0
static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7206
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7207
0
        struct ggml_tensor * parent = cgraph->nodes[i];
7208
0
        struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);
7209
7210
0
        if (grad == node) {
7211
0
            return parent;
7212
0
        }
7213
0
    }
7214
7215
0
    return NULL;
7216
0
}
7217
7218
0
static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7219
0
    struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
7220
0
    struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
7221
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
7222
0
            gparent0 ? (void *) gparent0 : (void *) parent,
7223
0
            gparent ? (void *) gparent : (void *) node,
7224
0
            gparent ? "empty" : "vee",
7225
0
            gparent ? "dashed" : "solid",
7226
0
            label);
7227
0
}
7228
7229
0
static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7230
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ label = \"%s\"; ]\n",
7231
0
            (void *) parent,
7232
0
            (void *) node,
7233
0
            label);
7234
0
}
7235
7236
0
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
7237
0
    char color[16];
7238
7239
0
    FILE * fp = ggml_fopen(filename, "w");
7240
0
    GGML_ASSERT(fp);
7241
7242
0
    fprintf(fp, "digraph G {\n");
7243
0
    fprintf(fp, "  newrank = true;\n");
7244
0
    fprintf(fp, "  rankdir = TB;\n");
7245
7246
0
    for (int i = 0; i < gb->n_nodes; i++) {
7247
0
        struct ggml_tensor * node = gb->nodes[i];
7248
0
        struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);
7249
7250
0
        if (ggml_graph_get_parent(gb, node) != NULL) {
7251
0
            continue;
7252
0
        }
7253
7254
0
        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
7255
0
            snprintf(color, sizeof(color), "yellow");
7256
0
        } else if (grad) {
7257
0
            if (ggml_graph_find(gf, node)) {
7258
0
                snprintf(color, sizeof(color), "green");
7259
0
            } else {
7260
0
                snprintf(color, sizeof(color), "lightblue");
7261
0
            }
7262
0
        } else {
7263
0
            snprintf(color, sizeof(color), "white");
7264
0
        }
7265
7266
0
        fprintf(fp, "  \"%p\" [ "
7267
0
                    "style = filled; fillcolor = %s; shape = record; "
7268
0
                    "label=\"",
7269
0
                (void *) node, color);
7270
7271
0
        if (strlen(node->name) > 0) {
7272
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7273
0
        } else {
7274
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7275
0
        }
7276
7277
0
        if (ggml_is_matrix(node)) {
7278
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
7279
0
        } else {
7280
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
7281
0
        }
7282
7283
0
        if (grad) {
7284
0
            fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
7285
0
        } else {
7286
0
            fprintf(fp, "\"; ]\n");
7287
0
        }
7288
0
    }
7289
7290
0
    for (int i = 0; i < gb->n_leafs; i++) {
7291
0
        struct ggml_tensor * node = gb->leafs[i];
7292
7293
0
        snprintf(color, sizeof(color), "pink");
7294
7295
0
        fprintf(fp, "  \"%p\" [ "
7296
0
                    "style = filled; fillcolor = %s; shape = record; "
7297
0
                    "label=\"<x>",
7298
0
                (void *) node, color);
7299
7300
0
        if (strlen(node->name) > 0) {
7301
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7302
0
        } else {
7303
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7304
0
        }
7305
7306
0
        fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
7307
0
        if (ggml_nelements(node) < 5 && node->data != NULL) {
7308
0
            fprintf(fp, " | (");
7309
0
            for (int j = 0; j < ggml_nelements(node); j++) {
7310
                // FIXME: use ggml-backend to obtain the tensor data
7311
                //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
7312
                //    fprintf(fp, "%d", ggml_get_i32_1d(node, j));
7313
                //}
7314
                //else if (node->type == GGML_TYPE_F32 ||
7315
                //         node->type == GGML_TYPE_F16 ||
7316
                //         node->type == GGML_TYPE_BF16) {
7317
                //    fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
7318
                //}
7319
                //else
7320
0
                {
7321
0
                    fprintf(fp, "#");
7322
0
                }
7323
0
                if (j < ggml_nelements(node) - 1) {
7324
0
                    fprintf(fp, ", ");
7325
0
                }
7326
0
            }
7327
0
            fprintf(fp, ")");
7328
0
        }
7329
0
        fprintf(fp, "\"; ]\n");
7330
0
    }
7331
7332
0
    for (int i = 0; i < gb->n_nodes; i++) {
7333
0
        struct ggml_tensor * node = gb->nodes[i];
7334
7335
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7336
0
            if (node->src[j]) {
7337
0
                char label[16];
7338
0
                snprintf(label, sizeof(label), "src %d", j);
7339
0
                ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
7340
0
            }
7341
0
        }
7342
0
    }
7343
7344
0
    for (int i = 0; i < gb->n_leafs; i++) {
7345
0
        struct ggml_tensor * node = gb->leafs[i];
7346
7347
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7348
0
            if (node->src[j]) {
7349
0
                char label[16];
7350
0
                snprintf(label, sizeof(label), "src %d", j);
7351
0
                ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
7352
0
            }
7353
0
        }
7354
0
    }
7355
7356
0
    fprintf(fp, "}\n");
7357
7358
0
    fclose(fp);
7359
7360
0
    GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
7361
0
}
7362
7363
////////////////////////////////////////////////////////////////////////////////
7364
7365
0
void ggml_set_input(struct ggml_tensor * tensor) {
7366
0
    tensor->flags |= GGML_TENSOR_FLAG_INPUT;
7367
0
}
7368
7369
0
void ggml_set_output(struct ggml_tensor * tensor) {
7370
0
    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
7371
0
}
7372
7373
0
void ggml_set_param(struct ggml_tensor * tensor) {
7374
0
    GGML_ASSERT(tensor->op == GGML_OP_NONE);
7375
0
    tensor->flags |= GGML_TENSOR_FLAG_PARAM;
7376
0
}
7377
7378
0
void ggml_set_loss(struct ggml_tensor * tensor) {
7379
0
    GGML_ASSERT(ggml_is_scalar(tensor));
7380
0
    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
7381
0
    tensor->flags |= GGML_TENSOR_FLAG_LOSS;
7382
0
}
7383
7384
////////////////////////////////////////////////////////////////////////////////
7385
7386
0
void ggml_quantize_init(enum ggml_type type) {
7387
0
    ggml_critical_section_start();
7388
7389
0
    switch (type) {
7390
0
        case GGML_TYPE_IQ2_XXS:
7391
0
        case GGML_TYPE_IQ2_XS:
7392
0
        case GGML_TYPE_IQ2_S:
7393
0
        case GGML_TYPE_IQ1_S:
7394
0
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
7395
0
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
7396
0
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
7397
0
        default: // nothing
7398
0
            break;
7399
0
    }
7400
7401
0
    ggml_critical_section_end();
7402
0
}
7403
7404
862
void ggml_quantize_free(void) {
7405
862
    ggml_critical_section_start();
7406
7407
862
    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
7408
862
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
7409
862
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
7410
862
    iq3xs_free_impl(256);
7411
7412
862
    ggml_critical_section_end();
7413
862
}
7414
7415
0
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
7416
0
    return
7417
0
        type == GGML_TYPE_IQ2_XXS ||
7418
0
        type == GGML_TYPE_IQ2_XS  ||
7419
0
        type == GGML_TYPE_IQ1_S;//   ||
7420
        //type == GGML_TYPE_IQ1_M;
7421
0
}
7422
7423
size_t ggml_quantize_chunk(
7424
        enum ggml_type   type,
7425
           const float * src,
7426
                  void * dst,
7427
               int64_t   start,
7428
               int64_t   nrows,
7429
               int64_t   n_per_row,
7430
0
           const float * imatrix) {
7431
0
    const int64_t n = (int64_t) nrows * n_per_row;
7432
7433
0
    if (ggml_quantize_requires_imatrix(type)) {
7434
0
        GGML_ASSERT(imatrix != NULL);
7435
0
    }
7436
7437
0
    GGML_ASSERT(start % type_traits[type].blck_size == 0);
7438
0
    GGML_ASSERT(start % n_per_row == 0);
7439
7440
0
    ggml_quantize_init(type); // this is noop if already initialized
7441
7442
0
    const size_t start_row = start / n_per_row;
7443
0
    const size_t row_size  = ggml_row_size(type, n_per_row);
7444
7445
0
    size_t result = 0;
7446
7447
0
    switch (type) {
7448
0
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7449
0
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7450
0
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7451
0
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7452
0
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7453
0
        case GGML_TYPE_MXFP4:   result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7454
0
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7455
0
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7456
0
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7457
0
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7458
0
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7459
0
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7460
0
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7461
0
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7462
0
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7463
0
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7464
0
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7465
0
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7466
0
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7467
0
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7468
0
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7469
0
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7470
0
        case GGML_TYPE_F16:
7471
0
            {
7472
0
                size_t elemsize = sizeof(ggml_fp16_t);
7473
0
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
7474
0
                result = n * elemsize;
7475
0
            } break;
7476
0
        case GGML_TYPE_BF16:
7477
0
            {
7478
0
                size_t elemsize = sizeof(ggml_bf16_t);
7479
0
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
7480
0
                result = n * elemsize;
7481
0
            } break;
7482
0
        case GGML_TYPE_F32:
7483
0
            {
7484
0
                size_t elemsize = sizeof(float);
7485
0
                result = n * elemsize;
7486
0
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
7487
0
            } break;
7488
0
        default:
7489
0
            assert(false);
7490
0
    }
7491
7492
0
    GGML_ASSERT(result == nrows * row_size);
7493
7494
0
    return result;
7495
0
}
7496
7497
////////////////////////////////////////////////////////////////////////////////
7498
7499
0
void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
7500
0
    g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
7501
0
    g_logger_state.log_callback_user_data = user_data;
7502
0
}
7503
7504
0
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
7505
0
    p->n_threads  = n_threads;
7506
0
    p->prio       = 0;     // default priority (usually means normal or inherited)
7507
0
    p->poll       = 50;    // hybrid-polling enabled
7508
0
    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
7509
0
    p->paused     = false; // threads are ready to go
7510
0
    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
7511
0
}
7512
7513
0
struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
7514
0
    struct ggml_threadpool_params p;
7515
0
    ggml_threadpool_params_init(&p, n_threads);
7516
0
    return p;
7517
0
}
7518
7519
0
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
7520
0
    if (p0->n_threads      != p1->n_threads  )    return false;
7521
0
    if (p0->prio           != p1->prio       )    return false;
7522
0
    if (p0->poll           != p1->poll       )    return false;
7523
0
    if (p0->strict_cpu     != p1->strict_cpu )    return false;
7524
0
    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
7525
0
}